From d8d63fd71b8d56f73469f7db41864098f087599c Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 14 Nov 2021 15:54:04 +0100 Subject: [PATCH] Optimize performance with clang-tidy The code was partially formatted with clang-format and optimized with clang-tidy --checks="-*,perfor*" --fix src/*/*.cpp Signed-off-by: Stefan Weil --- src/api/hocrrenderer.cpp | 104 +++--- src/ccmain/equationdetect.cpp | 33 +- src/ccmain/fixxht.cpp | 3 +- src/ccmain/osdetect.cpp | 2 +- src/ccmain/paragraphs.cpp | 1 + src/ccstruct/blobbox.cpp | 9 +- src/ccstruct/boxword.cpp | 18 +- src/ccstruct/boxword.h | 3 +- src/ccstruct/imagedata.cpp | 96 ++--- src/ccstruct/pageres.cpp | 180 ++++++---- src/ccstruct/pageres.h | 67 ++-- src/ccstruct/ratngs.cpp | 3 +- src/ccutil/unicharset.cpp | 175 ++++++---- src/ccutil/unicharset.h | 82 +++-- src/classify/adaptmatch.cpp | 6 +- src/classify/cluster.cpp | 10 +- src/classify/clusttool.cpp | 2 +- src/classify/intmatcher.cpp | 2 +- src/classify/intproto.cpp | 16 +- src/classify/kdtree.cpp | 4 +- src/dict/dawg.cpp | 56 +-- src/dict/dawg.h | 93 +++-- src/lstm/networkio.cpp | 9 +- src/lstm/recodebeam.cpp | 446 ++++++++++++++---------- src/textord/baselinedetect.cpp | 167 +++++---- src/textord/cjkpitch.cpp | 5 +- src/textord/colpartition.cpp | 423 +++++++++++++--------- src/textord/colpartition.h | 81 +++-- src/textord/colpartitiongrid.cpp | 261 ++++++++------ src/textord/colpartitiongrid.h | 47 ++- src/textord/colpartitionset.cpp | 70 ++-- src/textord/colpartitionset.h | 30 +- src/textord/makerow.cpp | 21 +- src/textord/oldbasel.cpp | 3 +- src/textord/tablefind.cpp | 211 +++++++---- src/textord/tablefind.h | 38 +- src/textord/tablerecog.cpp | 9 - src/textord/tablerecog.h | 14 +- src/textord/tordmain.cpp | 4 +- src/textord/tospace.cpp | 25 +- src/textord/wordseg.cpp | 4 +- src/training/combine_tessdata.cpp | 22 +- src/training/common/ctc.cpp | 5 +- src/training/common/mastertrainer.cpp | 150 ++++---- src/training/mergenf.cpp | 16 +- 
src/training/unicharset/lstmtrainer.cpp | 291 ++++++++++------ src/training/unicharset/lstmtrainer.h | 94 +++-- src/viewer/svutil.cpp | 29 +- src/wordrec/chop.cpp | 2 +- src/wordrec/findseam.cpp | 2 +- src/wordrec/gradechop.cpp | 2 +- src/wordrec/language_model.cpp | 18 +- src/wordrec/params_model.cpp | 2 +- 53 files changed, 2091 insertions(+), 1375 deletions(-) diff --git a/src/api/hocrrenderer.cpp b/src/api/hocrrenderer.cpp index 44609845..a3b042a3 100644 --- a/src/api/hocrrenderer.cpp +++ b/src/api/hocrrenderer.cpp @@ -37,7 +37,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { tesseract::WritingDirection writing_direction; tesseract::TextlineOrder textline_order; float deskew_angle; - it->Orientation(&orientation, &writing_direction, &textline_order, &deskew_angle); + it->Orientation(&orientation, &writing_direction, &textline_order, + &deskew_angle); return orientation; } @@ -49,7 +50,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { * method currently only inserts a 'textangle' property to indicate the rotation * direction and does not add any baseline information to the hocr string. 
*/ -static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel level, +static void AddBaselineCoordsTohOCR(const PageIterator *it, + PageIteratorLevel level, std::stringstream &hocr_str) { tesseract::Orientation orientation = GetBlockTextOrientation(it); if (orientation != ORIENTATION_PAGE_UP) { @@ -82,7 +84,8 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel le double p1 = (y2 - y1) / static_cast(x2 - x1); double p0 = y1 - p1 * x1; - hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " " << round(p0 * 1000.0) / 1000.0; + hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " " + << round(p0 * 1000.0) / 1000.0; } static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level, @@ -91,7 +94,8 @@ static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level, it->BoundingBox(level, &left, &top, &right, &bottom); // This is the only place we use double quotes instead of single quotes, // but it may too late to change for consistency - hocr_str << " title=\"bbox " << left << " " << top << " " << right << " " << bottom; + hocr_str << " title=\"bbox " << left << " " << top << " " << right << " " + << bottom; // Add baseline coordinates & heights for textlines only. if (level == RIL_TEXTLINE) { AddBaselineCoordsTohOCR(it, level, hocr_str); @@ -99,8 +103,8 @@ static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level, float row_height, descenders, ascenders; // row attributes it->RowAttributes(&row_height, &descenders, &ascenders); // TODO(rays): Do we want to limit these to a single decimal place? - hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders << "; x_ascenders " - << ascenders; + hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders + << "; x_ascenders " << ascenders; } hocr_str << "\">"; } @@ -128,7 +132,8 @@ char *TessBaseAPI::GetHOCRText(int page_number) { * Returned string must be freed with the delete [] operator. 
*/ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) { + if (tesseract_ == nullptr || + (page_res_ == nullptr && Recognize(monitor) < 0)) { return nullptr; } @@ -147,13 +152,16 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { #ifdef _WIN32 // convert input name from ANSI encoding to utf-8 - int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0); + int str16_len = + MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0); wchar_t *uni16_str = new WCHAR[str16_len]; - str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len); - int utf8_len = - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr); + str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, + str16_len); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, + 0, nullptr, nullptr); char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr); + WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, + nullptr, nullptr); input_file_ = utf8_str; delete[] uni16_str; delete[] utf8_str; @@ -174,8 +182,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { hocr_str << "unknown"; } - hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " " << rect_width_ << " " - << rect_height_ << "; ppageno " << page_number + hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " " + << rect_width_ << " " << rect_height_ << "; ppageno " << page_number << "; scan_res " << GetSourceYResolution() << " " << GetSourceYResolution() << "'>\n"; @@ -230,7 +238,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { // Now, process the word... 
int32_t lstm_choice_mode = tesseract_->lstm_choice_mode; - std::vector>>> *rawTimestepMap = nullptr; + std::vector>>> + *rawTimestepMap = nullptr; std::vector>> *CTCMap = nullptr; if (lstm_choice_mode) { CTCMap = res_it->GetBestLSTMSymbolChoices(); @@ -244,10 +253,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { int pointsize, font_id; const char *font_name; res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, - &smallcaps, &pointsize, &font_id); - hocr_str << " title='bbox " << left << " " << top << " " << right << " " << bottom - << "; x_wconf " << static_cast(res_it->Confidence(RIL_WORD)); + font_name = + res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, + &serif, &smallcaps, &pointsize, &font_id); + hocr_str << " title='bbox " << left << " " << top << " " << right << " " + << bottom << "; x_wconf " + << static_cast(res_it->Confidence(RIL_WORD)); if (font_info) { if (font_name) { hocr_str << "; x_font " << HOcrEscape(font_name).c_str(); @@ -287,31 +298,36 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { hocr_str << ""; } do { - const std::unique_ptr grapheme(res_it->GetUTF8Text(RIL_SYMBOL)); + const std::unique_ptr grapheme( + res_it->GetUTF8Text(RIL_SYMBOL)); if (grapheme && grapheme[0] != 0) { if (hocr_boxes) { res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom); - hocr_str << "\n "; + hocr_str << "\n "; } hocr_str << HOcrEscape(grapheme.get()).c_str(); if (hocr_boxes) { hocr_str << ""; tesseract::ChoiceIterator ci(*res_it); if (lstm_choice_mode == 1 && ci.Timesteps() != nullptr) { - std::vector>> *symbol = ci.Timesteps(); + std::vector>> *symbol = + ci.Timesteps(); hocr_str << "\n "; - for (auto timestep : *symbol) { + << "symbol_" << page_id << "_" << wcnt << "_" << scnt + << "'>"; + for (const auto ×tep : *symbol) { hocr_str << "\n "; + << "timestep" << page_id << "_" << wcnt << 
"_" << tcnt + << "'>"; for (auto conf : timestep) { hocr_str << "\n " << HOcrEscape(conf.first).c_str() << ""; ++ccnt; @@ -324,16 +340,18 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { } else if (lstm_choice_mode == 2) { hocr_str << "\n "; + << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt + << "'>"; do { const char *choice = ci.GetUTF8Text(); float choiceconf = ci.Confidence(); if (choice != nullptr) { hocr_str << "\n " << HOcrEscape(choice).c_str() - << ""; + << "choice_" << page_id << "_" << wcnt << "_" << ccnt + << "'" + << " title='x_confs " << choiceconf << "'>" + << HOcrEscape(choice).c_str() << ""; ccnt++; } } while (ci.Next()); @@ -352,18 +370,20 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { } // If the lstm choice mode is required it is added here if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) { - for (auto symbol : *rawTimestepMap) { + for (const auto &symbol : *rawTimestepMap) { hocr_str << "\n "; - for (auto timestep : symbol) { + for (const auto ×tep : symbol) { hocr_str << "\n "; + << "timestep" << page_id << "_" << wcnt << "_" << tcnt + << "'>"; for (auto conf : timestep) { hocr_str << "\n " << HOcrEscape(conf.first).c_str() << ""; ++ccnt; @@ -375,11 +395,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { ++scnt; } } else if (lstm_choice_mode == 2 && !hocr_boxes && CTCMap != nullptr) { - for (auto timestep : *CTCMap) { + for (const auto ×tep : *CTCMap) { if (timestep.size() > 0) { hocr_str << "\n "; + << "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt + << "'>"; for (auto &j : timestep) { float conf = 100 - tesseract_->lstm_rating_coefficient * j.second; if (conf < 0.0f) { @@ -390,9 +411,10 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { } hocr_str << "\n " << HOcrEscape(j.first).c_str() - << ""; + << "choice_" << page_id << "_" << wcnt << "_" << ccnt + << "'" + << " title='x_confs " << conf << "'>" + 
<< HOcrEscape(j.first).c_str() << ""; ccnt++; } hocr_str << ""; diff --git a/src/ccmain/equationdetect.cpp b/src/ccmain/equationdetect.cpp index 0c8a335a..a50ce6d1 100644 --- a/src/ccmain/equationdetect.cpp +++ b/src/ccmain/equationdetect.cpp @@ -35,6 +35,7 @@ #include #include +#include #include #include @@ -189,11 +190,11 @@ void EquationDetect::IdentifySpecialText(BLOBNBOX *blobnbox, const int height_th const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8; // The scores here are negative, so the max/min == fabs(min/max). // float ratio = fmax(lang_score, equ_score) / fmin(lang_score, equ_score); - const float diff = fabs(lang_score - equ_score); + const float diff = std::fabs(lang_score - equ_score); BlobSpecialTextType type = BSTT_NONE; // Classification. - if (fmax(lang_score, equ_score) < kConfScoreTh) { + if (std::fmax(lang_score, equ_score) < kConfScoreTh) { // If both score are very small, then mark it as unclear. type = BSTT_UNCLEAR; } else if (diff > kConfDiffTh && equ_score > lang_score) { @@ -727,7 +728,7 @@ int EquationDetect::CountAlignment(const std::vector &sorted_vec, const int if (sorted_vec.empty()) { return 0; } - const int kDistTh = static_cast(round(0.03f * resolution_)); + const int kDistTh = static_cast(std::round(0.03f * resolution_)); auto pos = std::upper_bound(sorted_vec.begin(), sorted_vec.end(), val); if (pos > sorted_vec.begin()) { --pos; @@ -772,7 +773,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() { ASSERT_HOST(cps_super_bbox_); std::vector new_seeds; const int kMarginDiffTh = IntCastRounded(0.5 * lang_tesseract_->source_resolution()); - const int kGapTh = static_cast(round(1.0f * lang_tesseract_->source_resolution())); + const int kGapTh = static_cast(std::round(1.0f * lang_tesseract_->source_resolution())); ColPartitionGridSearch search(part_grid_); search.SetUniqueMode(true); // The center x coordinate of the cp_super_bbox_. 
@@ -923,8 +924,8 @@ bool EquationDetect::IsInline(const bool search_bottom, const int textparts_line // Check if neighbor and part is inline similar. const float kHeightRatioTh = 0.5; const int kYGapTh = textparts_linespacing > 0 - ? textparts_linespacing + static_cast(round(0.02f * resolution_)) - : static_cast(round(0.05f * resolution_)); // Default value. + ? textparts_linespacing + static_cast(std::round(0.02f * resolution_)) + : static_cast(std::round(0.05f * resolution_)); // Default value. if (part_box.x_overlap(neighbor_box) && // Location feature. part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing. // Geo feature. @@ -978,9 +979,9 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition *part) { ColPartitionGridSearch search(part_grid_); ColPartition *neighbor = nullptr; const TBOX &part_box(part->bounding_box()); - const int kXGapTh = static_cast(round(0.5f * resolution_)); - const int kRadiusTh = static_cast(round(3.0f * resolution_)); - const int kYGapTh = static_cast(round(0.5f * resolution_)); + const int kXGapTh = static_cast(std::round(0.5f * resolution_)); + const int kRadiusTh = static_cast(std::round(3.0f * resolution_)); + const int kYGapTh = static_cast(std::round(0.5f * resolution_)); // Here we use a simple approximation algorithm: from the center of part, We // perform the radius search, and check if we can find a neighboring partition @@ -1080,7 +1081,7 @@ void EquationDetect::ExpandSeedHorizontal(const bool search_left, ColPartition * std::vector *parts_to_merge) { ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr); const float kYOverlapTh = 0.6; - const int kXGapTh = static_cast(round(0.2f * resolution_)); + const int kXGapTh = static_cast(std::round(0.2f * resolution_)); ColPartitionGridSearch search(part_grid_); const TBOX &seed_box(seed->bounding_box()); @@ -1132,7 +1133,7 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition * std::vector *parts_to_merge) { ASSERT_HOST(seed != 
nullptr && parts_to_merge != nullptr && cps_super_bbox_ != nullptr); const float kXOverlapTh = 0.4; - const int kYGapTh = static_cast(round(0.2f * resolution_)); + const int kYGapTh = static_cast(std::round(0.2f * resolution_)); ColPartitionGridSearch search(part_grid_); const TBOX &seed_box(seed->bounding_box()); @@ -1210,8 +1211,8 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition * } bool EquationDetect::IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const { - const int kXGapTh = static_cast(round(0.25f * resolution_)); - const int kYGapTh = static_cast(round(0.05f * resolution_)); + const int kXGapTh = static_cast(std::round(0.25f * resolution_)); + const int kYGapTh = static_cast(std::round(0.05f * resolution_)); // Check geometric feature. if (part_box.height() > seed_box.height() || part_box.width() > seed_box.width()) { @@ -1266,7 +1267,7 @@ void EquationDetect::ProcessMathBlockSatelliteParts() { int med_height = text_box.height(); if (text_parts.size() % 2 == 0 && text_parts.size() > 1) { const TBOX &text_box = text_parts[text_parts.size() / 2 - 1]->bounding_box(); - med_height = static_cast(round(0.5f * (text_box.height() + med_height))); + med_height = static_cast(std::round(0.5f * (text_box.height() + med_height))); } // Iterate every text_parts and check if it is a math block satellite. 
@@ -1348,7 +1349,7 @@ bool EquationDetect::IsMathBlockSatellite(ColPartition *part, ColPartition *EquationDetect::SearchNNVertical(const bool search_bottom, const ColPartition *part) { ASSERT_HOST(part); ColPartition *nearest_neighbor = nullptr, *neighbor = nullptr; - const int kYGapTh = static_cast(round(resolution_ * 0.5f)); + const int kYGapTh = static_cast(std::round(resolution_ * 0.5f)); ColPartitionGridSearch search(part_grid_); search.SetUniqueMode(true); @@ -1383,7 +1384,7 @@ bool EquationDetect::IsNearMathNeighbor(const int y_gap, const ColPartition *nei if (!neighbor) { return false; } - const int kYGapTh = static_cast(round(resolution_ * 0.1f)); + const int kYGapTh = static_cast(std::round(resolution_ * 0.1f)); return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh; } diff --git a/src/ccmain/fixxht.cpp b/src/ccmain/fixxht.cpp index 20f15498..80ea0831 100644 --- a/src/ccmain/fixxht.cpp +++ b/src/ccmain/fixxht.cpp @@ -23,6 +23,7 @@ #include #include +#include #include namespace tesseract { @@ -205,7 +206,7 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh new_xht / word_res->denorm.y_scale()); } // The xheight must change by at least x_ht_min_change to be used. - if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) { + if (std::fabs(new_xht - kBlnXHeight) >= x_ht_min_change) { return new_xht / word_res->denorm.y_scale(); } else { return bottom_shift != 0 ? word_res->x_height : 0.0f; diff --git a/src/ccmain/osdetect.cpp b/src/ccmain/osdetect.cpp index 8b5aeae4..daee2b40 100644 --- a/src/ccmain/osdetect.cpp +++ b/src/ccmain/osdetect.cpp @@ -428,7 +428,7 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) { // Normalize the orientation scores for the blob and use them to // update the aggregated orientation score. 
for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) { - osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score); + osr_->orientations[i] += std::log(blob_o_score[i] / total_blob_o_score); } // TODO(ranjith) Add an early exit test, based on min_orientation_margin, diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp index 4af6aecb..601afe9f 100644 --- a/src/ccmain/paragraphs.cpp +++ b/src/ccmain/paragraphs.cpp @@ -113,6 +113,7 @@ static void PrintTable(const std::vector> &rows, const } std::vector col_width_patterns; + col_width_patterns.reserve(max_col_widths.size()); for (int max_col_width : max_col_widths) { col_width_patterns.push_back(std::string("%-") + std::to_string(max_col_width) + "s"); } diff --git a/src/ccstruct/blobbox.cpp b/src/ccstruct/blobbox.cpp index f6f1c4d2..6539a723 100644 --- a/src/ccstruct/blobbox.cpp +++ b/src/ccstruct/blobbox.cpp @@ -33,6 +33,7 @@ #include // for pixGetHeight, pixGetPixel #include // for max, min +#include #include // for INT32_MAX, INT16_MAX #define PROJECTION_MARGIN 10 // arbitrary @@ -133,7 +134,7 @@ void BLOBNBOX::chop( // chop blobs BLOBNBOX_IT blob_it; // blob iterator // get no of chops - blobcount = static_cast(floor(box.width() / xheight)); + blobcount = static_cast(std::floor(box.width() / xheight)); if (blobcount > 1 && cblob_ptr != nullptr) { // width of each blobwidth = static_cast(box.width() + 1) / blobcount; @@ -150,12 +151,12 @@ void BLOBNBOX::chop( // chop blobs UpdateRange(test_ymin, test_ymax, &ymin, &ymax); } while (blob != end_it->data()); if (ymin < ymax) { - leftx = static_cast(floor(rightx - blobwidth)); + leftx = static_cast(std::floor(rightx - blobwidth)); if (leftx < box.left()) { leftx = box.left(); // clip to real box } - bl = ICOORD(leftx, static_cast(floor(ymin))); - tr = ICOORD(static_cast(ceil(rightx)), static_cast(ceil(ymax))); + bl = ICOORD(leftx, static_cast(std::floor(ymin))); + tr = ICOORD(static_cast(std::ceil(rightx)), static_cast(std::ceil(ymax))); if 
(blobindex == 0) { box = TBOX(bl, tr); // change box } else { diff --git a/src/ccstruct/boxword.cpp b/src/ccstruct/boxword.cpp index 3a08056e..7627a21c 100644 --- a/src/ccstruct/boxword.cpp +++ b/src/ccstruct/boxword.cpp @@ -63,7 +63,8 @@ BoxWord *BoxWord::CopyFromNormalized(TWERD *tessword) { for (unsigned b = 0; b < boxword->length_; ++b) { TBLOB *tblob = tessword->blobs[b]; TBOX blob_box; - for (TESSLINE *outline = tblob->outlines; outline != nullptr; outline = outline->next) { + for (TESSLINE *outline = tblob->outlines; outline != nullptr; + outline = outline->next) { EDGEPT *edgept = outline->loop; // Iterate over the edges. do { @@ -92,7 +93,8 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) { for (unsigned i = 0; i < length_; ++i) { TBOX box = boxes_[i]; // Expand by a single pixel, as the poly approximation error is 1 pixel. - box = TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1); + box = + TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1); // Now find the original box that matches. 
TBOX original_box; C_BLOB_IT b_it(original_word->cblob_list()); @@ -106,16 +108,19 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) { } } if (!original_box.null_box()) { - if (NearlyEqual(original_box.left(), box.left(), kBoxClipTolerance)) { + if (NearlyEqual(original_box.left(), box.left(), + kBoxClipTolerance)) { box.set_left(original_box.left()); } - if (NearlyEqual(original_box.right(), box.right(), kBoxClipTolerance)) { + if (NearlyEqual(original_box.right(), box.right(), + kBoxClipTolerance)) { box.set_right(original_box.right()); } if (NearlyEqual(original_box.top(), box.top(), kBoxClipTolerance)) { box.set_top(original_box.top()); } - if (NearlyEqual(original_box.bottom(), box.bottom(), kBoxClipTolerance)) { + if (NearlyEqual(original_box.bottom(), box.bottom(), + kBoxClipTolerance)) { box.set_bottom(original_box.bottom()); } } @@ -193,7 +198,8 @@ void BoxWord::ComputeBoundingBox() { // This and other putatively are the same, so call the (permanent) callback // for each blob index where the bounding boxes match. // The callback is deleted on completion. -void BoxWord::ProcessMatchedBlobs(const TWERD &other, std::function cb) const { +void BoxWord::ProcessMatchedBlobs(const TWERD &other, + const std::function &cb) const { for (unsigned i = 0; i < length_ && i < other.NumBlobs(); ++i) { TBOX blob_box = other.blobs[i]->bounding_box(); if (blob_box == boxes_[i]) { diff --git a/src/ccstruct/boxword.h b/src/ccstruct/boxword.h index 547c01d8..7966fad3 100644 --- a/src/ccstruct/boxword.h +++ b/src/ccstruct/boxword.h @@ -72,7 +72,8 @@ public: // This and other putatively are the same, so call the (permanent) callback // for each blob index where the bounding boxes match. // The callback is deleted on completion. 
- void ProcessMatchedBlobs(const TWERD &other, std::function cb) const; + void ProcessMatchedBlobs(const TWERD &other, + const std::function &cb) const; const TBOX &bounding_box() const { return bbox_; diff --git a/src/ccstruct/imagedata.cpp b/src/ccstruct/imagedata.cpp index 8e6c3731..a094a2ac 100644 --- a/src/ccstruct/imagedata.cpp +++ b/src/ccstruct/imagedata.cpp @@ -43,7 +43,8 @@ const int kMaxReadAhead = 8; ImageData::ImageData() : page_number_(-1), vertical_text_(false) {} // Takes ownership of the pix and destroys it. -ImageData::ImageData(bool vertical, Image pix) : page_number_(0), vertical_text_(vertical) { +ImageData::ImageData(bool vertical, Image pix) + : page_number_(0), vertical_text_(vertical) { SetPix(pix); } ImageData::~ImageData() { @@ -55,8 +56,8 @@ ImageData::~ImageData() { // Builds and returns an ImageData from the basic data. Note that imagedata, // truth_text, and box_text are all the actual file data, NOT filenames. ImageData *ImageData::Build(const char *name, int page_number, const char *lang, - const char *imagedata, int imagedatasize, const char *truth_text, - const char *box_text) { + const char *imagedata, int imagedatasize, + const char *truth_text, const char *box_text) { auto *image_data = new ImageData(); image_data->imagefilename_ = name; image_data->page_number_ = page_number; @@ -67,7 +68,8 @@ ImageData *ImageData::Build(const char *name, int page_number, const char *lang, memcpy(&image_data->image_data_[0], imagedata, imagedatasize); if (!image_data->AddBoxes(box_text)) { if (truth_text == nullptr || truth_text[0] == '\0') { - tprintf("Error: No text corresponding to page %d from image %s!\n", page_number, name); + tprintf("Error: No text corresponding to page %d from image %s!\n", + page_number, name); delete image_data; return nullptr; } @@ -210,8 +212,9 @@ Image ImageData::GetPix() const { // The return value is the scaled Pix, which must be pixDestroyed after use, // and scale_factor (if not nullptr) is set to the scale 
factor that was applied // to the image to achieve the target_height. -Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width, - int *scaled_height, std::vector *boxes) const { +Image ImageData::PreScale(int target_height, int max_height, + float *scale_factor, int *scaled_width, + int *scaled_height, std::vector *boxes) const { int input_width = 0; int input_height = 0; Image src_pix = GetPix(); @@ -231,8 +234,8 @@ Image ImageData::PreScale(int target_height, int max_height, float *scale_factor // Get the scaled image. Image pix = pixScale(src_pix, im_factor, im_factor); if (pix == nullptr) { - tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height, - im_factor); + tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", + input_width, input_height, im_factor); src_pix.destroy(); return nullptr; } @@ -278,9 +281,9 @@ void ImageData::Display() const { } int width = pixGetWidth(pix); int height = pixGetHeight(pix); - auto *win = - new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize), - 2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true); + auto *win = new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize), + 2 * (height + 4 * kTextSize), width + 10, + height + 3 * kTextSize, true); win->Draw(pix, 0, height - 1); pix.destroy(); // Draw the boxes. @@ -309,7 +312,8 @@ void ImageData::Display() const { // Adds the supplied boxes and transcriptions that correspond to the correct // page number. -void ImageData::AddBoxes(const std::vector &boxes, const std::vector &texts, +void ImageData::AddBoxes(const std::vector &boxes, + const std::vector &texts, const std::vector &box_pages) { // Copy the boxes and make the transcription. 
for (unsigned i = 0; i < box_pages.size(); ++i) { @@ -346,7 +350,8 @@ Image ImageData::GetPixInternal(const std::vector &image_data) { Image pix = nullptr; if (!image_data.empty()) { // Convert the array to an image. - const auto *u_data = reinterpret_cast(&image_data[0]); + const auto *u_data = + reinterpret_cast(&image_data[0]); pix = pixReadMem(u_data, image_data.size()); } return pix; @@ -361,23 +366,25 @@ bool ImageData::AddBoxes(const char *box_text) { std::vector texts; std::vector box_pages; if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text, - /*continue_on_failure*/ true, &boxes, &texts, nullptr, &box_pages)) { + /*continue_on_failure*/ true, &boxes, &texts, nullptr, + &box_pages)) { AddBoxes(boxes, texts, box_pages); return true; } else { - tprintf("Error: No boxes for page %d from image %s!\n", page_number_, imagefilename_.c_str()); + tprintf("Error: No boxes for page %d from image %s!\n", page_number_, + imagefilename_.c_str()); } } return false; } DocumentData::DocumentData(const std::string &name) - : document_name_(name) - , pages_offset_(-1) - , total_pages_(-1) - , memory_used_(0) - , max_memory_(0) - , reader_(nullptr) {} + : document_name_(name), + pages_offset_(-1), + total_pages_(-1), + memory_used_(0), + max_memory_(0), + reader_(nullptr) {} DocumentData::~DocumentData() { if (thread.joinable()) { @@ -392,15 +399,16 @@ DocumentData::~DocumentData() { // Reads all the pages in the given lstmf filename to the cache. The reader // is used to read the file. -bool DocumentData::LoadDocument(const char *filename, int start_page, int64_t max_memory, - FileReader reader) { +bool DocumentData::LoadDocument(const char *filename, int start_page, + int64_t max_memory, FileReader reader) { SetDocument(filename, max_memory, reader); pages_offset_ = start_page; return ReCachePages(); } // Sets up the document, without actually loading it. 
-void DocumentData::SetDocument(const char *filename, int64_t max_memory, FileReader reader) { +void DocumentData::SetDocument(const char *filename, int64_t max_memory, + FileReader reader) { std::lock_guard lock_p(pages_mutex_); std::lock_guard lock(general_mutex_); document_name_ = filename; @@ -485,7 +493,8 @@ bool DocumentData::IsPageAvailable(int index, ImageData **page) { } if (num_pages > 0) { index = Modulo(index, num_pages); - if (pages_offset_ <= index && static_cast(index) < pages_offset_ + pages_.size()) { + if (pages_offset_ <= index && + static_cast(index) < pages_offset_ + pages_.size()) { *page = pages_[index - pages_offset_]; // Page is available already. return true; } @@ -505,8 +514,8 @@ int64_t DocumentData::UnCache() { pages_offset_ = -1; set_total_pages(-1); set_memory_used(0); - tprintf("Unloaded document %s, saving %" PRId64 " memory\n", document_name_.c_str(), - memory_saved); + tprintf("Unloaded document %s, saving %" PRId64 " memory\n", + document_name_.c_str(), memory_saved); return memory_saved; } @@ -538,8 +547,8 @@ bool DocumentData::ReCachePages() { } pages_.clear(); TFile fp; - if (!fp.Open(document_name_.c_str(), reader_) || !fp.DeSerializeSize(&loaded_pages) || - loaded_pages <= 0) { + if (!fp.Open(document_name_.c_str(), reader_) || + !fp.DeSerializeSize(&loaded_pages) || loaded_pages <= 0) { tprintf("Deserialize header failed: %s\n", document_name_.c_str()); return false; } @@ -552,7 +561,8 @@ bool DocumentData::ReCachePages() { if (!fp.DeSerialize(&non_null)) { break; } - if (page < pages_offset_ || (max_memory_ > 0 && memory_used() > max_memory_)) { + if (page < pages_offset_ || + (max_memory_ > 0 && memory_used() > max_memory_)) { if (non_null && !ImageData::SkipDeSerialize(&fp)) { break; } @@ -574,16 +584,17 @@ bool DocumentData::ReCachePages() { } } if (page < loaded_pages) { - tprintf("Deserialize failed: %s read %d/%d lines\n", document_name_.c_str(), page, - loaded_pages); + tprintf("Deserialize failed: %s read %d/%d 
lines\n", document_name_.c_str(), + page, loaded_pages); for (auto page : pages_) { delete page; } pages_.clear(); } else if (loaded_pages > 1) { // Avoid lots of messages for training with single line images. - tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), loaded_pages, - pages_offset_ + 1, pages_offset_ + pages_.size(), document_name_.c_str()); + tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), + loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(), + document_name_.c_str()); } set_total_pages(loaded_pages); return !pages_.empty(); @@ -601,7 +612,8 @@ DocumentCache::~DocumentCache() { // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. bool DocumentCache::LoadDocuments(const std::vector &filenames, - CachingStrategy cache_strategy, FileReader reader) { + CachingStrategy cache_strategy, + FileReader reader) { cache_strategy_ = cache_strategy; int64_t fair_share_memory = 0; // In the round-robin case, each DocumentData handles restricting its content @@ -610,7 +622,7 @@ bool DocumentCache::LoadDocuments(const std::vector &filenames, if (cache_strategy_ == CS_ROUND_ROBIN) { fair_share_memory = max_memory_ / filenames.size(); } - for (auto filename : filenames) { + for (const auto &filename : filenames) { auto *document = new DocumentData(filename); document->SetDocument(filename.c_str(), fair_share_memory, reader); AddToCache(document); @@ -632,7 +644,8 @@ bool DocumentCache::AddToCache(DocumentData *data) { } // Finds and returns a document by name. 
-DocumentData *DocumentCache::FindDocument(const std::string &document_name) const { +DocumentData *DocumentCache::FindDocument( + const std::string &document_name) const { for (auto *document : documents_) { if (document->document_name() == document_name) { return document; @@ -696,7 +709,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) { } } int doc_index = serial / num_pages_per_doc_ % num_docs; - const ImageData *doc = documents_[doc_index]->GetPage(serial % num_pages_per_doc_); + const ImageData *doc = + documents_[doc_index]->GetPage(serial % num_pages_per_doc_); // Count up total memory. Background loading makes it more complicated to // keep a running count. int64_t total_memory = 0; @@ -710,7 +724,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) { // we create a hole between them and then un-caching the backmost occupied // will work for both. int num_in_front = CountNeighbourDocs(doc_index, 1); - for (int offset = num_in_front - 2; offset > 1 && total_memory >= max_memory_; --offset) { + for (int offset = num_in_front - 2; + offset > 1 && total_memory >= max_memory_; --offset) { int next_index = (doc_index + offset) % num_docs; total_memory -= documents_[next_index]->UnCache(); } @@ -718,7 +733,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) { // we take away the document that a 2nd reader is using, it will put it // back and make a hole between. 
int num_behind = CountNeighbourDocs(doc_index, -1); - for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; ++offset) { + for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; + ++offset) { int next_index = (doc_index + offset + num_docs) % num_docs; total_memory -= documents_[next_index]->UnCache(); } diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp index a2eccecc..65ea748f 100644 --- a/src/ccstruct/pageres.cpp +++ b/src/ccstruct/pageres.cpp @@ -65,7 +65,8 @@ const double kMaxWordGapRatio = 2.0; // which words to keep, based on the adjustment factors of the two words. // TODO(rays) This is horrible. Replace with an enhance params training model. static double StopperAmbigThreshold(double f1, double f2) { - return (f2 - f1) * kStopperAmbiguityThresholdGain - kStopperAmbiguityThresholdOffset; + return (f2 - f1) * kStopperAmbiguityThresholdGain - + kStopperAmbiguityThresholdOffset; } /************************************************************************* @@ -79,7 +80,8 @@ PAGE_RES::PAGE_RES(bool merge_similar_words, BLOCK_LIST *the_block_list, BLOCK_IT block_it(the_block_list); BLOCK_RES_IT block_res_it(&block_res_list); for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { - block_res_it.add_to_end(new BLOCK_RES(merge_similar_words, block_it.data())); + block_res_it.add_to_end( + new BLOCK_RES(merge_similar_words, block_it.data())); } prev_word_best_choice = prev_word_best_choice_ptr; } @@ -127,7 +129,8 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { row = the_row; bool add_next_word = false; TBOX union_box; - float line_height = the_row->x_height() + the_row->ascenders() - the_row->descenders(); + float line_height = + the_row->x_height() + the_row->ascenders() - the_row->descenders(); for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { auto *word_res = new WERD_RES(word_it.data()); word_res->x_height = the_row->x_height(); @@ -298,14 +301,17 @@ 
void WERD_RES::InitForRetryRecognition(const WERD_RES &source) { // norm_box is used to override the word bounding box to determine the // normalization scale and offset. // Returns false if the word is empty and sets up fake results. -bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tess, - Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, - bool use_body_size, bool allow_detailed_fx, ROW *row, +bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, + tesseract::Tesseract *tess, Image pix, + int norm_mode, const TBOX *norm_box, + bool numeric_mode, bool use_body_size, + bool allow_detailed_fx, ROW *row, const BLOCK *block) { auto norm_mode_hint = static_cast(norm_mode); tesseract = tess; POLY_BLOCK *pb = block != nullptr ? block->pdblk.poly_block() : nullptr; - if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY && word->cblob_list()->empty()) || + if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY && + word->cblob_list()->empty()) || (pb != nullptr && !pb->IsText())) { // Empty words occur when all the blobs have been moved to the rej_blobs // list, which seems to occur frequently in junk. @@ -317,9 +323,12 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::T SetupWordScript(unicharset_in); chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word); float word_xheight = - use_body_size && row != nullptr && row->body_size() > 0.0f ? row->body_size() : x_height; - chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE), word_xheight, baseline_shift, - numeric_mode, norm_mode_hint, norm_box, &denorm); + use_body_size && row != nullptr && row->body_size() > 0.0f + ? 
row->body_size() + : x_height; + chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE), + word_xheight, baseline_shift, numeric_mode, + norm_mode_hint, norm_box, &denorm); blob_row = row; SetupBasicsFromChoppedWord(unicharset_in); SetupBlamerBundle(); @@ -398,7 +407,8 @@ void WERD_RES::SetupBlobWidthsAndGaps() { TBOX box = blob->bounding_box(); blob_widths.push_back(box.width()); if (b + 1 < num_blobs) { - blob_gaps.push_back(chopped_word->blobs[b + 1]->bounding_box().left() - box.right()); + blob_gaps.push_back(chopped_word->blobs[b + 1]->bounding_box().left() - + box.right()); } } } @@ -460,8 +470,8 @@ bool WERD_RES::StatesAllValid() { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) { WERD_CHOICE *choice = it.data(); if (choice->TotalOfStates() != ratings_dim) { - tprintf("Cooked #%u has total of states = %u vs ratings dim of %u\n", index, - choice->TotalOfStates(), ratings_dim); + tprintf("Cooked #%u has total of states = %u vs ratings dim of %u\n", + index, choice->TotalOfStates(), ratings_dim); return false; } } @@ -471,7 +481,8 @@ bool WERD_RES::StatesAllValid() { // Prints a list of words found if debug is true or the word result matches // the word_to_debug. void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) { - if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr && + if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' && + best_choice != nullptr && best_choice->unichar_string() == std::string(word_to_debug))) { if (raw_choice != nullptr) { raw_choice->print("\nBest Raw Choice"); @@ -490,8 +501,8 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) { // Prints the top choice along with the accepted/done flags. 
void WERD_RES::DebugTopChoice(const char *msg) const { - tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted, tess_would_adapt, - done); + tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted, + tess_would_adapt, done); if (best_choice == nullptr) { tprintf("\n"); } else { @@ -516,7 +527,8 @@ void WERD_RES::FilterWordChoices(int debug_level) { int index = 0; for (it.forward(); !it.at_first(); it.forward(), ++index) { WERD_CHOICE *choice = it.data(); - float threshold = StopperAmbigThreshold(best_choice->adjust_factor(), choice->adjust_factor()); + float threshold = StopperAmbigThreshold(best_choice->adjust_factor(), + choice->adjust_factor()); // i, j index the blob choice in choice, best_choice. // chunk is an index into the chopped_word blobs (AKA chunks). // Since the two words may use different segmentations of the chunks, we @@ -555,8 +567,10 @@ void WERD_RES::FilterWordChoices(int debug_level) { } } -void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, - float rating_margin, float *thresholds) { +void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, + float min_rating, float max_rating, + float rating_margin, + float *thresholds) { int chunk = 0; int end_chunk = best_choice->state(0); int end_raw_chunk = raw_choice->state(0); @@ -612,26 +626,29 @@ bool WERD_RES::LogNewRawChoice(WERD_CHOICE *word_choice) { // The best_choices list is kept in sorted order by rating. Duplicates are // removed, and the list is kept no longer than max_num_choices in length. // Returns true if the word_choice is still a valid pointer. -bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice) { +bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, + WERD_CHOICE *word_choice) { if (best_choice != nullptr) { // Throw out obviously bad choices to save some work. // TODO(rays) Get rid of this! 
This piece of code produces different // results according to the order in which words are found, which is an // undesirable behavior. It would be better to keep all the choices and // prune them later when more information is available. - float max_certainty_delta = - StopperAmbigThreshold(best_choice->adjust_factor(), word_choice->adjust_factor()); + float max_certainty_delta = StopperAmbigThreshold( + best_choice->adjust_factor(), word_choice->adjust_factor()); if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) { max_certainty_delta = -kStopperAmbiguityThresholdOffset; } - if (word_choice->certainty() - best_choice->certainty() < max_certainty_delta) { + if (word_choice->certainty() - best_choice->certainty() < + max_certainty_delta) { if (debug) { std::string bad_string; word_choice->string_and_lengths(&bad_string, nullptr); tprintf( "Discarding choice \"%s\" with an overly low certainty" " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n", - bad_string.c_str(), word_choice->certainty(), best_choice->certainty(), + bad_string.c_str(), word_choice->certainty(), + best_choice->certainty(), max_certainty_delta + best_choice->certainty()); } delete word_choice; @@ -664,8 +681,8 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE * } else { // Old is better. 
if (debug) { - tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n", new_str.c_str(), - word_choice->rating(), choice->rating()); + tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n", + new_str.c_str(), word_choice->rating(), choice->rating()); } delete word_choice; return false; @@ -720,8 +737,8 @@ void WERD_RES::PrintBestChoices() const { } alternates_str += it.data()->unichar_string(); } - tprintf("Alternates for \"%s\": {\"%s\"}\n", best_choice->unichar_string().c_str(), - alternates_str.c_str()); + tprintf("Alternates for \"%s\": {\"%s\"}\n", + best_choice->unichar_string().c_str(), alternates_str.c_str()); } // Returns the sum of the widths of the blob between start_blob and last_blob @@ -830,12 +847,14 @@ void WERD_RES::RebuildBestState() { int length = best_choice->state(i); best_state.push_back(length); if (length > 1) { - SEAM::JoinPieces(seam_array, chopped_word->blobs, start, start + length - 1); + SEAM::JoinPieces(seam_array, chopped_word->blobs, start, + start + length - 1); } TBLOB *blob = chopped_word->blobs[start]; rebuild_word->blobs.push_back(new TBLOB(*blob)); if (length > 1) { - SEAM::BreakPieces(seam_array, chopped_word->blobs, start, start + length - 1); + SEAM::BreakPieces(seam_array, chopped_word->blobs, start, + start + length - 1); } start += length; } @@ -925,7 +944,8 @@ void WERD_RES::FakeWordFromRatings(PermuterType permuter) { rating = choice->rating(); certainty = choice->certainty(); } - word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating, certainty); + word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating, + certainty); } LogNewRawChoice(word_choice); // Ownership of word_choice taken by word here. @@ -948,14 +968,17 @@ void WERD_RES::BestChoiceToCorrectText() { // callback box_cb is nullptr or returns true, setting the merged blob // result to the class returned from class_cb. // Returns true if anything was merged. 
-bool WERD_RES::ConditionalBlobMerge(std::function class_cb, - std::function box_cb) { +bool WERD_RES::ConditionalBlobMerge( + const std::function &class_cb, + const std::function &box_cb) { ASSERT_HOST(best_choice->empty() || ratings != nullptr); bool modified = false; for (unsigned i = 0; i + 1 < best_choice->length(); ++i) { - UNICHAR_ID new_id = class_cb(best_choice->unichar_id(i), best_choice->unichar_id(i + 1)); + UNICHAR_ID new_id = + class_cb(best_choice->unichar_id(i), best_choice->unichar_id(i + 1)); if (new_id != INVALID_UNICHAR_ID && - (box_cb == nullptr || box_cb(box_word->BlobBox(i), box_word->BlobBox(i + 1)))) { + (box_cb == nullptr || + box_cb(box_word->BlobBox(i), box_word->BlobBox(i + 1)))) { // Raw choice should not be fixed. best_choice->set_unichar_id(new_id, i); modified = true; @@ -1003,8 +1026,9 @@ static int is_simple_quote(const char *signed_str, int length) { // Standard 1 byte quotes. return (length == 1 && (*str == '\'' || *str == '`')) || // UTF-8 3 bytes curved quotes. - (length == 3 && ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) || - (*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99))); + (length == 3 && + ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) || + (*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99))); } // Callback helper for fix_quotes returns a double quote if both @@ -1012,7 +1036,8 @@ static int is_simple_quote(const char *signed_str, int length) { UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) { const char *ch = uch_set->id_to_unichar(id1); const char *next_ch = uch_set->id_to_unichar(id2); - if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) { + if (is_simple_quote(ch, strlen(ch)) && + is_simple_quote(next_ch, strlen(next_ch))) { return uch_set->unichar_to_id("\""); } return INVALID_UNICHAR_ID; @@ -1020,7 +1045,8 @@ UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) { // Change pairs of quotes to double quotes. 
void WERD_RES::fix_quotes() { - if (!uch_set->contains_unichar("\"") || !uch_set->get_enabled(uch_set->unichar_to_id("\""))) { + if (!uch_set->contains_unichar("\"") || + !uch_set->get_enabled(uch_set->unichar_to_id("\""))) { return; // Don't create it if it is disallowed. } @@ -1049,7 +1075,8 @@ bool WERD_RES::HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2) { // Change pairs of hyphens to a single hyphen if the bounding boxes touch // Typically a long dash which has been segmented. void WERD_RES::fix_hyphens() { - if (!uch_set->contains_unichar("-") || !uch_set->get_enabled(uch_set->unichar_to_id("-"))) { + if (!uch_set->contains_unichar("-") || + !uch_set->get_enabled(uch_set->unichar_to_id("-"))) { return; // Don't create it if it is disallowed. } @@ -1071,7 +1098,8 @@ UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) { // Change pairs of tess failures to a single one void WERD_RES::merge_tess_fails() { using namespace std::placeholders; // for _1, _2 - if (ConditionalBlobMerge(std::bind(&WERD_RES::BothSpaces, this, _1, _2), nullptr)) { + if (ConditionalBlobMerge(std::bind(&WERD_RES::BothSpaces, this, _1, _2), + nullptr)) { unsigned len = best_choice->length(); ASSERT_HOST(reject_map.length() == len); ASSERT_HOST(box_word->length() == len); @@ -1178,7 +1206,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { } WERD_RES_IT word_res_it(&row_res->word_res_list); - for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); word_res_it.forward()) { + for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); + word_res_it.forward()) { if (word_res_it.data() == word_res) { return -1; } else if (word_res_it.data() == other.word_res) { @@ -1190,7 +1219,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { // we both point to the same block, but different rows. 
ROW_RES_IT row_res_it(&block_res->row_res_list); - for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); row_res_it.forward()) { + for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); + row_res_it.forward()) { if (row_res_it.data() == row_res) { return -1; } else if (row_res_it.data() == other.row_res) { @@ -1202,7 +1232,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { // We point to different blocks. BLOCK_RES_IT block_res_it(&page_res->block_res_list); - for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); block_res_it.forward()) { + for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); + block_res_it.forward()) { if (block_res_it.data() == block_res) { return -1; } else if (block_res_it.data() == other.block_res) { @@ -1218,7 +1249,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { // before the current position. The simple fields of the WERD_RES are copied // from clone_res and the resulting WERD_RES is returned for further setup // with best_choice etc. -WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word) { +WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, + WERD *new_word) { // Make a WERD_RES for the new_word. auto *new_res = new WERD_RES(new_word); new_res->CopySimpleFields(clone_res); @@ -1245,7 +1277,8 @@ WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *ne // are likely very poor, if they come from LSTM, where it only outputs the // character at one pixel within it, so we find the midpoints between them. 
static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box, - C_BLOB_LIST *next_word_blobs, std::vector *blob_ends) { + C_BLOB_LIST *next_word_blobs, + std::vector *blob_ends) { C_BLOB_IT blob_it(word.word->cblob_list()); for (int length : word.best_state) { // Get the bounding box of the fake blobs @@ -1272,17 +1305,18 @@ static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box, // Helper computes the bounds of a word by restricting it to existing words // that significantly overlap. -static TBOX ComputeWordBounds(const tesseract::PointerVector &words, int w_index, - TBOX prev_box, WERD_RES_IT w_it) { +static TBOX ComputeWordBounds(const tesseract::PointerVector &words, + int w_index, TBOX prev_box, WERD_RES_IT w_it) { constexpr int kSignificantOverlapFraction = 4; TBOX clipped_box; TBOX current_box = words[w_index]->word->bounding_box(); TBOX next_box; - if (static_cast(w_index + 1) < words.size() && words[w_index + 1] != nullptr && - words[w_index + 1]->word != nullptr) { + if (static_cast(w_index + 1) < words.size() && + words[w_index + 1] != nullptr && words[w_index + 1]->word != nullptr) { next_box = words[w_index + 1]->word->bounding_box(); } - for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo; w_it.forward()) { + for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo; + w_it.forward()) { if (w_it.data() == nullptr || w_it.data()->word == nullptr) { continue; } @@ -1317,14 +1351,19 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector &words, i // Helper moves the blob from src to dest. If it isn't contained by clip_box, // the blob is replaced by a fake that is contained. 
-static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it, const TBOX &clip_box) { +static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it, + const TBOX &clip_box) { C_BLOB *src_blob = src_it->extract(); TBOX box = src_blob->bounding_box(); if (!clip_box.contains(box)) { - int left = ClipToRange(box.left(), clip_box.left(), clip_box.right() - 1); - int right = ClipToRange(box.right(), clip_box.left() + 1, clip_box.right()); - int top = ClipToRange(box.top(), clip_box.bottom() + 1, clip_box.top()); - int bottom = ClipToRange(box.bottom(), clip_box.bottom(), clip_box.top() - 1); + int left = + ClipToRange(box.left(), clip_box.left(), clip_box.right() - 1); + int right = + ClipToRange(box.right(), clip_box.left() + 1, clip_box.right()); + int top = + ClipToRange(box.top(), clip_box.bottom() + 1, clip_box.top()); + int bottom = + ClipToRange(box.bottom(), clip_box.bottom(), clip_box.top() - 1); box = TBOX(left, bottom, right, top); delete src_blob; src_blob = C_BLOB::FakeBlob(box); @@ -1336,7 +1375,8 @@ static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it, const TBOX &c // Replaces the current WERD/WERD_RES with the given words. The given words // contain fake blobs that indicate the position of the characters. These are // replaced with real blobs from the current word as much as possible. -void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector *words) { +void PAGE_RES_IT::ReplaceCurrentWord( + tesseract::PointerVector *words) { if (words->empty()) { DeleteCurrentWord(); return; @@ -1405,11 +1445,13 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector *words) int end_x = blob_ends[i]; TBOX blob_box; // Add the blobs up to end_x. 
- while (!src_b_it.empty() && src_b_it.data()->bounding_box().x_middle() < end_x) { + while (!src_b_it.empty() && + src_b_it.data()->bounding_box().x_middle() < end_x) { blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box); src_b_it.forward(); } - while (!rej_b_it.empty() && rej_b_it.data()->bounding_box().x_middle() < end_x) { + while (!rej_b_it.empty() && + rej_b_it.data()->bounding_box().x_middle() < end_x) { blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box); rej_b_it.forward(); } @@ -1484,13 +1526,14 @@ void PAGE_RES_IT::MakeCurrentWordFuzzy() { // The next word should be the corresponding part of combo, but we have // already stepped past it, so find it by search. WERD_RES_IT wr_it(&row()->word_res_list); - for (wr_it.mark_cycle_pt(); !wr_it.cycled_list() && wr_it.data() != word_res; - wr_it.forward()) { + for (wr_it.mark_cycle_pt(); + !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) { } wr_it.forward(); ASSERT_HOST(wr_it.data()->part_of_combo); real_word = wr_it.data()->word; - ASSERT_HOST(!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)); + ASSERT_HOST(!real_word->flag(W_FUZZY_SP) && + !real_word->flag(W_FUZZY_NON)); real_word->set_flag(W_FUZZY_SP, true); } } @@ -1531,7 +1574,8 @@ void PAGE_RES_IT::ResetWordIterator() { // cycled_list state correctly. word_res_it.move_to_first(); for (word_res_it.mark_cycle_pt(); - !word_res_it.cycled_list() && word_res_it.data() != next_word_res; word_res_it.forward()) { + !word_res_it.cycled_list() && word_res_it.data() != next_word_res; + word_res_it.forward()) { if (!word_res_it.data()->part_of_combo) { if (prev_row_res == row_res) { prev_word_res = word_res; @@ -1624,8 +1668,9 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) { foundword: // Update prev_word_best_choice pointer. if (page_res != nullptr && page_res->prev_word_best_choice != nullptr) { - *page_res->prev_word_best_choice = - (new_block || prev_word_res == nullptr) ? 
nullptr : prev_word_res->best_choice; + *page_res->prev_word_best_choice = (new_block || prev_word_res == nullptr) + ? nullptr + : prev_word_res->best_choice; } return word_res; } @@ -1653,8 +1698,9 @@ WERD_RES *PAGE_RES_IT::restart_row() { *************************************************************************/ WERD_RES *PAGE_RES_IT::forward_paragraph() { - while (block_res == next_block_res && (next_row_res != nullptr && next_row_res->row != nullptr && - row_res->row->para() == next_row_res->row->para())) { + while (block_res == next_block_res && + (next_row_res != nullptr && next_row_res->row != nullptr && + row_res->row->para() == next_row_res->row->para())) { internal_forward(false, true); } return internal_forward(false, true); diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 39ef8f37..48e70b73 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -19,24 +19,24 @@ #ifndef PAGERES_H #define PAGERES_H -#include "blamer.h" // for BlamerBundle (ptr only), IRR_NUM_REASONS -#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH +#include "blamer.h" // for BlamerBundle (ptr only), IRR_NUM_REASONS +#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH +#include "elst.h" // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH #include "genericvector.h" // for PointerVector -#include "elst.h" // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH -#include "matrix.h" // for MATRIX -#include "normalis.h" // for DENORM -#include "ratngs.h" // for WERD_CHOICE, BLOB_CHOICE (ptr only) -#include "rect.h" // for TBOX -#include "rejctmap.h" // for REJMAP -#include "unicharset.h" // for UNICHARSET, UNICHARSET::Direction, UNI... -#include "werd.h" // for WERD, W_BOL, W_EOL +#include "matrix.h" // for MATRIX +#include "normalis.h" // for DENORM +#include "ratngs.h" // for WERD_CHOICE, BLOB_CHOICE (ptr only) +#include "rect.h" // for TBOX +#include "rejctmap.h" // for REJMAP +#include "unicharset.h" // for UNICHARSET, UNICHARSET::Direction, UNI... 
+#include "werd.h" // for WERD, W_BOL, W_EOL #include // for UNICHAR_ID, INVALID_UNICHAR_ID -#include // for int32_t, int16_t -#include // for std::function -#include // for std::pair -#include // for std::vector +#include // for int32_t, int16_t +#include // for std::function +#include // for std::pair +#include // for std::vector #include // for int8_t @@ -218,7 +218,8 @@ public: // Stores the lstm choices of every timestep std::vector>> timesteps; // Stores the lstm choices of every timestep segmented by character - std::vector>>> segmented_timesteps; + std::vector>>> + segmented_timesteps; // Symbolchoices acquired during CTC std::vector>> CTC_symbol_choices; // Stores if the timestep vector starts with a space @@ -391,7 +392,8 @@ public: } bool AnyRtlCharsInWord() const { - if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) { + if (uch_set == nullptr || best_choice == nullptr || + best_choice->length() < 1) { return false; } for (unsigned id = 0; id < best_choice->length(); id++) { @@ -400,7 +402,8 @@ public: continue; // Ignore illegal chars. } UNICHARSET::Direction dir = uch_set->get_direction(unichar_id); - if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) { + if (dir == UNICHARSET::U_RIGHT_TO_LEFT || + dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) { return true; } } @@ -408,7 +411,8 @@ public: } bool AnyLtrCharsInWord() const { - if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) { + if (uch_set == nullptr || best_choice == nullptr || + best_choice->length() < 1) { return false; } for (unsigned id = 0; id < best_choice->length(); id++) { @@ -417,7 +421,8 @@ public: continue; // Ignore illegal chars. 
} UNICHARSET::Direction dir = uch_set->get_direction(unichar_id); - if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER) { + if (dir == UNICHARSET::U_LEFT_TO_RIGHT || + dir == UNICHARSET::U_ARABIC_NUMBER) { return true; } } @@ -463,9 +468,11 @@ public: // of any of the above flags. It should really be a tesseract::OcrEngineMode // but is declared as int for ease of use with tessedit_ocr_engine_mode. // Returns false if the word is empty and sets up fake results. - bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, - Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, - bool use_body_size, bool allow_detailed_fx, ROW *row, + bool SetupForRecognition(const UNICHARSET &unicharset_in, + tesseract::Tesseract *tesseract, Image pix, + int norm_mode, const TBOX *norm_box, + bool numeric_mode, bool use_body_size, + bool allow_detailed_fx, ROW *row, const BLOCK *block); // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty @@ -529,8 +536,9 @@ public: // min_rating limits how tight to make a template. // max_rating limits how loose to make a template. // rating_margin denotes the amount of margin to put in template. - void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, - float rating_margin, float *thresholds); + void ComputeAdaptionThresholds(float certainty_scale, float min_rating, + float max_rating, float rating_margin, + float *thresholds); // Saves a copy of the word_choice if it has the best unadjusted rating. // Returns true if the word_choice was the new best. @@ -541,7 +549,8 @@ public: // The best_choices list is kept in sorted order by rating. Duplicates are // removed, and the list is kept no longer than max_num_choices in length. // Returns true if the word_choice is still a valid pointer. 
- bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice); + bool LogNewCookedChoice(int max_num_choices, bool debug, + WERD_CHOICE *word_choice); // Prints a brief list of all the best choices. void PrintBestChoices() const; @@ -616,8 +625,9 @@ public: // callback box_cb is nullptr or returns true, setting the merged blob // result to the class returned from class_cb. // Returns true if anything was merged. - bool ConditionalBlobMerge(std::function class_cb, - std::function box_cb); + bool ConditionalBlobMerge( + const std::function &class_cb, + const std::function &box_cb); // Merges 2 adjacent blobs in the result (index and index+1) and corrects // all the data to account for the change. @@ -683,7 +693,8 @@ public: // Do two PAGE_RES_ITs point at the same word? // This is much cheaper than cmp(). bool operator==(const PAGE_RES_IT &other) const { - return word_res == other.word_res && row_res == other.row_res && block_res == other.block_res; + return word_res == other.word_res && row_res == other.row_res && + block_res == other.block_res; } bool operator!=(const PAGE_RES_IT &other) const { diff --git a/src/ccstruct/ratngs.cpp b/src/ccstruct/ratngs.cpp index add07471..04c6b3e9 100644 --- a/src/ccstruct/ratngs.cpp +++ b/src/ccstruct/ratngs.cpp @@ -28,6 +28,7 @@ #include "unicharset.h" #include +#include #include #include @@ -149,7 +150,7 @@ BLOB_CHOICE &BLOB_CHOICE::operator=(const BLOB_CHOICE &other) { // Returns true if *this and other agree on the baseline and x-height // to within some tolerance based on a given estimate of the x-height. 
bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const { - double baseline_diff = fabs(yshift() - other.yshift()); + double baseline_diff = std::fabs(yshift() - other.yshift()); if (baseline_diff > kMaxBaselineDrift * x_height) { if (debug) { tprintf("Baseline diff %g for %d v %d\n", baseline_diff, unichar_id_, other.unichar_id_); diff --git a/src/ccutil/unicharset.cpp b/src/ccutil/unicharset.cpp index 9d5b470f..b56a0420 100644 --- a/src/ccutil/unicharset.cpp +++ b/src/ccutil/unicharset.cpp @@ -58,24 +58,26 @@ const double kMinXHeightFraction = 0.25; const double kMinCapHeightFraction = 0.05; /*static */ -const char *UNICHARSET::kCustomLigatures[][2] = {{"ct", "\uE003"}, // c + t -> U+E003 - {"Å¿h", "\uE006"}, // long-s + h -> U+E006 - {"Å¿i", "\uE007"}, // long-s + i -> U+E007 - {"Å¿l", "\uE008"}, // long-s + l -> U+E008 - {"Å¿Å¿", "\uE009"}, // long-s + long-s -> U+E009 - {nullptr, nullptr}}; +const char *UNICHARSET::kCustomLigatures[][2] = { + {"ct", "\uE003"}, // c + t -> U+E003 + {"Å¿h", "\uE006"}, // long-s + h -> U+E006 + {"Å¿i", "\uE007"}, // long-s + i -> U+E007 + {"Å¿l", "\uE008"}, // long-s + l -> U+E008 + {"Å¿Å¿", "\uE009"}, // long-s + long-s -> U+E009 + {nullptr, nullptr}}; // List of mappings to make when ingesting strings from the outside. // The substitutions clean up text that should exist for rendering of // synthetic data, but not in the recognition set. -const char *UNICHARSET::kCleanupMaps[][2] = {{"\u0640", ""}, // TATWEEL is deleted. - {"\ufb01", "fi"}, // fi ligature->fi pair. - {"\ufb02", "fl"}, // fl ligature->fl pair. - {nullptr, nullptr}}; +const char *UNICHARSET::kCleanupMaps[][2] = { + {"\u0640", ""}, // TATWEEL is deleted. + {"\ufb01", "fi"}, // fi ligature->fi pair. + {"\ufb02", "fl"}, // fl ligature->fl pair. + {nullptr, nullptr}}; // List of strings for the SpecialUnicharCodes. Keep in sync with the enum. 
-const char *UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = {" ", "Joined", - "|Broken|0|1"}; +const char *UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = { + " ", "Joined", "|Broken|0|1"}; const char *UNICHARSET::null_script = "NULL"; @@ -137,7 +139,8 @@ bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const { } // Expands the ranges with the ranges from the src properties. -void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(const UNICHAR_PROPERTIES &src) { +void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom( + const UNICHAR_PROPERTIES &src) { UpdateRange(src.min_bottom, &min_bottom, &max_bottom); UpdateRange(src.max_bottom, &min_bottom, &max_bottom); UpdateRange(src.min_top, &min_top, &max_top); @@ -164,7 +167,8 @@ void UNICHARSET::UNICHAR_PROPERTIES::CopyFrom(const UNICHAR_PROPERTIES &src) { fragment = saved_fragment; } -UNICHARSET::UNICHARSET() : ids(), script_table(nullptr), script_table_size_used(0) { +UNICHARSET::UNICHARSET() + : ids(), script_table(nullptr), script_table_size_used(0) { clear(); for (int i = 0; i < SPECIAL_UNICHAR_CODES_COUNT; ++i) { unichar_insert(kSpecialUnicharCodes[i]); @@ -180,13 +184,15 @@ UNICHARSET::~UNICHARSET() { UNICHAR_ID UNICHARSET::unichar_to_id(const char *const unichar_repr) const { - std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr); + std::string cleaned = + old_style_included_ ? unichar_repr : CleanupString(unichar_repr); return ids.contains(cleaned.data(), cleaned.size()) ? 
ids.unichar_to_id(cleaned.data(), cleaned.size()) : INVALID_UNICHAR_ID; } -UNICHAR_ID UNICHARSET::unichar_to_id(const char *const unichar_repr, int length) const { +UNICHAR_ID UNICHARSET::unichar_to_id(const char *const unichar_repr, + int length) const { assert(length > 0 && length <= UNICHAR_LEN); std::string cleaned(unichar_repr, length); if (!old_style_included_) { @@ -215,7 +221,8 @@ int UNICHARSET::step(const char *str) const { // Return whether the given UTF-8 string is encodable with this UNICHARSET. // If not encodable, write the first byte offset which cannot be converted // into the second (return) argument. -bool UNICHARSET::encodable_string(const char *str, unsigned *first_bad_position) const { +bool UNICHARSET::encodable_string(const char *str, + unsigned *first_bad_position) const { std::vector encoding; return encode_string(str, true, &encoding, nullptr, first_bad_position); } @@ -230,7 +237,8 @@ bool UNICHARSET::encodable_string(const char *str, unsigned *first_bad_position) // that do not belong in the unicharset, or encoding may fail. // Use CleanupString to perform the cleaning. bool UNICHARSET::encode_string(const char *str, bool give_up_on_failure, - std::vector *encoding, std::vector *lengths, + std::vector *encoding, + std::vector *lengths, unsigned *encoded_length) const { std::vector working_encoding; std::vector working_lengths; @@ -240,8 +248,8 @@ bool UNICHARSET::encode_string(const char *str, bool give_up_on_failure, unsigned str_pos = 0; bool perfect = true; while (str_pos < str_length) { - encode_string(str, str_pos, str_length, &working_encoding, &working_lengths, &str_pos, encoding, - &best_lengths); + encode_string(str, str_pos, str_length, &working_encoding, &working_lengths, + &str_pos, encoding, &best_lengths); if (str_pos < str_length) { // This is a non-match. Skip one utf-8 character. 
perfect = false; @@ -357,8 +365,9 @@ void UNICHARSET::set_normed_ids(UNICHAR_ID unichar_id) { unichars[unichar_id].properties.normed_ids.clear(); if (unichar_id == UNICHAR_SPACE && id_to_unichar(unichar_id)[0] == ' ') { unichars[unichar_id].properties.normed_ids.push_back(UNICHAR_SPACE); - } else if (!encode_string(unichars[unichar_id].properties.normed.c_str(), true, - &unichars[unichar_id].properties.normed_ids, nullptr, nullptr)) { + } else if (!encode_string(unichars[unichar_id].properties.normed.c_str(), + true, &unichars[unichar_id].properties.normed_ids, + nullptr, nullptr)) { unichars[unichar_id].properties.normed_ids.clear(); unichars[unichar_id].properties.normed_ids.push_back(unichar_id); } @@ -383,7 +392,8 @@ void UNICHARSET::set_ranges_empty() { // Sets all the properties for this unicharset given a src unicharset with // everything set. The unicharsets don't have to be the same, and graphemes // are correctly accounted for. -void UNICHARSET::PartialSetPropertiesFromOther(int start_index, const UNICHARSET &src) { +void UNICHARSET::PartialSetPropertiesFromOther(int start_index, + const UNICHARSET &src) { for (unsigned ch = start_index; ch < unichars.size(); ++ch) { const char *utf8 = id_to_unichar(ch); UNICHAR_PROPERTIES properties; @@ -464,8 +474,10 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET &src) { // Returns true if the acceptable ranges of the tops of the characters do // not overlap, making their x-height calculations distinct. 
bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const { - int overlap = std::min(unichars[id1].properties.max_top, unichars[id2].properties.max_top) - - std::max(unichars[id1].properties.min_top, unichars[id2].properties.min_top); + int overlap = std::min(unichars[id1].properties.max_top, + unichars[id2].properties.max_top) - + std::max(unichars[id1].properties.min_top, + unichars[id2].properties.min_top); return overlap <= 0; } @@ -478,8 +490,10 @@ bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const { // the overall process of encoding a partially failed string more efficient. // See unicharset.h for definition of the args. void UNICHARSET::encode_string(const char *str, int str_index, int str_length, - std::vector *encoding, std::vector *lengths, - unsigned *best_total_length, std::vector *best_encoding, + std::vector *encoding, + std::vector *lengths, + unsigned *best_total_length, + std::vector *best_encoding, std::vector *best_lengths) const { if (str_index > static_cast(*best_total_length)) { // This is the best result so far. @@ -504,8 +518,8 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length, UNICHAR_ID id = ids.unichar_to_id(str + str_index, length); encoding->push_back(id); lengths->push_back(length); - encode_string(str, str_index + length, str_length, encoding, lengths, best_total_length, - best_encoding, best_lengths); + encode_string(str, str_index + length, str_length, encoding, lengths, + best_total_length, best_encoding, best_lengths); if (static_cast(*best_total_length) == str_length) { return; // Tail recursion success! } @@ -526,7 +540,8 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length, // Returns false if no valid match was found in the unicharset. // NOTE that script_id, mirror, and other_case refer to this unicharset on // return and will need translation if the target unicharset is different. 
-bool UNICHARSET::GetStrProperties(const char *utf8_str, UNICHAR_PROPERTIES *props) const { +bool UNICHARSET::GetStrProperties(const char *utf8_str, + UNICHAR_PROPERTIES *props) const { props->Init(); props->SetRangesEmpty(); int total_unicodes = 0; @@ -636,22 +651,26 @@ char UNICHARSET::get_chartype(UNICHAR_ID id) const { return 0; } -void UNICHARSET::unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style) { +void UNICHARSET::unichar_insert(const char *const unichar_repr, + OldUncleanUnichars old_style) { if (old_style == OldUncleanUnichars::kTrue) { old_style_included_ = true; } - std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr); + std::string cleaned = + old_style_included_ ? unichar_repr : CleanupString(unichar_repr); if (!cleaned.empty() && !ids.contains(cleaned.data(), cleaned.size())) { const char *str = cleaned.c_str(); std::vector encoding; - if (!old_style_included_ && encode_string(str, true, &encoding, nullptr, nullptr)) { + if (!old_style_included_ && + encode_string(str, true, &encoding, nullptr, nullptr)) { return; } auto &u = unichars.emplace_back(); int index = 0; do { if (index >= UNICHAR_LEN) { - fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN, unichar_repr); + fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN, + unichar_repr); return; } u.representation[index++] = *str++; @@ -673,11 +692,13 @@ void UNICHARSET::unichar_insert(const char *const unichar_repr, OldUncleanUnicha } bool UNICHARSET::contains_unichar(const char *const unichar_repr) const { - std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr); + std::string cleaned = + old_style_included_ ? 
unichar_repr : CleanupString(unichar_repr); return ids.contains(cleaned.data(), cleaned.size()); } -bool UNICHARSET::contains_unichar(const char *const unichar_repr, int length) const { +bool UNICHARSET::contains_unichar(const char *const unichar_repr, + int length) const { if (length == 0) { return false; } @@ -688,7 +709,8 @@ bool UNICHARSET::contains_unichar(const char *const unichar_repr, int length) co return ids.contains(cleaned.data(), cleaned.size()); } -bool UNICHARSET::eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const { +bool UNICHARSET::eq(UNICHAR_ID unichar_id, + const char *const unichar_repr) const { return strcmp(this->id_to_unichar(unichar_id), unichar_repr) == 0; } @@ -709,17 +731,20 @@ bool UNICHARSET::save_to_string(std::string &str) const { unsigned int properties = this->get_properties(id); if (strcmp(this->id_to_unichar(id), " ") == 0) { snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties, - this->get_script_from_script_id(this->get_script(id)), this->get_other_case(id)); + this->get_script_from_script_id(this->get_script(id)), + this->get_other_case(id)); str += buffer; } else { std::ostringstream stream; stream.imbue(std::locale::classic()); - stream << this->id_to_unichar(id) << ' ' << properties << ' ' << min_bottom << ',' - << max_bottom << ',' << min_top << ',' << max_top << ',' << width << ',' << width_sd - << ',' << bearing << ',' << bearing_sd << ',' << advance << ',' << advance_sd << ' ' + stream << this->id_to_unichar(id) << ' ' << properties << ' ' + << min_bottom << ',' << max_bottom << ',' << min_top << ',' + << max_top << ',' << width << ',' << width_sd << ',' << bearing + << ',' << bearing_sd << ',' << advance << ',' << advance_sd << ' ' << this->get_script_from_script_id(this->get_script(id)) << ' ' - << this->get_other_case(id) << ' ' << this->get_direction(id) << ' ' - << this->get_mirror(id) << ' ' << this->get_normed_unichar(id) << "\t# " + << this->get_other_case(id) << ' ' << 
this->get_direction(id) + << ' ' << this->get_mirror(id) << ' ' + << this->get_normed_unichar(id) << "\t# " << this->debug_str(id).c_str() << '\n'; str += stream.str().c_str(); } @@ -741,24 +766,28 @@ private: bool UNICHARSET::load_from_file(FILE *file, bool skip_fragments) { LocalFilePointer lfp(file); using namespace std::placeholders; // for _1, _2 - std::function fgets_cb = std::bind(&LocalFilePointer::fgets, &lfp, _1, _2); + std::function fgets_cb = + std::bind(&LocalFilePointer::fgets, &lfp, _1, _2); bool success = load_via_fgets(fgets_cb, skip_fragments); return success; } bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) { using namespace std::placeholders; // for _1, _2 - std::function fgets_cb = std::bind(&tesseract::TFile::FGets, file, _1, _2); + std::function fgets_cb = + std::bind(&tesseract::TFile::FGets, file, _1, _2); bool success = load_via_fgets(fgets_cb, skip_fragments); return success; } -bool UNICHARSET::load_via_fgets(std::function fgets_cb, bool skip_fragments) { +bool UNICHARSET::load_via_fgets( + const std::function &fgets_cb, bool skip_fragments) { int unicharset_size; char buffer[256]; this->clear(); - if (fgets_cb(buffer, sizeof(buffer)) == nullptr || sscanf(buffer, "%d", &unicharset_size) != 1) { + if (fgets_cb(buffer, sizeof(buffer)) == nullptr || + sscanf(buffer, "%d", &unicharset_size) != 1) { return false; } for (UNICHAR_ID id = 0; id < unicharset_size; ++id) { @@ -800,27 +829,30 @@ bool UNICHARSET::load_via_fgets(std::function fgets_cb, boo auto position = stream.tellg(); stream.seekg(position); char c1, c2, c3, c4, c5, c6, c7, c8, c9; - stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> width >> - c5 >> width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> advance >> c9 >> advance_sd >> - std::setw(63) >> script >> other_case >> direction >> mirror >> std::setw(63) >> normed; - if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || c5 != ',' || - c6 != 
',' || c7 != ',' || c8 != ',' || c9 != ',') { + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> + max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >> + bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >> + script >> other_case >> direction >> mirror >> std::setw(63) >> normed; + if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || + c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') { stream.clear(); stream.seekg(position); - stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> width >> - c5 >> width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> advance >> c9 >> - advance_sd >> std::setw(63) >> script >> other_case >> direction >> mirror; - if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || c5 != ',' || - c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') { + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> + max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >> + bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >> + script >> other_case >> direction >> mirror; + if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || + c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') { stream.clear(); stream.seekg(position); - stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> - std::setw(63) >> script >> other_case >> direction >> mirror; + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> + max_top >> std::setw(63) >> script >> other_case >> direction >> + mirror; if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') { stream.clear(); stream.seekg(position); - stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> - std::setw(63) >> script >> other_case; + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> + max_top >> std::setw(63) >> script >> other_case; if (stream.fail() || c1 
!= ',' || c2 != ',' || c3 != ',') { stream.clear(); stream.seekg(position); @@ -909,8 +941,9 @@ void UNICHARSET::post_load_setup() { script_has_upper_lower_ = net_case_alphas > 0; script_has_xheight_ = - script_has_upper_lower_ || (x_height_alphas > cap_height_alphas * kMinXHeightFraction && - cap_height_alphas > x_height_alphas * kMinCapHeightFraction); + script_has_upper_lower_ || + (x_height_alphas > cap_height_alphas * kMinXHeightFraction && + cap_height_alphas > x_height_alphas * kMinCapHeightFraction); null_sid_ = get_script_id_from_name(null_script); ASSERT_HOST(null_sid_ == 0); @@ -954,7 +987,8 @@ bool UNICHARSET::major_right_to_left() const { if (dir == UNICHARSET::U_LEFT_TO_RIGHT) { ltr_count++; } - if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC || + if (dir == UNICHARSET::U_RIGHT_TO_LEFT || + dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC || dir == UNICHARSET::U_ARABIC_NUMBER) { rtl_count++; } @@ -966,7 +1000,8 @@ bool UNICHARSET::major_right_to_left() const { // An empty or nullptr whitelist enables everything (minus any blacklist). // An empty or nullptr blacklist disables nothing. // An empty or nullptr blacklist has no effect. 
-void UNICHARSET::set_black_and_whitelist(const char *blacklist, const char *whitelist, +void UNICHARSET::set_black_and_whitelist(const char *blacklist, + const char *whitelist, const char *unblacklist) { bool def_enabled = whitelist == nullptr || whitelist[0] == '\0'; // Set everything to default @@ -1037,7 +1072,8 @@ int UNICHARSET::add_script(const char *script) { assert(script_table_size_used == script_table_size_reserved); script_table_size_reserved += script_table_size_reserved; char **new_script_table = new char *[script_table_size_reserved]; - memcpy(new_script_table, script_table, script_table_size_used * sizeof(char *)); + memcpy(new_script_table, script_table, + script_table_size_used * sizeof(char *)); delete[] script_table; script_table = new_script_table; } @@ -1048,7 +1084,8 @@ int UNICHARSET::add_script(const char *script) { // Returns the string that represents a fragment // with the given unichar, pos and total. -std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, bool natural) { +std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, + bool natural) { if (total == 1) { return std::string(unichar); } @@ -1056,8 +1093,8 @@ std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, bo result += kSeparator; result += unichar; char buffer[kMaxLen]; - snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos, natural ? kNaturalFlag : kSeparator, - total); + snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos, + natural ? kNaturalFlag : kSeparator, total); result += buffer; return result; } diff --git a/src/ccutil/unicharset.h b/src/ccutil/unicharset.h index f367b1ff..f573ce8f 100644 --- a/src/ccutil/unicharset.h +++ b/src/ccutil/unicharset.h @@ -85,7 +85,8 @@ public: // Returns the string that represents a fragment // with the given unichar, pos and total. 
- static std::string to_string(const char *unichar, int pos, int total, bool natural); + static std::string to_string(const char *unichar, int pos, int total, + bool natural); // Returns the string that represents this fragment. std::string to_string() const { return to_string(unichar, pos, total, natural); @@ -93,19 +94,22 @@ public: // Checks whether a fragment has the same unichar, // position and total as the given inputs. - inline bool equals(const char *other_unichar, int other_pos, int other_total) const { - return (strcmp(this->unichar, other_unichar) == 0 && this->pos == other_pos && - this->total == other_total); + inline bool equals(const char *other_unichar, int other_pos, + int other_total) const { + return (strcmp(this->unichar, other_unichar) == 0 && + this->pos == other_pos && this->total == other_total); } inline bool equals(const CHAR_FRAGMENT *other) const { - return this->equals(other->get_unichar(), other->get_pos(), other->get_total()); + return this->equals(other->get_unichar(), other->get_pos(), + other->get_total()); } // Checks whether a given fragment is a continuation of this fragment. // Assumes that the given fragment pointer is not nullptr. inline bool is_continuation_of(const CHAR_FRAGMENT *fragment) const { return (strcmp(this->unichar, fragment->get_unichar()) == 0 && - this->total == fragment->get_total() && this->pos == fragment->get_pos() + 1); + this->total == fragment->get_total() && + this->pos == fragment->get_pos() + 1); } // Returns true if this fragment is a beginning fragment. @@ -237,8 +241,10 @@ public: // WARNING: Caller must guarantee that str has already been cleaned of codes // that do not belong in the unicharset, or encoding may fail. // Use CleanupString to perform the cleaning. 
- bool encode_string(const char *str, bool give_up_on_failure, std::vector *encoding, - std::vector *lengths, unsigned *encoded_length) const; + bool encode_string(const char *str, bool give_up_on_failure, + std::vector *encoding, + std::vector *lengths, + unsigned *encoded_length) const; // Return the unichar representation corresponding to the given UNICHAR_ID // within the UNICHARSET. @@ -272,7 +278,8 @@ public: // TATWEEL characters are kept and n-grams are allowed. Otherwise TATWEEL // characters are ignored/skipped as if they don't exist and n-grams that // can already be encoded are not added. - void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style); + void unichar_insert(const char *const unichar_repr, + OldUncleanUnichars old_style); void unichar_insert(const char *const unichar_repr) { unichar_insert(unichar_repr, OldUncleanUnichars::kFalse); } @@ -365,7 +372,8 @@ public: // Returns true if the operation is successful. bool save_to_file(FILE *file) const { std::string str; - return save_to_string(str) && tesseract::Serialize(file, &str[0], str.length()); + return save_to_string(str) && + tesseract::Serialize(file, &str[0], str.length()); } bool save_to_file(tesseract::TFile *file) const { @@ -575,8 +583,8 @@ public: // baseline-normalized coordinates, ie, where the baseline is // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight // (See normalis.h for the definitions). 
- void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, - int *max_top) const { + void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, + int *min_top, int *max_top) const { if (INVALID_UNICHAR_ID == unichar_id) { *min_bottom = *min_top = 0; *max_bottom = *max_top = 256; // kBlnCellHeight @@ -588,16 +596,21 @@ public: *min_top = unichars[unichar_id].properties.min_top; *max_top = unichars[unichar_id].properties.max_top; } - void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, - int max_top) { - unichars[unichar_id].properties.min_bottom = ClipToRange(min_bottom, 0, UINT8_MAX); - unichars[unichar_id].properties.max_bottom = ClipToRange(max_bottom, 0, UINT8_MAX); - unichars[unichar_id].properties.min_top = ClipToRange(min_top, 0, UINT8_MAX); - unichars[unichar_id].properties.max_top = ClipToRange(max_top, 0, UINT8_MAX); + void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom, + int min_top, int max_top) { + unichars[unichar_id].properties.min_bottom = + ClipToRange(min_bottom, 0, UINT8_MAX); + unichars[unichar_id].properties.max_bottom = + ClipToRange(max_bottom, 0, UINT8_MAX); + unichars[unichar_id].properties.min_top = + ClipToRange(min_top, 0, UINT8_MAX); + unichars[unichar_id].properties.max_top = + ClipToRange(max_top, 0, UINT8_MAX); } // Returns the width stats (as mean, sd) of the given unichar relative to the // median advance of all characters in the character set. - void get_width_stats(UNICHAR_ID unichar_id, float *width, float *width_sd) const { + void get_width_stats(UNICHAR_ID unichar_id, float *width, + float *width_sd) const { if (INVALID_UNICHAR_ID == unichar_id) { *width = 0.0f; *width_sd = 0.0f; @@ -614,7 +627,8 @@ public: } // Returns the stats of the x-bearing (as mean, sd) of the given unichar // relative to the median advance of all characters in the character set. 
- void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const { + void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing, + float *bearing_sd) const { if (INVALID_UNICHAR_ID == unichar_id) { *bearing = *bearing_sd = 0.0f; return; @@ -623,13 +637,15 @@ public: *bearing = unichars[unichar_id].properties.bearing; *bearing_sd = unichars[unichar_id].properties.bearing_sd; } - void set_bearing_stats(UNICHAR_ID unichar_id, float bearing, float bearing_sd) { + void set_bearing_stats(UNICHAR_ID unichar_id, float bearing, + float bearing_sd) { unichars[unichar_id].properties.bearing = bearing; unichars[unichar_id].properties.bearing_sd = bearing_sd; } // Returns the stats of the x-advance of the given unichar (as mean, sd) // relative to the median advance of all characters in the character set. - void get_advance_stats(UNICHAR_ID unichar_id, float *advance, float *advance_sd) const { + void get_advance_stats(UNICHAR_ID unichar_id, float *advance, + float *advance_sd) const { if (INVALID_UNICHAR_ID == unichar_id) { *advance = *advance_sd = 0; return; @@ -638,7 +654,8 @@ public: *advance = unichars[unichar_id].properties.advance; *advance_sd = unichars[unichar_id].properties.advance_sd; } - void set_advance_stats(UNICHAR_ID unichar_id, float advance, float advance_sd) { + void set_advance_stats(UNICHAR_ID unichar_id, float advance, + float advance_sd) { unichars[unichar_id].properties.advance = advance; unichars[unichar_id].properties.advance_sd = advance_sd; } @@ -654,8 +671,9 @@ public: return true; } int script_id = get_script(unichar_id); - return script_id != han_sid_ && script_id != thai_sid_ && script_id != hangul_sid_ && - script_id != hiragana_sid_ && script_id != katakana_sid_; + return script_id != han_sid_ && script_id != thai_sid_ && + script_id != hangul_sid_ && script_id != hiragana_sid_ && + script_id != katakana_sid_; } // Return the script name of the given unichar. 
@@ -738,7 +756,8 @@ public: // at these codes and they should not be used. bool has_special_codes() const { return get_fragment(UNICHAR_BROKEN) != nullptr && - strcmp(id_to_unichar(UNICHAR_BROKEN), kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0; + strcmp(id_to_unichar(UNICHAR_BROKEN), + kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0; } // Returns true if there are any repeated unicodes in the normalized @@ -800,7 +819,8 @@ public: // Return a pointer to the CHAR_FRAGMENT class struct if the given // unichar representation represents a character fragment. const CHAR_FRAGMENT *get_fragment(const char *const unichar_repr) const { - if (unichar_repr == nullptr || unichar_repr[0] == '\0' || !ids.contains(unichar_repr, false)) { + if (unichar_repr == nullptr || unichar_repr[0] == '\0' || + !ids.contains(unichar_repr, false)) { return nullptr; } return get_fragment(unichar_to_id(unichar_repr)); @@ -1020,8 +1040,9 @@ private: // best_encoding contains the encoding that used the longest part of str. // best_lengths (may be null) contains the lengths of best_encoding. void encode_string(const char *str, int str_index, int str_length, - std::vector *encoding, std::vector *lengths, - unsigned *best_total_length, std::vector *best_encoding, + std::vector *encoding, + std::vector *lengths, unsigned *best_total_length, + std::vector *best_encoding, std::vector *best_lengths) const; // Gets the properties for a grapheme string, combining properties for @@ -1034,7 +1055,8 @@ private: // Load ourselves from a "file" where our only interface to the file is // an implementation of fgets(). This is the parsing primitive accessed by // the public routines load_from_file(). - bool load_via_fgets(std::function fgets_cb, bool skip_fragments); + bool load_via_fgets(const std::function &fgets_cb, + bool skip_fragments); // List of mappings to make when ingesting strings from the outside. 
// The substitutions clean up text that should exists for rendering of diff --git a/src/classify/adaptmatch.cpp b/src/classify/adaptmatch.cpp index b582290b..c777ceaa 100644 --- a/src/classify/adaptmatch.cpp +++ b/src/classify/adaptmatch.cpp @@ -1782,13 +1782,13 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATU Y2 = F2->Params[PicoFeatY]; A2 = F2->Params[PicoFeatDir]; - AngleDelta = fabs(A1 - A2); + AngleDelta = std::fabs(A1 - A2); if (AngleDelta > 0.5) { AngleDelta = 1.0 - AngleDelta; } - if (AngleDelta > matcher_clustering_max_angle_delta || fabs(X1 - X2) > SegmentLength || - fabs(Y1 - Y2) > SegmentLength) { + if (AngleDelta > matcher_clustering_max_angle_delta || std::fabs(X1 - X2) > SegmentLength || + std::fabs(Y1 - Y2) > SegmentLength) { break; } } diff --git a/src/classify/cluster.cpp b/src/classify/cluster.cpp index 6928bfea..bdf11f25 100644 --- a/src/classify/cluster.cpp +++ b/src/classify/cluster.cpp @@ -1674,13 +1674,13 @@ float Mean(PROTOTYPE *Proto, uint16_t Dimension) { float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) { switch (Proto->Style) { case spherical: - return sqrt(Proto->Variance.Spherical); + return std::sqrt(Proto->Variance.Spherical); case elliptical: - return sqrt(Proto->Variance.Elliptical[Dimension]); + return std::sqrt(Proto->Variance.Elliptical[Dimension]); case mixed: switch (Proto->Distrib[Dimension]) { case normal: - return sqrt(Proto->Variance.Elliptical[Dimension]); + return std::sqrt(Proto->Variance.Elliptical[Dimension]); case uniform: case D_random: return Proto->Variance.Elliptical[Dimension]; @@ -2268,7 +2268,7 @@ static PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATIST } FillBuckets(NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i], - sqrt(Proto->Variance.Elliptical[i])); + std::sqrt(Proto->Variance.Elliptical[i])); if (DistributionOK(NormalBuckets)) { continue; } @@ -2576,7 +2576,7 @@ static bool Independent(PARAM_DESC *ParamDesc, 
int16_t N, float *CoVariance, flo if ((*VARii == 0.0) || (*VARjj == 0.0)) { CorrelationCoeff = 0.0; } else { - CorrelationCoeff = sqrt(sqrt(*CoVariance * *CoVariance / (*VARii * *VARjj))); + CorrelationCoeff = sqrt(std::sqrt(*CoVariance * *CoVariance / (*VARii * *VARjj))); } if (CorrelationCoeff > Independence) { return false; diff --git a/src/classify/clusttool.cpp b/src/classify/clusttool.cpp index 4e920538..543378d6 100644 --- a/src/classify/clusttool.cpp +++ b/src/classify/clusttool.cpp @@ -207,7 +207,7 @@ PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) { case spherical: ReadNFloats(fp, 1, &(Proto->Variance.Spherical)); Proto->Magnitude.Spherical = 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical); - Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast(N)); + Proto->TotalMagnitude = std::pow(Proto->Magnitude.Spherical, static_cast(N)); Proto->LogMagnitude = log(static_cast(Proto->TotalMagnitude)); Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; Proto->Distrib.clear(); diff --git a/src/classify/intmatcher.cpp b/src/classify/intmatcher.cpp index 2b353cf3..98162cdd 100644 --- a/src/classify/intmatcher.cpp +++ b/src/classify/intmatcher.cpp @@ -676,7 +676,7 @@ IntegerMatcher::IntegerMatcher(tesseract::IntParam *classify_debug_level) if (kSEExponentialMultiplier > 0.0) { double scale = - 1.0 - exp(-kSEExponentialMultiplier) * + 1.0 - std::exp(-kSEExponentialMultiplier) * exp(kSEExponentialMultiplier * (static_cast(i) / SE_TABLE_SIZE)); evidence *= ClipToRange(scale, 0.0, 1.0); } diff --git a/src/classify/intproto.cpp b/src/classify/intproto.cpp index cca6c279..215c310c 100644 --- a/src/classify/intproto.cpp +++ b/src/classify/intproto.cpp @@ -365,14 +365,14 @@ void AddProtoToProtoPruner(PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *C Length = Proto->Length; X = Proto->X + X_SHIFT; - Pad = std::max(fabs(cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()), - fabs(sin(Angle)) * (classify_pp_side_pad * 
GetPicoFeatureLength())); + Pad = std::max(fabs(std::cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()), + fabs(std::sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength())); FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug); Y = Proto->Y + Y_SHIFT; - Pad = std::max(fabs(sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()), - fabs(cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength())); + Pad = std::max(fabs(std::sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()), + fabs(std::cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength())); FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug); } /* AddProtoToProtoPruner */ @@ -1388,8 +1388,8 @@ void InitTableFiller(float EndPad, float SidePad, float AnglePad, PROTO_STRUCT * if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) { /* rising diagonal proto */ Angle *= 2.0 * M_PI; - Cos = fabs(cos(Angle)); - Sin = fabs(sin(Angle)); + Cos = fabs(std::cos(Angle)); + Sin = fabs(std::sin(Angle)); /* compute the positions of the corners of the acceptance region */ Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin; @@ -1438,8 +1438,8 @@ void InitTableFiller(float EndPad, float SidePad, float AnglePad, PROTO_STRUCT * } else { /* falling diagonal proto */ Angle *= 2.0 * M_PI; - Cos = fabs(cos(Angle)); - Sin = fabs(sin(Angle)); + Cos = fabs(std::cos(Angle)); + Sin = fabs(std::sin(Angle)); /* compute the positions of the corners of the acceptance region */ Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin; diff --git a/src/classify/kdtree.cpp b/src/classify/kdtree.cpp index 4d419a23..df706d77 100644 --- a/src/classify/kdtree.cpp +++ b/src/classify/kdtree.cpp @@ -33,7 +33,7 @@ namespace tesseract { /*----------------------------------------------------------------------------- Global Data Definitions and Declarations 
-----------------------------------------------------------------------------*/ -#define MINSEARCH -FLT_MAX +#define MINSEARCH (-FLT_MAX) #define MAXSEARCH FLT_MAX // Helper function to find the next essential dimension in a cycle. @@ -398,7 +398,7 @@ float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) { } float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) { - return sqrt(DistanceSquared(k, dim, p1, p2)); + return std::sqrt(DistanceSquared(k, dim, p1, p2)); } /*---------------------------------------------------------------------------*/ diff --git a/src/dict/dawg.cpp b/src/dict/dawg.cpp index da66383f..af45176f 100644 --- a/src/dict/dawg.cpp +++ b/src/dict/dawg.cpp @@ -38,7 +38,8 @@ namespace tesseract { // instead of weak vtables in every compilation unit. Dawg::~Dawg() = default; -bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, bool requires_complete) const { +bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, + bool requires_complete) const { if (word.empty()) { return !requires_complete; } @@ -56,7 +57,8 @@ bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, bool requires_complete) const } } // Now check the last character. - return edge_char_of(node, word.unichar_id(end_index), requires_complete) != NO_EDGE; + return edge_char_of(node, word.unichar_id(end_index), requires_complete) != + NO_EDGE; } bool Dawg::word_in_dawg(const WERD_CHOICE &word) const { @@ -84,7 +86,8 @@ int Dawg::check_for_words(const char *filename, const UNICHARSET &unicharset, chomp_string(string); // remove newline WERD_CHOICE word(string, unicharset); if (word.length() > 0 && !word.contains_unichar_id(INVALID_UNICHAR_ID)) { - if (!match_words(&word, 0, 0, enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) { + if (!match_words(&word, 0, 0, + enable_wildcard ? 
wildcard : INVALID_UNICHAR_ID)) { tprintf("Missing word: %s\n", string); ++misses; } @@ -106,21 +109,25 @@ void Dawg::iterate_words(const UNICHARSET &unicharset, iterate_words_rec(word, 0, cb); } -static void CallWithUTF8(std::function cb, const WERD_CHOICE *wc) { +static void CallWithUTF8(const std::function &cb, + const WERD_CHOICE *wc) { std::string s; wc->string_and_lengths(&s, nullptr); cb(s.c_str()); } -void Dawg::iterate_words(const UNICHARSET &unicharset, std::function cb) const { +void Dawg::iterate_words(const UNICHARSET &unicharset, + const std::function &cb) const { using namespace std::placeholders; // for _1 - std::function shim(std::bind(CallWithUTF8, cb, _1)); + std::function shim( + std::bind(CallWithUTF8, cb, _1)); WERD_CHOICE word(&unicharset); iterate_words_rec(word, 0, shim); } -void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, - std::function cb) const { +void Dawg::iterate_words_rec( + const WERD_CHOICE &word_so_far, NODE_REF to_explore, + const std::function &cb) const { NodeChildVector children; this->unichar_ids_of(to_explore, &children, false); for (auto &i : children) { @@ -136,7 +143,8 @@ void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore } } -bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const { +bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, + UNICHAR_ID wildcard) const { if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) { bool any_matched = false; NodeChildVector vec; @@ -187,7 +195,8 @@ SquishedDawg::~SquishedDawg() { delete[] edges_; } -EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const { +EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, + bool word_end) const { EDGE_REF edge = node; if (node == 0) { // binary search EDGE_REF start = 0; @@ -195,7 +204,8 @@ EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, 
UNICHAR_ID unichar_id, bool w int compare; while (start <= end) { edge = (start + end) >> 1; // (start + end) / 2 - compare = given_greater_than_edge_rec(NO_EDGE, word_end, unichar_id, edges_[edge]); + compare = given_greater_than_edge_rec(NO_EDGE, word_end, unichar_id, + edges_[edge]); if (compare == 0) { // given == vec[k] return edge; } else if (compare == 1) { // given > vec[k] @@ -258,8 +268,8 @@ void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const { eow = end_of_word(edge) ? eow_string : not_eow_string; unichar_id = edge_letter(edge); - tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", edge, - next_node(edge), unichar_id, direction, is_last, eow); + tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", + edge, next_node(edge), unichar_id, direction, is_last, eow); if (edge - node > max_num_edges) { return; @@ -273,8 +283,9 @@ void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const { eow = end_of_word(edge) ? eow_string : not_eow_string; unichar_id = edge_letter(edge); - tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", edge, - next_node(edge), unichar_id, direction, is_last, eow); + tprintf(REFFORMAT " : next = " REFFORMAT + ", unichar_id = %d, %s %s %s\n", + edge, next_node(edge), unichar_id, direction, is_last, eow); if (edge - node > MAX_NODE_EDGES_DISPLAY) { return; @@ -291,9 +302,11 @@ void SquishedDawg::print_edge(EDGE_REF edge) const { if (edge == NO_EDGE) { tprintf("NO_EDGE\n"); } else { - tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = '%d', %s %s %s\n", edge, - next_node(edge), edge_letter(edge), (forward_edge(edge) ? "FORWARD" : " "), - (last_edge(edge) ? "LAST" : " "), (end_of_word(edge) ? "EOW" : "")); + tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = '%d', %s %s %s\n", + edge, next_node(edge), edge_letter(edge), + (forward_edge(edge) ? "FORWARD" : " "), + (last_edge(edge) ? "LAST" : " "), + (end_of_word(edge) ? 
"EOW" : "")); } } @@ -328,8 +341,8 @@ bool SquishedDawg::read_squished_dawg(TFile *file) { return false; } if (debug_level_ > 2) { - tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n", type_, lang_.c_str(), - perm_, unicharset_size_, num_edges_); + tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n", + type_, lang_.c_str(), perm_, unicharset_size_, num_edges_); for (EDGE_REF edge = 0; edge < num_edges_; ++edge) { print_edge(edge); } @@ -337,7 +350,8 @@ bool SquishedDawg::read_squished_dawg(TFile *file) { return true; } -std::unique_ptr SquishedDawg::build_node_map(int32_t *num_nodes) const { +std::unique_ptr SquishedDawg::build_node_map( + int32_t *num_nodes) const { EDGE_REF edge; std::unique_ptr node_map(new EDGE_REF[num_edges_]); int32_t node_counter; diff --git a/src/dict/dawg.h b/src/dict/dawg.h index 6d349548..05e7c5c5 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -147,16 +147,19 @@ public: // For each word in the Dawg, call the given (permanent) callback with the // text (UTF-8) version of the word. - void iterate_words(const UNICHARSET &unicharset, std::function cb) const; + void iterate_words(const UNICHARSET &unicharset, + const std::function &cb) const; // Pure virtual function that should be implemented by the derived classes. /// Returns the edge that corresponds to the letter out of this node. - virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const = 0; + virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, + bool word_end) const = 0; /// Fills the given NodeChildVector with all the unichar ids (and the /// corresponding EDGE_REFs) for which there is an edge out of this node. - virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const = 0; + virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, + bool word_end) const = 0; /// Returns the next node visited by following the edge /// indicated by the given EDGE_REF. 
@@ -175,7 +178,8 @@ public: /// Fills vec with unichar ids that represent the character classes /// of the given unichar_id. - virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, + virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, + const UNICHARSET &unicharset, std::vector *vec) const { (void)unichar_id; (void)unicharset; @@ -194,8 +198,13 @@ public: } protected: - Dawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level) - : lang_(lang), type_(type), perm_(perm), unicharset_size_(0), debug_level_(debug_level) {} + Dawg(DawgType type, const std::string &lang, PermuterType perm, + int debug_level) + : lang_(lang), + type_(type), + perm_(perm), + unicharset_size_(0), + debug_level_(debug_level) {} /// Returns the next node visited by following this edge. inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const { @@ -207,14 +216,16 @@ protected: } /// Returns the direction flag of this edge. inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const { - return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE : FORWARD_EDGE; + return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE + : FORWARD_EDGE; } /// Returns true if this edge marks the end of a word. inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const { return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0; } /// Returns UNICHAR_ID recorded in this edge. - inline UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const { + inline UNICHAR_ID unichar_id_from_edge_rec( + const EDGE_RECORD &edge_rec) const { return ((edge_rec & letter_mask_) >> LETTER_START_BIT); } /// Sets the next node link for this edge in the Dawg. 
@@ -233,13 +244,14 @@ protected: /// checked are the same) /// 0 if edge_rec_match() returns true /// -1 otherwise - inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, + inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, + UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const { UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec); NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec); bool curr_word_end = end_of_word_from_edge_rec(edge_rec); - if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node, curr_word_end, - curr_unichar_id)) { + if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node, + curr_word_end, curr_unichar_id)) { return 0; } if (unichar_id > curr_unichar_id) { @@ -260,8 +272,9 @@ protected: /// Returns true if all the values are equal (any value matches /// next_node if next_node == NO_EDGE, any value matches word_end /// if word_end is false). - inline bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, - NODE_REF other_next_node, bool other_word_end, + inline bool edge_rec_match(NODE_REF next_node, bool word_end, + UNICHAR_ID unichar_id, NODE_REF other_next_node, + bool other_word_end, UNICHAR_ID other_unichar_id) const { return ((unichar_id == other_unichar_id) && (next_node == NO_EDGE || next_node == other_next_node) && @@ -277,11 +290,13 @@ protected: /// the *'s in this string are interpreted as wildcards. /// WERD_CHOICE param is not passed by const so that wildcard searches /// can modify it and work without having to copy WERD_CHOICEs. - bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const; + bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, + UNICHAR_ID wildcard) const; // Recursively iterate over all words in a dawg (see public iterate_words). 
- void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, - std::function cb) const; + void iterate_words_rec( + const WERD_CHOICE &word_so_far, NODE_REF to_explore, + const std::function &cb) const; // Member Variables. std::string lang_; @@ -339,12 +354,13 @@ protected: // We're back in the punctuation dawg. Continuing there is the only option. struct DawgPosition { DawgPosition() = default; - DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc) - : dawg_ref(dawgref) - , punc_ref(puncref) - , dawg_index(dawg_idx) - , punc_index(punc_idx) - , back_to_punc(backtopunc) {} + DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, + bool backtopunc) + : dawg_ref(dawgref), + punc_ref(puncref), + dawg_index(dawg_idx), + punc_index(punc_idx), + back_to_punc(backtopunc) {} bool operator==(const DawgPosition &other) { return dawg_index == other.dawg_index && dawg_ref == other.dawg_ref && punc_index == other.punc_index && punc_ref == other.punc_ref && @@ -364,7 +380,8 @@ public: /// Adds an entry for the given dawg_index with the given node to the vec. /// Returns false if the same entry already exists in the vector, /// true otherwise. - inline bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg) { + inline bool add_unique(const DawgPosition &new_pos, bool debug, + const char *debug_msg) { for (auto position : *this) { if (position == new_pos) { return false; @@ -372,8 +389,9 @@ public: } push_back(new_pos); if (debug) { - tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg, new_pos.dawg_index, - new_pos.dawg_ref, new_pos.punc_ref, new_pos.back_to_punc ? " returned" : ""); + tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg, + new_pos.dawg_index, new_pos.dawg_ref, new_pos.punc_ref, + new_pos.back_to_punc ? 
" returned" : ""); } return true; } @@ -389,19 +407,23 @@ public: // class TESS_API SquishedDawg : public Dawg { public: - SquishedDawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level) - : Dawg(type, lang, perm, debug_level) {} - SquishedDawg(const char *filename, DawgType type, const std::string &lang, PermuterType perm, + SquishedDawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level) + : Dawg(type, lang, perm, debug_level) {} + SquishedDawg(const char *filename, DawgType type, const std::string &lang, + PermuterType perm, int debug_level) : Dawg(type, lang, perm, debug_level) { TFile file; ASSERT_HOST(file.Open(filename, nullptr)); ASSERT_HOST(read_squished_dawg(&file)); num_forward_edges_in_node0 = num_forward_edges(0); } - SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const std::string &lang, - PermuterType perm, int unicharset_size, int debug_level) - : Dawg(type, lang, perm, debug_level), edges_(edges), num_edges_(num_edges) { + SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, + const std::string &lang, PermuterType perm, int unicharset_size, + int debug_level) + : Dawg(type, lang, perm, debug_level), + edges_(edges), + num_edges_(num_edges) { init(unicharset_size); num_forward_edges_in_node0 = num_forward_edges(0); if (debug_level > 3) { @@ -424,11 +446,13 @@ public: } /// Returns the edge that corresponds to the letter out of this node. - EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const override; + EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, + bool word_end) const override; /// Fills the given NodeChildVector with all the unichar ids (and the /// corresponding EDGE_REFs) for which there is an edge out of this node. 
- void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override { + void unichar_ids_of(NODE_REF node, NodeChildVector *vec, + bool word_end) const override { EDGE_REF edge = node; if (!edge_occupied(edge) || edge == NO_EDGE) { return; @@ -502,7 +526,8 @@ private: } /// Returns true if this edge is in the forward direction. inline bool forward_edge(EDGE_REF edge_ref) const { - return (edge_occupied(edge_ref) && (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref]))); + return (edge_occupied(edge_ref) && + (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref]))); } /// Returns true if this edge is in the backward direction. inline bool backward_edge(EDGE_REF edge_ref) const { diff --git a/src/lstm/networkio.cpp b/src/lstm/networkio.cpp index ffe1baff..2d430001 100644 --- a/src/lstm/networkio.cpp +++ b/src/lstm/networkio.cpp @@ -17,6 +17,7 @@ #include "networkio.h" #include // for FLT_MAX +#include #include #include "functions.h" @@ -28,7 +29,7 @@ namespace tesseract { // Minimum value to output for certainty. const float kMinCertainty = -20.0f; // Probability corresponding to kMinCertainty. -const float kMinProb = exp(kMinCertainty); +const float kMinProb = std::exp(kMinCertainty); // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim. void NetworkIO::Resize2d(bool int_mode, int width, int num_features) { @@ -356,7 +357,7 @@ Image NetworkIO::ToPix() const { } else if (num_features > 3) { // More than 3 features use false yellow/blue color, assuming a signed // input in the range [-1,1]. - red = ClipToRange(IntCastRounded(fabs(pixel) * 255), 0, 255); + red = ClipToRange(IntCastRounded(std::fabs(pixel) * 255), 0, 255); if (pixel >= 0) { green = red; blue = 0; @@ -586,7 +587,7 @@ void NetworkIO::EnsureBestLabel(int t, int label) { // Helper function converts prob to certainty taking the minimum into account. /* static */ float NetworkIO::ProbToCertainty(float prob) { - return prob > kMinProb ? 
log(prob) : kMinCertainty; + return prob > kMinProb ? std::log(prob) : kMinCertainty; } // Returns true if there is any bad value that is suspiciously like a GT @@ -807,7 +808,7 @@ void NetworkIO::ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const Network // Reconstruct the target from the delta. float comb_target = delta_line[i] + output; comb_line[i] = comb_target - comb_line[i]; - float base_delta = fabs(comb_target - base_line[i]); + float base_delta = std::fabs(comb_target - base_line[i]); if (base_delta > max_base_delta) { max_base_delta = base_delta; } diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp index 22ee776e..9946a65a 100644 --- a/src/lstm/recodebeam.cpp +++ b/src/lstm/recodebeam.cpp @@ -41,11 +41,13 @@ const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = { static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"}; // Prints debug details of the node. -void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) const { +void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, + int depth) const { if (code == null_char) { tprintf("null_char"); } else { - tprintf("label=%d, uid=%d=%s", code, unichar_id, unicharset.debug_str(unichar_id).c_str()); + tprintf("label=%d, uid=%d=%s", code, unichar_id, + unicharset.debug_str(unichar_id).c_str()); } tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%" PRIx64, score, certainty, start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "", @@ -59,16 +61,16 @@ void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) c } // Borrows the pointer, which is expected to survive until *this is deleted. 
-RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, - Dict *dict) - : recoder_(recoder) - , beam_size_(0) - , top_code_(-1) - , second_code_(-1) - , dict_(dict) - , space_delimited_(true) - , is_simple_text_(simple_text) - , null_char_(null_char) { +RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, + int null_char, bool simple_text, Dict *dict) + : recoder_(recoder), + beam_size_(0), + top_code_(-1), + second_code_(-1), + dict_(dict), + space_delimited_(true), + is_simple_text_(simple_text), + null_char_(null_char) { if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) { space_delimited_ = false; } @@ -84,9 +86,9 @@ RecodeBeamSearch::~RecodeBeamSearch() { } // Decodes the set of network outputs, storing the lattice internally. -void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double cert_offset, - double worst_dict_cert, const UNICHARSET *charset, - int lstm_choice_mode) { +void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, + double cert_offset, double worst_dict_cert, + const UNICHARSET *charset, int lstm_choice_mode) { beam_size_ = 0; int width = output.Width(); if (lstm_choice_mode) { @@ -94,14 +96,16 @@ void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double } for (int t = 0; t < width; ++t) { ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]); - DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset); + DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, + charset); if (lstm_choice_mode) { SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t); } } } -void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY &output, double dict_ratio, - double cert_offset, double worst_dict_cert, +void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY &output, + double dict_ratio, double cert_offset, + double worst_dict_cert, const UNICHARSET *charset) { beam_size_ = 0; int 
width = output.dim1(); @@ -111,9 +115,9 @@ void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY &output, double dict } } -void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio, - double cert_offset, double worst_dict_cert, - const UNICHARSET *charset, int lstm_choice_mode) { +void RecodeBeamSearch::DecodeSecondaryBeams( + const NetworkIO &output, double dict_ratio, double cert_offset, + double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode) { for (auto data : secondary_beam_) { delete data; } @@ -128,14 +132,17 @@ void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict t >= character_boundaries_[bucketNumber + 1]) { ++bucketNumber; } - ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t), output.NumFeatures(), - kBeamWidths[0]); - DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset); + ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t), + output.NumFeatures(), kBeamWidths[0]); + DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset, + worst_dict_cert, charset); } } -void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outputs, - const UNICHARSET *charset, int xCoord) { +void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, + int num_outputs, + const UNICHARSET *charset, + int xCoord) { std::vector> choices; for (int i = 0; i < num_outputs; ++i) { if (outputs[i] >= 0.01f) { @@ -153,7 +160,8 @@ void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outp while (choices.size() > pos && choices[pos].second > outputs[i]) { pos++; } - choices.insert(choices.begin() + pos, std::pair(character, outputs[i])); + choices.insert(choices.begin() + pos, + std::pair(character, outputs[i])); } } timesteps.push_back(choices); @@ -162,7 +170,8 @@ void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outp void RecodeBeamSearch::segmentTimestepsByCharacters() { for 
(unsigned i = 1; i < character_boundaries_.size(); ++i) { std::vector>> segment; - for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i]; ++j) { + for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i]; + ++j) { segment.push_back(timesteps[j]); } segmentedTimesteps.push_back(segment); @@ -170,7 +179,8 @@ void RecodeBeamSearch::segmentTimestepsByCharacters() { } std::vector>> RecodeBeamSearch::combineSegmentedTimesteps( - std::vector>>> *segmentedTimesteps) { + std::vector>>> + *segmentedTimesteps) { std::vector>> combined_timesteps; for (auto &segmentedTimestep : *segmentedTimesteps) { for (auto &j : segmentedTimestep) { @@ -180,8 +190,10 @@ RecodeBeamSearch::combineSegmentedTimesteps( return combined_timesteps; } -void RecodeBeamSearch::calculateCharBoundaries(std::vector *starts, std::vector *ends, - std::vector *char_bounds_, int maxWidth) { +void RecodeBeamSearch::calculateCharBoundaries(std::vector *starts, + std::vector *ends, + std::vector *char_bounds_, + int maxWidth) { char_bounds_->push_back(0); for (unsigned i = 0; i < ends->size(); ++i) { int middle = ((*starts)[i + 1] - (*ends)[i]) / 2; @@ -192,8 +204,8 @@ void RecodeBeamSearch::calculateCharBoundaries(std::vector *starts, std::ve } // Returns the best path as labels/scores/xcoords similar to simple CTC. -void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector *labels, - std::vector *xcoords) const { +void RecodeBeamSearch::ExtractBestPathAsLabels( + std::vector *labels, std::vector *xcoords) const { labels->clear(); xcoords->clear(); std::vector best_nodes; @@ -215,22 +227,23 @@ void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector *labels, // Returns the best path as unichar-ids/certs/ratings/xcoords skipping // duplicates, nulls and intermediate parts. 
-void RecodeBeamSearch::ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, - std::vector *unichar_ids, - std::vector *certs, - std::vector *ratings, - std::vector *xcoords) const { +void RecodeBeamSearch::ExtractBestPathAsUnicharIds( + bool debug, const UNICHARSET *unicharset, std::vector *unichar_ids, + std::vector *certs, std::vector *ratings, + std::vector *xcoords) const { std::vector best_nodes; ExtractBestPaths(&best_nodes, nullptr); ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords); if (debug) { DebugPath(unicharset, best_nodes); - DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings, *xcoords); + DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings, + *xcoords); } } // Returns the best path as a set of WERD_RES. -void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, +void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, + float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector *words, int lstm_choice_mode) { @@ -245,9 +258,11 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_ ExtractBestPaths(&best_nodes, &second_nodes); if (debug) { DebugPath(unicharset, best_nodes); - ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings, &xcoords); + ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings, + &xcoords); tprintf("\nSecond choice path:\n"); - DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings, xcoords); + DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings, + xcoords); } // If lstm choice mode is required in granularity level 2, it stores the x // Coordinates of every chosen character, to match the alternative choices to @@ -256,7 +271,8 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_ &character_boundaries_); int num_ids = unichar_ids.size(); if (debug) { - 
DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings, xcoords); + DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings, + xcoords); } // Convert labels to unichar-ids. int word_end = 0; @@ -283,16 +299,19 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_ if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) { space_cert = certs[word_end]; } - bool leading_space = word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE; + bool leading_space = + word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE; // Create a WERD_RES for the output word. WERD_RES *word_res = InitializeWord(leading_space, line_box, word_start, word_end, - std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor); + std::min(space_cert, prev_space_cert), unicharset, + xcoords, scale_factor); for (int i = word_start; i < word_end; ++i) { auto *choices = new BLOB_CHOICE_LIST; BLOB_CHOICE_IT bc_it(choices); - auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1, 1.0f, - static_cast(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER); + auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1, + 1.0f, static_cast(INT16_MAX), 0.0f, + BCC_STATIC_CLASSIFIER); int col = i - word_start; choice->set_matrix_cell(col, col); bc_it.add_after_then_move(choice); @@ -314,7 +333,8 @@ struct greater_than { } }; -void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *charset, +void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, + const UNICHARSET *charset, bool secondary) const { std::vector> topology; std::unordered_set visited; @@ -340,7 +360,7 @@ void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET * } int ct = 0; unsigned cb = 1; - for (std::vector layer : topology) { + for (const std::vector &layer : topology) { if (cb >= character_boundaries_.size()) { break; } @@ -380,7 +400,8 @@ void RecodeBeamSearch::PrintBeam2(bool 
uids, int num_outputs, const UNICHARSET * prevCode = " "; } if (uids) { - tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode, node->score); + tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode, + node->score); } else { tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score); } @@ -397,7 +418,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) { } // For the first iteration the original beam is analyzed. After that a // new beam is calculated based on the results from the original beam. - std::vector &currentBeam = secondary_beam_.empty() ? beam_ : secondary_beam_; + std::vector &currentBeam = + secondary_beam_.empty() ? beam_ : secondary_beam_; character_boundaries_[0] = 0; for (unsigned j = 1; j < character_boundaries_.size(); ++j) { std::vector unichar_ids; @@ -406,7 +428,7 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) { std::vector xcoords; int backpath = character_boundaries_[j] - character_boundaries_[j - 1]; std::vector &heaps = - currentBeam.at(character_boundaries_[j] - 1)->beams_->heap(); + currentBeam.at(character_boundaries_[j] - 1)->beams_->heap(); std::vector best_nodes; std::vector best; // Scan the segmented node chain for valid unichar ids. 
@@ -415,7 +437,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) { int backcounter = 0; const RecodeNode *node = &entry.data(); while (node != nullptr && backcounter < backpath) { - if (node->code != null_char_ && node->unichar_id != INVALID_UNICHAR_ID) { + if (node->code != null_char_ && + node->unichar_id != INVALID_UNICHAR_ID) { validChar = true; break; } @@ -430,7 +453,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) { if (!best.empty()) { std::sort(best.begin(), best.end(), greater_than()); ExtractPath(best[0], &best_nodes, backpath); - ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords); + ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, + &xcoords); } if (!unichar_ids.empty()) { int bestPos = 0; @@ -466,7 +490,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) { int id = unichar_ids[bestPos]; const char *result = unicharset->id_to_unichar_ext(id); float rating = ratings[bestPos]; - ctc_choices[j - 1].push_back(std::pair(result, rating)); + ctc_choices[j - 1].push_back( + std::pair(result, rating)); } else { std::vector> choice; int id = unichar_ids[bestPos]; @@ -504,7 +529,8 @@ void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const { continue; } // Print all the best scoring nodes for each unichar found. - tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict", kNodeContNames[c]); + tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict", + kNodeContNames[c]); DebugBeamPos(unicharset, beam_[p]->beams_[index]); } } @@ -512,7 +538,8 @@ void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const { } // Generates debug output of the content of a single beam position. 
-void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHeap &heap) const { +void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, + const RecodeHeap &heap) const { std::vector unichar_bests(unicharset.size()); const RecodeNode *null_best = nullptr; int heap_size = heap.size(); @@ -543,12 +570,11 @@ void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHe // Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping // duplicates, nulls and intermediate parts. /* static */ -void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector &best_nodes, - std::vector *unichar_ids, - std::vector *certs, - std::vector *ratings, - std::vector *xcoords, - std::vector *character_boundaries) { +void RecodeBeamSearch::ExtractPathAsUnicharIds( + const std::vector &best_nodes, + std::vector *unichar_ids, std::vector *certs, + std::vector *ratings, std::vector *xcoords, + std::vector *character_boundaries) { unichar_ids->clear(); certs->clear(); ratings->clear(); @@ -571,7 +597,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vectorunichar_id; - if (unichar_id == UNICHAR_SPACE && !certs->empty() && best_nodes[t]->permuter != NO_PERM) { + if (unichar_id == UNICHAR_SPACE && !certs->empty() && + best_nodes[t]->permuter != NO_PERM) { // All the rating and certainty go on the previous character except // for the space itself. if (certainty < certs->back()) { @@ -587,8 +614,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vectorcertainty; // Special-case NO-PERM space to forget the certainty of the previous // nulls. See long comment in ContinueContext. 
- if (cert < certainty || - (unichar_id == UNICHAR_SPACE && best_nodes[t - 1]->permuter == NO_PERM)) { + if (cert < certainty || (unichar_id == UNICHAR_SPACE && + best_nodes[t - 1]->permuter == NO_PERM)) { certainty = cert; } rating -= cert; @@ -612,19 +639,23 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector &xcoords, float scale_factor) { + const std::vector &xcoords, + float scale_factor) { // Make a fake blob for each non-zero label. C_BLOB_LIST blobs; C_BLOB_IT b_it(&blobs); for (int i = word_start; i < word_end; ++i) { if (static_cast(i + 1) < character_boundaries_.size()) { - TBOX box(static_cast(std::floor(character_boundaries_[i] * scale_factor)) + + TBOX box(static_cast( + std::floor(character_boundaries_[i] * scale_factor)) + line_box.left(), line_box.bottom(), - static_cast(std::ceil(character_boundaries_[i + 1] * scale_factor)) + + static_cast( + std::ceil(character_boundaries_[i + 1] * scale_factor)) + line_box.left(), line_box.top()); b_it.add_after_then_move(C_BLOB::FakeBlob(box)); @@ -644,7 +675,8 @@ WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_ // Fills top_n_flags_ with bools that are true iff the corresponding output // is one of the top_n. 
-void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int top_n) { +void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, + int top_n) { top_n_flags_.clear(); top_n_flags_.resize(num_outputs, TN_ALSO_RAN); top_code_ = -1; @@ -676,15 +708,17 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to top_n_flags_[null_char_] = TN_TOP2; } -void RecodeBeamSearch::ComputeSecTopN(std::unordered_set *exList, const float *outputs, - int num_outputs, int top_n) { +void RecodeBeamSearch::ComputeSecTopN(std::unordered_set *exList, + const float *outputs, int num_outputs, + int top_n) { top_n_flags_.clear(); top_n_flags_.resize(num_outputs, TN_ALSO_RAN); top_code_ = -1; second_code_ = -1; top_heap_.clear(); for (int i = 0; i < num_outputs; ++i) { - if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) && !exList->count(i)) { + if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) && + !exList->count(i)) { TopPair entry(outputs[i], i); top_heap_.Push(&entry); if (top_heap_.size() > top_n) { @@ -712,8 +746,9 @@ void RecodeBeamSearch::ComputeSecTopN(std::unordered_set *exList, const flo // Adds the computation for the current time-step to the beam. Call at each // time-step in sequence from left to right. outputs is the activation vector // for the current timestep. -void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio, - double cert_offset, double worst_dict_cert, +void RecodeBeamSearch::DecodeStep(const float *outputs, int t, + double dict_ratio, double cert_offset, + double worst_dict_cert, const UNICHARSET *charset, bool debug) { if (t == static_cast(beam_.size())) { beam_.push_back(new RecodeBeam); @@ -723,11 +758,12 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio step->Clear(); if (t == 0) { // The first step can only use singles and initials. 
- ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset, - dict_ratio, cert_offset, worst_dict_cert, step); + ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, + charset, dict_ratio, cert_offset, worst_dict_cert, step); if (dict_ != nullptr) { - ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset, - dict_ratio, cert_offset, worst_dict_cert, step); + ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, + TN_TOP2, charset, dict_ratio, cert_offset, + worst_dict_cert, step); } } else { RecodeBeam *prev = beam_[t - 1]; @@ -759,8 +795,9 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio // best first, but it comes before a lot of the worst, so it is slightly // more efficient than going forwards. for (int i = prev->beams_[index].size() - 1; i >= 0; --i) { - ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset, - dict_ratio, cert_offset, worst_dict_cert, step); + ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, + top_n, charset, dict_ratio, cert_offset, + worst_dict_cert, step); } } for (int index = 0; index < kNumBeams; ++index) { @@ -775,15 +812,16 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio if (step->best_initial_dawgs_[c].code >= 0) { int index = BeamIndex(true, static_cast(c), 0); RecodeHeap *dawg_heap = &step->beams_[index]; - PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap); + PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], + dawg_heap); } } } } -void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double dict_ratio, - double cert_offset, double worst_dict_cert, - const UNICHARSET *charset, bool debug) { +void RecodeBeamSearch::DecodeSecondaryStep( + const float *outputs, int t, double dict_ratio, double cert_offset, + double worst_dict_cert, const UNICHARSET *charset, bool debug) { 
if (t == static_cast(secondary_beam_.size())) { secondary_beam_.push_back(new RecodeBeam); } @@ -791,11 +829,12 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d step->Clear(); if (t == 0) { // The first step can only use singles and initials. - ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset, - dict_ratio, cert_offset, worst_dict_cert, step); + ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, + charset, dict_ratio, cert_offset, worst_dict_cert, step); if (dict_ != nullptr) { - ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset, - dict_ratio, cert_offset, worst_dict_cert, step); + ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, + TN_TOP2, charset, dict_ratio, cert_offset, + worst_dict_cert, step); } } else { RecodeBeam *prev = secondary_beam_[t - 1]; @@ -827,8 +866,9 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d // best first, but it comes before a lot of the worst, so it is slightly // more efficient than going forwards. 
for (int i = prev->beams_[index].size() - 1; i >= 0; --i) { - ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset, - dict_ratio, cert_offset, worst_dict_cert, step); + ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, + top_n, charset, dict_ratio, cert_offset, + worst_dict_cert, step); } } for (int index = 0; index < kNumBeams; ++index) { @@ -843,7 +883,8 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d if (step->best_initial_dawgs_[c].code >= 0) { int index = BeamIndex(true, static_cast(c), 0); RecodeHeap *dawg_heap = &step->beams_[index]; - PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap); + PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], + dawg_heap); } } } @@ -853,10 +894,10 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d // continuations of context prev, which is of the given length, using the // given network outputs to provide scores to the choices. Uses only those // choices for which top_n_flags[index] == top_n_flag. 
-void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const float *outputs, - TopNState top_n_flag, const UNICHARSET *charset, - double dict_ratio, double cert_offset, - double worst_dict_cert, RecodeBeam *step) { +void RecodeBeamSearch::ContinueContext( + const RecodeNode *prev, int index, const float *outputs, + TopNState top_n_flag, const UNICHARSET *charset, double dict_ratio, + double cert_offset, double worst_dict_cert, RecodeBeam *step) { RecodedCharID prefix; RecodedCharID full_code; const RecodeNode *previous = prev; @@ -864,7 +905,8 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const bool use_dawgs = IsDawgFromBeamsIndex(index); NodeContinuation prev_cont = ContinuationFromBeamsIndex(index); for (int p = length - 1; p >= 0; --p, previous = previous->prev) { - while (previous != nullptr && (previous->duplicate || previous->code == null_char_)) { + while (previous != nullptr && + (previous->duplicate || previous->code == null_char_)) { previous = previous->prev; } if (previous != nullptr) { @@ -875,26 +917,34 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const if (prev != nullptr && !is_simple_text_) { if (top_n_flags_[prev->code] == top_n_flag) { if (prev_cont != NC_NO_DUP) { - float cert = NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset; - PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert, - dict_ratio, use_dawgs, NC_ANYTHING, prev, step); - } - if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 && prev->code != null_char_) { float cert = - NetworkIO::ProbToCertainty(outputs[prev->code] + outputs[null_char_]) + cert_offset; - PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert, - dict_ratio, use_dawgs, NC_NO_DUP, prev, step); + NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset; + PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, + cert, 
worst_dict_cert, dict_ratio, use_dawgs, + NC_ANYTHING, prev, step); + } + if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 && + prev->code != null_char_) { + float cert = NetworkIO::ProbToCertainty(outputs[prev->code] + + outputs[null_char_]) + + cert_offset; + PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, + cert, worst_dict_cert, dict_ratio, use_dawgs, + NC_NO_DUP, prev, step); } } if (prev_cont == NC_ONLY_DUP) { return; } - if (prev->code != null_char_ && length > 0 && top_n_flags_[null_char_] == top_n_flag) { + if (prev->code != null_char_ && length > 0 && + top_n_flags_[null_char_] == top_n_flag) { // Allow nulls within multi code sequences, as the nulls within are not // explicitly included in the code sequence. - float cert = NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset; - PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID, cert, worst_dict_cert, - dict_ratio, use_dawgs, NC_ANYTHING, prev, step); + float cert = + NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset; + PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID, + cert, worst_dict_cert, dict_ratio, use_dawgs, + NC_ANYTHING, prev, step); } } const std::vector *final_codes = recoder_.GetFinalCodes(prefix); @@ -920,18 +970,19 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const !charset->get_enabled(unichar_id)) { continue; // disabled by whitelist/blacklist } - ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ANYTHING, - prev, step); + ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, + use_dawgs, NC_ANYTHING, prev, step); if (top_n_flag == TN_TOP2 && code != null_char_) { float prob = outputs[code] + outputs[null_char_]; - if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ && + if (prev != nullptr && prev_cont == NC_ANYTHING && + prev->code != null_char_ && ((prev->code == top_code_ && code == second_code_) 
|| (code == top_code_ && prev->code == second_code_))) { prob += outputs[prev->code]; } cert = NetworkIO::ProbToCertainty(prob) + cert_offset; - ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ONLY_DUP, - prev, step); + ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, + use_dawgs, NC_ONLY_DUP, prev, step); } } } @@ -945,37 +996,44 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const continue; } float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset; - PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert, - dict_ratio, use_dawgs, NC_ANYTHING, prev, step); + PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, + worst_dict_cert, dict_ratio, use_dawgs, + NC_ANYTHING, prev, step); if (top_n_flag == TN_TOP2 && code != null_char_) { float prob = outputs[code] + outputs[null_char_]; - if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ && + if (prev != nullptr && prev_cont == NC_ANYTHING && + prev->code != null_char_ && ((prev->code == top_code_ && code == second_code_) || (code == top_code_ && prev->code == second_code_))) { prob += outputs[prev->code]; } cert = NetworkIO::ProbToCertainty(prob) + cert_offset; - PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert, - dict_ratio, use_dawgs, NC_ONLY_DUP, prev, step); + PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, + cert, worst_dict_cert, dict_ratio, use_dawgs, + NC_ONLY_DUP, prev, step); } } } } // Continues for a new unichar, using dawg or non-dawg as per flag. 
-void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, float worst_dict_cert, - float dict_ratio, bool use_dawgs, NodeContinuation cont, - const RecodeNode *prev, RecodeBeam *step) { +void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, + float worst_dict_cert, float dict_ratio, + bool use_dawgs, NodeContinuation cont, + const RecodeNode *prev, + RecodeBeam *step) { if (use_dawgs) { if (cert > worst_dict_cert) { ContinueDawg(code, unichar_id, cert, cont, prev, step); } } else { RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)]; - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false, false, false, false, - cert * dict_ratio, prev, nullptr, nodawg_heap); - if (dict_ != nullptr && ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) || - !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) { + PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false, + false, false, false, cert * dict_ratio, prev, nullptr, + nodawg_heap); + if (dict_ != nullptr && + ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) || + !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) { // Any top choice position that can start a new word, ie a space or // any non-space-delimited character, should also be considered // by the dawg search, so push initial dawg to the dawg heap. 
@@ -995,8 +1053,8 @@ void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, flo } else { dawg_cert *= dict_ratio; } - PushInitialDawgIfBetter(code, unichar_id, permuter, false, false, dawg_cert, cont, prev, - step); + PushInitialDawgIfBetter(code, unichar_id, permuter, false, false, + dawg_cert, cont, prev, step); } } } @@ -1004,13 +1062,14 @@ void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, flo // Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev, // appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id // is a valid continuation of whatever is in prev. -void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont, +void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, + NodeContinuation cont, const RecodeNode *prev, RecodeBeam *step) { RecodeHeap *dawg_heap = &step->beams_[BeamIndex(true, cont, 0)]; RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)]; if (unichar_id == INVALID_UNICHAR_ID) { - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false, false, false, cert, - prev, nullptr, dawg_heap); + PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false, + false, false, cert, prev, nullptr, dawg_heap); return; } // Avoid dictionary probe if score a total loss. 
@@ -1018,8 +1077,10 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo if (prev != nullptr) { score += prev->score; } - if (dawg_heap->size() >= kBeamWidths[0] && score <= dawg_heap->PeekTop().data().score && - nodawg_heap->size() >= kBeamWidths[0] && score <= nodawg_heap->PeekTop().data().score) { + if (dawg_heap->size() >= kBeamWidths[0] && + score <= dawg_heap->PeekTop().data().score && + nodawg_heap->size() >= kBeamWidths[0] && + score <= nodawg_heap->PeekTop().data().score) { return; } const RecodeNode *uni_prev = prev; @@ -1033,10 +1094,11 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo if (uni_prev != nullptr && uni_prev->end_of_word) { // Space is good. Push initial state, to the dawg beam and a regular // space to the top choice beam. - PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false, false, cert, cont, prev, - step); - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter, false, false, false, - false, cert, prev, nullptr, nodawg_heap); + PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false, + false, cert, cont, prev, step); + PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter, + false, false, false, false, cert, prev, nullptr, + nodawg_heap); } return; } else if (uni_prev != nullptr && uni_prev->start_of_dawg && @@ -1060,18 +1122,21 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo } else { return; // Can't continue if not a dict word. 
} - auto permuter = static_cast( - dict_->def_letter_is_okay(&dawg_args, dict_->getUnicharset(), unichar_id, false)); + auto permuter = static_cast(dict_->def_letter_is_okay( + &dawg_args, dict_->getUnicharset(), unichar_id, false)); if (permuter != NO_PERM) { - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start, - dawg_args.valid_end, false, cert, prev, dawg_args.updated_dawgs, dawg_heap); + PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, + word_start, dawg_args.valid_end, false, cert, prev, + dawg_args.updated_dawgs, dawg_heap); if (dawg_args.valid_end && !space_delimited_) { // We can start another word right away, so push initial state as well, // to the dawg beam, and the regular character to the top choice beam, // since non-dict words can start here too. - PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, cert, cont, prev, step); - PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start, true, false, - cert, prev, nullptr, nodawg_heap); + PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, + cert, cont, prev, step); + PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, + word_start, true, false, cert, prev, nullptr, + nodawg_heap); } } else { delete updated_dawgs; @@ -1081,9 +1146,11 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo // Adds a RecodeNode composed of the tuple (code, unichar_id, // initial-dawg-state, prev, cert) to the given heap if/ there is room or if // better than the current worst element if already full. 
-void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter, +void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, + PermuterType permuter, bool start, bool end, float cert, - NodeContinuation cont, const RecodeNode *prev, + NodeContinuation cont, + const RecodeNode *prev, RecodeBeam *step) { RecodeNode *best_initial_dawg = &step->best_initial_dawgs_[cont]; float score = cert; @@ -1093,8 +1160,9 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, Permute if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) { auto *initial_dawgs = new DawgPositionVector; dict_->default_dawgs(initial_dawgs, false); - RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, score, prev, - initial_dawgs, ComputeCodeHash(code, false, prev)); + RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, + score, prev, initial_dawgs, + ComputeCodeHash(code, false, prev)); *best_initial_dawg = node; } } @@ -1103,22 +1171,23 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, Permute // false, false, false, false, cert, prev, nullptr) to heap if there is room // or if better than the current worst element if already full. /* static */ -void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id, - float cert, float worst_dict_cert, float dict_ratio, - bool use_dawgs, NodeContinuation cont, - const RecodeNode *prev, RecodeBeam *step) { +void RecodeBeamSearch::PushDupOrNoDawgIfBetter( + int length, bool dup, int code, int unichar_id, float cert, + float worst_dict_cert, float dict_ratio, bool use_dawgs, + NodeContinuation cont, const RecodeNode *prev, RecodeBeam *step) { int index = BeamIndex(use_dawgs, cont, length); if (use_dawgs) { if (cert > worst_dict_cert) { - PushHeapIfBetter(kBeamWidths[length], code, unichar_id, prev ? 
prev->permuter : NO_PERM, - false, false, false, dup, cert, prev, nullptr, &step->beams_[index]); + PushHeapIfBetter(kBeamWidths[length], code, unichar_id, + prev ? prev->permuter : NO_PERM, false, false, false, + dup, cert, prev, nullptr, &step->beams_[index]); } } else { cert *= dict_ratio; if (cert >= kMinCertainty || code == null_char_) { PushHeapIfBetter(kBeamWidths[length], code, unichar_id, - prev ? prev->permuter : TOP_CHOICE_PERM, false, false, false, dup, cert, - prev, nullptr, &step->beams_[index]); + prev ? prev->permuter : TOP_CHOICE_PERM, false, false, + false, dup, cert, prev, nullptr, &step->beams_[index]); } } } @@ -1127,17 +1196,19 @@ void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, i // dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room // or if better than the current worst element if already full. void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id, - PermuterType permuter, bool dawg_start, bool word_start, - bool end, bool dup, float cert, const RecodeNode *prev, - DawgPositionVector *d, RecodeHeap *heap) { + PermuterType permuter, bool dawg_start, + bool word_start, bool end, bool dup, + float cert, const RecodeNode *prev, + DawgPositionVector *d, + RecodeHeap *heap) { float score = cert; if (prev != nullptr) { score += prev->score; } if (heap->size() < max_size || score > heap->PeekTop().data().score) { uint64_t hash = ComputeCodeHash(code, dup, prev); - RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, dup, cert, score, prev, - d, hash); + RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, + dup, cert, score, prev, d, hash); if (UpdateHeapIfMatched(&node, heap)) { return; } @@ -1154,7 +1225,8 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id, // Adds a RecodeNode to heap if there is room // or if better than the current worst element if already full. 
-void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHeap *heap) { +void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, + RecodeHeap *heap) { if (heap->size() < max_size || node->score > heap->PeekTop().data().score) { if (UpdateHeapIfMatched(node, heap)) { return; @@ -1170,7 +1242,8 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHe // Searches the heap for a matching entry, and updates the score with // reshuffle if needed. Returns true if there was a match. -bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap) { +bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, + RecodeHeap *heap) { // TODO(rays) consider hash map instead of linear search. // It might not be faster because the hash map would have to be updated // every time a heap reshuffle happens, and that would be a lot of overhead. @@ -1178,7 +1251,8 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea for (auto &i : nodes) { RecodeNode &node = i.data(); if (node.code == new_node->code && node.code_hash == new_node->code_hash && - node.permuter == new_node->permuter && node.start_of_dawg == new_node->start_of_dawg) { + node.permuter == new_node->permuter && + node.start_of_dawg == new_node->start_of_dawg) { if (new_node->score > node.score) { // The new one is better. Update the entire node in the heap and // reshuffle. @@ -1193,7 +1267,8 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea } // Computes and returns the code-hash for the given code and prev. -uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const { +uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, + const RecodeNode *prev) const { uint64_t hash = prev == nullptr ? 
0 : prev->code_hash; if (!dup && code != null_char_) { int num_classes = recoder_.code_range(); @@ -1209,8 +1284,9 @@ uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode // during Decode. On return the best_nodes vector essentially contains the set // of code, score pairs that make the optimal path with the constraint that // the recoder can decode the code sequence back to a sequence of unichar-ids. -void RecodeBeamSearch::ExtractBestPaths(std::vector *best_nodes, - std::vector *second_nodes) const { +void RecodeBeamSearch::ExtractBestPaths( + std::vector *best_nodes, + std::vector *second_nodes) const { // Scan both beams to extract the best and second best paths. const RecodeNode *best_node = nullptr; const RecodeNode *second_best_node = nullptr; @@ -1230,11 +1306,13 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector *best_no // last valid unichar_id. const RecodeNode *dawg_node = node; while (dawg_node != nullptr && - (dawg_node->unichar_id == INVALID_UNICHAR_ID || dawg_node->duplicate)) { + (dawg_node->unichar_id == INVALID_UNICHAR_ID || + dawg_node->duplicate)) { dawg_node = dawg_node->prev; } if (dawg_node == nullptr || - (!dawg_node->end_of_word && dawg_node->unichar_id != UNICHAR_SPACE)) { + (!dawg_node->end_of_word && + dawg_node->unichar_id != UNICHAR_SPACE)) { // Dawg node is not valid. continue; } @@ -1242,7 +1320,8 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector *best_no if (best_node == nullptr || node->score > best_node->score) { second_best_node = best_node; best_node = node; - } else if (second_best_node == nullptr || node->score > second_best_node->score) { + } else if (second_best_node == nullptr || + node->score > second_best_node->score) { second_best_node = node; } } @@ -1256,8 +1335,8 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector *best_no // Helper backtracks through the lattice from the given node, storing the // path and reversing it. 
-void RecodeBeamSearch::ExtractPath(const RecodeNode *node, - std::vector *path) const { +void RecodeBeamSearch::ExtractPath( + const RecodeNode *node, std::vector *path) const { path->clear(); while (node != nullptr) { path->push_back(node); @@ -1266,7 +1345,8 @@ void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::reverse(path->begin(), path->end()); } -void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector *path, +void RecodeBeamSearch::ExtractPath(const RecodeNode *node, + std::vector *path, int limiter) const { int pathcounter = 0; path->clear(); @@ -1279,8 +1359,9 @@ void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector &path) const { +void RecodeBeamSearch::DebugPath( + const UNICHARSET *unicharset, + const std::vector &path) const { for (unsigned c = 0; c < path.size(); ++c) { const RecodeNode &node = *path[c]; tprintf("%u ", c); @@ -1289,19 +1370,18 @@ void RecodeBeamSearch::DebugPath(const UNICHARSET *unicharset, } // Helper prints debug information on the given unichar path. 
-void RecodeBeamSearch::DebugUnicharPath(const UNICHARSET *unicharset, - const std::vector &path, - const std::vector &unichar_ids, - const std::vector &certs, - const std::vector &ratings, - const std::vector &xcoords) const { +void RecodeBeamSearch::DebugUnicharPath( + const UNICHARSET *unicharset, const std::vector &path, + const std::vector &unichar_ids, const std::vector &certs, + const std::vector &ratings, const std::vector &xcoords) const { auto num_ids = unichar_ids.size(); double total_rating = 0.0; for (unsigned c = 0; c < num_ids; ++c) { int coord = xcoords[c]; tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c], unicharset->debug_str(unichar_ids[c]).c_str(), ratings[c], certs[c], - path[coord]->start_of_word, path[coord]->end_of_word, path[coord]->permuter); + path[coord]->start_of_word, path[coord]->end_of_word, + path[coord]->permuter); total_rating += ratings[c]; } tprintf("Path total rating = %g\n", total_rating); diff --git a/src/textord/baselinedetect.cpp b/src/textord/baselinedetect.cpp index 839bc20c..309ee51b 100644 --- a/src/textord/baselinedetect.cpp +++ b/src/textord/baselinedetect.cpp @@ -63,11 +63,11 @@ const double kMinFittingLinespacings = 0.25; namespace tesseract { BaselineRow::BaselineRow(double line_spacing, TO_ROW *to_row) - : blobs_(to_row->blob_list()) - , baseline_pt1_(0.0f, 0.0f) - , baseline_pt2_(0.0f, 0.0f) - , baseline_error_(0.0) - , good_baseline_(false) { + : blobs_(to_row->blob_list()), + baseline_pt1_(0.0f, 0.0f), + baseline_pt2_(0.0f, 0.0f), + baseline_error_(0.0), + good_baseline_(false) { ComputeBoundingBox(); // Compute a scale factor for rounding to ints. disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing; @@ -87,11 +87,11 @@ void BaselineRow::SetupOldLineParameters(TO_ROW *row) const { // Outputs diagnostic information. 
void BaselineRow::Print() const { - tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", baseline_pt1_.x(), - baseline_pt1_.y(), baseline_pt2_.x(), baseline_pt2_.y(), BaselineAngle(), - StraightYAtX(0.0)); - tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_, baseline_error_, - good_baseline_); + tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", + baseline_pt1_.x(), baseline_pt1_.y(), baseline_pt2_.x(), + baseline_pt2_.y(), BaselineAngle(), StraightYAtX(0.0)); + tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_, + baseline_error_, good_baseline_); bounding_box_.print(); } @@ -133,8 +133,9 @@ double BaselineRow::StraightYAtX(double x) const { if (denominator == 0.0) { return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0; } - return baseline_pt1_.y() + - (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) / denominator; + return baseline_pt1_.y() + (x - baseline_pt1_.x()) * + (baseline_pt2_.y() - baseline_pt1_.y()) / + denominator; } // Fits a straight baseline to the points. Returns true if it had enough @@ -170,7 +171,8 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) { baseline_error_ = fitter_.Fit(&pt1, &pt2); baseline_pt1_ = pt1; baseline_pt2_ = pt2; - if (baseline_error_ > max_baseline_error_ && fitter_.SufficientPointsForIndependentFit()) { + if (baseline_error_ > max_baseline_error_ && + fitter_.SufficientPointsForIndependentFit()) { // The fit was bad but there were plenty of points, so try skipping // the first and last few, and use the new line if it dramatically improves // the error of fit. @@ -184,7 +186,10 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) { int debug = 0; #ifdef kDebugYCoord Print(); - debug = bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord ? 3 : 2; + debug = bounding_box_.bottom() < kDebugYCoord && + bounding_box_.top() > kDebugYCoord + ? 
3 + : 2; #endif // Now we obtained a direction from that fit, see if we can improve the // fit using the same direction and some other start point. @@ -218,7 +223,8 @@ void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) { return; } #ifdef kDebugYCoord - if (bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord && debug < 3) + if (bounding_box_.bottom() < kDebugYCoord && + bounding_box_.top() > kDebugYCoord && debug < 3) debug = 3; #endif FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]); @@ -226,7 +232,8 @@ void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) { // Modifies the baseline to snap to the textline grid if the existing // result is not good enough. -double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, +double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, + double line_spacing, double line_offset) { if (blobs_->empty()) { if (debug > 1) { @@ -240,7 +247,8 @@ double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, dou int best_index = -1; for (unsigned i = 0; i < displacement_modes_.size(); ++i) { double blob_y = displacement_modes_[i]; - double error = BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset); + double error = + BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset); if (debug > 1) { tprintf("Mode at %g has error %g from model \n", blob_y, error); } @@ -263,9 +271,11 @@ double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, dou displacement_modes_[best_index]); bounding_box_.print(); } - FitConstrainedIfBetter(debug, direction, model_margin, displacement_modes_[best_index]); + FitConstrainedIfBetter(debug, direction, model_margin, + displacement_modes_[best_index]); } else if (debug > 1) { - tprintf("Linespacing model only moves current line by %g for row at:", shift); + tprintf("Linespacing model only moves 
current line by %g for row at:", + shift); bounding_box_.print(); } } else if (debug > 1) { @@ -296,7 +306,8 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) { if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true; #endif - FCOORD blob_pos((box.left() + box.right()) / 2.0f, blob->baseline_position()); + FCOORD blob_pos((box.left() + box.right()) / 2.0f, + blob->baseline_position()); double offset = direction * blob_pos; perp_blob_dists.push_back(offset); #ifdef kDebugYCoord @@ -338,24 +349,28 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) { // Otherwise the new fit will only replace the old if it is really better, // or the old fit is marked bad and the new fit has sufficient points, as // well as being within the max_baseline_error_. -void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance, +void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, + double cheat_allowance, double target_offset) { double halfrange = fit_halfrange_ * direction.length(); double min_dist = target_offset - halfrange; double max_dist = target_offset + halfrange; ICOORD line_pt; - double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, debug > 2, &line_pt); + double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, + debug > 2, &line_pt); // Allow cheat_allowance off the new error new_error -= cheat_allowance; double old_angle = BaselineAngle(); double new_angle = direction.angle(); if (debug > 1) { - tprintf("Constrained error = %g, original = %g", new_error, baseline_error_); - tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle, new_angle, - new_angle - old_angle, kMaxSkewDeviation); + tprintf("Constrained error = %g, original = %g", new_error, + baseline_error_); + tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle, + new_angle, new_angle - old_angle, kMaxSkewDeviation); } - bool new_good_baseline = 
new_error <= max_baseline_error_ && - (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit()); + bool new_good_baseline = + new_error <= max_baseline_error_ && + (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit()); // The new will replace the old if any are true: // 1. the new error is better // 2. the old is NOT good, but the new is @@ -368,7 +383,8 @@ void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, dou baseline_pt2_ = baseline_pt1_ + direction; good_baseline_ = new_good_baseline; if (debug > 1) { - tprintf("Replacing with constrained baseline, good = %d\n", good_baseline_); + tprintf("Replacing with constrained baseline, good = %d\n", + good_baseline_); } } else if (debug > 1) { tprintf("Keeping old baseline\n"); @@ -400,14 +416,14 @@ void BaselineRow::ComputeBoundingBox() { } BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block) - : block_(block) - , debug_level_(debug_level) - , non_text_block_(non_text) - , good_skew_angle_(false) - , skew_angle_(0.0) - , line_spacing_(block->line_spacing) - , line_offset_(0.0) - , model_error_(0.0) { + : block_(block), + debug_level_(debug_level), + non_text_block_(non_text), + good_skew_angle_(false), + skew_angle_(0.0), + line_spacing_(block->line_spacing), + line_offset_(0.0), + model_error_(0.0) { TO_ROW_IT row_it(block_->get_rows()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { // Sort the blobs on the rows. @@ -418,7 +434,8 @@ BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block) // Computes and returns the absolute error of the given perp_disp from the // given linespacing model. -double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, double line_offset) { +double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, + double line_offset) { // Round to the nearest multiple of line_spacing + line offset. 
int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing); double model_y = line_spacing * multiple + line_offset; @@ -452,7 +469,8 @@ bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { good_skew_angle_ = false; } if (debug_level_ > 0) { - tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_, good_skew_angle_); + tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_, + good_skew_angle_); } return good_skew_angle_; } @@ -483,9 +501,11 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) { // baseline. // Start by finding the row that is best fitted to the model. unsigned best_row = 0; - double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), line_spacing_, line_offset_); + double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), + line_spacing_, line_offset_); for (unsigned r = 1; r < rows_.size(); ++r) { - double error = SpacingModelError(rows_[r]->PerpDisp(direction), line_spacing_, line_offset_); + double error = SpacingModelError(rows_[r]->PerpDisp(direction), + line_spacing_, line_offset_); if (error < best_error) { best_error = error; best_row = r; @@ -494,11 +514,13 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) { // Starting at the best fitting row, work outwards, syncing the offset. 
double offset = line_offset_; for (auto r = best_row + 1; r < rows_.size(); ++r) { - offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, line_spacing_, offset); + offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, + line_spacing_, offset); } offset = line_offset_; for (int r = best_row - 1; r >= 0; --r) { - offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, line_spacing_, offset); + offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, + line_spacing_, offset); } } @@ -506,7 +528,8 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) { void BaselineBlock::SetupBlockParameters() const { if (line_spacing_ > 0.0) { // Where was block_line_spacing set before? - float min_spacing = std::min(block_->line_spacing, static_cast(line_spacing_)); + float min_spacing = + std::min(block_->line_spacing, static_cast(line_spacing_)); if (min_spacing < block_->line_size) { block_->line_size = min_spacing; } @@ -549,8 +572,8 @@ void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) { // As a side-effect, computes the xheights of the rows and the block. // Although x-height estimation is conceptually separate, it is part of // detecting perspective distortion and therefore baseline fitting. 
-void BaselineBlock::FitBaselineSplines(bool enable_splines, bool show_final_rows, - Textord *textord) { +void BaselineBlock::FitBaselineSplines(bool enable_splines, + bool show_final_rows, Textord *textord) { double gradient = tan(skew_angle_); FCOORD rotation(1.0f, 0.0f); @@ -565,8 +588,8 @@ void BaselineBlock::FitBaselineSplines(bool enable_splines, bool show_final_rows int32_t xstarts[2] = {block_box.left(), block_box.right()}; double coeffs[3] = {0.0, row->line_m(), row->line_c()}; row->baseline = QSPLINE(1, xstarts, coeffs); - textord->compute_row_xheight(row, block_->block->classify_rotation(), row->line_m(), - block_->line_size); + textord->compute_row_xheight(row, block_->block->classify_rotation(), + row->line_m(), block_->line_size); } } textord->compute_block_xheight(block_, gradient); @@ -599,7 +622,8 @@ void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) { } plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE); // Show discarded blobs. - plot_blob_list(win, &block_->underlines, ScrollView::YELLOW, ScrollView::CORAL); + plot_blob_list(win, &block_->underlines, ScrollView::YELLOW, + ScrollView::CORAL); if (block_->blobs.length() > 0) { tprintf("%d blobs discarded as noise\n", block_->blobs.length()); } @@ -647,8 +671,9 @@ bool BaselineBlock::ComputeLineSpacing() { } } if (debug_level_ > 0) { - tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n", line_spacing_, - row_positions.size(), fitting_gaps, non_trivial_gaps); + tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n", + line_spacing_, row_positions.size(), fitting_gaps, + non_trivial_gaps); } return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings; } @@ -686,7 +711,8 @@ void BaselineBlock::EstimateLineSpacing() { // Find the first row after row that overlaps it significantly. 
const TBOX &row_box = row->bounding_box(); unsigned r2; - for (r2 = r + 1; r2 < rows_.size() && !row_box.major_x_overlap(rows_[r2]->bounding_box()); + for (r2 = r + 1; r2 < rows_.size() && + !row_box.major_x_overlap(rows_[r2]->bounding_box()); ++r2) { ; } @@ -703,7 +729,8 @@ void BaselineBlock::EstimateLineSpacing() { // If we have at least one value, use it, otherwise leave the previous // value unchanged. if (!spacings.empty()) { - std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2, spacings.end()); + std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2, + spacings.end()); line_spacing_ = spacings[spacings.size() / 2]; if (debug_level_ > 1) { tprintf("Estimate of linespacing = %g\n", line_spacing_); @@ -718,14 +745,16 @@ void BaselineBlock::EstimateLineSpacing() { void BaselineBlock::RefineLineSpacing(const std::vector &positions) { double spacings[3], offsets[3], errors[3]; int index_range; - errors[0] = - FitLineSpacingModel(positions, line_spacing_, &spacings[0], &offsets[0], &index_range); + errors[0] = FitLineSpacingModel(positions, line_spacing_, &spacings[0], + &offsets[0], &index_range); if (index_range > 1) { double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range); // Try the hypotheses that there might be index_range +/- 1 line spaces. 
- errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1], &offsets[1], nullptr); + errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1], + &offsets[1], nullptr); double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range); - errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2], &offsets[2], nullptr); + errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2], + &offsets[2], nullptr); for (int i = 1; i <= 2; ++i) { if (errors[i] < errors[0]) { spacings[0] = spacings[i]; @@ -739,8 +768,8 @@ void BaselineBlock::RefineLineSpacing(const std::vector &positions) { line_offset_ = offsets[0]; model_error_ = errors[0]; if (debug_level_ > 0) { - tprintf("Final linespacing model = %g + offset %g, error %g\n", line_spacing_, line_offset_, - model_error_); + tprintf("Final linespacing model = %g + offset %g, error %g\n", + line_spacing_, line_offset_, model_error_); } } } @@ -750,8 +779,9 @@ void BaselineBlock::RefineLineSpacing(const std::vector &positions) { // and the corresponding intercept in c_out, and the number of spacings seen // in index_delta. Returns the error of fit to the line spacing model. // Uses a simple linear regression, but optimized the offset using the median. -double BaselineBlock::FitLineSpacingModel(const std::vector &positions, double m_in, - double *m_out, double *c_out, int *index_delta) { +double BaselineBlock::FitLineSpacingModel(const std::vector &positions, + double m_in, double *m_out, + double *c_out, int *index_delta) { if (m_in == 0.0f || positions.size() < 2) { *m_out = m_in; *c_out = 0.0; @@ -762,6 +792,7 @@ double BaselineBlock::FitLineSpacingModel(const std::vector &positions, } std::vector offsets; // Get the offset (remainder) linespacing for each line and choose the median. 
+ offsets.reserve(positions.size()); for (double position : positions) { offsets.push_back(fmod(position, m_in)); } @@ -795,7 +826,8 @@ double BaselineBlock::FitLineSpacingModel(const std::vector &positions, *c_out = 0.0; } if (debug_level_ > 1) { - tprintf("Median offset = %g, compared to mean of %g.\n", *c_out, llsq.c(*m_out)); + tprintf("Median offset = %g, compared to mean of %g.\n", *c_out, + llsq.c(*m_out)); } // Index_delta is the number of hypothesized line gaps present. if (index_delta != nullptr) { @@ -805,13 +837,14 @@ double BaselineBlock::FitLineSpacingModel(const std::vector &positions, // a full line-spacing in disagreement with the median. double rms_error = llsq.rms(*m_out, llsq.c(*m_out)); if (debug_level_ > 1) { - tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in, median_offset, - *m_out, *c_out, rms_error); + tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in, + median_offset, *m_out, *c_out, rms_error); } return rms_error; } -BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks) +BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew, + TO_BLOCK_LIST *blocks) : page_skew_(page_skew), debug_level_(debug_level) { TO_BLOCK_IT it(blocks); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { @@ -863,8 +896,10 @@ void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) { // x-heights and displaying debug information. // NOTE that ComputeStraightBaselines must have been called first as this // sets up data in the TO_ROWs upon which this function depends. 
-void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, - bool remove_noise, bool show_final_rows, +void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, + bool enable_splines, + bool remove_noise, + bool show_final_rows, Textord *textord) { for (auto bl_block : blocks_) { if (enable_splines) { diff --git a/src/textord/cjkpitch.cpp b/src/textord/cjkpitch.cpp index b6437f29..42bd488a 100644 --- a/src/textord/cjkpitch.cpp +++ b/src/textord/cjkpitch.cpp @@ -22,6 +22,7 @@ #include "tovars.h" #include // for std::sort +#include #include // for std::vector namespace tesseract { @@ -437,7 +438,7 @@ private: } const float real_pitch = box_pitch(box1, box2); - if (fabs(real_pitch - pitch) < pitch * kFPTolerance) { + if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) { return true; } @@ -645,7 +646,7 @@ void FPRow::EstimatePitch(bool pass1) { // So we collect only pitch values between two good // characters. and within tolerance in pass2. if (pass1 || - (prev_was_good && fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) { + (prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) { good_pitches_.Add(pitch); if (!is_box_modified(i - 1) && !is_box_modified(i)) { good_gaps_.Add(gap); diff --git a/src/textord/colpartition.cpp b/src/textord/colpartition.cpp index d57703ff..3683d025 100644 --- a/src/textord/colpartition.cpp +++ b/src/textord/colpartition.cpp @@ -25,9 +25,9 @@ #include "colpartitiongrid.h" #include "colpartitionset.h" #include "detlinefit.h" -#include "helpers.h" // for UpdateRange #include "dppoint.h" -#include "host.h" // for NearlyEqual +#include "helpers.h" // for UpdateRange +#include "host.h" // for NearlyEqual #include "imagefind.h" #include "workingpartset.h" @@ -89,14 +89,14 @@ const int kMaxColorDistance = 900; // blob_type is the blob_region_type_ of the blobs in this partition. 
// Vertical is the direction of logical vertical on the possibly skewed image. ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD &vertical) - : left_margin_(-INT32_MAX) - , right_margin_(INT32_MAX) - , median_bottom_(INT32_MAX) - , median_top_(-INT32_MAX) - , median_left_(INT32_MAX) - , median_right_(-INT32_MAX) - , blob_type_(blob_type) - , vertical_(vertical) { + : left_margin_(-INT32_MAX), + right_margin_(INT32_MAX), + median_bottom_(INT32_MAX), + median_top_(-INT32_MAX), + median_left_(INT32_MAX), + median_right_(-INT32_MAX), + blob_type_(blob_type), + vertical_(vertical) { memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); } @@ -105,8 +105,10 @@ ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD &vertical) // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and // the ColPartition owns the BLOBNBOX!!! // Call DeleteBoxes before deleting the ColPartition. -ColPartition *ColPartition::FakePartition(const TBOX &box, PolyBlockType block_type, - BlobRegionType blob_type, BlobTextFlowType flow) { +ColPartition *ColPartition::FakePartition(const TBOX &box, + PolyBlockType block_type, + BlobRegionType blob_type, + BlobTextFlowType flow) { auto *part = new ColPartition(blob_type, ICOORD(0, 1)); part->set_type(block_type); part->set_flow(flow); @@ -124,7 +126,8 @@ ColPartition *ColPartition::FakePartition(const TBOX &box, PolyBlockType block_t // than the surrounding text that may be a dropcap, two or more vertically // touching characters, or some graphic element. // If the given list is not nullptr, the partition is also added to the list. 
-ColPartition *ColPartition::MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list) { +ColPartition *ColPartition::MakeBigPartition(BLOBNBOX *box, + ColPartition_LIST *big_part_list) { box->set_owner(nullptr); auto *single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1)); single->set_flow(BTFT_NONE); @@ -155,8 +158,9 @@ ColPartition::~ColPartition() { // Constructs a fake ColPartition with no BLOBNBOXes to represent a // horizontal or vertical line, given a type and a bounding box. -ColPartition *ColPartition::MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, - int left, int bottom, int right, int top) { +ColPartition *ColPartition::MakeLinePartition(BlobRegionType blob_type, + const ICOORD &vertical, int left, + int bottom, int right, int top) { auto *part = new ColPartition(blob_type, vertical); part->bounding_box_ = TBOX(left, bottom, right, top); part->median_bottom_ = bottom; @@ -202,8 +206,9 @@ void ColPartition::AddBox(BLOBNBOX *bbox) { right_key_ = BoxRightKey(); } if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) { - tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", box.left(), - box.bottom(), box.right(), box.top(), bounding_box_.left(), bounding_box_.right()); + tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", + box.left(), box.bottom(), box.right(), box.top(), + bounding_box_.left(), bounding_box_.right()); } } @@ -227,11 +232,13 @@ BLOBNBOX *ColPartition::BiggestBox() { for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX *bbox = bb_it.data(); if (IsVerticalType()) { - if (biggest == nullptr || bbox->bounding_box().width() > biggest->bounding_box().width()) { + if (biggest == nullptr || + bbox->bounding_box().width() > biggest->bounding_box().width()) { biggest = bbox; } } else { - if (biggest == nullptr || bbox->bounding_box().height() > biggest->bounding_box().height()) { + if (biggest == nullptr || + bbox->bounding_box().height() > 
biggest->bounding_box().height()) { biggest = bbox; } } @@ -362,7 +369,8 @@ bool ColPartition::IsLegal() { } return false; // Bounding box invalid. } - if (left_margin_ > bounding_box_.left() || right_margin_ < bounding_box_.right()) { + if (left_margin_ > bounding_box_.left() || + right_margin_ < bounding_box_.right()) { if (textord_debug_bugs) { tprintf("Margins invalid\n"); Print(); @@ -371,8 +379,8 @@ bool ColPartition::IsLegal() { } if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) { if (textord_debug_bugs) { - tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(), right_key_, - BoxRightKey()); + tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(), + right_key_, BoxRightKey()); Print(); } return false; // Keys inside the box. @@ -383,10 +391,12 @@ bool ColPartition::IsLegal() { // Returns true if the left and right edges are approximately equal. bool ColPartition::MatchingColumns(const ColPartition &other) const { int y = (MidY() + other.MidY()) / 2; - if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor, LeftAtY(y) / kColumnWidthFactor, 1)) { + if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor, + LeftAtY(y) / kColumnWidthFactor, 1)) { return false; } - if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor, RightAtY(y) / kColumnWidthFactor, 1)) { + if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor, + RightAtY(y) / kColumnWidthFactor, 1)) { return false; } return true; @@ -400,10 +410,14 @@ bool ColPartition::MatchingTextColor(const ColPartition &other) const { } // Colors must match for other to count. 
- double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_); - double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_); - double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_); - double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_); + double d_this1_o = + ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_); + double d_this2_o = + ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_); + double d_o1_this = + ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_); + double d_o2_this = + ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_); // All 4 distances must be small enough. return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance && d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance; @@ -441,7 +455,8 @@ bool ColPartition::ConfirmNoTabViolation(const ColPartition &other) const { } // Returns true if other has a similar stroke width to this. 
-bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, +bool ColPartition::MatchingStrokeWidth(const ColPartition &other, + double fractional_tolerance, double constant_tolerance) const { int match_count = 0; int nonmatch_count = 0; @@ -450,8 +465,8 @@ bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractio box_it.mark_cycle_pt(); other_it.mark_cycle_pt(); while (!box_it.cycled_list() && !other_it.cycled_list()) { - if (box_it.data()->MatchingStrokeWidth(*other_it.data(), fractional_tolerance, - constant_tolerance)) { + if (box_it.data()->MatchingStrokeWidth( + *other_it.data(), fractional_tolerance, constant_tolerance)) { ++match_count; } else { ++nonmatch_count; @@ -468,7 +483,8 @@ bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractio // (1) this is a ColPartition containing only diacritics, and // (2) the base characters indicated on the diacritics all believably lie // within the text line of the candidate ColPartition. -bool ColPartition::OKDiacriticMerge(const ColPartition &candidate, bool debug) const { +bool ColPartition::OKDiacriticMerge(const ColPartition &candidate, + bool debug) const { BLOBNBOX_C_IT it(const_cast(&boxes_)); int min_top = INT32_MAX; int max_bottom = -INT32_MAX; @@ -490,13 +506,14 @@ bool ColPartition::OKDiacriticMerge(const ColPartition &candidate, bool debug) c } // If the intersection of all vertical ranges of all base characters // overlaps the median range of this, then it is OK. 
- bool result = min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_; + bool result = + min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_; if (debug) { if (result) { tprintf("OKDiacritic!\n"); } else { - tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top, median_bottom_, - median_top_); + tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top, + median_bottom_, median_top_); } } return result; @@ -591,7 +608,8 @@ int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) { return count; } -void ColPartition::SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density) { +void ColPartition::SetSpecialBlobsDensity(const BlobSpecialTextType type, + const float density) { ASSERT_HOST(type < BSTT_COUNT); special_blobs_densities_[type] = density; } @@ -619,10 +637,12 @@ void ColPartition::ComputeSpecialBlobsDensity() { // Partnerships are added symmetrically to partner and this. void ColPartition::AddPartner(bool upper, ColPartition *partner) { if (upper) { - partner->lower_partners_.add_sorted(SortByBoxLeft, true, this); + partner->lower_partners_.add_sorted(SortByBoxLeft, true, + this); upper_partners_.add_sorted(SortByBoxLeft, true, partner); } else { - partner->upper_partners_.add_sorted(SortByBoxLeft, true, this); + partner->upper_partners_.add_sorted(SortByBoxLeft, true, + this); lower_partners_.add_sorted(SortByBoxLeft, true, partner); } } @@ -651,14 +671,16 @@ ColPartition *ColPartition::SingletonPartner(bool upper) { } // Merge with the other partition and delete it. -void ColPartition::Absorb(ColPartition *other, WidthCallback cb) { +void ColPartition::Absorb(ColPartition *other, const WidthCallback &cb) { // The result has to either own all of the blobs or none of them. // Verify the flag is consistent. ASSERT_HOST(owns_blobs() == other->owns_blobs()); // TODO(nbeato): check owns_blobs better. 
Right now owns_blobs // should always be true when this is called. So there is no issues. - if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()) || - TabFind::WithinTestRegion(2, other->bounding_box_.left(), other->bounding_box_.bottom())) { + if (TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()) || + TabFind::WithinTestRegion(2, other->bounding_box_.left(), + other->bounding_box_.bottom())) { tprintf("Merging:"); Print(); other->Print(); @@ -669,8 +691,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) { for (int type = 0; type < BSTT_COUNT; ++type) { unsigned w1 = boxes_.length(); unsigned w2 = other->boxes_.length(); - float new_val = - special_blobs_densities_[type] * w1 + other->special_blobs_densities_[type] * w2; + float new_val = special_blobs_densities_[type] * w1 + + other->special_blobs_densities_[type] * w2; if (!w1 || !w2) { ASSERT_HOST((w1 + w2) > 0); special_blobs_densities_[type] = new_val / (w1 + w2); @@ -723,7 +745,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) { for (int upper = 0; upper < 2; ++upper) { ColPartition_CLIST partners; ColPartition_C_IT part_it(&partners); - part_it.add_list_after(upper ? &other->upper_partners_ : &other->lower_partners_); + part_it.add_list_after(upper ? &other->upper_partners_ + : &other->lower_partners_); for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { ColPartition *partner = part_it.extract(); partner->RemovePartner(!upper, other); @@ -747,7 +770,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) { // the text involved, and is usually a fraction of the median size of merge1 // and/or merge2, or this. // TODO(rays) Determine whether vertical text needs to be considered. 
-bool ColPartition::OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, +bool ColPartition::OKMergeOverlap(const ColPartition &merge1, + const ColPartition &merge2, int ok_box_overlap, bool debug) { // Vertical partitions are not allowed to be involved. if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) { @@ -916,7 +940,8 @@ void ColPartition::ComputeLimits() { if (it.empty()) { return; } - if (IsImageType() || blob_type() == BRT_RECTIMAGE || blob_type() == BRT_POLYIMAGE) { + if (IsImageType() || blob_type() == BRT_RECTIMAGE || + blob_type() == BRT_POLYIMAGE) { median_top_ = bounding_box_.top(); median_bottom_ = bounding_box_.bottom(); median_height_ = bounding_box_.height(); @@ -957,7 +982,8 @@ void ColPartition::ComputeLimits() { Print(); } if (left_margin_ > bounding_box_.left() && textord_debug_bugs) { - tprintf("Made partition with bad left coords, %d > %d\n", left_margin_, bounding_box_.left()); + tprintf("Made partition with bad left coords, %d > %d\n", left_margin_, + bounding_box_.left()); Print(); } // Fix partner lists. 
The bounding box has changed and partners are stored @@ -973,7 +999,8 @@ void ColPartition::ComputeLimits() { partner->AddPartner(!upper, this); } } - if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) { + if (TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom())) { tprintf("Recomputed box for partition %p\n", this); Print(); } @@ -998,10 +1025,12 @@ void ColPartition::SetPartitionType(int resolution, ColPartitionSet *columns) { int first_spanned_col = -1; ColumnSpanningType span_type = columns->SpanningType( resolution, bounding_box_.left(), bounding_box_.right(), - std::min(bounding_box_.height(), bounding_box_.width()), MidY(), left_margin_, right_margin_, - &first_column_, &last_column_, &first_spanned_col); + std::min(bounding_box_.height(), bounding_box_.width()), MidY(), + left_margin_, right_margin_, &first_column_, &last_column_, + &first_spanned_col); column_set_ = columns; - if (first_column_ < last_column_ && span_type == CST_PULLOUT && !IsLineType()) { + if (first_column_ < last_column_ && span_type == CST_PULLOUT && + !IsLineType()) { // Unequal columns may indicate that the pullout spans one of the columns // it lies in, so force it to be allocated to just that column. if (first_spanned_col >= 0) { @@ -1026,8 +1055,8 @@ void ColPartition::SetPartitionType(int resolution, ColPartitionSet *columns) { // in the columns. PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const { if (flow == CST_NOISE) { - if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && blob_type_ != BRT_RECTIMAGE && - blob_type_ != BRT_VERT_TEXT) { + if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && + blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) { return PT_NOISE; } flow = CST_FLOWING; @@ -1075,18 +1104,18 @@ PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const { // Returns the first and last column touched by this partition. 
// resolution refers to the ppi resolution of the image. -void ColPartition::ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, - int *last_col) { +void ColPartition::ColumnRange(int resolution, ColPartitionSet *columns, + int *first_col, int *last_col) { int first_spanned_col = -1; - ColumnSpanningType span_type = - columns->SpanningType(resolution, bounding_box_.left(), bounding_box_.right(), - std::min(bounding_box_.height(), bounding_box_.width()), MidY(), - left_margin_, right_margin_, first_col, last_col, &first_spanned_col); + ColumnSpanningType span_type = columns->SpanningType( + resolution, bounding_box_.left(), bounding_box_.right(), + std::min(bounding_box_.height(), bounding_box_.width()), MidY(), + left_margin_, right_margin_, first_col, last_col, &first_spanned_col); type_ = PartitionType(span_type); } // Sets the internal flags good_width_ and good_column_. -void ColPartition::SetColumnGoodness(WidthCallback cb) { +void ColPartition::SetColumnGoodness(const WidthCallback &cb) { int y = MidY(); int width = RightAtY(y) - LeftAtY(y); good_width_ = cb(width); @@ -1127,10 +1156,12 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f); if (textord_debug_tabfind >= 4) { tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count, - max_width * kMaxLeaderGapFractionOfMax, min_width * kMaxLeaderGapFractionOfMin); + max_width * kMaxLeaderGapFractionOfMax, + min_width * kMaxLeaderGapFractionOfMin); } if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax && - gap_iqr < min_width * kMaxLeaderGapFractionOfMin && blob_count >= kMinLeaderCount) { + gap_iqr < min_width * kMaxLeaderGapFractionOfMin && + blob_count >= kMinLeaderCount) { // This is stable enough to be called a leader, so check the widths. // Since leader dashes can join, run a dp cutting algorithm and go // on the cost. 
@@ -1151,8 +1182,9 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { projection[left - part_left].AddLocalCost(height); } } - DPPoint *best_end = DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance, - part_width, projection); + DPPoint *best_end = + DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance, + part_width, projection); if (best_end != nullptr && best_end->total_cost() < blob_count) { // Good enough. Call it a leader. result = true; @@ -1161,7 +1193,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { BLOBNBOX *blob = it.data(); // If the first or last blob is spaced too much, don't mark it. if (it.at_first()) { - int gap = it.data_relative(1)->bounding_box().left() - blob->bounding_box().right(); + int gap = it.data_relative(1)->bounding_box().left() - + blob->bounding_box().right(); if (blob->bounding_box().width() + gap > max_step) { it.extract(); modified_blob_list = true; @@ -1169,7 +1202,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { } } if (it.at_last()) { - int gap = blob->bounding_box().left() - it.data_relative(-1)->bounding_box().right(); + int gap = blob->bounding_box().left() - + it.data_relative(-1)->bounding_box().right(); if (blob->bounding_box().width() + gap > max_step) { it.extract(); modified_blob_list = true; @@ -1188,7 +1222,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { if (best_end == nullptr) { tprintf("No path\n"); } else { - tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(), blob_count); + tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(), + blob_count); } } delete[] projection; @@ -1275,10 +1310,12 @@ void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) { blob_type_ = BRT_NOISE; } } - if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) { - tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", blob_count, - noisy_count, good_blob_score_); - tprintf(" Projection value=%d, flow=%d, 
blob_type=%d\n", value, flow_, blob_type_); + if (TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom())) { + tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", + blob_count, noisy_count, good_blob_score_); + tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_, + blob_type_); Print(); } SetBlobTypes(); @@ -1371,7 +1408,8 @@ bool ColPartition::HasGoodBaseline() { // Adds this ColPartition to a matching WorkingPartSet if one can be found, // otherwise starts a new one in the appropriate column, ending the previous. -void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, +void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, + int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_sets) { if (block_owned_) { @@ -1414,10 +1452,11 @@ void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, in // Find the column that the right edge falls in. BLOCK_LIST completed_blocks; TO_BLOCK_LIST to_blocks; - for (; !it.cycled_list() && col_index <= last_column_; it.forward(), ++col_index) { + for (; !it.cycled_list() && col_index <= last_column_; + it.forward(), ++col_index) { WorkingPartSet *end_set = it.data(); - end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks, - &to_blocks); + end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, + &completed_blocks, &to_blocks); } work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); } @@ -1431,9 +1470,12 @@ void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, in // The used partitions are put onto used_parts, as they may still be referred // to in the partition grid. bleft, tright and resolution are the bounds // and resolution of the original image. 
-void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, - ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, - BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks) { +void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, + int resolution, + ColPartition_LIST *block_parts, + ColPartition_LIST *used_parts, + BLOCK_LIST *completed_blocks, + TO_BLOCK_LIST *to_blocks) { int page_height = tright.y() - bleft.y(); // Compute the initial spacing stats. ColPartition_IT it(block_parts); @@ -1466,7 +1508,8 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, part->set_side_step(static_cast(side_steps.median() + 0.5)); if (!it.at_last()) { ColPartition *next_part = it.data_relative(1); - part->set_bottom_spacing(part->median_bottom() - next_part->median_bottom()); + part->set_bottom_spacing(part->median_bottom() - + next_part->median_bottom()); part->set_top_spacing(part->median_top() - next_part->median_top()); } else { part->set_bottom_spacing(page_height); @@ -1474,8 +1517,8 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, } if (textord_debug_tabfind) { part->Print(); - tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n", side_steps.median(), - part->top_spacing(), part->bottom_spacing()); + tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n", + side_steps.median(), part->top_spacing(), part->bottom_spacing()); } ++part_count; } @@ -1508,21 +1551,25 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, tprintf( "Spacings unequal: upper:%d/%d, lower:%d/%d," " sizes %d %d %d\n", - part->top_spacing(), part->bottom_spacing(), next_part->top_spacing(), - next_part->bottom_spacing(), part->median_height(), next_part->median_height(), + part->top_spacing(), part->bottom_spacing(), + next_part->top_spacing(), next_part->bottom_spacing(), + part->median_height(), 
next_part->median_height(), third_part != nullptr ? third_part->median_height() : 0); } // We can only consider adding the next line to the block if the sizes // match and the lines are close enough for their size. if (part->SizesSimilar(*next_part) && - next_part->median_height() * kMaxSameBlockLineSpacing > part->bottom_spacing() && - part->median_height() * kMaxSameBlockLineSpacing > part->top_spacing()) { + next_part->median_height() * kMaxSameBlockLineSpacing > + part->bottom_spacing() && + part->median_height() * kMaxSameBlockLineSpacing > + part->top_spacing()) { // Even now, we can only add it as long as the third line doesn't // match in the same way and have a smaller bottom spacing. if (third_part == nullptr || !next_part->SizesSimilar(*third_part) || third_part->median_height() * kMaxSameBlockLineSpacing <= next_part->bottom_spacing() || - next_part->median_height() * kMaxSameBlockLineSpacing <= next_part->top_spacing() || + next_part->median_height() * kMaxSameBlockLineSpacing <= + next_part->top_spacing() || next_part->bottom_spacing() > part->bottom_spacing()) { // Add to the current block. sp_block_it.add_to_end(it.extract()); @@ -1542,8 +1589,9 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, } else { if (textord_debug_tabfind && !it.empty()) { ColPartition *next_part = it.data(); - tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n", part->top_spacing(), - part->bottom_spacing(), next_part->top_spacing(), next_part->bottom_spacing(), + tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n", + part->top_spacing(), part->bottom_spacing(), + next_part->top_spacing(), next_part->bottom_spacing(), part->median_height(), next_part->median_height()); } } @@ -1570,8 +1618,9 @@ static void ClipCoord(const ICOORD &bleft, const ICOORD &tright, ICOORD *pos) { // itself. Sets up the block for (old) textline formation correctly for // vertical and horizontal text. 
The partitions are moved to used_parts // afterwards, as they cannot be deleted yet. -static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, BLOCK *block, - ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) { +static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, + BLOCK *block, ColPartition_LIST *block_parts, + ColPartition_LIST *used_parts) { // Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it. // Move all the parts to a done list as they are no longer needed, except // that have have to continue to exist until the part grid is deleted. @@ -1646,7 +1695,8 @@ static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, BLOCK *b // Constructs a block from the given list of partitions. // Arguments are as LineSpacingBlocks above. TO_BLOCK *ColPartition::MakeBlock(const ICOORD &bleft, const ICOORD &tright, - ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) { + ColPartition_LIST *block_parts, + ColPartition_LIST *used_parts) { if (block_parts->empty()) { return nullptr; // Nothing to do. } @@ -1704,7 +1754,8 @@ TO_BLOCK *ColPartition::MakeBlock(const ICOORD &bleft, const ICOORD &tright, // Constructs a block from the given list of vertical text partitions. // Currently only creates rectangular blocks. 
-TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, +TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft, + const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) { if (block_parts->empty()) { @@ -1722,8 +1773,8 @@ TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD tprintf("Making block at:"); block_box.print(); } - auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(), block_box.right(), - block_box.top()); + auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(), + block_box.right(), block_box.top()); block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type)); return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts); } @@ -1741,8 +1792,9 @@ TO_ROW *ColPartition::MakeToRow() { int top = blob->bounding_box().top(); int bottom = blob->bounding_box().bottom(); if (row == nullptr) { - row = new TO_ROW(blob, static_cast(top), static_cast(bottom), - static_cast(line_size)); + row = + new TO_ROW(blob, static_cast(top), static_cast(bottom), + static_cast(line_size)); } else { row->add_blob(blob, static_cast(top), static_cast(bottom), static_cast(line_size)); @@ -1785,7 +1837,8 @@ ColPartition *ColPartition::CopyButDontOwnBlobs() { copy->set_owns_blobs(false); BLOBNBOX_C_IT inserter(copy->boxes()); BLOBNBOX_C_IT traverser(boxes()); - for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward()) { + for (traverser.mark_cycle_pt(); !traverser.cycled_list(); + traverser.forward()) { inserter.add_after_then_move(traverser.data()); } return copy; @@ -1812,19 +1865,21 @@ void ColPartition::Print() const { "ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" " ts=%d bs=%d ls=%d rs=%d\n", - boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 
'T' : 'B', LeftAtY(y), - bounding_box_.left(), median_left_, bounding_box_.bottom(), median_bottom_, - bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', right_margin_, median_right_, - bounding_box_.top(), median_top_, good_width_, good_column_, type_, kBlobTypes[blob_type_], - flow_, first_column_, last_column_, boxes_.length(), space_above_, space_below_, - space_to_left_, space_to_right_); + boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B', + LeftAtY(y), bounding_box_.left(), median_left_, bounding_box_.bottom(), + median_bottom_, bounding_box_.right(), RightAtY(y), + right_key_tab_ ? 'T' : 'B', right_margin_, median_right_, + bounding_box_.top(), median_top_, good_width_, good_column_, type_, + kBlobTypes[blob_type_], flow_, first_column_, last_column_, + boxes_.length(), space_above_, space_below_, space_to_left_, + space_to_right_); } // Prints debug information on the colors. void ColPartition::PrintColors() { - tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED], color1_[COLOR_GREEN], - color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL], color2_[COLOR_RED], color2_[COLOR_GREEN], - color2_[COLOR_BLUE]); + tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED], + color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL], + color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]); } // Sets the types of all partitions in the run to be the max of the types. @@ -1898,7 +1953,8 @@ void ColPartition::SmoothPartnerRun(int working_set_count) { // one partner. This makes block creation simpler. // If get_desperate is true, goes to more desperate merge methods // to merge flowing text before breaking partnerships. 
-void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid) { +void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, + ColPartitionGrid *grid) { if (TypesSimilar(type_, type)) { RefinePartnersInternal(true, get_desperate, grid); RefinePartnersInternal(false, get_desperate, grid); @@ -1924,7 +1980,8 @@ void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, ColPar // Cleans up the partners above if upper is true, else below. // If get_desperate is true, goes to more desperate merge methods // to merge flowing text before breaking partnerships. -void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, ColPartitionGrid *grid) { +void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, + ColPartitionGrid *grid) { ColPartition_CLIST *partners = upper ? &upper_partners_ : &lower_partners_; if (!partners->empty() && !partners->singleton()) { RefinePartnersByType(upper, partners); @@ -1952,8 +2009,10 @@ void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, ColPar // Cleans up the partners above if upper is true, else below. // Restricts the partners to only desirable types. For text and BRT_HLINE this // means the same type_ , and for image types it means any image type. -void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); +void ColPartition::RefinePartnersByType(bool upper, + ColPartition_CLIST *partners) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); if (debug) { tprintf("Refining %d %s partners by type for:\n", partners->length(), upper ? "Upper" : "Lower"); @@ -1983,7 +2042,8 @@ void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners // Only polyimages are allowed to have partners of any kind! 
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition *partner = it.data(); - if (partner->blob_type() != BRT_POLYIMAGE || blob_type() != BRT_POLYIMAGE) { + if (partner->blob_type() != BRT_POLYIMAGE || + blob_type() != BRT_POLYIMAGE) { if (debug) { tprintf("Removing partner:"); partner->Print(); @@ -2003,7 +2063,8 @@ void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners // Gets rid of this<->b, leaving a clean chain. // Also if we have this<->a and a<->this, then gets rid of this<->a, as // this has multiple partners. -void ColPartition::RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partners) { +void ColPartition::RefinePartnerShortcuts(bool upper, + ColPartition_CLIST *partners) { bool done_any = false; do { done_any = false; @@ -2054,8 +2115,10 @@ void ColPartition::RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partne // by aggressive line fitting/splitting, as there are probably vertically // joined blobs that cross textlines. void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, - ColPartition_CLIST *partners, ColPartitionGrid *grid) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); + ColPartition_CLIST *partners, + ColPartitionGrid *grid) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); if (debug) { tprintf("Refining %d %s partners by merge for:\n", partners->length(), upper ? 
"Upper" : "Lower"); @@ -2078,12 +2141,13 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, } } int overlap_increase; - ColPartition *candidate = - grid->BestMergeCandidate(part, &candidates, debug, nullptr, &overlap_increase); + ColPartition *candidate = grid->BestMergeCandidate( + part, &candidates, debug, nullptr, &overlap_increase); if (candidate != nullptr && (overlap_increase <= 0 || desperate)) { if (debug) { - tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", part->HCoreOverlap(*candidate), - part->VCoreOverlap(*candidate), overlap_increase); + tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", + part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate), + overlap_increase); } // Remove before merge and re-insert to keep the integrity of the grid. grid->RemoveBBox(candidate); @@ -2102,8 +2166,10 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, // Cleans up the partners above if upper is true, else below. // Keep the partner with the biggest overlap. -void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); +void ColPartition::RefinePartnersByOverlap(bool upper, + ColPartition_CLIST *partners) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); if (debug) { tprintf("Refining %d %s partners by overlap for:\n", partners->length(), upper ? 
"Upper" : "Lower"); @@ -2115,8 +2181,9 @@ void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partn int best_overlap = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition *partner = it.data(); - int overlap = std::min(bounding_box_.right(), partner->bounding_box_.right()) - - std::max(bounding_box_.left(), partner->bounding_box_.left()); + int overlap = + std::min(bounding_box_.right(), partner->bounding_box_.right()) - + std::max(bounding_box_.left(), partner->bounding_box_.left()); if (overlap > best_overlap) { best_overlap = overlap; best_partner = partner; @@ -2137,7 +2204,8 @@ void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partn } // Return true if bbox belongs better in this than other. -bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other) { +bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, + const ColPartition &other) { const TBOX &box = bbox->bounding_box(); // Margins take priority. 
int left = box.left(); @@ -2150,14 +2218,17 @@ bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other } int top = box.top(); int bottom = box.bottom(); - int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_); - int other_overlap = std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_); + int this_overlap = + std::min(top, median_top_) - std::max(bottom, median_bottom_); + int other_overlap = + std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_); int this_miss = median_top_ - median_bottom_ - this_overlap; int other_miss = other.median_top_ - other.median_bottom_ - other_overlap; if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) { - tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", box.left(), - box.bottom(), box.right(), box.top(), this_overlap, other_overlap, this_miss, - other_miss, median_top_, other.median_top_); + tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", + box.left(), box.bottom(), box.right(), box.top(), this_overlap, + other_overlap, this_miss, other_miss, median_top_, + other.median_top_); } if (this_miss < other_miss) { return true; @@ -2200,13 +2271,15 @@ bool ColPartition::IsInSameColumnAs(const ColPartition &part) const { // Overlap does not occur when last < part.first or first > part.last. // In other words, one is completely to the side of the other. // This is just DeMorgan's law applied to that so the function returns true. - return (last_column_ >= part.first_column_) && (first_column_ <= part.last_column_); + return (last_column_ >= part.first_column_) && + (first_column_ <= part.last_column_); } // Smoothes the spacings in the list into groups of equal linespacing. // resolution is the resolution of the original image, used as a basis // for thresholds in change of spacing. page_height is in pixels. 
-void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_LIST *parts) { +void ColPartition::SmoothSpacings(int resolution, int page_height, + ColPartition_LIST *parts) { // The task would be trivial if we didn't have to allow for blips - // occasional offsets in spacing caused by anomalous text, such as all // caps, groups of descenders, joined words, Arabic etc. @@ -2258,13 +2331,17 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_ // The last time, everything is shifted up 1, so we present OKSpacingBlip // with neighbourhood-1 and check that PN_LOWER matches the median. if (neighbourhood[PN_LOWER] == nullptr || - (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER], resolution) && - (neighbourhood[PN_UPPER] == nullptr || neighbourhood[PN_LOWER] == nullptr || + (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER], + resolution) && + (neighbourhood[PN_UPPER] == nullptr || + neighbourhood[PN_LOWER] == nullptr || !OKSpacingBlip(resolution, median_space, neighbourhood, 0)) && - (neighbourhood[PN_UPPER - 1] == nullptr || neighbourhood[PN_LOWER - 1] == nullptr || + (neighbourhood[PN_UPPER - 1] == nullptr || + neighbourhood[PN_LOWER - 1] == nullptr || !OKSpacingBlip(resolution, median_space, neighbourhood, -1) || !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) && - (neighbourhood[PN_UPPER + 1] == nullptr || neighbourhood[PN_LOWER + 1] == nullptr || + (neighbourhood[PN_UPPER + 1] == nullptr || + neighbourhood[PN_LOWER + 1] == nullptr || !OKSpacingBlip(resolution, median_space, neighbourhood, 1) || !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) { // The group has ended. PN_UPPER is the last member. 
@@ -2297,7 +2374,8 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_ if (neighbourhood[i] == nullptr) { tprintf("NULL"); if (i > 0 && neighbourhood[i - 1] != nullptr) { - if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) { + if (neighbourhood[i - 1]->SingletonPartner(false) != + nullptr) { tprintf(" Lower partner:"); neighbourhood[i - 1]->SingletonPartner(false)->Print(); } else { @@ -2307,7 +2385,8 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_ tprintf("\n"); } } else { - tprintf("Top = %d, bottom = %d\n", neighbourhood[i]->top_spacing(), + tprintf("Top = %d, bottom = %d\n", + neighbourhood[i]->top_spacing(), neighbourhood[i]->bottom_spacing()); } } @@ -2350,12 +2429,13 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_ // Returns true if the parts array of pointers to partitions matches the // condition for a spacing blip. See SmoothSpacings for what this means // and how it is used. -bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, ColPartition **parts, - int offset) { +bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, + ColPartition **parts, int offset) { // The blip is OK if upper and lower sum to an OK value and at least // one of above1 and below1 is equal to the median. parts += offset; - return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing, resolution) && + return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing, + resolution) && ((parts[PN_ABOVE1] != nullptr && parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) || (parts[PN_BELOW1] != nullptr && @@ -2373,22 +2453,27 @@ bool ColPartition::SpacingEqual(int spacing, int resolution) const { // Returns true if both the top and bottom spacings of this and other // match to within suitable margins dictated by the image resolution. 
-bool ColPartition::SpacingsEqual(const ColPartition &other, int resolution) const { - int bottom_error = - std::max(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution)); - int top_error = std::max(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution)); +bool ColPartition::SpacingsEqual(const ColPartition &other, + int resolution) const { + int bottom_error = std::max(BottomSpacingMargin(resolution), + other.BottomSpacingMargin(resolution)); + int top_error = std::max(TopSpacingMargin(resolution), + other.TopSpacingMargin(resolution)); return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) && (NearlyEqual(top_spacing_, other.top_spacing_, top_error) || - NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, bottom_error)); + NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, + bottom_error)); } // Returns true if the sum spacing of this and other match the given // spacing (or twice the given spacing) to within a suitable margin dictated // by the image resolution. 
-bool ColPartition::SummedSpacingOK(const ColPartition &other, int spacing, int resolution) const { - int bottom_error = - std::max(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution)); - int top_error = std::max(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution)); +bool ColPartition::SummedSpacingOK(const ColPartition &other, int spacing, + int resolution) const { + int bottom_error = std::max(BottomSpacingMargin(resolution), + other.BottomSpacingMargin(resolution)); + int top_error = std::max(TopSpacingMargin(resolution), + other.TopSpacingMargin(resolution)); int bottom_total = bottom_spacing_ + other.bottom_spacing_; int top_total = top_spacing_ + other.top_spacing_; return (NearlyEqual(spacing, bottom_total, bottom_error) && @@ -2420,7 +2505,8 @@ bool ColPartition::SizesSimilar(const ColPartition &other) const { // Helper updates margin_left and margin_right, being the bounds of the left // margin of part of a block. Returns false and does not update the bounds if // this partition has a disjoint margin with the established margin. -static bool UpdateLeftMargin(const ColPartition &part, int *margin_left, int *margin_right) { +static bool UpdateLeftMargin(const ColPartition &part, int *margin_left, + int *margin_right) { const TBOX &part_box = part.bounding_box(); int top = part_box.top(); int bottom = part_box.bottom(); @@ -2444,7 +2530,8 @@ static bool UpdateLeftMargin(const ColPartition &part, int *margin_left, int *ma // condition that the intersection of the left margins is non-empty, ie the // rightmost left margin is to the left of the leftmost left bounding box edge. // On return the iterator is set to the start of the next run. 
-void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end) { +void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, + ICOORD *end) { ColPartition *part = part_it->data(); ColPartition *start_part = part; int start_y = part->bounding_box_.top(); @@ -2463,7 +2550,8 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD * do { part_it->forward(); part = part_it->data(); - } while (!part_it->at_first() && UpdateLeftMargin(*part, &margin_left, &margin_right)); + } while (!part_it->at_first() && + UpdateLeftMargin(*part, &margin_left, &margin_right)); // The run ended. If we were pushed inwards, compute the next run and // extend it backwards into the run we just calculated to find the end of // this run that provides a tight box. @@ -2475,13 +2563,15 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD * do { next_it.forward(); part = next_it.data(); - } while (!next_it.at_first() && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); + } while (!next_it.at_first() && + UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); // Now extend the next run backwards into the original run to get the // tightest fit. do { part_it->backward(); part = part_it->data(); - } while (part != start_part && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); + } while (part != start_part && + UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); part_it->forward(); } // Now calculate the end_y. 
@@ -2495,16 +2585,17 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD * end->set_y(end_y); end->set_x(part->XAtY(margin_right, end_y)); if (textord_debug_tabfind && !part_it->at_first()) { - tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y, - part->XAtY(margin_left, end_y), end->x(), part->left_margin_, - part->bounding_box_.left()); + tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", + start_y, end_y, part->XAtY(margin_left, end_y), end->x(), + part->left_margin_, part->bounding_box_.left()); } } // Helper updates margin_left and margin_right, being the bounds of the right // margin of part of a block. Returns false and does not update the bounds if // this partition has a disjoint margin with the established margin. -static bool UpdateRightMargin(const ColPartition &part, int *margin_left, int *margin_right) { +static bool UpdateRightMargin(const ColPartition &part, int *margin_left, + int *margin_right) { const TBOX &part_box = part.bounding_box(); int top = part_box.top(); int bottom = part_box.bottom(); @@ -2529,7 +2620,8 @@ static bool UpdateRightMargin(const ColPartition &part, int *margin_left, int *m // leftmost right margin is to the right of the rightmost right bounding box // edge. // On return the iterator is set to the start of the next run. 
-void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end) { +void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, + ICOORD *end) { ColPartition *part = part_it->data(); ColPartition *start_part = part; int start_y = part->bounding_box_.bottom(); @@ -2548,7 +2640,8 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD do { part_it->backward(); part = part_it->data(); - } while (!part_it->at_last() && UpdateRightMargin(*part, &margin_left, &margin_right)); + } while (!part_it->at_last() && + UpdateRightMargin(*part, &margin_left, &margin_right)); // The run ended. If we were pushed inwards, compute the next run and // extend it backwards to find the end of this run for a tight box. int next_margin_right = INT32_MAX; @@ -2559,13 +2652,15 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD do { next_it.backward(); part = next_it.data(); - } while (!next_it.at_last() && UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); + } while (!next_it.at_last() && + UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); // Now extend the next run forwards into the original run to get the // tightest fit. do { part_it->forward(); part = part_it->data(); - } while (part != start_part && UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); + } while (part != start_part && + UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); part_it->backward(); } // Now calculate the end_y. 
@@ -2579,9 +2674,9 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD end->set_y(end_y); end->set_x(part->XAtY(margin_left, end_y)); if (textord_debug_tabfind && !part_it->at_last()) { - tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y, - end->x(), part->XAtY(margin_right, end_y), part->bounding_box_.right(), - part->right_margin_); + tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", + start_y, end_y, end->x(), part->XAtY(margin_right, end_y), + part->bounding_box_.right(), part->right_margin_); } } diff --git a/src/textord/colpartition.h b/src/textord/colpartition.h index 33596c21..8b44ad05 100644 --- a/src/textord/colpartition.h +++ b/src/textord/colpartition.h @@ -81,7 +81,8 @@ public: * Constructs a fake ColPartition with no BLOBNBOXes to represent a * horizontal or vertical line, given a type and a bounding box. */ - static ColPartition *MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, + static ColPartition *MakeLinePartition(BlobRegionType blob_type, + const ICOORD &vertical, int left, int bottom, int right, int top); // Constructs and returns a fake ColPartition with a single fake BLOBNBOX, @@ -90,14 +91,16 @@ public: // the ColPartition owns the BLOBNBOX!!! // Call DeleteBoxes before deleting the ColPartition. static ColPartition *FakePartition(const TBOX &box, PolyBlockType block_type, - BlobRegionType blob_type, BlobTextFlowType flow); + BlobRegionType blob_type, + BlobTextFlowType flow); // Constructs and returns a ColPartition with the given real BLOBNBOX, // and sets it up to be a "big" partition (single-blob partition bigger // than the surrounding text that may be a dropcap, two or more vertically // touching characters, or some graphic element. // If the given list is not nullptr, the partition is also added to the list. 
- static ColPartition *MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list); + static ColPartition *MakeBigPartition(BLOBNBOX *box, + ColPartition_LIST *big_part_list); ~ColPartition(); @@ -389,7 +392,8 @@ public: return false; } int overlap = VCoreOverlap(other); - int height = std::min(median_top_ - median_bottom_, other.median_top_ - other.median_bottom_); + int height = std::min(median_top_ - median_bottom_, + other.median_top_ - other.median_bottom_); return overlap * 3 > height; } // Returns true if this and other can be combined without putting a @@ -412,7 +416,8 @@ public: // Returns true if the types are similar to each other. static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) { - return (type1 == type2 || (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) || + return (type1 == type2 || + (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) || (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION)); } @@ -519,7 +524,8 @@ public: bool ConfirmNoTabViolation(const ColPartition &other) const; // Returns true if other has a similar stroke width to this. - bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, + bool MatchingStrokeWidth(const ColPartition &other, + double fractional_tolerance, double constant_tolerance) const; // Returns true if candidate is an acceptable diacritic base char merge // with this as the diacritic. @@ -548,7 +554,8 @@ public: // Set the density value for a particular BlobSpecialTextType, should ONLY be // used for debugging or testing. In production code, use // ComputeSpecialBlobsDensity instead. - void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density); + void SetSpecialBlobsDensity(const BlobSpecialTextType type, + const float density); // Compute the SpecialTextType density of blobs, where we assume // that the SpecialTextType in the boxes_ has been set. 
void ComputeSpecialBlobsDensity(); @@ -565,14 +572,14 @@ public: ColPartition *SingletonPartner(bool upper); // Merge with the other partition and delete it. - void Absorb(ColPartition *other, WidthCallback cb); + void Absorb(ColPartition *other, const WidthCallback &cb); // Returns true if the overlap between this and the merged pair of // merge candidates is sufficiently trivial to be allowed. // The merged box can graze the edge of this by the ok_box_overlap // if that exceeds the margin to the median top and bottom. - bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, - bool debug); + bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, + int ok_box_overlap, bool debug); // Find the blob at which to split this to minimize the overlap with the // given box. Returns the first blob to go in the second partition. @@ -606,10 +613,11 @@ public: // Returns the first and last column touched by this partition. // resolution refers to the ppi resolution of the image. - void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col); + void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, + int *last_col); // Sets the internal flags good_width_ and good_column_. - void SetColumnGoodness(WidthCallback cb); + void SetColumnGoodness(const WidthCallback &cb); // Determines whether the blobs in this partition mostly represent // a leader (fixed pitch sequence) and sets the member blobs accordingly. @@ -634,8 +642,9 @@ public: // Adds this ColPartition to a matching WorkingPartSet if one can be found, // otherwise starts a new one in the appropriate column, ending the previous. 
- void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, - ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set); + void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, + int resolution, ColPartition_LIST *used_parts, + WorkingPartSet_LIST *working_set); // From the given block_parts list, builds one or more BLOCKs and // corresponding TO_BLOCKs, such that the line spacing is uniform in each. @@ -643,17 +652,21 @@ public: // The used partitions are put onto used_parts, as they may still be referred // to in the partition grid. bleft, tright and resolution are the bounds // and resolution of the original image. - static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, - ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, - BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks); + static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, + int resolution, ColPartition_LIST *block_parts, + ColPartition_LIST *used_parts, + BLOCK_LIST *completed_blocks, + TO_BLOCK_LIST *to_blocks); // Constructs a block from the given list of partitions. // Arguments are as LineSpacingBlocks above. static TO_BLOCK *MakeBlock(const ICOORD &bleft, const ICOORD &tright, - ColPartition_LIST *block_parts, ColPartition_LIST *used_parts); + ColPartition_LIST *block_parts, + ColPartition_LIST *used_parts); // Constructs a block from the given list of vertical text partitions. // Currently only creates rectangular blocks. - static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, + static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft, + const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts); @@ -686,7 +699,8 @@ public: // one partner. This makes block creation simpler. // If get_desperate is true, goes to more desperate merge methods // to merge flowing text before breaking partnerships. 
- void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid); + void RefinePartners(PolyBlockType type, bool get_desperate, + ColPartitionGrid *grid); // Returns true if this column partition is in the same column as // part. This function will only work after the SetPartitionType function @@ -700,8 +714,10 @@ public: const ColPartition *part2 = *static_cast(p2); int mid_y1 = part1->bounding_box_.y_middle(); int mid_y2 = part2->bounding_box_.y_middle(); - if ((part2->bounding_box_.bottom() <= mid_y1 && mid_y1 <= part2->bounding_box_.top()) || - (part1->bounding_box_.bottom() <= mid_y2 && mid_y2 <= part1->bounding_box_.top())) { + if ((part2->bounding_box_.bottom() <= mid_y1 && + mid_y1 <= part2->bounding_box_.top()) || + (part1->bounding_box_.bottom() <= mid_y2 && + mid_y2 <= part1->bounding_box_.top())) { // Sort by increasing x. return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle(); } @@ -721,7 +737,8 @@ private: // Cleans up the partners above if upper is true, else below. // If get_desperate is true, goes to more desperate merge methods // to merge flowing text before breaking partnerships. - void RefinePartnersInternal(bool upper, bool get_desperate, ColPartitionGrid *grid); + void RefinePartnersInternal(bool upper, bool get_desperate, + ColPartitionGrid *grid); // Restricts the partners to only desirable types. For text and BRT_HLINE this // means the same type_ , and for image types it means any image type. void RefinePartnersByType(bool upper, ColPartition_CLIST *partners); @@ -736,7 +753,8 @@ private: // is set, indicating that the textlines probably need to be regenerated // by aggressive line fitting/splitting, as there are probably vertically // joined blobs that cross textlines. 
- void RefineTextPartnersByMerge(bool upper, bool desperate, ColPartition_CLIST *partners, + void RefineTextPartnersByMerge(bool upper, bool desperate, + ColPartition_CLIST *partners, ColPartitionGrid *grid); // Keep the partner with the biggest overlap. void RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners); @@ -747,12 +765,14 @@ private: // Smoothes the spacings in the list into groups of equal linespacing. // resolution is the resolution of the original image, used as a basis // for thresholds in change of spacing. page_height is in pixels. - static void SmoothSpacings(int resolution, int page_height, ColPartition_LIST *parts); + static void SmoothSpacings(int resolution, int page_height, + ColPartition_LIST *parts); // Returns true if the parts array of pointers to partitions matches the // condition for a spacing blip. See SmoothSpacings for what this means // and how it is used. - static bool OKSpacingBlip(int resolution, int median_spacing, ColPartition **parts, int offset); + static bool OKSpacingBlip(int resolution, int median_spacing, + ColPartition **parts, int offset); // Returns true if both the top and bottom spacings of this match the given // spacing to within suitable margins dictated by the image resolution. @@ -765,7 +785,8 @@ private: // Returns true if the sum spacing of this and other match the given // spacing (or twice the given spacing) to within a suitable margin dictated // by the image resolution. - bool SummedSpacingOK(const ColPartition &other, int spacing, int resolution) const; + bool SummedSpacingOK(const ColPartition &other, int spacing, + int resolution) const; // Returns a suitable spacing margin that can be applied to bottoms of // text lines, based on the resolution and the stored side_step_. @@ -792,7 +813,8 @@ private: // rightmost right bounding box edge. // TODO(rays) Not good enough. Needs improving to tightly wrap text in both // directions, and to loosely wrap images. 
- static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end); + static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, + ICOORD *end); // The margins are determined by the position of the nearest vertically // overlapping neighbour to the side. They indicate the maximum extent @@ -893,7 +915,8 @@ private: }; // Typedef it now in case it becomes a class later. -using ColPartitionGridSearch = GridSearch; +using ColPartitionGridSearch = + GridSearch; } // namespace tesseract. diff --git a/src/textord/colpartitiongrid.cpp b/src/textord/colpartitiongrid.cpp index 7fdba01a..e0ff8f1e 100644 --- a/src/textord/colpartitiongrid.cpp +++ b/src/textord/colpartitiongrid.cpp @@ -25,6 +25,7 @@ #include "imagefind.h" #include +#include namespace tesseract { @@ -63,12 +64,15 @@ const double kMaxPartitionSpacing = 1.75; // decision in GridSmoothNeighbour. const int kSmoothDecisionMargin = 4; -ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright) - : BBGrid(gridsize, bleft, tright) {} +ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD &bleft, + const ICOORD &tright) + : BBGrid( + gridsize, bleft, tright) {} // Handles a click event in a display window. void ColPartitionGrid::HandleClick(int x, int y) { - BBGrid::HandleClick(x, y); + BBGrid::HandleClick(x, + y); // Run a radial search for partitions that overlap. ColPartitionGridSearch radsearch(this); radsearch.SetUniqueMode(true); @@ -93,8 +97,9 @@ void ColPartitionGrid::HandleClick(int x, int y) { // true, then the partitions are merged. // Both callbacks are deleted before returning. void ColPartitionGrid::Merges( - std::function box_cb, - std::function confirm_cb) { + const std::function &box_cb, + const std::function + &confirm_cb) { // Iterate the ColPartitions in the grid. ColPartitionGridSearch gsearch(this); gsearch.StartFullSearch(); @@ -112,8 +117,9 @@ void ColPartitionGrid::Merges( // true, then the partitions are merged. 
// Returns true if the partition is consumed by one or more merges. bool ColPartitionGrid::MergePart( - std::function box_cb, - std::function confirm_cb, + const std::function &box_cb, + const std::function + &confirm_cb, ColPartition *part) { if (part->IsUnMergeableType()) { return false; @@ -138,12 +144,13 @@ bool ColPartitionGrid::MergePart( FindMergeCandidates(part, box, debug, &merge_candidates); // Find the best merge candidate based on minimal overlap increase. int overlap_increase; - ColPartition *neighbour = - BestMergeCandidate(part, &merge_candidates, debug, confirm_cb, &overlap_increase); + ColPartition *neighbour = BestMergeCandidate(part, &merge_candidates, debug, + confirm_cb, &overlap_increase); if (neighbour != nullptr && overlap_increase <= 0) { if (debug) { - tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", part->HCoreOverlap(*neighbour), - part->VCoreOverlap(*neighbour), overlap_increase); + tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", + part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour), + overlap_increase); } // Looks like a good candidate so merge it. RemoveBBox(neighbour); @@ -171,7 +178,8 @@ bool ColPartitionGrid::MergePart( // In general we only want to merge partitions that look like they // are on the same text line, ie their median limits overlap, but we have // to make exceptions for diacritics and stray punctuation. -static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candidate, bool debug) { +static bool OKMergeCandidate(const ColPartition *part, + const ColPartition *candidate, bool debug) { const TBOX &part_box = part->bounding_box(); if (candidate == part) { return false; // Ignore itself. @@ -205,7 +213,8 @@ static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candi } // Candidates must either overlap in median y, // or part or candidate must be an acceptable diacritic. 
- if (!part->VSignificantCoreOverlap(*candidate) && !part->OKDiacriticMerge(*candidate, debug) && + if (!part->VSignificantCoreOverlap(*candidate) && + !part->OKDiacriticMerge(*candidate, debug) && !candidate->OKDiacriticMerge(*part, debug)) { if (debug) { tprintf("Candidate fails overlap and diacritic tests!\n"); @@ -221,7 +230,8 @@ static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candi // the overlap with them uncombined. // An overlap is not counted if passes the OKMergeOverlap test with ok_overlap // as the pixel overlap limit. merge1 and merge2 must both be non-nullptr. -static int IncreaseInOverlap(const ColPartition *merge1, const ColPartition *merge2, int ok_overlap, +static int IncreaseInOverlap(const ColPartition *merge1, + const ColPartition *merge2, int ok_overlap, ColPartition_CLIST *parts) { ASSERT_HOST(merge1 != nullptr && merge2 != nullptr); int total_area = 0; @@ -236,7 +246,8 @@ static int IncreaseInOverlap(const ColPartition *merge1, const ColPartition *mer TBOX part_box = part->bounding_box(); // Compute the overlap of the merged box with part. int overlap_area = part_box.intersection(merged_box).area(); - if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) { + if (overlap_area > 0 && + !part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) { total_area += overlap_area; // Subtract the overlap of merge1 and merge2 individually. 
overlap_area = part_box.intersection(merge1->bounding_box()).area(); @@ -289,7 +300,8 @@ static bool TestCompatibleCandidates(const ColPartition &part, bool debug, ColPartition_C_IT it2(it); for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { ColPartition *candidate2 = it2.data(); - if (candidate2 != candidate && !OKMergeCandidate(candidate, candidate2, false)) { + if (candidate2 != candidate && + !OKMergeCandidate(candidate, candidate2, false)) { if (debug) { tprintf("NC overlap failed:Candidate:"); candidate2->bounding_box().print(); @@ -341,7 +353,8 @@ int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid **overlap_grid) { // Finds all the ColPartitions in the grid that overlap with the given // box and returns them SortByBoxLeft(ed) and uniqued in the given list. // Any partition equal to not_this (may be nullptr) is excluded. -void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, +void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, + const ColPartition *not_this, ColPartition_CLIST *parts) { ColPartitionGridSearch rsearch(this); rsearch.StartRectSearch(box); @@ -396,7 +409,8 @@ void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, const ColParti // in overlap, or tightly spaced text would end up in bits. ColPartition *ColPartitionGrid::BestMergeCandidate( const ColPartition *part, ColPartition_CLIST *candidates, bool debug, - std::function confirm_cb, + const std::function + &confirm_cb, int *overlap_increase) { if (overlap_increase != nullptr) { *overlap_increase = 0; @@ -404,7 +418,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate( if (candidates->empty()) { return nullptr; } - int ok_overlap = static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); + int ok_overlap = + static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); // The best neighbour to merge with is the one that causes least // total pairwise overlap among all the neighbours. 
// If more than one offers the same total overlap, choose the one @@ -424,8 +439,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate( // we need anything that might be overlapped by the merged box. FindOverlappingPartitions(full_box, part, &neighbours); if (debug) { - tprintf("Finding best merge candidate from %d, %d neighbours for box:", candidates->length(), - neighbours.length()); + tprintf("Finding best merge candidate from %d, %d neighbours for box:", + candidates->length(), neighbours.length()); part_box.print(); } // If the best increase in overlap is positive, then we also check the @@ -434,7 +449,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate( // non-candidate overlap is better than the best overlap, then return // the worst non-candidate overlap instead. ColPartition_CLIST non_candidate_neighbours; - non_candidate_neighbours.set_subtract(SortByBoxLeft, true, &neighbours, candidates); + non_candidate_neighbours.set_subtract(SortByBoxLeft, true, + &neighbours, candidates); int worst_nc_increase = 0; int best_increase = INT32_MAX; int best_area = 0; @@ -454,8 +470,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate( best_increase = increase; best_area = cand_box.bounding_union(part_box).area() - cand_box.area(); if (debug) { - tprintf("New best merge candidate has increase %d, area %d, over box:", increase, - best_area); + tprintf("New best merge candidate has increase %d, area %d, over box:", + increase, best_area); full_box.print(); candidate->Print(); } @@ -466,7 +482,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate( best_candidate = candidate; } } - increase = IncreaseInOverlap(part, candidate, ok_overlap, &non_candidate_neighbours); + increase = IncreaseInOverlap(part, candidate, ok_overlap, + &non_candidate_neighbours); if (increase > worst_nc_increase) { worst_nc_increase = increase; } @@ -478,7 +495,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate( // but only if each candidate is either a good diacritic merge with 
part, // or an ok merge candidate with all the others. // See TestCompatibleCandidates for more explanation and a picture. - if (worst_nc_increase < best_increase && TestCompatibleCandidates(*part, debug, candidates)) { + if (worst_nc_increase < best_increase && + TestCompatibleCandidates(*part, debug, candidates)) { best_increase = worst_nc_increase; } } @@ -490,7 +508,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate( // Helper to remove the given box from the given partition, put it in its // own partition, and add to the partition list. -static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, ColPartition_LIST *part_list) { +static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, + ColPartition_LIST *part_list) { part->RemoveBox(box); ColPartition::MakeBigPartition(box, part_list); } @@ -501,8 +520,10 @@ static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, ColPartition_LIST *p // Blobs that cause overlaps get removed, put in individual partitions // and added to the big_parts list. They are most likely characters on // 2 textlines that touch, or something big like a dropcap. -void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) { - int ok_overlap = static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); +void ColPartitionGrid::SplitOverlappingPartitions( + ColPartition_LIST *big_parts) { + int ok_overlap = + static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); // Iterate the ColPartitions in the grid. 
ColPartitionGridSearch gsearch(this); gsearch.StartFullSearch(); @@ -534,7 +555,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) BLOBNBOX *excluded = part->BiggestBox(); TBOX shrunken = part->BoundsWithoutBox(excluded); if (!shrunken.overlap(neighbour_box) && - excluded->bounding_box().height() > kBigPartSizeRatio * shrunken.height()) { + excluded->bounding_box().height() > + kBigPartSizeRatio * shrunken.height()) { // Removing the biggest box fixes the overlap, so do it! gsearch.RemoveBBox(); RemoveBadBox(excluded, part, big_parts); @@ -550,7 +572,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) BLOBNBOX *excluded = neighbour->BiggestBox(); TBOX shrunken = neighbour->BoundsWithoutBox(excluded); if (!shrunken.overlap(box) && - excluded->bounding_box().height() > kBigPartSizeRatio * shrunken.height()) { + excluded->bounding_box().height() > + kBigPartSizeRatio * shrunken.height()) { // Removing the biggest box fixes the overlap, so do it! rsearch.RemoveBBox(); RemoveBadBox(excluded, neighbour, big_parts); @@ -562,7 +585,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) int part_overlap_count = part->CountOverlappingBoxes(neighbour_box); int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box); ColPartition *right_part = nullptr; - if (neighbour_overlap_count <= part_overlap_count || part->IsSingleton()) { + if (neighbour_overlap_count <= part_overlap_count || + part->IsSingleton()) { // Try to split the neighbour to reduce overlap. BLOBNBOX *split_blob = neighbour->OverlapSplitBlob(box); if (split_blob != nullptr) { @@ -608,15 +632,18 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if anything was changed. 
-bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, - const TBOX &im_box, const FCOORD &rotation) { +bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, + Image nontext_map, + const TBOX &im_box, + const FCOORD &rotation) { // Iterate the ColPartitions in the grid. ColPartitionGridSearch gsearch(this); gsearch.StartFullSearch(); ColPartition *part; bool any_changed = false; while ((part = gsearch.NextFullSearch()) != nullptr) { - if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type())) { + if (part->flow() != source_type || + BLOBNBOX::IsLineType(part->blob_type())) { continue; } const TBOX &box = part->bounding_box(); @@ -658,7 +685,8 @@ void ColPartitionGrid::ReflectInYAxis() { // it into proper blocks or columns. // TODO(rays) some kind of sort function would be useful and probably better // than the default here, which is to sort by order of the grid search. -void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { +void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, + TO_BLOCK_LIST *to_blocks) { TO_BLOCK_IT to_block_it(to_blocks); BLOCK_IT block_it(blocks); // All partitions will be put on this list and deleted on return. @@ -672,8 +700,10 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LI part_it.add_after_then_move(part); // The partition has to be at least vaguely like text. BlobRegionType blob_type = part->blob_type(); - if (BLOBNBOX::IsTextType(blob_type) || (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { - PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT; + if (BLOBNBOX::IsTextType(blob_type) || + (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { + PolyBlockType type = + blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT; // Get metrics from the row that will be used for the block. 
TBOX box = part->bounding_box(); int median_width = part->median_width(); @@ -685,7 +715,8 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LI part->DeleteBoxes(); continue; } - auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), box.right(), box.top()); + auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), + box.right(), box.top()); block->pdblk.set_poly_block(new POLY_BLOCK(box, type)); auto *to_block = new TO_BLOCK(block); TO_ROW_IT row_it(to_block->get_rows()); @@ -780,7 +811,8 @@ bool ColPartitionGrid::MakeColPartSets(PartSetVector *part_sets) { bool any_parts_found = false; while ((part = gsearch.NextFullSearch()) != nullptr) { BlobRegionType blob_type = part->blob_type(); - if (blob_type != BRT_NOISE && (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { + if (blob_type != BRT_NOISE && + (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { int grid_x, grid_y; const TBOX &part_box = part->bounding_box(); GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); @@ -815,11 +847,13 @@ ColPartitionSet *ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) { ColPartition *part; while ((part = gsearch.NextFullSearch()) != nullptr) { BlobRegionType blob_type = part->blob_type(); - if (blob_type != BRT_NOISE && (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { + if (blob_type != BRT_NOISE && + (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { // Consider for single column. 
BlobTextFlowType flow = part->flow(); - if ((blob_type == BRT_TEXT && (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || - flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || + if ((blob_type == BRT_TEXT && + (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || + flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) { if (single_column_part == nullptr) { single_column_part = part->ShallowCopy(); @@ -841,7 +875,7 @@ ColPartitionSet *ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) { if (single_column_part != nullptr) { // Make a ColPartitionSet out of the single_column_part as a candidate // for the single column case. - single_column_part->SetColumnGoodness(cb); + single_column_part->SetColumnGoodness(std::move(cb)); return new ColPartitionSet(single_column_part); } return nullptr; @@ -923,7 +957,8 @@ void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST *im_blobs) { // The boxes within the partitions have changed (by deskew) so recompute // the bounds of all the partitions and reinsert them into the grid. -void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD &bleft, const ICOORD &tright, +void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD &bleft, + const ICOORD &tright, const ICOORD &vertical) { ColPartition_LIST saved_parts; ColPartition_IT part_it(&saved_parts); @@ -957,7 +992,8 @@ void ColPartitionGrid::GridFindMargins(ColPartitionSet **best_columns) { ColPartition *part; while ((part = gsearch.NextFullSearch()) != nullptr) { // Set up a rectangle search x-bounded by the column and y by the part. - ColPartitionSet *columns = best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr; + ColPartitionSet *columns = + best_columns != nullptr ? 
best_columns[gsearch.GridY()] : nullptr; FindPartitionMargins(columns, part); const TBOX &box = part->bounding_box(); if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { @@ -972,7 +1008,8 @@ void ColPartitionGrid::GridFindMargins(ColPartitionSet **best_columns) { // best_columns, which may be nullptr, is an array of pointers indicating the // column set at each y-coordinate in the grid. // best_columns is usually the best_columns_ member of ColumnFinder. -void ColPartitionGrid::ListFindMargins(ColPartitionSet **best_columns, ColPartition_LIST *parts) { +void ColPartitionGrid::ListFindMargins(ColPartitionSet **best_columns, + ColPartition_LIST *parts) { ColPartition_IT part_it(parts); for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { ColPartition *part = part_it.data(); @@ -1050,15 +1087,18 @@ void ColPartitionGrid::FindFigureCaptions() { while ((part = gsearch.NextFullSearch()) != nullptr) { if (part->IsImageType()) { const TBOX &part_box = part->bounding_box(); - bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom()); + bool debug = + AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom()); ColPartition *best_caption = nullptr; int best_dist = 0; // Distance to best_caption. int best_upper = 0; // Direction of best_caption. // Handle both lower and upper directions. for (int upper = 0; upper < 2; ++upper) { - ColPartition_C_IT partner_it(upper ? part->upper_partners() : part->lower_partners()); + ColPartition_C_IT partner_it(upper ? part->upper_partners() + : part->lower_partners()); // If there are no image partners, then this direction is ok. 
- for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); partner_it.forward()) { + for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); + partner_it.forward()) { ColPartition *partner = partner_it.data(); if (partner->IsImageType()) { break; @@ -1068,7 +1108,8 @@ void ColPartitionGrid::FindFigureCaptions() { continue; } // Find the nearest totally overlapping text partner. - for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); partner_it.forward()) { + for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); + partner_it.forward()) { ColPartition *partner = partner_it.data(); if (!partner->IsTextType() || partner->type() == PT_TABLE) { continue; @@ -1080,7 +1121,8 @@ void ColPartitionGrid::FindFigureCaptions() { tprintf("Considering partner:"); partner_box.print(); } - if (partner_box.left() >= part_box.left() && partner_box.right() <= part_box.right()) { + if (partner_box.left() >= part_box.left() && + partner_box.right() <= part_box.right()) { int dist = partner_box.y_gap(part_box); if (best_caption == nullptr || dist < best_dist) { best_dist = dist; @@ -1106,7 +1148,8 @@ void ColPartitionGrid::FindFigureCaptions() { ColPartition *end_partner = nullptr; ColPartition *next_partner = nullptr; for (ColPartition *partner = best_caption; - partner != nullptr && line_count <= kMaxCaptionLines; partner = next_partner) { + partner != nullptr && line_count <= kMaxCaptionLines; + partner = next_partner) { if (!partner->IsTextType()) { end_partner = partner; break; @@ -1115,7 +1158,8 @@ void ColPartitionGrid::FindFigureCaptions() { total_height += partner->bounding_box().height(); next_partner = partner->SingletonPartner(best_upper); if (next_partner != nullptr) { - int gap = partner->bounding_box().y_gap(next_partner->bounding_box()); + int gap = + partner->bounding_box().y_gap(next_partner->bounding_box()); if (gap > biggest_gap) { biggest_gap = gap; end_partner = next_partner; @@ -1132,8 +1176,8 @@ void ColPartitionGrid::FindFigureCaptions() { } } 
if (debug) { - tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", line_count, - biggest_gap, smallest_gap, mean_height); + tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", + line_count, biggest_gap, smallest_gap, mean_height); if (end_partner != nullptr) { tprintf("End partner:"); end_partner->bounding_box().print(); @@ -1144,7 +1188,8 @@ void ColPartitionGrid::FindFigureCaptions() { } if (line_count <= kMaxCaptionLines) { // This is a qualified caption. Mark the text as caption. - for (ColPartition *partner = best_caption; partner != nullptr && partner != end_partner; + for (ColPartition *partner = best_caption; + partner != nullptr && partner != end_partner; partner = next_partner) { partner->set_type(PT_CAPTION_TEXT); partner->SetBlobTypes(); @@ -1232,7 +1277,8 @@ void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition *part) { // Finds the best partner in the given direction for the given partition. // Stores the result with AddPartner. -void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, ColPartition *part) { +void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, + ColPartition *part) { if (part->type() == PT_NOISE) { return; // Noise is not allowed to partner anything. } @@ -1292,7 +1338,8 @@ void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { gsearch.StartFullSearch(); ColPartition *part; while ((part = gsearch.NextFullSearch()) != nullptr) { - part->RefinePartners(static_cast(type), get_desperate, this); + part->RefinePartners(static_cast(type), get_desperate, + this); // Iterator may have been messed up by a merge. gsearch.RepositionIterator(); } @@ -1304,9 +1351,11 @@ void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { // Finds and returns a list of candidate ColPartitions to merge with part. // The candidates must overlap search_box, and when merged must not // overlap any other partitions that are not overlapped by each individually. 
-void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX &search_box, - bool debug, ColPartition_CLIST *candidates) { - int ok_overlap = static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); +void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, + const TBOX &search_box, bool debug, + ColPartition_CLIST *candidates) { + int ok_overlap = + static_cast(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); const TBOX &part_box = part->bounding_box(); // Now run the rect search. ColPartitionGridSearch rsearch(this); @@ -1393,7 +1442,8 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX // into images. // Returns true if the partition was changed. bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box, - const FCOORD &rerotation, bool debug, ColPartition *part) { + const FCOORD &rerotation, bool debug, + ColPartition *part) { const TBOX &part_box = part->bounding_box(); if (debug) { tprintf("Smooothing part at:"); @@ -1409,8 +1459,8 @@ bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box, for (int d = 0; d < BND_COUNT; ++d) { int dist; auto dir = static_cast(d); - BlobRegionType type = - SmoothInOneDirection(dir, nontext_map, im_box, rerotation, debug, *part, &dist); + BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box, + rerotation, debug, *part, &dist); if (debug) { tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist); } @@ -1459,8 +1509,9 @@ bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box, // Sets up a search box based on the part_box, padded in all directions // except direction. Also setup dist_scaling to weight x,y distances according // to the given direction. 
-static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, const TBOX &part_box, - int min_padding, TBOX *search_box, ICOORD *dist_scaling) { +static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, + const TBOX &part_box, int min_padding, + TBOX *search_box, ICOORD *dist_scaling) { *search_box = part_box; // Generate a pad value based on the min dimension of part_box, but at least // min_padding and then scaled by kMaxPadFactor. @@ -1511,20 +1562,21 @@ enum NeighbourPartitionType { // partitions that makes a decisive result (if any) and returns the type // and the distance of the collection. If there are any pixels in the // nontext_map, then the decision is biased towards image. -BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map, - const TBOX &im_box, const FCOORD &rerotation, - bool debug, const ColPartition &part, - int *best_distance) { +BlobRegionType ColPartitionGrid::SmoothInOneDirection( + BlobNeighbourDir direction, Image nontext_map, const TBOX &im_box, + const FCOORD &rerotation, bool debug, const ColPartition &part, + int *best_distance) { // Set up a rectangle search bounded by the part. 
const TBOX &part_box = part.bounding_box(); TBOX search_box; ICOORD dist_scaling; - ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box, &dist_scaling); - bool image_region = - ImageFind::CountPixelsInRotatedBox(search_box, im_box, rerotation, nontext_map) > 0; + ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box, + &dist_scaling); + bool image_region = ImageFind::CountPixelsInRotatedBox( + search_box, im_box, rerotation, nontext_map) > 0; std::vector dists[NPT_COUNT]; - AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box, rerotation, debug, - dists); + AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box, + rerotation, debug, dists); // By iteratively including the next smallest distance across the vectors, // (as in a merge sort) we can use the vector indices as counts of each type // and find the nearest set of objects that give us a definite decision. @@ -1551,33 +1603,35 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction } *best_distance = min_dist; if (debug) { - tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n", counts[NPT_HTEXT], - counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE], - image_bias, min_dist); + tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n", + counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT], + counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE], image_bias, min_dist); } // See if we have a decision yet. 
auto image_count = counts[NPT_IMAGE]; - auto htext_score = - counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - (image_count + counts[NPT_WEAK_VTEXT]); - auto vtext_score = - counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - (image_count + counts[NPT_WEAK_HTEXT]); + auto htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - + (image_count + counts[NPT_WEAK_VTEXT]); + auto vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - + (image_count + counts[NPT_WEAK_HTEXT]); if (image_count > 0 && image_bias - htext_score >= kSmoothDecisionMargin && image_bias - vtext_score >= kSmoothDecisionMargin) { *best_distance = dists[NPT_IMAGE][0]; - if (!dists[NPT_WEAK_VTEXT].empty() && *best_distance > dists[NPT_WEAK_VTEXT][0]) { + if (!dists[NPT_WEAK_VTEXT].empty() && + *best_distance > dists[NPT_WEAK_VTEXT][0]) { *best_distance = dists[NPT_WEAK_VTEXT][0]; } - if (!dists[NPT_WEAK_HTEXT].empty() && *best_distance > dists[NPT_WEAK_HTEXT][0]) { + if (!dists[NPT_WEAK_HTEXT].empty() && + *best_distance > dists[NPT_WEAK_HTEXT][0]) { *best_distance = dists[NPT_WEAK_HTEXT][0]; } return BRT_POLYIMAGE; } - if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && counts[NPT_HTEXT] > 0 && - htext_score >= kSmoothDecisionMargin) { + if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && + counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) { *best_distance = dists[NPT_HTEXT][0]; return BRT_TEXT; - } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && counts[NPT_VTEXT] > 0 && - vtext_score >= kSmoothDecisionMargin) { + } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && + counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { *best_distance = dists[NPT_VTEXT][0]; return BRT_VERT_TEXT; } @@ -1592,11 +1646,10 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction // The nontext_map (+im_box, rerotation) is used to make text invisible if // there is non-text in between. 
// dists must be an array of vectors of size NPT_COUNT. -void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part, - const ICOORD &dist_scaling, const TBOX &search_box, - Image nontext_map, const TBOX &im_box, - const FCOORD &rerotation, bool debug, - std::vector *dists) { +void ColPartitionGrid::AccumulatePartDistances( + const ColPartition &base_part, const ICOORD &dist_scaling, + const TBOX &search_box, Image nontext_map, const TBOX &im_box, + const FCOORD &rerotation, bool debug, std::vector *dists) { const TBOX &part_box = base_part.bounding_box(); ColPartitionGridSearch rsearch(this); rsearch.SetUniqueMode(true); @@ -1605,14 +1658,16 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part, // Search for compatible neighbours with a similar strokewidth, but not // on the other side of a tab vector. while ((neighbour = rsearch.NextRectSearch()) != nullptr) { - if (neighbour->IsUnMergeableType() || !base_part.ConfirmNoTabViolation(*neighbour) || + if (neighbour->IsUnMergeableType() || + !base_part.ConfirmNoTabViolation(*neighbour) || neighbour == &base_part) { continue; } TBOX nbox = neighbour->bounding_box(); BlobRegionType n_type = neighbour->blob_type(); if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && - !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, nontext_map)) { + !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, + nontext_map)) { continue; // Text not visible the other side of image. } if (BLOBNBOX::IsLineType(n_type)) { @@ -1673,7 +1728,8 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part, // neighbours that vertically overlap significantly. // columns may be nullptr, and indicates the assigned column structure this // is applicable to part. 
-void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, ColPartition *part) { +void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, + ColPartition *part) { // Set up a rectangle search x-bounded by the column and y by the part. TBOX box = part->bounding_box(); int y = part->MidY(); @@ -1693,19 +1749,20 @@ void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, ColPartiti left_margin -= kColumnWidthFactor; right_margin += kColumnWidthFactor; // Search for ColPartitions that reduce the margin. - left_margin = - FindMargin(box.left() + box.height(), true, left_margin, box.bottom(), box.top(), part); + left_margin = FindMargin(box.left() + box.height(), true, left_margin, + box.bottom(), box.top(), part); part->set_left_margin(left_margin); // Search for ColPartitions that reduce the margin. - right_margin = - FindMargin(box.right() - box.height(), false, right_margin, box.bottom(), box.top(), part); + right_margin = FindMargin(box.right() - box.height(), false, right_margin, + box.bottom(), box.top(), part); part->set_right_margin(right_margin); } // Starting at x, and going in the specified direction, up to x_limit, finds // the margin for the given y range by searching sideways, // and ignoring not_this. -int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, int y_top, +int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, + int y_bottom, int y_top, const ColPartition *not_this) { int height = y_top - y_bottom; // Iterate the ColPartitions in the grid. diff --git a/src/textord/colpartitiongrid.h b/src/textord/colpartitiongrid.h index e2b71c7b..939fe1f7 100644 --- a/src/textord/colpartitiongrid.h +++ b/src/textord/colpartitiongrid.h @@ -47,16 +47,18 @@ public: // calls the confirm_cb to check any more rules. If the confirm_cb returns // true, then the partitions are merged. // Both callbacks are deleted before returning. 
- void Merges(std::function box_cb, - std::function confirm_cb); + void Merges(const std::function &box_cb, + const std::function &confirm_cb); // For the given partition, calls the box_cb permanent callback // to compute the search box, searches the box, and if a candidate is found, // calls the confirm_cb to check any more rules. If the confirm_cb returns // true, then the partitions are merged. // Returns true if the partition is consumed by one or more merges. - bool MergePart(std::function box_cb, - std::function confirm_cb, + bool MergePart(const std::function &box_cb, + const std::function &confirm_cb, ColPartition *part); // Computes and returns the total overlap of all partitions in the grid. @@ -78,7 +80,8 @@ public: // See colpartitiongrid.cpp for a diagram. ColPartition *BestMergeCandidate( const ColPartition *part, ColPartition_CLIST *candidates, bool debug, - std::function confirm_cb, + const std::function + &confirm_cb, int *overlap_increase); // Split partitions where it reduces overlap between their bounding boxes. @@ -98,8 +101,8 @@ public: // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if anything was changed. - bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box, - const FCOORD &rerotation); + bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, + const TBOX &im_box, const FCOORD &rerotation); // Reflects the grid and its colpartitions in the y-axis, assuming that // all blob boxes have already been done. @@ -150,7 +153,8 @@ public: // Improves the margins of the ColPartitions in the list by calling // FindPartitionMargins on each. - void ListFindMargins(ColPartitionSet **best_columns, ColPartition_LIST *parts); + void ListFindMargins(ColPartitionSet **best_columns, + ColPartition_LIST *parts); // Deletes all the partitions in the grid after disowning all the blobs. 
void DeleteParts(); @@ -185,8 +189,8 @@ private: // Finds and returns a list of candidate ColPartitions to merge with part. // The candidates must overlap search_box, and when merged must not // overlap any other partitions that are not overlapped by each individually. - void FindMergeCandidates(const ColPartition *part, const TBOX &search_box, bool debug, - ColPartition_CLIST *candidates); + void FindMergeCandidates(const ColPartition *part, const TBOX &search_box, + bool debug, ColPartition_CLIST *candidates); // Smoothes the region type/flow type of the given part by looking at local // neighbours and the given image mask. Searches a padded rectangle with the @@ -199,7 +203,8 @@ private: // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if the partition was changed. - bool SmoothRegionType(Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, + bool SmoothRegionType(Image nontext_map, const TBOX &im_box, + const FCOORD &rerotation, bool debug, ColPartition *part); // Executes the search for SmoothRegionType in a single direction. // Creates a bounding box that is padded in all directions except direction, @@ -207,17 +212,21 @@ private: // partitions that makes a decisive result (if any) and returns the type // and the distance of the collection. If there are any pixels in the // nontext_map, then the decision is biased towards image. 
- BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map, - const TBOX &im_box, const FCOORD &rerotation, bool debug, - const ColPartition &part, int *best_distance); + BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, + Image nontext_map, const TBOX &im_box, + const FCOORD &rerotation, bool debug, + const ColPartition &part, + int *best_distance); // Counts the partitions in the given search_box by appending the gap // distance (scaled by dist_scaling) of the part from the base_part to the // vector of the appropriate type for the partition. Prior to return, the // vectors in the dists array are sorted in increasing order. // dists must be an array of vectors of size NPT_COUNT. - void AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling, - const TBOX &search_box, Image nontext_map, const TBOX &im_box, - const FCOORD &rerotation, bool debug, std::vector *dists); + void AccumulatePartDistances(const ColPartition &base_part, + const ICOORD &dist_scaling, + const TBOX &search_box, Image nontext_map, + const TBOX &im_box, const FCOORD &rerotation, + bool debug, std::vector *dists); // Improves the margins of the ColPartition by searching for // neighbours that vertically overlap significantly. @@ -226,8 +235,8 @@ private: // Starting at x, and going in the specified direction, up to x_limit, finds // the margin for the given y range by searching sideways, // and ignoring not_this. - int FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, int y_top, - const ColPartition *not_this); + int FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, + int y_top, const ColPartition *not_this); }; } // namespace tesseract. 
diff --git a/src/textord/colpartitionset.cpp b/src/textord/colpartitionset.cpp index f83566bd..a68275df 100644 --- a/src/textord/colpartitionset.cpp +++ b/src/textord/colpartitionset.cpp @@ -90,7 +90,8 @@ void ColPartitionSet::RelinquishParts() { } // Attempt to improve this by adding partitions or expanding partitions. -void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets) { +void ColPartitionSet::ImproveColumnCandidate(const WidthCallback &cb, + PartSetVector *src_sets) { int set_size = src_sets->size(); // Iterate over the provided column sets, as each one may have something // to improve this. @@ -140,7 +141,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr // it was before, so use the tab. part->CopyLeftTab(*col_part, false); part->SetColumnGoodness(cb); - } else if (col_box_left < part_left && (box_width_ok || !part_width_ok)) { + } else if (col_box_left < part_left && + (box_width_ok || !part_width_ok)) { // The box is leaving the good column metric at least as good as // it was before, so use the box. part->CopyLeftTab(*col_part, true); @@ -149,7 +151,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr part_left = part->left_key(); } if (col_right > part_right && - (part_it.at_last() || part_it.data_relative(1)->left_key() > col_right)) { + (part_it.at_last() || + part_it.data_relative(1)->left_key() > col_right)) { // The right edge is better, so we can possibly expand it. int col_box_right = col_part->BoxRightKey(); bool tab_width_ok = cb(part->KeyWidth(part_left, col_right)); @@ -159,7 +162,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr // it was before, so use the tab. 
part->CopyRightTab(*col_part, false); part->SetColumnGoodness(cb); - } else if (col_box_right > part_right && (box_width_ok || !part_width_ok)) { + } else if (col_box_right > part_right && + (box_width_ok || !part_width_ok)) { // The box is leaving the good column metric at least as good as // it was before, so use the box. part->CopyRightTab(*col_part, true); @@ -173,8 +177,10 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr // If this set is good enough to represent a new partitioning into columns, // add it to the vector of sets, otherwise delete it. -void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb) { - bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); +void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, + const WidthCallback &cb) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); if (debug) { tprintf("Considering new column candidate:\n"); Print(); @@ -222,7 +228,8 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthC // Return true if the partitions in other are all compatible with the columns // in this. 
-bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb) { +bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other, + const WidthCallback &cb) { if (debug) { tprintf("CompatibleColumns testing compatibility\n"); Print(); @@ -288,7 +295,8 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other, Widt if (debug) { int next_right = next_part->bounding_box().right(); tprintf("CompatibleColumns false due to 2 parts of good width\n"); - tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left, next_right); + tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left, + next_right); right_col->Print(); } return false; @@ -375,7 +383,8 @@ ColPartitionSet *ColPartitionSet::Copy(bool good_only) { } // Return the bounding boxes of columns at the given y-range -void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments) { +void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, + ColSegment_LIST *segments) { ColPartition_IT it(&parts_); ColSegment_IT col_it(segments); col_it.move_to_last(); @@ -392,7 +401,8 @@ void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *s #ifndef GRAPHICS_DISABLED // Display the edges of the columns at the given y coords. -void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win) { +void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, + ScrollView *win) { ColPartition_IT it(&parts_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition *part = it.data(); @@ -410,10 +420,9 @@ void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, ScrollView *wi // Column indices are 2n + 1 for real columns (0 based) and even values // represent the gaps in between columns, with 0 being left of the leftmost. // resolution refers to the ppi resolution of the image. 
-ColumnSpanningType ColPartitionSet::SpanningType(int resolution, int left, int right, int height, - int y, int left_margin, int right_margin, - int *first_col, int *last_col, - int *first_spanned_col) { +ColumnSpanningType ColPartitionSet::SpanningType( + int resolution, int left, int right, int height, int y, int left_margin, + int right_margin, int *first_col, int *last_col, int *first_spanned_col) { *first_col = -1; *last_col = -1; *first_spanned_col = -1; @@ -505,7 +514,8 @@ ColumnSpanningType ColPartitionSet::SpanningType(int resolution, int left, int r // columns that do not match and start new ones for the new columns in this. // As ColPartitions are turned into BLOCKs, the used ones are put in // used_parts, as they still need to be referenced in the grid. -void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, +void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, + const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set_list) { // Move the input list to a temporary location so we can delete its elements @@ -525,11 +535,12 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { ColPartition *column = col_it.data(); // Any existing column to the left of column is completed. 
- while (!src_it.empty() && ((working_set = src_it.data())->column() == nullptr || - working_set->column()->right_key() <= column->left_key())) { + while (!src_it.empty() && + ((working_set = src_it.data())->column() == nullptr || + working_set->column()->right_key() <= column->left_key())) { src_it.extract(); - working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks, - &to_blocks); + working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, + &completed_blocks, &to_blocks); delete working_set; src_it.forward(); } @@ -542,7 +553,8 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh // A matching column gets to stay, and first_new_set gets all the // completed_sets. working_set = src_it.empty() ? nullptr : src_it.data(); - if (working_set != nullptr && working_set->column()->MatchingColumns(*column)) { + if (working_set != nullptr && + working_set->column()->MatchingColumns(*column)) { working_set->set_column(column); dest_it.add_after_then_move(src_it.extract()); src_it.forward(); @@ -557,8 +569,8 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh // Complete any remaining src working sets. while (!src_it.empty()) { working_set = src_it.extract(); - working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks, - &to_blocks); + working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, + &completed_blocks, &to_blocks); delete working_set; src_it.forward(); } @@ -573,8 +585,10 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh } // Accumulate the widths and gaps into the given variables. 
-void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, - int *total_gap, int *gap_samples) { +void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width, + int *width_samples, + int *total_gap, + int *gap_samples) { ColPartition_IT it(&parts_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition *part = it.data(); @@ -597,8 +611,9 @@ void ColPartitionSet::Print() { tprintf( "Partition set of %d parts, %d good, coverage=%d+%d" " (%d,%d)->(%d,%d)\n", - it.length(), good_column_count_, good_coverage_, bad_coverage_, bounding_box_.left(), - bounding_box_.bottom(), bounding_box_.right(), bounding_box_.top()); + it.length(), good_column_count_, good_coverage_, bad_coverage_, + bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(), + bounding_box_.top()); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition *part = it.data(); part->Print(); @@ -608,7 +623,8 @@ void ColPartitionSet::Print() { // PRIVATE CODE. // Add the given partition to the list in the appropriate place. -void ColPartitionSet::AddPartition(ColPartition *new_part, ColPartition_IT *it) { +void ColPartitionSet::AddPartition(ColPartition *new_part, + ColPartition_IT *it) { AddPartitionCoverageAndBox(*new_part); int new_right = new_part->right_key(); if (it->data()->left_key() >= new_right) { diff --git a/src/textord/colpartitionset.h b/src/textord/colpartitionset.h index 7c384183..6a0c0dae 100644 --- a/src/textord/colpartitionset.h +++ b/src/textord/colpartitionset.h @@ -20,9 +20,9 @@ #ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_ #define TESSERACT_TEXTORD_COLPARTITIONSET_H_ -#include "colpartition.h" // For ColPartition_LIST. -#include "rect.h" // For TBOX. -#include "tabvector.h" // For BLOBNBOX_CLIST. +#include "colpartition.h" // For ColPartition_LIST. +#include "rect.h" // For TBOX. +#include "tabvector.h" // For BLOBNBOX_CLIST. 
namespace tesseract { @@ -71,15 +71,17 @@ public: void RelinquishParts(); // Attempt to improve this by adding partitions or expanding partitions. - void ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets); + void ImproveColumnCandidate(const WidthCallback &cb, PartSetVector *src_sets); // If this set is good enough to represent a new partitioning into columns, // add it to the vector of sets, otherwise delete it. - void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb); + void AddToColumnSetsIfUnique(PartSetVector *column_sets, + const WidthCallback &cb); // Return true if the partitions in other are all compatible with the columns // in this. - bool CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb); + bool CompatibleColumns(bool debug, ColPartitionSet *other, + const WidthCallback &cb); // Returns the total width of all blobs in the part_set that do not lie // within an approved column. Used as a cost measure for using this @@ -104,20 +106,22 @@ public: // represent the gaps in between columns, with 0 being left of the leftmost. // resolution refers to the ppi resolution of the image. It may be 0 if only // the first_col and last_col are required. - ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, - int left_margin, int right_margin, int *first_col, int *last_col, - int *first_spanned_col); + ColumnSpanningType SpanningType(int resolution, int left, int right, + int height, int y, int left_margin, + int right_margin, int *first_col, + int *last_col, int *first_spanned_col); // The column_set has changed. Close down all in-progress WorkingPartSets in // columns that do not match and start new ones for the new columns in this. // As ColPartitions are turned into BLOCKs, the used ones are put in // used_parts, as they still need to be referenced in the grid. 
- void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, - ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set); + void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, + int resolution, ColPartition_LIST *used_parts, + WorkingPartSet_LIST *working_set); // Accumulate the widths and gaps into the given variables. - void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, - int *gap_samples); + void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, + int *total_gap, int *gap_samples); // Provide debug output for this ColPartitionSet and all the ColPartitions. void Print(); diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp index f8cfe23a..686de19d 100644 --- a/src/textord/makerow.cpp +++ b/src/textord/makerow.cpp @@ -38,6 +38,7 @@ #include "underlin.h" #include +#include #include // for std::vector namespace tesseract { @@ -357,7 +358,7 @@ void compute_page_skew( // get average gradient for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); blob_count = row->blob_list()->length(); - row_err = static_cast(ceil(row->line_error())); + row_err = static_cast(std::ceil(row->line_error())); if (row_err <= 0) { row_err = 1; } @@ -636,7 +637,7 @@ void delete_non_dropout_rows( // find lines min_y = block_box.bottom() - 1; max_y = block_box.top() + 1; for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { - line_index = static_cast(floor(row_it.data()->intercept())); + line_index = static_cast(std::floor(row_it.data()->intercept())); if (line_index <= min_y) { min_y = line_index - 1; } @@ -668,7 +669,7 @@ void delete_non_dropout_rows( // find lines compute_dropout_distances(&occupation[0], &deltas[0], line_count); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); - line_index = static_cast(floor(row->intercept())); + line_index = 
static_cast(std::floor(row->intercept())); distance = deltas[line_index - min_y]; if (find_best_dropout_row(row, distance, block->line_spacing / 2, line_index, &row_it, testing_on)) { @@ -726,7 +727,7 @@ bool find_best_dropout_row( // find neighbours row_offset = row_inc; do { next_row = row_it->data_relative(row_offset); - next_index = static_cast(floor(next_row->intercept())); + next_index = static_cast(std::floor(next_row->intercept())); if ((distance < 0 && next_index < line_index && next_index > line_index + distance + distance) || (distance >= 0 && next_index > line_index && @@ -774,7 +775,7 @@ TBOX deskew_block_coords( // block box BLOBNBOX *blob; // current blob BLOBNBOX_IT blob_it; // iterator - length = sqrt(gradient * gradient + 1); + length = std::sqrt(gradient * gradient + 1); rotation = FCOORD(1 / length, -gradient / length); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); @@ -815,7 +816,7 @@ void compute_line_occupation( // project blobs FCOORD rotation; // inverse of skew line_count = max_y - min_y + 1; - length = sqrt(gradient * gradient + 1); + length = std::sqrt(gradient * gradient + 1); rotation = FCOORD(1 / length, -gradient / length); for (line_index = 0; line_index < line_count; line_index++) { deltas[line_index] = 0; @@ -1193,7 +1194,7 @@ void compute_row_stats( // find lines row_it.backward(); } while (!row_it.at_last()); block->key_row = prev_row; - block->baseline_offset = fmod(prev_row->parallel_c(), block->line_spacing); + block->baseline_offset = std::fmod(prev_row->parallel_c(), block->line_spacing); if (testing_on) { tprintf("Blob based spacing=(%g,%g), offset=%g", block->line_size, block->line_spacing, block->baseline_offset); @@ -1237,7 +1238,7 @@ void compute_row_stats( // find lines block->line_spacing = rows[row_index]->spacing; block->max_blob_size = block->line_spacing * textord_excess_blobsize; } - block->baseline_offset = fmod(rows[row_index]->intercept(), block->line_spacing); + 
block->baseline_offset = std::fmod(rows[row_index]->intercept(), block->line_spacing); } if (testing_on) { tprintf("\nEstimate line size=%g, spacing=%g, offset=%g\n", block->line_size, @@ -1796,7 +1797,7 @@ void separate_underlines(TO_BLOCK *block, // block to do int min_blob_height = static_cast(textord_min_blob_height_fraction * block->line_size + 0.5); // length of vector - length = sqrt(1 + gradient * gradient); + length = std::sqrt(1 + gradient * gradient); g_vec = FCOORD(1 / length, -gradient / length); blob_rotation = FCOORD(rotation.x(), -rotation.y()); blob_rotation.rotate(g_vec); // undoing everything @@ -2295,7 +2296,7 @@ void assign_blobs_to_rows( // find lines (block->block->pdblk.bounding_box().bottom() + block->block->pdblk.bounding_box().top()) / 2.0f; if (gradient != nullptr) { - g_length = sqrt(1 + *gradient * *gradient); + g_length = std::sqrt(1 + *gradient * *gradient); } #ifndef GRAPHICS_DISABLED if (drawing_skew) { diff --git a/src/textord/oldbasel.cpp b/src/textord/oldbasel.cpp index 3c0d3459..c0821323 100644 --- a/src/textord/oldbasel.cpp +++ b/src/textord/oldbasel.cpp @@ -32,6 +32,7 @@ #include "textord.h" #include "tprintf.h" +#include #include // for std::vector #include @@ -1450,7 +1451,7 @@ void make_first_xheight( // find xheight for (blobindex = 0; blobindex < blobcount; blobindex++) { int xcenter = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2; float base = baseline->y(xcenter); - float bottomdiff = fabs(base - blobcoords[blobindex].bottom()); + float bottomdiff = std::fabs(base - blobcoords[blobindex].bottom()); int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? 
kGoodStrength : 1; int height = static_cast(blobcoords[blobindex].top() - base + 0.5); if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) { diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp index fdfd74f6..0993c1e7 100644 --- a/src/textord/tablefind.cpp +++ b/src/textord/tablefind.cpp @@ -22,6 +22,7 @@ #include #include +#include #include "tablefind.h" #include @@ -157,11 +158,11 @@ void DeleteObject(T *object) { } TableFinder::TableFinder() - : resolution_(0) - , global_median_xheight_(0) - , global_median_blob_width_(0) - , global_median_ledding_(0) - , left_to_right_language_(true) {} + : resolution_(0), + global_median_xheight_(0), + global_median_blob_width_(0), + global_median_ledding_(0), + left_to_right_language_(true) {} TableFinder::~TableFinder() { // ColPartitions and ColSegments created by this class for storage in grids @@ -177,7 +178,8 @@ void TableFinder::set_left_to_right_language(bool order) { left_to_right_language_ = order; } -void TableFinder::Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right) { +void TableFinder::Init(int grid_size, const ICOORD &bottom_left, + const ICOORD &top_right) { // Initialize clean partitions list and grid clean_part_grid_.Init(grid_size, bottom_left, top_right); leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right); @@ -188,7 +190,8 @@ void TableFinder::Init(int grid_size, const ICOORD &bottom_left, const ICOORD &t // Copy cleaned partitions from part_grid_ to clean_part_grid_ and // insert leaders and rulers into the leader_and_ruling_grid_ -void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block) { +void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid, + TO_BLOCK *block) { // Calculate stats. This lets us filter partitions in AllowTextPartition() // and filter blobs in AllowBlob(). 
SetGlobalSpacings(grid); @@ -255,7 +258,8 @@ void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block) } // High level function to perform table detection -void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_columns, +void TableFinder::LocateTables(ColPartitionGrid *grid, + ColPartitionSet **all_columns, WidthCallback width_cb, const FCOORD &reskew) { // initialize spacing, neighbors, and columns InitializePartitions(all_columns); @@ -264,8 +268,10 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col if (textord_show_tables) { ScrollView *table_win = MakeWindow(0, 300, "Column Partitions & Neighbors"); DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE); - DisplayColPartitionConnections(table_win, &clean_part_grid_, ScrollView::ORANGE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + DisplayColPartitionConnections(table_win, &clean_part_grid_, + ScrollView::ORANGE); table_win = MakeWindow(100, 300, "Fragmented Text"); DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE); @@ -339,7 +345,8 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col #ifndef GRAPHICS_DISABLED if (textord_show_tables) { ScrollView *table_win = MakeWindow(1400, 600, "Recognized Tables"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, ScrollView::BLUE); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, + ScrollView::BLUE); table_grid_.DisplayBoxes(table_win); } #endif // !GRAPHICS_DISABLED @@ -353,7 +360,8 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col #ifndef GRAPHICS_DISABLED if (textord_show_tables) { ScrollView *table_win = MakeWindow(1500, 300, "Detected Tables"); - DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, 
ScrollView::BLUE); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, + ScrollView::BLUE); table_grid_.DisplayBoxes(table_win); } #endif // !GRAPHICS_DISABLED @@ -362,7 +370,7 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. - MakeTableBlocks(grid, all_columns, width_cb); + MakeTableBlocks(grid, all_columns, std::move(width_cb)); } // All grids have the same dimensions. The clean_part_grid_ sizes are set from // the part_grid_ that is passed to InsertCleanPartitions, which was the same as @@ -452,7 +460,8 @@ void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition *part) { // Look for the next split in the partition. for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { const TBOX &box = box_it.data()->bounding_box(); - if (previous_right != INT32_MIN && box.left() - previous_right > kThreshold) { + if (previous_right != INT32_MIN && + box.left() - previous_right > kThreshold) { // We have a split position. Split the partition in two pieces. // Insert the left piece in the grid and keep processing the right. int mid_x = (box.left() + previous_right) / 2; @@ -484,7 +493,8 @@ bool TableFinder::AllowTextPartition(const ColPartition &part) const { const int median_area = global_median_xheight_ * global_median_blob_width_; const double kAreaPerBlobRequired = median_area * kAllowTextArea; // Keep comparisons strictly greater to disallow 0! 
- return part.median_height() > kHeightRequired && part.median_width() > kWidthRequired && + return part.median_height() > kHeightRequired && + part.median_width() > kWidthRequired && part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count(); } @@ -512,13 +522,15 @@ ScrollView *TableFinder::MakeWindow(int x, int y, const char *window_name) { #endif // Make single-column blocks from good_columns_ partitions. -void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns, ColSegment_LIST *column_blocks) { +void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns, + ColSegment_LIST *column_blocks) { for (int i = 0; i < gridheight(); ++i) { ColPartitionSet *columns = all_columns[i]; if (columns != nullptr) { ColSegment_LIST new_blocks; // Get boxes from the current vertical position on the grid - columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(), &new_blocks); + columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(), + &new_blocks); // Merge the new_blocks boxes into column_blocks if they are well-aligned GroupColumnBlocks(&new_blocks, column_blocks); } @@ -526,7 +538,8 @@ void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns, ColSegment_LIST } // Merge column segments into the current list if they are well aligned. 
-void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks, ColSegment_LIST *column_blocks) { +void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks, + ColSegment_LIST *column_blocks) { ColSegment_IT src_it(new_blocks); ColSegment_IT dest_it(column_blocks); // iterate through the source list @@ -558,8 +571,10 @@ void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks, ColSegment_LIST bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) { int x_margin = 20; int y_margin = 5; - return (abs(b1.left() - b2.left()) < x_margin) && (abs(b1.right() - b2.right()) < x_margin) && - (abs(b1.top() - b2.bottom()) < y_margin || abs(b2.top() - b1.bottom()) < y_margin); + return (abs(b1.left() - b2.left()) < x_margin) && + (abs(b1.right() - b2.right()) < x_margin) && + (abs(b1.top() - b2.bottom()) < y_margin || + abs(b2.top() - b1.bottom()) < y_margin); } // Set up info for clean_part_grid_ partitions to be valid during detection @@ -571,7 +586,8 @@ void TableFinder::InitializePartitions(ColPartitionSet **all_columns) { } // Set left, right and top, bottom spacings of each colpartition. -void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns) { +void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, + ColPartitionSet **all_columns) { // Iterate the ColPartitions in the grid. 
ColPartitionGridSearch gsearch(grid); gsearch.StartFullSearch(); @@ -599,7 +615,8 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet * hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); ColPartition *neighbor = nullptr; while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) { - if (neighbor->type() == PT_PULLOUT_IMAGE || neighbor->type() == PT_FLOWING_IMAGE || + if (neighbor->type() == PT_PULLOUT_IMAGE || + neighbor->type() == PT_FLOWING_IMAGE || neighbor->type() == PT_HEADING_IMAGE) { int right = neighbor->bounding_box().right(); if (right < box.left()) { @@ -611,7 +628,8 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet * hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); neighbor = nullptr; while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) { - if (neighbor->type() == PT_PULLOUT_IMAGE || neighbor->type() == PT_FLOWING_IMAGE || + if (neighbor->type() == PT_PULLOUT_IMAGE || + neighbor->type() == PT_FLOWING_IMAGE || neighbor->type() == PT_HEADING_IMAGE) { int left = neighbor->bounding_box().left(); if (left > box.right()) { @@ -623,8 +641,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet * ColPartition *upper_part = part->SingletonPartner(true); if (upper_part) { - int space = std::max( - 0, static_cast(upper_part->bounding_box().bottom() - part->bounding_box().bottom())); + int space = + std::max(0, static_cast(upper_part->bounding_box().bottom() - + part->bounding_box().bottom())); part->set_space_above(space); } else { // TODO(nbeato): What constitutes a good value? 
@@ -635,8 +654,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet * ColPartition *lower_part = part->SingletonPartner(false); if (lower_part) { - int space = std::max( - 0, static_cast(part->bounding_box().bottom() - lower_part->bounding_box().bottom())); + int space = + std::max(0, static_cast(part->bounding_box().bottom() - + lower_part->bounding_box().bottom())); part->set_space_below(space); } else { // TODO(nbeato): What constitutes a good value? @@ -650,14 +670,17 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet * // Set spacing and closest neighbors above and below a given colpartition. void TableFinder::SetVerticalSpacing(ColPartition *part) { TBOX box = part->bounding_box(); - int top_range = std::min(box.top() + kMaxVerticalSpacing, static_cast(tright().y())); - int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, static_cast(bleft().y())); + int top_range = + std::min(box.top() + kMaxVerticalSpacing, static_cast(tright().y())); + int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, + static_cast(bleft().y())); box.set_top(top_range); box.set_bottom(bottom_range); TBOX part_box = part->bounding_box(); // Start a rect search - GridSearch rectsearch(&clean_part_grid_); + GridSearch rectsearch( + &clean_part_grid_); rectsearch.StartRectSearch(box); ColPartition *neighbor; int min_space_above = kMaxVerticalSpacing; @@ -676,7 +699,8 @@ void TableFinder::SetVerticalSpacing(ColPartition *part) { min_space_below = gap; below_neighbor = neighbor; } // If neighbor is above current partition - else if (part_box.top() < neighbor_box.bottom() && gap < min_space_above) { + else if (part_box.top() < neighbor_box.bottom() && + gap < min_space_above) { min_space_above = gap; above_neighbor = neighbor; } @@ -777,7 +801,8 @@ void TableFinder::MarkTablePartitions() { if (textord_tablefind_show_mark) { ScrollView *table_win = MakeWindow(300, 300, "Initial Table Partitions"); 
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); } #endif FilterFalseAlarms(); @@ -785,7 +810,8 @@ void TableFinder::MarkTablePartitions() { if (textord_tablefind_show_mark) { ScrollView *table_win = MakeWindow(600, 300, "Filtered Table Partitions"); DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); } #endif SmoothTablePartitionRuns(); @@ -793,7 +819,8 @@ void TableFinder::MarkTablePartitions() { if (textord_tablefind_show_mark) { ScrollView *table_win = MakeWindow(900, 300, "Smoothed Table Partitions"); DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); } #endif FilterFalseAlarms(); @@ -801,7 +828,8 @@ void TableFinder::MarkTablePartitions() { if (textord_tablefind_show_mark || textord_show_tables) { ScrollView *table_win = MakeWindow(900, 300, "Final Table Partitions"); DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); - DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); } #endif } @@ -815,7 +843,8 @@ void TableFinder::MarkTablePartitions() { // 4- Partitions with leaders before/after them. void TableFinder::MarkPartitionsUsingLocalInformation() { // Iterate the ColPartitions in the grid. 
- GridSearch gsearch(&clean_part_grid_); + GridSearch gsearch( + &clean_part_grid_); gsearch.StartFullSearch(); ColPartition *part = nullptr; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -850,7 +879,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const { BLOBNBOX_CLIST *part_boxes = part->boxes(); BLOBNBOX_C_IT it(part_boxes); // Check if this is a relatively small partition (such as a single word) - if (part->bounding_box().width() < kMinBoxesInTextPartition * part->median_height() && + if (part->bounding_box().width() < + kMinBoxesInTextPartition * part->median_height() && part_boxes->length() < kMinBoxesInTextPartition) { return true; } @@ -907,7 +937,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const { } // Since no large gap was found, return false if the partition is too // long to be a data cell - if (part->bounding_box().width() > kMaxBoxesInDataPartition * part->median_height() || + if (part->bounding_box().width() > + kMaxBoxesInDataPartition * part->median_height() || part_boxes->length() > kMaxBoxesInDataPartition) { return false; } @@ -1016,19 +1047,23 @@ void TableFinder::FilterParagraphEndings() { // To account for that, check if the partition center is to // the left of the one above it. int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2; - int upper_mid = (upper_part->bounding_box().left() + upper_part->bounding_box().right()) / 2; + int upper_mid = (upper_part->bounding_box().left() + + upper_part->bounding_box().right()) / + 2; int current_spacing = 0; // spacing of the current line to margin int upper_spacing = 0; // spacing of the previous line to the margin if (left_to_right_language_) { // Left to right languages, use mid - left to figure out the distance // the middle is from the left margin. 
- int left = std::min(part->bounding_box().left(), upper_part->bounding_box().left()); + int left = std::min(part->bounding_box().left(), + upper_part->bounding_box().left()); current_spacing = mid - left; upper_spacing = upper_mid - left; } else { // Right to left languages, use right - mid to figure out the distance // the middle is from the right margin. - int right = std::max(part->bounding_box().right(), upper_part->bounding_box().right()); + int right = std::max(part->bounding_box().right(), + upper_part->bounding_box().right()); current_spacing = right - mid; upper_spacing = right - upper_mid; } @@ -1046,7 +1081,8 @@ void TableFinder::FilterParagraphEndings() { // The last line of a paragraph should be left aligned. // TODO(nbeato): This would be untrue if the text was right aligned. // How often is that? - if (part->space_to_left() > kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) { + if (part->space_to_left() > + kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) { continue; } // The line above it should be right aligned (assuming justified format). @@ -1055,7 +1091,8 @@ void TableFinder::FilterParagraphEndings() { // line could have fit on the previous line). So compare // whitespace to text. 
if (upper_part->bounding_box().width() < - kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right()) { + kMinParagraphEndingTextToWhitespaceRatio * + upper_part->space_to_right()) { continue; } @@ -1153,7 +1190,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST *column_blocks) { TBOX box = seg->bounding_box(); int num_table_cells = 0; int num_text_cells = 0; - GridSearch rsearch(&clean_part_grid_); + GridSearch rsearch( + &clean_part_grid_); rsearch.SetUniqueMode(true); rsearch.StartRectSearch(box); ColPartition *part = nullptr; @@ -1178,7 +1216,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST *column_blocks) { } // Move column blocks to grid -void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid) { +void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, + ColSegmentGrid *col_seg_grid) { ColSegment_IT it(segments); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColSegment *seg = it.extract(); @@ -1200,7 +1239,8 @@ void TableFinder::GridMergeColumnBlocks() { int margin = gridsize(); // Iterate the Column Blocks in the grid. 
- GridSearch gsearch(&col_seg_grid_); + GridSearch gsearch( + &col_seg_grid_); gsearch.StartFullSearch(); ColSegment *seg; while ((seg = gsearch.NextFullSearch()) != nullptr) { @@ -1214,12 +1254,15 @@ void TableFinder::GridMergeColumnBlocks() { do { TBOX box = seg->bounding_box(); // slightly expand the search region vertically - int top_range = std::min(box.top() + margin, static_cast(tright().y())); - int bottom_range = std::max(box.bottom() - margin, static_cast(bleft().y())); + int top_range = + std::min(box.top() + margin, static_cast(tright().y())); + int bottom_range = + std::max(box.bottom() - margin, static_cast(bleft().y())); box.set_top(top_range); box.set_bottom(bottom_range); neighbor_found = false; - GridSearch rectsearch(&col_seg_grid_); + GridSearch rectsearch( + &col_seg_grid_); rectsearch.StartRectSearch(box); ColSegment *neighbor = nullptr; while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { @@ -1277,7 +1320,8 @@ void TableFinder::GridMergeColumnBlocks() { void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { ColSegment_IT it(table_columns); // Iterate the ColPartitions in the grid. - GridSearch gsearch(&clean_part_grid_); + GridSearch gsearch( + &clean_part_grid_); gsearch.StartFullSearch(); ColPartition *part; while ((part = gsearch.NextFullSearch()) != nullptr) { @@ -1291,7 +1335,8 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { // Start a search below the current cell to find bottom neighbours // Note: a full search will always process things above it first, so // this should be starting at the highest cell and working its way down. 
- GridSearch vsearch(&clean_part_grid_); + GridSearch vsearch( + &clean_part_grid_); vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom()); ColPartition *neighbor = nullptr; bool found_neighbours = false; @@ -1326,11 +1371,13 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { // Mark regions in a column that are x-bounded by the column boundaries and // y-bounded by the table columns' projection on the y-axis as table regions -void TableFinder::GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions) { +void TableFinder::GetTableRegions(ColSegment_LIST *table_columns, + ColSegment_LIST *table_regions) { ColSegment_IT cit(table_columns); ColSegment_IT rit(table_regions); // Iterate through column blocks - GridSearch gsearch(&col_seg_grid_); + GridSearch gsearch( + &col_seg_grid_); gsearch.StartFullSearch(); ColSegment *part; int page_height = tright().y() - bleft().y(); @@ -1389,7 +1436,8 @@ void TableFinder::GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIS // single line and hence the tables get merged together void TableFinder::GridMergeTableRegions() { // Iterate the table regions in the grid. 
- GridSearch gsearch(&table_grid_); + GridSearch gsearch( + &table_grid_); gsearch.StartFullSearch(); ColSegment *seg = nullptr; while ((seg = gsearch.NextFullSearch()) != nullptr) { @@ -1402,7 +1450,8 @@ void TableFinder::GridMergeTableRegions() { search_region.set_left(bleft().x()); search_region.set_right(tright().x()); neighbor_found = false; - GridSearch rectsearch(&table_grid_); + GridSearch rectsearch( + &table_grid_); rectsearch.StartRectSearch(search_region); ColSegment *neighbor = nullptr; while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { @@ -1454,13 +1503,15 @@ bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) { // Check for ColPartitions spanning both table regions TBOX bbox = box1.bounding_union(box2); // Start a rect search on bbox - GridSearch rectsearch(&clean_part_grid_); + GridSearch rectsearch( + &clean_part_grid_); rectsearch.StartRectSearch(bbox); ColPartition *part = nullptr; while ((part = rectsearch.NextRectSearch()) != nullptr) { const TBOX &part_box = part->bounding_box(); // return true if a colpartition spanning both table regions is found - if (part_box.overlap(box1) && part_box.overlap(box2) && !part->IsImageType()) { + if (part_box.overlap(box1) && part_box.overlap(box2) && + !part->IsImageType()) { return true; } } @@ -1542,12 +1593,14 @@ void TableFinder::GrowTableBox(const TBOX &table_box, TBOX *result_box) { // Grow a table by increasing the size of the box to include // partitions with significant overlap with the table. -void TableFinder::GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range, +void TableFinder::GrowTableToIncludePartials(const TBOX &table_box, + const TBOX &search_range, TBOX *result_box) { // Rulings are in a different grid, so search 2 grids for rulings, text, // and table partitions that are not entirely within the new box. for (int i = 0; i < 2; ++i) { - ColPartitionGrid *grid = (i == 0) ? 
&fragmented_text_grid_ : &leader_and_ruling_grid_; + ColPartitionGrid *grid = + (i == 0) ? &fragmented_text_grid_ : &leader_and_ruling_grid_; ColPartitionGridSearch rectsearch(grid); rectsearch.StartRectSearch(search_range); ColPartition *part = nullptr; @@ -1569,7 +1622,8 @@ void TableFinder::GrowTableToIncludePartials(const TBOX &table_box, const TBOX & // Grow a table by expanding to the extents of significantly // overlapping lines. -void TableFinder::GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, +void TableFinder::GrowTableToIncludeLines(const TBOX &table_box, + const TBOX &search_range, TBOX *result_box) { ColPartitionGridSearch rsearch(&leader_and_ruling_grid_); rsearch.SetUniqueMode(true); @@ -1601,7 +1655,8 @@ void TableFinder::GrowTableToIncludeLines(const TBOX &table_box, const TBOX &sea // Checks whether the horizontal line belong to the table by looking at the // side spacing of extra ColParitions that will be included in the table // due to expansion -bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &table_box) { +bool TableFinder::HLineBelongsToTable(const ColPartition &part, + const TBOX &table_box) { if (!part.IsHorizontalLine()) { return false; } @@ -1627,7 +1682,8 @@ bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &tabl // Rulings are in a different grid, so search 2 grids for rulings, text, // and table partitions that are introduced by the new box. for (int i = 0; i < 2; ++i) { - ColPartitionGrid *grid = (i == 0) ? &clean_part_grid_ : &leader_and_ruling_grid_; + ColPartitionGrid *grid = + (i == 0) ? 
&clean_part_grid_ : &leader_and_ruling_grid_; // Start a rect search on bbox ColPartitionGridSearch rectsearch(grid); rectsearch.SetUniqueMode(true); @@ -1672,12 +1728,14 @@ bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &tabl void TableFinder::IncludeLeftOutColumnHeaders(TBOX *table_box) { // Start a search above the current table to look for column headers ColPartitionGridSearch vsearch(&clean_part_grid_); - vsearch.StartVerticalSearch(table_box->left(), table_box->right(), table_box->top()); + vsearch.StartVerticalSearch(table_box->left(), table_box->right(), + table_box->top()); ColPartition *neighbor = nullptr; ColPartition *previous_neighbor = nullptr; while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) { // Max distance to find a table heading. - const int max_distance = kMaxColumnHeaderDistance * neighbor->median_height(); + const int max_distance = + kMaxColumnHeaderDistance * neighbor->median_height(); int table_top = table_box->top(); const TBOX &box = neighbor->bounding_box(); // Do not continue if the next box is way above @@ -1714,7 +1772,8 @@ void TableFinder::DeleteSingleColumnTables() { // create an integer array to hold projection on x-axis int *table_xprojection = new int[page_width]; // Iterate through all tables in the table grid - GridSearch table_search(&table_grid_); + GridSearch table_search( + &table_grid_); table_search.StartFullSearch(); ColSegment *table; while ((table = table_search.NextFullSearch()) != nullptr) { @@ -1724,7 +1783,8 @@ void TableFinder::DeleteSingleColumnTables() { table_xprojection[i] = 0; } // Start a rect search on table_box - GridSearch rectsearch(&clean_part_grid_); + GridSearch rectsearch( + &clean_part_grid_); rectsearch.SetUniqueMode(true); rectsearch.StartRectSearch(table_box); ColPartition *part; @@ -1938,7 +1998,8 @@ void TableFinder::DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW); } 
-void TableFinder::DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid, +void TableFinder::DisplayColPartitionConnections(ScrollView *win, + ColPartitionGrid *grid, ScrollView::Color color) { // Iterate the ColPartitions in the grid. GridSearch gsearch(grid); @@ -1982,8 +2043,9 @@ void TableFinder::DisplayColPartitionConnections(ScrollView *win, ColPartitionGr // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. -void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_columns, - WidthCallback width_cb) { +void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, + ColPartitionSet **all_columns, + const WidthCallback &width_cb) { // Since we have table blocks already, remove table tags from all // colpartitions GridSearch gsearch(grid); @@ -1997,13 +2059,15 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_ } // Now make a single colpartition out of each table block and remove // all colpartitions contained within a table - GridSearch table_search(&table_grid_); + GridSearch table_search( + &table_grid_); table_search.StartFullSearch(); ColSegment *table; while ((table = table_search.NextFullSearch()) != nullptr) { const TBOX &table_box = table->bounding_box(); // Start a rect search on table_box - GridSearch rectsearch(grid); + GridSearch rectsearch( + grid); rectsearch.StartRectSearch(table_box); ColPartition *part; ColPartition *table_partition = nullptr; @@ -2045,7 +2109,10 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_ //////// ColSegment code //////// ColSegment::ColSegment() - : ELIST_LINK(), num_table_cells_(0), num_text_cells_(0), type_(COL_UNKNOWN) {} + : ELIST_LINK(), + num_table_cells_(0), + num_text_cells_(0), + type_(COL_UNKNOWN) {} // Provides a color for BBGrid to draw the rectangle. 
ScrollView::Color ColSegment::BoxColor() const { diff --git a/src/textord/tablefind.h b/src/textord/tablefind.h index 4b758b40..e2640417 100644 --- a/src/textord/tablefind.h +++ b/src/textord/tablefind.h @@ -107,7 +107,8 @@ private: // Typedef BBGrid of ColSegments using ColSegmentGrid = BBGrid; -using ColSegmentGridSearch = GridSearch; +using ColSegmentGridSearch = + GridSearch; // TableFinder is a utility class to find a set of tables given a set of // ColPartitions and Columns. The TableFinder will mark candidate ColPartitions @@ -143,8 +144,8 @@ public: // tables. The columns and width callbacks are used to merge tables. // The reskew argument is only used to write the tables to the out.png // if that feature is enabled. - void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb, - const FCOORD &reskew); + void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, + WidthCallback width_cb, const FCOORD &reskew); protected: // Access for the grid dimensions. @@ -179,7 +180,8 @@ protected: // Utility function to move segments to col_seg_grid // Note: Move includes ownership, // so segments will be be owned by col_seg_grid - void MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid); + void MoveColSegmentsToGrid(ColSegment_LIST *segments, + ColSegmentGrid *col_seg_grid); //////// Set up code to run during table detection to correctly //////// initialize variables on column partitions that are used later. @@ -191,7 +193,8 @@ protected: // Set left, right and top, bottom spacings of each colpartition. // Left/right spacings are w.r.t the column boundaries // Top/bottom spacings are w.r.t. previous and next colpartitions - static void SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns); + static void SetPartitionSpacings(ColPartitionGrid *grid, + ColPartitionSet **all_columns); // Set spacing and closest neighbors above and below a given colpartition. 
void SetVerticalSpacing(ColPartition *part); @@ -263,10 +266,12 @@ protected: //////// // Get Column segments from best_columns_ - void GetColumnBlocks(ColPartitionSet **columns, ColSegment_LIST *col_segments); + void GetColumnBlocks(ColPartitionSet **columns, + ColSegment_LIST *col_segments); // Group Column segments into consecutive single column regions. - void GroupColumnBlocks(ColSegment_LIST *current_segments, ColSegment_LIST *col_segments); + void GroupColumnBlocks(ColSegment_LIST *current_segments, + ColSegment_LIST *col_segments); // Check if two boxes are consecutive within the same column bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); @@ -295,7 +300,8 @@ protected: // earlier functions) in the x direction and the min/max extent of // overlapping table columns in the y direction. // Section 4.2 of paper. - void GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions); + void GetTableRegions(ColSegment_LIST *table_columns, + ColSegment_LIST *table_regions); //////// Functions to "patch up" found tables //////// @@ -316,11 +322,12 @@ protected: void GrowTableBox(const TBOX &table_box, TBOX *result_box); // Grow a table by increasing the size of the box to include // partitions with significant overlap with the table. - void GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range, - TBOX *result_box); + void GrowTableToIncludePartials(const TBOX &table_box, + const TBOX &search_range, TBOX *result_box); // Grow a table by expanding to the extents of significantly // overlapping lines. 
- void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, TBOX *result_box); + void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, + TBOX *result_box); // Checks whether the horizontal line belong to the table by looking at the // side spacing of extra ColParitions that will be included in the table // due to expansion @@ -351,12 +358,14 @@ protected: // Displays Colpartitions marked as table row. Overlays them on top of // part_grid_. - void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color); + void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, + ScrollView::Color color); // Displays the colpartitions using a new coloring on an existing window. // Note: This method is only for debug purpose during development and // would not be part of checked in code - void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, + void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, + ScrollView::Color text_color, ScrollView::Color table_color); void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color); @@ -366,7 +375,8 @@ protected: // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. - void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb); + void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, + const WidthCallback &width_cb); ///////////////////////////////////////////////// // Useful objects used during table find process. 
diff --git a/src/textord/tablerecog.cpp b/src/textord/tablerecog.cpp index bde67ba8..2bcb4c07 100644 --- a/src/textord/tablerecog.cpp +++ b/src/textord/tablerecog.cpp @@ -720,15 +720,6 @@ int StructuredTable::CountPartitions(const TBOX &box) { //////// TableRecognizer Class //////// -TableRecognizer::TableRecognizer() - : text_grid_(nullptr) - , line_grid_(nullptr) - , min_height_(0) - , min_width_(0) - , max_text_height_(INT32_MAX) {} - -TableRecognizer::~TableRecognizer() = default; - void TableRecognizer::Init() {} void TableRecognizer::set_text_grid(ColPartitionGrid *text_grid) { diff --git a/src/textord/tablerecog.h b/src/textord/tablerecog.h index 8a136869..080cfaec 100644 --- a/src/textord/tablerecog.h +++ b/src/textord/tablerecog.h @@ -250,8 +250,8 @@ protected: class TESS_API TableRecognizer { public: - TableRecognizer(); - ~TableRecognizer(); + TableRecognizer() = default; + ~TableRecognizer() = default; // Initialization code. Must be called after the constructor. void Init(); @@ -358,13 +358,13 @@ protected: static bool IsWeakTableRow(StructuredTable *table, int row); // Input data, used as read only data to make decisions. - ColPartitionGrid *text_grid_; // Text ColPartitions - ColPartitionGrid *line_grid_; // Line ColPartitions + ColPartitionGrid *text_grid_ = nullptr; // Text ColPartitions + ColPartitionGrid *line_grid_ = nullptr; // Line ColPartitions // Table constraints, a "good" table must satisfy these. - int min_height_; - int min_width_; + int min_height_ = 0; + int min_width_ = 0; // Filters, used to prevent awkward partitions from destroying structure. - int max_text_height_; // Horizontal lines may intersect taller text. + int max_text_height_ = INT32_MAX; // Horizontal lines may intersect taller text. 
}; } // namespace tesseract diff --git a/src/textord/tordmain.cpp b/src/textord/tordmain.cpp index 3e44fead..3305a4e4 100644 --- a/src/textord/tordmain.cpp +++ b/src/textord/tordmain.cpp @@ -325,7 +325,7 @@ float Textord::filter_noise_blobs(BLOBNBOX_LIST *src_list, // original list (tesseract::CCStruct::kDescenderFraction + tesseract::CCStruct::kXHeightFraction + 2 * tesseract::CCStruct::kAscenderFraction) / tesseract::CCStruct::kXHeightFraction); - min_y = floor(initial_x / 2); + min_y = std::floor(initial_x / 2); max_x = ceil(initial_x * textord_width_limit); small_it.move_to_first(); for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { @@ -729,7 +729,7 @@ void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BL int best_g = 0; float best_angle_diff = FLT_MAX; for (const auto &group : groups) { - double angle_diff = fabs(block_angle - group->angle); + double angle_diff = std::fabs(block_angle - group->angle); if (angle_diff > M_PI) { angle_diff = fabs(angle_diff - 2.0 * M_PI); } diff --git a/src/textord/tospace.cpp b/src/textord/tospace.cpp index 07f38b7f..75b3aed0 100644 --- a/src/textord/tospace.cpp +++ b/src/textord/tospace.cpp @@ -36,6 +36,7 @@ #endif #include +#include #include #define MAXSPACING 128 /*max expected spacing in pix */ @@ -295,7 +296,7 @@ void Textord::row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, /* Collect first pass stats for row */ if (!good_block_space_estimate) { - block_space_gap_width = int16_t(floor(row->xheight / 2)); + block_space_gap_width = int16_t(std::floor(row->xheight / 2)); } if (!row->blob_list()->empty()) { if (tosp_threshold_bias1 > 0) { @@ -435,7 +436,7 @@ are ignoring big gaps*/ if (suspected_table) { sane_space = std::max(tosp_table_kn_sp_ratio * row->kern_size, tosp_table_xht_sp_ratio * row->xheight); - sane_threshold = int32_t(floor((sane_space + row->kern_size) / 2)); + sane_threshold = int32_t(std::floor((sane_space + row->kern_size) / 2)); if 
((row->space_size < sane_space) || (row->space_threshold < sane_threshold)) { if (tosp_debug_level > 5) { @@ -606,7 +607,7 @@ It comes to the same thing. (Though there is a difference in that old textor has integer space_size and kern_size.) */ - row->space_threshold = int32_t(floor((row->space_size + row->kern_size) / 2)); + row->space_threshold = int32_t(std::floor((row->space_size + row->kern_size) / 2)); } // Apply the same logic and ratios as in row_spacing_stats to @@ -648,7 +649,7 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta crude_threshold_estimate = std::max(tosp_init_guess_kn_mult * kern_estimate, tosp_init_guess_xht_mult * row->xheight); small_gaps_count = - stats_count_under(all_gap_stats, static_cast(ceil(crude_threshold_estimate))); + stats_count_under(all_gap_stats, static_cast(std::ceil(crude_threshold_estimate))); total = all_gap_stats->get_total(); if ((total <= tosp_redo_kern_limit) || @@ -718,7 +719,7 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta } else { row->kern_size = all_gap_stats->median(); } - row->space_threshold = int32_t(floor((row->space_size + row->kern_size) / 2)); + row->space_threshold = int32_t(std::floor((row->space_size + row->kern_size) / 2)); /* Sanity check */ if ((row->kern_size >= row->space_threshold) || (row->space_threshold >= row->space_size) || (row->space_threshold <= 0)) { @@ -793,7 +794,7 @@ threshold is not within it, move the threshold so that is is just inside it. reqd_zero_width = 3; } - for (index = int16_t(ceil(kn)); index < int16_t(floor(sp)); index++) { + for (index = int16_t(std::ceil(kn)); index < int16_t(std::floor(sp)); index++) { if (all_gap_stats->pile_count(index) == 0) { if (zero_width == 0) { zero_start = index; @@ -909,7 +910,7 @@ the gap between the word being built and the next one. 
*/ current_gap = box_it.data()->bounding_box().left() - next_rep_char_word_right; current_within_xht_gap = current_gap; if (current_gap > tosp_rep_space * repetition_spacing) { - prev_blanks = static_cast(floor(current_gap / row->space_size)); + prev_blanks = static_cast(std::floor(current_gap / row->space_size)); if (prev_blanks < 1) { prev_blanks = 1; } @@ -1002,7 +1003,7 @@ the gap between the word being built and the next one. */ current_gap = word->bounding_box().left() - prev_x; current_within_xht_gap = current_gap; if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = static_cast(floor(current_gap / row->space_size)); + blanks = static_cast(std::floor(current_gap / row->space_size)); if (blanks < 1) { blanks = 1; } @@ -1066,7 +1067,7 @@ the gap between the word being built and the next one. */ repetition_spacing = find_mean_blob_spacing(word); current_gap = word->bounding_box().left() - prev_x; if (current_gap > tosp_rep_space * repetition_spacing) { - blanks = static_cast(floor(current_gap / row->space_size)); + blanks = static_cast(std::floor(current_gap / row->space_size)); if (blanks < 1) { blanks = 1; } @@ -1738,7 +1739,7 @@ caps ht chars which should NOT have their box reduced: T, Y, V, W etc if (left_limit > junk) { *left_above_xht = INT16_MAX; // No area above xht } else { - *left_above_xht = static_cast(floor(left_limit)); + *left_above_xht = static_cast(std::floor(left_limit)); } /* Find reduced LH limit of blob - the left extent of the region ABOVE the @@ -1762,7 +1763,7 @@ Find reduced RH limit of blob - the right extent of the region BELOW the xht. 
return TBOX(); // no area within xht so return empty box } - return TBOX(ICOORD(static_cast(floor(left_limit)), blob_box.bottom()), - ICOORD(static_cast(ceil(right_limit)), blob_box.top())); + return TBOX(ICOORD(static_cast(std::floor(left_limit)), blob_box.bottom()), + ICOORD(static_cast(std::ceil(right_limit)), blob_box.top())); } } // namespace tesseract diff --git a/src/textord/wordseg.cpp b/src/textord/wordseg.cpp index ef4950f5..3dd741a8 100644 --- a/src/textord/wordseg.cpp +++ b/src/textord/wordseg.cpp @@ -23,6 +23,8 @@ #include "wordseg.h" +#include + #include "blobbox.h" #include "cjkpitch.h" #include "drawtord.h" @@ -222,7 +224,7 @@ int32_t row_words( // compute space size lower = row->xheight * textord_words_initial_lower; upper = row->xheight * textord_words_initial_upper; cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, 3, cluster_stats); - while (cluster_count < 2 && ceil(lower) < floor(upper)) { + while (cluster_count < 2 && std::ceil(lower) < std::floor(upper)) { // shrink gap upper = (upper * 3 + lower) / 4; lower = (lower * 3 + upper) / 4; diff --git a/src/training/combine_tessdata.cpp b/src/training/combine_tessdata.cpp index 2b38854a..30edc8e9 100644 --- a/src/training/combine_tessdata.cpp +++ b/src/training/combine_tessdata.cpp @@ -26,8 +26,7 @@ using namespace tesseract; -static int list_components(TessdataManager &tm, const char *filename) -{ +static int list_components(TessdataManager &tm, const char *filename) { // Initialize TessdataManager with the data in the given traineddata file. 
if (filename != nullptr && !tm.Init(filename)) { tprintf("Failed to read %s\n", filename); @@ -37,8 +36,7 @@ static int list_components(TessdataManager &tm, const char *filename) return EXIT_SUCCESS; } -static int list_network(TessdataManager &tm, const char *filename) -{ +static int list_network(TessdataManager &tm, const char *filename) { if (filename != nullptr && !tm.Init(filename)) { tprintf("Failed to read %s\n", filename); return EXIT_FAILURE; @@ -62,11 +60,11 @@ static int list_network(TessdataManager &tm, const char *filename) std::cout << "Layer Learning Rates: "; auto layers = recognizer.EnumerateLayers(); - for (auto id : layers) { + for (const auto &id : layers) { auto layer = recognizer.GetLayer(id); std::cout << id << "(" << layer->name() << ")" << "=" << recognizer.GetLayerLearningRate(id) - << (layers[layers.size()-1] != id ? ", " : ""); + << (layers[layers.size() - 1] != id ? ", " : ""); } std::cout << "\n"; } @@ -138,7 +136,8 @@ int main(int argc, char **argv) { } else { printf("Output %s created successfully.\n", output_file.c_str()); } - } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) { + } else if (argc >= 4 && + (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) { // Initialize TessdataManager with the data in the given traineddata file. 
if (!tm.Init(argv[2])) { tprintf("Failed to read %s\n", argv[2]); @@ -173,7 +172,8 @@ int main(int argc, char **argv) { if (tm.ExtractToFile(filename.c_str())) { printf("Wrote %s\n", filename.c_str()); } else if (errno != 0) { - printf("Error, could not extract %s: %s\n", filename.c_str(), strerror(errno)); + printf("Error, could not extract %s: %s\n", filename.c_str(), + strerror(errno)); return EXIT_FAILURE; } } @@ -184,7 +184,8 @@ int main(int argc, char **argv) { std::string traineddata_filename = new_traineddata_filename; traineddata_filename += ".__tmp__"; if (rename(new_traineddata_filename, traineddata_filename.c_str()) != 0) { - tprintf("Failed to create a temporary file %s\n", traineddata_filename.c_str()); + tprintf("Failed to create a temporary file %s\n", + traineddata_filename.c_str()); return EXIT_FAILURE; } @@ -212,7 +213,8 @@ int main(int argc, char **argv) { std::vector lstm_data; fp.OpenWrite(&lstm_data); ASSERT_HOST(recognizer.Serialize(&tm, &fp)); - tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], lstm_data.size()); + tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], + lstm_data.size()); if (!tm.SaveFile(argv[2], nullptr)) { tprintf("Failed to write modified traineddata:%s!\n", argv[2]); return EXIT_FAILURE; diff --git a/src/training/common/ctc.cpp b/src/training/common/ctc.cpp index 0cc0eb10..1f7613e8 100644 --- a/src/training/common/ctc.cpp +++ b/src/training/common/ctc.cpp @@ -24,6 +24,7 @@ #include #include // for FLT_MAX +#include #include namespace tesseract { @@ -266,7 +267,7 @@ float CTC::CalculateBiasFraction() { if (total_labels == 0) { return 0.0f; } - return exp(std::max(true_pos - false_pos, 1) * log(kMinProb_) / total_labels); + return exp(std::max(true_pos - false_pos, 1) * std::log(kMinProb_) / total_labels); } // Given ln(x) and ln(y), returns ln(x + y), using: @@ -319,7 +320,7 @@ void CTC::Backward(GENERIC_2D_ARRAY *log_probs) const { const float *outputs_tp1 = outputs_[t + 1]; for (int u = min_labels_[t]; u 
<= max_labels_[t]; ++u) { // Continuing the same label. - double log_sum = log_probs->get(t + 1, u) + log(outputs_tp1[labels_[u]]); + double log_sum = log_probs->get(t + 1, u) + std::log(outputs_tp1[labels_[u]]); // Change from previous label. if (u + 1 < num_labels_) { double prev_prob = outputs_tp1[labels_[u + 1]]; diff --git a/src/training/common/mastertrainer.cpp b/src/training/common/mastertrainer.cpp index be86055f..4d92f977 100644 --- a/src/training/common/mastertrainer.cpp +++ b/src/training/common/mastertrainer.cpp @@ -33,7 +33,7 @@ #include "shapeclassifier.h" #include "shapetable.h" #ifndef GRAPHICS_DISABLED -#include "svmnode.h" +# include "svmnode.h" #endif #include "scanutils.h" @@ -51,16 +51,16 @@ const float kFontMergeDistance = 0.025; MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis, bool replicate_samples, int debug_level) - : norm_mode_(norm_mode) - , samples_(fontinfo_table_) - , junk_samples_(fontinfo_table_) - , verify_samples_(fontinfo_table_) - , charsetsize_(0) - , enable_shape_analysis_(shape_analysis) - , enable_replication_(replicate_samples) - , fragments_(nullptr) - , prev_unichar_id_(-1) - , debug_level_(debug_level) {} + : norm_mode_(norm_mode), + samples_(fontinfo_table_), + junk_samples_(fontinfo_table_), + verify_samples_(fontinfo_table_), + charsetsize_(0), + enable_shape_analysis_(shape_analysis), + enable_replication_(replicate_samples), + fragments_(nullptr), + prev_unichar_id_(-1), + debug_level_(debug_level) {} MasterTrainer::~MasterTrainer() { delete[] fragments_; @@ -137,10 +137,14 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name, const FEATURE_DEFS_STRUCT &feature_defs, bool verification) { char buffer[2048]; - const int int_feature_type = ShortNameToFeatureType(feature_defs, kIntFeatureType); - const int micro_feature_type = ShortNameToFeatureType(feature_defs, kMicroFeatureType); - const int cn_feature_type = ShortNameToFeatureType(feature_defs, kCNFeatureType); - const int 
geo_feature_type = ShortNameToFeatureType(feature_defs, kGeoFeatureType); + const int int_feature_type = + ShortNameToFeatureType(feature_defs, kIntFeatureType); + const int micro_feature_type = + ShortNameToFeatureType(feature_defs, kMicroFeatureType); + const int cn_feature_type = + ShortNameToFeatureType(feature_defs, kCNFeatureType); + const int geo_feature_type = + ShortNameToFeatureType(feature_defs, kGeoFeatureType); FILE *fp = fopen(page_name, "rb"); if (fp == nullptr) { @@ -175,8 +179,8 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name, sample->set_font_id(font_id); sample->set_page_num(page_number + page_images_.size()); sample->set_bounding_box(bounding_box); - sample->ExtractCharDesc(int_feature_type, micro_feature_type, cn_feature_type, geo_feature_type, - char_desc); + sample->ExtractCharDesc(int_feature_type, micro_feature_type, + cn_feature_type, geo_feature_type, char_desc); AddSample(verification, unichar.c_str(), sample); delete char_desc; } @@ -186,7 +190,8 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name, // Adds the given single sample to the trainer, setting the classid // appropriately from the given unichar_str. 
-void MasterTrainer::AddSample(bool verification, const char *unichar, TrainingSample *sample) { +void MasterTrainer::AddSample(bool verification, const char *unichar, + TrainingSample *sample) { if (verification) { verify_samples_.AddSample(unichar, sample); prev_unichar_id_ = -1; @@ -314,7 +319,8 @@ void MasterTrainer::SetupMasterShapes() { ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, &char_shapes_end_fragment); char_shapes.AppendMasterShapes(char_shapes_end_fragment, nullptr); - ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, &char_shapes); + ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, + &char_shapes); master_shapes_.AppendMasterShapes(char_shapes, nullptr); tprintf("Master shape_table:%s\n", master_shapes_.SummaryStr().c_str()); } @@ -383,13 +389,13 @@ bool MasterTrainer::LoadFontInfo(const char *filename) { fontinfo.name = font_name; fontinfo.properties = 0; fontinfo.universal_id = 0; - if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold, &fixed, &serif, - &fraktur) != 6) { + if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold, + &fixed, &serif, &fraktur) != 6) { delete[] font_name; continue; } - fontinfo.properties = - (italic << 0) + (bold << 1) + (fixed << 2) + (serif << 3) + (fraktur << 4); + fontinfo.properties = (italic << 0) + (bold << 1) + (fixed << 2) + + (serif << 3) + (fraktur << 4); if (!fontinfo_table_.contains(fontinfo)) { fontinfo_table_.push_back(fontinfo); } else { @@ -477,7 +483,8 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) { fi->init_spacing(unicharset_.size()); FontSpacingInfo *spacing = nullptr; for (int l = 0; l < num_unichars; ++l) { - if (tfscanf(fontinfo_file, "%s %d %d %d", uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) { + if (tfscanf(fontinfo_file, "%s %d %d %d", uch, &x_gap_before, &x_gap_after, + &num_kerned) != 4) { tprintf("Bad format of font spacing file %s\n", filename); 
fclose(fontinfo_file); return false; @@ -498,7 +505,8 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) { if (!valid || !unicharset_.contains_unichar(kerned_uch)) { continue; } - spacing->kerned_unichar_ids.push_back(unicharset_.unichar_to_id(kerned_uch)); + spacing->kerned_unichar_ids.push_back( + unicharset_.unichar_to_id(kerned_uch)); spacing->kerned_x_gaps.push_back(static_cast(x_gap * scale)); } if (valid) { @@ -572,13 +580,14 @@ void MasterTrainer::SetupFlatShapeTable(ShapeTable *shape_table) { // Sets up a Clusterer for mftraining on a single shape_id. // Call FreeClusterer on the return value after use. -CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table, - const FEATURE_DEFS_STRUCT &feature_defs, int shape_id, - int *num_samples) { +CLUSTERER *MasterTrainer::SetupForClustering( + const ShapeTable &shape_table, const FEATURE_DEFS_STRUCT &feature_defs, + int shape_id, int *num_samples) { int desc_index = ShortNameToFeatureType(feature_defs, kMicroFeatureType); int num_params = feature_defs.FeatureDesc[desc_index]->NumParams; ASSERT_HOST(num_params == (int)MicroFeatureParameter::MFCount); - CLUSTERER *clusterer = MakeClusterer(num_params, feature_defs.FeatureDesc[desc_index]->ParamDesc); + CLUSTERER *clusterer = MakeClusterer( + num_params, feature_defs.FeatureDesc[desc_index]->ParamDesc); // We want to iterate over the samples of just the one shape. IndexMapBiDi shape_map; @@ -612,12 +621,14 @@ CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table, void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset, const UNICHARSET &shape_set, const ShapeTable &shape_table, - CLASS_STRUCT *float_classes, const char *inttemp_file, + CLASS_STRUCT *float_classes, + const char *inttemp_file, const char *pffmtable_file) { auto *classify = new tesseract::Classify(); // Move the fontinfo table to classify. 
fontinfo_table_.MoveTo(&classify->get_fontinfo_table()); - INT_TEMPLATES_STRUCT *int_templates = classify->CreateIntTemplates(float_classes, shape_set); + INT_TEMPLATES_STRUCT *int_templates = + classify->CreateIntTemplates(float_classes, shape_set); FILE *fp = fopen(inttemp_file, "wb"); if (fp == nullptr) { tprintf("Error, failed to open file \"%s\"\n", inttemp_file); @@ -631,10 +642,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset, // We put the shapetable_cutoffs in a vector, and compute the // unicharset cutoffs along the way. std::vector shapetable_cutoffs; - std::vector unichar_cutoffs; - for (int c = 0; c < unicharset.size(); ++c) { - unichar_cutoffs.push_back(0); - } + std::vector unichar_cutoffs(unicharset.size()); /* then write out each class */ for (int i = 0; i < int_templates->NumClasses; ++i) { INT_CLASS_STRUCT *Class = ClassForClassId(int_templates, i); @@ -679,7 +687,8 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset, // Generate debug output relating to the canonical distance between the // two given UTF8 grapheme strings. 
-void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar_str2) { +void MasterTrainer::DebugCanonical(const char *unichar_str1, + const char *unichar_str2) { int class_id1 = unicharset_.unichar_to_id(unichar_str1); int class_id2 = unicharset_.unichar_to_id(unichar_str2); if (class_id2 == INVALID_UNICHAR_ID) { @@ -689,8 +698,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar tprintf("No unicharset entry found for %s\n", unichar_str1); return; } else { - tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", class_id1, unichar_str1, - class_id2, unichar_str2); + tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", class_id1, + unichar_str1, class_id2, unichar_str2); } int num_fonts = samples_.NumFonts(); const IntFeatureMap &feature_map = feature_map_; @@ -714,7 +723,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar if (samples_.NumClassSamples(f2, class_id2, false) == 0) { continue; } - float dist = samples_.ClusterDistance(f1, class_id1, f2, class_id2, feature_map); + float dist = + samples_.ClusterDistance(f1, class_id1, f2, class_id2, feature_map); tprintf(" %5.3f", dist); } tprintf("\n"); @@ -725,7 +735,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar if (samples_.NumClassSamples(f, class_id1, true) > 0) { shapes.AddShape(class_id1, f); } - if (class_id1 != class_id2 && samples_.NumClassSamples(f, class_id2, true) > 0) { + if (class_id1 != class_id2 && + samples_.NumClassSamples(f, class_id2, true) > 0) { shapes.AddShape(class_id2, f); } } @@ -743,14 +754,17 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar // Until the features window is destroyed, each click in the features window // will display the samples that have that feature in a separate window. 
void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font, - const char *unichar_str2, int canonical_font) { + const char *unichar_str2, + int canonical_font) { const IntFeatureMap &feature_map = feature_map_; const IntFeatureSpace &feature_space = feature_map.feature_space(); ScrollView *f_window = CreateFeatureSpaceWindow("Features", 100, 500); - ClearFeatureSpaceWindow(norm_mode_ == NM_BASELINE ? baseline : character, f_window); + ClearFeatureSpaceWindow(norm_mode_ == NM_BASELINE ? baseline : character, + f_window); int class_id2 = samples_.unicharset().unichar_to_id(unichar_str2); if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) { - const TrainingSample *sample = samples_.GetCanonicalSample(canonical_font, class_id2); + const TrainingSample *sample = + samples_.GetCanonicalSample(canonical_font, class_id2); for (uint32_t f = 0; f < sample->num_features(); ++f) { RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED); } @@ -780,8 +794,8 @@ void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font, Shape shape; shape.AddToShape(class_id1, cloud_font); s_window->Clear(); - samples_.DisplaySamplesWithFeature(feature_index, shape, feature_space, ScrollView::GREEN, - s_window); + samples_.DisplaySamplesWithFeature(feature_index, shape, feature_space, + ScrollView::GREEN, s_window); s_window->Update(); } } @@ -790,22 +804,25 @@ void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font, } #endif // !GRAPHICS_DISABLED -void MasterTrainer::TestClassifierVOld(bool replicate_samples, ShapeClassifier *test_classifier, +void MasterTrainer::TestClassifierVOld(bool replicate_samples, + ShapeClassifier *test_classifier, ShapeClassifier *old_classifier) { SampleIterator sample_it; sample_it.Init(nullptr, nullptr, replicate_samples, &samples_); - ErrorCounter::DebugNewErrors(test_classifier, old_classifier, CT_UNICHAR_TOPN_ERR, - fontinfo_table_, page_images_, &sample_it); + 
ErrorCounter::DebugNewErrors(test_classifier, old_classifier, + CT_UNICHAR_TOPN_ERR, fontinfo_table_, + page_images_, &sample_it); } // Tests the given test_classifier on the internal samples. // See TestClassifier for details. -void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, int report_level, +void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, + int report_level, bool replicate_samples, ShapeClassifier *test_classifier, std::string *report_string) { - TestClassifier(error_mode, report_level, replicate_samples, &samples_, test_classifier, - report_string); + TestClassifier(error_mode, report_level, replicate_samples, &samples_, + test_classifier, report_string); } // Tests the given test_classifier on the given samples. @@ -822,8 +839,10 @@ void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, int report_le // If report_string is non-nullptr, a summary of the results for each font // is appended to the report_string. double MasterTrainer::TestClassifier(CountTypes error_mode, int report_level, - bool replicate_samples, TrainingSampleSet *samples, - ShapeClassifier *test_classifier, std::string *report_string) { + bool replicate_samples, + TrainingSampleSet *samples, + ShapeClassifier *test_classifier, + std::string *report_string) { SampleIterator sample_it; sample_it.Init(nullptr, nullptr, replicate_samples, samples); if (report_level > 0) { @@ -837,8 +856,9 @@ double MasterTrainer::TestClassifier(CountTypes error_mode, int report_level, tprintf("Testing %sREPLICATED:\n", replicate_samples ? 
"" : "NON-"); } double unichar_error = 0.0; - ErrorCounter::ComputeErrorRate(test_classifier, report_level, error_mode, fontinfo_table_, - page_images_, &sample_it, &unichar_error, nullptr, report_string); + ErrorCounter::ComputeErrorRate(test_classifier, report_level, error_mode, + fontinfo_table_, page_images_, &sample_it, + &unichar_error, nullptr, report_string); return unichar_error; } @@ -857,14 +877,16 @@ float MasterTrainer::ShapeDistance(const ShapeTable &shapes, int s1, int s2) { // distances between characters of matching font where possible. for (int c1 = 0; c1 < num_chars1; ++c1) { for (int c2 = 0; c2 < num_chars2; ++c2) { - dist_sum += samples_.UnicharDistance(shape1[c1], shape2[c2], true, feature_map); + dist_sum += + samples_.UnicharDistance(shape1[c1], shape2[c2], true, feature_map); ++dist_count; } } } else { // In the single unichar case, there is little alternative, but to compute // the squared-order distance between pairs of fonts. - dist_sum = samples_.UnicharDistance(shape1[0], shape2[0], false, feature_map); + dist_sum = + samples_.UnicharDistance(shape1[0], shape2[0], false, feature_map); ++dist_count; } return dist_sum / dist_count; @@ -942,8 +964,8 @@ void MasterTrainer::ReplaceFragmentedSamples() { // * No shape shall have more than max_shape_unichars in it, // * Don't merge shapes where the distance between them exceeds max_dist. const float kInfiniteDist = 999.0f; -void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float max_dist, - ShapeTable *shapes) { +void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, + float max_dist, ShapeTable *shapes) { int num_shapes = shapes->NumShapes(); int max_merges = num_shapes - min_shapes; // TODO: avoid new / delete. 
@@ -971,8 +993,8 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float int num_unichars = shapes->MergedUnicharCount(min_s1, min_s2); shape_dists[min_s1][min_s2 - min_s1 - 1].distance = kInfiniteDist; if (num_unichars > max_shape_unichars) { - tprintf("Merge of %d and %d with %d would exceed max of %d unichars\n", min_s1, min_s2, - num_unichars, max_shape_unichars); + tprintf("Merge of %d and %d with %d would exceed max of %d unichars\n", + min_s1, min_s2, num_unichars, max_shape_unichars); } else { shapes->MergeShapes(min_s1, min_s2); shape_dists[min_s2].clear(); @@ -980,13 +1002,15 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float for (int s = 0; s < min_s1; ++s) { if (!shape_dists[s].empty()) { - shape_dists[s][min_s1 - s - 1].distance = ShapeDistance(*shapes, s, min_s1); + shape_dists[s][min_s1 - s - 1].distance = + ShapeDistance(*shapes, s, min_s1); shape_dists[s][min_s2 - s - 1].distance = kInfiniteDist; } } for (int s2 = min_s1 + 1; s2 < num_shapes; ++s2) { if (shape_dists[min_s1][s2 - min_s1 - 1].distance < kInfiniteDist) { - shape_dists[min_s1][s2 - min_s1 - 1].distance = ShapeDistance(*shapes, min_s1, s2); + shape_dists[min_s1][s2 - min_s1 - 1].distance = + ShapeDistance(*shapes, min_s1, s2); } } for (int s = min_s1 + 1; s < min_s2; ++s) { diff --git a/src/training/mergenf.cpp b/src/training/mergenf.cpp index fd1aa2d9..7e027ab9 100644 --- a/src/training/mergenf.cpp +++ b/src/training/mergenf.cpp @@ -69,7 +69,7 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) { float Angle, Length; /* if p1 and p2 are not close in length, don't let them match */ - Length = fabs(p1->Length - p2->Length); + Length = std::fabs(p1->Length - p2->Length); if (Length > MAX_LENGTH_MISMATCH) { return (0.0); } @@ -88,8 +88,8 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) { } /* set the dummy pico-feature at one end of p1 and match it to p2 */ - Feature->Params[PicoFeatX] = p1->X + cos(Angle) * 
Length; - Feature->Params[PicoFeatY] = p1->Y + sin(Angle) * Length; + Feature->Params[PicoFeatX] = p1->X + std::cos(Angle) * Length; + Feature->Params[PicoFeatY] = p1->Y + std::sin(Angle) * Length; if (DummyFastMatch(Feature, p2)) { Evidence = SubfeatureEvidence(Feature, p2); if (Evidence < WorstEvidence) { @@ -101,8 +101,8 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) { } /* set the dummy pico-feature at the other end of p1 and match it to p2 */ - Feature->Params[PicoFeatX] = p1->X - cos(Angle) * Length; - Feature->Params[PicoFeatY] = p1->Y - sin(Angle) * Length; + Feature->Params[PicoFeatX] = p1->X - std::cos(Angle) * Length; + Feature->Params[PicoFeatY] = p1->Y - std::sin(Angle) * Length; if (DummyFastMatch(Feature, p2)) { Evidence = SubfeatureEvidence(Feature, p2); if (Evidence < WorstEvidence) { @@ -266,7 +266,7 @@ bool DummyFastMatch(FEATURE Feature, PROTO_STRUCT *Proto) { float AngleError; MaxAngleError = training_angle_pad / 360.0; - AngleError = fabs(Proto->Angle - Feature->Params[PicoFeatDir]); + AngleError = std::fabs(Proto->Angle - Feature->Params[PicoFeatDir]); if (AngleError > 0.5) { AngleError = 1.0 - AngleError; } @@ -296,8 +296,8 @@ void ComputePaddedBoundingBox(PROTO_STRUCT *Proto, float TangentPad, float Ortho FRECT *BoundingBox) { float Length = Proto->Length / 2.0 + TangentPad; float Angle = Proto->Angle * 2.0 * M_PI; - float CosOfAngle = fabs(cos(Angle)); - float SinOfAngle = fabs(sin(Angle)); + float CosOfAngle = fabs(std::cos(Angle)); + float SinOfAngle = fabs(std::sin(Angle)); float Pad = std::max(CosOfAngle * Length, SinOfAngle * OrthogonalPad); BoundingBox->MinX = Proto->X - Pad; diff --git a/src/training/unicharset/lstmtrainer.cpp b/src/training/unicharset/lstmtrainer.cpp index b608942e..f437012e 100644 --- a/src/training/unicharset/lstmtrainer.cpp +++ b/src/training/unicharset/lstmtrainer.cpp @@ -22,6 +22,7 @@ # include "config_auto.h" #endif +#include #include #include "lstmtrainer.h" @@ -71,14 +72,17 @@ const int 
kTargetXScale = 5; const int kTargetYScale = 100; #endif // !GRAPHICS_DISABLED -LSTMTrainer::LSTMTrainer() : randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) { +LSTMTrainer::LSTMTrainer() + : randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) { EmptyConstructor(); debug_interval_ = 0; } -LSTMTrainer::LSTMTrainer(const char *model_base, const char *checkpoint_name, int debug_interval, - int64_t max_memory) - : randomly_rotate_(false), training_data_(max_memory), sub_trainer_(nullptr) { +LSTMTrainer::LSTMTrainer(const char *model_base, const char *checkpoint_name, + int debug_interval, int64_t max_memory) + : randomly_rotate_(false), + training_data_(max_memory), + sub_trainer_(nullptr) { EmptyConstructor(); debug_interval_ = debug_interval; model_base_ = model_base; @@ -96,7 +100,8 @@ LSTMTrainer::~LSTMTrainer() { // Tries to deserialize a trainer from the given file and silently returns // false in case of failure. -bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_traineddata) { +bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, + const char *old_traineddata) { std::vector data; if (!LoadDataFromFile(filename, &data)) { return false; @@ -106,7 +111,8 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra return false; } if (IsIntMode()) { - tprintf("Error, %s is an integer (fast) model, cannot continue training\n", filename); + tprintf("Error, %s is an integer (fast) model, cannot continue training\n", + filename); return false; } if (((old_traineddata == nullptr || *old_traineddata == '\0') && @@ -114,7 +120,8 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra filename == old_traineddata) { return true; // Normal checkpoint load complete. 
} - tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(), recoder_.code_range()); + tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(), + recoder_.code_range()); if (old_traineddata == nullptr || *old_traineddata == '\0') { tprintf("Must supply the old traineddata for code conversion!\n"); return false; @@ -152,21 +159,23 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra // are implemented. // For other args see NetworkBuilder::InitNetwork. // Note: Be sure to call InitCharSet before InitNetwork! -bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index, int net_flags, - float weight_range, float learning_rate, float momentum, +bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index, + int net_flags, float weight_range, + float learning_rate, float momentum, float adam_beta) { mgr_.SetVersionString(mgr_.VersionString() + ":" + network_spec); adam_beta_ = adam_beta; learning_rate_ = learning_rate; momentum_ = momentum; SetNullChar(); - if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec, append_index, net_flags, - weight_range, &randomizer_, &network_)) { + if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec, + append_index, net_flags, weight_range, + &randomizer_, &network_)) { return false; } network_str_ += network_spec; - tprintf("Built network:%s from request %s\n", - network_->spec().c_str(), network_spec); + tprintf("Built network:%s from request %s\n", network_->spec().c_str(), + network_spec); tprintf( "Training parameters:\n Debug interval = %d," " weights = %g, learning rate = %g, momentum=%g\n", @@ -222,14 +231,14 @@ void LSTMTrainer::InitIterations() { // If the training sample is usable, grid searches for the optimal // dict_ratio/cert_offset, and returns the results in a string of space- // separated triplets of ratio,offset=worderr. 
-Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, int iteration, - double min_dict_ratio, double dict_ratio_step, - double max_dict_ratio, double min_cert_offset, - double cert_offset_step, double max_cert_offset, - std::string &results) { +Trainability LSTMTrainer::GridSearchDictParams( + const ImageData *trainingdata, int iteration, double min_dict_ratio, + double dict_ratio_step, double max_dict_ratio, double min_cert_offset, + double cert_offset_step, double max_cert_offset, std::string &results) { sample_iteration_ = iteration; NetworkIO fwd_outputs, targets; - Trainability result = PrepareForBackward(trainingdata, &fwd_outputs, &targets); + Trainability result = + PrepareForBackward(trainingdata, &fwd_outputs, &targets); if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == nullptr) { return result; } @@ -238,8 +247,10 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in std::vector truth_labels, ocr_labels, xcoords; ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels)); // NO-dict error. 
- RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr); - base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr); + RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), + nullptr); + base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, + nullptr); base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords); std::string truth_text = DecodeLabels(truth_labels); std::string ocr_text = DecodeLabels(ocr_labels); @@ -248,18 +259,21 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_); for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) { - for (double c = min_cert_offset; c < max_cert_offset; c += cert_offset_step) { - search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, nullptr); + for (double c = min_cert_offset; c < max_cert_offset; + c += cert_offset_step) { + search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, + nullptr); search.ExtractBestPathAsLabels(&ocr_labels, &xcoords); truth_text = DecodeLabels(truth_labels); ocr_text = DecodeLabels(ocr_labels); // This is destructive on both strings. 
double word_error = ComputeWordError(&truth_text, &ocr_text); - if ((r == min_dict_ratio && c == min_cert_offset) || !std::isfinite(word_error)) { + if ((r == min_dict_ratio && c == min_cert_offset) || + !std::isfinite(word_error)) { std::string t = DecodeLabels(truth_labels); - std::string o = DecodeLabels(ocr_labels); - tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, t.c_str(), o.c_str(), - word_error, truth_labels[0]); + std::string o = DecodeLabels(ocr_labels); + tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, + t.c_str(), o.c_str(), word_error, truth_labels[0]); } results += " " + std::to_string(r); results += "," + std::to_string(c); @@ -278,17 +292,20 @@ void LSTMTrainer::DebugNetwork() { // tesseract into memory ready for training. Returns false if nothing was // loaded. bool LSTMTrainer::LoadAllTrainingData(const std::vector &filenames, - CachingStrategy cache_strategy, bool randomly_rotate) { + CachingStrategy cache_strategy, + bool randomly_rotate) { randomly_rotate_ = randomly_rotate; training_data_.Clear(); - return training_data_.LoadDocuments(filenames, cache_strategy, LoadDataFromFile); + return training_data_.LoadDocuments(filenames, cache_strategy, + LoadDataFromFile); } // Keeps track of best and locally worst char error_rate and launches tests // using tester, when a new min or max is reached. // Writes checkpoints at appropriate times and builds and returns a log message // to indicate progress. Returns false if nothing interesting happened. 
-bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, std::string &log_msg) { +bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester, + std::string &log_msg) { PrepareLogMsg(log_msg); double error_rate = CharError(); int iteration = learning_iteration(); @@ -320,7 +337,8 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, std::string &log_msg) sub_trainer_.reset(); stall_iteration_ = learning_iteration() + kMinStallIterations; if (TransitionTrainingStage(kStageTransitionThreshold)) { - log_msg += " Transitioned to stage " + std::to_string(CurrentTrainingStage()); + log_msg += + " Transitioned to stage " + std::to_string(CurrentTrainingStage()); } SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_); if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) { @@ -386,7 +404,8 @@ void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const { // Appends iteration learning_iteration()/training_iteration()/ // sample_iteration() to the log_msg. -void LSTMTrainer::LogIterations(const char *intro_str, std::string &log_msg) const { +void LSTMTrainer::LogIterations(const char *intro_str, + std::string &log_msg) const { log_msg += intro_str; log_msg += " iteration " + std::to_string(learning_iteration()); log_msg += "/" + std::to_string(training_iteration()); @@ -396,7 +415,8 @@ void LSTMTrainer::LogIterations(const char *intro_str, std::string &log_msg) con // Returns true and increments the training_stage_ if the error rate has just // passed through the given threshold for the first time. bool LSTMTrainer::TransitionTrainingStage(float error_threshold) { - if (best_error_rate_ < error_threshold && training_stage_ + 1 < num_training_stages_) { + if (best_error_rate_ < error_threshold && + training_stage_ + 1 < num_training_stages_) { ++training_stage_; return true; } @@ -404,8 +424,8 @@ bool LSTMTrainer::TransitionTrainingStage(float error_threshold) { } // Writes to the given file. Returns false in case of error. 
-bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr, - TFile *fp) const { +bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, + const TessdataManager *mgr, TFile *fp) const { if (!LSTMRecognizer::Serialize(mgr, fp)) { return false; } @@ -470,7 +490,8 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, const TessdataMana return false; } std::vector sub_data; - if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, *sub_trainer_, &sub_data)) { + if (sub_trainer_ != nullptr && + !SaveTrainingDump(LIGHT, *sub_trainer_, &sub_data)) { return false; } if (!fp->Serialize(sub_data)) { @@ -587,11 +608,13 @@ void LSTMTrainer::StartSubtrainer(std::string &log_msg) { log_msg += " Failed to revert to previous best for trial!"; sub_trainer_.reset(); } else { - log_msg += " Trial sub_trainer_ from iteration " + std::to_string(sub_trainer_->training_iteration()); + log_msg += " Trial sub_trainer_ from iteration " + + std::to_string(sub_trainer_->training_iteration()); // Reduce learning rate so it doesn't diverge this time. sub_trainer_->ReduceLearningRates(this, log_msg); // If it fails again, we will wait twice as long before reverting again. - int stall_offset = learning_iteration() - sub_trainer_->learning_iteration(); + int stall_offset = + learning_iteration() - sub_trainer_->learning_iteration(); stall_iteration_ = learning_iteration() + 2 * stall_offset; sub_trainer_->stall_iteration_ = stall_iteration_; // Re-save the best trainer with the new learning rates and stall iteration. 
@@ -619,7 +642,8 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) { int end_iteration = training_iteration(); while (sub_trainer_->training_iteration() < end_iteration && sub_margin >= kSubTrainerMarginFraction) { - int target_iteration = sub_trainer_->training_iteration() + kNumPagesPerBatch; + int target_iteration = + sub_trainer_->training_iteration() + kNumPagesPerBatch; while (sub_trainer_->training_iteration() < target_iteration) { sub_trainer_->TrainOnLine(this, false); } @@ -631,12 +655,14 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) { sub_error = sub_trainer_->CharError(); sub_margin = (training_error - sub_error) / sub_error; } - if (sub_error < best_error_rate_ && sub_margin >= kSubTrainerMarginFraction) { + if (sub_error < best_error_rate_ && + sub_margin >= kSubTrainerMarginFraction) { // The sub_trainer_ has won the race to a new best. Switch to it. std::vector updated_trainer; SaveTrainingDump(LIGHT, *sub_trainer_, &updated_trainer); ReadTrainingDump(updated_trainer, *this); - log_msg += " Sub trainer wins at iteration " + std::to_string(training_iteration()); + log_msg += " Sub trainer wins at iteration " + + std::to_string(training_iteration()); log_msg += "\n"; return STR_REPLACED; } @@ -647,11 +673,13 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) { // Reduces network learning rates, either for everything, or for layers // independently, according to NF_LAYER_SPECIFIC_LR. 
-void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, std::string &log_msg) { +void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, + std::string &log_msg) { if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) { - int num_reduced = - ReduceLayerLearningRates(kLearningRateDecay, kNumAdjustmentIterations, samples_trainer); - log_msg += "\nReduced learning rate on layers: " + std::to_string(num_reduced); + int num_reduced = ReduceLayerLearningRates( + kLearningRateDecay, kNumAdjustmentIterations, samples_trainer); + log_msg += + "\nReduced learning rate on layers: " + std::to_string(num_reduced); } else { ScaleLearningRate(kLearningRateDecay); log_msg += "\nReduced learning rate to :" + std::to_string(learning_rate_); @@ -712,7 +740,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples, copy_trainer.SetIteration(iteration); // Train on the sample, but keep the update in updates_ instead of // applying to the weights. - const ImageData *trainingdata = copy_trainer.TrainOnLine(samples_trainer, true); + const ImageData *trainingdata = + copy_trainer.TrainOnLine(samples_trainer, true); if (trainingdata == nullptr) { continue; } @@ -727,7 +756,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples, samples_trainer->ReadTrainingDump(updated_trainer, layer_trainer); Network *layer = layer_trainer.GetLayer(layers[i]); // Update the weights in just the layer, using Adam if enabled. - layer->Update(0.0, momentum_, adam_beta_, layer_trainer.training_iteration_ + 1); + layer->Update(0.0, momentum_, adam_beta_, + layer_trainer.training_iteration_ + 1); // Zero the updates matrix again. layer->Update(0.0, 0.0, 0.0, 0); // Train again on the same sample, again holding back the updates. @@ -735,9 +765,10 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples, // Count the sign changes in the updates in layer vs in copy_trainer. 
float before_bad = bad_sums[ww][i]; float before_ok = ok_sums[ww][i]; - layer->CountAlternators(*copy_trainer.GetLayer(layers[i]), &ok_sums[ww][i], - &bad_sums[ww][i]); - float bad_frac = bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok; + layer->CountAlternators(*copy_trainer.GetLayer(layers[i]), + &ok_sums[ww][i], &bad_sums[ww][i]); + float bad_frac = + bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok; if (bad_frac > 0.0f) { bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac; } @@ -756,8 +787,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples, TFloat total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i]; TFloat frac_down = bad_sums[LR_DOWN][i] / total_down; TFloat frac_same = bad_sums[LR_SAME][i] / total_same; - tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(), lr * factor, - 100.0 * frac_down, lr, 100.0 * frac_same); + tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(), + lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same); if (frac_down < frac_same * kImprovementFraction) { tprintf(" REDUCED\n"); ScaleLayerLearningRate(layers[i], factor); @@ -781,9 +812,10 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples, // Converts the string to integer class labels, with appropriate null_char_s // in between if not in SimpleTextOutput mode. Returns false on failure. 
/* static */ -bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unicharset, - const UnicharCompress *recoder, bool simple_text, int null_char, - std::vector *labels) { +bool LSTMTrainer::EncodeString(const std::string &str, + const UNICHARSET &unicharset, + const UnicharCompress *recoder, bool simple_text, + int null_char, std::vector *labels) { if (str.c_str() == nullptr || str.length() <= 0) { tprintf("Empty truth string!\n"); return false; @@ -795,7 +827,8 @@ bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unichar labels->push_back(null_char); } std::string cleaned = unicharset.CleanupString(str.c_str()); - if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr, &err_index)) { + if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr, + &err_index)) { bool success = true; for (auto internal_label : internal_labels) { if (recoder != nullptr) { @@ -835,19 +868,23 @@ bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unichar // Performs forward-backward on the given trainingdata. // Returns a Trainability enum to indicate the suitability of the sample. -Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata, bool batch) { +Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata, + bool batch) { NetworkIO fwd_outputs, targets; - Trainability trainable = PrepareForBackward(trainingdata, &fwd_outputs, &targets); + Trainability trainable = + PrepareForBackward(trainingdata, &fwd_outputs, &targets); ++sample_iteration_; if (trainable == UNENCODABLE || trainable == NOT_BOXED) { return trainable; // Sample was unusable. } - bool debug = debug_interval_ > 0 && training_iteration() % debug_interval_ == 0; + bool debug = + debug_interval_ > 0 && training_iteration() % debug_interval_ == 0; // Run backprop on the output. 
NetworkIO bp_deltas; if (network_->IsTraining() && (trainable != PERFECT || - training_iteration() > last_perfect_training_iteration_ + perfect_delay_)) { + training_iteration() > + last_perfect_training_iteration_ + perfect_delay_)) { network_->Backward(debug, targets, &scratch_space_, &bp_deltas); network_->Update(learning_rate_, batch ? -1.0f : momentum_, adam_beta_, training_iteration_ + 1); @@ -864,18 +901,21 @@ Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata, bool batch) // Prepares the ground truth, runs forward, and prepares the targets. // Returns a Trainability enum to indicate the suitability of the sample. -Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs, +Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, + NetworkIO *fwd_outputs, NetworkIO *targets) { if (trainingdata == nullptr) { tprintf("Null trainingdata.\n"); return UNENCODABLE; } // Ensure repeatability of random elements even across checkpoints. 
- bool debug = debug_interval_ > 0 && training_iteration() % debug_interval_ == 0; + bool debug = + debug_interval_ > 0 && training_iteration() % debug_interval_ == 0; std::vector truth_labels; if (!EncodeString(trainingdata->transcription(), &truth_labels)) { tprintf("Can't encode transcription: '%s' in language '%s'\n", - trainingdata->transcription().c_str(), trainingdata->language().c_str()); + trainingdata->transcription().c_str(), + trainingdata->language().c_str()); return UNENCODABLE; } bool upside_down = false; @@ -908,8 +948,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw float image_scale; NetworkIO inputs; bool invert = trainingdata->boxes().empty(); - if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down, &image_scale, &inputs, - fwd_outputs)) { + if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down, + &image_scale, &inputs, fwd_outputs)) { tprintf("Image %s not trainable\n", trainingdata->imagefilename().c_str()); return UNENCODABLE; } @@ -917,12 +957,14 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw LossType loss_type = OutputLossType(); if (loss_type == LT_SOFTMAX) { if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) { - tprintf("Compute simple targets failed for %s!\n", trainingdata->imagefilename().c_str()); + tprintf("Compute simple targets failed for %s!\n", + trainingdata->imagefilename().c_str()); return UNENCODABLE; } } else if (loss_type == LT_CTC) { if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) { - tprintf("Compute CTC targets failed for %s!\n", trainingdata->imagefilename().c_str()); + tprintf("Compute CTC targets failed for %s!\n", + trainingdata->imagefilename().c_str()); return UNENCODABLE; } } else { @@ -936,7 +978,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw if (loss_type != LT_CTC) { LabelsFromOutputs(*targets, &truth_labels, &xcoords); } - if 
(!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels, *targets)) { + if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels, + *targets)) { tprintf("Input width was %d\n", inputs.Width()); return UNENCODABLE; } @@ -945,7 +988,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw targets->SubtractAllFromFloat(*fwd_outputs); if (debug_interval_ != 0) { if (truth_text != ocr_text) { - tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(), ocr_text.c_str()); + tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(), + ocr_text.c_str()); } } double char_error = ComputeCharError(truth_labels, ocr_labels); @@ -968,7 +1012,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw // restored. *this must always be the master trainer that retains the only // copy of the training data and language model. trainer is the model that is // actually serialized. -bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer &trainer, +bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, + const LSTMTrainer &trainer, std::vector *data) const { TFile fp; fp.OpenWrite(data); @@ -976,7 +1021,8 @@ bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, const LSTMT } // Restores the model to *this. 
-bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr, const char *data, int size) { +bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr, + const char *data, int size) { if (size == 0) { tprintf("Warning: data size is 0 in LSTMTrainer::ReadLocalTrainingDump\n"); return false; @@ -990,7 +1036,8 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr, const char * bool LSTMTrainer::SaveTraineddata(const char *filename) { std::vector recognizer_data; SaveRecognitionDump(&recognizer_data); - mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0], recognizer_data.size()); + mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0], + recognizer_data.size()); return mgr_.SaveFile(filename, SaveDataToFile); } @@ -1025,8 +1072,8 @@ void LSTMTrainer::FillErrorBuffer(double new_error, ErrorTypes type) { // Helper generates a map from each current recoder_ code (ie softmax index) // to the corresponding old_recoder code, or -1 if there isn't one. -std::vector LSTMTrainer::MapRecoder(const UNICHARSET &old_chset, - const UnicharCompress &old_recoder) const { +std::vector LSTMTrainer::MapRecoder( + const UNICHARSET &old_chset, const UnicharCompress &old_recoder) const { int num_new_codes = recoder_.code_range(); int num_new_unichars = GetUnicharset().size(); std::vector code_map(num_new_codes, -1); @@ -1045,9 +1092,10 @@ std::vector LSTMTrainer::MapRecoder(const UNICHARSET &old_chset, continue; } // The old unicharset must have the same unichar. - int old_uid = uid < num_new_unichars - ? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid)) - : old_chset.size() - 1; + int old_uid = + uid < num_new_unichars + ? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid)) + : old_chset.size() - 1; if (old_uid == INVALID_UNICHAR_ID) { continue; } @@ -1079,7 +1127,8 @@ void LSTMTrainer::InitCharSet() { // Helper computes and sets the null_char_. void LSTMTrainer::SetNullChar() { - null_char_ = GetUnicharset().has_special_codes() ? 
UNICHAR_BROKEN : GetUnicharset().size(); + null_char_ = GetUnicharset().has_special_codes() ? UNICHAR_BROKEN + : GetUnicharset().size(); RecodedCharID code; recoder_.EncodeUnichar(null_char_, &code); null_char_ = code(0); @@ -1103,7 +1152,8 @@ void LSTMTrainer::EmptyConstructor() { // as an image in the given window, and the corresponding labels at the // corresponding x_starts. // Returns false if the truth string is empty. -bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata, +bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, + const ImageData &trainingdata, const NetworkIO &fwd_outputs, const std::vector &truth_labels, const NetworkIO &outputs) { @@ -1118,12 +1168,15 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr std::vector xcoords; LabelsFromOutputs(outputs, &labels, &xcoords); std::string text = DecodeLabels(labels); - tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(), truth_text.c_str()); + tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(), + truth_text.c_str()); if (truth_text != text) { - tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), text.c_str()); + tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), + text.c_str()); } if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) { - tprintf("TRAINING activation path for truth string %s\n", truth_text.c_str()); + tprintf("TRAINING activation path for truth string %s\n", + truth_text.c_str()); DebugActivationPath(outputs, labels, xcoords); #ifndef GRAPHICS_DISABLED DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_); @@ -1140,11 +1193,12 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr #ifndef GRAPHICS_DISABLED // Displays the network targets as line a line graph. 
-void LSTMTrainer::DisplayTargets(const NetworkIO &targets, const char *window_name, - ScrollView **window) { +void LSTMTrainer::DisplayTargets(const NetworkIO &targets, + const char *window_name, ScrollView **window) { int width = targets.Width(); int num_features = targets.NumFeatures(); - Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale, window); + Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale, + window); for (int c = 0; c < num_features; ++c) { int color = c % (ScrollView::GREEN_YELLOW - 1) + 2; (*window)->Pen(static_cast(color)); @@ -1176,7 +1230,8 @@ void LSTMTrainer::DisplayTargets(const NetworkIO &targets, const char *window_na // Builds a no-compromises target where the first positions should be the // truth labels and the rest is padded with the null_char_. -bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs, const std::vector &truth_labels, +bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs, + const std::vector &truth_labels, NetworkIO *targets) { if (truth_labels.size() > targets->Width()) { tprintf("Error: transcription %s too long to fit into target of width %d\n", @@ -1197,18 +1252,19 @@ bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs, const std::vector // Builds a target using standard CTC. truth_labels should be pre-padded with // nulls wherever desired. They don't have to be between all labels. // outputs is input-output, as it gets clipped to minimum probability. -bool LSTMTrainer::ComputeCTCTargets(const std::vector &truth_labels, NetworkIO *outputs, - NetworkIO *targets) { +bool LSTMTrainer::ComputeCTCTargets(const std::vector &truth_labels, + NetworkIO *outputs, NetworkIO *targets) { // Bottom-clip outputs to a minimum probability. 
CTC::NormalizeProbs(outputs); - return CTC::ComputeCTCTargets(truth_labels, null_char_, outputs->float_array(), targets); + return CTC::ComputeCTCTargets(truth_labels, null_char_, + outputs->float_array(), targets); } // Computes network errors, and stores the results in the rolling buffers, // along with the supplied text_error. // Returns the delta error of the current sample (not running average.) -double LSTMTrainer::ComputeErrorRates(const NetworkIO &deltas, double char_error, - double word_error) { +double LSTMTrainer::ComputeErrorRates(const NetworkIO &deltas, + double char_error, double word_error) { UpdateErrorBuffer(ComputeRMSError(deltas), ET_RMS); // Delta error is the fraction of timesteps with >0.5 error in the top choice // score. If zero, then the top choice characters are guaranteed correct, @@ -1253,7 +1309,7 @@ double LSTMTrainer::ComputeWinnerError(const NetworkIO &deltas) { for (int t = 0; t < width; ++t) { const float *class_errs = deltas.f(t); for (int c = 0; c < num_classes; ++c) { - float abs_delta = fabs(class_errs[c]); + float abs_delta = std::fabs(class_errs[c]); // TODO(rays) Filtering cases where the delta is very large to cut out // GT errors doesn't work. Find a better way or get better truth. if (0.5 <= abs_delta) { @@ -1292,7 +1348,8 @@ double LSTMTrainer::ComputeCharError(const std::vector &truth_str, // Computes word recall error rate using a very simple bag of words algorithm. // NOTE that this is destructive on both input strings. 
-double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_str) { +double LSTMTrainer::ComputeWordError(std::string *truth_str, + std::string *ocr_str) { using StrMap = std::unordered_map>; std::vector truth_words = split(*truth_str, ' '); if (truth_words.empty()) { @@ -1300,7 +1357,7 @@ double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_st } std::vector ocr_words = split(*ocr_str, ' '); StrMap word_counts; - for (auto truth_word : truth_words) { + for (const auto &truth_word : truth_words) { std::string truth_word_string(truth_word.c_str()); auto it = word_counts.find(truth_word_string); if (it == word_counts.end()) { @@ -1309,7 +1366,7 @@ double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_st ++it->second; } } - for (auto ocr_word : ocr_words) { + for (const auto &ocr_word : ocr_words) { std::string ocr_word_string(ocr_word.c_str()); auto it = word_counts.find(ocr_word_string); if (it == word_counts.end()) { @@ -1333,7 +1390,8 @@ void LSTMTrainer::UpdateErrorBuffer(double new_error, ErrorTypes type) { int index = training_iteration_ % kRollingBufferSize_; error_buffers_[type][index] = new_error; // Compute the mean error. 
- int mean_count = std::min(training_iteration_ + 1, error_buffers_[type].size()); + int mean_count = + std::min(training_iteration_ + 1, error_buffers_[type].size()); double buffer_sum = 0.0; for (int i = 0; i < mean_count; ++i) { buffer_sum += error_buffers_[type][i]; @@ -1353,8 +1411,9 @@ void LSTMTrainer::RollErrorBuffers() { } ++training_iteration_; if (debug_interval_ != 0) { - tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n", error_rates_[ET_RMS], - error_rates_[ET_DELTA], error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR], + tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n", + error_rates_[ET_RMS], error_rates_[ET_DELTA], + error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR], error_rates_[ET_SKIP_RATIO]); } } @@ -1364,11 +1423,14 @@ void LSTMTrainer::RollErrorBuffers() { // Tester is an externally supplied callback function that tests on some // data set with a given model and records the error rates in a graph. std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, - const std::vector &model_data, TestCallback tester) { - if (error_rate > best_error_rate_ && iteration < best_iteration_ + kErrorGraphInterval) { + const std::vector &model_data, + const TestCallback &tester) { + if (error_rate > best_error_rate_ && + iteration < best_iteration_ + kErrorGraphInterval) { // Too soon to record a new point. if (tester != nullptr && !worst_model_data_.empty()) { - mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size()); + mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], + worst_model_data_.size()); return tester(worst_iteration_, nullptr, mgr_, CurrentTrainingStage()); } else { return ""; @@ -1384,8 +1446,10 @@ std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, if (error_rate < best_error_rate_) { // This is a new (global) minimum. 
if (tester != nullptr && !worst_model_data_.empty()) { - mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size()); - result = tester(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage()); + mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], + worst_model_data_.size()); + result = tester(worst_iteration_, worst_error_rates_, mgr_, + CurrentTrainingStage()); worst_model_data_.clear(); best_model_data_ = model_data; } @@ -1397,23 +1461,28 @@ std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, // Compute 2% decay time. double two_percent_more = error_rate + 2.0; int i; - for (i = best_error_history_.size() - 1; i >= 0 && best_error_history_[i] < two_percent_more; - --i) { + for (i = best_error_history_.size() - 1; + i >= 0 && best_error_history_[i] < two_percent_more; --i) { } int old_iteration = i >= 0 ? best_error_iterations_[i] : 0; improvement_steps_ = iteration - old_iteration; - tprintf("2 Percent improvement time=%d, best error was %g @ %d\n", improvement_steps_, - i >= 0 ? best_error_history_[i] : 100.0, old_iteration); + tprintf("2 Percent improvement time=%d, best error was %g @ %d\n", + improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0, + old_iteration); } else if (error_rate > best_error_rate_) { // This is a new (local) maximum. if (tester != nullptr) { if (!best_model_data_.empty()) { - mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0], best_model_data_.size()); - result = tester(best_iteration_, best_error_rates_, mgr_, CurrentTrainingStage()); + mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0], + best_model_data_.size()); + result = tester(best_iteration_, best_error_rates_, mgr_, + CurrentTrainingStage()); } else if (!worst_model_data_.empty()) { // Allow for multiple data points with "worst" error rate. 
- mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size()); - result = tester(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage()); + mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], + worst_model_data_.size()); + result = tester(worst_iteration_, worst_error_rates_, mgr_, + CurrentTrainingStage()); } if (result.length() > 0) { best_model_data_.clear(); diff --git a/src/training/unicharset/lstmtrainer.h b/src/training/unicharset/lstmtrainer.h index 292b2ef5..be309195 100644 --- a/src/training/unicharset/lstmtrainer.h +++ b/src/training/unicharset/lstmtrainer.h @@ -73,7 +73,8 @@ class LSTMTrainer; // Function to compute and record error rates on some external test set(s). // Args are: iteration, mean errors, model, training stage. // Returns a string containing logging information about the tests. -using TestCallback = std::function; +using TestCallback = std::function; // Trainer class for LSTM networks. Most of the effort is in creating the // ideal target outputs from the transcription. A box file is used if it is @@ -82,8 +83,8 @@ using TestCallback = std::function &filenames, CachingStrategy cache_strategy, + bool LoadAllTrainingData(const std::vector &filenames, + CachingStrategy cache_strategy, bool randomly_rotate); // Keeps track of best and locally worst error rate, using internally computed // values. See MaintainCheckpointsSpecific for more detail. - bool MaintainCheckpoints(TestCallback tester, std::string &log_msg); + bool MaintainCheckpoints(const TestCallback &tester, std::string &log_msg); // Keeps track of best and locally worst error_rate (whatever it is) and // launches tests using rec_model, when a new min or max is reached. // Writes checkpoints using train_model at appropriate times and builds and // returns a log message to indicate progress. Returns false if nothing // interesting happened. 
- bool MaintainCheckpointsSpecific(int iteration, const std::vector *train_model, - const std::vector *rec_model, TestCallback tester, - std::string &log_msg); + bool MaintainCheckpointsSpecific(int iteration, + const std::vector *train_model, + const std::vector *rec_model, + TestCallback tester, std::string &log_msg); // Builds a string containing a progress message with current error rates. void PrepareLogMsg(std::string &log_msg) const; // Appends iteration learning_iteration()/training_iteration()/ @@ -214,7 +218,8 @@ public: } // Writes to the given file. Returns false in case of error. - bool Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr, TFile *fp) const; + bool Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr, + TFile *fp) const; // Reads from the given file. Returns false in case of error. bool DeSerialize(const TessdataManager *mgr, TFile *fp); @@ -240,18 +245,20 @@ public: // Even if it looks like all weights should remain the same, an adjustment // will be made to guarantee a different result when reverting to an old best. // Returns the number of layer learning rates that were reduced. - int ReduceLayerLearningRates(TFloat factor, int num_samples, LSTMTrainer *samples_trainer); + int ReduceLayerLearningRates(TFloat factor, int num_samples, + LSTMTrainer *samples_trainer); // Converts the string to integer class labels, with appropriate null_char_s // in between if not in SimpleTextOutput mode. Returns false on failure. bool EncodeString(const std::string &str, std::vector *labels) const { - return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : nullptr, - SimpleTextOutput(), null_char_, labels); + return EncodeString(str, GetUnicharset(), + IsRecoding() ? &recoder_ : nullptr, SimpleTextOutput(), + null_char_, labels); } // Static version operates on supplied unicharset, encoder, simple_text. 
static bool EncodeString(const std::string &str, const UNICHARSET &unicharset, - const UnicharCompress *recoder, bool simple_text, int null_char, - std::vector *labels); + const UnicharCompress *recoder, bool simple_text, + int null_char, std::vector *labels); // Performs forward-backward on the given trainingdata. // Returns the sample that was used or nullptr if the next sample was deemed @@ -259,7 +266,8 @@ public: // holds the training samples. const ImageData *TrainOnLine(LSTMTrainer *samples_trainer, bool batch) { int sample_index = sample_iteration(); - const ImageData *image = samples_trainer->training_data_.GetPageBySerial(sample_index); + const ImageData *image = + samples_trainer->training_data_.GetPageBySerial(sample_index); if (image != nullptr) { Trainability trainable = TrainOnLine(image, batch); if (trainable == UNENCODABLE || trainable == NOT_BOXED) { @@ -274,30 +282,34 @@ public: // Prepares the ground truth, runs forward, and prepares the targets. // Returns a Trainability enum to indicate the suitability of the sample. - Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs, - NetworkIO *targets); + Trainability PrepareForBackward(const ImageData *trainingdata, + NetworkIO *fwd_outputs, NetworkIO *targets); // Writes the trainer to memory, so that the current training state can be // restored. *this must always be the master trainer that retains the only // copy of the training data and language model. trainer is the model that is // actually serialized. - bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer &trainer, + bool SaveTrainingDump(SerializeAmount serialize_amount, + const LSTMTrainer &trainer, std::vector *data) const; // Reads previously saved trainer from memory. *this must always be the // master trainer that retains the only copy of the training data and // language model. trainer is the model that is restored. 
- bool ReadTrainingDump(const std::vector &data, LSTMTrainer &trainer) const { + bool ReadTrainingDump(const std::vector &data, + LSTMTrainer &trainer) const { if (data.empty()) { return false; } return ReadSizedTrainingDump(&data[0], data.size(), trainer); } - bool ReadSizedTrainingDump(const char *data, int size, LSTMTrainer &trainer) const { + bool ReadSizedTrainingDump(const char *data, int size, + LSTMTrainer &trainer) const { return trainer.ReadLocalTrainingDump(&mgr_, data, size); } // Restores the model to *this. - bool ReadLocalTrainingDump(const TessdataManager *mgr, const char *data, int size); + bool ReadLocalTrainingDump(const TessdataManager *mgr, const char *data, + int size); // Sets up the data for MaintainCheckpoints from a light ReadTrainingDump. void SetupCheckpointInfo(); @@ -334,26 +346,30 @@ protected: // corresponding x_starts. // Returns false if the truth string is empty. bool DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata, - const NetworkIO &fwd_outputs, const std::vector &truth_labels, + const NetworkIO &fwd_outputs, + const std::vector &truth_labels, const NetworkIO &outputs); // Displays the network targets as line a line graph. - void DisplayTargets(const NetworkIO &targets, const char *window_name, ScrollView **window); + void DisplayTargets(const NetworkIO &targets, const char *window_name, + ScrollView **window); // Builds a no-compromises target where the first positions should be the // truth labels and the rest is padded with the null_char_. - bool ComputeTextTargets(const NetworkIO &outputs, const std::vector &truth_labels, + bool ComputeTextTargets(const NetworkIO &outputs, + const std::vector &truth_labels, NetworkIO *targets); // Builds a target using standard CTC. truth_labels should be pre-padded with // nulls wherever desired. They don't have to be between all labels. // outputs is input-output, as it gets clipped to minimum probability. 
- bool ComputeCTCTargets(const std::vector &truth_labels, NetworkIO *outputs, - NetworkIO *targets); + bool ComputeCTCTargets(const std::vector &truth_labels, + NetworkIO *outputs, NetworkIO *targets); // Computes network errors, and stores the results in the rolling buffers, // along with the supplied text_error. // Returns the delta error of the current sample (not running average.) - double ComputeErrorRates(const NetworkIO &deltas, double char_error, double word_error); + double ComputeErrorRates(const NetworkIO &deltas, double char_error, + double word_error); // Computes the network activation RMS error rate. double ComputeRMSError(const NetworkIO &deltas); @@ -366,7 +382,8 @@ protected: double ComputeWinnerError(const NetworkIO &deltas); // Computes a very simple bag of chars char error rate. - double ComputeCharError(const std::vector &truth_str, const std::vector &ocr_str); + double ComputeCharError(const std::vector &truth_str, + const std::vector &ocr_str); // Computes a very simple bag of words word recall error rate. // NOTE that this is destructive on both input strings. double ComputeWordError(std::string *truth_str, std::string *ocr_str); @@ -380,8 +397,9 @@ protected: // Given that error_rate is either a new min or max, updates the best/worst // error rates, and record of progress. 
- std::string UpdateErrorGraph(int iteration, double error_rate, const std::vector &model_data, - TestCallback tester); + std::string UpdateErrorGraph(int iteration, double error_rate, + const std::vector &model_data, + const TestCallback &tester); protected: #ifndef GRAPHICS_DISABLED diff --git a/src/viewer/svutil.cpp b/src/viewer/svutil.cpp index e57814ce..eec8627e 100644 --- a/src/viewer/svutil.cpp +++ b/src/viewer/svutil.cpp @@ -74,9 +74,9 @@ void SVSync::StartProcess(const char *executable, const char *args) { STARTUPINFO start_info; PROCESS_INFORMATION proc_info; GetStartupInfo(&start_info); - if (!CreateProcess(nullptr, const_cast(proc.c_str()), nullptr, nullptr, FALSE, - CREATE_NO_WINDOW | DETACHED_PROCESS, nullptr, nullptr, &start_info, - &proc_info)) + if (!CreateProcess(nullptr, const_cast(proc.c_str()), nullptr, + nullptr, FALSE, CREATE_NO_WINDOW | DETACHED_PROCESS, + nullptr, nullptr, &start_info, &proc_info)) return; # else int pid = fork(); @@ -131,13 +131,13 @@ SVSemaphore::SVSemaphore() { } SVSemaphore::~SVSemaphore() { -#ifdef _WIN32 +# ifdef _WIN32 CloseHandle(semaphore_); -#elif defined(__APPLE__) +# elif defined(__APPLE__) sem_close(semaphore_); -#else +# else sem_close(&semaphore_); -#endif +# endif } void SVSemaphore::Signal() { @@ -243,14 +243,15 @@ static const char *ScrollViewProg() { } // The arguments to the program to invoke to start ScrollView -static std::string ScrollViewCommand(std::string scrollview_path) { +static std::string ScrollViewCommand(const std::string &scrollview_path) { // The following ugly ifdef is to enable the output of the java runtime // to be sent down a black hole on non-windows to ignore all the // exceptions in piccolo. Ideally piccolo would be debugged to make // this unnecessary. // Also the path has to be separated by ; on windows and : otherwise. 
# ifdef _WIN32 - const char cmd_template[] = "-Djava.library.path=\"%s\" -jar \"%s/ScrollView.jar\""; + const char cmd_template[] = + "-Djava.library.path=\"%s\" -jar \"%s/ScrollView.jar\""; # else const char cmd_template[] = @@ -289,14 +290,15 @@ SVNetwork::SVNetwork(const char *hostname, int port) { # endif // _WIN32 if (getaddrinfo(hostname, port_string.c_str(), nullptr, &addr_info) != 0) { - std::cerr << "Error resolving name for ScrollView host " << std::string(hostname) << ":" << port - << std::endl; + std::cerr << "Error resolving name for ScrollView host " + << std::string(hostname) << ":" << port << std::endl; # ifdef _WIN32 WSACleanup(); # endif // _WIN32 } - stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, addr_info->ai_protocol); + stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, + addr_info->ai_protocol); if (stream_ < 0) { std::cerr << "Failed to open socket" << std::endl; @@ -324,7 +326,8 @@ SVNetwork::SVNetwork(const char *hostname, int port) { Close(); for (;;) { - stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, addr_info->ai_protocol); + stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, + addr_info->ai_protocol); if (stream_ >= 0) { if (connect(stream_, addr_info->ai_addr, addr_info->ai_addrlen) == 0) { break; diff --git a/src/wordrec/chop.cpp b/src/wordrec/chop.cpp index 78a04240..64db4a29 100644 --- a/src/wordrec/chop.cpp +++ b/src/wordrec/chop.cpp @@ -111,7 +111,7 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { if (static_cast(length) == 0) { return (0); } - angle = static_cast(floor(asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5)); + angle = static_cast(floor(std::asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5)); /* Use dot product */ if (vector1.dot(vector2) < 0) { diff --git a/src/wordrec/findseam.cpp b/src/wordrec/findseam.cpp index 745c0719..74a0578c 100644 --- a/src/wordrec/findseam.cpp +++ b/src/wordrec/findseam.cpp @@ -46,7 
+46,7 @@ /* How many to keep */ #define MAX_NUM_SEAMS 150 /* How many to keep */ -#define NO_FULL_PRIORITY -1 /* Special marker for pri. */ +#define NO_FULL_PRIORITY (-1) // Special marker for pri. /* Evaluate right away */ #define BAD_PRIORITY 9999.0 diff --git a/src/wordrec/gradechop.cpp b/src/wordrec/gradechop.cpp index bd7f6028..6f8c98fd 100644 --- a/src/wordrec/gradechop.cpp +++ b/src/wordrec/gradechop.cpp @@ -51,7 +51,7 @@ PRIORITY Wordrec::grade_split_length(SPLIT *split) { if (split_length <= 0) { grade = 0; } else { - grade = sqrt(split_length) * chop_split_dist_knob; + grade = std::sqrt(split_length) * chop_split_dist_knob; } return (std::max(0.0f, grade)); diff --git a/src/wordrec/language_model.cpp b/src/wordrec/language_model.cpp index 57d548d3..83e3a6b9 100644 --- a/src/wordrec/language_model.cpp +++ b/src/wordrec/language_model.cpp @@ -984,8 +984,8 @@ float LanguageModel::ComputeNgramCost(const char *unichar, float certainty, floa *found_small_prob = true; prob = language_model_ngram_small_prob; } - *ngram_cost = -1.0 * log2(prob); - float ngram_and_classifier_cost = -1.0 * log2(CertaintyScore(certainty) / denom) + + *ngram_cost = -1 * std::log2(prob); + float ngram_and_classifier_cost = -1 * std::log2(CertaintyScore(certainty) / denom) + *ngram_cost * language_model_ngram_scale_factor; if (language_model_debug_level > 1) { tprintf("-log [ p(%s) * p(%s | %s) ] = -log2(%g*%g) = %g\n", unichar, unichar, context_ptr, @@ -1341,24 +1341,24 @@ void LanguageModel::ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float int permuter = vse.dawg_info->permuter; if (permuter == NUMBER_PERM || permuter == USER_PATTERN_PERM) { if (vse.consistency_info.num_digits == vse.length) { - features[PTRAIN_DIGITS_SHORT + len] = 1.0; + features[PTRAIN_DIGITS_SHORT + len] = 1.0f; } else { - features[PTRAIN_NUM_SHORT + len] = 1.0; + features[PTRAIN_NUM_SHORT + len] = 1.0f; } } else if (permuter == DOC_DAWG_PERM) { - features[PTRAIN_DOC_SHORT + len] = 1.0; + 
features[PTRAIN_DOC_SHORT + len] = 1.0f; } else if (permuter == SYSTEM_DAWG_PERM || permuter == USER_DAWG_PERM || permuter == COMPOUND_PERM) { - features[PTRAIN_DICT_SHORT + len] = 1.0; + features[PTRAIN_DICT_SHORT + len] = 1.0f; } else if (permuter == FREQ_DAWG_PERM) { - features[PTRAIN_FREQ_SHORT + len] = 1.0; + features[PTRAIN_FREQ_SHORT + len] = 1.0f; } } // Record shape cost feature (normalized by path length). features[PTRAIN_SHAPE_COST_PER_CHAR] = vse.associate_stats.shape_cost / static_cast(vse.length); // Record ngram cost. (normalized by the path length). - features[PTRAIN_NGRAM_COST_PER_CHAR] = 0.0; + features[PTRAIN_NGRAM_COST_PER_CHAR] = 0.0f; if (vse.ngram_info != nullptr) { features[PTRAIN_NGRAM_COST_PER_CHAR] = vse.ngram_info->ngram_cost / static_cast(vse.length); @@ -1369,7 +1369,7 @@ void LanguageModel::ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float features[PTRAIN_NUM_BAD_CASE] = vse.consistency_info.NumInconsistentCase(); features[PTRAIN_XHEIGHT_CONSISTENCY] = vse.consistency_info.xht_decision; features[PTRAIN_NUM_BAD_CHAR_TYPE] = - vse.dawg_info == nullptr ? vse.consistency_info.NumInconsistentChartype() : 0.0; + vse.dawg_info == nullptr ? vse.consistency_info.NumInconsistentChartype() : 0.0f; features[PTRAIN_NUM_BAD_SPACING] = vse.consistency_info.NumInconsistentSpaces(); // Disabled this feature for now due to its poor performance. // features[PTRAIN_NUM_BAD_FONT] = vse.consistency_info.inconsistent_font; diff --git a/src/wordrec/params_model.cpp b/src/wordrec/params_model.cpp index 6578ef2e..3b57dc34 100644 --- a/src/wordrec/params_model.cpp +++ b/src/wordrec/params_model.cpp @@ -94,7 +94,7 @@ bool ParamsModel::Equivalent(const ParamsModel &that) const { } for (unsigned i = 0; i < weights_vec_[p].size(); i++) { if (weights_vec_[p][i] != that.weights_vec_[p][i] && - fabs(weights_vec_[p][i] - that.weights_vec_[p][i]) > epsilon) { + std::fabs(weights_vec_[p][i] - that.weights_vec_[p][i]) > epsilon) { return false; } }