Optimize performance with clang-tidy

The code was partially formatted with clang-format and optimized with

    clang-tidy --checks="-*,perfor*" --fix src/*/*.cpp

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2021-11-14 15:54:04 +01:00
parent e5011c545a
commit d8d63fd71b
53 changed files with 2091 additions and 1375 deletions

View File

@ -37,7 +37,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
tesseract::WritingDirection writing_direction;
tesseract::TextlineOrder textline_order;
float deskew_angle;
it->Orientation(&orientation, &writing_direction, &textline_order, &deskew_angle);
it->Orientation(&orientation, &writing_direction, &textline_order,
&deskew_angle);
return orientation;
}
@ -49,7 +50,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
* method currently only inserts a 'textangle' property to indicate the rotation
* direction and does not add any baseline information to the hocr string.
*/
static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel level,
static void AddBaselineCoordsTohOCR(const PageIterator *it,
PageIteratorLevel level,
std::stringstream &hocr_str) {
tesseract::Orientation orientation = GetBlockTextOrientation(it);
if (orientation != ORIENTATION_PAGE_UP) {
@ -82,7 +84,8 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel le
double p1 = (y2 - y1) / static_cast<double>(x2 - x1);
double p0 = y1 - p1 * x1;
hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " " << round(p0 * 1000.0) / 1000.0;
hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " "
<< round(p0 * 1000.0) / 1000.0;
}
static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
@ -91,7 +94,8 @@ static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
it->BoundingBox(level, &left, &top, &right, &bottom);
// This is the only place we use double quotes instead of single quotes,
// but it may be too late to change for consistency
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " " << bottom;
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " "
<< bottom;
// Add baseline coordinates & heights for textlines only.
if (level == RIL_TEXTLINE) {
AddBaselineCoordsTohOCR(it, level, hocr_str);
@ -99,8 +103,8 @@ static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
float row_height, descenders, ascenders; // row attributes
it->RowAttributes(&row_height, &descenders, &ascenders);
// TODO(rays): Do we want to limit these to a single decimal place?
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders << "; x_ascenders "
<< ascenders;
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders
<< "; x_ascenders " << ascenders;
}
hocr_str << "\">";
}
@ -128,7 +132,8 @@ char *TessBaseAPI::GetHOCRText(int page_number) {
* Returned string must be freed with the delete [] operator.
*/
char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
if (tesseract_ == nullptr ||
(page_res_ == nullptr && Recognize(monitor) < 0)) {
return nullptr;
}
@ -147,13 +152,16 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
#ifdef _WIN32
// convert input name from ANSI encoding to utf-8
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
int str16_len =
MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
wchar_t *uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
int utf8_len =
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str,
str16_len);
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
0, nullptr, nullptr);
char *utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
nullptr, nullptr);
input_file_ = utf8_str;
delete[] uni16_str;
delete[] utf8_str;
@ -174,8 +182,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
hocr_str << "unknown";
}
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " " << rect_width_ << " "
<< rect_height_ << "; ppageno " << page_number
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
<< rect_width_ << " " << rect_height_ << "; ppageno " << page_number
<< "; scan_res " << GetSourceYResolution() << " "
<< GetSourceYResolution() << "'>\n";
@ -230,7 +238,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
// Now, process the word...
int32_t lstm_choice_mode = tesseract_->lstm_choice_mode;
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *rawTimestepMap = nullptr;
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*rawTimestepMap = nullptr;
std::vector<std::vector<std::pair<const char *, float>>> *CTCMap = nullptr;
if (lstm_choice_mode) {
CTCMap = res_it->GetBestLSTMSymbolChoices();
@ -244,10 +253,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
int pointsize, font_id;
const char *font_name;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
&smallcaps, &pointsize, &font_id);
hocr_str << " title='bbox " << left << " " << top << " " << right << " " << bottom
<< "; x_wconf " << static_cast<int>(res_it->Confidence(RIL_WORD));
font_name =
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
&serif, &smallcaps, &pointsize, &font_id);
hocr_str << " title='bbox " << left << " " << top << " " << right << " "
<< bottom << "; x_wconf "
<< static_cast<int>(res_it->Confidence(RIL_WORD));
if (font_info) {
if (font_name) {
hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
@ -287,31 +298,36 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
hocr_str << "<em>";
}
do {
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
const std::unique_ptr<const char[]> grapheme(
res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != 0) {
if (hocr_boxes) {
res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes " << left << " " << top
<< " " << right << " " << bottom << "; x_conf " << res_it->Confidence(RIL_SYMBOL)
<< "'>";
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
<< left << " " << top << " " << right << " " << bottom
<< "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
}
hocr_str << HOcrEscape(grapheme.get()).c_str();
if (hocr_boxes) {
hocr_str << "</span>";
tesseract::ChoiceIterator ci(*res_it);
if (lstm_choice_mode == 1 && ci.Timesteps() != nullptr) {
std::vector<std::vector<std::pair<const char *, float>>> *symbol = ci.Timesteps();
std::vector<std::vector<std::pair<const char *, float>>> *symbol =
ci.Timesteps();
hocr_str << "\n <span class='ocr_symbol'"
<< " id='"
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
for (auto timestep : *symbol) {
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt
<< "'>";
for (const auto &timestep : *symbol) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto conf : timestep) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << int(conf.second * 100) << "'>"
<< HOcrEscape(conf.first).c_str() << "</span>";
++ccnt;
@ -324,16 +340,18 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
} else if (lstm_choice_mode == 2) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
do {
const char *choice = ci.GetUTF8Text();
float choiceconf = ci.Confidence();
if (choice != nullptr) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << choiceconf << "'>" << HOcrEscape(choice).c_str()
<< "</span>";
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << choiceconf << "'>"
<< HOcrEscape(choice).c_str() << "</span>";
ccnt++;
}
} while (ci.Next());
@ -352,18 +370,20 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
// If the lstm choice mode is required it is added here
if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
for (auto symbol : *rawTimestepMap) {
for (const auto &symbol : *rawTimestepMap) {
hocr_str << "\n <span class='ocr_symbol'"
<< " id='"
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
for (auto timestep : symbol) {
for (const auto &timestep : symbol) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto conf : timestep) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << int(conf.second * 100) << "'>"
<< HOcrEscape(conf.first).c_str() << "</span>";
++ccnt;
@ -375,11 +395,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
++scnt;
}
} else if (lstm_choice_mode == 2 && !hocr_boxes && CTCMap != nullptr) {
for (auto timestep : *CTCMap) {
for (const auto &timestep : *CTCMap) {
if (timestep.size() > 0) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto &j : timestep) {
float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
if (conf < 0.0f) {
@ -390,9 +411,10 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << conf << "'>" << HOcrEscape(j.first).c_str()
<< "</span>";
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << conf << "'>"
<< HOcrEscape(j.first).c_str() << "</span>";
ccnt++;
}
hocr_str << "</span>";

View File

@ -35,6 +35,7 @@
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <limits>
#include <memory>
@ -189,11 +190,11 @@ void EquationDetect::IdentifySpecialText(BLOBNBOX *blobnbox, const int height_th
const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8;
// The scores here are negative, so the max/min == fabs(min/max).
// float ratio = fmax(lang_score, equ_score) / fmin(lang_score, equ_score);
const float diff = fabs(lang_score - equ_score);
const float diff = std::fabs(lang_score - equ_score);
BlobSpecialTextType type = BSTT_NONE;
// Classification.
if (fmax(lang_score, equ_score) < kConfScoreTh) {
if (std::fmax(lang_score, equ_score) < kConfScoreTh) {
// If both score are very small, then mark it as unclear.
type = BSTT_UNCLEAR;
} else if (diff > kConfDiffTh && equ_score > lang_score) {
@ -727,7 +728,7 @@ int EquationDetect::CountAlignment(const std::vector<int> &sorted_vec, const int
if (sorted_vec.empty()) {
return 0;
}
const int kDistTh = static_cast<int>(round(0.03f * resolution_));
const int kDistTh = static_cast<int>(std::round(0.03f * resolution_));
auto pos = std::upper_bound(sorted_vec.begin(), sorted_vec.end(), val);
if (pos > sorted_vec.begin()) {
--pos;
@ -772,7 +773,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() {
ASSERT_HOST(cps_super_bbox_);
std::vector<ColPartition *> new_seeds;
const int kMarginDiffTh = IntCastRounded(0.5 * lang_tesseract_->source_resolution());
const int kGapTh = static_cast<int>(round(1.0f * lang_tesseract_->source_resolution()));
const int kGapTh = static_cast<int>(std::round(1.0f * lang_tesseract_->source_resolution()));
ColPartitionGridSearch search(part_grid_);
search.SetUniqueMode(true);
// The center x coordinate of the cp_super_bbox_.
@ -923,8 +924,8 @@ bool EquationDetect::IsInline(const bool search_bottom, const int textparts_line
// Check if neighbor and part is inline similar.
const float kHeightRatioTh = 0.5;
const int kYGapTh = textparts_linespacing > 0
? textparts_linespacing + static_cast<int>(round(0.02f * resolution_))
: static_cast<int>(round(0.05f * resolution_)); // Default value.
? textparts_linespacing + static_cast<int>(std::round(0.02f * resolution_))
: static_cast<int>(std::round(0.05f * resolution_)); // Default value.
if (part_box.x_overlap(neighbor_box) && // Location feature.
part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing.
// Geo feature.
@ -978,9 +979,9 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition *part) {
ColPartitionGridSearch search(part_grid_);
ColPartition *neighbor = nullptr;
const TBOX &part_box(part->bounding_box());
const int kXGapTh = static_cast<int>(round(0.5f * resolution_));
const int kRadiusTh = static_cast<int>(round(3.0f * resolution_));
const int kYGapTh = static_cast<int>(round(0.5f * resolution_));
const int kXGapTh = static_cast<int>(std::round(0.5f * resolution_));
const int kRadiusTh = static_cast<int>(std::round(3.0f * resolution_));
const int kYGapTh = static_cast<int>(std::round(0.5f * resolution_));
// Here we use a simple approximation algorithm: from the center of part, We
// perform the radius search, and check if we can find a neighboring partition
@ -1080,7 +1081,7 @@ void EquationDetect::ExpandSeedHorizontal(const bool search_left, ColPartition *
std::vector<ColPartition *> *parts_to_merge) {
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr);
const float kYOverlapTh = 0.6;
const int kXGapTh = static_cast<int>(round(0.2f * resolution_));
const int kXGapTh = static_cast<int>(std::round(0.2f * resolution_));
ColPartitionGridSearch search(part_grid_);
const TBOX &seed_box(seed->bounding_box());
@ -1132,7 +1133,7 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
std::vector<ColPartition *> *parts_to_merge) {
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr && cps_super_bbox_ != nullptr);
const float kXOverlapTh = 0.4;
const int kYGapTh = static_cast<int>(round(0.2f * resolution_));
const int kYGapTh = static_cast<int>(std::round(0.2f * resolution_));
ColPartitionGridSearch search(part_grid_);
const TBOX &seed_box(seed->bounding_box());
@ -1210,8 +1211,8 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
}
bool EquationDetect::IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const {
const int kXGapTh = static_cast<int>(round(0.25f * resolution_));
const int kYGapTh = static_cast<int>(round(0.05f * resolution_));
const int kXGapTh = static_cast<int>(std::round(0.25f * resolution_));
const int kYGapTh = static_cast<int>(std::round(0.05f * resolution_));
// Check geometric feature.
if (part_box.height() > seed_box.height() || part_box.width() > seed_box.width()) {
@ -1266,7 +1267,7 @@ void EquationDetect::ProcessMathBlockSatelliteParts() {
int med_height = text_box.height();
if (text_parts.size() % 2 == 0 && text_parts.size() > 1) {
const TBOX &text_box = text_parts[text_parts.size() / 2 - 1]->bounding_box();
med_height = static_cast<int>(round(0.5f * (text_box.height() + med_height)));
med_height = static_cast<int>(std::round(0.5f * (text_box.height() + med_height)));
}
// Iterate every text_parts and check if it is a math block satellite.
@ -1348,7 +1349,7 @@ bool EquationDetect::IsMathBlockSatellite(ColPartition *part,
ColPartition *EquationDetect::SearchNNVertical(const bool search_bottom, const ColPartition *part) {
ASSERT_HOST(part);
ColPartition *nearest_neighbor = nullptr, *neighbor = nullptr;
const int kYGapTh = static_cast<int>(round(resolution_ * 0.5f));
const int kYGapTh = static_cast<int>(std::round(resolution_ * 0.5f));
ColPartitionGridSearch search(part_grid_);
search.SetUniqueMode(true);
@ -1383,7 +1384,7 @@ bool EquationDetect::IsNearMathNeighbor(const int y_gap, const ColPartition *nei
if (!neighbor) {
return false;
}
const int kYGapTh = static_cast<int>(round(resolution_ * 0.1f));
const int kYGapTh = static_cast<int>(std::round(resolution_ * 0.1f));
return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh;
}

View File

@ -23,6 +23,7 @@
#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstring>
namespace tesseract {
@ -205,7 +206,7 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
new_xht / word_res->denorm.y_scale());
}
// The xheight must change by at least x_ht_min_change to be used.
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
if (std::fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
return new_xht / word_res->denorm.y_scale();
} else {
return bottom_shift != 0 ? word_res->x_height : 0.0f;

View File

@ -428,7 +428,7 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
// Normalize the orientation scores for the blob and use them to
// update the aggregated orientation score.
for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) {
osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score);
osr_->orientations[i] += std::log(blob_o_score[i] / total_blob_o_score);
}
// TODO(ranjith) Add an early exit test, based on min_orientation_margin,

View File

@ -113,6 +113,7 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
}
std::vector<std::string> col_width_patterns;
col_width_patterns.reserve(max_col_widths.size());
for (int max_col_width : max_col_widths) {
col_width_patterns.push_back(std::string("%-") + std::to_string(max_col_width) + "s");
}

View File

@ -33,6 +33,7 @@
#include <allheaders.h> // for pixGetHeight, pixGetPixel
#include <algorithm> // for max, min
#include <cmath>
#include <cstdint> // for INT32_MAX, INT16_MAX
#define PROJECTION_MARGIN 10 // arbitrary
@ -133,7 +134,7 @@ void BLOBNBOX::chop( // chop blobs
BLOBNBOX_IT blob_it; // blob iterator
// get no of chops
blobcount = static_cast<int16_t>(floor(box.width() / xheight));
blobcount = static_cast<int16_t>(std::floor(box.width() / xheight));
if (blobcount > 1 && cblob_ptr != nullptr) {
// width of each
blobwidth = static_cast<float>(box.width() + 1) / blobcount;
@ -150,12 +151,12 @@ void BLOBNBOX::chop( // chop blobs
UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
} while (blob != end_it->data());
if (ymin < ymax) {
leftx = static_cast<int16_t>(floor(rightx - blobwidth));
leftx = static_cast<int16_t>(std::floor(rightx - blobwidth));
if (leftx < box.left()) {
leftx = box.left(); // clip to real box
}
bl = ICOORD(leftx, static_cast<int16_t>(floor(ymin)));
tr = ICOORD(static_cast<int16_t>(ceil(rightx)), static_cast<int16_t>(ceil(ymax)));
bl = ICOORD(leftx, static_cast<int16_t>(std::floor(ymin)));
tr = ICOORD(static_cast<int16_t>(std::ceil(rightx)), static_cast<int16_t>(std::ceil(ymax)));
if (blobindex == 0) {
box = TBOX(bl, tr); // change box
} else {

View File

@ -63,7 +63,8 @@ BoxWord *BoxWord::CopyFromNormalized(TWERD *tessword) {
for (unsigned b = 0; b < boxword->length_; ++b) {
TBLOB *tblob = tessword->blobs[b];
TBOX blob_box;
for (TESSLINE *outline = tblob->outlines; outline != nullptr; outline = outline->next) {
for (TESSLINE *outline = tblob->outlines; outline != nullptr;
outline = outline->next) {
EDGEPT *edgept = outline->loop;
// Iterate over the edges.
do {
@ -92,7 +93,8 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
for (unsigned i = 0; i < length_; ++i) {
TBOX box = boxes_[i];
// Expand by a single pixel, as the poly approximation error is 1 pixel.
box = TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1);
box =
TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1);
// Now find the original box that matches.
TBOX original_box;
C_BLOB_IT b_it(original_word->cblob_list());
@ -106,16 +108,19 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
}
}
if (!original_box.null_box()) {
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance)) {
if (NearlyEqual<int>(original_box.left(), box.left(),
kBoxClipTolerance)) {
box.set_left(original_box.left());
}
if (NearlyEqual<int>(original_box.right(), box.right(), kBoxClipTolerance)) {
if (NearlyEqual<int>(original_box.right(), box.right(),
kBoxClipTolerance)) {
box.set_right(original_box.right());
}
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance)) {
box.set_top(original_box.top());
}
if (NearlyEqual<int>(original_box.bottom(), box.bottom(), kBoxClipTolerance)) {
if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
kBoxClipTolerance)) {
box.set_bottom(original_box.bottom());
}
}
@ -193,7 +198,8 @@ void BoxWord::ComputeBoundingBox() {
// This and other putatively are the same, so call the (permanent) callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.
void BoxWord::ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const {
void BoxWord::ProcessMatchedBlobs(const TWERD &other,
const std::function<void(int)> &cb) const {
for (unsigned i = 0; i < length_ && i < other.NumBlobs(); ++i) {
TBOX blob_box = other.blobs[i]->bounding_box();
if (blob_box == boxes_[i]) {

View File

@ -72,7 +72,8 @@ public:
// This and other putatively are the same, so call the (permanent) callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.
void ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const;
void ProcessMatchedBlobs(const TWERD &other,
const std::function<void(int)> &cb) const;
const TBOX &bounding_box() const {
return bbox_;

View File

@ -43,7 +43,8 @@ const int kMaxReadAhead = 8;
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {}
// Takes ownership of the pix and destroys it.
ImageData::ImageData(bool vertical, Image pix) : page_number_(0), vertical_text_(vertical) {
ImageData::ImageData(bool vertical, Image pix)
: page_number_(0), vertical_text_(vertical) {
SetPix(pix);
}
ImageData::~ImageData() {
@ -55,8 +56,8 @@ ImageData::~ImageData() {
// Builds and returns an ImageData from the basic data. Note that imagedata,
// truth_text, and box_text are all the actual file data, NOT filenames.
ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
const char *imagedata, int imagedatasize, const char *truth_text,
const char *box_text) {
const char *imagedata, int imagedatasize,
const char *truth_text, const char *box_text) {
auto *image_data = new ImageData();
image_data->imagefilename_ = name;
image_data->page_number_ = page_number;
@ -67,7 +68,8 @@ ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
memcpy(&image_data->image_data_[0], imagedata, imagedatasize);
if (!image_data->AddBoxes(box_text)) {
if (truth_text == nullptr || truth_text[0] == '\0') {
tprintf("Error: No text corresponding to page %d from image %s!\n", page_number, name);
tprintf("Error: No text corresponding to page %d from image %s!\n",
page_number, name);
delete image_data;
return nullptr;
}
@ -210,7 +212,8 @@ Image ImageData::GetPix() const {
// The return value is the scaled Pix, which must be pixDestroyed after use,
// and scale_factor (if not nullptr) is set to the scale factor that was applied
// to the image to achieve the target_height.
Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
Image ImageData::PreScale(int target_height, int max_height,
float *scale_factor, int *scaled_width,
int *scaled_height, std::vector<TBOX> *boxes) const {
int input_width = 0;
int input_height = 0;
@ -231,8 +234,8 @@ Image ImageData::PreScale(int target_height, int max_height, float *scale_factor
// Get the scaled image.
Image pix = pixScale(src_pix, im_factor, im_factor);
if (pix == nullptr) {
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height,
im_factor);
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
input_width, input_height, im_factor);
src_pix.destroy();
return nullptr;
}
@ -278,9 +281,9 @@ void ImageData::Display() const {
}
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
auto *win =
new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true);
auto *win = new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
2 * (height + 4 * kTextSize), width + 10,
height + 3 * kTextSize, true);
win->Draw(pix, 0, height - 1);
pix.destroy();
// Draw the boxes.
@ -309,7 +312,8 @@ void ImageData::Display() const {
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
void ImageData::AddBoxes(const std::vector<TBOX> &boxes,
const std::vector<std::string> &texts,
const std::vector<int> &box_pages) {
// Copy the boxes and make the transcription.
for (unsigned i = 0; i < box_pages.size(); ++i) {
@ -346,7 +350,8 @@ Image ImageData::GetPixInternal(const std::vector<char> &image_data) {
Image pix = nullptr;
if (!image_data.empty()) {
// Convert the array to an image.
const auto *u_data = reinterpret_cast<const unsigned char *>(&image_data[0]);
const auto *u_data =
reinterpret_cast<const unsigned char *>(&image_data[0]);
pix = pixReadMem(u_data, image_data.size());
}
return pix;
@ -361,23 +366,25 @@ bool ImageData::AddBoxes(const char *box_text) {
std::vector<std::string> texts;
std::vector<int> box_pages;
if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
/*continue_on_failure*/ true, &boxes, &texts, nullptr, &box_pages)) {
/*continue_on_failure*/ true, &boxes, &texts, nullptr,
&box_pages)) {
AddBoxes(boxes, texts, box_pages);
return true;
} else {
tprintf("Error: No boxes for page %d from image %s!\n", page_number_, imagefilename_.c_str());
tprintf("Error: No boxes for page %d from image %s!\n", page_number_,
imagefilename_.c_str());
}
}
return false;
}
DocumentData::DocumentData(const std::string &name)
: document_name_(name)
, pages_offset_(-1)
, total_pages_(-1)
, memory_used_(0)
, max_memory_(0)
, reader_(nullptr) {}
: document_name_(name),
pages_offset_(-1),
total_pages_(-1),
memory_used_(0),
max_memory_(0),
reader_(nullptr) {}
DocumentData::~DocumentData() {
if (thread.joinable()) {
@ -392,15 +399,16 @@ DocumentData::~DocumentData() {
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
bool DocumentData::LoadDocument(const char *filename, int start_page, int64_t max_memory,
FileReader reader) {
bool DocumentData::LoadDocument(const char *filename, int start_page,
int64_t max_memory, FileReader reader) {
SetDocument(filename, max_memory, reader);
pages_offset_ = start_page;
return ReCachePages();
}
// Sets up the document, without actually loading it.
void DocumentData::SetDocument(const char *filename, int64_t max_memory, FileReader reader) {
void DocumentData::SetDocument(const char *filename, int64_t max_memory,
FileReader reader) {
std::lock_guard<std::mutex> lock_p(pages_mutex_);
std::lock_guard<std::mutex> lock(general_mutex_);
document_name_ = filename;
@ -485,7 +493,8 @@ bool DocumentData::IsPageAvailable(int index, ImageData **page) {
}
if (num_pages > 0) {
index = Modulo(index, num_pages);
if (pages_offset_ <= index && static_cast<unsigned>(index) < pages_offset_ + pages_.size()) {
if (pages_offset_ <= index &&
static_cast<unsigned>(index) < pages_offset_ + pages_.size()) {
*page = pages_[index - pages_offset_]; // Page is available already.
return true;
}
@ -505,8 +514,8 @@ int64_t DocumentData::UnCache() {
pages_offset_ = -1;
set_total_pages(-1);
set_memory_used(0);
tprintf("Unloaded document %s, saving %" PRId64 " memory\n", document_name_.c_str(),
memory_saved);
tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
document_name_.c_str(), memory_saved);
return memory_saved;
}
@ -538,8 +547,8 @@ bool DocumentData::ReCachePages() {
}
pages_.clear();
TFile fp;
if (!fp.Open(document_name_.c_str(), reader_) || !fp.DeSerializeSize(&loaded_pages) ||
loaded_pages <= 0) {
if (!fp.Open(document_name_.c_str(), reader_) ||
!fp.DeSerializeSize(&loaded_pages) || loaded_pages <= 0) {
tprintf("Deserialize header failed: %s\n", document_name_.c_str());
return false;
}
@ -552,7 +561,8 @@ bool DocumentData::ReCachePages() {
if (!fp.DeSerialize(&non_null)) {
break;
}
if (page < pages_offset_ || (max_memory_ > 0 && memory_used() > max_memory_)) {
if (page < pages_offset_ ||
(max_memory_ > 0 && memory_used() > max_memory_)) {
if (non_null && !ImageData::SkipDeSerialize(&fp)) {
break;
}
@ -574,16 +584,17 @@ bool DocumentData::ReCachePages() {
}
}
if (page < loaded_pages) {
tprintf("Deserialize failed: %s read %d/%d lines\n", document_name_.c_str(), page,
loaded_pages);
tprintf("Deserialize failed: %s read %d/%d lines\n", document_name_.c_str(),
page, loaded_pages);
for (auto page : pages_) {
delete page;
}
pages_.clear();
} else if (loaded_pages > 1) {
// Avoid lots of messages for training with single line images.
tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), loaded_pages,
pages_offset_ + 1, pages_offset_ + pages_.size(), document_name_.c_str());
tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(),
loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(),
document_name_.c_str());
}
set_total_pages(loaded_pages);
return !pages_.empty();
@ -601,7 +612,8 @@ DocumentCache::~DocumentCache() {
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
CachingStrategy cache_strategy, FileReader reader) {
CachingStrategy cache_strategy,
FileReader reader) {
cache_strategy_ = cache_strategy;
int64_t fair_share_memory = 0;
// In the round-robin case, each DocumentData handles restricting its content
@ -610,7 +622,7 @@ bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
if (cache_strategy_ == CS_ROUND_ROBIN) {
fair_share_memory = max_memory_ / filenames.size();
}
for (auto filename : filenames) {
for (const auto &filename : filenames) {
auto *document = new DocumentData(filename);
document->SetDocument(filename.c_str(), fair_share_memory, reader);
AddToCache(document);
@ -632,7 +644,8 @@ bool DocumentCache::AddToCache(DocumentData *data) {
}
// Finds and returns a document by name.
DocumentData *DocumentCache::FindDocument(const std::string &document_name) const {
DocumentData *DocumentCache::FindDocument(
const std::string &document_name) const {
for (auto *document : documents_) {
if (document->document_name() == document_name) {
return document;
@ -696,7 +709,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
}
}
int doc_index = serial / num_pages_per_doc_ % num_docs;
const ImageData *doc = documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
const ImageData *doc =
documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
// Count up total memory. Background loading makes it more complicated to
// keep a running count.
int64_t total_memory = 0;
@ -710,7 +724,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
// we create a hole between them and then un-caching the backmost occupied
// will work for both.
int num_in_front = CountNeighbourDocs(doc_index, 1);
for (int offset = num_in_front - 2; offset > 1 && total_memory >= max_memory_; --offset) {
for (int offset = num_in_front - 2;
offset > 1 && total_memory >= max_memory_; --offset) {
int next_index = (doc_index + offset) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}
@ -718,7 +733,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
// we take away the document that a 2nd reader is using, it will put it
// back and make a hole between.
int num_behind = CountNeighbourDocs(doc_index, -1);
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; ++offset) {
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_;
++offset) {
int next_index = (doc_index + offset + num_docs) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}

View File

@ -65,7 +65,8 @@ const double kMaxWordGapRatio = 2.0;
// which words to keep, based on the adjustment factors of the two words.
// TODO(rays) This is horrible. Replace with an enhance params training model.
static double StopperAmbigThreshold(double f1, double f2) {
return (f2 - f1) * kStopperAmbiguityThresholdGain - kStopperAmbiguityThresholdOffset;
return (f2 - f1) * kStopperAmbiguityThresholdGain -
kStopperAmbiguityThresholdOffset;
}
/*************************************************************************
@ -79,7 +80,8 @@ PAGE_RES::PAGE_RES(bool merge_similar_words, BLOCK_LIST *the_block_list,
BLOCK_IT block_it(the_block_list);
BLOCK_RES_IT block_res_it(&block_res_list);
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
block_res_it.add_to_end(new BLOCK_RES(merge_similar_words, block_it.data()));
block_res_it.add_to_end(
new BLOCK_RES(merge_similar_words, block_it.data()));
}
prev_word_best_choice = prev_word_best_choice_ptr;
}
@ -127,7 +129,8 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) {
row = the_row;
bool add_next_word = false;
TBOX union_box;
float line_height = the_row->x_height() + the_row->ascenders() - the_row->descenders();
float line_height =
the_row->x_height() + the_row->ascenders() - the_row->descenders();
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
auto *word_res = new WERD_RES(word_it.data());
word_res->x_height = the_row->x_height();
@ -298,14 +301,17 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES &source) {
// norm_box is used to override the word bounding box to determine the
// normalization scale and offset.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tess,
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
bool use_body_size, bool allow_detailed_fx, ROW *row,
bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in,
tesseract::Tesseract *tess, Image pix,
int norm_mode, const TBOX *norm_box,
bool numeric_mode, bool use_body_size,
bool allow_detailed_fx, ROW *row,
const BLOCK *block) {
auto norm_mode_hint = static_cast<tesseract::OcrEngineMode>(norm_mode);
tesseract = tess;
POLY_BLOCK *pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY && word->cblob_list()->empty()) ||
if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
word->cblob_list()->empty()) ||
(pb != nullptr && !pb->IsText())) {
// Empty words occur when all the blobs have been moved to the rej_blobs
// list, which seems to occur frequently in junk.
@ -317,9 +323,12 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::T
SetupWordScript(unicharset_in);
chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
float word_xheight =
use_body_size && row != nullptr && row->body_size() > 0.0f ? row->body_size() : x_height;
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE), word_xheight, baseline_shift,
numeric_mode, norm_mode_hint, norm_box, &denorm);
use_body_size && row != nullptr && row->body_size() > 0.0f
? row->body_size()
: x_height;
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
word_xheight, baseline_shift, numeric_mode,
norm_mode_hint, norm_box, &denorm);
blob_row = row;
SetupBasicsFromChoppedWord(unicharset_in);
SetupBlamerBundle();
@ -398,7 +407,8 @@ void WERD_RES::SetupBlobWidthsAndGaps() {
TBOX box = blob->bounding_box();
blob_widths.push_back(box.width());
if (b + 1 < num_blobs) {
blob_gaps.push_back(chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
blob_gaps.push_back(chopped_word->blobs[b + 1]->bounding_box().left() -
box.right());
}
}
}
@ -460,8 +470,8 @@ bool WERD_RES::StatesAllValid() {
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
WERD_CHOICE *choice = it.data();
if (choice->TotalOfStates() != ratings_dim) {
tprintf("Cooked #%u has total of states = %u vs ratings dim of %u\n", index,
choice->TotalOfStates(), ratings_dim);
tprintf("Cooked #%u has total of states = %u vs ratings dim of %u\n",
index, choice->TotalOfStates(), ratings_dim);
return false;
}
}
@ -471,7 +481,8 @@ bool WERD_RES::StatesAllValid() {
// Prints a list of words found if debug is true or the word result matches
// the word_to_debug.
void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' &&
best_choice != nullptr &&
best_choice->unichar_string() == std::string(word_to_debug))) {
if (raw_choice != nullptr) {
raw_choice->print("\nBest Raw Choice");
@ -490,8 +501,8 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
// Prints the top choice along with the accepted/done flags.
void WERD_RES::DebugTopChoice(const char *msg) const {
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted, tess_would_adapt,
done);
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted,
tess_would_adapt, done);
if (best_choice == nullptr) {
tprintf("<Null choice>\n");
} else {
@ -516,7 +527,8 @@ void WERD_RES::FilterWordChoices(int debug_level) {
int index = 0;
for (it.forward(); !it.at_first(); it.forward(), ++index) {
WERD_CHOICE *choice = it.data();
float threshold = StopperAmbigThreshold(best_choice->adjust_factor(), choice->adjust_factor());
float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
choice->adjust_factor());
// i, j index the blob choice in choice, best_choice.
// chunk is an index into the chopped_word blobs (AKA chunks).
// Since the two words may use different segmentations of the chunks, we
@ -555,8 +567,10 @@ void WERD_RES::FilterWordChoices(int debug_level) {
}
}
void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating,
float rating_margin, float *thresholds) {
void WERD_RES::ComputeAdaptionThresholds(float certainty_scale,
float min_rating, float max_rating,
float rating_margin,
float *thresholds) {
int chunk = 0;
int end_chunk = best_choice->state(0);
int end_raw_chunk = raw_choice->state(0);
@ -612,26 +626,29 @@ bool WERD_RES::LogNewRawChoice(WERD_CHOICE *word_choice) {
// The best_choices list is kept in sorted order by rating. Duplicates are
// removed, and the list is kept no longer than max_num_choices in length.
// Returns true if the word_choice is still a valid pointer.
bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice) {
bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug,
WERD_CHOICE *word_choice) {
if (best_choice != nullptr) {
// Throw out obviously bad choices to save some work.
// TODO(rays) Get rid of this! This piece of code produces different
// results according to the order in which words are found, which is an
// undesirable behavior. It would be better to keep all the choices and
// prune them later when more information is available.
float max_certainty_delta =
StopperAmbigThreshold(best_choice->adjust_factor(), word_choice->adjust_factor());
float max_certainty_delta = StopperAmbigThreshold(
best_choice->adjust_factor(), word_choice->adjust_factor());
if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) {
max_certainty_delta = -kStopperAmbiguityThresholdOffset;
}
if (word_choice->certainty() - best_choice->certainty() < max_certainty_delta) {
if (word_choice->certainty() - best_choice->certainty() <
max_certainty_delta) {
if (debug) {
std::string bad_string;
word_choice->string_and_lengths(&bad_string, nullptr);
tprintf(
"Discarding choice \"%s\" with an overly low certainty"
" %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
bad_string.c_str(), word_choice->certainty(), best_choice->certainty(),
bad_string.c_str(), word_choice->certainty(),
best_choice->certainty(),
max_certainty_delta + best_choice->certainty());
}
delete word_choice;
@ -664,8 +681,8 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
} else {
// Old is better.
if (debug) {
tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n", new_str.c_str(),
word_choice->rating(), choice->rating());
tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
new_str.c_str(), word_choice->rating(), choice->rating());
}
delete word_choice;
return false;
@ -720,8 +737,8 @@ void WERD_RES::PrintBestChoices() const {
}
alternates_str += it.data()->unichar_string();
}
tprintf("Alternates for \"%s\": {\"%s\"}\n", best_choice->unichar_string().c_str(),
alternates_str.c_str());
tprintf("Alternates for \"%s\": {\"%s\"}\n",
best_choice->unichar_string().c_str(), alternates_str.c_str());
}
// Returns the sum of the widths of the blob between start_blob and last_blob
@ -830,12 +847,14 @@ void WERD_RES::RebuildBestState() {
int length = best_choice->state(i);
best_state.push_back(length);
if (length > 1) {
SEAM::JoinPieces(seam_array, chopped_word->blobs, start, start + length - 1);
SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
TBLOB *blob = chopped_word->blobs[start];
rebuild_word->blobs.push_back(new TBLOB(*blob));
if (length > 1) {
SEAM::BreakPieces(seam_array, chopped_word->blobs, start, start + length - 1);
SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
start += length;
}
@ -925,7 +944,8 @@ void WERD_RES::FakeWordFromRatings(PermuterType permuter) {
rating = choice->rating();
certainty = choice->certainty();
}
word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating, certainty);
word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
certainty);
}
LogNewRawChoice(word_choice);
// Ownership of word_choice taken by word here.
@ -948,14 +968,17 @@ void WERD_RES::BestChoiceToCorrectText() {
// callback box_cb is nullptr or returns true, setting the merged blob
// result to the class returned from class_cb.
// Returns true if anything was merged.
bool WERD_RES::ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb,
std::function<bool(const TBOX &, const TBOX &)> box_cb) {
bool WERD_RES::ConditionalBlobMerge(
const std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> &class_cb,
const std::function<bool(const TBOX &, const TBOX &)> &box_cb) {
ASSERT_HOST(best_choice->empty() || ratings != nullptr);
bool modified = false;
for (unsigned i = 0; i + 1 < best_choice->length(); ++i) {
UNICHAR_ID new_id = class_cb(best_choice->unichar_id(i), best_choice->unichar_id(i + 1));
UNICHAR_ID new_id =
class_cb(best_choice->unichar_id(i), best_choice->unichar_id(i + 1));
if (new_id != INVALID_UNICHAR_ID &&
(box_cb == nullptr || box_cb(box_word->BlobBox(i), box_word->BlobBox(i + 1)))) {
(box_cb == nullptr ||
box_cb(box_word->BlobBox(i), box_word->BlobBox(i + 1)))) {
// Raw choice should not be fixed.
best_choice->set_unichar_id(new_id, i);
modified = true;
@ -1003,7 +1026,8 @@ static int is_simple_quote(const char *signed_str, int length) {
// Standard 1 byte quotes.
return (length == 1 && (*str == '\'' || *str == '`')) ||
// UTF-8 3 bytes curved quotes.
(length == 3 && ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) ||
(length == 3 &&
((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) ||
(*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99)));
}
@ -1012,7 +1036,8 @@ static int is_simple_quote(const char *signed_str, int length) {
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) {
if (is_simple_quote(ch, strlen(ch)) &&
is_simple_quote(next_ch, strlen(next_ch))) {
return uch_set->unichar_to_id("\"");
}
return INVALID_UNICHAR_ID;
@ -1020,7 +1045,8 @@ UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
// Change pairs of quotes to double quotes.
void WERD_RES::fix_quotes() {
if (!uch_set->contains_unichar("\"") || !uch_set->get_enabled(uch_set->unichar_to_id("\""))) {
if (!uch_set->contains_unichar("\"") ||
!uch_set->get_enabled(uch_set->unichar_to_id("\""))) {
return; // Don't create it if it is disallowed.
}
@ -1049,7 +1075,8 @@ bool WERD_RES::HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2) {
// Change pairs of hyphens to a single hyphen if the bounding boxes touch
// Typically a long dash which has been segmented.
void WERD_RES::fix_hyphens() {
if (!uch_set->contains_unichar("-") || !uch_set->get_enabled(uch_set->unichar_to_id("-"))) {
if (!uch_set->contains_unichar("-") ||
!uch_set->get_enabled(uch_set->unichar_to_id("-"))) {
return; // Don't create it if it is disallowed.
}
@ -1071,7 +1098,8 @@ UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) {
// Change pairs of tess failures to a single one
void WERD_RES::merge_tess_fails() {
using namespace std::placeholders; // for _1, _2
if (ConditionalBlobMerge(std::bind(&WERD_RES::BothSpaces, this, _1, _2), nullptr)) {
if (ConditionalBlobMerge(std::bind(&WERD_RES::BothSpaces, this, _1, _2),
nullptr)) {
unsigned len = best_choice->length();
ASSERT_HOST(reject_map.length() == len);
ASSERT_HOST(box_word->length() == len);
@ -1178,7 +1206,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
}
WERD_RES_IT word_res_it(&row_res->word_res_list);
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); word_res_it.forward()) {
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
if (word_res_it.data() == word_res) {
return -1;
} else if (word_res_it.data() == other.word_res) {
@ -1190,7 +1219,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
// we both point to the same block, but different rows.
ROW_RES_IT row_res_it(&block_res->row_res_list);
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); row_res_it.forward()) {
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
row_res_it.forward()) {
if (row_res_it.data() == row_res) {
return -1;
} else if (row_res_it.data() == other.row_res) {
@ -1202,7 +1232,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
// We point to different blocks.
BLOCK_RES_IT block_res_it(&page_res->block_res_list);
for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); block_res_it.forward()) {
for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
block_res_it.forward()) {
if (block_res_it.data() == block_res) {
return -1;
} else if (block_res_it.data() == other.block_res) {
@ -1218,7 +1249,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
// before the current position. The simple fields of the WERD_RES are copied
// from clone_res and the resulting WERD_RES is returned for further setup
// with best_choice etc.
WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word) {
WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res,
WERD *new_word) {
// Make a WERD_RES for the new_word.
auto *new_res = new WERD_RES(new_word);
new_res->CopySimpleFields(clone_res);
@ -1245,7 +1277,8 @@ WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *ne
// are likely very poor, if they come from LSTM, where it only outputs the
// character at one pixel within it, so we find the midpoints between them.
static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box,
C_BLOB_LIST *next_word_blobs, std::vector<int> *blob_ends) {
C_BLOB_LIST *next_word_blobs,
std::vector<int> *blob_ends) {
C_BLOB_IT blob_it(word.word->cblob_list());
for (int length : word.best_state) {
// Get the bounding box of the fake blobs
@ -1272,17 +1305,18 @@ static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box,
// Helper computes the bounds of a word by restricting it to existing words
// that significantly overlap.
static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, int w_index,
TBOX prev_box, WERD_RES_IT w_it) {
static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words,
int w_index, TBOX prev_box, WERD_RES_IT w_it) {
constexpr int kSignificantOverlapFraction = 4;
TBOX clipped_box;
TBOX current_box = words[w_index]->word->bounding_box();
TBOX next_box;
if (static_cast<size_t>(w_index + 1) < words.size() && words[w_index + 1] != nullptr &&
words[w_index + 1]->word != nullptr) {
if (static_cast<size_t>(w_index + 1) < words.size() &&
words[w_index + 1] != nullptr && words[w_index + 1]->word != nullptr) {
next_box = words[w_index + 1]->word->bounding_box();
}
for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo; w_it.forward()) {
for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo;
w_it.forward()) {
if (w_it.data() == nullptr || w_it.data()->word == nullptr) {
continue;
}
@ -1317,14 +1351,19 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, i
// Helper moves the blob from src to dest. If it isn't contained by clip_box,
// the blob is replaced by a fake that is contained.
static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it, const TBOX &clip_box) {
static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it,
const TBOX &clip_box) {
C_BLOB *src_blob = src_it->extract();
TBOX box = src_blob->bounding_box();
if (!clip_box.contains(box)) {
int left = ClipToRange<int>(box.left(), clip_box.left(), clip_box.right() - 1);
int right = ClipToRange<int>(box.right(), clip_box.left() + 1, clip_box.right());
int top = ClipToRange<int>(box.top(), clip_box.bottom() + 1, clip_box.top());
int bottom = ClipToRange<int>(box.bottom(), clip_box.bottom(), clip_box.top() - 1);
int left =
ClipToRange<int>(box.left(), clip_box.left(), clip_box.right() - 1);
int right =
ClipToRange<int>(box.right(), clip_box.left() + 1, clip_box.right());
int top =
ClipToRange<int>(box.top(), clip_box.bottom() + 1, clip_box.top());
int bottom =
ClipToRange<int>(box.bottom(), clip_box.bottom(), clip_box.top() - 1);
box = TBOX(left, bottom, right, top);
delete src_blob;
src_blob = C_BLOB::FakeBlob(box);
@ -1336,7 +1375,8 @@ static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it, const TBOX &c
// Replaces the current WERD/WERD_RES with the given words. The given words
// contain fake blobs that indicate the position of the characters. These are
// replaced with real blobs from the current word as much as possible.
void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words) {
void PAGE_RES_IT::ReplaceCurrentWord(
tesseract::PointerVector<WERD_RES> *words) {
if (words->empty()) {
DeleteCurrentWord();
return;
@ -1405,11 +1445,13 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
int end_x = blob_ends[i];
TBOX blob_box;
// Add the blobs up to end_x.
while (!src_b_it.empty() && src_b_it.data()->bounding_box().x_middle() < end_x) {
while (!src_b_it.empty() &&
src_b_it.data()->bounding_box().x_middle() < end_x) {
blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
src_b_it.forward();
}
while (!rej_b_it.empty() && rej_b_it.data()->bounding_box().x_middle() < end_x) {
while (!rej_b_it.empty() &&
rej_b_it.data()->bounding_box().x_middle() < end_x) {
blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
rej_b_it.forward();
}
@ -1484,13 +1526,14 @@ void PAGE_RES_IT::MakeCurrentWordFuzzy() {
// The next word should be the corresponding part of combo, but we have
// already stepped past it, so find it by search.
WERD_RES_IT wr_it(&row()->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list() && wr_it.data() != word_res;
wr_it.forward()) {
for (wr_it.mark_cycle_pt();
!wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
}
wr_it.forward();
ASSERT_HOST(wr_it.data()->part_of_combo);
real_word = wr_it.data()->word;
ASSERT_HOST(!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON));
ASSERT_HOST(!real_word->flag(W_FUZZY_SP) &&
!real_word->flag(W_FUZZY_NON));
real_word->set_flag(W_FUZZY_SP, true);
}
}
@ -1531,7 +1574,8 @@ void PAGE_RES_IT::ResetWordIterator() {
// cycled_list state correctly.
word_res_it.move_to_first();
for (word_res_it.mark_cycle_pt();
!word_res_it.cycled_list() && word_res_it.data() != next_word_res; word_res_it.forward()) {
!word_res_it.cycled_list() && word_res_it.data() != next_word_res;
word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
if (prev_row_res == row_res) {
prev_word_res = word_res;
@ -1624,8 +1668,9 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) {
foundword:
// Update prev_word_best_choice pointer.
if (page_res != nullptr && page_res->prev_word_best_choice != nullptr) {
*page_res->prev_word_best_choice =
(new_block || prev_word_res == nullptr) ? nullptr : prev_word_res->best_choice;
*page_res->prev_word_best_choice = (new_block || prev_word_res == nullptr)
? nullptr
: prev_word_res->best_choice;
}
return word_res;
}
@ -1653,7 +1698,8 @@ WERD_RES *PAGE_RES_IT::restart_row() {
*************************************************************************/
WERD_RES *PAGE_RES_IT::forward_paragraph() {
while (block_res == next_block_res && (next_row_res != nullptr && next_row_res->row != nullptr &&
while (block_res == next_block_res &&
(next_row_res != nullptr && next_row_res->row != nullptr &&
row_res->row->para() == next_row_res->row->para())) {
internal_forward(false, true);
}

View File

@ -21,8 +21,8 @@
#include "blamer.h" // for BlamerBundle (ptr only), IRR_NUM_REASONS
#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH
#include "genericvector.h" // for PointerVector
#include "elst.h" // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH
#include "genericvector.h" // for PointerVector
#include "matrix.h" // for MATRIX
#include "normalis.h" // for DENORM
#include "ratngs.h" // for WERD_CHOICE, BLOB_CHOICE (ptr only)
@ -218,7 +218,8 @@ public:
// Stores the lstm choices of every timestep
std::vector<std::vector<std::pair<const char *, float>>> timesteps;
// Stores the lstm choices of every timestep segmented by character
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmented_timesteps;
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
segmented_timesteps;
// Symbolchoices acquired during CTC
std::vector<std::vector<std::pair<const char *, float>>> CTC_symbol_choices;
// Stores if the timestep vector starts with a space
@ -391,7 +392,8 @@ public:
}
bool AnyRtlCharsInWord() const {
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
if (uch_set == nullptr || best_choice == nullptr ||
best_choice->length() < 1) {
return false;
}
for (unsigned id = 0; id < best_choice->length(); id++) {
@ -400,7 +402,8 @@ public:
continue; // Ignore illegal chars.
}
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
return true;
}
}
@ -408,7 +411,8 @@ public:
}
bool AnyLtrCharsInWord() const {
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
if (uch_set == nullptr || best_choice == nullptr ||
best_choice->length() < 1) {
return false;
}
for (unsigned id = 0; id < best_choice->length(); id++) {
@ -417,7 +421,8 @@ public:
continue; // Ignore illegal chars.
}
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER) {
if (dir == UNICHARSET::U_LEFT_TO_RIGHT ||
dir == UNICHARSET::U_ARABIC_NUMBER) {
return true;
}
}
@ -463,9 +468,11 @@ public:
// of any of the above flags. It should really be a tesseract::OcrEngineMode
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
// Returns false if the word is empty and sets up fake results.
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract,
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
bool use_body_size, bool allow_detailed_fx, ROW *row,
bool SetupForRecognition(const UNICHARSET &unicharset_in,
tesseract::Tesseract *tesseract, Image pix,
int norm_mode, const TBOX *norm_box,
bool numeric_mode, bool use_body_size,
bool allow_detailed_fx, ROW *row,
const BLOCK *block);
// Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
@ -529,8 +536,9 @@ public:
// min_rating limits how tight to make a template.
// max_rating limits how loose to make a template.
// rating_margin denotes the amount of margin to put in template.
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating,
float rating_margin, float *thresholds);
void ComputeAdaptionThresholds(float certainty_scale, float min_rating,
float max_rating, float rating_margin,
float *thresholds);
// Saves a copy of the word_choice if it has the best unadjusted rating.
// Returns true if the word_choice was the new best.
@ -541,7 +549,8 @@ public:
// The best_choices list is kept in sorted order by rating. Duplicates are
// removed, and the list is kept no longer than max_num_choices in length.
// Returns true if the word_choice is still a valid pointer.
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice);
bool LogNewCookedChoice(int max_num_choices, bool debug,
WERD_CHOICE *word_choice);
// Prints a brief list of all the best choices.
void PrintBestChoices() const;
@ -616,8 +625,9 @@ public:
// callback box_cb is nullptr or returns true, setting the merged blob
// result to the class returned from class_cb.
// Returns true if anything was merged.
bool ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb,
std::function<bool(const TBOX &, const TBOX &)> box_cb);
bool ConditionalBlobMerge(
const std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> &class_cb,
const std::function<bool(const TBOX &, const TBOX &)> &box_cb);
// Merges 2 adjacent blobs in the result (index and index+1) and corrects
// all the data to account for the change.
@ -683,7 +693,8 @@ public:
// Do two PAGE_RES_ITs point at the same word?
// This is much cheaper than cmp().
bool operator==(const PAGE_RES_IT &other) const {
return word_res == other.word_res && row_res == other.row_res && block_res == other.block_res;
return word_res == other.word_res && row_res == other.row_res &&
block_res == other.block_res;
}
bool operator!=(const PAGE_RES_IT &other) const {

View File

@ -28,6 +28,7 @@
#include "unicharset.h"
#include <algorithm>
#include <cmath>
#include <string>
#include <vector>
@ -149,7 +150,7 @@ BLOB_CHOICE &BLOB_CHOICE::operator=(const BLOB_CHOICE &other) {
// Returns true if *this and other agree on the baseline and x-height
// to within some tolerance based on a given estimate of the x-height.
bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const {
double baseline_diff = fabs(yshift() - other.yshift());
double baseline_diff = std::fabs(yshift() - other.yshift());
if (baseline_diff > kMaxBaselineDrift * x_height) {
if (debug) {
tprintf("Baseline diff %g for %d v %d\n", baseline_diff, unichar_id_, other.unichar_id_);

View File

@ -58,7 +58,8 @@ const double kMinXHeightFraction = 0.25;
const double kMinCapHeightFraction = 0.05;
/*static */
const char *UNICHARSET::kCustomLigatures[][2] = {{"ct", "\uE003"}, // c + t -> U+E003
const char *UNICHARSET::kCustomLigatures[][2] = {
{"ct", "\uE003"}, // c + t -> U+E003
{"ſh", "\uE006"}, // long-s + h -> U+E006
{"ſi", "\uE007"}, // long-s + i -> U+E007
{"ſl", "\uE008"}, // long-s + l -> U+E008
@ -68,14 +69,15 @@ const char *UNICHARSET::kCustomLigatures[][2] = {{"ct", "\uE003"}, // c + t -> U
// List of mappings to make when ingesting strings from the outside.
// The substitutions clean up text that should exist for rendering of
// synthetic data, but not in the recognition set.
const char *UNICHARSET::kCleanupMaps[][2] = {{"\u0640", ""}, // TATWEEL is deleted.
const char *UNICHARSET::kCleanupMaps[][2] = {
{"\u0640", ""}, // TATWEEL is deleted.
{"\ufb01", "fi"}, // fi ligature->fi pair.
{"\ufb02", "fl"}, // fl ligature->fl pair.
{nullptr, nullptr}};
// List of strings for the SpecialUnicharCodes. Keep in sync with the enum.
const char *UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = {" ", "Joined",
"|Broken|0|1"};
const char *UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = {
" ", "Joined", "|Broken|0|1"};
const char *UNICHARSET::null_script = "NULL";
@ -137,7 +139,8 @@ bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
}
// Expands the ranges with the ranges from the src properties.
void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(const UNICHAR_PROPERTIES &src) {
void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
const UNICHAR_PROPERTIES &src) {
UpdateRange(src.min_bottom, &min_bottom, &max_bottom);
UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
UpdateRange(src.min_top, &min_top, &max_top);
@ -164,7 +167,8 @@ void UNICHARSET::UNICHAR_PROPERTIES::CopyFrom(const UNICHAR_PROPERTIES &src) {
fragment = saved_fragment;
}
UNICHARSET::UNICHARSET() : ids(), script_table(nullptr), script_table_size_used(0) {
UNICHARSET::UNICHARSET()
: ids(), script_table(nullptr), script_table_size_used(0) {
clear();
for (int i = 0; i < SPECIAL_UNICHAR_CODES_COUNT; ++i) {
unichar_insert(kSpecialUnicharCodes[i]);
@ -180,13 +184,15 @@ UNICHARSET::~UNICHARSET() {
UNICHAR_ID
UNICHARSET::unichar_to_id(const char *const unichar_repr) const {
std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
std::string cleaned =
old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
return ids.contains(cleaned.data(), cleaned.size())
? ids.unichar_to_id(cleaned.data(), cleaned.size())
: INVALID_UNICHAR_ID;
}
UNICHAR_ID UNICHARSET::unichar_to_id(const char *const unichar_repr, int length) const {
UNICHAR_ID UNICHARSET::unichar_to_id(const char *const unichar_repr,
int length) const {
assert(length > 0 && length <= UNICHAR_LEN);
std::string cleaned(unichar_repr, length);
if (!old_style_included_) {
@ -215,7 +221,8 @@ int UNICHARSET::step(const char *str) const {
// Return whether the given UTF-8 string is encodable with this UNICHARSET.
// If not encodable, write the first byte offset which cannot be converted
// into the second (return) argument.
bool UNICHARSET::encodable_string(const char *str, unsigned *first_bad_position) const {
bool UNICHARSET::encodable_string(const char *str,
unsigned *first_bad_position) const {
std::vector<UNICHAR_ID> encoding;
return encode_string(str, true, &encoding, nullptr, first_bad_position);
}
@ -230,7 +237,8 @@ bool UNICHARSET::encodable_string(const char *str, unsigned *first_bad_position)
// that do not belong in the unicharset, or encoding may fail.
// Use CleanupString to perform the cleaning.
bool UNICHARSET::encode_string(const char *str, bool give_up_on_failure,
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
std::vector<UNICHAR_ID> *encoding,
std::vector<char> *lengths,
unsigned *encoded_length) const {
std::vector<UNICHAR_ID> working_encoding;
std::vector<char> working_lengths;
@ -240,8 +248,8 @@ bool UNICHARSET::encode_string(const char *str, bool give_up_on_failure,
unsigned str_pos = 0;
bool perfect = true;
while (str_pos < str_length) {
encode_string(str, str_pos, str_length, &working_encoding, &working_lengths, &str_pos, encoding,
&best_lengths);
encode_string(str, str_pos, str_length, &working_encoding, &working_lengths,
&str_pos, encoding, &best_lengths);
if (str_pos < str_length) {
// This is a non-match. Skip one utf-8 character.
perfect = false;
@ -357,8 +365,9 @@ void UNICHARSET::set_normed_ids(UNICHAR_ID unichar_id) {
unichars[unichar_id].properties.normed_ids.clear();
if (unichar_id == UNICHAR_SPACE && id_to_unichar(unichar_id)[0] == ' ') {
unichars[unichar_id].properties.normed_ids.push_back(UNICHAR_SPACE);
} else if (!encode_string(unichars[unichar_id].properties.normed.c_str(), true,
&unichars[unichar_id].properties.normed_ids, nullptr, nullptr)) {
} else if (!encode_string(unichars[unichar_id].properties.normed.c_str(),
true, &unichars[unichar_id].properties.normed_ids,
nullptr, nullptr)) {
unichars[unichar_id].properties.normed_ids.clear();
unichars[unichar_id].properties.normed_ids.push_back(unichar_id);
}
@ -383,7 +392,8 @@ void UNICHARSET::set_ranges_empty() {
// Sets all the properties for this unicharset given a src unicharset with
// everything set. The unicharsets don't have to be the same, and graphemes
// are correctly accounted for.
void UNICHARSET::PartialSetPropertiesFromOther(int start_index, const UNICHARSET &src) {
void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
const UNICHARSET &src) {
for (unsigned ch = start_index; ch < unichars.size(); ++ch) {
const char *utf8 = id_to_unichar(ch);
UNICHAR_PROPERTIES properties;
@ -464,8 +474,10 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET &src) {
// Returns true if the acceptable ranges of the tops of the characters do
// not overlap, making their x-height calculations distinct.
bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const {
int overlap = std::min(unichars[id1].properties.max_top, unichars[id2].properties.max_top) -
std::max(unichars[id1].properties.min_top, unichars[id2].properties.min_top);
int overlap = std::min(unichars[id1].properties.max_top,
unichars[id2].properties.max_top) -
std::max(unichars[id1].properties.min_top,
unichars[id2].properties.min_top);
return overlap <= 0;
}
@ -478,8 +490,10 @@ bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const {
// the overall process of encoding a partially failed string more efficient.
// See unicharset.h for definition of the args.
void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
unsigned *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
std::vector<UNICHAR_ID> *encoding,
std::vector<char> *lengths,
unsigned *best_total_length,
std::vector<UNICHAR_ID> *best_encoding,
std::vector<char> *best_lengths) const {
if (str_index > static_cast<int>(*best_total_length)) {
// This is the best result so far.
@ -504,8 +518,8 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
UNICHAR_ID id = ids.unichar_to_id(str + str_index, length);
encoding->push_back(id);
lengths->push_back(length);
encode_string(str, str_index + length, str_length, encoding, lengths, best_total_length,
best_encoding, best_lengths);
encode_string(str, str_index + length, str_length, encoding, lengths,
best_total_length, best_encoding, best_lengths);
if (static_cast<int>(*best_total_length) == str_length) {
return; // Tail recursion success!
}
@ -526,7 +540,8 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
// Returns false if no valid match was found in the unicharset.
// NOTE that script_id, mirror, and other_case refer to this unicharset on
// return and will need translation if the target unicharset is different.
bool UNICHARSET::GetStrProperties(const char *utf8_str, UNICHAR_PROPERTIES *props) const {
bool UNICHARSET::GetStrProperties(const char *utf8_str,
UNICHAR_PROPERTIES *props) const {
props->Init();
props->SetRangesEmpty();
int total_unicodes = 0;
@ -636,22 +651,26 @@ char UNICHARSET::get_chartype(UNICHAR_ID id) const {
return 0;
}
void UNICHARSET::unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style) {
void UNICHARSET::unichar_insert(const char *const unichar_repr,
OldUncleanUnichars old_style) {
if (old_style == OldUncleanUnichars::kTrue) {
old_style_included_ = true;
}
std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
std::string cleaned =
old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
if (!cleaned.empty() && !ids.contains(cleaned.data(), cleaned.size())) {
const char *str = cleaned.c_str();
std::vector<int> encoding;
if (!old_style_included_ && encode_string(str, true, &encoding, nullptr, nullptr)) {
if (!old_style_included_ &&
encode_string(str, true, &encoding, nullptr, nullptr)) {
return;
}
auto &u = unichars.emplace_back();
int index = 0;
do {
if (index >= UNICHAR_LEN) {
fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN, unichar_repr);
fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN,
unichar_repr);
return;
}
u.representation[index++] = *str++;
@ -673,11 +692,13 @@ void UNICHARSET::unichar_insert(const char *const unichar_repr, OldUncleanUnicha
}
bool UNICHARSET::contains_unichar(const char *const unichar_repr) const {
std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
std::string cleaned =
old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
return ids.contains(cleaned.data(), cleaned.size());
}
bool UNICHARSET::contains_unichar(const char *const unichar_repr, int length) const {
bool UNICHARSET::contains_unichar(const char *const unichar_repr,
int length) const {
if (length == 0) {
return false;
}
@ -688,7 +709,8 @@ bool UNICHARSET::contains_unichar(const char *const unichar_repr, int length) co
return ids.contains(cleaned.data(), cleaned.size());
}
bool UNICHARSET::eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const {
bool UNICHARSET::eq(UNICHAR_ID unichar_id,
const char *const unichar_repr) const {
return strcmp(this->id_to_unichar(unichar_id), unichar_repr) == 0;
}
@ -709,17 +731,20 @@ bool UNICHARSET::save_to_string(std::string &str) const {
unsigned int properties = this->get_properties(id);
if (strcmp(this->id_to_unichar(id), " ") == 0) {
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
this->get_script_from_script_id(this->get_script(id)), this->get_other_case(id));
this->get_script_from_script_id(this->get_script(id)),
this->get_other_case(id));
str += buffer;
} else {
std::ostringstream stream;
stream.imbue(std::locale::classic());
stream << this->id_to_unichar(id) << ' ' << properties << ' ' << min_bottom << ','
<< max_bottom << ',' << min_top << ',' << max_top << ',' << width << ',' << width_sd
<< ',' << bearing << ',' << bearing_sd << ',' << advance << ',' << advance_sd << ' '
stream << this->id_to_unichar(id) << ' ' << properties << ' '
<< min_bottom << ',' << max_bottom << ',' << min_top << ','
<< max_top << ',' << width << ',' << width_sd << ',' << bearing
<< ',' << bearing_sd << ',' << advance << ',' << advance_sd << ' '
<< this->get_script_from_script_id(this->get_script(id)) << ' '
<< this->get_other_case(id) << ' ' << this->get_direction(id) << ' '
<< this->get_mirror(id) << ' ' << this->get_normed_unichar(id) << "\t# "
<< this->get_other_case(id) << ' ' << this->get_direction(id)
<< ' ' << this->get_mirror(id) << ' '
<< this->get_normed_unichar(id) << "\t# "
<< this->debug_str(id).c_str() << '\n';
str += stream.str().c_str();
}
@ -741,24 +766,28 @@ private:
bool UNICHARSET::load_from_file(FILE *file, bool skip_fragments) {
LocalFilePointer lfp(file);
using namespace std::placeholders; // for _1, _2
std::function<char *(char *, int)> fgets_cb = std::bind(&LocalFilePointer::fgets, &lfp, _1, _2);
std::function<char *(char *, int)> fgets_cb =
std::bind(&LocalFilePointer::fgets, &lfp, _1, _2);
bool success = load_via_fgets(fgets_cb, skip_fragments);
return success;
}
bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) {
using namespace std::placeholders; // for _1, _2
std::function<char *(char *, int)> fgets_cb = std::bind(&tesseract::TFile::FGets, file, _1, _2);
std::function<char *(char *, int)> fgets_cb =
std::bind(&tesseract::TFile::FGets, file, _1, _2);
bool success = load_via_fgets(fgets_cb, skip_fragments);
return success;
}
bool UNICHARSET::load_via_fgets(std::function<char *(char *, int)> fgets_cb, bool skip_fragments) {
bool UNICHARSET::load_via_fgets(
const std::function<char *(char *, int)> &fgets_cb, bool skip_fragments) {
int unicharset_size;
char buffer[256];
this->clear();
if (fgets_cb(buffer, sizeof(buffer)) == nullptr || sscanf(buffer, "%d", &unicharset_size) != 1) {
if (fgets_cb(buffer, sizeof(buffer)) == nullptr ||
sscanf(buffer, "%d", &unicharset_size) != 1) {
return false;
}
for (UNICHAR_ID id = 0; id < unicharset_size; ++id) {
@ -800,27 +829,30 @@ bool UNICHARSET::load_via_fgets(std::function<char *(char *, int)> fgets_cb, boo
auto position = stream.tellg();
stream.seekg(position);
char c1, c2, c3, c4, c5, c6, c7, c8, c9;
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> width >>
c5 >> width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> advance >> c9 >> advance_sd >>
std::setw(63) >> script >> other_case >> direction >> mirror >> std::setw(63) >> normed;
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || c5 != ',' ||
c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >>
bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >>
script >> other_case >> direction >> mirror >> std::setw(63) >> normed;
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' ||
c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
stream.clear();
stream.seekg(position);
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> width >>
c5 >> width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> advance >> c9 >>
advance_sd >> std::setw(63) >> script >> other_case >> direction >> mirror;
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || c5 != ',' ||
c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >>
bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >>
script >> other_case >> direction >> mirror;
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' ||
c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
stream.clear();
stream.seekg(position);
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >>
std::setw(63) >> script >> other_case >> direction >> mirror;
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
max_top >> std::setw(63) >> script >> other_case >> direction >>
mirror;
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') {
stream.clear();
stream.seekg(position);
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >>
std::setw(63) >> script >> other_case;
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
max_top >> std::setw(63) >> script >> other_case;
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') {
stream.clear();
stream.seekg(position);
@ -909,7 +941,8 @@ void UNICHARSET::post_load_setup() {
script_has_upper_lower_ = net_case_alphas > 0;
script_has_xheight_ =
script_has_upper_lower_ || (x_height_alphas > cap_height_alphas * kMinXHeightFraction &&
script_has_upper_lower_ ||
(x_height_alphas > cap_height_alphas * kMinXHeightFraction &&
cap_height_alphas > x_height_alphas * kMinCapHeightFraction);
null_sid_ = get_script_id_from_name(null_script);
@ -954,7 +987,8 @@ bool UNICHARSET::major_right_to_left() const {
if (dir == UNICHARSET::U_LEFT_TO_RIGHT) {
ltr_count++;
}
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
dir == UNICHARSET::U_ARABIC_NUMBER) {
rtl_count++;
}
@ -966,7 +1000,8 @@ bool UNICHARSET::major_right_to_left() const {
// An empty or nullptr whitelist enables everything (minus any blacklist).
// An empty or nullptr blacklist disables nothing.
// An empty or nullptr blacklist has no effect.
void UNICHARSET::set_black_and_whitelist(const char *blacklist, const char *whitelist,
void UNICHARSET::set_black_and_whitelist(const char *blacklist,
const char *whitelist,
const char *unblacklist) {
bool def_enabled = whitelist == nullptr || whitelist[0] == '\0';
// Set everything to default
@ -1037,7 +1072,8 @@ int UNICHARSET::add_script(const char *script) {
assert(script_table_size_used == script_table_size_reserved);
script_table_size_reserved += script_table_size_reserved;
char **new_script_table = new char *[script_table_size_reserved];
memcpy(new_script_table, script_table, script_table_size_used * sizeof(char *));
memcpy(new_script_table, script_table,
script_table_size_used * sizeof(char *));
delete[] script_table;
script_table = new_script_table;
}
@ -1048,7 +1084,8 @@ int UNICHARSET::add_script(const char *script) {
// Returns the string that represents a fragment
// with the given unichar, pos and total.
std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, bool natural) {
std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total,
bool natural) {
if (total == 1) {
return std::string(unichar);
}
@ -1056,8 +1093,8 @@ std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, bo
result += kSeparator;
result += unichar;
char buffer[kMaxLen];
snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos, natural ? kNaturalFlag : kSeparator,
total);
snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos,
natural ? kNaturalFlag : kSeparator, total);
result += buffer;
return result;
}

View File

@ -85,7 +85,8 @@ public:
// Returns the string that represents a fragment
// with the given unichar, pos and total.
static std::string to_string(const char *unichar, int pos, int total, bool natural);
static std::string to_string(const char *unichar, int pos, int total,
bool natural);
// Returns the string that represents this fragment.
std::string to_string() const {
return to_string(unichar, pos, total, natural);
@ -93,19 +94,22 @@ public:
// Checks whether a fragment has the same unichar,
// position and total as the given inputs.
inline bool equals(const char *other_unichar, int other_pos, int other_total) const {
return (strcmp(this->unichar, other_unichar) == 0 && this->pos == other_pos &&
this->total == other_total);
inline bool equals(const char *other_unichar, int other_pos,
int other_total) const {
return (strcmp(this->unichar, other_unichar) == 0 &&
this->pos == other_pos && this->total == other_total);
}
inline bool equals(const CHAR_FRAGMENT *other) const {
return this->equals(other->get_unichar(), other->get_pos(), other->get_total());
return this->equals(other->get_unichar(), other->get_pos(),
other->get_total());
}
// Checks whether a given fragment is a continuation of this fragment.
// Assumes that the given fragment pointer is not nullptr.
inline bool is_continuation_of(const CHAR_FRAGMENT *fragment) const {
return (strcmp(this->unichar, fragment->get_unichar()) == 0 &&
this->total == fragment->get_total() && this->pos == fragment->get_pos() + 1);
this->total == fragment->get_total() &&
this->pos == fragment->get_pos() + 1);
}
// Returns true if this fragment is a beginning fragment.
@ -237,8 +241,10 @@ public:
// WARNING: Caller must guarantee that str has already been cleaned of codes
// that do not belong in the unicharset, or encoding may fail.
// Use CleanupString to perform the cleaning.
bool encode_string(const char *str, bool give_up_on_failure, std::vector<UNICHAR_ID> *encoding,
std::vector<char> *lengths, unsigned *encoded_length) const;
bool encode_string(const char *str, bool give_up_on_failure,
std::vector<UNICHAR_ID> *encoding,
std::vector<char> *lengths,
unsigned *encoded_length) const;
// Return the unichar representation corresponding to the given UNICHAR_ID
// within the UNICHARSET.
@ -272,7 +278,8 @@ public:
// TATWEEL characters are kept and n-grams are allowed. Otherwise TATWEEL
// characters are ignored/skipped as if they don't exist and n-grams that
// can already be encoded are not added.
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style);
void unichar_insert(const char *const unichar_repr,
OldUncleanUnichars old_style);
void unichar_insert(const char *const unichar_repr) {
unichar_insert(unichar_repr, OldUncleanUnichars::kFalse);
}
@ -365,7 +372,8 @@ public:
// Returns true if the operation is successful.
bool save_to_file(FILE *file) const {
std::string str;
return save_to_string(str) && tesseract::Serialize(file, &str[0], str.length());
return save_to_string(str) &&
tesseract::Serialize(file, &str[0], str.length());
}
bool save_to_file(tesseract::TFile *file) const {
@ -575,8 +583,8 @@ public:
// baseline-normalized coordinates, ie, where the baseline is
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight
// (See normalis.h for the definitions).
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top,
int *max_top) const {
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom,
int *min_top, int *max_top) const {
if (INVALID_UNICHAR_ID == unichar_id) {
*min_bottom = *min_top = 0;
*max_bottom = *max_top = 256; // kBlnCellHeight
@ -588,16 +596,21 @@ public:
*min_top = unichars[unichar_id].properties.min_top;
*max_top = unichars[unichar_id].properties.max_top;
}
void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top,
int max_top) {
unichars[unichar_id].properties.min_bottom = ClipToRange<int>(min_bottom, 0, UINT8_MAX);
unichars[unichar_id].properties.max_bottom = ClipToRange<int>(max_bottom, 0, UINT8_MAX);
unichars[unichar_id].properties.min_top = ClipToRange<int>(min_top, 0, UINT8_MAX);
unichars[unichar_id].properties.max_top = ClipToRange<int>(max_top, 0, UINT8_MAX);
void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom,
int min_top, int max_top) {
unichars[unichar_id].properties.min_bottom =
ClipToRange<int>(min_bottom, 0, UINT8_MAX);
unichars[unichar_id].properties.max_bottom =
ClipToRange<int>(max_bottom, 0, UINT8_MAX);
unichars[unichar_id].properties.min_top =
ClipToRange<int>(min_top, 0, UINT8_MAX);
unichars[unichar_id].properties.max_top =
ClipToRange<int>(max_top, 0, UINT8_MAX);
}
// Returns the width stats (as mean, sd) of the given unichar relative to the
// median advance of all characters in the character set.
void get_width_stats(UNICHAR_ID unichar_id, float *width, float *width_sd) const {
void get_width_stats(UNICHAR_ID unichar_id, float *width,
float *width_sd) const {
if (INVALID_UNICHAR_ID == unichar_id) {
*width = 0.0f;
*width_sd = 0.0f;
@ -614,7 +627,8 @@ public:
}
// Returns the stats of the x-bearing (as mean, sd) of the given unichar
// relative to the median advance of all characters in the character set.
void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const {
void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing,
float *bearing_sd) const {
if (INVALID_UNICHAR_ID == unichar_id) {
*bearing = *bearing_sd = 0.0f;
return;
@ -623,13 +637,15 @@ public:
*bearing = unichars[unichar_id].properties.bearing;
*bearing_sd = unichars[unichar_id].properties.bearing_sd;
}
void set_bearing_stats(UNICHAR_ID unichar_id, float bearing, float bearing_sd) {
void set_bearing_stats(UNICHAR_ID unichar_id, float bearing,
float bearing_sd) {
unichars[unichar_id].properties.bearing = bearing;
unichars[unichar_id].properties.bearing_sd = bearing_sd;
}
// Returns the stats of the x-advance of the given unichar (as mean, sd)
// relative to the median advance of all characters in the character set.
void get_advance_stats(UNICHAR_ID unichar_id, float *advance, float *advance_sd) const {
void get_advance_stats(UNICHAR_ID unichar_id, float *advance,
float *advance_sd) const {
if (INVALID_UNICHAR_ID == unichar_id) {
*advance = *advance_sd = 0;
return;
@ -638,7 +654,8 @@ public:
*advance = unichars[unichar_id].properties.advance;
*advance_sd = unichars[unichar_id].properties.advance_sd;
}
void set_advance_stats(UNICHAR_ID unichar_id, float advance, float advance_sd) {
void set_advance_stats(UNICHAR_ID unichar_id, float advance,
float advance_sd) {
unichars[unichar_id].properties.advance = advance;
unichars[unichar_id].properties.advance_sd = advance_sd;
}
@ -654,8 +671,9 @@ public:
return true;
}
int script_id = get_script(unichar_id);
return script_id != han_sid_ && script_id != thai_sid_ && script_id != hangul_sid_ &&
script_id != hiragana_sid_ && script_id != katakana_sid_;
return script_id != han_sid_ && script_id != thai_sid_ &&
script_id != hangul_sid_ && script_id != hiragana_sid_ &&
script_id != katakana_sid_;
}
// Return the script name of the given unichar.
@ -738,7 +756,8 @@ public:
// at these codes and they should not be used.
bool has_special_codes() const {
return get_fragment(UNICHAR_BROKEN) != nullptr &&
strcmp(id_to_unichar(UNICHAR_BROKEN), kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0;
strcmp(id_to_unichar(UNICHAR_BROKEN),
kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0;
}
// Returns true if there are any repeated unicodes in the normalized
@ -800,7 +819,8 @@ public:
// Return a pointer to the CHAR_FRAGMENT class struct if the given
// unichar representation represents a character fragment.
const CHAR_FRAGMENT *get_fragment(const char *const unichar_repr) const {
if (unichar_repr == nullptr || unichar_repr[0] == '\0' || !ids.contains(unichar_repr, false)) {
if (unichar_repr == nullptr || unichar_repr[0] == '\0' ||
!ids.contains(unichar_repr, false)) {
return nullptr;
}
return get_fragment(unichar_to_id(unichar_repr));
@ -1020,8 +1040,9 @@ private:
// best_encoding contains the encoding that used the longest part of str.
// best_lengths (may be null) contains the lengths of best_encoding.
void encode_string(const char *str, int str_index, int str_length,
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
unsigned *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
std::vector<UNICHAR_ID> *encoding,
std::vector<char> *lengths, unsigned *best_total_length,
std::vector<UNICHAR_ID> *best_encoding,
std::vector<char> *best_lengths) const;
// Gets the properties for a grapheme string, combining properties for
@ -1034,7 +1055,8 @@ private:
// Load ourselves from a "file" where our only interface to the file is
// an implementation of fgets(). This is the parsing primitive accessed by
// the public routines load_from_file().
bool load_via_fgets(std::function<char *(char *, int)> fgets_cb, bool skip_fragments);
bool load_via_fgets(const std::function<char *(char *, int)> &fgets_cb,
bool skip_fragments);
// List of mappings to make when ingesting strings from the outside.
// The substitutions clean up text that should exists for rendering of

View File

@ -1782,13 +1782,13 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATU
Y2 = F2->Params[PicoFeatY];
A2 = F2->Params[PicoFeatDir];
AngleDelta = fabs(A1 - A2);
AngleDelta = std::fabs(A1 - A2);
if (AngleDelta > 0.5) {
AngleDelta = 1.0 - AngleDelta;
}
if (AngleDelta > matcher_clustering_max_angle_delta || fabs(X1 - X2) > SegmentLength ||
fabs(Y1 - Y2) > SegmentLength) {
if (AngleDelta > matcher_clustering_max_angle_delta || std::fabs(X1 - X2) > SegmentLength ||
std::fabs(Y1 - Y2) > SegmentLength) {
break;
}
}

View File

@ -1674,13 +1674,13 @@ float Mean(PROTOTYPE *Proto, uint16_t Dimension) {
float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) {
switch (Proto->Style) {
case spherical:
return sqrt(Proto->Variance.Spherical);
return std::sqrt(Proto->Variance.Spherical);
case elliptical:
return sqrt(Proto->Variance.Elliptical[Dimension]);
return std::sqrt(Proto->Variance.Elliptical[Dimension]);
case mixed:
switch (Proto->Distrib[Dimension]) {
case normal:
return sqrt(Proto->Variance.Elliptical[Dimension]);
return std::sqrt(Proto->Variance.Elliptical[Dimension]);
case uniform:
case D_random:
return Proto->Variance.Elliptical[Dimension];
@ -2268,7 +2268,7 @@ static PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATIST
}
FillBuckets(NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i],
sqrt(Proto->Variance.Elliptical[i]));
std::sqrt(Proto->Variance.Elliptical[i]));
if (DistributionOK(NormalBuckets)) {
continue;
}
@ -2576,7 +2576,7 @@ static bool Independent(PARAM_DESC *ParamDesc, int16_t N, float *CoVariance, flo
if ((*VARii == 0.0) || (*VARjj == 0.0)) {
CorrelationCoeff = 0.0;
} else {
CorrelationCoeff = sqrt(sqrt(*CoVariance * *CoVariance / (*VARii * *VARjj)));
CorrelationCoeff = sqrt(std::sqrt(*CoVariance * *CoVariance / (*VARii * *VARjj)));
}
if (CorrelationCoeff > Independence) {
return false;

View File

@ -207,7 +207,7 @@ PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) {
case spherical:
ReadNFloats(fp, 1, &(Proto->Variance.Spherical));
Proto->Magnitude.Spherical = 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N));
Proto->TotalMagnitude = std::pow(Proto->Magnitude.Spherical, static_cast<float>(N));
Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
Proto->Distrib.clear();

View File

@ -676,7 +676,7 @@ IntegerMatcher::IntegerMatcher(tesseract::IntParam *classify_debug_level)
if (kSEExponentialMultiplier > 0.0) {
double scale =
1.0 - exp(-kSEExponentialMultiplier) *
1.0 - std::exp(-kSEExponentialMultiplier) *
exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
evidence *= ClipToRange(scale, 0.0, 1.0);
}

View File

@ -365,14 +365,14 @@ void AddProtoToProtoPruner(PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *C
Length = Proto->Length;
X = Proto->X + X_SHIFT;
Pad = std::max(fabs(cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
fabs(sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
Pad = std::max(fabs(std::cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
fabs(std::sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug);
Y = Proto->Y + Y_SHIFT;
Pad = std::max(fabs(sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
fabs(cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
Pad = std::max(fabs(std::sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
fabs(std::cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug);
} /* AddProtoToProtoPruner */
@ -1388,8 +1388,8 @@ void InitTableFiller(float EndPad, float SidePad, float AnglePad, PROTO_STRUCT *
if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) {
/* rising diagonal proto */
Angle *= 2.0 * M_PI;
Cos = fabs(cos(Angle));
Sin = fabs(sin(Angle));
Cos = fabs(std::cos(Angle));
Sin = fabs(std::sin(Angle));
/* compute the positions of the corners of the acceptance region */
Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;
@ -1438,8 +1438,8 @@ void InitTableFiller(float EndPad, float SidePad, float AnglePad, PROTO_STRUCT *
} else {
/* falling diagonal proto */
Angle *= 2.0 * M_PI;
Cos = fabs(cos(Angle));
Sin = fabs(sin(Angle));
Cos = fabs(std::cos(Angle));
Sin = fabs(std::sin(Angle));
/* compute the positions of the corners of the acceptance region */
Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;

View File

@ -33,7 +33,7 @@ namespace tesseract {
/*-----------------------------------------------------------------------------
Global Data Definitions and Declarations
-----------------------------------------------------------------------------*/
#define MINSEARCH -FLT_MAX
#define MINSEARCH (-FLT_MAX)
#define MAXSEARCH FLT_MAX
// Helper function to find the next essential dimension in a cycle.
@ -398,7 +398,7 @@ float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
}
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
return sqrt(DistanceSquared(k, dim, p1, p2));
return std::sqrt(DistanceSquared(k, dim, p1, p2));
}
/*---------------------------------------------------------------------------*/

View File

@ -38,7 +38,8 @@ namespace tesseract {
// instead of weak vtables in every compilation unit.
Dawg::~Dawg() = default;
bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, bool requires_complete) const {
bool Dawg::prefix_in_dawg(const WERD_CHOICE &word,
bool requires_complete) const {
if (word.empty()) {
return !requires_complete;
}
@ -56,7 +57,8 @@ bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, bool requires_complete) const
}
}
// Now check the last character.
return edge_char_of(node, word.unichar_id(end_index), requires_complete) != NO_EDGE;
return edge_char_of(node, word.unichar_id(end_index), requires_complete) !=
NO_EDGE;
}
bool Dawg::word_in_dawg(const WERD_CHOICE &word) const {
@ -84,7 +86,8 @@ int Dawg::check_for_words(const char *filename, const UNICHARSET &unicharset,
chomp_string(string); // remove newline
WERD_CHOICE word(string, unicharset);
if (word.length() > 0 && !word.contains_unichar_id(INVALID_UNICHAR_ID)) {
if (!match_words(&word, 0, 0, enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
if (!match_words(&word, 0, 0,
enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
tprintf("Missing word: %s\n", string);
++misses;
}
@ -106,21 +109,25 @@ void Dawg::iterate_words(const UNICHARSET &unicharset,
iterate_words_rec(word, 0, cb);
}
static void CallWithUTF8(std::function<void(const char *)> cb, const WERD_CHOICE *wc) {
static void CallWithUTF8(const std::function<void(const char *)> &cb,
const WERD_CHOICE *wc) {
std::string s;
wc->string_and_lengths(&s, nullptr);
cb(s.c_str());
}
void Dawg::iterate_words(const UNICHARSET &unicharset, std::function<void(const char *)> cb) const {
void Dawg::iterate_words(const UNICHARSET &unicharset,
const std::function<void(const char *)> &cb) const {
using namespace std::placeholders; // for _1
std::function<void(const WERD_CHOICE *)> shim(std::bind(CallWithUTF8, cb, _1));
std::function<void(const WERD_CHOICE *)> shim(
std::bind(CallWithUTF8, cb, _1));
WERD_CHOICE word(&unicharset);
iterate_words_rec(word, 0, shim);
}
void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore,
std::function<void(const WERD_CHOICE *)> cb) const {
void Dawg::iterate_words_rec(
const WERD_CHOICE &word_so_far, NODE_REF to_explore,
const std::function<void(const WERD_CHOICE *)> &cb) const {
NodeChildVector children;
this->unichar_ids_of(to_explore, &children, false);
for (auto &i : children) {
@ -136,7 +143,8 @@ void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore
}
}
bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const {
bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node,
UNICHAR_ID wildcard) const {
if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) {
bool any_matched = false;
NodeChildVector vec;
@ -187,7 +195,8 @@ SquishedDawg::~SquishedDawg() {
delete[] edges_;
}
EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const {
EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
bool word_end) const {
EDGE_REF edge = node;
if (node == 0) { // binary search
EDGE_REF start = 0;
@ -195,7 +204,8 @@ EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool w
int compare;
while (start <= end) {
edge = (start + end) >> 1; // (start + end) / 2
compare = given_greater_than_edge_rec(NO_EDGE, word_end, unichar_id, edges_[edge]);
compare = given_greater_than_edge_rec(NO_EDGE, word_end, unichar_id,
edges_[edge]);
if (compare == 0) { // given == vec[k]
return edge;
} else if (compare == 1) { // given > vec[k]
@ -258,8 +268,8 @@ void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const {
eow = end_of_word(edge) ? eow_string : not_eow_string;
unichar_id = edge_letter(edge);
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", edge,
next_node(edge), unichar_id, direction, is_last, eow);
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n",
edge, next_node(edge), unichar_id, direction, is_last, eow);
if (edge - node > max_num_edges) {
return;
@ -273,8 +283,9 @@ void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const {
eow = end_of_word(edge) ? eow_string : not_eow_string;
unichar_id = edge_letter(edge);
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", edge,
next_node(edge), unichar_id, direction, is_last, eow);
tprintf(REFFORMAT " : next = " REFFORMAT
", unichar_id = %d, %s %s %s\n",
edge, next_node(edge), unichar_id, direction, is_last, eow);
if (edge - node > MAX_NODE_EDGES_DISPLAY) {
return;
@ -291,9 +302,11 @@ void SquishedDawg::print_edge(EDGE_REF edge) const {
if (edge == NO_EDGE) {
tprintf("NO_EDGE\n");
} else {
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = '%d', %s %s %s\n", edge,
next_node(edge), edge_letter(edge), (forward_edge(edge) ? "FORWARD" : " "),
(last_edge(edge) ? "LAST" : " "), (end_of_word(edge) ? "EOW" : ""));
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = '%d', %s %s %s\n",
edge, next_node(edge), edge_letter(edge),
(forward_edge(edge) ? "FORWARD" : " "),
(last_edge(edge) ? "LAST" : " "),
(end_of_word(edge) ? "EOW" : ""));
}
}
@ -328,8 +341,8 @@ bool SquishedDawg::read_squished_dawg(TFile *file) {
return false;
}
if (debug_level_ > 2) {
tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n", type_, lang_.c_str(),
perm_, unicharset_size_, num_edges_);
tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n",
type_, lang_.c_str(), perm_, unicharset_size_, num_edges_);
for (EDGE_REF edge = 0; edge < num_edges_; ++edge) {
print_edge(edge);
}
@ -337,7 +350,8 @@ bool SquishedDawg::read_squished_dawg(TFile *file) {
return true;
}
std::unique_ptr<EDGE_REF[]> SquishedDawg::build_node_map(int32_t *num_nodes) const {
std::unique_ptr<EDGE_REF[]> SquishedDawg::build_node_map(
int32_t *num_nodes) const {
EDGE_REF edge;
std::unique_ptr<EDGE_REF[]> node_map(new EDGE_REF[num_edges_]);
int32_t node_counter;

View File

@ -147,16 +147,19 @@ public:
// For each word in the Dawg, call the given (permanent) callback with the
// text (UTF-8) version of the word.
void iterate_words(const UNICHARSET &unicharset, std::function<void(const char *)> cb) const;
void iterate_words(const UNICHARSET &unicharset,
const std::function<void(const char *)> &cb) const;
// Pure virtual function that should be implemented by the derived classes.
/// Returns the edge that corresponds to the letter out of this node.
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const = 0;
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
bool word_end) const = 0;
/// Fills the given NodeChildVector with all the unichar ids (and the
/// corresponding EDGE_REFs) for which there is an edge out of this node.
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const = 0;
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
bool word_end) const = 0;
/// Returns the next node visited by following the edge
/// indicated by the given EDGE_REF.
@ -175,7 +178,8 @@ public:
/// Fills vec with unichar ids that represent the character classes
/// of the given unichar_id.
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset,
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
const UNICHARSET &unicharset,
std::vector<UNICHAR_ID> *vec) const {
(void)unichar_id;
(void)unicharset;
@ -194,8 +198,13 @@ public:
}
protected:
Dawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level)
: lang_(lang), type_(type), perm_(perm), unicharset_size_(0), debug_level_(debug_level) {}
Dawg(DawgType type, const std::string &lang, PermuterType perm,
int debug_level)
: lang_(lang),
type_(type),
perm_(perm),
unicharset_size_(0),
debug_level_(debug_level) {}
/// Returns the next node visited by following this edge.
inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
@ -207,14 +216,16 @@ protected:
}
/// Returns the direction flag of this edge.
inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE : FORWARD_EDGE;
return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE
: FORWARD_EDGE;
}
/// Returns true if this edge marks the end of a word.
inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
}
/// Returns UNICHAR_ID recorded in this edge.
inline UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const {
inline UNICHAR_ID unichar_id_from_edge_rec(
const EDGE_RECORD &edge_rec) const {
return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
}
/// Sets the next node link for this edge in the Dawg.
@ -233,13 +244,14 @@ protected:
/// checked are the same)
/// 0 if edge_rec_match() returns true
/// -1 otherwise
inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id,
inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end,
UNICHAR_ID unichar_id,
const EDGE_RECORD &edge_rec) const {
UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node, curr_word_end,
curr_unichar_id)) {
if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
curr_word_end, curr_unichar_id)) {
return 0;
}
if (unichar_id > curr_unichar_id) {
@ -260,8 +272,9 @@ protected:
/// Returns true if all the values are equal (any value matches
/// next_node if next_node == NO_EDGE, any value matches word_end
/// if word_end is false).
inline bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id,
NODE_REF other_next_node, bool other_word_end,
inline bool edge_rec_match(NODE_REF next_node, bool word_end,
UNICHAR_ID unichar_id, NODE_REF other_next_node,
bool other_word_end,
UNICHAR_ID other_unichar_id) const {
return ((unichar_id == other_unichar_id) &&
(next_node == NO_EDGE || next_node == other_next_node) &&
@ -277,11 +290,13 @@ protected:
/// the *'s in this string are interpreted as wildcards.
/// WERD_CHOICE param is not passed by const so that wildcard searches
/// can modify it and work without having to copy WERD_CHOICEs.
bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const;
bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node,
UNICHAR_ID wildcard) const;
// Recursively iterate over all words in a dawg (see public iterate_words).
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore,
std::function<void(const WERD_CHOICE *)> cb) const;
void iterate_words_rec(
const WERD_CHOICE &word_so_far, NODE_REF to_explore,
const std::function<void(const WERD_CHOICE *)> &cb) const;
// Member Variables.
std::string lang_;
@ -339,12 +354,13 @@ protected:
// We're back in the punctuation dawg. Continuing there is the only option.
struct DawgPosition {
DawgPosition() = default;
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
: dawg_ref(dawgref)
, punc_ref(puncref)
, dawg_index(dawg_idx)
, punc_index(punc_idx)
, back_to_punc(backtopunc) {}
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref,
bool backtopunc)
: dawg_ref(dawgref),
punc_ref(puncref),
dawg_index(dawg_idx),
punc_index(punc_idx),
back_to_punc(backtopunc) {}
bool operator==(const DawgPosition &other) {
return dawg_index == other.dawg_index && dawg_ref == other.dawg_ref &&
punc_index == other.punc_index && punc_ref == other.punc_ref &&
@ -364,7 +380,8 @@ public:
/// Adds an entry for the given dawg_index with the given node to the vec.
/// Returns false if the same entry already exists in the vector,
/// true otherwise.
inline bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg) {
inline bool add_unique(const DawgPosition &new_pos, bool debug,
const char *debug_msg) {
for (auto position : *this) {
if (position == new_pos) {
return false;
@ -372,8 +389,9 @@ public:
}
push_back(new_pos);
if (debug) {
tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg, new_pos.dawg_index,
new_pos.dawg_ref, new_pos.punc_ref, new_pos.back_to_punc ? " returned" : "");
tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg,
new_pos.dawg_index, new_pos.dawg_ref, new_pos.punc_ref,
new_pos.back_to_punc ? " returned" : "");
}
return true;
}
@ -389,19 +407,23 @@ public:
//
class TESS_API SquishedDawg : public Dawg {
public:
SquishedDawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level)
: Dawg(type, lang, perm, debug_level) {}
SquishedDawg(const char *filename, DawgType type, const std::string &lang, PermuterType perm,
SquishedDawg(DawgType type, const std::string &lang, PermuterType perm,
int debug_level)
: Dawg(type, lang, perm, debug_level) {}
SquishedDawg(const char *filename, DawgType type, const std::string &lang,
PermuterType perm, int debug_level)
: Dawg(type, lang, perm, debug_level) {
TFile file;
ASSERT_HOST(file.Open(filename, nullptr));
ASSERT_HOST(read_squished_dawg(&file));
num_forward_edges_in_node0 = num_forward_edges(0);
}
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const std::string &lang,
PermuterType perm, int unicharset_size, int debug_level)
: Dawg(type, lang, perm, debug_level), edges_(edges), num_edges_(num_edges) {
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
const std::string &lang, PermuterType perm, int unicharset_size,
int debug_level)
: Dawg(type, lang, perm, debug_level),
edges_(edges),
num_edges_(num_edges) {
init(unicharset_size);
num_forward_edges_in_node0 = num_forward_edges(0);
if (debug_level > 3) {
@ -424,11 +446,13 @@ public:
}
/// Returns the edge that corresponds to the letter out of this node.
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const override;
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
bool word_end) const override;
/// Fills the given NodeChildVector with all the unichar ids (and the
/// corresponding EDGE_REFs) for which there is an edge out of this node.
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override {
void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
bool word_end) const override {
EDGE_REF edge = node;
if (!edge_occupied(edge) || edge == NO_EDGE) {
return;
@ -502,7 +526,8 @@ private:
}
/// Returns true if this edge is in the forward direction.
inline bool forward_edge(EDGE_REF edge_ref) const {
return (edge_occupied(edge_ref) && (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
return (edge_occupied(edge_ref) &&
(FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
}
/// Returns true if this edge is in the backward direction.
inline bool backward_edge(EDGE_REF edge_ref) const {

View File

@ -17,6 +17,7 @@
#include "networkio.h"
#include <cfloat> // for FLT_MAX
#include <cmath>
#include <allheaders.h>
#include "functions.h"
@ -28,7 +29,7 @@ namespace tesseract {
// Minimum value to output for certainty.
const float kMinCertainty = -20.0f;
// Probability corresponding to kMinCertainty.
const float kMinProb = exp(kMinCertainty);
const float kMinProb = std::exp(kMinCertainty);
// Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
@ -356,7 +357,7 @@ Image NetworkIO::ToPix() const {
} else if (num_features > 3) {
// More than 3 features use false yellow/blue color, assuming a signed
// input in the range [-1,1].
red = ClipToRange<int>(IntCastRounded(fabs(pixel) * 255), 0, 255);
red = ClipToRange<int>(IntCastRounded(std::fabs(pixel) * 255), 0, 255);
if (pixel >= 0) {
green = red;
blue = 0;
@ -586,7 +587,7 @@ void NetworkIO::EnsureBestLabel(int t, int label) {
// Helper function converts prob to certainty taking the minimum into account.
/* static */
float NetworkIO::ProbToCertainty(float prob) {
return prob > kMinProb ? log(prob) : kMinCertainty;
return prob > kMinProb ? std::log(prob) : kMinCertainty;
}
// Returns true if there is any bad value that is suspiciously like a GT
@ -807,7 +808,7 @@ void NetworkIO::ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const Network
// Reconstruct the target from the delta.
float comb_target = delta_line[i] + output;
comb_line[i] = comb_target - comb_line[i];
float base_delta = fabs(comb_target - base_line[i]);
float base_delta = std::fabs(comb_target - base_line[i]);
if (base_delta > max_base_delta) {
max_base_delta = base_delta;
}

View File

@ -41,11 +41,13 @@ const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
// Prints debug details of the node.
void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) const {
void RecodeNode::Print(int null_char, const UNICHARSET &unicharset,
int depth) const {
if (code == null_char) {
tprintf("null_char");
} else {
tprintf("label=%d, uid=%d=%s", code, unichar_id, unicharset.debug_str(unichar_id).c_str());
tprintf("label=%d, uid=%d=%s", code, unichar_id,
unicharset.debug_str(unichar_id).c_str());
}
tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%" PRIx64, score, certainty,
start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
@ -59,16 +61,16 @@ void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) c
}
// Borrows the pointer, which is expected to survive until *this is deleted.
RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text,
Dict *dict)
: recoder_(recoder)
, beam_size_(0)
, top_code_(-1)
, second_code_(-1)
, dict_(dict)
, space_delimited_(true)
, is_simple_text_(simple_text)
, null_char_(null_char) {
RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder,
int null_char, bool simple_text, Dict *dict)
: recoder_(recoder),
beam_size_(0),
top_code_(-1),
second_code_(-1),
dict_(dict),
space_delimited_(true),
is_simple_text_(simple_text),
null_char_(null_char) {
if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) {
space_delimited_ = false;
}
@ -84,9 +86,9 @@ RecodeBeamSearch::~RecodeBeamSearch() {
}
// Decodes the set of network outputs, storing the lattice internally.
void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double cert_offset,
double worst_dict_cert, const UNICHARSET *charset,
int lstm_choice_mode) {
void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio,
double cert_offset, double worst_dict_cert,
const UNICHARSET *charset, int lstm_choice_mode) {
beam_size_ = 0;
int width = output.Width();
if (lstm_choice_mode) {
@ -94,14 +96,16 @@ void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double
}
for (int t = 0; t < width; ++t) {
ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
charset);
if (lstm_choice_mode) {
SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
}
}
}
void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output, double dict_ratio,
double cert_offset, double worst_dict_cert,
void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output,
double dict_ratio, double cert_offset,
double worst_dict_cert,
const UNICHARSET *charset) {
beam_size_ = 0;
int width = output.dim1();
@ -111,9 +115,9 @@ void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output, double dict
}
}
void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio,
double cert_offset, double worst_dict_cert,
const UNICHARSET *charset, int lstm_choice_mode) {
void RecodeBeamSearch::DecodeSecondaryBeams(
const NetworkIO &output, double dict_ratio, double cert_offset,
double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode) {
for (auto data : secondary_beam_) {
delete data;
}
@ -128,14 +132,17 @@ void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict
t >= character_boundaries_[bucketNumber + 1]) {
++bucketNumber;
}
ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t), output.NumFeatures(),
kBeamWidths[0]);
DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t),
output.NumFeatures(), kBeamWidths[0]);
DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset,
worst_dict_cert, charset);
}
}
void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outputs,
const UNICHARSET *charset, int xCoord) {
void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs,
int num_outputs,
const UNICHARSET *charset,
int xCoord) {
std::vector<std::pair<const char *, float>> choices;
for (int i = 0; i < num_outputs; ++i) {
if (outputs[i] >= 0.01f) {
@ -153,7 +160,8 @@ void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outp
while (choices.size() > pos && choices[pos].second > outputs[i]) {
pos++;
}
choices.insert(choices.begin() + pos, std::pair<const char *, float>(character, outputs[i]));
choices.insert(choices.begin() + pos,
std::pair<const char *, float>(character, outputs[i]));
}
}
timesteps.push_back(choices);
@ -162,7 +170,8 @@ void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outp
void RecodeBeamSearch::segmentTimestepsByCharacters() {
for (unsigned i = 1; i < character_boundaries_.size(); ++i) {
std::vector<std::vector<std::pair<const char *, float>>> segment;
for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i]; ++j) {
for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i];
++j) {
segment.push_back(timesteps[j]);
}
segmentedTimesteps.push_back(segment);
@ -170,7 +179,8 @@ void RecodeBeamSearch::segmentTimestepsByCharacters() {
}
std::vector<std::vector<std::pair<const char *, float>>>
RecodeBeamSearch::combineSegmentedTimesteps(
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *segmentedTimesteps) {
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*segmentedTimesteps) {
std::vector<std::vector<std::pair<const char *, float>>> combined_timesteps;
for (auto &segmentedTimestep : *segmentedTimesteps) {
for (auto &j : segmentedTimestep) {
@ -180,8 +190,10 @@ RecodeBeamSearch::combineSegmentedTimesteps(
return combined_timesteps;
}
void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
std::vector<int> *char_bounds_, int maxWidth) {
void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts,
std::vector<int> *ends,
std::vector<int> *char_bounds_,
int maxWidth) {
char_bounds_->push_back(0);
for (unsigned i = 0; i < ends->size(); ++i) {
int middle = ((*starts)[i + 1] - (*ends)[i]) / 2;
@ -192,8 +204,8 @@ void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::ve
}
// Returns the best path as labels/scores/xcoords similar to simple CTC.
void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector<int> *labels,
std::vector<int> *xcoords) const {
void RecodeBeamSearch::ExtractBestPathAsLabels(
std::vector<int> *labels, std::vector<int> *xcoords) const {
labels->clear();
xcoords->clear();
std::vector<const RecodeNode *> best_nodes;
@ -215,22 +227,23 @@ void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector<int> *labels,
// Returns the best path as unichar-ids/certs/ratings/xcoords skipping
// duplicates, nulls and intermediate parts.
void RecodeBeamSearch::ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset,
std::vector<int> *unichar_ids,
std::vector<float> *certs,
std::vector<float> *ratings,
void RecodeBeamSearch::ExtractBestPathAsUnicharIds(
bool debug, const UNICHARSET *unicharset, std::vector<int> *unichar_ids,
std::vector<float> *certs, std::vector<float> *ratings,
std::vector<int> *xcoords) const {
std::vector<const RecodeNode *> best_nodes;
ExtractBestPaths(&best_nodes, nullptr);
ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
if (debug) {
DebugPath(unicharset, best_nodes);
DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings, *xcoords);
DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
*xcoords);
}
}
// Returns the best path as a set of WERD_RES.
void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug,
void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box,
float scale_factor, bool debug,
const UNICHARSET *unicharset,
PointerVector<WERD_RES> *words,
int lstm_choice_mode) {
@ -245,9 +258,11 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_
ExtractBestPaths(&best_nodes, &second_nodes);
if (debug) {
DebugPath(unicharset, best_nodes);
ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings, &xcoords);
ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
&xcoords);
tprintf("\nSecond choice path:\n");
DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings, xcoords);
DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
xcoords);
}
// If lstm choice mode is required in granularity level 2, it stores the x
// Coordinates of every chosen character, to match the alternative choices to
@ -256,7 +271,8 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_
&character_boundaries_);
int num_ids = unichar_ids.size();
if (debug) {
DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings, xcoords);
DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
xcoords);
}
// Convert labels to unichar-ids.
int word_end = 0;
@ -283,16 +299,19 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_
if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
space_cert = certs[word_end];
}
bool leading_space = word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
bool leading_space =
word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
// Create a WERD_RES for the output word.
WERD_RES *word_res =
InitializeWord(leading_space, line_box, word_start, word_end,
std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
std::min(space_cert, prev_space_cert), unicharset,
xcoords, scale_factor);
for (int i = word_start; i < word_end; ++i) {
auto *choices = new BLOB_CHOICE_LIST;
BLOB_CHOICE_IT bc_it(choices);
auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1,
1.0f, static_cast<float>(INT16_MAX), 0.0f,
BCC_STATIC_CLASSIFIER);
int col = i - word_start;
choice->set_matrix_cell(col, col);
bc_it.add_after_then_move(choice);
@ -314,7 +333,8 @@ struct greater_than {
}
};
void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *charset,
void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs,
const UNICHARSET *charset,
bool secondary) const {
std::vector<std::vector<const RecodeNode *>> topology;
std::unordered_set<const RecodeNode *> visited;
@ -340,7 +360,7 @@ void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *
}
int ct = 0;
unsigned cb = 1;
for (std::vector<const RecodeNode *> layer : topology) {
for (const std::vector<const RecodeNode *> &layer : topology) {
if (cb >= character_boundaries_.size()) {
break;
}
@ -380,7 +400,8 @@ void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *
prevCode = " ";
}
if (uids) {
tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode, node->score);
tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode,
node->score);
} else {
tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score);
}
@ -397,7 +418,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
}
// For the first iteration the original beam is analyzed. After that a
// new beam is calculated based on the results from the original beam.
std::vector<RecodeBeam *> &currentBeam = secondary_beam_.empty() ? beam_ : secondary_beam_;
std::vector<RecodeBeam *> &currentBeam =
secondary_beam_.empty() ? beam_ : secondary_beam_;
character_boundaries_[0] = 0;
for (unsigned j = 1; j < character_boundaries_.size(); ++j) {
std::vector<int> unichar_ids;
@ -415,7 +437,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
int backcounter = 0;
const RecodeNode *node = &entry.data();
while (node != nullptr && backcounter < backpath) {
if (node->code != null_char_ && node->unichar_id != INVALID_UNICHAR_ID) {
if (node->code != null_char_ &&
node->unichar_id != INVALID_UNICHAR_ID) {
validChar = true;
break;
}
@ -430,7 +453,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
if (!best.empty()) {
std::sort(best.begin(), best.end(), greater_than());
ExtractPath(best[0], &best_nodes, backpath);
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords);
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
&xcoords);
}
if (!unichar_ids.empty()) {
int bestPos = 0;
@ -466,7 +490,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
int id = unichar_ids[bestPos];
const char *result = unicharset->id_to_unichar_ext(id);
float rating = ratings[bestPos];
ctc_choices[j - 1].push_back(std::pair<const char *, float>(result, rating));
ctc_choices[j - 1].push_back(
std::pair<const char *, float>(result, rating));
} else {
std::vector<std::pair<const char *, float>> choice;
int id = unichar_ids[bestPos];
@ -504,7 +529,8 @@ void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const {
continue;
}
// Print all the best scoring nodes for each unichar found.
tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict", kNodeContNames[c]);
tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
kNodeContNames[c]);
DebugBeamPos(unicharset, beam_[p]->beams_[index]);
}
}
@ -512,7 +538,8 @@ void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const {
}
// Generates debug output of the content of a single beam position.
void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHeap &heap) const {
void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset,
const RecodeHeap &heap) const {
std::vector<const RecodeNode *> unichar_bests(unicharset.size());
const RecodeNode *null_best = nullptr;
int heap_size = heap.size();
@ -543,11 +570,10 @@ void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHe
// Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
// duplicates, nulls and intermediate parts.
/* static */
void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNode *> &best_nodes,
std::vector<int> *unichar_ids,
std::vector<float> *certs,
std::vector<float> *ratings,
std::vector<int> *xcoords,
void RecodeBeamSearch::ExtractPathAsUnicharIds(
const std::vector<const RecodeNode *> &best_nodes,
std::vector<int> *unichar_ids, std::vector<float> *certs,
std::vector<float> *ratings, std::vector<int> *xcoords,
std::vector<int> *character_boundaries) {
unichar_ids->clear();
certs->clear();
@ -571,7 +597,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
starts.push_back(t);
if (t < width) {
int unichar_id = best_nodes[t]->unichar_id;
if (unichar_id == UNICHAR_SPACE && !certs->empty() && best_nodes[t]->permuter != NO_PERM) {
if (unichar_id == UNICHAR_SPACE && !certs->empty() &&
best_nodes[t]->permuter != NO_PERM) {
// All the rating and certainty go on the previous character except
// for the space itself.
if (certainty < certs->back()) {
@ -587,8 +614,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
double cert = best_nodes[t++]->certainty;
// Special-case NO-PERM space to forget the certainty of the previous
// nulls. See long comment in ContinueContext.
if (cert < certainty ||
(unichar_id == UNICHAR_SPACE && best_nodes[t - 1]->permuter == NO_PERM)) {
if (cert < certainty || (unichar_id == UNICHAR_SPACE &&
best_nodes[t - 1]->permuter == NO_PERM)) {
certainty = cert;
}
rating -= cert;
@ -612,19 +639,23 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
// Sets up a word with the ratings matrix and fake blobs with boxes in the
// right places.
WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_box, int word_start,
WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space,
const TBOX &line_box, int word_start,
int word_end, float space_certainty,
const UNICHARSET *unicharset,
const std::vector<int> &xcoords, float scale_factor) {
const std::vector<int> &xcoords,
float scale_factor) {
// Make a fake blob for each non-zero label.
C_BLOB_LIST blobs;
C_BLOB_IT b_it(&blobs);
for (int i = word_start; i < word_end; ++i) {
if (static_cast<unsigned>(i + 1) < character_boundaries_.size()) {
TBOX box(static_cast<int16_t>(std::floor(character_boundaries_[i] * scale_factor)) +
TBOX box(static_cast<int16_t>(
std::floor(character_boundaries_[i] * scale_factor)) +
line_box.left(),
line_box.bottom(),
static_cast<int16_t>(std::ceil(character_boundaries_[i + 1] * scale_factor)) +
static_cast<int16_t>(
std::ceil(character_boundaries_[i + 1] * scale_factor)) +
line_box.left(),
line_box.top());
b_it.add_after_then_move(C_BLOB::FakeBlob(box));
@ -644,7 +675,8 @@ WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_
// Fills top_n_flags_ with bools that are true iff the corresponding output
// is one of the top_n.
void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int top_n) {
void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs,
int top_n) {
top_n_flags_.clear();
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
top_code_ = -1;
@ -676,15 +708,17 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
top_n_flags_[null_char_] = TN_TOP2;
}
void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const float *outputs,
int num_outputs, int top_n) {
void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList,
const float *outputs, int num_outputs,
int top_n) {
top_n_flags_.clear();
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
top_code_ = -1;
second_code_ = -1;
top_heap_.clear();
for (int i = 0; i < num_outputs; ++i) {
if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) && !exList->count(i)) {
if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) &&
!exList->count(i)) {
TopPair entry(outputs[i], i);
top_heap_.Push(&entry);
if (top_heap_.size() > top_n) {
@ -712,8 +746,9 @@ void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const flo
// Adds the computation for the current time-step to the beam. Call at each
// time-step in sequence from left to right. outputs is the activation vector
// for the current timestep.
void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio,
double cert_offset, double worst_dict_cert,
void RecodeBeamSearch::DecodeStep(const float *outputs, int t,
double dict_ratio, double cert_offset,
double worst_dict_cert,
const UNICHARSET *charset, bool debug) {
if (t == static_cast<int>(beam_.size())) {
beam_.push_back(new RecodeBeam);
@ -723,11 +758,12 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio
step->Clear();
if (t == 0) {
// The first step can only use singles and initials.
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
dict_ratio, cert_offset, worst_dict_cert, step);
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
charset, dict_ratio, cert_offset, worst_dict_cert, step);
if (dict_ != nullptr) {
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
dict_ratio, cert_offset, worst_dict_cert, step);
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs,
TN_TOP2, charset, dict_ratio, cert_offset,
worst_dict_cert, step);
}
} else {
RecodeBeam *prev = beam_[t - 1];
@ -759,8 +795,9 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio
// best first, but it comes before a lot of the worst, so it is slightly
// more efficient than going forwards.
for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
dict_ratio, cert_offset, worst_dict_cert, step);
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs,
top_n, charset, dict_ratio, cert_offset,
worst_dict_cert, step);
}
}
for (int index = 0; index < kNumBeams; ++index) {
@ -775,15 +812,16 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio
if (step->best_initial_dawgs_[c].code >= 0) {
int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
RecodeHeap *dawg_heap = &step->beams_[index];
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
dawg_heap);
}
}
}
}
void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double dict_ratio,
double cert_offset, double worst_dict_cert,
const UNICHARSET *charset, bool debug) {
void RecodeBeamSearch::DecodeSecondaryStep(
const float *outputs, int t, double dict_ratio, double cert_offset,
double worst_dict_cert, const UNICHARSET *charset, bool debug) {
if (t == static_cast<int>(secondary_beam_.size())) {
secondary_beam_.push_back(new RecodeBeam);
}
@ -791,11 +829,12 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
step->Clear();
if (t == 0) {
// The first step can only use singles and initials.
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
dict_ratio, cert_offset, worst_dict_cert, step);
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
charset, dict_ratio, cert_offset, worst_dict_cert, step);
if (dict_ != nullptr) {
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
dict_ratio, cert_offset, worst_dict_cert, step);
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs,
TN_TOP2, charset, dict_ratio, cert_offset,
worst_dict_cert, step);
}
} else {
RecodeBeam *prev = secondary_beam_[t - 1];
@ -827,8 +866,9 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
// best first, but it comes before a lot of the worst, so it is slightly
// more efficient than going forwards.
for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
dict_ratio, cert_offset, worst_dict_cert, step);
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs,
top_n, charset, dict_ratio, cert_offset,
worst_dict_cert, step);
}
}
for (int index = 0; index < kNumBeams; ++index) {
@ -843,7 +883,8 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
if (step->best_initial_dawgs_[c].code >= 0) {
int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
RecodeHeap *dawg_heap = &step->beams_[index];
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
dawg_heap);
}
}
}
@ -853,10 +894,10 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
// continuations of context prev, which is of the given length, using the
// given network outputs to provide scores to the choices. Uses only those
// choices for which top_n_flags[index] == top_n_flag.
void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const float *outputs,
TopNState top_n_flag, const UNICHARSET *charset,
double dict_ratio, double cert_offset,
double worst_dict_cert, RecodeBeam *step) {
void RecodeBeamSearch::ContinueContext(
const RecodeNode *prev, int index, const float *outputs,
TopNState top_n_flag, const UNICHARSET *charset, double dict_ratio,
double cert_offset, double worst_dict_cert, RecodeBeam *step) {
RecodedCharID prefix;
RecodedCharID full_code;
const RecodeNode *previous = prev;
@ -864,7 +905,8 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
bool use_dawgs = IsDawgFromBeamsIndex(index);
NodeContinuation prev_cont = ContinuationFromBeamsIndex(index);
for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
while (previous != nullptr && (previous->duplicate || previous->code == null_char_)) {
while (previous != nullptr &&
(previous->duplicate || previous->code == null_char_)) {
previous = previous->prev;
}
if (previous != nullptr) {
@ -875,26 +917,34 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
if (prev != nullptr && !is_simple_text_) {
if (top_n_flags_[prev->code] == top_n_flag) {
if (prev_cont != NC_NO_DUP) {
float cert = NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
}
if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 && prev->code != null_char_) {
float cert =
NetworkIO::ProbToCertainty(outputs[prev->code] + outputs[null_char_]) + cert_offset;
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
dict_ratio, use_dawgs, NC_NO_DUP, prev, step);
NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
cert, worst_dict_cert, dict_ratio, use_dawgs,
NC_ANYTHING, prev, step);
}
if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 &&
prev->code != null_char_) {
float cert = NetworkIO::ProbToCertainty(outputs[prev->code] +
outputs[null_char_]) +
cert_offset;
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
cert, worst_dict_cert, dict_ratio, use_dawgs,
NC_NO_DUP, prev, step);
}
}
if (prev_cont == NC_ONLY_DUP) {
return;
}
if (prev->code != null_char_ && length > 0 && top_n_flags_[null_char_] == top_n_flag) {
if (prev->code != null_char_ && length > 0 &&
top_n_flags_[null_char_] == top_n_flag) {
// Allow nulls within multi code sequences, as the nulls within are not
// explicitly included in the code sequence.
float cert = NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID, cert, worst_dict_cert,
dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
float cert =
NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID,
cert, worst_dict_cert, dict_ratio, use_dawgs,
NC_ANYTHING, prev, step);
}
}
const std::vector<int> *final_codes = recoder_.GetFinalCodes(prefix);
@ -920,18 +970,19 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
!charset->get_enabled(unichar_id)) {
continue; // disabled by whitelist/blacklist
}
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ANYTHING,
prev, step);
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
use_dawgs, NC_ANYTHING, prev, step);
if (top_n_flag == TN_TOP2 && code != null_char_) {
float prob = outputs[code] + outputs[null_char_];
if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
if (prev != nullptr && prev_cont == NC_ANYTHING &&
prev->code != null_char_ &&
((prev->code == top_code_ && code == second_code_) ||
(code == top_code_ && prev->code == second_code_))) {
prob += outputs[prev->code];
}
cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ONLY_DUP,
prev, step);
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
use_dawgs, NC_ONLY_DUP, prev, step);
}
}
}
@ -945,36 +996,43 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
continue;
}
float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert,
worst_dict_cert, dict_ratio, use_dawgs,
NC_ANYTHING, prev, step);
if (top_n_flag == TN_TOP2 && code != null_char_) {
float prob = outputs[code] + outputs[null_char_];
if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
if (prev != nullptr && prev_cont == NC_ANYTHING &&
prev->code != null_char_ &&
((prev->code == top_code_ && code == second_code_) ||
(code == top_code_ && prev->code == second_code_))) {
prob += outputs[prev->code];
}
cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
dict_ratio, use_dawgs, NC_ONLY_DUP, prev, step);
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID,
cert, worst_dict_cert, dict_ratio, use_dawgs,
NC_ONLY_DUP, prev, step);
}
}
}
}
// Continues for a new unichar, using dawg or non-dawg as per flag.
void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, float worst_dict_cert,
float dict_ratio, bool use_dawgs, NodeContinuation cont,
const RecodeNode *prev, RecodeBeam *step) {
void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert,
float worst_dict_cert, float dict_ratio,
bool use_dawgs, NodeContinuation cont,
const RecodeNode *prev,
RecodeBeam *step) {
if (use_dawgs) {
if (cert > worst_dict_cert) {
ContinueDawg(code, unichar_id, cert, cont, prev, step);
}
} else {
RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false, false, false, false,
cert * dict_ratio, prev, nullptr, nodawg_heap);
if (dict_ != nullptr && ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false,
false, false, false, cert * dict_ratio, prev, nullptr,
nodawg_heap);
if (dict_ != nullptr &&
((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
!dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
// Any top choice position that can start a new word, ie a space or
// any non-space-delimited character, should also be considered
@ -995,8 +1053,8 @@ void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, flo
} else {
dawg_cert *= dict_ratio;
}
PushInitialDawgIfBetter(code, unichar_id, permuter, false, false, dawg_cert, cont, prev,
step);
PushInitialDawgIfBetter(code, unichar_id, permuter, false, false,
dawg_cert, cont, prev, step);
}
}
}
@ -1004,13 +1062,14 @@ void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, flo
// Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev,
// appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id
// is a valid continuation of whatever is in prev.
void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont,
void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
NodeContinuation cont,
const RecodeNode *prev, RecodeBeam *step) {
RecodeHeap *dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
if (unichar_id == INVALID_UNICHAR_ID) {
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false, false, false, cert,
prev, nullptr, dawg_heap);
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false,
false, false, cert, prev, nullptr, dawg_heap);
return;
}
// Avoid dictionary probe if score a total loss.
@ -1018,8 +1077,10 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
if (prev != nullptr) {
score += prev->score;
}
if (dawg_heap->size() >= kBeamWidths[0] && score <= dawg_heap->PeekTop().data().score &&
nodawg_heap->size() >= kBeamWidths[0] && score <= nodawg_heap->PeekTop().data().score) {
if (dawg_heap->size() >= kBeamWidths[0] &&
score <= dawg_heap->PeekTop().data().score &&
nodawg_heap->size() >= kBeamWidths[0] &&
score <= nodawg_heap->PeekTop().data().score) {
return;
}
const RecodeNode *uni_prev = prev;
@ -1033,10 +1094,11 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
if (uni_prev != nullptr && uni_prev->end_of_word) {
// Space is good. Push initial state, to the dawg beam and a regular
// space to the top choice beam.
PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false, false, cert, cont, prev,
step);
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter, false, false, false,
false, cert, prev, nullptr, nodawg_heap);
PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false,
false, cert, cont, prev, step);
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
false, false, false, false, cert, prev, nullptr,
nodawg_heap);
}
return;
} else if (uni_prev != nullptr && uni_prev->start_of_dawg &&
@ -1060,18 +1122,21 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
} else {
return; // Can't continue if not a dict word.
}
auto permuter = static_cast<PermuterType>(
dict_->def_letter_is_okay(&dawg_args, dict_->getUnicharset(), unichar_id, false));
auto permuter = static_cast<PermuterType>(dict_->def_letter_is_okay(
&dawg_args, dict_->getUnicharset(), unichar_id, false));
if (permuter != NO_PERM) {
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start,
dawg_args.valid_end, false, cert, prev, dawg_args.updated_dawgs, dawg_heap);
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
word_start, dawg_args.valid_end, false, cert, prev,
dawg_args.updated_dawgs, dawg_heap);
if (dawg_args.valid_end && !space_delimited_) {
// We can start another word right away, so push initial state as well,
// to the dawg beam, and the regular character to the top choice beam,
// since non-dict words can start here too.
PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, cert, cont, prev, step);
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start, true, false,
cert, prev, nullptr, nodawg_heap);
PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true,
cert, cont, prev, step);
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
word_start, true, false, cert, prev, nullptr,
nodawg_heap);
}
} else {
delete updated_dawgs;
@ -1081,9 +1146,11 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
// Adds a RecodeNode composed of the tuple (code, unichar_id,
// initial-dawg-state, prev, cert) to the given heap if/ there is room or if
// better than the current worst element if already full.
void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter,
void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
PermuterType permuter,
bool start, bool end, float cert,
NodeContinuation cont, const RecodeNode *prev,
NodeContinuation cont,
const RecodeNode *prev,
RecodeBeam *step) {
RecodeNode *best_initial_dawg = &step->best_initial_dawgs_[cont];
float score = cert;
@ -1093,8 +1160,9 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, Permute
if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
auto *initial_dawgs = new DawgPositionVector;
dict_->default_dawgs(initial_dawgs, false);
RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, score, prev,
initial_dawgs, ComputeCodeHash(code, false, prev));
RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
score, prev, initial_dawgs,
ComputeCodeHash(code, false, prev));
*best_initial_dawg = node;
}
}
@ -1103,22 +1171,23 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, Permute
// false, false, false, false, cert, prev, nullptr) to heap if there is room
// or if better than the current worst element if already full.
/* static */
void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id,
float cert, float worst_dict_cert, float dict_ratio,
bool use_dawgs, NodeContinuation cont,
const RecodeNode *prev, RecodeBeam *step) {
void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
int length, bool dup, int code, int unichar_id, float cert,
float worst_dict_cert, float dict_ratio, bool use_dawgs,
NodeContinuation cont, const RecodeNode *prev, RecodeBeam *step) {
int index = BeamIndex(use_dawgs, cont, length);
if (use_dawgs) {
if (cert > worst_dict_cert) {
PushHeapIfBetter(kBeamWidths[length], code, unichar_id, prev ? prev->permuter : NO_PERM,
false, false, false, dup, cert, prev, nullptr, &step->beams_[index]);
PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
prev ? prev->permuter : NO_PERM, false, false, false,
dup, cert, prev, nullptr, &step->beams_[index]);
}
} else {
cert *= dict_ratio;
if (cert >= kMinCertainty || code == null_char_) {
PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
prev ? prev->permuter : TOP_CHOICE_PERM, false, false, false, dup, cert,
prev, nullptr, &step->beams_[index]);
prev ? prev->permuter : TOP_CHOICE_PERM, false, false,
false, dup, cert, prev, nullptr, &step->beams_[index]);
}
}
}
@ -1127,17 +1196,19 @@ void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, i
// dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room
// or if better than the current worst element if already full.
void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
PermuterType permuter, bool dawg_start, bool word_start,
bool end, bool dup, float cert, const RecodeNode *prev,
DawgPositionVector *d, RecodeHeap *heap) {
PermuterType permuter, bool dawg_start,
bool word_start, bool end, bool dup,
float cert, const RecodeNode *prev,
DawgPositionVector *d,
RecodeHeap *heap) {
float score = cert;
if (prev != nullptr) {
score += prev->score;
}
if (heap->size() < max_size || score > heap->PeekTop().data().score) {
uint64_t hash = ComputeCodeHash(code, dup, prev);
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, dup, cert, score, prev,
d, hash);
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
dup, cert, score, prev, d, hash);
if (UpdateHeapIfMatched(&node, heap)) {
return;
}
@ -1154,7 +1225,8 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
// Adds a RecodeNode to heap if there is room
// or if better than the current worst element if already full.
void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHeap *heap) {
void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node,
RecodeHeap *heap) {
if (heap->size() < max_size || node->score > heap->PeekTop().data().score) {
if (UpdateHeapIfMatched(node, heap)) {
return;
@ -1170,7 +1242,8 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHe
// Searches the heap for a matching entry, and updates the score with
// reshuffle if needed. Returns true if there was a match.
bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap) {
bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node,
RecodeHeap *heap) {
// TODO(rays) consider hash map instead of linear search.
// It might not be faster because the hash map would have to be updated
// every time a heap reshuffle happens, and that would be a lot of overhead.
@ -1178,7 +1251,8 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea
for (auto &i : nodes) {
RecodeNode &node = i.data();
if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
node.permuter == new_node->permuter && node.start_of_dawg == new_node->start_of_dawg) {
node.permuter == new_node->permuter &&
node.start_of_dawg == new_node->start_of_dawg) {
if (new_node->score > node.score) {
// The new one is better. Update the entire node in the heap and
// reshuffle.
@ -1193,7 +1267,8 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea
}
// Computes and returns the code-hash for the given code and prev.
uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const {
uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
const RecodeNode *prev) const {
uint64_t hash = prev == nullptr ? 0 : prev->code_hash;
if (!dup && code != null_char_) {
int num_classes = recoder_.code_range();
@ -1209,7 +1284,8 @@ uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode
// during Decode. On return the best_nodes vector essentially contains the set
// of code, score pairs that make the optimal path with the constraint that
// the recoder can decode the code sequence back to a sequence of unichar-ids.
void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_nodes,
void RecodeBeamSearch::ExtractBestPaths(
std::vector<const RecodeNode *> *best_nodes,
std::vector<const RecodeNode *> *second_nodes) const {
// Scan both beams to extract the best and second best paths.
const RecodeNode *best_node = nullptr;
@ -1230,11 +1306,13 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_no
// last valid unichar_id.
const RecodeNode *dawg_node = node;
while (dawg_node != nullptr &&
(dawg_node->unichar_id == INVALID_UNICHAR_ID || dawg_node->duplicate)) {
(dawg_node->unichar_id == INVALID_UNICHAR_ID ||
dawg_node->duplicate)) {
dawg_node = dawg_node->prev;
}
if (dawg_node == nullptr ||
(!dawg_node->end_of_word && dawg_node->unichar_id != UNICHAR_SPACE)) {
(!dawg_node->end_of_word &&
dawg_node->unichar_id != UNICHAR_SPACE)) {
// Dawg node is not valid.
continue;
}
@ -1242,7 +1320,8 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_no
if (best_node == nullptr || node->score > best_node->score) {
second_best_node = best_node;
best_node = node;
} else if (second_best_node == nullptr || node->score > second_best_node->score) {
} else if (second_best_node == nullptr ||
node->score > second_best_node->score) {
second_best_node = node;
}
}
@ -1256,8 +1335,8 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_no
// Helper backtracks through the lattice from the given node, storing the
// path and reversing it.
void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
std::vector<const RecodeNode *> *path) const {
void RecodeBeamSearch::ExtractPath(
const RecodeNode *node, std::vector<const RecodeNode *> *path) const {
path->clear();
while (node != nullptr) {
path->push_back(node);
@ -1266,7 +1345,8 @@ void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
std::reverse(path->begin(), path->end());
}
void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector<const RecodeNode *> *path,
void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
std::vector<const RecodeNode *> *path,
int limiter) const {
int pathcounter = 0;
path->clear();
@ -1279,7 +1359,8 @@ void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector<const Rec
}
// Helper prints debug information on the given lattice path.
void RecodeBeamSearch::DebugPath(const UNICHARSET *unicharset,
void RecodeBeamSearch::DebugPath(
const UNICHARSET *unicharset,
const std::vector<const RecodeNode *> &path) const {
for (unsigned c = 0; c < path.size(); ++c) {
const RecodeNode &node = *path[c];
@ -1289,19 +1370,18 @@ void RecodeBeamSearch::DebugPath(const UNICHARSET *unicharset,
}
// Helper prints debug information on the given unichar path.
void RecodeBeamSearch::DebugUnicharPath(const UNICHARSET *unicharset,
const std::vector<const RecodeNode *> &path,
const std::vector<int> &unichar_ids,
const std::vector<float> &certs,
const std::vector<float> &ratings,
const std::vector<int> &xcoords) const {
void RecodeBeamSearch::DebugUnicharPath(
const UNICHARSET *unicharset, const std::vector<const RecodeNode *> &path,
const std::vector<int> &unichar_ids, const std::vector<float> &certs,
const std::vector<float> &ratings, const std::vector<int> &xcoords) const {
auto num_ids = unichar_ids.size();
double total_rating = 0.0;
for (unsigned c = 0; c < num_ids; ++c) {
int coord = xcoords[c];
tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
unicharset->debug_str(unichar_ids[c]).c_str(), ratings[c], certs[c],
path[coord]->start_of_word, path[coord]->end_of_word, path[coord]->permuter);
path[coord]->start_of_word, path[coord]->end_of_word,
path[coord]->permuter);
total_rating += ratings[c];
}
tprintf("Path total rating = %g\n", total_rating);

View File

@ -63,11 +63,11 @@ const double kMinFittingLinespacings = 0.25;
namespace tesseract {
BaselineRow::BaselineRow(double line_spacing, TO_ROW *to_row)
: blobs_(to_row->blob_list())
, baseline_pt1_(0.0f, 0.0f)
, baseline_pt2_(0.0f, 0.0f)
, baseline_error_(0.0)
, good_baseline_(false) {
: blobs_(to_row->blob_list()),
baseline_pt1_(0.0f, 0.0f),
baseline_pt2_(0.0f, 0.0f),
baseline_error_(0.0),
good_baseline_(false) {
ComputeBoundingBox();
// Compute a scale factor for rounding to ints.
disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
@ -87,11 +87,11 @@ void BaselineRow::SetupOldLineParameters(TO_ROW *row) const {
// Outputs diagnostic information.
void BaselineRow::Print() const {
tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", baseline_pt1_.x(),
baseline_pt1_.y(), baseline_pt2_.x(), baseline_pt2_.y(), BaselineAngle(),
StraightYAtX(0.0));
tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_, baseline_error_,
good_baseline_);
tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
baseline_pt1_.x(), baseline_pt1_.y(), baseline_pt2_.x(),
baseline_pt2_.y(), BaselineAngle(), StraightYAtX(0.0));
tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_,
baseline_error_, good_baseline_);
bounding_box_.print();
}
@ -133,8 +133,9 @@ double BaselineRow::StraightYAtX(double x) const {
if (denominator == 0.0) {
return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
}
return baseline_pt1_.y() +
(x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) / denominator;
return baseline_pt1_.y() + (x - baseline_pt1_.x()) *
(baseline_pt2_.y() - baseline_pt1_.y()) /
denominator;
}
// Fits a straight baseline to the points. Returns true if it had enough
@ -170,7 +171,8 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) {
baseline_error_ = fitter_.Fit(&pt1, &pt2);
baseline_pt1_ = pt1;
baseline_pt2_ = pt2;
if (baseline_error_ > max_baseline_error_ && fitter_.SufficientPointsForIndependentFit()) {
if (baseline_error_ > max_baseline_error_ &&
fitter_.SufficientPointsForIndependentFit()) {
// The fit was bad but there were plenty of points, so try skipping
// the first and last few, and use the new line if it dramatically improves
// the error of fit.
@ -184,7 +186,10 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) {
int debug = 0;
#ifdef kDebugYCoord
Print();
debug = bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord ? 3 : 2;
debug = bounding_box_.bottom() < kDebugYCoord &&
bounding_box_.top() > kDebugYCoord
? 3
: 2;
#endif
// Now we obtained a direction from that fit, see if we can improve the
// fit using the same direction and some other start point.
@ -218,7 +223,8 @@ void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) {
return;
}
#ifdef kDebugYCoord
if (bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord && debug < 3)
if (bounding_box_.bottom() < kDebugYCoord &&
bounding_box_.top() > kDebugYCoord && debug < 3)
debug = 3;
#endif
FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
@ -226,7 +232,8 @@ void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) {
// Modifies the baseline to snap to the textline grid if the existing
// result is not good enough.
double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing,
double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction,
double line_spacing,
double line_offset) {
if (blobs_->empty()) {
if (debug > 1) {
@ -240,7 +247,8 @@ double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, dou
int best_index = -1;
for (unsigned i = 0; i < displacement_modes_.size(); ++i) {
double blob_y = displacement_modes_[i];
double error = BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset);
double error =
BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset);
if (debug > 1) {
tprintf("Mode at %g has error %g from model \n", blob_y, error);
}
@ -263,9 +271,11 @@ double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, dou
displacement_modes_[best_index]);
bounding_box_.print();
}
FitConstrainedIfBetter(debug, direction, model_margin, displacement_modes_[best_index]);
FitConstrainedIfBetter(debug, direction, model_margin,
displacement_modes_[best_index]);
} else if (debug > 1) {
tprintf("Linespacing model only moves current line by %g for row at:", shift);
tprintf("Linespacing model only moves current line by %g for row at:",
shift);
bounding_box_.print();
}
} else if (debug > 1) {
@ -296,7 +306,8 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) {
if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord)
debug = true;
#endif
FCOORD blob_pos((box.left() + box.right()) / 2.0f, blob->baseline_position());
FCOORD blob_pos((box.left() + box.right()) / 2.0f,
blob->baseline_position());
double offset = direction * blob_pos;
perp_blob_dists.push_back(offset);
#ifdef kDebugYCoord
@ -338,23 +349,27 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) {
// Otherwise the new fit will only replace the old if it is really better,
// or the old fit is marked bad and the new fit has sufficient points, as
// well as being within the max_baseline_error_.
void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance,
void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction,
double cheat_allowance,
double target_offset) {
double halfrange = fit_halfrange_ * direction.length();
double min_dist = target_offset - halfrange;
double max_dist = target_offset + halfrange;
ICOORD line_pt;
double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, debug > 2, &line_pt);
double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
debug > 2, &line_pt);
// Allow cheat_allowance off the new error
new_error -= cheat_allowance;
double old_angle = BaselineAngle();
double new_angle = direction.angle();
if (debug > 1) {
tprintf("Constrained error = %g, original = %g", new_error, baseline_error_);
tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle, new_angle,
new_angle - old_angle, kMaxSkewDeviation);
tprintf("Constrained error = %g, original = %g", new_error,
baseline_error_);
tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle,
new_angle, new_angle - old_angle, kMaxSkewDeviation);
}
bool new_good_baseline = new_error <= max_baseline_error_ &&
bool new_good_baseline =
new_error <= max_baseline_error_ &&
(cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
// The new will replace the old if any are true:
// 1. the new error is better
@ -368,7 +383,8 @@ void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, dou
baseline_pt2_ = baseline_pt1_ + direction;
good_baseline_ = new_good_baseline;
if (debug > 1) {
tprintf("Replacing with constrained baseline, good = %d\n", good_baseline_);
tprintf("Replacing with constrained baseline, good = %d\n",
good_baseline_);
}
} else if (debug > 1) {
tprintf("Keeping old baseline\n");
@ -400,14 +416,14 @@ void BaselineRow::ComputeBoundingBox() {
}
BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
: block_(block)
, debug_level_(debug_level)
, non_text_block_(non_text)
, good_skew_angle_(false)
, skew_angle_(0.0)
, line_spacing_(block->line_spacing)
, line_offset_(0.0)
, model_error_(0.0) {
: block_(block),
debug_level_(debug_level),
non_text_block_(non_text),
good_skew_angle_(false),
skew_angle_(0.0),
line_spacing_(block->line_spacing),
line_offset_(0.0),
model_error_(0.0) {
TO_ROW_IT row_it(block_->get_rows());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
// Sort the blobs on the rows.
@ -418,7 +434,8 @@ BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
// Computes and returns the absolute error of the given perp_disp from the
// given linespacing model.
double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, double line_offset) {
double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
double line_offset) {
// Round to the nearest multiple of line_spacing + line offset.
int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
double model_y = line_spacing * multiple + line_offset;
@ -452,7 +469,8 @@ bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
good_skew_angle_ = false;
}
if (debug_level_ > 0) {
tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_, good_skew_angle_);
tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_,
good_skew_angle_);
}
return good_skew_angle_;
}
@ -483,9 +501,11 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
// baseline.
// Start by finding the row that is best fitted to the model.
unsigned best_row = 0;
double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), line_spacing_, line_offset_);
double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
line_spacing_, line_offset_);
for (unsigned r = 1; r < rows_.size(); ++r) {
double error = SpacingModelError(rows_[r]->PerpDisp(direction), line_spacing_, line_offset_);
double error = SpacingModelError(rows_[r]->PerpDisp(direction),
line_spacing_, line_offset_);
if (error < best_error) {
best_error = error;
best_row = r;
@ -494,11 +514,13 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
// Starting at the best fitting row, work outwards, syncing the offset.
double offset = line_offset_;
for (auto r = best_row + 1; r < rows_.size(); ++r) {
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, line_spacing_, offset);
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
line_spacing_, offset);
}
offset = line_offset_;
for (int r = best_row - 1; r >= 0; --r) {
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, line_spacing_, offset);
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
line_spacing_, offset);
}
}
@ -506,7 +528,8 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
void BaselineBlock::SetupBlockParameters() const {
if (line_spacing_ > 0.0) {
// Where was block_line_spacing set before?
float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_));
float min_spacing =
std::min(block_->line_spacing, static_cast<float>(line_spacing_));
if (min_spacing < block_->line_size) {
block_->line_size = min_spacing;
}
@ -549,8 +572,8 @@ void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
// As a side-effect, computes the xheights of the rows and the block.
// Although x-height estimation is conceptually separate, it is part of
// detecting perspective distortion and therefore baseline fitting.
void BaselineBlock::FitBaselineSplines(bool enable_splines, bool show_final_rows,
Textord *textord) {
void BaselineBlock::FitBaselineSplines(bool enable_splines,
bool show_final_rows, Textord *textord) {
double gradient = tan(skew_angle_);
FCOORD rotation(1.0f, 0.0f);
@ -565,8 +588,8 @@ void BaselineBlock::FitBaselineSplines(bool enable_splines, bool show_final_rows
int32_t xstarts[2] = {block_box.left(), block_box.right()};
double coeffs[3] = {0.0, row->line_m(), row->line_c()};
row->baseline = QSPLINE(1, xstarts, coeffs);
textord->compute_row_xheight(row, block_->block->classify_rotation(), row->line_m(),
block_->line_size);
textord->compute_row_xheight(row, block_->block->classify_rotation(),
row->line_m(), block_->line_size);
}
}
textord->compute_block_xheight(block_, gradient);
@ -599,7 +622,8 @@ void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) {
}
plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
// Show discarded blobs.
plot_blob_list(win, &block_->underlines, ScrollView::YELLOW, ScrollView::CORAL);
plot_blob_list(win, &block_->underlines, ScrollView::YELLOW,
ScrollView::CORAL);
if (block_->blobs.length() > 0) {
tprintf("%d blobs discarded as noise\n", block_->blobs.length());
}
@ -647,8 +671,9 @@ bool BaselineBlock::ComputeLineSpacing() {
}
}
if (debug_level_ > 0) {
tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n", line_spacing_,
row_positions.size(), fitting_gaps, non_trivial_gaps);
tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n",
line_spacing_, row_positions.size(), fitting_gaps,
non_trivial_gaps);
}
return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
}
@ -686,7 +711,8 @@ void BaselineBlock::EstimateLineSpacing() {
// Find the first row after row that overlaps it significantly.
const TBOX &row_box = row->bounding_box();
unsigned r2;
for (r2 = r + 1; r2 < rows_.size() && !row_box.major_x_overlap(rows_[r2]->bounding_box());
for (r2 = r + 1; r2 < rows_.size() &&
!row_box.major_x_overlap(rows_[r2]->bounding_box());
++r2) {
;
}
@ -703,7 +729,8 @@ void BaselineBlock::EstimateLineSpacing() {
// If we have at least one value, use it, otherwise leave the previous
// value unchanged.
if (!spacings.empty()) {
std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2, spacings.end());
std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2,
spacings.end());
line_spacing_ = spacings[spacings.size() / 2];
if (debug_level_ > 1) {
tprintf("Estimate of linespacing = %g\n", line_spacing_);
@ -718,14 +745,16 @@ void BaselineBlock::EstimateLineSpacing() {
void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
double spacings[3], offsets[3], errors[3];
int index_range;
errors[0] =
FitLineSpacingModel(positions, line_spacing_, &spacings[0], &offsets[0], &index_range);
errors[0] = FitLineSpacingModel(positions, line_spacing_, &spacings[0],
&offsets[0], &index_range);
if (index_range > 1) {
double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
// Try the hypotheses that there might be index_range +/- 1 line spaces.
errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1], &offsets[1], nullptr);
errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1],
&offsets[1], nullptr);
double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2], &offsets[2], nullptr);
errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2],
&offsets[2], nullptr);
for (int i = 1; i <= 2; ++i) {
if (errors[i] < errors[0]) {
spacings[0] = spacings[i];
@ -739,8 +768,8 @@ void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
line_offset_ = offsets[0];
model_error_ = errors[0];
if (debug_level_ > 0) {
tprintf("Final linespacing model = %g + offset %g, error %g\n", line_spacing_, line_offset_,
model_error_);
tprintf("Final linespacing model = %g + offset %g, error %g\n",
line_spacing_, line_offset_, model_error_);
}
}
}
@ -750,8 +779,9 @@ void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
// and the corresponding intercept in c_out, and the number of spacings seen
// in index_delta. Returns the error of fit to the line spacing model.
// Uses a simple linear regression, but optimized the offset using the median.
double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions, double m_in,
double *m_out, double *c_out, int *index_delta) {
double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
double m_in, double *m_out,
double *c_out, int *index_delta) {
if (m_in == 0.0f || positions.size() < 2) {
*m_out = m_in;
*c_out = 0.0;
@ -762,6 +792,7 @@ double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
}
std::vector<double> offsets;
// Get the offset (remainder) linespacing for each line and choose the median.
offsets.reserve(positions.size());
for (double position : positions) {
offsets.push_back(fmod(position, m_in));
}
@ -795,7 +826,8 @@ double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
*c_out = 0.0;
}
if (debug_level_ > 1) {
tprintf("Median offset = %g, compared to mean of %g.\n", *c_out, llsq.c(*m_out));
tprintf("Median offset = %g, compared to mean of %g.\n", *c_out,
llsq.c(*m_out));
}
// Index_delta is the number of hypothesized line gaps present.
if (index_delta != nullptr) {
@ -805,13 +837,14 @@ double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
// a full line-spacing in disagreement with the median.
double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
if (debug_level_ > 1) {
tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in, median_offset,
*m_out, *c_out, rms_error);
tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in,
median_offset, *m_out, *c_out, rms_error);
}
return rms_error;
}
BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew,
TO_BLOCK_LIST *blocks)
: page_skew_(page_skew), debug_level_(debug_level) {
TO_BLOCK_IT it(blocks);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
@ -863,8 +896,10 @@ void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
// x-heights and displaying debug information.
// NOTE that ComputeStraightBaselines must have been called first as this
// sets up data in the TO_ROWs upon which this function depends.
void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines,
bool remove_noise, bool show_final_rows,
void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr,
bool enable_splines,
bool remove_noise,
bool show_final_rows,
Textord *textord) {
for (auto bl_block : blocks_) {
if (enable_splines) {

View File

@ -22,6 +22,7 @@
#include "tovars.h"
#include <algorithm> // for std::sort
#include <cmath>
#include <vector> // for std::vector
namespace tesseract {
@ -437,7 +438,7 @@ private:
}
const float real_pitch = box_pitch(box1, box2);
if (fabs(real_pitch - pitch) < pitch * kFPTolerance) {
if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) {
return true;
}
@ -645,7 +646,7 @@ void FPRow::EstimatePitch(bool pass1) {
// So we collect only pitch values between two good
// characters. and within tolerance in pass2.
if (pass1 ||
(prev_was_good && fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
(prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
good_pitches_.Add(pitch);
if (!is_box_modified(i - 1) && !is_box_modified(i)) {
good_gaps_.Add(gap);

View File

@ -25,8 +25,8 @@
#include "colpartitiongrid.h"
#include "colpartitionset.h"
#include "detlinefit.h"
#include "helpers.h" // for UpdateRange
#include "dppoint.h"
#include "helpers.h" // for UpdateRange
#include "host.h" // for NearlyEqual
#include "imagefind.h"
#include "workingpartset.h"
@ -89,14 +89,14 @@ const int kMaxColorDistance = 900;
// blob_type is the blob_region_type_ of the blobs in this partition.
// Vertical is the direction of logical vertical on the possibly skewed image.
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD &vertical)
: left_margin_(-INT32_MAX)
, right_margin_(INT32_MAX)
, median_bottom_(INT32_MAX)
, median_top_(-INT32_MAX)
, median_left_(INT32_MAX)
, median_right_(-INT32_MAX)
, blob_type_(blob_type)
, vertical_(vertical) {
: left_margin_(-INT32_MAX),
right_margin_(INT32_MAX),
median_bottom_(INT32_MAX),
median_top_(-INT32_MAX),
median_left_(INT32_MAX),
median_right_(-INT32_MAX),
blob_type_(blob_type),
vertical_(vertical) {
memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
}
@ -105,8 +105,10 @@ ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD &vertical)
// WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
// the ColPartition owns the BLOBNBOX!!!
// Call DeleteBoxes before deleting the ColPartition.
ColPartition *ColPartition::FakePartition(const TBOX &box, PolyBlockType block_type,
BlobRegionType blob_type, BlobTextFlowType flow) {
ColPartition *ColPartition::FakePartition(const TBOX &box,
PolyBlockType block_type,
BlobRegionType blob_type,
BlobTextFlowType flow) {
auto *part = new ColPartition(blob_type, ICOORD(0, 1));
part->set_type(block_type);
part->set_flow(flow);
@ -124,7 +126,8 @@ ColPartition *ColPartition::FakePartition(const TBOX &box, PolyBlockType block_t
// than the surrounding text that may be a dropcap, two or more vertically
// touching characters, or some graphic element.
// If the given list is not nullptr, the partition is also added to the list.
ColPartition *ColPartition::MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list) {
ColPartition *ColPartition::MakeBigPartition(BLOBNBOX *box,
ColPartition_LIST *big_part_list) {
box->set_owner(nullptr);
auto *single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
single->set_flow(BTFT_NONE);
@ -155,8 +158,9 @@ ColPartition::~ColPartition() {
// Constructs a fake ColPartition with no BLOBNBOXes to represent a
// horizontal or vertical line, given a type and a bounding box.
ColPartition *ColPartition::MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical,
int left, int bottom, int right, int top) {
ColPartition *ColPartition::MakeLinePartition(BlobRegionType blob_type,
const ICOORD &vertical, int left,
int bottom, int right, int top) {
auto *part = new ColPartition(blob_type, vertical);
part->bounding_box_ = TBOX(left, bottom, right, top);
part->median_bottom_ = bottom;
@ -202,8 +206,9 @@ void ColPartition::AddBox(BLOBNBOX *bbox) {
right_key_ = BoxRightKey();
}
if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) {
tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", box.left(),
box.bottom(), box.right(), box.top(), bounding_box_.left(), bounding_box_.right());
tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
box.left(), box.bottom(), box.right(), box.top(),
bounding_box_.left(), bounding_box_.right());
}
}
@ -227,11 +232,13 @@ BLOBNBOX *ColPartition::BiggestBox() {
for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
BLOBNBOX *bbox = bb_it.data();
if (IsVerticalType()) {
if (biggest == nullptr || bbox->bounding_box().width() > biggest->bounding_box().width()) {
if (biggest == nullptr ||
bbox->bounding_box().width() > biggest->bounding_box().width()) {
biggest = bbox;
}
} else {
if (biggest == nullptr || bbox->bounding_box().height() > biggest->bounding_box().height()) {
if (biggest == nullptr ||
bbox->bounding_box().height() > biggest->bounding_box().height()) {
biggest = bbox;
}
}
@ -362,7 +369,8 @@ bool ColPartition::IsLegal() {
}
return false; // Bounding box invalid.
}
if (left_margin_ > bounding_box_.left() || right_margin_ < bounding_box_.right()) {
if (left_margin_ > bounding_box_.left() ||
right_margin_ < bounding_box_.right()) {
if (textord_debug_bugs) {
tprintf("Margins invalid\n");
Print();
@ -371,8 +379,8 @@ bool ColPartition::IsLegal() {
}
if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
if (textord_debug_bugs) {
tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(), right_key_,
BoxRightKey());
tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(),
right_key_, BoxRightKey());
Print();
}
return false; // Keys inside the box.
@ -383,10 +391,12 @@ bool ColPartition::IsLegal() {
// Returns true if the left and right edges are approximately equal.
bool ColPartition::MatchingColumns(const ColPartition &other) const {
int y = (MidY() + other.MidY()) / 2;
if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor, LeftAtY(y) / kColumnWidthFactor, 1)) {
if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
LeftAtY(y) / kColumnWidthFactor, 1)) {
return false;
}
if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor, RightAtY(y) / kColumnWidthFactor, 1)) {
if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
RightAtY(y) / kColumnWidthFactor, 1)) {
return false;
}
return true;
@ -400,10 +410,14 @@ bool ColPartition::MatchingTextColor(const ColPartition &other) const {
}
// Colors must match for other to count.
double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_);
double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_);
double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_);
double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_);
double d_this1_o =
ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_);
double d_this2_o =
ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_);
double d_o1_this =
ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_);
double d_o2_this =
ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_);
// All 4 distances must be small enough.
return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
@ -441,7 +455,8 @@ bool ColPartition::ConfirmNoTabViolation(const ColPartition &other) const {
}
// Returns true if other has a similar stroke width to this.
bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance,
bool ColPartition::MatchingStrokeWidth(const ColPartition &other,
double fractional_tolerance,
double constant_tolerance) const {
int match_count = 0;
int nonmatch_count = 0;
@ -450,8 +465,8 @@ bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractio
box_it.mark_cycle_pt();
other_it.mark_cycle_pt();
while (!box_it.cycled_list() && !other_it.cycled_list()) {
if (box_it.data()->MatchingStrokeWidth(*other_it.data(), fractional_tolerance,
constant_tolerance)) {
if (box_it.data()->MatchingStrokeWidth(
*other_it.data(), fractional_tolerance, constant_tolerance)) {
++match_count;
} else {
++nonmatch_count;
@ -468,7 +483,8 @@ bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractio
// (1) this is a ColPartition containing only diacritics, and
// (2) the base characters indicated on the diacritics all believably lie
// within the text line of the candidate ColPartition.
bool ColPartition::OKDiacriticMerge(const ColPartition &candidate, bool debug) const {
bool ColPartition::OKDiacriticMerge(const ColPartition &candidate,
bool debug) const {
BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
int min_top = INT32_MAX;
int max_bottom = -INT32_MAX;
@ -490,13 +506,14 @@ bool ColPartition::OKDiacriticMerge(const ColPartition &candidate, bool debug) c
}
// If the intersection of all vertical ranges of all base characters
// overlaps the median range of this, then it is OK.
bool result = min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_;
bool result =
min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_;
if (debug) {
if (result) {
tprintf("OKDiacritic!\n");
} else {
tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top, median_bottom_,
median_top_);
tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top,
median_bottom_, median_top_);
}
}
return result;
@ -591,7 +608,8 @@ int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) {
return count;
}
void ColPartition::SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density) {
void ColPartition::SetSpecialBlobsDensity(const BlobSpecialTextType type,
const float density) {
ASSERT_HOST(type < BSTT_COUNT);
special_blobs_densities_[type] = density;
}
@ -619,10 +637,12 @@ void ColPartition::ComputeSpecialBlobsDensity() {
// Partnerships are added symmetrically to partner and this.
void ColPartition::AddPartner(bool upper, ColPartition *partner) {
if (upper) {
partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, this);
partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
this);
upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
} else {
partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, this);
partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
this);
lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
}
}
@ -651,14 +671,16 @@ ColPartition *ColPartition::SingletonPartner(bool upper) {
}
// Merge with the other partition and delete it.
void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
void ColPartition::Absorb(ColPartition *other, const WidthCallback &cb) {
// The result has to either own all of the blobs or none of them.
// Verify the flag is consistent.
ASSERT_HOST(owns_blobs() == other->owns_blobs());
// TODO(nbeato): check owns_blobs better. Right now owns_blobs
// should always be true when this is called. So there is no issues.
if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()) ||
TabFind::WithinTestRegion(2, other->bounding_box_.left(), other->bounding_box_.bottom())) {
if (TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom()) ||
TabFind::WithinTestRegion(2, other->bounding_box_.left(),
other->bounding_box_.bottom())) {
tprintf("Merging:");
Print();
other->Print();
@ -669,8 +691,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
for (int type = 0; type < BSTT_COUNT; ++type) {
unsigned w1 = boxes_.length();
unsigned w2 = other->boxes_.length();
float new_val =
special_blobs_densities_[type] * w1 + other->special_blobs_densities_[type] * w2;
float new_val = special_blobs_densities_[type] * w1 +
other->special_blobs_densities_[type] * w2;
if (!w1 || !w2) {
ASSERT_HOST((w1 + w2) > 0);
special_blobs_densities_[type] = new_val / (w1 + w2);
@ -723,7 +745,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
for (int upper = 0; upper < 2; ++upper) {
ColPartition_CLIST partners;
ColPartition_C_IT part_it(&partners);
part_it.add_list_after(upper ? &other->upper_partners_ : &other->lower_partners_);
part_it.add_list_after(upper ? &other->upper_partners_
: &other->lower_partners_);
for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
ColPartition *partner = part_it.extract();
partner->RemovePartner(!upper, other);
@ -747,7 +770,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
// the text involved, and is usually a fraction of the median size of merge1
// and/or merge2, or this.
// TODO(rays) Determine whether vertical text needs to be considered.
bool ColPartition::OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2,
bool ColPartition::OKMergeOverlap(const ColPartition &merge1,
const ColPartition &merge2,
int ok_box_overlap, bool debug) {
// Vertical partitions are not allowed to be involved.
if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
@ -916,7 +940,8 @@ void ColPartition::ComputeLimits() {
if (it.empty()) {
return;
}
if (IsImageType() || blob_type() == BRT_RECTIMAGE || blob_type() == BRT_POLYIMAGE) {
if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
blob_type() == BRT_POLYIMAGE) {
median_top_ = bounding_box_.top();
median_bottom_ = bounding_box_.bottom();
median_height_ = bounding_box_.height();
@ -957,7 +982,8 @@ void ColPartition::ComputeLimits() {
Print();
}
if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
tprintf("Made partition with bad left coords, %d > %d\n", left_margin_, bounding_box_.left());
tprintf("Made partition with bad left coords, %d > %d\n", left_margin_,
bounding_box_.left());
Print();
}
// Fix partner lists. The bounding box has changed and partners are stored
@ -973,7 +999,8 @@ void ColPartition::ComputeLimits() {
partner->AddPartner(!upper, this);
}
}
if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) {
if (TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom())) {
tprintf("Recomputed box for partition %p\n", this);
Print();
}
@ -998,10 +1025,12 @@ void ColPartition::SetPartitionType(int resolution, ColPartitionSet *columns) {
int first_spanned_col = -1;
ColumnSpanningType span_type = columns->SpanningType(
resolution, bounding_box_.left(), bounding_box_.right(),
std::min(bounding_box_.height(), bounding_box_.width()), MidY(), left_margin_, right_margin_,
&first_column_, &last_column_, &first_spanned_col);
std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
left_margin_, right_margin_, &first_column_, &last_column_,
&first_spanned_col);
column_set_ = columns;
if (first_column_ < last_column_ && span_type == CST_PULLOUT && !IsLineType()) {
if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
!IsLineType()) {
// Unequal columns may indicate that the pullout spans one of the columns
// it lies in, so force it to be allocated to just that column.
if (first_spanned_col >= 0) {
@ -1026,8 +1055,8 @@ void ColPartition::SetPartitionType(int resolution, ColPartitionSet *columns) {
// in the columns.
PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const {
if (flow == CST_NOISE) {
if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && blob_type_ != BRT_RECTIMAGE &&
blob_type_ != BRT_VERT_TEXT) {
if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) {
return PT_NOISE;
}
flow = CST_FLOWING;
@ -1075,18 +1104,18 @@ PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const {
// Returns the first and last column touched by this partition.
// resolution refers to the ppi resolution of the image.
void ColPartition::ColumnRange(int resolution, ColPartitionSet *columns, int *first_col,
int *last_col) {
void ColPartition::ColumnRange(int resolution, ColPartitionSet *columns,
int *first_col, int *last_col) {
int first_spanned_col = -1;
ColumnSpanningType span_type =
columns->SpanningType(resolution, bounding_box_.left(), bounding_box_.right(),
ColumnSpanningType span_type = columns->SpanningType(
resolution, bounding_box_.left(), bounding_box_.right(),
std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
left_margin_, right_margin_, first_col, last_col, &first_spanned_col);
type_ = PartitionType(span_type);
}
// Sets the internal flags good_width_ and good_column_.
void ColPartition::SetColumnGoodness(WidthCallback cb) {
void ColPartition::SetColumnGoodness(const WidthCallback &cb) {
int y = MidY();
int width = RightAtY(y) - LeftAtY(y);
good_width_ = cb(width);
@ -1127,10 +1156,12 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
if (textord_debug_tabfind >= 4) {
tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count,
max_width * kMaxLeaderGapFractionOfMax, min_width * kMaxLeaderGapFractionOfMin);
max_width * kMaxLeaderGapFractionOfMax,
min_width * kMaxLeaderGapFractionOfMin);
}
if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
gap_iqr < min_width * kMaxLeaderGapFractionOfMin && blob_count >= kMinLeaderCount) {
gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
blob_count >= kMinLeaderCount) {
// This is stable enough to be called a leader, so check the widths.
// Since leader dashes can join, run a dp cutting algorithm and go
// on the cost.
@ -1151,7 +1182,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
projection[left - part_left].AddLocalCost(height);
}
}
DPPoint *best_end = DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance,
DPPoint *best_end =
DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance,
part_width, projection);
if (best_end != nullptr && best_end->total_cost() < blob_count) {
// Good enough. Call it a leader.
@ -1161,7 +1193,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
BLOBNBOX *blob = it.data();
// If the first or last blob is spaced too much, don't mark it.
if (it.at_first()) {
int gap = it.data_relative(1)->bounding_box().left() - blob->bounding_box().right();
int gap = it.data_relative(1)->bounding_box().left() -
blob->bounding_box().right();
if (blob->bounding_box().width() + gap > max_step) {
it.extract();
modified_blob_list = true;
@ -1169,7 +1202,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
}
}
if (it.at_last()) {
int gap = blob->bounding_box().left() - it.data_relative(-1)->bounding_box().right();
int gap = blob->bounding_box().left() -
it.data_relative(-1)->bounding_box().right();
if (blob->bounding_box().width() + gap > max_step) {
it.extract();
modified_blob_list = true;
@ -1188,7 +1222,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
if (best_end == nullptr) {
tprintf("No path\n");
} else {
tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(), blob_count);
tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
blob_count);
}
}
delete[] projection;
@ -1275,10 +1310,12 @@ void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) {
blob_type_ = BRT_NOISE;
}
}
if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) {
tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", blob_count,
noisy_count, good_blob_score_);
tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_, blob_type_);
if (TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom())) {
tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
blob_count, noisy_count, good_blob_score_);
tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_,
blob_type_);
Print();
}
SetBlobTypes();
@ -1371,7 +1408,8 @@ bool ColPartition::HasGoodBaseline() {
// Adds this ColPartition to a matching WorkingPartSet if one can be found,
// otherwise starts a new one in the appropriate column, ending the previous.
void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution,
void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright,
int resolution,
ColPartition_LIST *used_parts,
WorkingPartSet_LIST *working_sets) {
if (block_owned_) {
@ -1414,10 +1452,11 @@ void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, in
// Find the column that the right edge falls in.
BLOCK_LIST completed_blocks;
TO_BLOCK_LIST to_blocks;
for (; !it.cycled_list() && col_index <= last_column_; it.forward(), ++col_index) {
for (; !it.cycled_list() && col_index <= last_column_;
it.forward(), ++col_index) {
WorkingPartSet *end_set = it.data();
end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks,
&to_blocks);
end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
&completed_blocks, &to_blocks);
}
work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
}
@ -1431,9 +1470,12 @@ void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, in
// The used partitions are put onto used_parts, as they may still be referred
// to in the partition grid. bleft, tright and resolution are the bounds
// and resolution of the original image.
void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution,
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts,
BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks) {
void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
int resolution,
ColPartition_LIST *block_parts,
ColPartition_LIST *used_parts,
BLOCK_LIST *completed_blocks,
TO_BLOCK_LIST *to_blocks) {
int page_height = tright.y() - bleft.y();
// Compute the initial spacing stats.
ColPartition_IT it(block_parts);
@ -1466,7 +1508,8 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
if (!it.at_last()) {
ColPartition *next_part = it.data_relative(1);
part->set_bottom_spacing(part->median_bottom() - next_part->median_bottom());
part->set_bottom_spacing(part->median_bottom() -
next_part->median_bottom());
part->set_top_spacing(part->median_top() - next_part->median_top());
} else {
part->set_bottom_spacing(page_height);
@ -1474,8 +1517,8 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
}
if (textord_debug_tabfind) {
part->Print();
tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n", side_steps.median(),
part->top_spacing(), part->bottom_spacing());
tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
side_steps.median(), part->top_spacing(), part->bottom_spacing());
}
++part_count;
}
@ -1508,21 +1551,25 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
tprintf(
"Spacings unequal: upper:%d/%d, lower:%d/%d,"
" sizes %d %d %d\n",
part->top_spacing(), part->bottom_spacing(), next_part->top_spacing(),
next_part->bottom_spacing(), part->median_height(), next_part->median_height(),
part->top_spacing(), part->bottom_spacing(),
next_part->top_spacing(), next_part->bottom_spacing(),
part->median_height(), next_part->median_height(),
third_part != nullptr ? third_part->median_height() : 0);
}
// We can only consider adding the next line to the block if the sizes
// match and the lines are close enough for their size.
if (part->SizesSimilar(*next_part) &&
next_part->median_height() * kMaxSameBlockLineSpacing > part->bottom_spacing() &&
part->median_height() * kMaxSameBlockLineSpacing > part->top_spacing()) {
next_part->median_height() * kMaxSameBlockLineSpacing >
part->bottom_spacing() &&
part->median_height() * kMaxSameBlockLineSpacing >
part->top_spacing()) {
// Even now, we can only add it as long as the third line doesn't
// match in the same way and have a smaller bottom spacing.
if (third_part == nullptr || !next_part->SizesSimilar(*third_part) ||
third_part->median_height() * kMaxSameBlockLineSpacing <=
next_part->bottom_spacing() ||
next_part->median_height() * kMaxSameBlockLineSpacing <= next_part->top_spacing() ||
next_part->median_height() * kMaxSameBlockLineSpacing <=
next_part->top_spacing() ||
next_part->bottom_spacing() > part->bottom_spacing()) {
// Add to the current block.
sp_block_it.add_to_end(it.extract());
@ -1542,8 +1589,9 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
} else {
if (textord_debug_tabfind && !it.empty()) {
ColPartition *next_part = it.data();
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n", part->top_spacing(),
part->bottom_spacing(), next_part->top_spacing(), next_part->bottom_spacing(),
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
part->top_spacing(), part->bottom_spacing(),
next_part->top_spacing(), next_part->bottom_spacing(),
part->median_height(), next_part->median_height());
}
}
@ -1570,8 +1618,9 @@ static void ClipCoord(const ICOORD &bleft, const ICOORD &tright, ICOORD *pos) {
// itself. Sets up the block for (old) textline formation correctly for
// vertical and horizontal text. The partitions are moved to used_parts
// afterwards, as they cannot be deleted yet.
static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, BLOCK *block,
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) {
static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing,
BLOCK *block, ColPartition_LIST *block_parts,
ColPartition_LIST *used_parts) {
// Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it.
// Move all the parts to a done list as they are no longer needed, except
// that have have to continue to exist until the part grid is deleted.
@ -1646,7 +1695,8 @@ static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, BLOCK *b
// Constructs a block from the given list of partitions.
// Arguments are as LineSpacingBlocks above.
TO_BLOCK *ColPartition::MakeBlock(const ICOORD &bleft, const ICOORD &tright,
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) {
ColPartition_LIST *block_parts,
ColPartition_LIST *used_parts) {
if (block_parts->empty()) {
return nullptr; // Nothing to do.
}
@ -1704,7 +1754,8 @@ TO_BLOCK *ColPartition::MakeBlock(const ICOORD &bleft, const ICOORD &tright,
// Constructs a block from the given list of vertical text partitions.
// Currently only creates rectangular blocks.
TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright,
TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft,
const ICOORD &tright,
ColPartition_LIST *block_parts,
ColPartition_LIST *used_parts) {
if (block_parts->empty()) {
@ -1722,8 +1773,8 @@ TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD
tprintf("Making block at:");
block_box.print();
}
auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(), block_box.right(),
block_box.top());
auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
block_box.right(), block_box.top());
block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
}
@ -1741,7 +1792,8 @@ TO_ROW *ColPartition::MakeToRow() {
int top = blob->bounding_box().top();
int bottom = blob->bounding_box().bottom();
if (row == nullptr) {
row = new TO_ROW(blob, static_cast<float>(top), static_cast<float>(bottom),
row =
new TO_ROW(blob, static_cast<float>(top), static_cast<float>(bottom),
static_cast<float>(line_size));
} else {
row->add_blob(blob, static_cast<float>(top), static_cast<float>(bottom),
@ -1785,7 +1837,8 @@ ColPartition *ColPartition::CopyButDontOwnBlobs() {
copy->set_owns_blobs(false);
BLOBNBOX_C_IT inserter(copy->boxes());
BLOBNBOX_C_IT traverser(boxes());
for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward()) {
for (traverser.mark_cycle_pt(); !traverser.cycled_list();
traverser.forward()) {
inserter.add_after_then_move(traverser.data());
}
return copy;
@ -1812,19 +1865,21 @@ void ColPartition::Print() const {
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
" w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
" ts=%d bs=%d ls=%d rs=%d\n",
boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
bounding_box_.left(), median_left_, bounding_box_.bottom(), median_bottom_,
bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', right_margin_, median_right_,
bounding_box_.top(), median_top_, good_width_, good_column_, type_, kBlobTypes[blob_type_],
flow_, first_column_, last_column_, boxes_.length(), space_above_, space_below_,
space_to_left_, space_to_right_);
boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B',
LeftAtY(y), bounding_box_.left(), median_left_, bounding_box_.bottom(),
median_bottom_, bounding_box_.right(), RightAtY(y),
right_key_tab_ ? 'T' : 'B', right_margin_, median_right_,
bounding_box_.top(), median_top_, good_width_, good_column_, type_,
kBlobTypes[blob_type_], flow_, first_column_, last_column_,
boxes_.length(), space_above_, space_below_, space_to_left_,
space_to_right_);
}
// Prints debug information on the colors.
void ColPartition::PrintColors() {
tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED], color1_[COLOR_GREEN],
color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL], color2_[COLOR_RED], color2_[COLOR_GREEN],
color2_[COLOR_BLUE]);
tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED],
color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL],
color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
}
// Sets the types of all partitions in the run to be the max of the types.
@ -1898,7 +1953,8 @@ void ColPartition::SmoothPartnerRun(int working_set_count) {
// one partner. This makes block creation simpler.
// If get_desperate is true, goes to more desperate merge methods
// to merge flowing text before breaking partnerships.
void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid) {
void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate,
ColPartitionGrid *grid) {
if (TypesSimilar(type_, type)) {
RefinePartnersInternal(true, get_desperate, grid);
RefinePartnersInternal(false, get_desperate, grid);
@ -1924,7 +1980,8 @@ void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, ColPar
// Cleans up the partners above if upper is true, else below.
// If get_desperate is true, goes to more desperate merge methods
// to merge flowing text before breaking partnerships.
void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, ColPartitionGrid *grid) {
void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate,
ColPartitionGrid *grid) {
ColPartition_CLIST *partners = upper ? &upper_partners_ : &lower_partners_;
if (!partners->empty() && !partners->singleton()) {
RefinePartnersByType(upper, partners);
@ -1952,8 +2009,10 @@ void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, ColPar
// Cleans up the partners above if upper is true, else below.
// Restricts the partners to only desirable types. For text and BRT_HLINE this
// means the same type_ , and for image types it means any image type.
void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
void ColPartition::RefinePartnersByType(bool upper,
ColPartition_CLIST *partners) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom());
if (debug) {
tprintf("Refining %d %s partners by type for:\n", partners->length(),
upper ? "Upper" : "Lower");
@ -1983,7 +2042,8 @@ void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners
// Only polyimages are allowed to have partners of any kind!
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition *partner = it.data();
if (partner->blob_type() != BRT_POLYIMAGE || blob_type() != BRT_POLYIMAGE) {
if (partner->blob_type() != BRT_POLYIMAGE ||
blob_type() != BRT_POLYIMAGE) {
if (debug) {
tprintf("Removing partner:");
partner->Print();
@ -2003,7 +2063,8 @@ void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners
// Gets rid of this<->b, leaving a clean chain.
// Also if we have this<->a and a<->this, then gets rid of this<->a, as
// this has multiple partners.
void ColPartition::RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partners) {
void ColPartition::RefinePartnerShortcuts(bool upper,
ColPartition_CLIST *partners) {
bool done_any = false;
do {
done_any = false;
@ -2054,8 +2115,10 @@ void ColPartition::RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partne
// by aggressive line fitting/splitting, as there are probably vertically
// joined blobs that cross textlines.
void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
ColPartition_CLIST *partners, ColPartitionGrid *grid) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
ColPartition_CLIST *partners,
ColPartitionGrid *grid) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom());
if (debug) {
tprintf("Refining %d %s partners by merge for:\n", partners->length(),
upper ? "Upper" : "Lower");
@ -2078,12 +2141,13 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
}
}
int overlap_increase;
ColPartition *candidate =
grid->BestMergeCandidate(part, &candidates, debug, nullptr, &overlap_increase);
ColPartition *candidate = grid->BestMergeCandidate(
part, &candidates, debug, nullptr, &overlap_increase);
if (candidate != nullptr && (overlap_increase <= 0 || desperate)) {
if (debug) {
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", part->HCoreOverlap(*candidate),
part->VCoreOverlap(*candidate), overlap_increase);
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
overlap_increase);
}
// Remove before merge and re-insert to keep the integrity of the grid.
grid->RemoveBBox(candidate);
@ -2102,8 +2166,10 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
// Cleans up the partners above if upper is true, else below.
// Keep the partner with the biggest overlap.
void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
void ColPartition::RefinePartnersByOverlap(bool upper,
ColPartition_CLIST *partners) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom());
if (debug) {
tprintf("Refining %d %s partners by overlap for:\n", partners->length(),
upper ? "Upper" : "Lower");
@ -2115,7 +2181,8 @@ void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partn
int best_overlap = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition *partner = it.data();
int overlap = std::min(bounding_box_.right(), partner->bounding_box_.right()) -
int overlap =
std::min(bounding_box_.right(), partner->bounding_box_.right()) -
std::max(bounding_box_.left(), partner->bounding_box_.left());
if (overlap > best_overlap) {
best_overlap = overlap;
@ -2137,7 +2204,8 @@ void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partn
}
// Return true if bbox belongs better in this than other.
bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other) {
bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox,
const ColPartition &other) {
const TBOX &box = bbox->bounding_box();
// Margins take priority.
int left = box.left();
@ -2150,14 +2218,17 @@ bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other
}
int top = box.top();
int bottom = box.bottom();
int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_);
int other_overlap = std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_);
int this_overlap =
std::min(top, median_top_) - std::max(bottom, median_bottom_);
int other_overlap =
std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_);
int this_miss = median_top_ - median_bottom_ - this_overlap;
int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) {
tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", box.left(),
box.bottom(), box.right(), box.top(), this_overlap, other_overlap, this_miss,
other_miss, median_top_, other.median_top_);
tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
box.left(), box.bottom(), box.right(), box.top(), this_overlap,
other_overlap, this_miss, other_miss, median_top_,
other.median_top_);
}
if (this_miss < other_miss) {
return true;
@ -2200,13 +2271,15 @@ bool ColPartition::IsInSameColumnAs(const ColPartition &part) const {
// Overlap does not occur when last < part.first or first > part.last.
// In other words, one is completely to the side of the other.
// This is just DeMorgan's law applied to that so the function returns true.
return (last_column_ >= part.first_column_) && (first_column_ <= part.last_column_);
return (last_column_ >= part.first_column_) &&
(first_column_ <= part.last_column_);
}
// Smoothes the spacings in the list into groups of equal linespacing.
// resolution is the resolution of the original image, used as a basis
// for thresholds in change of spacing. page_height is in pixels.
void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_LIST *parts) {
void ColPartition::SmoothSpacings(int resolution, int page_height,
ColPartition_LIST *parts) {
// The task would be trivial if we didn't have to allow for blips -
// occasional offsets in spacing caused by anomalous text, such as all
// caps, groups of descenders, joined words, Arabic etc.
@ -2258,13 +2331,17 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
// The last time, everything is shifted up 1, so we present OKSpacingBlip
// with neighbourhood-1 and check that PN_LOWER matches the median.
if (neighbourhood[PN_LOWER] == nullptr ||
(!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER], resolution) &&
(neighbourhood[PN_UPPER] == nullptr || neighbourhood[PN_LOWER] == nullptr ||
(!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
resolution) &&
(neighbourhood[PN_UPPER] == nullptr ||
neighbourhood[PN_LOWER] == nullptr ||
!OKSpacingBlip(resolution, median_space, neighbourhood, 0)) &&
(neighbourhood[PN_UPPER - 1] == nullptr || neighbourhood[PN_LOWER - 1] == nullptr ||
(neighbourhood[PN_UPPER - 1] == nullptr ||
neighbourhood[PN_LOWER - 1] == nullptr ||
!OKSpacingBlip(resolution, median_space, neighbourhood, -1) ||
!neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
(neighbourhood[PN_UPPER + 1] == nullptr || neighbourhood[PN_LOWER + 1] == nullptr ||
(neighbourhood[PN_UPPER + 1] == nullptr ||
neighbourhood[PN_LOWER + 1] == nullptr ||
!OKSpacingBlip(resolution, median_space, neighbourhood, 1) ||
!neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
// The group has ended. PN_UPPER is the last member.
@ -2297,7 +2374,8 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
if (neighbourhood[i] == nullptr) {
tprintf("NULL");
if (i > 0 && neighbourhood[i - 1] != nullptr) {
if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) {
if (neighbourhood[i - 1]->SingletonPartner(false) !=
nullptr) {
tprintf(" Lower partner:");
neighbourhood[i - 1]->SingletonPartner(false)->Print();
} else {
@ -2307,7 +2385,8 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
tprintf("\n");
}
} else {
tprintf("Top = %d, bottom = %d\n", neighbourhood[i]->top_spacing(),
tprintf("Top = %d, bottom = %d\n",
neighbourhood[i]->top_spacing(),
neighbourhood[i]->bottom_spacing());
}
}
@ -2350,12 +2429,13 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
// Returns true if the parts array of pointers to partitions matches the
// condition for a spacing blip. See SmoothSpacings for what this means
// and how it is used.
bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, ColPartition **parts,
int offset) {
bool ColPartition::OKSpacingBlip(int resolution, int median_spacing,
ColPartition **parts, int offset) {
// The blip is OK if upper and lower sum to an OK value and at least
// one of above1 and below1 is equal to the median.
parts += offset;
return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing, resolution) &&
return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing,
resolution) &&
((parts[PN_ABOVE1] != nullptr &&
parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
(parts[PN_BELOW1] != nullptr &&
@ -2373,22 +2453,27 @@ bool ColPartition::SpacingEqual(int spacing, int resolution) const {
// Returns true if both the top and bottom spacings of this and other
// match to within suitable margins dictated by the image resolution.
bool ColPartition::SpacingsEqual(const ColPartition &other, int resolution) const {
int bottom_error =
std::max(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution));
int top_error = std::max(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution));
bool ColPartition::SpacingsEqual(const ColPartition &other,
int resolution) const {
int bottom_error = std::max(BottomSpacingMargin(resolution),
other.BottomSpacingMargin(resolution));
int top_error = std::max(TopSpacingMargin(resolution),
other.TopSpacingMargin(resolution));
return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
(NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, bottom_error));
NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
bottom_error));
}
// Returns true if the sum spacing of this and other match the given
// spacing (or twice the given spacing) to within a suitable margin dictated
// by the image resolution.
bool ColPartition::SummedSpacingOK(const ColPartition &other, int spacing, int resolution) const {
int bottom_error =
std::max(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution));
int top_error = std::max(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution));
bool ColPartition::SummedSpacingOK(const ColPartition &other, int spacing,
int resolution) const {
int bottom_error = std::max(BottomSpacingMargin(resolution),
other.BottomSpacingMargin(resolution));
int top_error = std::max(TopSpacingMargin(resolution),
other.TopSpacingMargin(resolution));
int bottom_total = bottom_spacing_ + other.bottom_spacing_;
int top_total = top_spacing_ + other.top_spacing_;
return (NearlyEqual(spacing, bottom_total, bottom_error) &&
@ -2420,7 +2505,8 @@ bool ColPartition::SizesSimilar(const ColPartition &other) const {
// Helper updates margin_left and margin_right, being the bounds of the left
// margin of part of a block. Returns false and does not update the bounds if
// this partition has a disjoint margin with the established margin.
static bool UpdateLeftMargin(const ColPartition &part, int *margin_left, int *margin_right) {
static bool UpdateLeftMargin(const ColPartition &part, int *margin_left,
int *margin_right) {
const TBOX &part_box = part.bounding_box();
int top = part_box.top();
int bottom = part_box.bottom();
@ -2444,7 +2530,8 @@ static bool UpdateLeftMargin(const ColPartition &part, int *margin_left, int *ma
// condition that the intersection of the left margins is non-empty, ie the
// rightmost left margin is to the left of the leftmost left bounding box edge.
// On return the iterator is set to the start of the next run.
void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end) {
void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start,
ICOORD *end) {
ColPartition *part = part_it->data();
ColPartition *start_part = part;
int start_y = part->bounding_box_.top();
@ -2463,7 +2550,8 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *
do {
part_it->forward();
part = part_it->data();
} while (!part_it->at_first() && UpdateLeftMargin(*part, &margin_left, &margin_right));
} while (!part_it->at_first() &&
UpdateLeftMargin(*part, &margin_left, &margin_right));
// The run ended. If we were pushed inwards, compute the next run and
// extend it backwards into the run we just calculated to find the end of
// this run that provides a tight box.
@ -2475,13 +2563,15 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *
do {
next_it.forward();
part = next_it.data();
} while (!next_it.at_first() && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
} while (!next_it.at_first() &&
UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// Now extend the next run backwards into the original run to get the
// tightest fit.
do {
part_it->backward();
part = part_it->data();
} while (part != start_part && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
} while (part != start_part &&
UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
part_it->forward();
}
// Now calculate the end_y.
@ -2495,16 +2585,17 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *
end->set_y(end_y);
end->set_x(part->XAtY(margin_right, end_y));
if (textord_debug_tabfind && !part_it->at_first()) {
tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y,
part->XAtY(margin_left, end_y), end->x(), part->left_margin_,
part->bounding_box_.left());
tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
start_y, end_y, part->XAtY(margin_left, end_y), end->x(),
part->left_margin_, part->bounding_box_.left());
}
}
// Helper updates margin_left and margin_right, being the bounds of the right
// margin of part of a block. Returns false and does not update the bounds if
// this partition has a disjoint margin with the established margin.
static bool UpdateRightMargin(const ColPartition &part, int *margin_left, int *margin_right) {
static bool UpdateRightMargin(const ColPartition &part, int *margin_left,
int *margin_right) {
const TBOX &part_box = part.bounding_box();
int top = part_box.top();
int bottom = part_box.bottom();
@ -2529,7 +2620,8 @@ static bool UpdateRightMargin(const ColPartition &part, int *margin_left, int *m
// leftmost right margin is to the right of the rightmost right bounding box
// edge.
// On return the iterator is set to the start of the next run.
void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end) {
void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start,
ICOORD *end) {
ColPartition *part = part_it->data();
ColPartition *start_part = part;
int start_y = part->bounding_box_.bottom();
@ -2548,7 +2640,8 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD
do {
part_it->backward();
part = part_it->data();
} while (!part_it->at_last() && UpdateRightMargin(*part, &margin_left, &margin_right));
} while (!part_it->at_last() &&
UpdateRightMargin(*part, &margin_left, &margin_right));
// The run ended. If we were pushed inwards, compute the next run and
// extend it backwards to find the end of this run for a tight box.
int next_margin_right = INT32_MAX;
@ -2559,13 +2652,15 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD
do {
next_it.backward();
part = next_it.data();
} while (!next_it.at_last() && UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
} while (!next_it.at_last() &&
UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
// Now extend the next run forwards into the original run to get the
// tightest fit.
do {
part_it->forward();
part = part_it->data();
} while (part != start_part && UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
} while (part != start_part &&
UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
part_it->backward();
}
// Now calculate the end_y.
@ -2579,9 +2674,9 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD
end->set_y(end_y);
end->set_x(part->XAtY(margin_left, end_y));
if (textord_debug_tabfind && !part_it->at_last()) {
tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y,
end->x(), part->XAtY(margin_right, end_y), part->bounding_box_.right(),
part->right_margin_);
tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
start_y, end_y, end->x(), part->XAtY(margin_right, end_y),
part->bounding_box_.right(), part->right_margin_);
}
}

View File

@ -81,7 +81,8 @@ public:
* Constructs a fake ColPartition with no BLOBNBOXes to represent a
* horizontal or vertical line, given a type and a bounding box.
*/
static ColPartition *MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left,
static ColPartition *MakeLinePartition(BlobRegionType blob_type,
const ICOORD &vertical, int left,
int bottom, int right, int top);
// Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
@ -90,14 +91,16 @@ public:
// the ColPartition owns the BLOBNBOX!!!
// Call DeleteBoxes before deleting the ColPartition.
static ColPartition *FakePartition(const TBOX &box, PolyBlockType block_type,
BlobRegionType blob_type, BlobTextFlowType flow);
BlobRegionType blob_type,
BlobTextFlowType flow);
// Constructs and returns a ColPartition with the given real BLOBNBOX,
// and sets it up to be a "big" partition (single-blob partition bigger
// than the surrounding text that may be a dropcap, two or more vertically
// touching characters, or some graphic element.
// If the given list is not nullptr, the partition is also added to the list.
static ColPartition *MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list);
static ColPartition *MakeBigPartition(BLOBNBOX *box,
ColPartition_LIST *big_part_list);
~ColPartition();
@ -389,7 +392,8 @@ public:
return false;
}
int overlap = VCoreOverlap(other);
int height = std::min(median_top_ - median_bottom_, other.median_top_ - other.median_bottom_);
int height = std::min(median_top_ - median_bottom_,
other.median_top_ - other.median_bottom_);
return overlap * 3 > height;
}
// Returns true if this and other can be combined without putting a
@ -412,7 +416,8 @@ public:
// Returns true if the types are similar to each other.
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
return (type1 == type2 || (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
return (type1 == type2 ||
(type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
(type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
}
@ -519,7 +524,8 @@ public:
bool ConfirmNoTabViolation(const ColPartition &other) const;
// Returns true if other has a similar stroke width to this.
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance,
bool MatchingStrokeWidth(const ColPartition &other,
double fractional_tolerance,
double constant_tolerance) const;
// Returns true if candidate is an acceptable diacritic base char merge
// with this as the diacritic.
@ -548,7 +554,8 @@ public:
// Set the density value for a particular BlobSpecialTextType, should ONLY be
// used for debugging or testing. In production code, use
// ComputeSpecialBlobsDensity instead.
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density);
void SetSpecialBlobsDensity(const BlobSpecialTextType type,
const float density);
// Compute the SpecialTextType density of blobs, where we assume
// that the SpecialTextType in the boxes_ has been set.
void ComputeSpecialBlobsDensity();
@ -565,14 +572,14 @@ public:
ColPartition *SingletonPartner(bool upper);
// Merge with the other partition and delete it.
void Absorb(ColPartition *other, WidthCallback cb);
void Absorb(ColPartition *other, const WidthCallback &cb);
// Returns true if the overlap between this and the merged pair of
// merge candidates is sufficiently trivial to be allowed.
// The merged box can graze the edge of this by the ok_box_overlap
// if that exceeds the margin to the median top and bottom.
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap,
bool debug);
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2,
int ok_box_overlap, bool debug);
// Find the blob at which to split this to minimize the overlap with the
// given box. Returns the first blob to go in the second partition.
@ -606,10 +613,11 @@ public:
// Returns the first and last column touched by this partition.
// resolution refers to the ppi resolution of the image.
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col);
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col,
int *last_col);
// Sets the internal flags good_width_ and good_column_.
void SetColumnGoodness(WidthCallback cb);
void SetColumnGoodness(const WidthCallback &cb);
// Determines whether the blobs in this partition mostly represent
// a leader (fixed pitch sequence) and sets the member blobs accordingly.
@ -634,8 +642,9 @@ public:
// Adds this ColPartition to a matching WorkingPartSet if one can be found,
// otherwise starts a new one in the appropriate column, ending the previous.
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution,
ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set);
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright,
int resolution, ColPartition_LIST *used_parts,
WorkingPartSet_LIST *working_set);
// From the given block_parts list, builds one or more BLOCKs and
// corresponding TO_BLOCKs, such that the line spacing is uniform in each.
@ -643,17 +652,21 @@ public:
// The used partitions are put onto used_parts, as they may still be referred
// to in the partition grid. bleft, tright and resolution are the bounds
// and resolution of the original image.
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution,
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts,
BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks);
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
int resolution, ColPartition_LIST *block_parts,
ColPartition_LIST *used_parts,
BLOCK_LIST *completed_blocks,
TO_BLOCK_LIST *to_blocks);
// Constructs a block from the given list of partitions.
// Arguments are as LineSpacingBlocks above.
static TO_BLOCK *MakeBlock(const ICOORD &bleft, const ICOORD &tright,
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts);
ColPartition_LIST *block_parts,
ColPartition_LIST *used_parts);
// Constructs a block from the given list of vertical text partitions.
// Currently only creates rectangular blocks.
static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright,
static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft,
const ICOORD &tright,
ColPartition_LIST *block_parts,
ColPartition_LIST *used_parts);
@ -686,7 +699,8 @@ public:
// one partner. This makes block creation simpler.
// If get_desperate is true, goes to more desperate merge methods
// to merge flowing text before breaking partnerships.
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid);
void RefinePartners(PolyBlockType type, bool get_desperate,
ColPartitionGrid *grid);
// Returns true if this column partition is in the same column as
// part. This function will only work after the SetPartitionType function
@ -700,8 +714,10 @@ public:
const ColPartition *part2 = *static_cast<const ColPartition *const *>(p2);
int mid_y1 = part1->bounding_box_.y_middle();
int mid_y2 = part2->bounding_box_.y_middle();
if ((part2->bounding_box_.bottom() <= mid_y1 && mid_y1 <= part2->bounding_box_.top()) ||
(part1->bounding_box_.bottom() <= mid_y2 && mid_y2 <= part1->bounding_box_.top())) {
if ((part2->bounding_box_.bottom() <= mid_y1 &&
mid_y1 <= part2->bounding_box_.top()) ||
(part1->bounding_box_.bottom() <= mid_y2 &&
mid_y2 <= part1->bounding_box_.top())) {
// Sort by increasing x.
return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
}
@ -721,7 +737,8 @@ private:
// Cleans up the partners above if upper is true, else below.
// If get_desperate is true, goes to more desperate merge methods
// to merge flowing text before breaking partnerships.
void RefinePartnersInternal(bool upper, bool get_desperate, ColPartitionGrid *grid);
void RefinePartnersInternal(bool upper, bool get_desperate,
ColPartitionGrid *grid);
// Restricts the partners to only desirable types. For text and BRT_HLINE this
// means the same type_ , and for image types it means any image type.
void RefinePartnersByType(bool upper, ColPartition_CLIST *partners);
@ -736,7 +753,8 @@ private:
// is set, indicating that the textlines probably need to be regenerated
// by aggressive line fitting/splitting, as there are probably vertically
// joined blobs that cross textlines.
void RefineTextPartnersByMerge(bool upper, bool desperate, ColPartition_CLIST *partners,
void RefineTextPartnersByMerge(bool upper, bool desperate,
ColPartition_CLIST *partners,
ColPartitionGrid *grid);
// Keep the partner with the biggest overlap.
void RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners);
@ -747,12 +765,14 @@ private:
// Smoothes the spacings in the list into groups of equal linespacing.
// resolution is the resolution of the original image, used as a basis
// for thresholds in change of spacing. page_height is in pixels.
static void SmoothSpacings(int resolution, int page_height, ColPartition_LIST *parts);
static void SmoothSpacings(int resolution, int page_height,
ColPartition_LIST *parts);
// Returns true if the parts array of pointers to partitions matches the
// condition for a spacing blip. See SmoothSpacings for what this means
// and how it is used.
static bool OKSpacingBlip(int resolution, int median_spacing, ColPartition **parts, int offset);
static bool OKSpacingBlip(int resolution, int median_spacing,
ColPartition **parts, int offset);
// Returns true if both the top and bottom spacings of this match the given
// spacing to within suitable margins dictated by the image resolution.
@ -765,7 +785,8 @@ private:
// Returns true if the sum spacing of this and other match the given
// spacing (or twice the given spacing) to within a suitable margin dictated
// by the image resolution.
bool SummedSpacingOK(const ColPartition &other, int spacing, int resolution) const;
bool SummedSpacingOK(const ColPartition &other, int spacing,
int resolution) const;
// Returns a suitable spacing margin that can be applied to bottoms of
// text lines, based on the resolution and the stored side_step_.
@ -792,7 +813,8 @@ private:
// rightmost right bounding box edge.
// TODO(rays) Not good enough. Needs improving to tightly wrap text in both
// directions, and to loosely wrap images.
static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end);
static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start,
ICOORD *end);
// The margins are determined by the position of the nearest vertically
// overlapping neighbour to the side. They indicate the maximum extent
@ -893,7 +915,8 @@ private:
};
// Typedef it now in case it becomes a class later.
using ColPartitionGridSearch = GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>;
using ColPartitionGridSearch =
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>;
} // namespace tesseract.

View File

@ -25,6 +25,7 @@
#include "imagefind.h"
#include <algorithm>
#include <utility>
namespace tesseract {
@ -63,12 +64,15 @@ const double kMaxPartitionSpacing = 1.75;
// decision in GridSmoothNeighbour.
const int kSmoothDecisionMargin = 4;
ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
: BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize, bleft, tright) {}
ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD &bleft,
const ICOORD &tright)
: BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(
gridsize, bleft, tright) {}
// Handles a click event in a display window.
void ColPartitionGrid::HandleClick(int x, int y) {
BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x,
y);
// Run a radial search for partitions that overlap.
ColPartitionGridSearch radsearch(this);
radsearch.SetUniqueMode(true);
@ -93,8 +97,9 @@ void ColPartitionGrid::HandleClick(int x, int y) {
// true, then the partitions are merged.
// Both callbacks are deleted before returning.
void ColPartitionGrid::Merges(
std::function<bool(ColPartition *, TBOX *)> box_cb,
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb) {
const std::function<bool(ColPartition *, TBOX *)> &box_cb,
const std::function<bool(const ColPartition *, const ColPartition *)>
&confirm_cb) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
@ -112,8 +117,9 @@ void ColPartitionGrid::Merges(
// true, then the partitions are merged.
// Returns true if the partition is consumed by one or more merges.
bool ColPartitionGrid::MergePart(
std::function<bool(ColPartition *, TBOX *)> box_cb,
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
const std::function<bool(ColPartition *, TBOX *)> &box_cb,
const std::function<bool(const ColPartition *, const ColPartition *)>
&confirm_cb,
ColPartition *part) {
if (part->IsUnMergeableType()) {
return false;
@ -138,12 +144,13 @@ bool ColPartitionGrid::MergePart(
FindMergeCandidates(part, box, debug, &merge_candidates);
// Find the best merge candidate based on minimal overlap increase.
int overlap_increase;
ColPartition *neighbour =
BestMergeCandidate(part, &merge_candidates, debug, confirm_cb, &overlap_increase);
ColPartition *neighbour = BestMergeCandidate(part, &merge_candidates, debug,
confirm_cb, &overlap_increase);
if (neighbour != nullptr && overlap_increase <= 0) {
if (debug) {
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", part->HCoreOverlap(*neighbour),
part->VCoreOverlap(*neighbour), overlap_increase);
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
overlap_increase);
}
// Looks like a good candidate so merge it.
RemoveBBox(neighbour);
@ -171,7 +178,8 @@ bool ColPartitionGrid::MergePart(
// In general we only want to merge partitions that look like they
// are on the same text line, ie their median limits overlap, but we have
// to make exceptions for diacritics and stray punctuation.
static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candidate, bool debug) {
static bool OKMergeCandidate(const ColPartition *part,
const ColPartition *candidate, bool debug) {
const TBOX &part_box = part->bounding_box();
if (candidate == part) {
return false; // Ignore itself.
@ -205,7 +213,8 @@ static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candi
}
// Candidates must either overlap in median y,
// or part or candidate must be an acceptable diacritic.
if (!part->VSignificantCoreOverlap(*candidate) && !part->OKDiacriticMerge(*candidate, debug) &&
if (!part->VSignificantCoreOverlap(*candidate) &&
!part->OKDiacriticMerge(*candidate, debug) &&
!candidate->OKDiacriticMerge(*part, debug)) {
if (debug) {
tprintf("Candidate fails overlap and diacritic tests!\n");
@ -221,7 +230,8 @@ static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candi
// the overlap with them uncombined.
// An overlap is not counted if passes the OKMergeOverlap test with ok_overlap
// as the pixel overlap limit. merge1 and merge2 must both be non-nullptr.
static int IncreaseInOverlap(const ColPartition *merge1, const ColPartition *merge2, int ok_overlap,
static int IncreaseInOverlap(const ColPartition *merge1,
const ColPartition *merge2, int ok_overlap,
ColPartition_CLIST *parts) {
ASSERT_HOST(merge1 != nullptr && merge2 != nullptr);
int total_area = 0;
@ -236,7 +246,8 @@ static int IncreaseInOverlap(const ColPartition *merge1, const ColPartition *mer
TBOX part_box = part->bounding_box();
// Compute the overlap of the merged box with part.
int overlap_area = part_box.intersection(merged_box).area();
if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) {
if (overlap_area > 0 &&
!part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) {
total_area += overlap_area;
// Subtract the overlap of merge1 and merge2 individually.
overlap_area = part_box.intersection(merge1->bounding_box()).area();
@ -289,7 +300,8 @@ static bool TestCompatibleCandidates(const ColPartition &part, bool debug,
ColPartition_C_IT it2(it);
for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
ColPartition *candidate2 = it2.data();
if (candidate2 != candidate && !OKMergeCandidate(candidate, candidate2, false)) {
if (candidate2 != candidate &&
!OKMergeCandidate(candidate, candidate2, false)) {
if (debug) {
tprintf("NC overlap failed:Candidate:");
candidate2->bounding_box().print();
@ -341,7 +353,8 @@ int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid **overlap_grid) {
// Finds all the ColPartitions in the grid that overlap with the given
// box and returns them SortByBoxLeft(ed) and uniqued in the given list.
// Any partition equal to not_this (may be nullptr) is excluded.
void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this,
void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box,
const ColPartition *not_this,
ColPartition_CLIST *parts) {
ColPartitionGridSearch rsearch(this);
rsearch.StartRectSearch(box);
@ -396,7 +409,8 @@ void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, const ColParti
// in overlap, or tightly spaced text would end up in bits.
ColPartition *ColPartitionGrid::BestMergeCandidate(
const ColPartition *part, ColPartition_CLIST *candidates, bool debug,
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
const std::function<bool(const ColPartition *, const ColPartition *)>
&confirm_cb,
int *overlap_increase) {
if (overlap_increase != nullptr) {
*overlap_increase = 0;
@ -404,7 +418,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
if (candidates->empty()) {
return nullptr;
}
int ok_overlap = static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
int ok_overlap =
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
// The best neighbour to merge with is the one that causes least
// total pairwise overlap among all the neighbours.
// If more than one offers the same total overlap, choose the one
@ -424,8 +439,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
// we need anything that might be overlapped by the merged box.
FindOverlappingPartitions(full_box, part, &neighbours);
if (debug) {
tprintf("Finding best merge candidate from %d, %d neighbours for box:", candidates->length(),
neighbours.length());
tprintf("Finding best merge candidate from %d, %d neighbours for box:",
candidates->length(), neighbours.length());
part_box.print();
}
// If the best increase in overlap is positive, then we also check the
@ -434,7 +449,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
// non-candidate overlap is better than the best overlap, then return
// the worst non-candidate overlap instead.
ColPartition_CLIST non_candidate_neighbours;
non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true, &neighbours, candidates);
non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
&neighbours, candidates);
int worst_nc_increase = 0;
int best_increase = INT32_MAX;
int best_area = 0;
@ -454,8 +470,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
best_increase = increase;
best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
if (debug) {
tprintf("New best merge candidate has increase %d, area %d, over box:", increase,
best_area);
tprintf("New best merge candidate has increase %d, area %d, over box:",
increase, best_area);
full_box.print();
candidate->Print();
}
@ -466,7 +482,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
best_candidate = candidate;
}
}
increase = IncreaseInOverlap(part, candidate, ok_overlap, &non_candidate_neighbours);
increase = IncreaseInOverlap(part, candidate, ok_overlap,
&non_candidate_neighbours);
if (increase > worst_nc_increase) {
worst_nc_increase = increase;
}
@ -478,7 +495,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
// but only if each candidate is either a good diacritic merge with part,
// or an ok merge candidate with all the others.
// See TestCompatibleCandidates for more explanation and a picture.
if (worst_nc_increase < best_increase && TestCompatibleCandidates(*part, debug, candidates)) {
if (worst_nc_increase < best_increase &&
TestCompatibleCandidates(*part, debug, candidates)) {
best_increase = worst_nc_increase;
}
}
@ -490,7 +508,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
// Helper to remove the given box from the given partition, put it in its
// own partition, and add to the partition list.
static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, ColPartition_LIST *part_list) {
static void RemoveBadBox(BLOBNBOX *box, ColPartition *part,
ColPartition_LIST *part_list) {
part->RemoveBox(box);
ColPartition::MakeBigPartition(box, part_list);
}
@ -501,8 +520,10 @@ static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, ColPartition_LIST *p
// Blobs that cause overlaps get removed, put in individual partitions
// and added to the big_parts list. They are most likely characters on
// 2 textlines that touch, or something big like a dropcap.
void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) {
int ok_overlap = static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
void ColPartitionGrid::SplitOverlappingPartitions(
ColPartition_LIST *big_parts) {
int ok_overlap =
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
@ -534,7 +555,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
BLOBNBOX *excluded = part->BiggestBox();
TBOX shrunken = part->BoundsWithoutBox(excluded);
if (!shrunken.overlap(neighbour_box) &&
excluded->bounding_box().height() > kBigPartSizeRatio * shrunken.height()) {
excluded->bounding_box().height() >
kBigPartSizeRatio * shrunken.height()) {
// Removing the biggest box fixes the overlap, so do it!
gsearch.RemoveBBox();
RemoveBadBox(excluded, part, big_parts);
@ -550,7 +572,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
BLOBNBOX *excluded = neighbour->BiggestBox();
TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
if (!shrunken.overlap(box) &&
excluded->bounding_box().height() > kBigPartSizeRatio * shrunken.height()) {
excluded->bounding_box().height() >
kBigPartSizeRatio * shrunken.height()) {
// Removing the biggest box fixes the overlap, so do it!
rsearch.RemoveBBox();
RemoveBadBox(excluded, neighbour, big_parts);
@ -562,7 +585,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
ColPartition *right_part = nullptr;
if (neighbour_overlap_count <= part_overlap_count || part->IsSingleton()) {
if (neighbour_overlap_count <= part_overlap_count ||
part->IsSingleton()) {
// Try to split the neighbour to reduce overlap.
BLOBNBOX *split_blob = neighbour->OverlapSplitBlob(box);
if (split_blob != nullptr) {
@ -608,15 +632,18 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if anything was changed.
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map,
const TBOX &im_box, const FCOORD &rotation) {
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type,
Image nontext_map,
const TBOX &im_box,
const FCOORD &rotation) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition *part;
bool any_changed = false;
while ((part = gsearch.NextFullSearch()) != nullptr) {
if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type())) {
if (part->flow() != source_type ||
BLOBNBOX::IsLineType(part->blob_type())) {
continue;
}
const TBOX &box = part->bounding_box();
@ -658,7 +685,8 @@ void ColPartitionGrid::ReflectInYAxis() {
// it into proper blocks or columns.
// TODO(rays) some kind of sort function would be useful and probably better
// than the default here, which is to sort by order of the grid search.
void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks,
TO_BLOCK_LIST *to_blocks) {
TO_BLOCK_IT to_block_it(to_blocks);
BLOCK_IT block_it(blocks);
// All partitions will be put on this list and deleted on return.
@ -672,8 +700,10 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LI
part_it.add_after_then_move(part);
// The partition has to be at least vaguely like text.
BlobRegionType blob_type = part->blob_type();
if (BLOBNBOX::IsTextType(blob_type) || (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT;
if (BLOBNBOX::IsTextType(blob_type) ||
(blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
PolyBlockType type =
blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT;
// Get metrics from the row that will be used for the block.
TBOX box = part->bounding_box();
int median_width = part->median_width();
@ -685,7 +715,8 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LI
part->DeleteBoxes();
continue;
}
auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), box.right(), box.top());
auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
box.right(), box.top());
block->pdblk.set_poly_block(new POLY_BLOCK(box, type));
auto *to_block = new TO_BLOCK(block);
TO_ROW_IT row_it(to_block->get_rows());
@ -780,7 +811,8 @@ bool ColPartitionGrid::MakeColPartSets(PartSetVector *part_sets) {
bool any_parts_found = false;
while ((part = gsearch.NextFullSearch()) != nullptr) {
BlobRegionType blob_type = part->blob_type();
if (blob_type != BRT_NOISE && (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
if (blob_type != BRT_NOISE &&
(blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
int grid_x, grid_y;
const TBOX &part_box = part->bounding_box();
GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
@ -815,10 +847,12 @@ ColPartitionSet *ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) {
ColPartition *part;
while ((part = gsearch.NextFullSearch()) != nullptr) {
BlobRegionType blob_type = part->blob_type();
if (blob_type != BRT_NOISE && (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
if (blob_type != BRT_NOISE &&
(blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
// Consider for single column.
BlobTextFlowType flow = part->flow();
if ((blob_type == BRT_TEXT && (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
if ((blob_type == BRT_TEXT &&
(flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
if (single_column_part == nullptr) {
@ -841,7 +875,7 @@ ColPartitionSet *ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) {
if (single_column_part != nullptr) {
// Make a ColPartitionSet out of the single_column_part as a candidate
// for the single column case.
single_column_part->SetColumnGoodness(cb);
single_column_part->SetColumnGoodness(std::move(cb));
return new ColPartitionSet(single_column_part);
}
return nullptr;
@ -923,7 +957,8 @@ void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST *im_blobs) {
// The boxes within the partitions have changed (by deskew) so recompute
// the bounds of all the partitions and reinsert them into the grid.
void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD &bleft, const ICOORD &tright,
void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD &bleft,
const ICOORD &tright,
const ICOORD &vertical) {
ColPartition_LIST saved_parts;
ColPartition_IT part_it(&saved_parts);
@ -957,7 +992,8 @@ void ColPartitionGrid::GridFindMargins(ColPartitionSet **best_columns) {
ColPartition *part;
while ((part = gsearch.NextFullSearch()) != nullptr) {
// Set up a rectangle search x-bounded by the column and y by the part.
ColPartitionSet *columns = best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr;
ColPartitionSet *columns =
best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr;
FindPartitionMargins(columns, part);
const TBOX &box = part->bounding_box();
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
@ -972,7 +1008,8 @@ void ColPartitionGrid::GridFindMargins(ColPartitionSet **best_columns) {
// best_columns, which may be nullptr, is an array of pointers indicating the
// column set at each y-coordinate in the grid.
// best_columns is usually the best_columns_ member of ColumnFinder.
void ColPartitionGrid::ListFindMargins(ColPartitionSet **best_columns, ColPartition_LIST *parts) {
void ColPartitionGrid::ListFindMargins(ColPartitionSet **best_columns,
ColPartition_LIST *parts) {
ColPartition_IT part_it(parts);
for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
ColPartition *part = part_it.data();
@ -1050,15 +1087,18 @@ void ColPartitionGrid::FindFigureCaptions() {
while ((part = gsearch.NextFullSearch()) != nullptr) {
if (part->IsImageType()) {
const TBOX &part_box = part->bounding_box();
bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom());
bool debug =
AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom());
ColPartition *best_caption = nullptr;
int best_dist = 0; // Distance to best_caption.
int best_upper = 0; // Direction of best_caption.
// Handle both lower and upper directions.
for (int upper = 0; upper < 2; ++upper) {
ColPartition_C_IT partner_it(upper ? part->upper_partners() : part->lower_partners());
ColPartition_C_IT partner_it(upper ? part->upper_partners()
: part->lower_partners());
// If there are no image partners, then this direction is ok.
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); partner_it.forward()) {
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
partner_it.forward()) {
ColPartition *partner = partner_it.data();
if (partner->IsImageType()) {
break;
@ -1068,7 +1108,8 @@ void ColPartitionGrid::FindFigureCaptions() {
continue;
}
// Find the nearest totally overlapping text partner.
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); partner_it.forward()) {
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
partner_it.forward()) {
ColPartition *partner = partner_it.data();
if (!partner->IsTextType() || partner->type() == PT_TABLE) {
continue;
@ -1080,7 +1121,8 @@ void ColPartitionGrid::FindFigureCaptions() {
tprintf("Considering partner:");
partner_box.print();
}
if (partner_box.left() >= part_box.left() && partner_box.right() <= part_box.right()) {
if (partner_box.left() >= part_box.left() &&
partner_box.right() <= part_box.right()) {
int dist = partner_box.y_gap(part_box);
if (best_caption == nullptr || dist < best_dist) {
best_dist = dist;
@ -1106,7 +1148,8 @@ void ColPartitionGrid::FindFigureCaptions() {
ColPartition *end_partner = nullptr;
ColPartition *next_partner = nullptr;
for (ColPartition *partner = best_caption;
partner != nullptr && line_count <= kMaxCaptionLines; partner = next_partner) {
partner != nullptr && line_count <= kMaxCaptionLines;
partner = next_partner) {
if (!partner->IsTextType()) {
end_partner = partner;
break;
@ -1115,7 +1158,8 @@ void ColPartitionGrid::FindFigureCaptions() {
total_height += partner->bounding_box().height();
next_partner = partner->SingletonPartner(best_upper);
if (next_partner != nullptr) {
int gap = partner->bounding_box().y_gap(next_partner->bounding_box());
int gap =
partner->bounding_box().y_gap(next_partner->bounding_box());
if (gap > biggest_gap) {
biggest_gap = gap;
end_partner = next_partner;
@ -1132,8 +1176,8 @@ void ColPartitionGrid::FindFigureCaptions() {
}
}
if (debug) {
tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", line_count,
biggest_gap, smallest_gap, mean_height);
tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
line_count, biggest_gap, smallest_gap, mean_height);
if (end_partner != nullptr) {
tprintf("End partner:");
end_partner->bounding_box().print();
@ -1144,7 +1188,8 @@ void ColPartitionGrid::FindFigureCaptions() {
}
if (line_count <= kMaxCaptionLines) {
// This is a qualified caption. Mark the text as caption.
for (ColPartition *partner = best_caption; partner != nullptr && partner != end_partner;
for (ColPartition *partner = best_caption;
partner != nullptr && partner != end_partner;
partner = next_partner) {
partner->set_type(PT_CAPTION_TEXT);
partner->SetBlobTypes();
@ -1232,7 +1277,8 @@ void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition *part) {
// Finds the best partner in the given direction for the given partition.
// Stores the result with AddPartner.
void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, ColPartition *part) {
void ColPartitionGrid::FindVPartitionPartners(bool to_the_left,
ColPartition *part) {
if (part->type() == PT_NOISE) {
return; // Noise is not allowed to partner anything.
}
@ -1292,7 +1338,8 @@ void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
gsearch.StartFullSearch();
ColPartition *part;
while ((part = gsearch.NextFullSearch()) != nullptr) {
part->RefinePartners(static_cast<PolyBlockType>(type), get_desperate, this);
part->RefinePartners(static_cast<PolyBlockType>(type), get_desperate,
this);
// Iterator may have been messed up by a merge.
gsearch.RepositionIterator();
}
@ -1304,9 +1351,11 @@ void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
// Finds and returns a list of candidate ColPartitions to merge with part.
// The candidates must overlap search_box, and when merged must not
// overlap any other partitions that are not overlapped by each individually.
void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX &search_box,
bool debug, ColPartition_CLIST *candidates) {
int ok_overlap = static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
void ColPartitionGrid::FindMergeCandidates(const ColPartition *part,
const TBOX &search_box, bool debug,
ColPartition_CLIST *candidates) {
int ok_overlap =
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
const TBOX &part_box = part->bounding_box();
// Now run the rect search.
ColPartitionGridSearch rsearch(this);
@ -1393,7 +1442,8 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX
// into images.
// Returns true if the partition was changed.
bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug, ColPartition *part) {
const FCOORD &rerotation, bool debug,
ColPartition *part) {
const TBOX &part_box = part->bounding_box();
if (debug) {
tprintf("Smooothing part at:");
@ -1409,8 +1459,8 @@ bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
for (int d = 0; d < BND_COUNT; ++d) {
int dist;
auto dir = static_cast<BlobNeighbourDir>(d);
BlobRegionType type =
SmoothInOneDirection(dir, nontext_map, im_box, rerotation, debug, *part, &dist);
BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
rerotation, debug, *part, &dist);
if (debug) {
tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist);
}
@ -1459,8 +1509,9 @@ bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
// Sets up a search box based on the part_box, padded in all directions
// except direction. Also setup dist_scaling to weight x,y distances according
// to the given direction.
static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, const TBOX &part_box,
int min_padding, TBOX *search_box, ICOORD *dist_scaling) {
static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
const TBOX &part_box, int min_padding,
TBOX *search_box, ICOORD *dist_scaling) {
*search_box = part_box;
// Generate a pad value based on the min dimension of part_box, but at least
// min_padding and then scaled by kMaxPadFactor.
@ -1511,20 +1562,21 @@ enum NeighbourPartitionType {
// partitions that makes a decisive result (if any) and returns the type
// and the distance of the collection. If there are any pixels in the
// nontext_map, then the decision is biased towards image.
BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
const TBOX &im_box, const FCOORD &rerotation,
bool debug, const ColPartition &part,
BlobRegionType ColPartitionGrid::SmoothInOneDirection(
BlobNeighbourDir direction, Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug, const ColPartition &part,
int *best_distance) {
// Set up a rectangle search bounded by the part.
const TBOX &part_box = part.bounding_box();
TBOX search_box;
ICOORD dist_scaling;
ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box, &dist_scaling);
bool image_region =
ImageFind::CountPixelsInRotatedBox(search_box, im_box, rerotation, nontext_map) > 0;
ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box,
&dist_scaling);
bool image_region = ImageFind::CountPixelsInRotatedBox(
search_box, im_box, rerotation, nontext_map) > 0;
std::vector<int> dists[NPT_COUNT];
AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box, rerotation, debug,
dists);
AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box,
rerotation, debug, dists);
// By iteratively including the next smallest distance across the vectors,
// (as in a merge sort) we can use the vector indices as counts of each type
// and find the nearest set of objects that give us a definite decision.
@ -1551,33 +1603,35 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
}
*best_distance = min_dist;
if (debug) {
tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n", counts[NPT_HTEXT],
counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE],
image_bias, min_dist);
tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n",
counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT],
counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE], image_bias, min_dist);
}
// See if we have a decision yet.
auto image_count = counts[NPT_IMAGE];
auto htext_score =
counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - (image_count + counts[NPT_WEAK_VTEXT]);
auto vtext_score =
counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - (image_count + counts[NPT_WEAK_HTEXT]);
auto htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
(image_count + counts[NPT_WEAK_VTEXT]);
auto vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
(image_count + counts[NPT_WEAK_HTEXT]);
if (image_count > 0 && image_bias - htext_score >= kSmoothDecisionMargin &&
image_bias - vtext_score >= kSmoothDecisionMargin) {
*best_distance = dists[NPT_IMAGE][0];
if (!dists[NPT_WEAK_VTEXT].empty() && *best_distance > dists[NPT_WEAK_VTEXT][0]) {
if (!dists[NPT_WEAK_VTEXT].empty() &&
*best_distance > dists[NPT_WEAK_VTEXT][0]) {
*best_distance = dists[NPT_WEAK_VTEXT][0];
}
if (!dists[NPT_WEAK_HTEXT].empty() && *best_distance > dists[NPT_WEAK_HTEXT][0]) {
if (!dists[NPT_WEAK_HTEXT].empty() &&
*best_distance > dists[NPT_WEAK_HTEXT][0]) {
*best_distance = dists[NPT_WEAK_HTEXT][0];
}
return BRT_POLYIMAGE;
}
if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && counts[NPT_HTEXT] > 0 &&
htext_score >= kSmoothDecisionMargin) {
if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) &&
counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) {
*best_distance = dists[NPT_HTEXT][0];
return BRT_TEXT;
} else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && counts[NPT_VTEXT] > 0 &&
vtext_score >= kSmoothDecisionMargin) {
} else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) &&
counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) {
*best_distance = dists[NPT_VTEXT][0];
return BRT_VERT_TEXT;
}
@ -1592,11 +1646,10 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
// The nontext_map (+im_box, rerotation) is used to make text invisible if
// there is non-text in between.
// dists must be an array of vectors of size NPT_COUNT.
void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
const ICOORD &dist_scaling, const TBOX &search_box,
Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug,
std::vector<int> *dists) {
void ColPartitionGrid::AccumulatePartDistances(
const ColPartition &base_part, const ICOORD &dist_scaling,
const TBOX &search_box, Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug, std::vector<int> *dists) {
const TBOX &part_box = base_part.bounding_box();
ColPartitionGridSearch rsearch(this);
rsearch.SetUniqueMode(true);
@ -1605,14 +1658,16 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
// Search for compatible neighbours with a similar strokewidth, but not
// on the other side of a tab vector.
while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
if (neighbour->IsUnMergeableType() || !base_part.ConfirmNoTabViolation(*neighbour) ||
if (neighbour->IsUnMergeableType() ||
!base_part.ConfirmNoTabViolation(*neighbour) ||
neighbour == &base_part) {
continue;
}
TBOX nbox = neighbour->bounding_box();
BlobRegionType n_type = neighbour->blob_type();
if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
!ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, nontext_map)) {
!ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation,
nontext_map)) {
continue; // Text not visible the other side of image.
}
if (BLOBNBOX::IsLineType(n_type)) {
@ -1673,7 +1728,8 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
// neighbours that vertically overlap significantly.
// columns may be nullptr, and indicates the assigned column structure this
// is applicable to part.
void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, ColPartition *part) {
void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns,
ColPartition *part) {
// Set up a rectangle search x-bounded by the column and y by the part.
TBOX box = part->bounding_box();
int y = part->MidY();
@ -1693,19 +1749,20 @@ void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, ColPartiti
left_margin -= kColumnWidthFactor;
right_margin += kColumnWidthFactor;
// Search for ColPartitions that reduce the margin.
left_margin =
FindMargin(box.left() + box.height(), true, left_margin, box.bottom(), box.top(), part);
left_margin = FindMargin(box.left() + box.height(), true, left_margin,
box.bottom(), box.top(), part);
part->set_left_margin(left_margin);
// Search for ColPartitions that reduce the margin.
right_margin =
FindMargin(box.right() - box.height(), false, right_margin, box.bottom(), box.top(), part);
right_margin = FindMargin(box.right() - box.height(), false, right_margin,
box.bottom(), box.top(), part);
part->set_right_margin(right_margin);
}
// Starting at x, and going in the specified direction, up to x_limit, finds
// the margin for the given y range by searching sideways,
// and ignoring not_this.
int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, int y_top,
int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
int y_bottom, int y_top,
const ColPartition *not_this) {
int height = y_top - y_bottom;
// Iterate the ColPartitions in the grid.

View File

@ -47,16 +47,18 @@ public:
// calls the confirm_cb to check any more rules. If the confirm_cb returns
// true, then the partitions are merged.
// Both callbacks are deleted before returning.
void Merges(std::function<bool(ColPartition *, TBOX *)> box_cb,
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb);
void Merges(const std::function<bool(ColPartition *, TBOX *)> &box_cb,
const std::function<bool(const ColPartition *,
const ColPartition *)> &confirm_cb);
// For the given partition, calls the box_cb permanent callback
// to compute the search box, searches the box, and if a candidate is found,
// calls the confirm_cb to check any more rules. If the confirm_cb returns
// true, then the partitions are merged.
// Returns true if the partition is consumed by one or more merges.
bool MergePart(std::function<bool(ColPartition *, TBOX *)> box_cb,
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
bool MergePart(const std::function<bool(ColPartition *, TBOX *)> &box_cb,
const std::function<bool(const ColPartition *,
const ColPartition *)> &confirm_cb,
ColPartition *part);
// Computes and returns the total overlap of all partitions in the grid.
@ -78,7 +80,8 @@ public:
// See colpartitiongrid.cpp for a diagram.
ColPartition *BestMergeCandidate(
const ColPartition *part, ColPartition_CLIST *candidates, bool debug,
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
const std::function<bool(const ColPartition *, const ColPartition *)>
&confirm_cb,
int *overlap_increase);
// Split partitions where it reduces overlap between their bounding boxes.
@ -98,8 +101,8 @@ public:
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if anything was changed.
bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation);
bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map,
const TBOX &im_box, const FCOORD &rerotation);
// Reflects the grid and its colpartitions in the y-axis, assuming that
// all blob boxes have already been done.
@ -150,7 +153,8 @@ public:
// Improves the margins of the ColPartitions in the list by calling
// FindPartitionMargins on each.
void ListFindMargins(ColPartitionSet **best_columns, ColPartition_LIST *parts);
void ListFindMargins(ColPartitionSet **best_columns,
ColPartition_LIST *parts);
// Deletes all the partitions in the grid after disowning all the blobs.
void DeleteParts();
@ -185,8 +189,8 @@ private:
// Finds and returns a list of candidate ColPartitions to merge with part.
// The candidates must overlap search_box, and when merged must not
// overlap any other partitions that are not overlapped by each individually.
void FindMergeCandidates(const ColPartition *part, const TBOX &search_box, bool debug,
ColPartition_CLIST *candidates);
void FindMergeCandidates(const ColPartition *part, const TBOX &search_box,
bool debug, ColPartition_CLIST *candidates);
// Smoothes the region type/flow type of the given part by looking at local
// neighbours and the given image mask. Searches a padded rectangle with the
@ -199,7 +203,8 @@ private:
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if the partition was changed.
bool SmoothRegionType(Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug,
bool SmoothRegionType(Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug,
ColPartition *part);
// Executes the search for SmoothRegionType in a single direction.
// Creates a bounding box that is padded in all directions except direction,
@ -207,17 +212,21 @@ private:
// partitions that makes a decisive result (if any) and returns the type
// and the distance of the collection. If there are any pixels in the
// nontext_map, then the decision is biased towards image.
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
const TBOX &im_box, const FCOORD &rerotation, bool debug,
const ColPartition &part, int *best_distance);
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction,
Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug,
const ColPartition &part,
int *best_distance);
// Counts the partitions in the given search_box by appending the gap
// distance (scaled by dist_scaling) of the part from the base_part to the
// vector of the appropriate type for the partition. Prior to return, the
// vectors in the dists array are sorted in increasing order.
// dists must be an array of vectors of size NPT_COUNT.
void AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling,
const TBOX &search_box, Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug, std::vector<int> *dists);
void AccumulatePartDistances(const ColPartition &base_part,
const ICOORD &dist_scaling,
const TBOX &search_box, Image nontext_map,
const TBOX &im_box, const FCOORD &rerotation,
bool debug, std::vector<int> *dists);
// Improves the margins of the ColPartition by searching for
// neighbours that vertically overlap significantly.
@ -226,8 +235,8 @@ private:
// Starting at x, and going in the specified direction, up to x_limit, finds
// the margin for the given y range by searching sideways,
// and ignoring not_this.
int FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, int y_top,
const ColPartition *not_this);
int FindMargin(int x, bool right_to_left, int x_limit, int y_bottom,
int y_top, const ColPartition *not_this);
};
} // namespace tesseract.

View File

@ -90,7 +90,8 @@ void ColPartitionSet::RelinquishParts() {
}
// Attempt to improve this by adding partitions or expanding partitions.
void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets) {
void ColPartitionSet::ImproveColumnCandidate(const WidthCallback &cb,
PartSetVector *src_sets) {
int set_size = src_sets->size();
// Iterate over the provided column sets, as each one may have something
// to improve this.
@ -140,7 +141,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
// it was before, so use the tab.
part->CopyLeftTab(*col_part, false);
part->SetColumnGoodness(cb);
} else if (col_box_left < part_left && (box_width_ok || !part_width_ok)) {
} else if (col_box_left < part_left &&
(box_width_ok || !part_width_ok)) {
// The box is leaving the good column metric at least as good as
// it was before, so use the box.
part->CopyLeftTab(*col_part, true);
@ -149,7 +151,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
part_left = part->left_key();
}
if (col_right > part_right &&
(part_it.at_last() || part_it.data_relative(1)->left_key() > col_right)) {
(part_it.at_last() ||
part_it.data_relative(1)->left_key() > col_right)) {
// The right edge is better, so we can possibly expand it.
int col_box_right = col_part->BoxRightKey();
bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
@ -159,7 +162,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
// it was before, so use the tab.
part->CopyRightTab(*col_part, false);
part->SetColumnGoodness(cb);
} else if (col_box_right > part_right && (box_width_ok || !part_width_ok)) {
} else if (col_box_right > part_right &&
(box_width_ok || !part_width_ok)) {
// The box is leaving the good column metric at least as good as
// it was before, so use the box.
part->CopyRightTab(*col_part, true);
@ -173,8 +177,10 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
// If this set is good enough to represent a new partitioning into columns,
// add it to the vector of sets, otherwise delete it.
void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets,
const WidthCallback &cb) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom());
if (debug) {
tprintf("Considering new column candidate:\n");
Print();
@ -222,7 +228,8 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthC
// Return true if the partitions in other are all compatible with the columns
// in this.
bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb) {
bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other,
const WidthCallback &cb) {
if (debug) {
tprintf("CompatibleColumns testing compatibility\n");
Print();
@ -288,7 +295,8 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other, Widt
if (debug) {
int next_right = next_part->bounding_box().right();
tprintf("CompatibleColumns false due to 2 parts of good width\n");
tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left, next_right);
tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left,
next_right);
right_col->Print();
}
return false;
@ -375,7 +383,8 @@ ColPartitionSet *ColPartitionSet::Copy(bool good_only) {
}
// Return the bounding boxes of columns at the given y-range
void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments) {
void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
ColSegment_LIST *segments) {
ColPartition_IT it(&parts_);
ColSegment_IT col_it(segments);
col_it.move_to_last();
@ -392,7 +401,8 @@ void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *s
#ifndef GRAPHICS_DISABLED
// Display the edges of the columns at the given y coords.
void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win) {
void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
ScrollView *win) {
ColPartition_IT it(&parts_);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition *part = it.data();
@ -410,10 +420,9 @@ void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, ScrollView *wi
// Column indices are 2n + 1 for real columns (0 based) and even values
// represent the gaps in between columns, with 0 being left of the leftmost.
// resolution refers to the ppi resolution of the image.
ColumnSpanningType ColPartitionSet::SpanningType(int resolution, int left, int right, int height,
int y, int left_margin, int right_margin,
int *first_col, int *last_col,
int *first_spanned_col) {
ColumnSpanningType ColPartitionSet::SpanningType(
int resolution, int left, int right, int height, int y, int left_margin,
int right_margin, int *first_col, int *last_col, int *first_spanned_col) {
*first_col = -1;
*last_col = -1;
*first_spanned_col = -1;
@ -505,7 +514,8 @@ ColumnSpanningType ColPartitionSet::SpanningType(int resolution, int left, int r
// columns that do not match and start new ones for the new columns in this.
// As ColPartitions are turned into BLOCKs, the used ones are put in
// used_parts, as they still need to be referenced in the grid.
void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution,
void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft,
const ICOORD &tright, int resolution,
ColPartition_LIST *used_parts,
WorkingPartSet_LIST *working_set_list) {
// Move the input list to a temporary location so we can delete its elements
@ -525,11 +535,12 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
ColPartition *column = col_it.data();
// Any existing column to the left of column is completed.
while (!src_it.empty() && ((working_set = src_it.data())->column() == nullptr ||
while (!src_it.empty() &&
((working_set = src_it.data())->column() == nullptr ||
working_set->column()->right_key() <= column->left_key())) {
src_it.extract();
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks,
&to_blocks);
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
&completed_blocks, &to_blocks);
delete working_set;
src_it.forward();
}
@ -542,7 +553,8 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
// A matching column gets to stay, and first_new_set gets all the
// completed_sets.
working_set = src_it.empty() ? nullptr : src_it.data();
if (working_set != nullptr && working_set->column()->MatchingColumns(*column)) {
if (working_set != nullptr &&
working_set->column()->MatchingColumns(*column)) {
working_set->set_column(column);
dest_it.add_after_then_move(src_it.extract());
src_it.forward();
@ -557,8 +569,8 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
// Complete any remaining src working sets.
while (!src_it.empty()) {
working_set = src_it.extract();
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks,
&to_blocks);
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
&completed_blocks, &to_blocks);
delete working_set;
src_it.forward();
}
@ -573,8 +585,10 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
}
// Accumulate the widths and gaps into the given variables.
void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples,
int *total_gap, int *gap_samples) {
void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width,
int *width_samples,
int *total_gap,
int *gap_samples) {
ColPartition_IT it(&parts_);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition *part = it.data();
@ -597,8 +611,9 @@ void ColPartitionSet::Print() {
tprintf(
"Partition set of %d parts, %d good, coverage=%d+%d"
" (%d,%d)->(%d,%d)\n",
it.length(), good_column_count_, good_coverage_, bad_coverage_, bounding_box_.left(),
bounding_box_.bottom(), bounding_box_.right(), bounding_box_.top());
it.length(), good_column_count_, good_coverage_, bad_coverage_,
bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(),
bounding_box_.top());
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition *part = it.data();
part->Print();
@ -608,7 +623,8 @@ void ColPartitionSet::Print() {
// PRIVATE CODE.
// Add the given partition to the list in the appropriate place.
void ColPartitionSet::AddPartition(ColPartition *new_part, ColPartition_IT *it) {
void ColPartitionSet::AddPartition(ColPartition *new_part,
ColPartition_IT *it) {
AddPartitionCoverageAndBox(*new_part);
int new_right = new_part->right_key();
if (it->data()->left_key() >= new_right) {

View File

@ -71,15 +71,17 @@ public:
void RelinquishParts();
// Attempt to improve this by adding partitions or expanding partitions.
void ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets);
void ImproveColumnCandidate(const WidthCallback &cb, PartSetVector *src_sets);
// If this set is good enough to represent a new partitioning into columns,
// add it to the vector of sets, otherwise delete it.
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb);
void AddToColumnSetsIfUnique(PartSetVector *column_sets,
const WidthCallback &cb);
// Return true if the partitions in other are all compatible with the columns
// in this.
bool CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb);
bool CompatibleColumns(bool debug, ColPartitionSet *other,
const WidthCallback &cb);
// Returns the total width of all blobs in the part_set that do not lie
// within an approved column. Used as a cost measure for using this
@ -104,20 +106,22 @@ public:
// represent the gaps in between columns, with 0 being left of the leftmost.
// resolution refers to the ppi resolution of the image. It may be 0 if only
// the first_col and last_col are required.
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y,
int left_margin, int right_margin, int *first_col, int *last_col,
int *first_spanned_col);
ColumnSpanningType SpanningType(int resolution, int left, int right,
int height, int y, int left_margin,
int right_margin, int *first_col,
int *last_col, int *first_spanned_col);
// The column_set has changed. Close down all in-progress WorkingPartSets in
// columns that do not match and start new ones for the new columns in this.
// As ColPartitions are turned into BLOCKs, the used ones are put in
// used_parts, as they still need to be referenced in the grid.
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution,
ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set);
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright,
int resolution, ColPartition_LIST *used_parts,
WorkingPartSet_LIST *working_set);
// Accumulate the widths and gaps into the given variables.
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap,
int *gap_samples);
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples,
int *total_gap, int *gap_samples);
// Provide debug output for this ColPartitionSet and all the ColPartitions.
void Print();

View File

@ -38,6 +38,7 @@
#include "underlin.h"
#include <algorithm>
#include <cmath>
#include <vector> // for std::vector
namespace tesseract {
@ -357,7 +358,7 @@ void compute_page_skew( // get average gradient
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
row = row_it.data();
blob_count = row->blob_list()->length();
row_err = static_cast<int32_t>(ceil(row->line_error()));
row_err = static_cast<int32_t>(std::ceil(row->line_error()));
if (row_err <= 0) {
row_err = 1;
}
@ -636,7 +637,7 @@ void delete_non_dropout_rows( // find lines
min_y = block_box.bottom() - 1;
max_y = block_box.top() + 1;
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
line_index = static_cast<int32_t>(floor(row_it.data()->intercept()));
line_index = static_cast<int32_t>(std::floor(row_it.data()->intercept()));
if (line_index <= min_y) {
min_y = line_index - 1;
}
@ -668,7 +669,7 @@ void delete_non_dropout_rows( // find lines
compute_dropout_distances(&occupation[0], &deltas[0], line_count);
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
row = row_it.data();
line_index = static_cast<int32_t>(floor(row->intercept()));
line_index = static_cast<int32_t>(std::floor(row->intercept()));
distance = deltas[line_index - min_y];
if (find_best_dropout_row(row, distance, block->line_spacing / 2, line_index, &row_it,
testing_on)) {
@ -726,7 +727,7 @@ bool find_best_dropout_row( // find neighbours
row_offset = row_inc;
do {
next_row = row_it->data_relative(row_offset);
next_index = static_cast<int32_t>(floor(next_row->intercept()));
next_index = static_cast<int32_t>(std::floor(next_row->intercept()));
if ((distance < 0 && next_index < line_index &&
next_index > line_index + distance + distance) ||
(distance >= 0 && next_index > line_index &&
@ -774,7 +775,7 @@ TBOX deskew_block_coords( // block box
BLOBNBOX *blob; // current blob
BLOBNBOX_IT blob_it; // iterator
length = sqrt(gradient * gradient + 1);
length = std::sqrt(gradient * gradient + 1);
rotation = FCOORD(1 / length, -gradient / length);
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
row = row_it.data();
@ -815,7 +816,7 @@ void compute_line_occupation( // project blobs
FCOORD rotation; // inverse of skew
line_count = max_y - min_y + 1;
length = sqrt(gradient * gradient + 1);
length = std::sqrt(gradient * gradient + 1);
rotation = FCOORD(1 / length, -gradient / length);
for (line_index = 0; line_index < line_count; line_index++) {
deltas[line_index] = 0;
@ -1193,7 +1194,7 @@ void compute_row_stats( // find lines
row_it.backward();
} while (!row_it.at_last());
block->key_row = prev_row;
block->baseline_offset = fmod(prev_row->parallel_c(), block->line_spacing);
block->baseline_offset = std::fmod(prev_row->parallel_c(), block->line_spacing);
if (testing_on) {
tprintf("Blob based spacing=(%g,%g), offset=%g", block->line_size, block->line_spacing,
block->baseline_offset);
@ -1237,7 +1238,7 @@ void compute_row_stats( // find lines
block->line_spacing = rows[row_index]->spacing;
block->max_blob_size = block->line_spacing * textord_excess_blobsize;
}
block->baseline_offset = fmod(rows[row_index]->intercept(), block->line_spacing);
block->baseline_offset = std::fmod(rows[row_index]->intercept(), block->line_spacing);
}
if (testing_on) {
tprintf("\nEstimate line size=%g, spacing=%g, offset=%g\n", block->line_size,
@ -1796,7 +1797,7 @@ void separate_underlines(TO_BLOCK *block, // block to do
int min_blob_height = static_cast<int>(textord_min_blob_height_fraction * block->line_size + 0.5);
// length of vector
length = sqrt(1 + gradient * gradient);
length = std::sqrt(1 + gradient * gradient);
g_vec = FCOORD(1 / length, -gradient / length);
blob_rotation = FCOORD(rotation.x(), -rotation.y());
blob_rotation.rotate(g_vec); // undoing everything
@ -2295,7 +2296,7 @@ void assign_blobs_to_rows( // find lines
(block->block->pdblk.bounding_box().bottom() + block->block->pdblk.bounding_box().top()) /
2.0f;
if (gradient != nullptr) {
g_length = sqrt(1 + *gradient * *gradient);
g_length = std::sqrt(1 + *gradient * *gradient);
}
#ifndef GRAPHICS_DISABLED
if (drawing_skew) {

View File

@ -32,6 +32,7 @@
#include "textord.h"
#include "tprintf.h"
#include <cmath>
#include <vector> // for std::vector
#include <algorithm>
@ -1450,7 +1451,7 @@ void make_first_xheight( // find xheight
for (blobindex = 0; blobindex < blobcount; blobindex++) {
int xcenter = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
float base = baseline->y(xcenter);
float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
float bottomdiff = std::fabs(base - blobcoords[blobindex].bottom());
int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
int height = static_cast<int>(blobcoords[blobindex].top() - base + 0.5);
if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) {

View File

@ -22,6 +22,7 @@
#include <algorithm>
#include <cmath>
#include <utility>
#include "tablefind.h"
#include <allheaders.h>
@ -157,11 +158,11 @@ void DeleteObject(T *object) {
}
TableFinder::TableFinder()
: resolution_(0)
, global_median_xheight_(0)
, global_median_blob_width_(0)
, global_median_ledding_(0)
, left_to_right_language_(true) {}
: resolution_(0),
global_median_xheight_(0),
global_median_blob_width_(0),
global_median_ledding_(0),
left_to_right_language_(true) {}
TableFinder::~TableFinder() {
// ColPartitions and ColSegments created by this class for storage in grids
@ -177,7 +178,8 @@ void TableFinder::set_left_to_right_language(bool order) {
left_to_right_language_ = order;
}
void TableFinder::Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right) {
void TableFinder::Init(int grid_size, const ICOORD &bottom_left,
const ICOORD &top_right) {
// Initialize clean partitions list and grid
clean_part_grid_.Init(grid_size, bottom_left, top_right);
leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right);
@ -188,7 +190,8 @@ void TableFinder::Init(int grid_size, const ICOORD &bottom_left, const ICOORD &t
// Copy cleaned partitions from part_grid_ to clean_part_grid_ and
// insert leaders and rulers into the leader_and_ruling_grid_
void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block) {
void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid,
TO_BLOCK *block) {
// Calculate stats. This lets us filter partitions in AllowTextPartition()
// and filter blobs in AllowBlob().
SetGlobalSpacings(grid);
@ -255,7 +258,8 @@ void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block)
}
// High level function to perform table detection
void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_columns,
void TableFinder::LocateTables(ColPartitionGrid *grid,
ColPartitionSet **all_columns,
WidthCallback width_cb, const FCOORD &reskew) {
// initialize spacing, neighbors, and columns
InitializePartitions(all_columns);
@ -264,8 +268,10 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
if (textord_show_tables) {
ScrollView *table_win = MakeWindow(0, 300, "Column Partitions & Neighbors");
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
DisplayColPartitionConnections(table_win, &clean_part_grid_, ScrollView::ORANGE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
ScrollView::AQUAMARINE);
DisplayColPartitionConnections(table_win, &clean_part_grid_,
ScrollView::ORANGE);
table_win = MakeWindow(100, 300, "Fragmented Text");
DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE);
@ -339,7 +345,8 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
#ifndef GRAPHICS_DISABLED
if (textord_show_tables) {
ScrollView *table_win = MakeWindow(1400, 600, "Recognized Tables");
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, ScrollView::BLUE);
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE,
ScrollView::BLUE);
table_grid_.DisplayBoxes(table_win);
}
#endif // !GRAPHICS_DISABLED
@ -353,7 +360,8 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
#ifndef GRAPHICS_DISABLED
if (textord_show_tables) {
ScrollView *table_win = MakeWindow(1500, 300, "Detected Tables");
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, ScrollView::BLUE);
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE,
ScrollView::BLUE);
table_grid_.DisplayBoxes(table_win);
}
#endif // !GRAPHICS_DISABLED
@ -362,7 +370,7 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
// Merge all colpartitions in table regions to make them a single
// colpartition and revert types of isolated table cells not
// assigned to any table to their original types.
MakeTableBlocks(grid, all_columns, width_cb);
MakeTableBlocks(grid, all_columns, std::move(width_cb));
}
// All grids have the same dimensions. The clean_part_grid_ sizes are set from
// the part_grid_ that is passed to InsertCleanPartitions, which was the same as
@ -452,7 +460,8 @@ void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition *part) {
// Look for the next split in the partition.
for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
const TBOX &box = box_it.data()->bounding_box();
if (previous_right != INT32_MIN && box.left() - previous_right > kThreshold) {
if (previous_right != INT32_MIN &&
box.left() - previous_right > kThreshold) {
// We have a split position. Split the partition in two pieces.
// Insert the left piece in the grid and keep processing the right.
int mid_x = (box.left() + previous_right) / 2;
@ -484,7 +493,8 @@ bool TableFinder::AllowTextPartition(const ColPartition &part) const {
const int median_area = global_median_xheight_ * global_median_blob_width_;
const double kAreaPerBlobRequired = median_area * kAllowTextArea;
// Keep comparisons strictly greater to disallow 0!
return part.median_height() > kHeightRequired && part.median_width() > kWidthRequired &&
return part.median_height() > kHeightRequired &&
part.median_width() > kWidthRequired &&
part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
}
@ -512,13 +522,15 @@ ScrollView *TableFinder::MakeWindow(int x, int y, const char *window_name) {
#endif
// Make single-column blocks from good_columns_ partitions.
void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns, ColSegment_LIST *column_blocks) {
void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns,
ColSegment_LIST *column_blocks) {
for (int i = 0; i < gridheight(); ++i) {
ColPartitionSet *columns = all_columns[i];
if (columns != nullptr) {
ColSegment_LIST new_blocks;
// Get boxes from the current vertical position on the grid
columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(), &new_blocks);
columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(),
&new_blocks);
// Merge the new_blocks boxes into column_blocks if they are well-aligned
GroupColumnBlocks(&new_blocks, column_blocks);
}
@ -526,7 +538,8 @@ void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns, ColSegment_LIST
}
// Merge column segments into the current list if they are well aligned.
void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks, ColSegment_LIST *column_blocks) {
void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks,
ColSegment_LIST *column_blocks) {
ColSegment_IT src_it(new_blocks);
ColSegment_IT dest_it(column_blocks);
// iterate through the source list
@ -558,8 +571,10 @@ void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks, ColSegment_LIST
bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) {
int x_margin = 20;
int y_margin = 5;
return (abs(b1.left() - b2.left()) < x_margin) && (abs(b1.right() - b2.right()) < x_margin) &&
(abs(b1.top() - b2.bottom()) < y_margin || abs(b2.top() - b1.bottom()) < y_margin);
return (abs(b1.left() - b2.left()) < x_margin) &&
(abs(b1.right() - b2.right()) < x_margin) &&
(abs(b1.top() - b2.bottom()) < y_margin ||
abs(b2.top() - b1.bottom()) < y_margin);
}
// Set up info for clean_part_grid_ partitions to be valid during detection
@ -571,7 +586,8 @@ void TableFinder::InitializePartitions(ColPartitionSet **all_columns) {
}
// Set left, right and top, bottom spacings of each colpartition.
void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns) {
void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid,
ColPartitionSet **all_columns) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(grid);
gsearch.StartFullSearch();
@ -599,7 +615,8 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
ColPartition *neighbor = nullptr;
while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) {
if (neighbor->type() == PT_PULLOUT_IMAGE || neighbor->type() == PT_FLOWING_IMAGE ||
if (neighbor->type() == PT_PULLOUT_IMAGE ||
neighbor->type() == PT_FLOWING_IMAGE ||
neighbor->type() == PT_HEADING_IMAGE) {
int right = neighbor->bounding_box().right();
if (right < box.left()) {
@ -611,7 +628,8 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
neighbor = nullptr;
while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) {
if (neighbor->type() == PT_PULLOUT_IMAGE || neighbor->type() == PT_FLOWING_IMAGE ||
if (neighbor->type() == PT_PULLOUT_IMAGE ||
neighbor->type() == PT_FLOWING_IMAGE ||
neighbor->type() == PT_HEADING_IMAGE) {
int left = neighbor->bounding_box().left();
if (left > box.right()) {
@ -623,8 +641,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
ColPartition *upper_part = part->SingletonPartner(true);
if (upper_part) {
int space = std::max(
0, static_cast<int>(upper_part->bounding_box().bottom() - part->bounding_box().bottom()));
int space =
std::max(0, static_cast<int>(upper_part->bounding_box().bottom() -
part->bounding_box().bottom()));
part->set_space_above(space);
} else {
// TODO(nbeato): What constitutes a good value?
@ -635,8 +654,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
ColPartition *lower_part = part->SingletonPartner(false);
if (lower_part) {
int space = std::max(
0, static_cast<int>(part->bounding_box().bottom() - lower_part->bounding_box().bottom()));
int space =
std::max(0, static_cast<int>(part->bounding_box().bottom() -
lower_part->bounding_box().bottom()));
part->set_space_below(space);
} else {
// TODO(nbeato): What constitutes a good value?
@ -650,14 +670,17 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
// Set spacing and closest neighbors above and below a given colpartition.
void TableFinder::SetVerticalSpacing(ColPartition *part) {
TBOX box = part->bounding_box();
int top_range = std::min(box.top() + kMaxVerticalSpacing, static_cast<int>(tright().y()));
int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, static_cast<int>(bleft().y()));
int top_range =
std::min(box.top() + kMaxVerticalSpacing, static_cast<int>(tright().y()));
int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing,
static_cast<int>(bleft().y()));
box.set_top(top_range);
box.set_bottom(bottom_range);
TBOX part_box = part->bounding_box();
// Start a rect search
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(&clean_part_grid_);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
&clean_part_grid_);
rectsearch.StartRectSearch(box);
ColPartition *neighbor;
int min_space_above = kMaxVerticalSpacing;
@ -676,7 +699,8 @@ void TableFinder::SetVerticalSpacing(ColPartition *part) {
min_space_below = gap;
below_neighbor = neighbor;
} // If neighbor is above current partition
else if (part_box.top() < neighbor_box.bottom() && gap < min_space_above) {
else if (part_box.top() < neighbor_box.bottom() &&
gap < min_space_above) {
min_space_above = gap;
above_neighbor = neighbor;
}
@ -777,7 +801,8 @@ void TableFinder::MarkTablePartitions() {
if (textord_tablefind_show_mark) {
ScrollView *table_win = MakeWindow(300, 300, "Initial Table Partitions");
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
ScrollView::AQUAMARINE);
}
#endif
FilterFalseAlarms();
@ -785,7 +810,8 @@ void TableFinder::MarkTablePartitions() {
if (textord_tablefind_show_mark) {
ScrollView *table_win = MakeWindow(600, 300, "Filtered Table Partitions");
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
ScrollView::AQUAMARINE);
}
#endif
SmoothTablePartitionRuns();
@ -793,7 +819,8 @@ void TableFinder::MarkTablePartitions() {
if (textord_tablefind_show_mark) {
ScrollView *table_win = MakeWindow(900, 300, "Smoothed Table Partitions");
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
ScrollView::AQUAMARINE);
}
#endif
FilterFalseAlarms();
@ -801,7 +828,8 @@ void TableFinder::MarkTablePartitions() {
if (textord_tablefind_show_mark || textord_show_tables) {
ScrollView *table_win = MakeWindow(900, 300, "Final Table Partitions");
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
ScrollView::AQUAMARINE);
}
#endif
}
@ -815,7 +843,8 @@ void TableFinder::MarkTablePartitions() {
// 4- Partitions with leaders before/after them.
void TableFinder::MarkPartitionsUsingLocalInformation() {
// Iterate the ColPartitions in the grid.
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&clean_part_grid_);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(
&clean_part_grid_);
gsearch.StartFullSearch();
ColPartition *part = nullptr;
while ((part = gsearch.NextFullSearch()) != nullptr) {
@ -850,7 +879,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const {
BLOBNBOX_CLIST *part_boxes = part->boxes();
BLOBNBOX_C_IT it(part_boxes);
// Check if this is a relatively small partition (such as a single word)
if (part->bounding_box().width() < kMinBoxesInTextPartition * part->median_height() &&
if (part->bounding_box().width() <
kMinBoxesInTextPartition * part->median_height() &&
part_boxes->length() < kMinBoxesInTextPartition) {
return true;
}
@ -907,7 +937,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const {
}
// Since no large gap was found, return false if the partition is too
// long to be a data cell
if (part->bounding_box().width() > kMaxBoxesInDataPartition * part->median_height() ||
if (part->bounding_box().width() >
kMaxBoxesInDataPartition * part->median_height() ||
part_boxes->length() > kMaxBoxesInDataPartition) {
return false;
}
@ -1016,19 +1047,23 @@ void TableFinder::FilterParagraphEndings() {
// To account for that, check if the partition center is to
// the left of the one above it.
int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2;
int upper_mid = (upper_part->bounding_box().left() + upper_part->bounding_box().right()) / 2;
int upper_mid = (upper_part->bounding_box().left() +
upper_part->bounding_box().right()) /
2;
int current_spacing = 0; // spacing of the current line to margin
int upper_spacing = 0; // spacing of the previous line to the margin
if (left_to_right_language_) {
// Left to right languages, use mid - left to figure out the distance
// the middle is from the left margin.
int left = std::min(part->bounding_box().left(), upper_part->bounding_box().left());
int left = std::min(part->bounding_box().left(),
upper_part->bounding_box().left());
current_spacing = mid - left;
upper_spacing = upper_mid - left;
} else {
// Right to left languages, use right - mid to figure out the distance
// the middle is from the right margin.
int right = std::max(part->bounding_box().right(), upper_part->bounding_box().right());
int right = std::max(part->bounding_box().right(),
upper_part->bounding_box().right());
current_spacing = right - mid;
upper_spacing = right - upper_mid;
}
@ -1046,7 +1081,8 @@ void TableFinder::FilterParagraphEndings() {
// The last line of a paragraph should be left aligned.
// TODO(nbeato): This would be untrue if the text was right aligned.
// How often is that?
if (part->space_to_left() > kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) {
if (part->space_to_left() >
kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) {
continue;
}
// The line above it should be right aligned (assuming justified format).
@ -1055,7 +1091,8 @@ void TableFinder::FilterParagraphEndings() {
// line could have fit on the previous line). So compare
// whitespace to text.
if (upper_part->bounding_box().width() <
kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right()) {
kMinParagraphEndingTextToWhitespaceRatio *
upper_part->space_to_right()) {
continue;
}
@ -1153,7 +1190,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST *column_blocks) {
TBOX box = seg->bounding_box();
int num_table_cells = 0;
int num_text_cells = 0;
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rsearch(&clean_part_grid_);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rsearch(
&clean_part_grid_);
rsearch.SetUniqueMode(true);
rsearch.StartRectSearch(box);
ColPartition *part = nullptr;
@ -1178,7 +1216,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST *column_blocks) {
}
// Move column blocks to grid
void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid) {
void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments,
ColSegmentGrid *col_seg_grid) {
ColSegment_IT it(segments);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColSegment *seg = it.extract();
@ -1200,7 +1239,8 @@ void TableFinder::GridMergeColumnBlocks() {
int margin = gridsize();
// Iterate the Column Blocks in the grid.
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(&col_seg_grid_);
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
&col_seg_grid_);
gsearch.StartFullSearch();
ColSegment *seg;
while ((seg = gsearch.NextFullSearch()) != nullptr) {
@ -1214,12 +1254,15 @@ void TableFinder::GridMergeColumnBlocks() {
do {
TBOX box = seg->bounding_box();
// slightly expand the search region vertically
int top_range = std::min(box.top() + margin, static_cast<int>(tright().y()));
int bottom_range = std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
int top_range =
std::min(box.top() + margin, static_cast<int>(tright().y()));
int bottom_range =
std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
box.set_top(top_range);
box.set_bottom(bottom_range);
neighbor_found = false;
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(&col_seg_grid_);
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(
&col_seg_grid_);
rectsearch.StartRectSearch(box);
ColSegment *neighbor = nullptr;
while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
@ -1277,7 +1320,8 @@ void TableFinder::GridMergeColumnBlocks() {
void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
ColSegment_IT it(table_columns);
// Iterate the ColPartitions in the grid.
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&clean_part_grid_);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(
&clean_part_grid_);
gsearch.StartFullSearch();
ColPartition *part;
while ((part = gsearch.NextFullSearch()) != nullptr) {
@ -1291,7 +1335,8 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
// Start a search below the current cell to find bottom neighbours
// Note: a full search will always process things above it first, so
// this should be starting at the highest cell and working its way down.
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> vsearch(&clean_part_grid_);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> vsearch(
&clean_part_grid_);
vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom());
ColPartition *neighbor = nullptr;
bool found_neighbours = false;
@ -1326,11 +1371,13 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
// Mark regions in a column that are x-bounded by the column boundaries and
// y-bounded by the table columns' projection on the y-axis as table regions
void TableFinder::GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions) {
void TableFinder::GetTableRegions(ColSegment_LIST *table_columns,
ColSegment_LIST *table_regions) {
ColSegment_IT cit(table_columns);
ColSegment_IT rit(table_regions);
// Iterate through column blocks
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(&col_seg_grid_);
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
&col_seg_grid_);
gsearch.StartFullSearch();
ColSegment *part;
int page_height = tright().y() - bleft().y();
@ -1389,7 +1436,8 @@ void TableFinder::GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIS
// single line and hence the tables get merged together
void TableFinder::GridMergeTableRegions() {
// Iterate the table regions in the grid.
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(&table_grid_);
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
&table_grid_);
gsearch.StartFullSearch();
ColSegment *seg = nullptr;
while ((seg = gsearch.NextFullSearch()) != nullptr) {
@ -1402,7 +1450,8 @@ void TableFinder::GridMergeTableRegions() {
search_region.set_left(bleft().x());
search_region.set_right(tright().x());
neighbor_found = false;
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(&table_grid_);
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(
&table_grid_);
rectsearch.StartRectSearch(search_region);
ColSegment *neighbor = nullptr;
while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
@ -1454,13 +1503,15 @@ bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) {
// Check for ColPartitions spanning both table regions
TBOX bbox = box1.bounding_union(box2);
// Start a rect search on bbox
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(&clean_part_grid_);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
&clean_part_grid_);
rectsearch.StartRectSearch(bbox);
ColPartition *part = nullptr;
while ((part = rectsearch.NextRectSearch()) != nullptr) {
const TBOX &part_box = part->bounding_box();
// return true if a colpartition spanning both table regions is found
if (part_box.overlap(box1) && part_box.overlap(box2) && !part->IsImageType()) {
if (part_box.overlap(box1) && part_box.overlap(box2) &&
!part->IsImageType()) {
return true;
}
}
@ -1542,12 +1593,14 @@ void TableFinder::GrowTableBox(const TBOX &table_box, TBOX *result_box) {
// Grow a table by increasing the size of the box to include
// partitions with significant overlap with the table.
void TableFinder::GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range,
void TableFinder::GrowTableToIncludePartials(const TBOX &table_box,
const TBOX &search_range,
TBOX *result_box) {
// Rulings are in a different grid, so search 2 grids for rulings, text,
// and table partitions that are not entirely within the new box.
for (int i = 0; i < 2; ++i) {
ColPartitionGrid *grid = (i == 0) ? &fragmented_text_grid_ : &leader_and_ruling_grid_;
ColPartitionGrid *grid =
(i == 0) ? &fragmented_text_grid_ : &leader_and_ruling_grid_;
ColPartitionGridSearch rectsearch(grid);
rectsearch.StartRectSearch(search_range);
ColPartition *part = nullptr;
@ -1569,7 +1622,8 @@ void TableFinder::GrowTableToIncludePartials(const TBOX &table_box, const TBOX &
// Grow a table by expanding to the extents of significantly
// overlapping lines.
void TableFinder::GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range,
void TableFinder::GrowTableToIncludeLines(const TBOX &table_box,
const TBOX &search_range,
TBOX *result_box) {
ColPartitionGridSearch rsearch(&leader_and_ruling_grid_);
rsearch.SetUniqueMode(true);
@ -1601,7 +1655,8 @@ void TableFinder::GrowTableToIncludeLines(const TBOX &table_box, const TBOX &sea
// Checks whether the horizontal line belong to the table by looking at the
// side spacing of extra ColParitions that will be included in the table
// due to expansion
bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &table_box) {
bool TableFinder::HLineBelongsToTable(const ColPartition &part,
const TBOX &table_box) {
if (!part.IsHorizontalLine()) {
return false;
}
@ -1627,7 +1682,8 @@ bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &tabl
// Rulings are in a different grid, so search 2 grids for rulings, text,
// and table partitions that are introduced by the new box.
for (int i = 0; i < 2; ++i) {
ColPartitionGrid *grid = (i == 0) ? &clean_part_grid_ : &leader_and_ruling_grid_;
ColPartitionGrid *grid =
(i == 0) ? &clean_part_grid_ : &leader_and_ruling_grid_;
// Start a rect search on bbox
ColPartitionGridSearch rectsearch(grid);
rectsearch.SetUniqueMode(true);
@ -1672,12 +1728,14 @@ bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &tabl
void TableFinder::IncludeLeftOutColumnHeaders(TBOX *table_box) {
// Start a search above the current table to look for column headers
ColPartitionGridSearch vsearch(&clean_part_grid_);
vsearch.StartVerticalSearch(table_box->left(), table_box->right(), table_box->top());
vsearch.StartVerticalSearch(table_box->left(), table_box->right(),
table_box->top());
ColPartition *neighbor = nullptr;
ColPartition *previous_neighbor = nullptr;
while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
// Max distance to find a table heading.
const int max_distance = kMaxColumnHeaderDistance * neighbor->median_height();
const int max_distance =
kMaxColumnHeaderDistance * neighbor->median_height();
int table_top = table_box->top();
const TBOX &box = neighbor->bounding_box();
// Do not continue if the next box is way above
@ -1714,7 +1772,8 @@ void TableFinder::DeleteSingleColumnTables() {
// create an integer array to hold projection on x-axis
int *table_xprojection = new int[page_width];
// Iterate through all tables in the table grid
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(&table_grid_);
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(
&table_grid_);
table_search.StartFullSearch();
ColSegment *table;
while ((table = table_search.NextFullSearch()) != nullptr) {
@ -1724,7 +1783,8 @@ void TableFinder::DeleteSingleColumnTables() {
table_xprojection[i] = 0;
}
// Start a rect search on table_box
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(&clean_part_grid_);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
&clean_part_grid_);
rectsearch.SetUniqueMode(true);
rectsearch.StartRectSearch(table_box);
ColPartition *part;
@ -1938,7 +1998,8 @@ void TableFinder::DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW);
}
void TableFinder::DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid,
void TableFinder::DisplayColPartitionConnections(ScrollView *win,
ColPartitionGrid *grid,
ScrollView::Color color) {
// Iterate the ColPartitions in the grid.
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(grid);
@ -1982,8 +2043,9 @@ void TableFinder::DisplayColPartitionConnections(ScrollView *win, ColPartitionGr
// Merge all colpartitions in table regions to make them a single
// colpartition and revert types of isolated table cells not
// assigned to any table to their original types.
void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_columns,
WidthCallback width_cb) {
void TableFinder::MakeTableBlocks(ColPartitionGrid *grid,
ColPartitionSet **all_columns,
const WidthCallback &width_cb) {
// Since we have table blocks already, remove table tags from all
// colpartitions
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(grid);
@ -1997,13 +2059,15 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_
}
// Now make a single colpartition out of each table block and remove
// all colpartitions contained within a table
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(&table_grid_);
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(
&table_grid_);
table_search.StartFullSearch();
ColSegment *table;
while ((table = table_search.NextFullSearch()) != nullptr) {
const TBOX &table_box = table->bounding_box();
// Start a rect search on table_box
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(grid);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
grid);
rectsearch.StartRectSearch(table_box);
ColPartition *part;
ColPartition *table_partition = nullptr;
@ -2045,7 +2109,10 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_
//////// ColSegment code
////////
ColSegment::ColSegment()
: ELIST_LINK(), num_table_cells_(0), num_text_cells_(0), type_(COL_UNKNOWN) {}
: ELIST_LINK(),
num_table_cells_(0),
num_text_cells_(0),
type_(COL_UNKNOWN) {}
// Provides a color for BBGrid to draw the rectangle.
ScrollView::Color ColSegment::BoxColor() const {

View File

@ -107,7 +107,8 @@ private:
// Typedef BBGrid of ColSegments
using ColSegmentGrid = BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>;
using ColSegmentGridSearch = GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>;
using ColSegmentGridSearch =
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>;
// TableFinder is a utility class to find a set of tables given a set of
// ColPartitions and Columns. The TableFinder will mark candidate ColPartitions
@ -143,8 +144,8 @@ public:
// tables. The columns and width callbacks are used to merge tables.
// The reskew argument is only used to write the tables to the out.png
// if that feature is enabled.
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb,
const FCOORD &reskew);
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns,
WidthCallback width_cb, const FCOORD &reskew);
protected:
// Access for the grid dimensions.
@ -179,7 +180,8 @@ protected:
// Utility function to move segments to col_seg_grid
// Note: Move includes ownership,
// so segments will be be owned by col_seg_grid
void MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid);
void MoveColSegmentsToGrid(ColSegment_LIST *segments,
ColSegmentGrid *col_seg_grid);
//////// Set up code to run during table detection to correctly
//////// initialize variables on column partitions that are used later.
@ -191,7 +193,8 @@ protected:
// Set left, right and top, bottom spacings of each colpartition.
// Left/right spacings are w.r.t the column boundaries
// Top/bottom spacings are w.r.t. previous and next colpartitions
static void SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns);
static void SetPartitionSpacings(ColPartitionGrid *grid,
ColPartitionSet **all_columns);
// Set spacing and closest neighbors above and below a given colpartition.
void SetVerticalSpacing(ColPartition *part);
@ -263,10 +266,12 @@ protected:
////////
// Get Column segments from best_columns_
void GetColumnBlocks(ColPartitionSet **columns, ColSegment_LIST *col_segments);
void GetColumnBlocks(ColPartitionSet **columns,
ColSegment_LIST *col_segments);
// Group Column segments into consecutive single column regions.
void GroupColumnBlocks(ColSegment_LIST *current_segments, ColSegment_LIST *col_segments);
void GroupColumnBlocks(ColSegment_LIST *current_segments,
ColSegment_LIST *col_segments);
// Check if two boxes are consecutive within the same column
bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
@ -295,7 +300,8 @@ protected:
// earlier functions) in the x direction and the min/max extent of
// overlapping table columns in the y direction.
// Section 4.2 of paper.
void GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions);
void GetTableRegions(ColSegment_LIST *table_columns,
ColSegment_LIST *table_regions);
//////// Functions to "patch up" found tables
////////
@ -316,11 +322,12 @@ protected:
void GrowTableBox(const TBOX &table_box, TBOX *result_box);
// Grow a table by increasing the size of the box to include
// partitions with significant overlap with the table.
void GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range,
TBOX *result_box);
void GrowTableToIncludePartials(const TBOX &table_box,
const TBOX &search_range, TBOX *result_box);
// Grow a table by expanding to the extents of significantly
// overlapping lines.
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, TBOX *result_box);
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range,
TBOX *result_box);
// Checks whether the horizontal line belong to the table by looking at the
// side spacing of extra ColParitions that will be included in the table
// due to expansion
@ -351,12 +358,14 @@ protected:
// Displays Colpartitions marked as table row. Overlays them on top of
// part_grid_.
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color);
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols,
ScrollView::Color color);
// Displays the colpartitions using a new coloring on an existing window.
// Note: This method is only for debug purpose during development and
// would not be part of checked in code
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color,
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
ScrollView::Color text_color,
ScrollView::Color table_color);
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
ScrollView::Color default_color);
@ -366,7 +375,8 @@ protected:
// Merge all colpartitions in table regions to make them a single
// colpartition and revert types of isolated table cells not
// assigned to any table to their original types.
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb);
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns,
const WidthCallback &width_cb);
/////////////////////////////////////////////////
// Useful objects used during table find process.

View File

@ -720,15 +720,6 @@ int StructuredTable::CountPartitions(const TBOX &box) {
//////// TableRecognizer Class
////////
TableRecognizer::TableRecognizer()
: text_grid_(nullptr)
, line_grid_(nullptr)
, min_height_(0)
, min_width_(0)
, max_text_height_(INT32_MAX) {}
TableRecognizer::~TableRecognizer() = default;
void TableRecognizer::Init() {}
void TableRecognizer::set_text_grid(ColPartitionGrid *text_grid) {

View File

@ -250,8 +250,8 @@ protected:
class TESS_API TableRecognizer {
public:
TableRecognizer();
~TableRecognizer();
TableRecognizer() = default;
~TableRecognizer() = default;
// Initialization code. Must be called after the constructor.
void Init();
@ -358,13 +358,13 @@ protected:
static bool IsWeakTableRow(StructuredTable *table, int row);
// Input data, used as read only data to make decisions.
ColPartitionGrid *text_grid_; // Text ColPartitions
ColPartitionGrid *line_grid_; // Line ColPartitions
ColPartitionGrid *text_grid_ = nullptr; // Text ColPartitions
ColPartitionGrid *line_grid_ = nullptr; // Line ColPartitions
// Table constraints, a "good" table must satisfy these.
int min_height_;
int min_width_;
int min_height_ = 0;
int min_width_ = 0;
// Filters, used to prevent awkward partitions from destroying structure.
int max_text_height_; // Horizontal lines may intersect taller text.
int max_text_height_ = INT32_MAX; // Horizontal lines may intersect taller text.
};
} // namespace tesseract

View File

@ -325,7 +325,7 @@ float Textord::filter_noise_blobs(BLOBNBOX_LIST *src_list, // original list
(tesseract::CCStruct::kDescenderFraction + tesseract::CCStruct::kXHeightFraction +
2 * tesseract::CCStruct::kAscenderFraction) /
tesseract::CCStruct::kXHeightFraction);
min_y = floor(initial_x / 2);
min_y = std::floor(initial_x / 2);
max_x = ceil(initial_x * textord_width_limit);
small_it.move_to_first();
for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
@ -729,7 +729,7 @@ void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BL
int best_g = 0;
float best_angle_diff = FLT_MAX;
for (const auto &group : groups) {
double angle_diff = fabs(block_angle - group->angle);
double angle_diff = std::fabs(block_angle - group->angle);
if (angle_diff > M_PI) {
angle_diff = fabs(angle_diff - 2.0 * M_PI);
}

View File

@ -36,6 +36,7 @@
#endif
#include <algorithm>
#include <cmath>
#include <memory>
#define MAXSPACING 128 /*max expected spacing in pix */
@ -295,7 +296,7 @@ void Textord::row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx,
/* Collect first pass stats for row */
if (!good_block_space_estimate) {
block_space_gap_width = int16_t(floor(row->xheight / 2));
block_space_gap_width = int16_t(std::floor(row->xheight / 2));
}
if (!row->blob_list()->empty()) {
if (tosp_threshold_bias1 > 0) {
@ -435,7 +436,7 @@ are ignoring big gaps*/
if (suspected_table) {
sane_space =
std::max(tosp_table_kn_sp_ratio * row->kern_size, tosp_table_xht_sp_ratio * row->xheight);
sane_threshold = int32_t(floor((sane_space + row->kern_size) / 2));
sane_threshold = int32_t(std::floor((sane_space + row->kern_size) / 2));
if ((row->space_size < sane_space) || (row->space_threshold < sane_threshold)) {
if (tosp_debug_level > 5) {
@ -606,7 +607,7 @@ It comes to the same thing.
(Though there is a difference in that old textor has integer space_size
and kern_size.)
*/
row->space_threshold = int32_t(floor((row->space_size + row->kern_size) / 2));
row->space_threshold = int32_t(std::floor((row->space_size + row->kern_size) / 2));
}
// Apply the same logic and ratios as in row_spacing_stats to
@ -648,7 +649,7 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta
crude_threshold_estimate =
std::max(tosp_init_guess_kn_mult * kern_estimate, tosp_init_guess_xht_mult * row->xheight);
small_gaps_count =
stats_count_under(all_gap_stats, static_cast<int16_t>(ceil(crude_threshold_estimate)));
stats_count_under(all_gap_stats, static_cast<int16_t>(std::ceil(crude_threshold_estimate)));
total = all_gap_stats->get_total();
if ((total <= tosp_redo_kern_limit) ||
@ -718,7 +719,7 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta
} else {
row->kern_size = all_gap_stats->median();
}
row->space_threshold = int32_t(floor((row->space_size + row->kern_size) / 2));
row->space_threshold = int32_t(std::floor((row->space_size + row->kern_size) / 2));
/* Sanity check */
if ((row->kern_size >= row->space_threshold) || (row->space_threshold >= row->space_size) ||
(row->space_threshold <= 0)) {
@ -793,7 +794,7 @@ threshold is not within it, move the threshold so that is is just inside it.
reqd_zero_width = 3;
}
for (index = int16_t(ceil(kn)); index < int16_t(floor(sp)); index++) {
for (index = int16_t(std::ceil(kn)); index < int16_t(std::floor(sp)); index++) {
if (all_gap_stats->pile_count(index) == 0) {
if (zero_width == 0) {
zero_start = index;
@ -909,7 +910,7 @@ the gap between the word being built and the next one. */
current_gap = box_it.data()->bounding_box().left() - next_rep_char_word_right;
current_within_xht_gap = current_gap;
if (current_gap > tosp_rep_space * repetition_spacing) {
prev_blanks = static_cast<uint8_t>(floor(current_gap / row->space_size));
prev_blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
if (prev_blanks < 1) {
prev_blanks = 1;
}
@ -1002,7 +1003,7 @@ the gap between the word being built and the next one. */
current_gap = word->bounding_box().left() - prev_x;
current_within_xht_gap = current_gap;
if (current_gap > tosp_rep_space * repetition_spacing) {
blanks = static_cast<uint8_t>(floor(current_gap / row->space_size));
blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
if (blanks < 1) {
blanks = 1;
}
@ -1066,7 +1067,7 @@ the gap between the word being built and the next one. */
repetition_spacing = find_mean_blob_spacing(word);
current_gap = word->bounding_box().left() - prev_x;
if (current_gap > tosp_rep_space * repetition_spacing) {
blanks = static_cast<uint8_t>(floor(current_gap / row->space_size));
blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
if (blanks < 1) {
blanks = 1;
}
@ -1738,7 +1739,7 @@ caps ht chars which should NOT have their box reduced: T, Y, V, W etc
if (left_limit > junk) {
*left_above_xht = INT16_MAX; // No area above xht
} else {
*left_above_xht = static_cast<int16_t>(floor(left_limit));
*left_above_xht = static_cast<int16_t>(std::floor(left_limit));
}
/*
Find reduced LH limit of blob - the left extent of the region ABOVE the
@ -1762,7 +1763,7 @@ Find reduced RH limit of blob - the right extent of the region BELOW the xht.
return TBOX(); // no area within xht so return empty box
}
return TBOX(ICOORD(static_cast<int16_t>(floor(left_limit)), blob_box.bottom()),
ICOORD(static_cast<int16_t>(ceil(right_limit)), blob_box.top()));
return TBOX(ICOORD(static_cast<int16_t>(std::floor(left_limit)), blob_box.bottom()),
ICOORD(static_cast<int16_t>(std::ceil(right_limit)), blob_box.top()));
}
} // namespace tesseract

View File

@ -23,6 +23,8 @@
#include "wordseg.h"
#include <cmath>
#include "blobbox.h"
#include "cjkpitch.h"
#include "drawtord.h"
@ -222,7 +224,7 @@ int32_t row_words( // compute space size
lower = row->xheight * textord_words_initial_lower;
upper = row->xheight * textord_words_initial_upper;
cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, 3, cluster_stats);
while (cluster_count < 2 && ceil(lower) < floor(upper)) {
while (cluster_count < 2 && std::ceil(lower) < std::floor(upper)) {
// shrink gap
upper = (upper * 3 + lower) / 4;
lower = (lower * 3 + upper) / 4;

View File

@ -26,8 +26,7 @@
using namespace tesseract;
static int list_components(TessdataManager &tm, const char *filename)
{
static int list_components(TessdataManager &tm, const char *filename) {
// Initialize TessdataManager with the data in the given traineddata file.
if (filename != nullptr && !tm.Init(filename)) {
tprintf("Failed to read %s\n", filename);
@ -37,8 +36,7 @@ static int list_components(TessdataManager &tm, const char *filename)
return EXIT_SUCCESS;
}
static int list_network(TessdataManager &tm, const char *filename)
{
static int list_network(TessdataManager &tm, const char *filename) {
if (filename != nullptr && !tm.Init(filename)) {
tprintf("Failed to read %s\n", filename);
return EXIT_FAILURE;
@ -62,7 +60,7 @@ static int list_network(TessdataManager &tm, const char *filename)
std::cout << "Layer Learning Rates: ";
auto layers = recognizer.EnumerateLayers();
for (auto id : layers) {
for (const auto &id : layers) {
auto layer = recognizer.GetLayer(id);
std::cout << id << "(" << layer->name() << ")"
<< "=" << recognizer.GetLayerLearningRate(id)
@ -138,7 +136,8 @@ int main(int argc, char **argv) {
} else {
printf("Output %s created successfully.\n", output_file.c_str());
}
} else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) {
} else if (argc >= 4 &&
(strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) {
// Initialize TessdataManager with the data in the given traineddata file.
if (!tm.Init(argv[2])) {
tprintf("Failed to read %s\n", argv[2]);
@ -173,7 +172,8 @@ int main(int argc, char **argv) {
if (tm.ExtractToFile(filename.c_str())) {
printf("Wrote %s\n", filename.c_str());
} else if (errno != 0) {
printf("Error, could not extract %s: %s\n", filename.c_str(), strerror(errno));
printf("Error, could not extract %s: %s\n", filename.c_str(),
strerror(errno));
return EXIT_FAILURE;
}
}
@ -184,7 +184,8 @@ int main(int argc, char **argv) {
std::string traineddata_filename = new_traineddata_filename;
traineddata_filename += ".__tmp__";
if (rename(new_traineddata_filename, traineddata_filename.c_str()) != 0) {
tprintf("Failed to create a temporary file %s\n", traineddata_filename.c_str());
tprintf("Failed to create a temporary file %s\n",
traineddata_filename.c_str());
return EXIT_FAILURE;
}
@ -212,7 +213,8 @@ int main(int argc, char **argv) {
std::vector<char> lstm_data;
fp.OpenWrite(&lstm_data);
ASSERT_HOST(recognizer.Serialize(&tm, &fp));
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], lstm_data.size());
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0],
lstm_data.size());
if (!tm.SaveFile(argv[2], nullptr)) {
tprintf("Failed to write modified traineddata:%s!\n", argv[2]);
return EXIT_FAILURE;

View File

@ -24,6 +24,7 @@
#include <algorithm>
#include <cfloat> // for FLT_MAX
#include <cmath>
#include <memory>
namespace tesseract {
@ -266,7 +267,7 @@ float CTC::CalculateBiasFraction() {
if (total_labels == 0) {
return 0.0f;
}
return exp(std::max(true_pos - false_pos, 1) * log(kMinProb_) / total_labels);
return exp(std::max(true_pos - false_pos, 1) * std::log(kMinProb_) / total_labels);
}
// Given ln(x) and ln(y), returns ln(x + y), using:
@ -319,7 +320,7 @@ void CTC::Backward(GENERIC_2D_ARRAY<double> *log_probs) const {
const float *outputs_tp1 = outputs_[t + 1];
for (int u = min_labels_[t]; u <= max_labels_[t]; ++u) {
// Continuing the same label.
double log_sum = log_probs->get(t + 1, u) + log(outputs_tp1[labels_[u]]);
double log_sum = log_probs->get(t + 1, u) + std::log(outputs_tp1[labels_[u]]);
// Change from previous label.
if (u + 1 < num_labels_) {
double prev_prob = outputs_tp1[labels_[u + 1]];

View File

@ -51,16 +51,16 @@ const float kFontMergeDistance = 0.025;
MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis,
bool replicate_samples, int debug_level)
: norm_mode_(norm_mode)
, samples_(fontinfo_table_)
, junk_samples_(fontinfo_table_)
, verify_samples_(fontinfo_table_)
, charsetsize_(0)
, enable_shape_analysis_(shape_analysis)
, enable_replication_(replicate_samples)
, fragments_(nullptr)
, prev_unichar_id_(-1)
, debug_level_(debug_level) {}
: norm_mode_(norm_mode),
samples_(fontinfo_table_),
junk_samples_(fontinfo_table_),
verify_samples_(fontinfo_table_),
charsetsize_(0),
enable_shape_analysis_(shape_analysis),
enable_replication_(replicate_samples),
fragments_(nullptr),
prev_unichar_id_(-1),
debug_level_(debug_level) {}
MasterTrainer::~MasterTrainer() {
delete[] fragments_;
@ -137,10 +137,14 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
const FEATURE_DEFS_STRUCT &feature_defs,
bool verification) {
char buffer[2048];
const int int_feature_type = ShortNameToFeatureType(feature_defs, kIntFeatureType);
const int micro_feature_type = ShortNameToFeatureType(feature_defs, kMicroFeatureType);
const int cn_feature_type = ShortNameToFeatureType(feature_defs, kCNFeatureType);
const int geo_feature_type = ShortNameToFeatureType(feature_defs, kGeoFeatureType);
const int int_feature_type =
ShortNameToFeatureType(feature_defs, kIntFeatureType);
const int micro_feature_type =
ShortNameToFeatureType(feature_defs, kMicroFeatureType);
const int cn_feature_type =
ShortNameToFeatureType(feature_defs, kCNFeatureType);
const int geo_feature_type =
ShortNameToFeatureType(feature_defs, kGeoFeatureType);
FILE *fp = fopen(page_name, "rb");
if (fp == nullptr) {
@ -175,8 +179,8 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
sample->set_font_id(font_id);
sample->set_page_num(page_number + page_images_.size());
sample->set_bounding_box(bounding_box);
sample->ExtractCharDesc(int_feature_type, micro_feature_type, cn_feature_type, geo_feature_type,
char_desc);
sample->ExtractCharDesc(int_feature_type, micro_feature_type,
cn_feature_type, geo_feature_type, char_desc);
AddSample(verification, unichar.c_str(), sample);
delete char_desc;
}
@ -186,7 +190,8 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
// Adds the given single sample to the trainer, setting the classid
// appropriately from the given unichar_str.
void MasterTrainer::AddSample(bool verification, const char *unichar, TrainingSample *sample) {
void MasterTrainer::AddSample(bool verification, const char *unichar,
TrainingSample *sample) {
if (verification) {
verify_samples_.AddSample(unichar, sample);
prev_unichar_id_ = -1;
@ -314,7 +319,8 @@ void MasterTrainer::SetupMasterShapes() {
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance,
&char_shapes_end_fragment);
char_shapes.AppendMasterShapes(char_shapes_end_fragment, nullptr);
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, &char_shapes);
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance,
&char_shapes);
master_shapes_.AppendMasterShapes(char_shapes, nullptr);
tprintf("Master shape_table:%s\n", master_shapes_.SummaryStr().c_str());
}
@ -383,13 +389,13 @@ bool MasterTrainer::LoadFontInfo(const char *filename) {
fontinfo.name = font_name;
fontinfo.properties = 0;
fontinfo.universal_id = 0;
if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold, &fixed, &serif,
&fraktur) != 6) {
if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold,
&fixed, &serif, &fraktur) != 6) {
delete[] font_name;
continue;
}
fontinfo.properties =
(italic << 0) + (bold << 1) + (fixed << 2) + (serif << 3) + (fraktur << 4);
fontinfo.properties = (italic << 0) + (bold << 1) + (fixed << 2) +
(serif << 3) + (fraktur << 4);
if (!fontinfo_table_.contains(fontinfo)) {
fontinfo_table_.push_back(fontinfo);
} else {
@ -477,7 +483,8 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
fi->init_spacing(unicharset_.size());
FontSpacingInfo *spacing = nullptr;
for (int l = 0; l < num_unichars; ++l) {
if (tfscanf(fontinfo_file, "%s %d %d %d", uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) {
if (tfscanf(fontinfo_file, "%s %d %d %d", uch, &x_gap_before, &x_gap_after,
&num_kerned) != 4) {
tprintf("Bad format of font spacing file %s\n", filename);
fclose(fontinfo_file);
return false;
@ -498,7 +505,8 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
if (!valid || !unicharset_.contains_unichar(kerned_uch)) {
continue;
}
spacing->kerned_unichar_ids.push_back(unicharset_.unichar_to_id(kerned_uch));
spacing->kerned_unichar_ids.push_back(
unicharset_.unichar_to_id(kerned_uch));
spacing->kerned_x_gaps.push_back(static_cast<int16_t>(x_gap * scale));
}
if (valid) {
@ -572,13 +580,14 @@ void MasterTrainer::SetupFlatShapeTable(ShapeTable *shape_table) {
// Sets up a Clusterer for mftraining on a single shape_id.
// Call FreeClusterer on the return value after use.
CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table,
const FEATURE_DEFS_STRUCT &feature_defs, int shape_id,
int *num_samples) {
CLUSTERER *MasterTrainer::SetupForClustering(
const ShapeTable &shape_table, const FEATURE_DEFS_STRUCT &feature_defs,
int shape_id, int *num_samples) {
int desc_index = ShortNameToFeatureType(feature_defs, kMicroFeatureType);
int num_params = feature_defs.FeatureDesc[desc_index]->NumParams;
ASSERT_HOST(num_params == (int)MicroFeatureParameter::MFCount);
CLUSTERER *clusterer = MakeClusterer(num_params, feature_defs.FeatureDesc[desc_index]->ParamDesc);
CLUSTERER *clusterer = MakeClusterer(
num_params, feature_defs.FeatureDesc[desc_index]->ParamDesc);
// We want to iterate over the samples of just the one shape.
IndexMapBiDi shape_map;
@ -612,12 +621,14 @@ CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table,
void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
const UNICHARSET &shape_set,
const ShapeTable &shape_table,
CLASS_STRUCT *float_classes, const char *inttemp_file,
CLASS_STRUCT *float_classes,
const char *inttemp_file,
const char *pffmtable_file) {
auto *classify = new tesseract::Classify();
// Move the fontinfo table to classify.
fontinfo_table_.MoveTo(&classify->get_fontinfo_table());
INT_TEMPLATES_STRUCT *int_templates = classify->CreateIntTemplates(float_classes, shape_set);
INT_TEMPLATES_STRUCT *int_templates =
classify->CreateIntTemplates(float_classes, shape_set);
FILE *fp = fopen(inttemp_file, "wb");
if (fp == nullptr) {
tprintf("Error, failed to open file \"%s\"\n", inttemp_file);
@ -631,10 +642,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
// We put the shapetable_cutoffs in a vector, and compute the
// unicharset cutoffs along the way.
std::vector<uint16_t> shapetable_cutoffs;
std::vector<uint16_t> unichar_cutoffs;
for (int c = 0; c < unicharset.size(); ++c) {
unichar_cutoffs.push_back(0);
}
std::vector<uint16_t> unichar_cutoffs(unicharset.size());
/* then write out each class */
for (int i = 0; i < int_templates->NumClasses; ++i) {
INT_CLASS_STRUCT *Class = ClassForClassId(int_templates, i);
@ -679,7 +687,8 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
// Generate debug output relating to the canonical distance between the
// two given UTF8 grapheme strings.
void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar_str2) {
void MasterTrainer::DebugCanonical(const char *unichar_str1,
const char *unichar_str2) {
int class_id1 = unicharset_.unichar_to_id(unichar_str1);
int class_id2 = unicharset_.unichar_to_id(unichar_str2);
if (class_id2 == INVALID_UNICHAR_ID) {
@ -689,8 +698,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
tprintf("No unicharset entry found for %s\n", unichar_str1);
return;
} else {
tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", class_id1, unichar_str1,
class_id2, unichar_str2);
tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", class_id1,
unichar_str1, class_id2, unichar_str2);
}
int num_fonts = samples_.NumFonts();
const IntFeatureMap &feature_map = feature_map_;
@ -714,7 +723,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
if (samples_.NumClassSamples(f2, class_id2, false) == 0) {
continue;
}
float dist = samples_.ClusterDistance(f1, class_id1, f2, class_id2, feature_map);
float dist =
samples_.ClusterDistance(f1, class_id1, f2, class_id2, feature_map);
tprintf(" %5.3f", dist);
}
tprintf("\n");
@ -725,7 +735,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
if (samples_.NumClassSamples(f, class_id1, true) > 0) {
shapes.AddShape(class_id1, f);
}
if (class_id1 != class_id2 && samples_.NumClassSamples(f, class_id2, true) > 0) {
if (class_id1 != class_id2 &&
samples_.NumClassSamples(f, class_id2, true) > 0) {
shapes.AddShape(class_id2, f);
}
}
@ -743,14 +754,17 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
// Until the features window is destroyed, each click in the features window
// will display the samples that have that feature in a separate window.
void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font,
const char *unichar_str2, int canonical_font) {
const char *unichar_str2,
int canonical_font) {
const IntFeatureMap &feature_map = feature_map_;
const IntFeatureSpace &feature_space = feature_map.feature_space();
ScrollView *f_window = CreateFeatureSpaceWindow("Features", 100, 500);
ClearFeatureSpaceWindow(norm_mode_ == NM_BASELINE ? baseline : character, f_window);
ClearFeatureSpaceWindow(norm_mode_ == NM_BASELINE ? baseline : character,
f_window);
int class_id2 = samples_.unicharset().unichar_to_id(unichar_str2);
if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) {
const TrainingSample *sample = samples_.GetCanonicalSample(canonical_font, class_id2);
const TrainingSample *sample =
samples_.GetCanonicalSample(canonical_font, class_id2);
for (uint32_t f = 0; f < sample->num_features(); ++f) {
RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED);
}
@ -780,8 +794,8 @@ void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font,
Shape shape;
shape.AddToShape(class_id1, cloud_font);
s_window->Clear();
samples_.DisplaySamplesWithFeature(feature_index, shape, feature_space, ScrollView::GREEN,
s_window);
samples_.DisplaySamplesWithFeature(feature_index, shape, feature_space,
ScrollView::GREEN, s_window);
s_window->Update();
}
}
@ -790,22 +804,25 @@ void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font,
}
#endif // !GRAPHICS_DISABLED
void MasterTrainer::TestClassifierVOld(bool replicate_samples, ShapeClassifier *test_classifier,
void MasterTrainer::TestClassifierVOld(bool replicate_samples,
ShapeClassifier *test_classifier,
ShapeClassifier *old_classifier) {
SampleIterator sample_it;
sample_it.Init(nullptr, nullptr, replicate_samples, &samples_);
ErrorCounter::DebugNewErrors(test_classifier, old_classifier, CT_UNICHAR_TOPN_ERR,
fontinfo_table_, page_images_, &sample_it);
ErrorCounter::DebugNewErrors(test_classifier, old_classifier,
CT_UNICHAR_TOPN_ERR, fontinfo_table_,
page_images_, &sample_it);
}
// Tests the given test_classifier on the internal samples.
// See TestClassifier for details.
void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, int report_level,
void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode,
int report_level,
bool replicate_samples,
ShapeClassifier *test_classifier,
std::string *report_string) {
TestClassifier(error_mode, report_level, replicate_samples, &samples_, test_classifier,
report_string);
TestClassifier(error_mode, report_level, replicate_samples, &samples_,
test_classifier, report_string);
}
// Tests the given test_classifier on the given samples.
@ -822,8 +839,10 @@ void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, int report_le
// If report_string is non-nullptr, a summary of the results for each font
// is appended to the report_string.
double MasterTrainer::TestClassifier(CountTypes error_mode, int report_level,
bool replicate_samples, TrainingSampleSet *samples,
ShapeClassifier *test_classifier, std::string *report_string) {
bool replicate_samples,
TrainingSampleSet *samples,
ShapeClassifier *test_classifier,
std::string *report_string) {
SampleIterator sample_it;
sample_it.Init(nullptr, nullptr, replicate_samples, samples);
if (report_level > 0) {
@ -837,8 +856,9 @@ double MasterTrainer::TestClassifier(CountTypes error_mode, int report_level,
tprintf("Testing %sREPLICATED:\n", replicate_samples ? "" : "NON-");
}
double unichar_error = 0.0;
ErrorCounter::ComputeErrorRate(test_classifier, report_level, error_mode, fontinfo_table_,
page_images_, &sample_it, &unichar_error, nullptr, report_string);
ErrorCounter::ComputeErrorRate(test_classifier, report_level, error_mode,
fontinfo_table_, page_images_, &sample_it,
&unichar_error, nullptr, report_string);
return unichar_error;
}
@ -857,14 +877,16 @@ float MasterTrainer::ShapeDistance(const ShapeTable &shapes, int s1, int s2) {
// distances between characters of matching font where possible.
for (int c1 = 0; c1 < num_chars1; ++c1) {
for (int c2 = 0; c2 < num_chars2; ++c2) {
dist_sum += samples_.UnicharDistance(shape1[c1], shape2[c2], true, feature_map);
dist_sum +=
samples_.UnicharDistance(shape1[c1], shape2[c2], true, feature_map);
++dist_count;
}
}
} else {
// In the single unichar case, there is little alternative, but to compute
// the squared-order distance between pairs of fonts.
dist_sum = samples_.UnicharDistance(shape1[0], shape2[0], false, feature_map);
dist_sum =
samples_.UnicharDistance(shape1[0], shape2[0], false, feature_map);
++dist_count;
}
return dist_sum / dist_count;
@ -942,8 +964,8 @@ void MasterTrainer::ReplaceFragmentedSamples() {
// * No shape shall have more than max_shape_unichars in it,
// * Don't merge shapes where the distance between them exceeds max_dist.
const float kInfiniteDist = 999.0f;
void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float max_dist,
ShapeTable *shapes) {
void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars,
float max_dist, ShapeTable *shapes) {
int num_shapes = shapes->NumShapes();
int max_merges = num_shapes - min_shapes;
// TODO: avoid new / delete.
@ -971,8 +993,8 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float
int num_unichars = shapes->MergedUnicharCount(min_s1, min_s2);
shape_dists[min_s1][min_s2 - min_s1 - 1].distance = kInfiniteDist;
if (num_unichars > max_shape_unichars) {
tprintf("Merge of %d and %d with %d would exceed max of %d unichars\n", min_s1, min_s2,
num_unichars, max_shape_unichars);
tprintf("Merge of %d and %d with %d would exceed max of %d unichars\n",
min_s1, min_s2, num_unichars, max_shape_unichars);
} else {
shapes->MergeShapes(min_s1, min_s2);
shape_dists[min_s2].clear();
@ -980,13 +1002,15 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float
for (int s = 0; s < min_s1; ++s) {
if (!shape_dists[s].empty()) {
shape_dists[s][min_s1 - s - 1].distance = ShapeDistance(*shapes, s, min_s1);
shape_dists[s][min_s1 - s - 1].distance =
ShapeDistance(*shapes, s, min_s1);
shape_dists[s][min_s2 - s - 1].distance = kInfiniteDist;
}
}
for (int s2 = min_s1 + 1; s2 < num_shapes; ++s2) {
if (shape_dists[min_s1][s2 - min_s1 - 1].distance < kInfiniteDist) {
shape_dists[min_s1][s2 - min_s1 - 1].distance = ShapeDistance(*shapes, min_s1, s2);
shape_dists[min_s1][s2 - min_s1 - 1].distance =
ShapeDistance(*shapes, min_s1, s2);
}
}
for (int s = min_s1 + 1; s < min_s2; ++s) {

View File

@ -69,7 +69,7 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) {
float Angle, Length;
/* if p1 and p2 are not close in length, don't let them match */
Length = fabs(p1->Length - p2->Length);
Length = std::fabs(p1->Length - p2->Length);
if (Length > MAX_LENGTH_MISMATCH) {
return (0.0);
}
@ -88,8 +88,8 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) {
}
/* set the dummy pico-feature at one end of p1 and match it to p2 */
Feature->Params[PicoFeatX] = p1->X + cos(Angle) * Length;
Feature->Params[PicoFeatY] = p1->Y + sin(Angle) * Length;
Feature->Params[PicoFeatX] = p1->X + std::cos(Angle) * Length;
Feature->Params[PicoFeatY] = p1->Y + std::sin(Angle) * Length;
if (DummyFastMatch(Feature, p2)) {
Evidence = SubfeatureEvidence(Feature, p2);
if (Evidence < WorstEvidence) {
@ -101,8 +101,8 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) {
}
/* set the dummy pico-feature at the other end of p1 and match it to p2 */
Feature->Params[PicoFeatX] = p1->X - cos(Angle) * Length;
Feature->Params[PicoFeatY] = p1->Y - sin(Angle) * Length;
Feature->Params[PicoFeatX] = p1->X - std::cos(Angle) * Length;
Feature->Params[PicoFeatY] = p1->Y - std::sin(Angle) * Length;
if (DummyFastMatch(Feature, p2)) {
Evidence = SubfeatureEvidence(Feature, p2);
if (Evidence < WorstEvidence) {
@ -266,7 +266,7 @@ bool DummyFastMatch(FEATURE Feature, PROTO_STRUCT *Proto) {
float AngleError;
MaxAngleError = training_angle_pad / 360.0;
AngleError = fabs(Proto->Angle - Feature->Params[PicoFeatDir]);
AngleError = std::fabs(Proto->Angle - Feature->Params[PicoFeatDir]);
if (AngleError > 0.5) {
AngleError = 1.0 - AngleError;
}
@ -296,8 +296,8 @@ void ComputePaddedBoundingBox(PROTO_STRUCT *Proto, float TangentPad, float Ortho
FRECT *BoundingBox) {
float Length = Proto->Length / 2.0 + TangentPad;
float Angle = Proto->Angle * 2.0 * M_PI;
float CosOfAngle = fabs(cos(Angle));
float SinOfAngle = fabs(sin(Angle));
float CosOfAngle = fabs(std::cos(Angle));
float SinOfAngle = fabs(std::sin(Angle));
float Pad = std::max(CosOfAngle * Length, SinOfAngle * OrthogonalPad);
BoundingBox->MinX = Proto->X - Pad;

View File

@ -22,6 +22,7 @@
# include "config_auto.h"
#endif
#include <cmath>
#include <string>
#include "lstmtrainer.h"
@ -71,14 +72,17 @@ const int kTargetXScale = 5;
const int kTargetYScale = 100;
#endif // !GRAPHICS_DISABLED
LSTMTrainer::LSTMTrainer() : randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) {
LSTMTrainer::LSTMTrainer()
: randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) {
EmptyConstructor();
debug_interval_ = 0;
}
LSTMTrainer::LSTMTrainer(const char *model_base, const char *checkpoint_name, int debug_interval,
int64_t max_memory)
: randomly_rotate_(false), training_data_(max_memory), sub_trainer_(nullptr) {
LSTMTrainer::LSTMTrainer(const char *model_base, const char *checkpoint_name,
int debug_interval, int64_t max_memory)
: randomly_rotate_(false),
training_data_(max_memory),
sub_trainer_(nullptr) {
EmptyConstructor();
debug_interval_ = debug_interval;
model_base_ = model_base;
@ -96,7 +100,8 @@ LSTMTrainer::~LSTMTrainer() {
// Tries to deserialize a trainer from the given file and silently returns
// false in case of failure.
bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_traineddata) {
bool LSTMTrainer::TryLoadingCheckpoint(const char *filename,
const char *old_traineddata) {
std::vector<char> data;
if (!LoadDataFromFile(filename, &data)) {
return false;
@ -106,7 +111,8 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra
return false;
}
if (IsIntMode()) {
tprintf("Error, %s is an integer (fast) model, cannot continue training\n", filename);
tprintf("Error, %s is an integer (fast) model, cannot continue training\n",
filename);
return false;
}
if (((old_traineddata == nullptr || *old_traineddata == '\0') &&
@ -114,7 +120,8 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra
filename == old_traineddata) {
return true; // Normal checkpoint load complete.
}
tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(), recoder_.code_range());
tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
recoder_.code_range());
if (old_traineddata == nullptr || *old_traineddata == '\0') {
tprintf("Must supply the old traineddata for code conversion!\n");
return false;
@ -152,21 +159,23 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra
// are implemented.
// For other args see NetworkBuilder::InitNetwork.
// Note: Be sure to call InitCharSet before InitNetwork!
bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index, int net_flags,
float weight_range, float learning_rate, float momentum,
bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index,
int net_flags, float weight_range,
float learning_rate, float momentum,
float adam_beta) {
mgr_.SetVersionString(mgr_.VersionString() + ":" + network_spec);
adam_beta_ = adam_beta;
learning_rate_ = learning_rate;
momentum_ = momentum;
SetNullChar();
if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec, append_index, net_flags,
weight_range, &randomizer_, &network_)) {
if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec,
append_index, net_flags, weight_range,
&randomizer_, &network_)) {
return false;
}
network_str_ += network_spec;
tprintf("Built network:%s from request %s\n",
network_->spec().c_str(), network_spec);
tprintf("Built network:%s from request %s\n", network_->spec().c_str(),
network_spec);
tprintf(
"Training parameters:\n Debug interval = %d,"
" weights = %g, learning rate = %g, momentum=%g\n",
@ -222,14 +231,14 @@ void LSTMTrainer::InitIterations() {
// If the training sample is usable, grid searches for the optimal
// dict_ratio/cert_offset, and returns the results in a string of space-
// separated triplets of ratio,offset=worderr.
Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, int iteration,
double min_dict_ratio, double dict_ratio_step,
double max_dict_ratio, double min_cert_offset,
double cert_offset_step, double max_cert_offset,
std::string &results) {
Trainability LSTMTrainer::GridSearchDictParams(
const ImageData *trainingdata, int iteration, double min_dict_ratio,
double dict_ratio_step, double max_dict_ratio, double min_cert_offset,
double cert_offset_step, double max_cert_offset, std::string &results) {
sample_iteration_ = iteration;
NetworkIO fwd_outputs, targets;
Trainability result = PrepareForBackward(trainingdata, &fwd_outputs, &targets);
Trainability result =
PrepareForBackward(trainingdata, &fwd_outputs, &targets);
if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == nullptr) {
return result;
}
@ -238,8 +247,10 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
std::vector<int> truth_labels, ocr_labels, xcoords;
ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels));
// NO-dict error.
RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr);
base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(),
nullptr);
base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty,
nullptr);
base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
std::string truth_text = DecodeLabels(truth_labels);
std::string ocr_text = DecodeLabels(ocr_labels);
@ -248,18 +259,21 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_);
for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) {
for (double c = min_cert_offset; c < max_cert_offset; c += cert_offset_step) {
search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, nullptr);
for (double c = min_cert_offset; c < max_cert_offset;
c += cert_offset_step) {
search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty,
nullptr);
search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
truth_text = DecodeLabels(truth_labels);
ocr_text = DecodeLabels(ocr_labels);
// This is destructive on both strings.
double word_error = ComputeWordError(&truth_text, &ocr_text);
if ((r == min_dict_ratio && c == min_cert_offset) || !std::isfinite(word_error)) {
if ((r == min_dict_ratio && c == min_cert_offset) ||
!std::isfinite(word_error)) {
std::string t = DecodeLabels(truth_labels);
std::string o = DecodeLabels(ocr_labels);
tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, t.c_str(), o.c_str(),
word_error, truth_labels[0]);
tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c,
t.c_str(), o.c_str(), word_error, truth_labels[0]);
}
results += " " + std::to_string(r);
results += "," + std::to_string(c);
@ -278,17 +292,20 @@ void LSTMTrainer::DebugNetwork() {
// tesseract into memory ready for training. Returns false if nothing was
// loaded.
bool LSTMTrainer::LoadAllTrainingData(const std::vector<std::string> &filenames,
CachingStrategy cache_strategy, bool randomly_rotate) {
CachingStrategy cache_strategy,
bool randomly_rotate) {
randomly_rotate_ = randomly_rotate;
training_data_.Clear();
return training_data_.LoadDocuments(filenames, cache_strategy, LoadDataFromFile);
return training_data_.LoadDocuments(filenames, cache_strategy,
LoadDataFromFile);
}
// Keeps track of best and locally worst char error_rate and launches tests
// using tester, when a new min or max is reached.
// Writes checkpoints at appropriate times and builds and returns a log message
// to indicate progress. Returns false if nothing interesting happened.
bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, std::string &log_msg) {
bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
std::string &log_msg) {
PrepareLogMsg(log_msg);
double error_rate = CharError();
int iteration = learning_iteration();
@ -320,7 +337,8 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, std::string &log_msg)
sub_trainer_.reset();
stall_iteration_ = learning_iteration() + kMinStallIterations;
if (TransitionTrainingStage(kStageTransitionThreshold)) {
log_msg += " Transitioned to stage " + std::to_string(CurrentTrainingStage());
log_msg +=
" Transitioned to stage " + std::to_string(CurrentTrainingStage());
}
SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
@ -386,7 +404,8 @@ void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const {
// Appends <intro_str> iteration learning_iteration()/training_iteration()/
// sample_iteration() to the log_msg.
void LSTMTrainer::LogIterations(const char *intro_str, std::string &log_msg) const {
void LSTMTrainer::LogIterations(const char *intro_str,
std::string &log_msg) const {
log_msg += intro_str;
log_msg += " iteration " + std::to_string(learning_iteration());
log_msg += "/" + std::to_string(training_iteration());
@ -396,7 +415,8 @@ void LSTMTrainer::LogIterations(const char *intro_str, std::string &log_msg) con
// Returns true and increments the training_stage_ if the error rate has just
// passed through the given threshold for the first time.
bool LSTMTrainer::TransitionTrainingStage(float error_threshold) {
if (best_error_rate_ < error_threshold && training_stage_ + 1 < num_training_stages_) {
if (best_error_rate_ < error_threshold &&
training_stage_ + 1 < num_training_stages_) {
++training_stage_;
return true;
}
@ -404,8 +424,8 @@ bool LSTMTrainer::TransitionTrainingStage(float error_threshold) {
}
// Writes to the given file. Returns false in case of error.
bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr,
TFile *fp) const {
bool LSTMTrainer::Serialize(SerializeAmount serialize_amount,
const TessdataManager *mgr, TFile *fp) const {
if (!LSTMRecognizer::Serialize(mgr, fp)) {
return false;
}
@ -470,7 +490,8 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, const TessdataMana
return false;
}
std::vector<char> sub_data;
if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, *sub_trainer_, &sub_data)) {
if (sub_trainer_ != nullptr &&
!SaveTrainingDump(LIGHT, *sub_trainer_, &sub_data)) {
return false;
}
if (!fp->Serialize(sub_data)) {
@ -587,11 +608,13 @@ void LSTMTrainer::StartSubtrainer(std::string &log_msg) {
log_msg += " Failed to revert to previous best for trial!";
sub_trainer_.reset();
} else {
log_msg += " Trial sub_trainer_ from iteration " + std::to_string(sub_trainer_->training_iteration());
log_msg += " Trial sub_trainer_ from iteration " +
std::to_string(sub_trainer_->training_iteration());
// Reduce learning rate so it doesn't diverge this time.
sub_trainer_->ReduceLearningRates(this, log_msg);
// If it fails again, we will wait twice as long before reverting again.
int stall_offset = learning_iteration() - sub_trainer_->learning_iteration();
int stall_offset =
learning_iteration() - sub_trainer_->learning_iteration();
stall_iteration_ = learning_iteration() + 2 * stall_offset;
sub_trainer_->stall_iteration_ = stall_iteration_;
// Re-save the best trainer with the new learning rates and stall iteration.
@ -619,7 +642,8 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
int end_iteration = training_iteration();
while (sub_trainer_->training_iteration() < end_iteration &&
sub_margin >= kSubTrainerMarginFraction) {
int target_iteration = sub_trainer_->training_iteration() + kNumPagesPerBatch;
int target_iteration =
sub_trainer_->training_iteration() + kNumPagesPerBatch;
while (sub_trainer_->training_iteration() < target_iteration) {
sub_trainer_->TrainOnLine(this, false);
}
@ -631,12 +655,14 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
sub_error = sub_trainer_->CharError();
sub_margin = (training_error - sub_error) / sub_error;
}
if (sub_error < best_error_rate_ && sub_margin >= kSubTrainerMarginFraction) {
if (sub_error < best_error_rate_ &&
sub_margin >= kSubTrainerMarginFraction) {
// The sub_trainer_ has won the race to a new best. Switch to it.
std::vector<char> updated_trainer;
SaveTrainingDump(LIGHT, *sub_trainer_, &updated_trainer);
ReadTrainingDump(updated_trainer, *this);
log_msg += " Sub trainer wins at iteration " + std::to_string(training_iteration());
log_msg += " Sub trainer wins at iteration " +
std::to_string(training_iteration());
log_msg += "\n";
return STR_REPLACED;
}
@ -647,11 +673,13 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
// Reduces network learning rates, either for everything, or for layers
// independently, according to NF_LAYER_SPECIFIC_LR.
void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, std::string &log_msg) {
void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer,
std::string &log_msg) {
if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
int num_reduced =
ReduceLayerLearningRates(kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
log_msg += "\nReduced learning rate on layers: " + std::to_string(num_reduced);
int num_reduced = ReduceLayerLearningRates(
kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
log_msg +=
"\nReduced learning rate on layers: " + std::to_string(num_reduced);
} else {
ScaleLearningRate(kLearningRateDecay);
log_msg += "\nReduced learning rate to :" + std::to_string(learning_rate_);
@ -712,7 +740,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
copy_trainer.SetIteration(iteration);
// Train on the sample, but keep the update in updates_ instead of
// applying to the weights.
const ImageData *trainingdata = copy_trainer.TrainOnLine(samples_trainer, true);
const ImageData *trainingdata =
copy_trainer.TrainOnLine(samples_trainer, true);
if (trainingdata == nullptr) {
continue;
}
@ -727,7 +756,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
samples_trainer->ReadTrainingDump(updated_trainer, layer_trainer);
Network *layer = layer_trainer.GetLayer(layers[i]);
// Update the weights in just the layer, using Adam if enabled.
layer->Update(0.0, momentum_, adam_beta_, layer_trainer.training_iteration_ + 1);
layer->Update(0.0, momentum_, adam_beta_,
layer_trainer.training_iteration_ + 1);
// Zero the updates matrix again.
layer->Update(0.0, 0.0, 0.0, 0);
// Train again on the same sample, again holding back the updates.
@ -735,9 +765,10 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
// Count the sign changes in the updates in layer vs in copy_trainer.
float before_bad = bad_sums[ww][i];
float before_ok = ok_sums[ww][i];
layer->CountAlternators(*copy_trainer.GetLayer(layers[i]), &ok_sums[ww][i],
&bad_sums[ww][i]);
float bad_frac = bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
layer->CountAlternators(*copy_trainer.GetLayer(layers[i]),
&ok_sums[ww][i], &bad_sums[ww][i]);
float bad_frac =
bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
if (bad_frac > 0.0f) {
bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac;
}
@ -756,8 +787,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
TFloat total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
TFloat frac_down = bad_sums[LR_DOWN][i] / total_down;
TFloat frac_same = bad_sums[LR_SAME][i] / total_same;
tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(), lr * factor,
100.0 * frac_down, lr, 100.0 * frac_same);
tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(),
lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same);
if (frac_down < frac_same * kImprovementFraction) {
tprintf(" REDUCED\n");
ScaleLayerLearningRate(layers[i], factor);
@ -781,9 +812,10 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
// Converts the string to integer class labels, with appropriate null_char_s
// in between if not in SimpleTextOutput mode. Returns false on failure.
/* static */
bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unicharset,
const UnicharCompress *recoder, bool simple_text, int null_char,
std::vector<int> *labels) {
bool LSTMTrainer::EncodeString(const std::string &str,
const UNICHARSET &unicharset,
const UnicharCompress *recoder, bool simple_text,
int null_char, std::vector<int> *labels) {
if (str.c_str() == nullptr || str.length() <= 0) {
tprintf("Empty truth string!\n");
return false;
@ -795,7 +827,8 @@ bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unichar
labels->push_back(null_char);
}
std::string cleaned = unicharset.CleanupString(str.c_str());
if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr, &err_index)) {
if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr,
&err_index)) {
bool success = true;
for (auto internal_label : internal_labels) {
if (recoder != nullptr) {
@ -835,19 +868,23 @@ bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unichar
// Performs forward-backward on the given trainingdata.
// Returns a Trainability enum to indicate the suitability of the sample.
Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata, bool batch) {
Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata,
bool batch) {
NetworkIO fwd_outputs, targets;
Trainability trainable = PrepareForBackward(trainingdata, &fwd_outputs, &targets);
Trainability trainable =
PrepareForBackward(trainingdata, &fwd_outputs, &targets);
++sample_iteration_;
if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
return trainable; // Sample was unusable.
}
bool debug = debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
bool debug =
debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
// Run backprop on the output.
NetworkIO bp_deltas;
if (network_->IsTraining() &&
(trainable != PERFECT ||
training_iteration() > last_perfect_training_iteration_ + perfect_delay_)) {
training_iteration() >
last_perfect_training_iteration_ + perfect_delay_)) {
network_->Backward(debug, targets, &scratch_space_, &bp_deltas);
network_->Update(learning_rate_, batch ? -1.0f : momentum_, adam_beta_,
training_iteration_ + 1);
@ -864,18 +901,21 @@ Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata, bool batch)
// Prepares the ground truth, runs forward, and prepares the targets.
// Returns a Trainability enum to indicate the suitability of the sample.
Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs,
Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata,
NetworkIO *fwd_outputs,
NetworkIO *targets) {
if (trainingdata == nullptr) {
tprintf("Null trainingdata.\n");
return UNENCODABLE;
}
// Ensure repeatability of random elements even across checkpoints.
bool debug = debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
bool debug =
debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
std::vector<int> truth_labels;
if (!EncodeString(trainingdata->transcription(), &truth_labels)) {
tprintf("Can't encode transcription: '%s' in language '%s'\n",
trainingdata->transcription().c_str(), trainingdata->language().c_str());
trainingdata->transcription().c_str(),
trainingdata->language().c_str());
return UNENCODABLE;
}
bool upside_down = false;
@ -908,8 +948,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
float image_scale;
NetworkIO inputs;
bool invert = trainingdata->boxes().empty();
if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down, &image_scale, &inputs,
fwd_outputs)) {
if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down,
&image_scale, &inputs, fwd_outputs)) {
tprintf("Image %s not trainable\n", trainingdata->imagefilename().c_str());
return UNENCODABLE;
}
@ -917,12 +957,14 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
LossType loss_type = OutputLossType();
if (loss_type == LT_SOFTMAX) {
if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) {
tprintf("Compute simple targets failed for %s!\n", trainingdata->imagefilename().c_str());
tprintf("Compute simple targets failed for %s!\n",
trainingdata->imagefilename().c_str());
return UNENCODABLE;
}
} else if (loss_type == LT_CTC) {
if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) {
tprintf("Compute CTC targets failed for %s!\n", trainingdata->imagefilename().c_str());
tprintf("Compute CTC targets failed for %s!\n",
trainingdata->imagefilename().c_str());
return UNENCODABLE;
}
} else {
@ -936,7 +978,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
if (loss_type != LT_CTC) {
LabelsFromOutputs(*targets, &truth_labels, &xcoords);
}
if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels, *targets)) {
if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels,
*targets)) {
tprintf("Input width was %d\n", inputs.Width());
return UNENCODABLE;
}
@ -945,7 +988,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
targets->SubtractAllFromFloat(*fwd_outputs);
if (debug_interval_ != 0) {
if (truth_text != ocr_text) {
tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(), ocr_text.c_str());
tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(),
ocr_text.c_str());
}
}
double char_error = ComputeCharError(truth_labels, ocr_labels);
@ -968,7 +1012,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
// restored. *this must always be the master trainer that retains the only
// copy of the training data and language model. trainer is the model that is
// actually serialized.
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer &trainer,
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount,
const LSTMTrainer &trainer,
std::vector<char> *data) const {
TFile fp;
fp.OpenWrite(data);
@ -976,7 +1021,8 @@ bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, const LSTMT
}
// Restores the model to *this.
bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr, const char *data, int size) {
bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr,
const char *data, int size) {
if (size == 0) {
tprintf("Warning: data size is 0 in LSTMTrainer::ReadLocalTrainingDump\n");
return false;
@ -990,7 +1036,8 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr, const char *
bool LSTMTrainer::SaveTraineddata(const char *filename) {
std::vector<char> recognizer_data;
SaveRecognitionDump(&recognizer_data);
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0], recognizer_data.size());
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
recognizer_data.size());
return mgr_.SaveFile(filename, SaveDataToFile);
}
@ -1025,8 +1072,8 @@ void LSTMTrainer::FillErrorBuffer(double new_error, ErrorTypes type) {
// Helper generates a map from each current recoder_ code (ie softmax index)
// to the corresponding old_recoder code, or -1 if there isn't one.
std::vector<int> LSTMTrainer::MapRecoder(const UNICHARSET &old_chset,
const UnicharCompress &old_recoder) const {
std::vector<int> LSTMTrainer::MapRecoder(
const UNICHARSET &old_chset, const UnicharCompress &old_recoder) const {
int num_new_codes = recoder_.code_range();
int num_new_unichars = GetUnicharset().size();
std::vector<int> code_map(num_new_codes, -1);
@ -1045,7 +1092,8 @@ std::vector<int> LSTMTrainer::MapRecoder(const UNICHARSET &old_chset,
continue;
}
// The old unicharset must have the same unichar.
int old_uid = uid < num_new_unichars
int old_uid =
uid < num_new_unichars
? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid))
: old_chset.size() - 1;
if (old_uid == INVALID_UNICHAR_ID) {
@ -1079,7 +1127,8 @@ void LSTMTrainer::InitCharSet() {
// Helper computes and sets the null_char_.
void LSTMTrainer::SetNullChar() {
null_char_ = GetUnicharset().has_special_codes() ? UNICHAR_BROKEN : GetUnicharset().size();
null_char_ = GetUnicharset().has_special_codes() ? UNICHAR_BROKEN
: GetUnicharset().size();
RecodedCharID code;
recoder_.EncodeUnichar(null_char_, &code);
null_char_ = code(0);
@ -1103,7 +1152,8 @@ void LSTMTrainer::EmptyConstructor() {
// as an image in the given window, and the corresponding labels at the
// corresponding x_starts.
// Returns false if the truth string is empty.
bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata,
bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs,
const ImageData &trainingdata,
const NetworkIO &fwd_outputs,
const std::vector<int> &truth_labels,
const NetworkIO &outputs) {
@ -1118,12 +1168,15 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
std::vector<int> xcoords;
LabelsFromOutputs(outputs, &labels, &xcoords);
std::string text = DecodeLabels(labels);
tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(), truth_text.c_str());
tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(),
truth_text.c_str());
if (truth_text != text) {
tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), text.c_str());
tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(),
text.c_str());
}
if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) {
tprintf("TRAINING activation path for truth string %s\n", truth_text.c_str());
tprintf("TRAINING activation path for truth string %s\n",
truth_text.c_str());
DebugActivationPath(outputs, labels, xcoords);
#ifndef GRAPHICS_DISABLED
DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_);
@ -1140,11 +1193,12 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
#ifndef GRAPHICS_DISABLED
// Displays the network targets as line a line graph.
void LSTMTrainer::DisplayTargets(const NetworkIO &targets, const char *window_name,
ScrollView **window) {
void LSTMTrainer::DisplayTargets(const NetworkIO &targets,
const char *window_name, ScrollView **window) {
int width = targets.Width();
int num_features = targets.NumFeatures();
Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale, window);
Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale,
window);
for (int c = 0; c < num_features; ++c) {
int color = c % (ScrollView::GREEN_YELLOW - 1) + 2;
(*window)->Pen(static_cast<ScrollView::Color>(color));
@ -1176,7 +1230,8 @@ void LSTMTrainer::DisplayTargets(const NetworkIO &targets, const char *window_na
// Builds a no-compromises target where the first positions should be the
// truth labels and the rest is padded with the null_char_.
bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs, const std::vector<int> &truth_labels,
bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs,
const std::vector<int> &truth_labels,
NetworkIO *targets) {
if (truth_labels.size() > targets->Width()) {
tprintf("Error: transcription %s too long to fit into target of width %d\n",
@ -1197,18 +1252,19 @@ bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs, const std::vector
// Builds a target using standard CTC. truth_labels should be pre-padded with
// nulls wherever desired. They don't have to be between all labels.
// outputs is input-output, as it gets clipped to minimum probability.
bool LSTMTrainer::ComputeCTCTargets(const std::vector<int> &truth_labels, NetworkIO *outputs,
NetworkIO *targets) {
bool LSTMTrainer::ComputeCTCTargets(const std::vector<int> &truth_labels,
NetworkIO *outputs, NetworkIO *targets) {
// Bottom-clip outputs to a minimum probability.
CTC::NormalizeProbs(outputs);
return CTC::ComputeCTCTargets(truth_labels, null_char_, outputs->float_array(), targets);
return CTC::ComputeCTCTargets(truth_labels, null_char_,
outputs->float_array(), targets);
}
// Computes network errors, and stores the results in the rolling buffers,
// along with the supplied text_error.
// Returns the delta error of the current sample (not running average.)
double LSTMTrainer::ComputeErrorRates(const NetworkIO &deltas, double char_error,
double word_error) {
double LSTMTrainer::ComputeErrorRates(const NetworkIO &deltas,
double char_error, double word_error) {
UpdateErrorBuffer(ComputeRMSError(deltas), ET_RMS);
// Delta error is the fraction of timesteps with >0.5 error in the top choice
// score. If zero, then the top choice characters are guaranteed correct,
@ -1253,7 +1309,7 @@ double LSTMTrainer::ComputeWinnerError(const NetworkIO &deltas) {
for (int t = 0; t < width; ++t) {
const float *class_errs = deltas.f(t);
for (int c = 0; c < num_classes; ++c) {
float abs_delta = fabs(class_errs[c]);
float abs_delta = std::fabs(class_errs[c]);
// TODO(rays) Filtering cases where the delta is very large to cut out
// GT errors doesn't work. Find a better way or get better truth.
if (0.5 <= abs_delta) {
@ -1292,7 +1348,8 @@ double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,
// Computes word recall error rate using a very simple bag of words algorithm.
// NOTE that this is destructive on both input strings.
double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_str) {
double LSTMTrainer::ComputeWordError(std::string *truth_str,
std::string *ocr_str) {
using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;
std::vector<std::string> truth_words = split(*truth_str, ' ');
if (truth_words.empty()) {
@ -1300,7 +1357,7 @@ double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_st
}
std::vector<std::string> ocr_words = split(*ocr_str, ' ');
StrMap word_counts;
for (auto truth_word : truth_words) {
for (const auto &truth_word : truth_words) {
std::string truth_word_string(truth_word.c_str());
auto it = word_counts.find(truth_word_string);
if (it == word_counts.end()) {
@ -1309,7 +1366,7 @@ double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_st
++it->second;
}
}
for (auto ocr_word : ocr_words) {
for (const auto &ocr_word : ocr_words) {
std::string ocr_word_string(ocr_word.c_str());
auto it = word_counts.find(ocr_word_string);
if (it == word_counts.end()) {
@ -1333,7 +1390,8 @@ void LSTMTrainer::UpdateErrorBuffer(double new_error, ErrorTypes type) {
int index = training_iteration_ % kRollingBufferSize_;
error_buffers_[type][index] = new_error;
// Compute the mean error.
int mean_count = std::min<int>(training_iteration_ + 1, error_buffers_[type].size());
int mean_count =
std::min<int>(training_iteration_ + 1, error_buffers_[type].size());
double buffer_sum = 0.0;
for (int i = 0; i < mean_count; ++i) {
buffer_sum += error_buffers_[type][i];
@ -1353,8 +1411,9 @@ void LSTMTrainer::RollErrorBuffers() {
}
++training_iteration_;
if (debug_interval_ != 0) {
tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n", error_rates_[ET_RMS],
error_rates_[ET_DELTA], error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR],
tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n",
error_rates_[ET_RMS], error_rates_[ET_DELTA],
error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR],
error_rates_[ET_SKIP_RATIO]);
}
}
@ -1364,11 +1423,14 @@ void LSTMTrainer::RollErrorBuffers() {
// Tester is an externally supplied callback function that tests on some
// data set with a given model and records the error rates in a graph.
std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
const std::vector<char> &model_data, TestCallback tester) {
if (error_rate > best_error_rate_ && iteration < best_iteration_ + kErrorGraphInterval) {
const std::vector<char> &model_data,
const TestCallback &tester) {
if (error_rate > best_error_rate_ &&
iteration < best_iteration_ + kErrorGraphInterval) {
// Too soon to record a new point.
if (tester != nullptr && !worst_model_data_.empty()) {
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size());
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
worst_model_data_.size());
return tester(worst_iteration_, nullptr, mgr_, CurrentTrainingStage());
} else {
return "";
@ -1384,8 +1446,10 @@ std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
if (error_rate < best_error_rate_) {
// This is a new (global) minimum.
if (tester != nullptr && !worst_model_data_.empty()) {
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size());
result = tester(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage());
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
worst_model_data_.size());
result = tester(worst_iteration_, worst_error_rates_, mgr_,
CurrentTrainingStage());
worst_model_data_.clear();
best_model_data_ = model_data;
}
@ -1397,23 +1461,28 @@ std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
// Compute 2% decay time.
double two_percent_more = error_rate + 2.0;
int i;
for (i = best_error_history_.size() - 1; i >= 0 && best_error_history_[i] < two_percent_more;
--i) {
for (i = best_error_history_.size() - 1;
i >= 0 && best_error_history_[i] < two_percent_more; --i) {
}
int old_iteration = i >= 0 ? best_error_iterations_[i] : 0;
improvement_steps_ = iteration - old_iteration;
tprintf("2 Percent improvement time=%d, best error was %g @ %d\n", improvement_steps_,
i >= 0 ? best_error_history_[i] : 100.0, old_iteration);
tprintf("2 Percent improvement time=%d, best error was %g @ %d\n",
improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0,
old_iteration);
} else if (error_rate > best_error_rate_) {
// This is a new (local) maximum.
if (tester != nullptr) {
if (!best_model_data_.empty()) {
mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0], best_model_data_.size());
result = tester(best_iteration_, best_error_rates_, mgr_, CurrentTrainingStage());
mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0],
best_model_data_.size());
result = tester(best_iteration_, best_error_rates_, mgr_,
CurrentTrainingStage());
} else if (!worst_model_data_.empty()) {
// Allow for multiple data points with "worst" error rate.
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size());
result = tester(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage());
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
worst_model_data_.size());
result = tester(worst_iteration_, worst_error_rates_, mgr_,
CurrentTrainingStage());
}
if (result.length() > 0) {
best_model_data_.clear();

View File

@ -73,7 +73,8 @@ class LSTMTrainer;
// Function to compute and record error rates on some external test set(s).
// Args are: iteration, mean errors, model, training stage.
// Returns a string containing logging information about the tests.
using TestCallback = std::function<std::string(int, const double *, const TessdataManager &, int)>;
using TestCallback = std::function<std::string(int, const double *,
const TessdataManager &, int)>;
// Trainer class for LSTM networks. Most of the effort is in creating the
// ideal target outputs from the transcription. A box file is used if it is
@ -82,8 +83,8 @@ using TestCallback = std::function<std::string(int, const double *, const Tessda
class TESS_UNICHARSET_TRAINING_API LSTMTrainer : public LSTMRecognizer {
public:
LSTMTrainer();
LSTMTrainer(const char *model_base, const char *checkpoint_name, int debug_interval,
int64_t max_memory);
LSTMTrainer(const char *model_base, const char *checkpoint_name,
int debug_interval, int64_t max_memory);
virtual ~LSTMTrainer();
// Tries to deserialize a trainer from the given file and silently returns
@ -113,8 +114,9 @@ public:
// are implemented.
// For other args see NetworkBuilder::InitNetwork.
// Note: Be sure to call InitCharSet before InitNetwork!
bool InitNetwork(const char *network_spec, int append_index, int net_flags, float weight_range,
float learning_rate, float momentum, float adam_beta);
bool InitNetwork(const char *network_spec, int append_index, int net_flags,
float weight_range, float learning_rate, float momentum,
float adam_beta);
// Initializes a trainer from a serialized TFNetworkModel proto.
// Returns the global step of TensorFlow graph or 0 if failed.
// Building a compatible TF graph: See tfnetwork.proto.
@ -160,7 +162,8 @@ public:
// NewSingleError.
double LastSingleError(ErrorTypes type) const {
return error_buffers_[type]
[(training_iteration() + kRollingBufferSize_ - 1) % kRollingBufferSize_];
[(training_iteration() + kRollingBufferSize_ - 1) %
kRollingBufferSize_];
}
const DocumentCache &training_data() const {
return training_data_;
@ -172,11 +175,10 @@ public:
// If the training sample is usable, grid searches for the optimal
// dict_ratio/cert_offset, and returns the results in a string of space-
// separated triplets of ratio,offset=worderr.
Trainability GridSearchDictParams(const ImageData *trainingdata, int iteration,
double min_dict_ratio, double dict_ratio_step,
double max_dict_ratio, double min_cert_offset,
double cert_offset_step, double max_cert_offset,
std::string &results);
Trainability GridSearchDictParams(
const ImageData *trainingdata, int iteration, double min_dict_ratio,
double dict_ratio_step, double max_dict_ratio, double min_cert_offset,
double cert_offset_step, double max_cert_offset, std::string &results);
// Provides output on the distribution of weight values.
void DebugNetwork();
@ -184,20 +186,22 @@ public:
// Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for training. Returns false if nothing was
// loaded.
bool LoadAllTrainingData(const std::vector<std::string> &filenames, CachingStrategy cache_strategy,
bool LoadAllTrainingData(const std::vector<std::string> &filenames,
CachingStrategy cache_strategy,
bool randomly_rotate);
// Keeps track of best and locally worst error rate, using internally computed
// values. See MaintainCheckpointsSpecific for more detail.
bool MaintainCheckpoints(TestCallback tester, std::string &log_msg);
bool MaintainCheckpoints(const TestCallback &tester, std::string &log_msg);
// Keeps track of best and locally worst error_rate (whatever it is) and
// launches tests using rec_model, when a new min or max is reached.
// Writes checkpoints using train_model at appropriate times and builds and
// returns a log message to indicate progress. Returns false if nothing
// interesting happened.
bool MaintainCheckpointsSpecific(int iteration, const std::vector<char> *train_model,
const std::vector<char> *rec_model, TestCallback tester,
std::string &log_msg);
bool MaintainCheckpointsSpecific(int iteration,
const std::vector<char> *train_model,
const std::vector<char> *rec_model,
TestCallback tester, std::string &log_msg);
// Builds a string containing a progress message with current error rates.
void PrepareLogMsg(std::string &log_msg) const;
// Appends <intro_str> iteration learning_iteration()/training_iteration()/
@ -214,7 +218,8 @@ public:
}
// Writes to the given file. Returns false in case of error.
bool Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr, TFile *fp) const;
bool Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr,
TFile *fp) const;
// Reads from the given file. Returns false in case of error.
bool DeSerialize(const TessdataManager *mgr, TFile *fp);
@ -240,18 +245,20 @@ public:
// Even if it looks like all weights should remain the same, an adjustment
// will be made to guarantee a different result when reverting to an old best.
// Returns the number of layer learning rates that were reduced.
int ReduceLayerLearningRates(TFloat factor, int num_samples, LSTMTrainer *samples_trainer);
int ReduceLayerLearningRates(TFloat factor, int num_samples,
LSTMTrainer *samples_trainer);
// Converts the string to integer class labels, with appropriate null_char_s
// in between if not in SimpleTextOutput mode. Returns false on failure.
bool EncodeString(const std::string &str, std::vector<int> *labels) const {
return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : nullptr,
SimpleTextOutput(), null_char_, labels);
return EncodeString(str, GetUnicharset(),
IsRecoding() ? &recoder_ : nullptr, SimpleTextOutput(),
null_char_, labels);
}
// Static version operates on supplied unicharset, encoder, simple_text.
static bool EncodeString(const std::string &str, const UNICHARSET &unicharset,
const UnicharCompress *recoder, bool simple_text, int null_char,
std::vector<int> *labels);
const UnicharCompress *recoder, bool simple_text,
int null_char, std::vector<int> *labels);
// Performs forward-backward on the given trainingdata.
// Returns the sample that was used or nullptr if the next sample was deemed
@ -259,7 +266,8 @@ public:
// holds the training samples.
const ImageData *TrainOnLine(LSTMTrainer *samples_trainer, bool batch) {
int sample_index = sample_iteration();
const ImageData *image = samples_trainer->training_data_.GetPageBySerial(sample_index);
const ImageData *image =
samples_trainer->training_data_.GetPageBySerial(sample_index);
if (image != nullptr) {
Trainability trainable = TrainOnLine(image, batch);
if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
@ -274,30 +282,34 @@ public:
// Prepares the ground truth, runs forward, and prepares the targets.
// Returns a Trainability enum to indicate the suitability of the sample.
Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs,
NetworkIO *targets);
Trainability PrepareForBackward(const ImageData *trainingdata,
NetworkIO *fwd_outputs, NetworkIO *targets);
// Writes the trainer to memory, so that the current training state can be
// restored. *this must always be the master trainer that retains the only
// copy of the training data and language model. trainer is the model that is
// actually serialized.
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer &trainer,
bool SaveTrainingDump(SerializeAmount serialize_amount,
const LSTMTrainer &trainer,
std::vector<char> *data) const;
// Reads previously saved trainer from memory. *this must always be the
// master trainer that retains the only copy of the training data and
// language model. trainer is the model that is restored.
bool ReadTrainingDump(const std::vector<char> &data, LSTMTrainer &trainer) const {
bool ReadTrainingDump(const std::vector<char> &data,
LSTMTrainer &trainer) const {
if (data.empty()) {
return false;
}
return ReadSizedTrainingDump(&data[0], data.size(), trainer);
}
bool ReadSizedTrainingDump(const char *data, int size, LSTMTrainer &trainer) const {
bool ReadSizedTrainingDump(const char *data, int size,
LSTMTrainer &trainer) const {
return trainer.ReadLocalTrainingDump(&mgr_, data, size);
}
// Restores the model to *this.
bool ReadLocalTrainingDump(const TessdataManager *mgr, const char *data, int size);
bool ReadLocalTrainingDump(const TessdataManager *mgr, const char *data,
int size);
// Sets up the data for MaintainCheckpoints from a light ReadTrainingDump.
void SetupCheckpointInfo();
@ -334,26 +346,30 @@ protected:
// corresponding x_starts.
// Returns false if the truth string is empty.
bool DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata,
const NetworkIO &fwd_outputs, const std::vector<int> &truth_labels,
const NetworkIO &fwd_outputs,
const std::vector<int> &truth_labels,
const NetworkIO &outputs);
// Displays the network targets as line a line graph.
void DisplayTargets(const NetworkIO &targets, const char *window_name, ScrollView **window);
void DisplayTargets(const NetworkIO &targets, const char *window_name,
ScrollView **window);
// Builds a no-compromises target where the first positions should be the
// truth labels and the rest is padded with the null_char_.
bool ComputeTextTargets(const NetworkIO &outputs, const std::vector<int> &truth_labels,
bool ComputeTextTargets(const NetworkIO &outputs,
const std::vector<int> &truth_labels,
NetworkIO *targets);
// Builds a target using standard CTC. truth_labels should be pre-padded with
// nulls wherever desired. They don't have to be between all labels.
// outputs is input-output, as it gets clipped to minimum probability.
bool ComputeCTCTargets(const std::vector<int> &truth_labels, NetworkIO *outputs,
NetworkIO *targets);
bool ComputeCTCTargets(const std::vector<int> &truth_labels,
NetworkIO *outputs, NetworkIO *targets);
// Computes network errors, and stores the results in the rolling buffers,
// along with the supplied text_error.
// Returns the delta error of the current sample (not running average.)
double ComputeErrorRates(const NetworkIO &deltas, double char_error, double word_error);
double ComputeErrorRates(const NetworkIO &deltas, double char_error,
double word_error);
// Computes the network activation RMS error rate.
double ComputeRMSError(const NetworkIO &deltas);
@ -366,7 +382,8 @@ protected:
double ComputeWinnerError(const NetworkIO &deltas);
// Computes a very simple bag of chars char error rate.
double ComputeCharError(const std::vector<int> &truth_str, const std::vector<int> &ocr_str);
double ComputeCharError(const std::vector<int> &truth_str,
const std::vector<int> &ocr_str);
// Computes a very simple bag of words word recall error rate.
// NOTE that this is destructive on both input strings.
double ComputeWordError(std::string *truth_str, std::string *ocr_str);
@ -380,8 +397,9 @@ protected:
// Given that error_rate is either a new min or max, updates the best/worst
// error rates, and record of progress.
std::string UpdateErrorGraph(int iteration, double error_rate, const std::vector<char> &model_data,
TestCallback tester);
std::string UpdateErrorGraph(int iteration, double error_rate,
const std::vector<char> &model_data,
const TestCallback &tester);
protected:
#ifndef GRAPHICS_DISABLED

View File

@ -74,9 +74,9 @@ void SVSync::StartProcess(const char *executable, const char *args) {
STARTUPINFO start_info;
PROCESS_INFORMATION proc_info;
GetStartupInfo(&start_info);
if (!CreateProcess(nullptr, const_cast<char *>(proc.c_str()), nullptr, nullptr, FALSE,
CREATE_NO_WINDOW | DETACHED_PROCESS, nullptr, nullptr, &start_info,
&proc_info))
if (!CreateProcess(nullptr, const_cast<char *>(proc.c_str()), nullptr,
nullptr, FALSE, CREATE_NO_WINDOW | DETACHED_PROCESS,
nullptr, nullptr, &start_info, &proc_info))
return;
# else
int pid = fork();
@ -243,14 +243,15 @@ static const char *ScrollViewProg() {
}
// The arguments to the program to invoke to start ScrollView
static std::string ScrollViewCommand(std::string scrollview_path) {
static std::string ScrollViewCommand(const std::string &scrollview_path) {
// The following ugly ifdef is to enable the output of the java runtime
// to be sent down a black hole on non-windows to ignore all the
// exceptions in piccolo. Ideally piccolo would be debugged to make
// this unnecessary.
// Also the path has to be separated by ; on windows and : otherwise.
# ifdef _WIN32
const char cmd_template[] = "-Djava.library.path=\"%s\" -jar \"%s/ScrollView.jar\"";
const char cmd_template[] =
"-Djava.library.path=\"%s\" -jar \"%s/ScrollView.jar\"";
# else
const char cmd_template[] =
@ -289,14 +290,15 @@ SVNetwork::SVNetwork(const char *hostname, int port) {
# endif // _WIN32
if (getaddrinfo(hostname, port_string.c_str(), nullptr, &addr_info) != 0) {
std::cerr << "Error resolving name for ScrollView host " << std::string(hostname) << ":" << port
<< std::endl;
std::cerr << "Error resolving name for ScrollView host "
<< std::string(hostname) << ":" << port << std::endl;
# ifdef _WIN32
WSACleanup();
# endif // _WIN32
}
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, addr_info->ai_protocol);
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype,
addr_info->ai_protocol);
if (stream_ < 0) {
std::cerr << "Failed to open socket" << std::endl;
@ -324,7 +326,8 @@ SVNetwork::SVNetwork(const char *hostname, int port) {
Close();
for (;;) {
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, addr_info->ai_protocol);
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype,
addr_info->ai_protocol);
if (stream_ >= 0) {
if (connect(stream_, addr_info->ai_addr, addr_info->ai_addrlen) == 0) {
break;

View File

@ -111,7 +111,7 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
if (static_cast<int>(length) == 0) {
return (0);
}
angle = static_cast<int>(floor(asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5));
angle = static_cast<int>(floor(std::asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5));
/* Use dot product */
if (vector1.dot(vector2) < 0) {

View File

@ -46,7 +46,7 @@
/* How many to keep */
#define MAX_NUM_SEAMS 150
/* How many to keep */
#define NO_FULL_PRIORITY -1 /* Special marker for pri. */
#define NO_FULL_PRIORITY (-1) // Special marker for pri.
/* Evaluate right away */
#define BAD_PRIORITY 9999.0

View File

@ -51,7 +51,7 @@ PRIORITY Wordrec::grade_split_length(SPLIT *split) {
if (split_length <= 0) {
grade = 0;
} else {
grade = sqrt(split_length) * chop_split_dist_knob;
grade = std::sqrt(split_length) * chop_split_dist_knob;
}
return (std::max(0.0f, grade));

View File

@ -984,8 +984,8 @@ float LanguageModel::ComputeNgramCost(const char *unichar, float certainty, floa
*found_small_prob = true;
prob = language_model_ngram_small_prob;
}
*ngram_cost = -1.0 * log2(prob);
float ngram_and_classifier_cost = -1.0 * log2(CertaintyScore(certainty) / denom) +
*ngram_cost = -1 * std::log2(prob);
float ngram_and_classifier_cost = -1 * std::log2(CertaintyScore(certainty) / denom) +
*ngram_cost * language_model_ngram_scale_factor;
if (language_model_debug_level > 1) {
tprintf("-log [ p(%s) * p(%s | %s) ] = -log2(%g*%g) = %g\n", unichar, unichar, context_ptr,
@ -1341,24 +1341,24 @@ void LanguageModel::ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float
int permuter = vse.dawg_info->permuter;
if (permuter == NUMBER_PERM || permuter == USER_PATTERN_PERM) {
if (vse.consistency_info.num_digits == vse.length) {
features[PTRAIN_DIGITS_SHORT + len] = 1.0;
features[PTRAIN_DIGITS_SHORT + len] = 1.0f;
} else {
features[PTRAIN_NUM_SHORT + len] = 1.0;
features[PTRAIN_NUM_SHORT + len] = 1.0f;
}
} else if (permuter == DOC_DAWG_PERM) {
features[PTRAIN_DOC_SHORT + len] = 1.0;
features[PTRAIN_DOC_SHORT + len] = 1.0f;
} else if (permuter == SYSTEM_DAWG_PERM || permuter == USER_DAWG_PERM ||
permuter == COMPOUND_PERM) {
features[PTRAIN_DICT_SHORT + len] = 1.0;
features[PTRAIN_DICT_SHORT + len] = 1.0f;
} else if (permuter == FREQ_DAWG_PERM) {
features[PTRAIN_FREQ_SHORT + len] = 1.0;
features[PTRAIN_FREQ_SHORT + len] = 1.0f;
}
}
// Record shape cost feature (normalized by path length).
features[PTRAIN_SHAPE_COST_PER_CHAR] =
vse.associate_stats.shape_cost / static_cast<float>(vse.length);
// Record ngram cost. (normalized by the path length).
features[PTRAIN_NGRAM_COST_PER_CHAR] = 0.0;
features[PTRAIN_NGRAM_COST_PER_CHAR] = 0.0f;
if (vse.ngram_info != nullptr) {
features[PTRAIN_NGRAM_COST_PER_CHAR] =
vse.ngram_info->ngram_cost / static_cast<float>(vse.length);
@ -1369,7 +1369,7 @@ void LanguageModel::ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float
features[PTRAIN_NUM_BAD_CASE] = vse.consistency_info.NumInconsistentCase();
features[PTRAIN_XHEIGHT_CONSISTENCY] = vse.consistency_info.xht_decision;
features[PTRAIN_NUM_BAD_CHAR_TYPE] =
vse.dawg_info == nullptr ? vse.consistency_info.NumInconsistentChartype() : 0.0;
vse.dawg_info == nullptr ? vse.consistency_info.NumInconsistentChartype() : 0.0f;
features[PTRAIN_NUM_BAD_SPACING] = vse.consistency_info.NumInconsistentSpaces();
// Disabled this feature for now due to its poor performance.
// features[PTRAIN_NUM_BAD_FONT] = vse.consistency_info.inconsistent_font;

View File

@ -94,7 +94,7 @@ bool ParamsModel::Equivalent(const ParamsModel &that) const {
}
for (unsigned i = 0; i < weights_vec_[p].size(); i++) {
if (weights_vec_[p][i] != that.weights_vec_[p][i] &&
fabs(weights_vec_[p][i] - that.weights_vec_[p][i]) > epsilon) {
std::fabs(weights_vec_[p][i] - that.weights_vec_[p][i]) > epsilon) {
return false;
}
}