mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Fix old TODO (STATS::rangemax_)
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
31a968432d
commit
76faf16006
@ -159,7 +159,7 @@ PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
|
|||||||
// Helper computes median xheight in the image.
|
// Helper computes median xheight in the image.
|
||||||
static double MedianXHeight(BLOCK_LIST *block_list) {
|
static double MedianXHeight(BLOCK_LIST *block_list) {
|
||||||
BLOCK_IT block_it(block_list);
|
BLOCK_IT block_it(block_list);
|
||||||
STATS xheights(0, block_it.data()->pdblk.bounding_box().height());
|
STATS xheights(0, block_it.data()->pdblk.bounding_box().height() - 1);
|
||||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||||
ROW_IT row_it(block_it.data()->row_list());
|
ROW_IT row_it(block_it.data()->row_list());
|
||||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||||
|
@ -2015,7 +2015,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
|||||||
void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
|
void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
|
||||||
PAGE_RES_IT page_res_it(page_res);
|
PAGE_RES_IT page_res_it(page_res);
|
||||||
WERD_RES *word; // current word
|
WERD_RES *word; // current word
|
||||||
STATS doc_fonts(0, font_table_size_); // font counters
|
STATS doc_fonts(0, font_table_size_ - 1); // font counters
|
||||||
|
|
||||||
// Gather font id statistics.
|
// Gather font id statistics.
|
||||||
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
|
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
|
||||||
|
@ -103,8 +103,8 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
|
|||||||
// Returns a new x-height maximally compatible with the result in word_res.
|
// Returns a new x-height maximally compatible with the result in word_res.
|
||||||
// See comment above for overall algorithm.
|
// See comment above for overall algorithm.
|
||||||
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift) {
|
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift) {
|
||||||
STATS top_stats(0, UINT8_MAX);
|
STATS top_stats(0, UINT8_MAX - 1);
|
||||||
STATS shift_stats(-UINT8_MAX, UINT8_MAX);
|
STATS shift_stats(-UINT8_MAX, UINT8_MAX - 1);
|
||||||
int bottom_shift = 0;
|
int bottom_shift = 0;
|
||||||
int num_blobs = word_res->rebuild_word->NumBlobs();
|
int num_blobs = word_res->rebuild_word->NumBlobs();
|
||||||
do {
|
do {
|
||||||
|
@ -1623,8 +1623,8 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
|
|||||||
UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
|
UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
|
||||||
UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
|
UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
|
||||||
}
|
}
|
||||||
STATS lefts(lmin, lmax + 1);
|
STATS lefts(lmin, lmax);
|
||||||
STATS rights(rmin, rmax + 1);
|
STATS rights(rmin, rmax);
|
||||||
for (int i = start; i < end; i++) {
|
for (int i = start; i < end; i++) {
|
||||||
RowScratchRegisters &sr = (*rows)[i];
|
RowScratchRegisters &sr = (*rows)[i];
|
||||||
if (sr.ri_->num_words == 0) {
|
if (sr.ri_->num_words == 0) {
|
||||||
@ -1655,7 +1655,7 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
|
|||||||
(rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
|
(rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
|
||||||
int word_width =
|
int word_width =
|
||||||
(rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2;
|
(rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2;
|
||||||
STATS spacing_widths(0, 5 + word_width);
|
STATS spacing_widths(0, 4 + word_width);
|
||||||
for (int i = row_start; i < row_end; i++) {
|
for (int i = row_start; i < row_end; i++) {
|
||||||
if (rows[i].ri_->num_words > 1) {
|
if (rows[i].ri_->num_words > 1) {
|
||||||
spacing_widths.add(rows[i].ri_->average_interword_space, 1);
|
spacing_widths.add(rows[i].ri_->average_interword_space, 1);
|
||||||
|
@ -810,7 +810,7 @@ void TO_ROW::compute_vertical_projection() { // project whole row
|
|||||||
row_box += blob_it.data()->bounding_box();
|
row_box += blob_it.data()->bounding_box();
|
||||||
}
|
}
|
||||||
|
|
||||||
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
|
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN - 1);
|
||||||
projection_left = row_box.left() - PROJECTION_MARGIN;
|
projection_left = row_box.left() - PROJECTION_MARGIN;
|
||||||
projection_right = row_box.right() + PROJECTION_MARGIN;
|
projection_right = row_box.right() + PROJECTION_MARGIN;
|
||||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||||
|
@ -725,8 +725,8 @@ public:
|
|||||||
ASSERT_HOST(block->pdblk.poly_block() != nullptr);
|
ASSERT_HOST(block->pdblk.poly_block() != nullptr);
|
||||||
block->rotate(rotation);
|
block->rotate(rotation);
|
||||||
// Update the median size statistic from the blobs list.
|
// Update the median size statistic from the blobs list.
|
||||||
STATS widths(0, block->pdblk.bounding_box().width());
|
STATS widths(0, block->pdblk.bounding_box().width() - 1);
|
||||||
STATS heights(0, block->pdblk.bounding_box().height());
|
STATS heights(0, block->pdblk.bounding_box().height() - 1);
|
||||||
BLOBNBOX_IT blob_it(&blobs);
|
BLOBNBOX_IT blob_it(&blobs);
|
||||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||||
widths.add(blob_it.data()->bounding_box().width(), 1);
|
widths.add(blob_it.data()->bounding_box().width(), 1);
|
||||||
|
@ -40,14 +40,14 @@ namespace tesseract {
|
|||||||
*
|
*
|
||||||
* Construct a new stats element by allocating and zeroing the memory.
|
* Construct a new stats element by allocating and zeroing the memory.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
|
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value) {
|
||||||
if (max_bucket_value_plus_1 <= min_bucket_value) {
|
if (max_bucket_value < min_bucket_value) {
|
||||||
min_bucket_value = 0;
|
min_bucket_value = 0;
|
||||||
max_bucket_value_plus_1 = 1;
|
max_bucket_value = 1;
|
||||||
}
|
}
|
||||||
rangemin_ = min_bucket_value; // setup
|
rangemin_ = min_bucket_value; // setup
|
||||||
rangemax_ = max_bucket_value_plus_1;
|
rangemax_ = max_bucket_value;
|
||||||
buckets_ = new int32_t[rangemax_ - rangemin_];
|
buckets_ = new int32_t[1 + rangemax_ - rangemin_];
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,16 +56,16 @@ STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
|
|||||||
*
|
*
|
||||||
* Alter the range on an existing stats element.
|
* Alter the range on an existing stats element.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
|
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value) {
|
||||||
if (max_bucket_value_plus_1 <= min_bucket_value) {
|
if (max_bucket_value < min_bucket_value) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
|
if (rangemax_ - rangemin_ != max_bucket_value - min_bucket_value) {
|
||||||
delete[] buckets_;
|
delete[] buckets_;
|
||||||
buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value];
|
buckets_ = new int32_t[1 + max_bucket_value - min_bucket_value];
|
||||||
}
|
}
|
||||||
rangemin_ = min_bucket_value; // setup
|
rangemin_ = min_bucket_value; // setup
|
||||||
rangemax_ = max_bucket_value_plus_1;
|
rangemax_ = max_bucket_value;
|
||||||
clear(); // zero it
|
clear(); // zero it
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -78,7 +78,7 @@ bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
|
|||||||
void STATS::clear() { // clear out buckets
|
void STATS::clear() { // clear out buckets
|
||||||
total_count_ = 0;
|
total_count_ = 0;
|
||||||
if (buckets_ != nullptr) {
|
if (buckets_ != nullptr) {
|
||||||
memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
|
memset(buckets_, 0, (1 + rangemax_ - rangemin_) * sizeof(buckets_[0]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,12 +97,11 @@ STATS::~STATS() {
|
|||||||
* Add a set of samples to (or delete from) a pile.
|
* Add a set of samples to (or delete from) a pile.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
void STATS::add(int32_t value, int32_t count) {
|
void STATS::add(int32_t value, int32_t count) {
|
||||||
if (buckets_ == nullptr) {
|
if (buckets_ != nullptr) {
|
||||||
return;
|
value = ClipToRange(value, rangemin_, rangemax_);
|
||||||
|
buckets_[value - rangemin_] += count;
|
||||||
|
total_count_ += count; // keep count of total
|
||||||
}
|
}
|
||||||
value = ClipToRange(value, rangemin_, rangemax_ - 1);
|
|
||||||
buckets_[value - rangemin_] += count;
|
|
||||||
total_count_ += count; // keep count of total
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
@ -116,7 +115,7 @@ int32_t STATS::mode() const { // get mode of samples
|
|||||||
}
|
}
|
||||||
int32_t max = buckets_[0]; // max cell count
|
int32_t max = buckets_[0]; // max cell count
|
||||||
int32_t maxindex = 0; // index of max
|
int32_t maxindex = 0; // index of max
|
||||||
for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
|
for (int index = rangemax_ - rangemin_; index > 0; --index) {
|
||||||
if (buckets_[index] > max) {
|
if (buckets_[index] > max) {
|
||||||
max = buckets_[index]; // find biggest
|
max = buckets_[index]; // find biggest
|
||||||
maxindex = index;
|
maxindex = index;
|
||||||
@ -135,7 +134,7 @@ double STATS::mean() const { // get mean of samples
|
|||||||
return static_cast<double>(rangemin_);
|
return static_cast<double>(rangemin_);
|
||||||
}
|
}
|
||||||
int64_t sum = 0;
|
int64_t sum = 0;
|
||||||
for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
|
for (int index = rangemax_ - rangemin_; index >= 0; --index) {
|
||||||
sum += static_cast<int64_t>(index) * buckets_[index];
|
sum += static_cast<int64_t>(index) * buckets_[index];
|
||||||
}
|
}
|
||||||
return static_cast<double>(sum) / total_count_ + rangemin_;
|
return static_cast<double>(sum) / total_count_ + rangemin_;
|
||||||
@ -152,7 +151,7 @@ double STATS::sd() const { // standard deviation
|
|||||||
}
|
}
|
||||||
int64_t sum = 0;
|
int64_t sum = 0;
|
||||||
double sqsum = 0.0;
|
double sqsum = 0.0;
|
||||||
for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
|
for (int index = rangemax_ - rangemin_; index >= 0; --index) {
|
||||||
sum += static_cast<int64_t>(index) * buckets_[index];
|
sum += static_cast<int64_t>(index) * buckets_[index];
|
||||||
sqsum += static_cast<double>(index) * index * buckets_[index];
|
sqsum += static_cast<double>(index) * index * buckets_[index];
|
||||||
}
|
}
|
||||||
@ -186,7 +185,7 @@ double STATS::ile(double frac) const {
|
|||||||
#endif
|
#endif
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
|
for (index = 0; index <= rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
if (index > 0) {
|
if (index > 0) {
|
||||||
@ -207,7 +206,7 @@ int32_t STATS::min_bucket() const { // Find min
|
|||||||
return rangemin_;
|
return rangemin_;
|
||||||
}
|
}
|
||||||
int32_t min = 0;
|
int32_t min = 0;
|
||||||
for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
|
for (min = 0; (min <= rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
return rangemin_ + min;
|
return rangemin_ + min;
|
||||||
@ -224,7 +223,7 @@ int32_t STATS::max_bucket() const { // Find max
|
|||||||
return rangemin_;
|
return rangemin_;
|
||||||
}
|
}
|
||||||
int32_t max;
|
int32_t max;
|
||||||
for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) {
|
for (max = rangemax_ - rangemin_; max > 0 && buckets_[max] == 0; max--) {
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
return rangemin_ + max;
|
return rangemin_ + max;
|
||||||
@ -270,7 +269,7 @@ bool STATS::local_min(int32_t x) const {
|
|||||||
if (buckets_ == nullptr) {
|
if (buckets_ == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
|
x = ClipToRange(x, rangemin_, rangemax_) - rangemin_;
|
||||||
if (buckets_[x] == 0) {
|
if (buckets_[x] == 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -281,10 +280,10 @@ bool STATS::local_min(int32_t x) const {
|
|||||||
if (index >= 0 && buckets_[index] < buckets_[x]) {
|
if (index >= 0 && buckets_[index] < buckets_[x]) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
|
for (index = x + 1; index <= rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
|
if (index <= rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
return true;
|
return true;
|
||||||
@ -304,7 +303,7 @@ void STATS::smooth(int32_t factor) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
STATS result(rangemin_, rangemax_);
|
STATS result(rangemin_, rangemax_);
|
||||||
int entrycount = rangemax_ - rangemin_;
|
int entrycount = 1 + rangemax_ - rangemin_;
|
||||||
for (int entry = 0; entry < entrycount; entry++) {
|
for (int entry = 0; entry < entrycount; entry++) {
|
||||||
// centre weight
|
// centre weight
|
||||||
int count = buckets_[entry] * factor;
|
int count = buckets_[entry] * factor;
|
||||||
@ -368,7 +367,7 @@ int32_t STATS::cluster(float lower, // thresholds
|
|||||||
clusters[0].add(entry, count);
|
clusters[0].add(entry, count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
|
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
|
||||||
pile_count(entry) <= pile_count(entry - 1);
|
pile_count(entry) <= pile_count(entry - 1);
|
||||||
entry++) {
|
entry++) {
|
||||||
count = pile_count(entry) - clusters[0].pile_count(entry);
|
count = pile_count(entry) - clusters[0].pile_count(entry);
|
||||||
@ -386,7 +385,7 @@ int32_t STATS::cluster(float lower, // thresholds
|
|||||||
do {
|
do {
|
||||||
new_cluster = false;
|
new_cluster = false;
|
||||||
new_mode = 0;
|
new_mode = 0;
|
||||||
for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
|
for (entry = 0; entry <= rangemax_ - rangemin_; entry++) {
|
||||||
count = buckets_[entry] - clusters[0].buckets_[entry];
|
count = buckets_[entry] - clusters[0].buckets_[entry];
|
||||||
// remaining pile
|
// remaining pile
|
||||||
if (count > 0) { // any to handle
|
if (count > 0) { // any to handle
|
||||||
@ -433,7 +432,7 @@ int32_t STATS::cluster(float lower, // thresholds
|
|||||||
clusters[0].add(entry, count);
|
clusters[0].add(entry, count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
|
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
|
||||||
pile_count(entry) <= pile_count(entry - 1);
|
pile_count(entry) <= pile_count(entry - 1);
|
||||||
entry++) {
|
entry++) {
|
||||||
count = pile_count(entry) - clusters[0].pile_count(entry);
|
count = pile_count(entry) - clusters[0].pile_count(entry);
|
||||||
@ -482,7 +481,7 @@ int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes)
|
|||||||
if (max_modes <= 0) {
|
if (max_modes <= 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
int src_count = rangemax_ - rangemin_;
|
int src_count = 1 + rangemax_ - rangemin_;
|
||||||
// Used copies the counts in buckets_ as they get used.
|
// Used copies the counts in buckets_ as they get used.
|
||||||
STATS used(rangemin_, rangemax_);
|
STATS used(rangemin_, rangemax_);
|
||||||
modes.clear();
|
modes.clear();
|
||||||
@ -605,7 +604,7 @@ void STATS::plot(ScrollView *window, // to draw in
|
|||||||
}
|
}
|
||||||
window->Pen(colour);
|
window->Pen(colour);
|
||||||
|
|
||||||
for (int index = 0; index < rangemax_ - rangemin_; index++) {
|
for (int index = 0; index <= rangemax_ - rangemin_; index++) {
|
||||||
window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
|
window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
|
||||||
yorigin + yscale * buckets_[index]);
|
yorigin + yscale * buckets_[index]);
|
||||||
}
|
}
|
||||||
@ -630,7 +629,7 @@ void STATS::plotline(ScrollView *window, // to draw in
|
|||||||
}
|
}
|
||||||
window->Pen(colour);
|
window->Pen(colour);
|
||||||
window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
|
window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
|
||||||
for (int index = 0; index < rangemax_ - rangemin_; index++) {
|
for (int index = 0; index <= rangemax_ - rangemin_; index++) {
|
||||||
window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
|
window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -30,23 +30,20 @@ namespace tesseract {
|
|||||||
class TESS_API STATS {
|
class TESS_API STATS {
|
||||||
public:
|
public:
|
||||||
// The histogram buckets are in the range
|
// The histogram buckets are in the range
|
||||||
// [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
|
|
||||||
// [min_bucket_value, max_bucket_value].
|
// [min_bucket_value, max_bucket_value].
|
||||||
// Any data under min_bucket value is silently mapped to min_bucket_value,
|
// Any data under min_bucket value is silently mapped to min_bucket_value,
|
||||||
// and likewise, any data over max_bucket_value is silently mapped to
|
// and likewise, any data over max_bucket_value is silently mapped to
|
||||||
// max_bucket_value.
|
// max_bucket_value.
|
||||||
// In the internal array, min_bucket_value maps to 0 and
|
// In the internal array, min_bucket_value maps to 0 and
|
||||||
// max_bucket_value_plus_1 - min_bucket_value to the array size.
|
// 1 + max_bucket_value - min_bucket_value to the array size.
|
||||||
// TODO(rays) This is ugly. Convert the second argument to
|
STATS(int32_t min_bucket_value, int32_t max_bucket_value);
|
||||||
// max_bucket_value and all the code that uses it.
|
|
||||||
STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
|
|
||||||
STATS() = default; // empty for arrays
|
STATS() = default; // empty for arrays
|
||||||
|
|
||||||
~STATS();
|
~STATS();
|
||||||
|
|
||||||
// (Re)Sets the range and clears the counts.
|
// (Re)Sets the range and clears the counts.
|
||||||
// See the constructor for info on max and min values.
|
// See the constructor for info on max and min values.
|
||||||
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
|
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value);
|
||||||
|
|
||||||
void clear(); // empty buckets
|
void clear(); // empty buckets
|
||||||
|
|
||||||
@ -79,8 +76,8 @@ public:
|
|||||||
if (value <= rangemin_) {
|
if (value <= rangemin_) {
|
||||||
return buckets_[0];
|
return buckets_[0];
|
||||||
}
|
}
|
||||||
if (value >= rangemax_ - 1) {
|
if (value >= rangemax_) {
|
||||||
return buckets_[rangemax_ - rangemin_ - 1];
|
return buckets_[rangemax_ - rangemin_];
|
||||||
}
|
}
|
||||||
return buckets_[value - rangemin_];
|
return buckets_[value - rangemin_];
|
||||||
}
|
}
|
||||||
@ -142,7 +139,6 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
int32_t rangemin_ = 0; // min of range
|
int32_t rangemin_ = 0; // min of range
|
||||||
// rangemax_ is not well named as it is really one past the max.
|
|
||||||
int32_t rangemax_ = 0; // max of range
|
int32_t rangemax_ = 0; // max of range
|
||||||
int32_t total_count_ = 0; // no of samples
|
int32_t total_count_ = 0; // no of samples
|
||||||
int32_t *buckets_ = nullptr; // array of cells
|
int32_t *buckets_ = nullptr; // array of cells
|
||||||
|
@ -294,7 +294,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
|
|||||||
void LSTMRecognizer::OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output,
|
void LSTMRecognizer::OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output,
|
||||||
float *sd) {
|
float *sd) {
|
||||||
const int kOutputScale = INT8_MAX;
|
const int kOutputScale = INT8_MAX;
|
||||||
STATS stats(0, kOutputScale + 1);
|
STATS stats(0, kOutputScale);
|
||||||
for (int t = 0; t < outputs.Width(); ++t) {
|
for (int t = 0; t < outputs.Width(); ++t) {
|
||||||
int best_label = outputs.BestLabel(t, nullptr);
|
int best_label = outputs.BestLabel(t, nullptr);
|
||||||
if (best_label != null_char_) {
|
if (best_label != null_char_) {
|
||||||
|
@ -127,7 +127,7 @@ void NetworkIO::ZeroInvalidElements() {
|
|||||||
static void ComputeBlackWhite(Image pix, float *black, float *white) {
|
static void ComputeBlackWhite(Image pix, float *black, float *white) {
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
STATS mins(0, 256), maxes(0, 256);
|
STATS mins(0, 255), maxes(0, 255);
|
||||||
if (width >= 3) {
|
if (width >= 3) {
|
||||||
int y = height / 2;
|
int y = height / 2;
|
||||||
l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y;
|
l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y;
|
||||||
|
@ -525,7 +525,7 @@ static void HistogramWeight(TFloat weight, STATS *histogram) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void WeightMatrix::Debug2D(const char *msg) {
|
void WeightMatrix::Debug2D(const char *msg) {
|
||||||
STATS histogram(0, kHistogramBuckets);
|
STATS histogram(0, kHistogramBuckets - 1);
|
||||||
if (int_mode_) {
|
if (int_mode_) {
|
||||||
for (int i = 0; i < wi_.dim1(); ++i) {
|
for (int i = 0; i < wi_.dim1(); ++i) {
|
||||||
for (int j = 0; j < wi_.dim2(); ++j) {
|
for (int j = 0; j < wi_.dim2(); ++j) {
|
||||||
|
@ -320,7 +320,7 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) {
|
|||||||
}
|
}
|
||||||
// Set up a histogram using disp_quant_factor_ as the bucket size.
|
// Set up a histogram using disp_quant_factor_ as the bucket size.
|
||||||
STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
|
STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
|
||||||
IntCastRounded(max_dist / disp_quant_factor_) + 1);
|
IntCastRounded(max_dist / disp_quant_factor_));
|
||||||
for (double perp_blob_dist : perp_blob_dists) {
|
for (double perp_blob_dist : perp_blob_dists) {
|
||||||
dist_stats.add(IntCastRounded(perp_blob_dist / disp_quant_factor_), 1);
|
dist_stats.add(IntCastRounded(perp_blob_dist / disp_quant_factor_), 1);
|
||||||
}
|
}
|
||||||
|
@ -55,7 +55,7 @@ bool test_underline( // look for underlines
|
|||||||
|
|
||||||
auto blob_box = blob->bounding_box();
|
auto blob_box = blob->bounding_box();
|
||||||
auto blob_width = blob->bounding_box().width();
|
auto blob_width = blob->bounding_box().width();
|
||||||
projection.set_range(blob_box.bottom(), blob_box.top() + 1);
|
projection.set_range(blob_box.bottom(), blob_box.top());
|
||||||
if (testing_on) {
|
if (testing_on) {
|
||||||
// blob->plot(to_win,GOLDENROD,GOLDENROD);
|
// blob->plot(to_win,GOLDENROD,GOLDENROD);
|
||||||
// line_color_index(to_win,GOLDENROD);
|
// line_color_index(to_win,GOLDENROD);
|
||||||
|
@ -808,7 +808,7 @@ bool ColumnFinder::BiggestUnassignedRange(int set_count, const bool *any_columns
|
|||||||
int ColumnFinder::RangeModalColumnSet(int **column_set_costs, const int *assigned_costs, int start,
|
int ColumnFinder::RangeModalColumnSet(int **column_set_costs, const int *assigned_costs, int start,
|
||||||
int end) {
|
int end) {
|
||||||
int column_count = column_sets_.size();
|
int column_count = column_sets_.size();
|
||||||
STATS column_stats(0, column_count);
|
STATS column_stats(0, column_count - 1);
|
||||||
for (int part_i = start; part_i < end; ++part_i) {
|
for (int part_i = start; part_i < end; ++part_i) {
|
||||||
for (int col_j = 0; col_j < column_count; ++col_j) {
|
for (int col_j = 0; col_j < column_count; ++col_j) {
|
||||||
if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) {
|
if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) {
|
||||||
@ -1577,8 +1577,8 @@ void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST *block
|
|||||||
FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
|
FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
|
||||||
// Rotate all the blobs if needed and recompute the bounding boxes.
|
// Rotate all the blobs if needed and recompute the bounding boxes.
|
||||||
// Compute the block median blob width and height as we go.
|
// Compute the block median blob width and height as we go.
|
||||||
STATS widths(0, block->pdblk.bounding_box().width());
|
STATS widths(0, block->pdblk.bounding_box().width() - 1);
|
||||||
STATS heights(0, block->pdblk.bounding_box().height());
|
STATS heights(0, block->pdblk.bounding_box().height() - 1);
|
||||||
RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, &widths, &heights);
|
RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, &widths, &heights);
|
||||||
TO_ROW_IT row_it(to_block->get_rows());
|
TO_ROW_IT row_it(to_block->get_rows());
|
||||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||||
|
@ -949,12 +949,12 @@ void ColPartition::ComputeLimits() {
|
|||||||
median_right_ = bounding_box_.right();
|
median_right_ = bounding_box_.right();
|
||||||
median_width_ = bounding_box_.width();
|
median_width_ = bounding_box_.width();
|
||||||
} else {
|
} else {
|
||||||
STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
STATS top_stats(bounding_box_.bottom(), bounding_box_.top());
|
||||||
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top());
|
||||||
STATS height_stats(0, bounding_box_.height() + 1);
|
STATS height_stats(0, bounding_box_.height());
|
||||||
STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
STATS left_stats(bounding_box_.left(), bounding_box_.right());
|
||||||
STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
STATS right_stats(bounding_box_.left(), bounding_box_.right());
|
||||||
STATS width_stats(0, bounding_box_.width() + 1);
|
STATS width_stats(0, bounding_box_.width());
|
||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
bbox = it.data();
|
bbox = it.data();
|
||||||
if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
|
if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
|
||||||
@ -1132,8 +1132,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
|
|||||||
bool result = false;
|
bool result = false;
|
||||||
// Gather statistics on the gaps between blobs and the widths of the blobs.
|
// Gather statistics on the gaps between blobs and the widths of the blobs.
|
||||||
int part_width = bounding_box_.width();
|
int part_width = bounding_box_.width();
|
||||||
STATS gap_stats(0, part_width);
|
STATS gap_stats(0, part_width - 1);
|
||||||
STATS width_stats(0, part_width);
|
STATS width_stats(0, part_width - 1);
|
||||||
BLOBNBOX_C_IT it(&boxes_);
|
BLOBNBOX_C_IT it(&boxes_);
|
||||||
BLOBNBOX *prev_blob = it.data();
|
BLOBNBOX *prev_blob = it.data();
|
||||||
prev_blob->set_flow(BTFT_NEIGHBOURS);
|
prev_blob->set_flow(BTFT_NEIGHBOURS);
|
||||||
@ -1489,7 +1489,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
|
|||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
ColPartition *part = it.data();
|
ColPartition *part = it.data();
|
||||||
ASSERT_HOST(!part->boxes()->empty());
|
ASSERT_HOST(!part->boxes()->empty());
|
||||||
STATS side_steps(0, part->bounding_box().height());
|
STATS side_steps(0, part->bounding_box().height() - 1);
|
||||||
if (part->bounding_box().height() > max_line_height) {
|
if (part->bounding_box().height() > max_line_height) {
|
||||||
max_line_height = part->bounding_box().height();
|
max_line_height = part->bounding_box().height();
|
||||||
}
|
}
|
||||||
@ -1626,7 +1626,7 @@ static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing,
|
|||||||
// that have have to continue to exist until the part grid is deleted.
|
// that have have to continue to exist until the part grid is deleted.
|
||||||
// Compute the median blob size as we go, as the block needs to know.
|
// Compute the median blob size as we go, as the block needs to know.
|
||||||
TBOX block_box(block->pdblk.bounding_box());
|
TBOX block_box(block->pdblk.bounding_box());
|
||||||
STATS sizes(0, std::max(block_box.width(), block_box.height()));
|
STATS sizes(0, std::max(block_box.width(), block_box.height()) - 1);
|
||||||
bool text_type = block->pdblk.poly_block()->IsText();
|
bool text_type = block->pdblk.poly_block()->IsText();
|
||||||
ColPartition_IT it(block_parts);
|
ColPartition_IT it(block_parts);
|
||||||
auto *to_block = new TO_BLOCK(block);
|
auto *to_block = new TO_BLOCK(block);
|
||||||
@ -1884,8 +1884,8 @@ void ColPartition::PrintColors() {
|
|||||||
|
|
||||||
// Sets the types of all partitions in the run to be the max of the types.
|
// Sets the types of all partitions in the run to be the max of the types.
|
||||||
void ColPartition::SmoothPartnerRun(int working_set_count) {
|
void ColPartition::SmoothPartnerRun(int working_set_count) {
|
||||||
STATS left_stats(0, working_set_count);
|
STATS left_stats(0, working_set_count - 1);
|
||||||
STATS right_stats(0, working_set_count);
|
STATS right_stats(0, working_set_count - 1);
|
||||||
PolyBlockType max_type = type_;
|
PolyBlockType max_type = type_;
|
||||||
ColPartition *partner;
|
ColPartition *partner;
|
||||||
for (partner = SingletonPartner(false); partner != nullptr;
|
for (partner = SingletonPartner(false); partner != nullptr;
|
||||||
@ -2250,7 +2250,7 @@ bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox,
|
|||||||
// The iterator is passed by value so the iteration does not modify the
|
// The iterator is passed by value so the iteration does not modify the
|
||||||
// caller's iterator.
|
// caller's iterator.
|
||||||
static int MedianSpacing(int page_height, ColPartition_IT it) {
|
static int MedianSpacing(int page_height, ColPartition_IT it) {
|
||||||
STATS stats(0, page_height);
|
STATS stats(0, page_height - 1);
|
||||||
while (!it.cycled_list()) {
|
while (!it.cycled_list()) {
|
||||||
ColPartition *part = it.data();
|
ColPartition *part = it.data();
|
||||||
it.forward();
|
it.forward();
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
* Description: Methods to process images containing devanagari symbols,
|
* Description: Methods to process images containing devanagari symbols,
|
||||||
* prior to classification.
|
* prior to classification.
|
||||||
* Author: Shobhit Saxena
|
* Author: Shobhit Saxena
|
||||||
* Created: Mon Nov 17 20:26:01 IST 2008
|
|
||||||
*
|
*
|
||||||
* (C) Copyright 2008, Google Inc.
|
* (C) Copyright 2008, Google Inc.
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@ -387,7 +386,7 @@ Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const {
|
|||||||
// It also prunes very small blobs from calculation.
|
// It also prunes very small blobs from calculation.
|
||||||
int ShiroRekhaSplitter::GetModeHeight(Image pix) {
|
int ShiroRekhaSplitter::GetModeHeight(Image pix) {
|
||||||
Boxa *boxa = pixConnComp(pix, nullptr, 8);
|
Boxa *boxa = pixConnComp(pix, nullptr, 8);
|
||||||
STATS heights(0, pixGetHeight(pix));
|
STATS heights(0, pixGetHeight(pix) - 1);
|
||||||
heights.clear();
|
heights.clear();
|
||||||
for (int i = 0; i < boxaGetCount(boxa); ++i) {
|
for (int i = 0; i < boxaGetCount(boxa); ++i) {
|
||||||
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
||||||
|
@ -43,7 +43,7 @@ GAPMAP::GAPMAP( // Constructor
|
|||||||
int16_t gap_width;
|
int16_t gap_width;
|
||||||
int16_t start_of_row;
|
int16_t start_of_row;
|
||||||
int16_t end_of_row;
|
int16_t end_of_row;
|
||||||
STATS xht_stats(0, 128);
|
STATS xht_stats(0, 127);
|
||||||
int16_t min_quantum;
|
int16_t min_quantum;
|
||||||
int16_t max_quantum;
|
int16_t max_quantum;
|
||||||
int16_t i;
|
int16_t i;
|
||||||
|
@ -451,9 +451,9 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Image pix, int factor,
|
|||||||
Image scaled = pixClipRectangle(pix, scaled_box, nullptr);
|
Image scaled = pixClipRectangle(pix, scaled_box, nullptr);
|
||||||
|
|
||||||
// Compute stats over the whole image.
|
// Compute stats over the whole image.
|
||||||
STATS red_stats(0, 256);
|
STATS red_stats(0, 255);
|
||||||
STATS green_stats(0, 256);
|
STATS green_stats(0, 255);
|
||||||
STATS blue_stats(0, 256);
|
STATS blue_stats(0, 255);
|
||||||
uint32_t *data = pixGetData(scaled);
|
uint32_t *data = pixGetData(scaled);
|
||||||
ASSERT_HOST(pixGetWpl(scaled) == width_pad);
|
ASSERT_HOST(pixGetWpl(scaled) == width_pad);
|
||||||
for (int y = 0; y < height_pad; ++y) {
|
for (int y = 0; y < height_pad; ++y) {
|
||||||
|
@ -518,7 +518,7 @@ void vigorous_noise_removal(TO_BLOCK *block) {
|
|||||||
max_height = blob->bounding_box().height();
|
max_height = blob->bounding_box().height();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
STATS hstats(0, max_height + 1);
|
STATS hstats(0, max_height);
|
||||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||||
BLOBNBOX *blob = b_it.data();
|
BLOBNBOX *blob = b_it.data();
|
||||||
int height = blob->bounding_box().height();
|
int height = blob->bounding_box().height();
|
||||||
@ -1288,16 +1288,16 @@ void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) {
|
|||||||
// Compute the best guess of xheight of each row individually.
|
// Compute the best guess of xheight of each row individually.
|
||||||
// Use xheight and ascrise values of the rows where ascenders were found.
|
// Use xheight and ascrise values of the rows where ascenders were found.
|
||||||
get_min_max_xheight(block->line_size, &min_height, &max_height);
|
get_min_max_xheight(block->line_size, &min_height, &max_height);
|
||||||
STATS row_asc_xheights(min_height, max_height + 1);
|
STATS row_asc_xheights(min_height, max_height);
|
||||||
STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
|
STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
|
||||||
static_cast<int>(max_height * asc_frac_xheight) + 1);
|
static_cast<int>(max_height * asc_frac_xheight));
|
||||||
int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
|
int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
|
||||||
int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
|
int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
|
||||||
STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
|
STATS row_asc_descdrop(min_desc_height, max_desc_height);
|
||||||
STATS row_desc_xheights(min_height, max_height + 1);
|
STATS row_desc_xheights(min_height, max_height);
|
||||||
STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
|
STATS row_desc_descdrop(min_desc_height, max_desc_height);
|
||||||
STATS row_cap_xheights(min_height, max_height + 1);
|
STATS row_cap_xheights(min_height, max_height);
|
||||||
STATS row_cap_floating_xheights(min_height, max_height + 1);
|
STATS row_cap_floating_xheights(min_height, max_height);
|
||||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||||
row = row_it.data();
|
row = row_it.data();
|
||||||
// Compute the xheight of this row if it has not been computed before.
|
// Compute the xheight of this row if it has not been computed before.
|
||||||
@ -1394,8 +1394,8 @@ void Textord::compute_row_xheight(TO_ROW *row, // row to do
|
|||||||
|
|
||||||
int min_height, max_height;
|
int min_height, max_height;
|
||||||
get_min_max_xheight(block_line_size, &min_height, &max_height);
|
get_min_max_xheight(block_line_size, &min_height, &max_height);
|
||||||
STATS heights(min_height, max_height + 1);
|
STATS heights(min_height, max_height);
|
||||||
STATS floating_heights(min_height, max_height + 1);
|
STATS floating_heights(min_height, max_height);
|
||||||
fill_heights(row, gradient, min_height, max_height, &heights, &floating_heights);
|
fill_heights(row, gradient, min_height, max_height, &heights, &floating_heights);
|
||||||
row->ascrise = 0.0f;
|
row->ascrise = 0.0f;
|
||||||
row->xheight = 0.0f;
|
row->xheight = 0.0f;
|
||||||
@ -1594,7 +1594,7 @@ int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count
|
|||||||
float height; // height of blob
|
float height; // height of blob
|
||||||
BLOBNBOX_IT blob_it = row->blob_list();
|
BLOBNBOX_IT blob_it = row->blob_list();
|
||||||
BLOBNBOX *blob; // current blob
|
BLOBNBOX *blob; // current blob
|
||||||
STATS heights(min_height, max_height + 1);
|
STATS heights(min_height, max_height);
|
||||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||||
blob = blob_it.data();
|
blob = blob_it.data();
|
||||||
if (!blob->joined_to_prev()) {
|
if (!blob->joined_to_prev()) {
|
||||||
|
@ -426,7 +426,7 @@ int get_blob_coords( // get boxes
|
|||||||
int losscount; // lost blobs
|
int losscount; // lost blobs
|
||||||
int maxlosscount; // greatest lost blobs
|
int maxlosscount; // greatest lost blobs
|
||||||
/*height stat collection */
|
/*height stat collection */
|
||||||
STATS heightstat(0, MAXHEIGHT);
|
STATS heightstat(0, MAXHEIGHT - 1);
|
||||||
|
|
||||||
if (blob_it.empty()) {
|
if (blob_it.empty()) {
|
||||||
return 0; // none
|
return 0; // none
|
||||||
@ -1347,7 +1347,7 @@ void old_first_xheight( // the wiseowl way
|
|||||||
) {
|
) {
|
||||||
int blobindex; /*current blob */
|
int blobindex; /*current blob */
|
||||||
/*height statistics */
|
/*height statistics */
|
||||||
STATS heightstat(0, MAXHEIGHT);
|
STATS heightstat(0, MAXHEIGHT - 1);
|
||||||
int height; /*height of blob */
|
int height; /*height of blob */
|
||||||
int xcentre; /*centre of blob */
|
int xcentre; /*centre of blob */
|
||||||
int lineheight; /*approx xheight */
|
int lineheight; /*approx xheight */
|
||||||
@ -1427,7 +1427,7 @@ void make_first_xheight( // find xheight
|
|||||||
QSPLINE *baseline, /*established */
|
QSPLINE *baseline, /*established */
|
||||||
float jumplimit /*min ascender height */
|
float jumplimit /*min ascender height */
|
||||||
) {
|
) {
|
||||||
STATS heightstat(0, HEIGHTBUCKETS);
|
STATS heightstat(0, HEIGHTBUCKETS - 1);
|
||||||
int lefts[HEIGHTBUCKETS];
|
int lefts[HEIGHTBUCKETS];
|
||||||
int rights[HEIGHTBUCKETS];
|
int rights[HEIGHTBUCKETS];
|
||||||
int modelist[MODENUM];
|
int modelist[MODENUM];
|
||||||
|
@ -567,7 +567,7 @@ void StrokeWidth::MarkLeaderNeighbours(const ColPartition *part, LeftOrRight sid
|
|||||||
|
|
||||||
// Helper to compute the UQ of the square-ish CJK characters.
|
// Helper to compute the UQ of the square-ish CJK characters.
|
||||||
static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST *blobs) {
|
static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST *blobs) {
|
||||||
STATS sizes(0, gridsize * kMaxCJKSizeRatio);
|
STATS sizes(0, gridsize * kMaxCJKSizeRatio - 1);
|
||||||
BLOBNBOX_IT it(blobs);
|
BLOBNBOX_IT it(blobs);
|
||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
BLOBNBOX *blob = it.data();
|
BLOBNBOX *blob = it.data();
|
||||||
|
@ -971,7 +971,7 @@ void TabFind::ComputeColumnWidths(ScrollView *tab_win, ColPartitionGrid *part_gr
|
|||||||
#endif // !GRAPHICS_DISABLED
|
#endif // !GRAPHICS_DISABLED
|
||||||
// Accumulate column sections into a STATS
|
// Accumulate column sections into a STATS
|
||||||
int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
|
int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
|
||||||
STATS col_widths(0, col_widths_size + 1);
|
STATS col_widths(0, col_widths_size);
|
||||||
ApplyPartitionsToColumnWidths(part_grid, &col_widths);
|
ApplyPartitionsToColumnWidths(part_grid, &col_widths);
|
||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
if (tab_win != nullptr) {
|
if (tab_win != nullptr) {
|
||||||
@ -1099,8 +1099,8 @@ int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
|
|||||||
TabVector_IT it(lines);
|
TabVector_IT it(lines);
|
||||||
int prev_right = -1;
|
int prev_right = -1;
|
||||||
int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
|
int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
|
||||||
STATS gaps(0, max_gap);
|
STATS gaps(0, max_gap - 1);
|
||||||
STATS heights(0, max_gap);
|
STATS heights(0, max_gap - 1);
|
||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
TabVector *v = it.data();
|
TabVector *v = it.data();
|
||||||
TabVector *partner = v->GetSinglePartner();
|
TabVector *partner = v->GetSinglePartner();
|
||||||
|
@ -714,9 +714,9 @@ void TableFinder::SetVerticalSpacing(ColPartition *part) {
|
|||||||
|
|
||||||
// Set global spacing and x-height estimates
|
// Set global spacing and x-height estimates
|
||||||
void TableFinder::SetGlobalSpacings(ColPartitionGrid *grid) {
|
void TableFinder::SetGlobalSpacings(ColPartitionGrid *grid) {
|
||||||
STATS xheight_stats(0, kMaxVerticalSpacing + 1);
|
STATS xheight_stats(0, kMaxVerticalSpacing);
|
||||||
STATS width_stats(0, kMaxBlobWidth + 1);
|
STATS width_stats(0, kMaxBlobWidth);
|
||||||
STATS ledding_stats(0, kMaxVerticalSpacing + 1);
|
STATS ledding_stats(0, kMaxVerticalSpacing);
|
||||||
// Iterate the ColPartitions in the grid.
|
// Iterate the ColPartitions in the grid.
|
||||||
ColPartitionGridSearch gsearch(grid);
|
ColPartitionGridSearch gsearch(grid);
|
||||||
gsearch.SetUniqueMode(true);
|
gsearch.SetUniqueMode(true);
|
||||||
|
@ -529,8 +529,8 @@ int StructuredTable::FindHorizontalMargin(ColPartitionGrid *grid, int border, bo
|
|||||||
void StructuredTable::CalculateStats() {
|
void StructuredTable::CalculateStats() {
|
||||||
const int kMaxCellHeight = 1000;
|
const int kMaxCellHeight = 1000;
|
||||||
const int kMaxCellWidth = 1000;
|
const int kMaxCellWidth = 1000;
|
||||||
STATS height_stats(0, kMaxCellHeight + 1);
|
STATS height_stats(0, kMaxCellHeight);
|
||||||
STATS width_stats(0, kMaxCellWidth + 1);
|
STATS width_stats(0, kMaxCellWidth);
|
||||||
|
|
||||||
for (unsigned i = 0; i < row_count(); ++i) {
|
for (unsigned i = 0; i < row_count(); ++i) {
|
||||||
height_stats.add(row_height(i), column_count());
|
height_stats.add(row_height(i), column_count());
|
||||||
|
@ -608,7 +608,7 @@ void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) {
|
|||||||
max_gutter = kGutterToNeighbourRatio * mean_height;
|
max_gutter = kGutterToNeighbourRatio * mean_height;
|
||||||
}
|
}
|
||||||
|
|
||||||
STATS gutters(0, max_gutter + 1);
|
STATS gutters(0, max_gutter);
|
||||||
// Evaluate the boxes for their goodness, calculating the coverage as we go.
|
// Evaluate the boxes for their goodness, calculating the coverage as we go.
|
||||||
// Remove boxes that are not good and shorten the list to the first and
|
// Remove boxes that are not good and shorten the list to the first and
|
||||||
// last good boxes.
|
// last good boxes.
|
||||||
@ -901,7 +901,7 @@ TabVector *TabVector::VerticalTextlinePartner() {
|
|||||||
if (width < 0) {
|
if (width < 0) {
|
||||||
width = -width;
|
width = -width;
|
||||||
}
|
}
|
||||||
STATS gaps(0, width * 2);
|
STATS gaps(0, width * 2 - 1);
|
||||||
BLOBNBOX *prev_bbox = nullptr;
|
BLOBNBOX *prev_bbox = nullptr;
|
||||||
box_it2.mark_cycle_pt();
|
box_it2.mark_cycle_pt();
|
||||||
for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
|
for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
|
||||||
|
@ -163,8 +163,8 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix
|
|||||||
block_votes = like_votes = other_votes = 0;
|
block_votes = like_votes = other_votes = 0;
|
||||||
maxwidth = static_cast<int32_t>(ceil(bad_row->xheight * textord_words_maxspace));
|
maxwidth = static_cast<int32_t>(ceil(bad_row->xheight * textord_words_maxspace));
|
||||||
if (bad_row->pitch_decision != PITCH_DEF_FIXED && bad_row->pitch_decision != PITCH_DEF_PROP) {
|
if (bad_row->pitch_decision != PITCH_DEF_FIXED && bad_row->pitch_decision != PITCH_DEF_PROP) {
|
||||||
block_stats.set_range(0, maxwidth);
|
block_stats.set_range(0, maxwidth - 1);
|
||||||
like_stats.set_range(0, maxwidth);
|
like_stats.set_range(0, maxwidth - 1);
|
||||||
block_index = 1;
|
block_index = 1;
|
||||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||||
block = block_it.data();
|
block = block_it.data();
|
||||||
@ -392,7 +392,7 @@ bool try_doc_fixed( // determine pitch
|
|||||||
float final_pitch; // output pitch
|
float final_pitch; // output pitch
|
||||||
float row_y; // baseline
|
float row_y; // baseline
|
||||||
STATS projection; // entire page
|
STATS projection; // entire page
|
||||||
STATS pitches(0, MAX_ALLOWED_PITCH);
|
STATS pitches(0, MAX_ALLOWED_PITCH - 1);
|
||||||
// for median
|
// for median
|
||||||
float sp_sd; // space sd
|
float sp_sd; // space sd
|
||||||
int16_t mid_cuts; // no of cheap cuts
|
int16_t mid_cuts; // no of cheap cuts
|
||||||
@ -438,7 +438,7 @@ bool try_doc_fixed( // determine pitch
|
|||||||
if (pitches.get_total() == 0) {
|
if (pitches.get_total() == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
projection.set_range(projection_left, projection_right);
|
projection.set_range(projection_left, projection_right - 1);
|
||||||
|
|
||||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||||
block = block_it.data();
|
block = block_it.data();
|
||||||
@ -663,7 +663,7 @@ bool row_pitch_stats( // find line stats
|
|||||||
float gaps[BLOCK_STATS_CLUSTERS];
|
float gaps[BLOCK_STATS_CLUSTERS];
|
||||||
// blobs
|
// blobs
|
||||||
BLOBNBOX_IT blob_it = row->blob_list();
|
BLOBNBOX_IT blob_it = row->blob_list();
|
||||||
STATS gap_stats(0, maxwidth);
|
STATS gap_stats(0, maxwidth - 1);
|
||||||
STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
|
STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
|
||||||
// clusters
|
// clusters
|
||||||
|
|
||||||
@ -801,9 +801,9 @@ bool find_row_pitch( // find lines
|
|||||||
float dm_pitch; // pitch with dm on
|
float dm_pitch; // pitch with dm on
|
||||||
float pitch; // revised estimate
|
float pitch; // revised estimate
|
||||||
float initial_pitch; // guess at pitch
|
float initial_pitch; // guess at pitch
|
||||||
STATS gap_stats(0, maxwidth);
|
STATS gap_stats(0, maxwidth - 1);
|
||||||
// centre-centre
|
// centre-centre
|
||||||
STATS pitch_stats(0, maxwidth);
|
STATS pitch_stats(0, maxwidth - 1);
|
||||||
|
|
||||||
row->fixed_pitch = 0.0f;
|
row->fixed_pitch = 0.0f;
|
||||||
initial_pitch = row->fp_space;
|
initial_pitch = row->fp_space;
|
||||||
@ -1225,7 +1225,7 @@ float tune_row_pitch2( // find fp cells
|
|||||||
std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); // summed projection
|
std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); // summed projection
|
||||||
|
|
||||||
for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
|
for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
|
||||||
sum_proj[textord_pitch_range + pitch_delta].set_range(0, best_pitch + pitch_delta + 1);
|
sum_proj[textord_pitch_range + pitch_delta].set_range(0, best_pitch + pitch_delta);
|
||||||
}
|
}
|
||||||
for (pixel = projection_left; pixel <= projection_right; pixel++) {
|
for (pixel = projection_left; pixel <= projection_right; pixel++) {
|
||||||
for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
|
for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) {
|
||||||
|
@ -80,7 +80,7 @@ void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) {
|
|||||||
uint32_t *data = pixGetData(dist_pix);
|
uint32_t *data = pixGetData(dist_pix);
|
||||||
int wpl = pixGetWpl(dist_pix);
|
int wpl = pixGetWpl(dist_pix);
|
||||||
// Horizontal width of stroke.
|
// Horizontal width of stroke.
|
||||||
STATS h_stats(0, width + 1);
|
STATS h_stats(0, width);
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
uint32_t *pixels = data + y * wpl;
|
uint32_t *pixels = data + y * wpl;
|
||||||
int prev_pixel = 0;
|
int prev_pixel = 0;
|
||||||
@ -104,7 +104,7 @@ void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Vertical width of stroke.
|
// Vertical width of stroke.
|
||||||
STATS v_stats(0, height + 1);
|
STATS v_stats(0, height);
|
||||||
for (int x = 0; x < width; ++x) {
|
for (int x = 0; x < width; ++x) {
|
||||||
int prev_pixel = 0;
|
int prev_pixel = 0;
|
||||||
int pixel = GET_DATA_BYTE(data, x);
|
int pixel = GET_DATA_BYTE(data, x);
|
||||||
@ -300,7 +300,7 @@ float Textord::filter_noise_blobs(BLOBNBOX_LIST *src_list, // original list
|
|||||||
BLOBNBOX_IT noise_it = noise_list;
|
BLOBNBOX_IT noise_it = noise_list;
|
||||||
BLOBNBOX_IT small_it = small_list;
|
BLOBNBOX_IT small_it = small_list;
|
||||||
BLOBNBOX_IT large_it = large_list;
|
BLOBNBOX_IT large_it = large_list;
|
||||||
STATS size_stats(0, MAX_NEAREST_DIST);
|
STATS size_stats(0, MAX_NEAREST_DIST - 1);
|
||||||
// blob heights
|
// blob heights
|
||||||
float min_y; // size limits
|
float min_y; // size limits
|
||||||
float max_y;
|
float max_y;
|
||||||
|
@ -112,10 +112,10 @@ void Textord::block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, bool &old_tex
|
|||||||
TO_ROW *row; // current row
|
TO_ROW *row; // current row
|
||||||
BLOBNBOX_IT blob_it; // iterator
|
BLOBNBOX_IT blob_it; // iterator
|
||||||
|
|
||||||
STATS centre_to_centre_stats(0, MAXSPACING);
|
STATS centre_to_centre_stats(0, MAXSPACING - 1);
|
||||||
// DEBUG USE ONLY
|
// DEBUG USE ONLY
|
||||||
STATS all_gap_stats(0, MAXSPACING);
|
STATS all_gap_stats(0, MAXSPACING - 1);
|
||||||
STATS space_gap_stats(0, MAXSPACING);
|
STATS space_gap_stats(0, MAXSPACING - 1);
|
||||||
int16_t minwidth = MAXSPACING; // narrowest blob
|
int16_t minwidth = MAXSPACING; // narrowest blob
|
||||||
TBOX blob_box;
|
TBOX blob_box;
|
||||||
TBOX prev_blob_box;
|
TBOX prev_blob_box;
|
||||||
@ -274,10 +274,10 @@ void Textord::row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx,
|
|||||||
) {
|
) {
|
||||||
// iterator
|
// iterator
|
||||||
BLOBNBOX_IT blob_it = row->blob_list();
|
BLOBNBOX_IT blob_it = row->blob_list();
|
||||||
STATS all_gap_stats(0, MAXSPACING);
|
STATS all_gap_stats(0, MAXSPACING - 1);
|
||||||
STATS cert_space_gap_stats(0, MAXSPACING);
|
STATS cert_space_gap_stats(0, MAXSPACING - 1);
|
||||||
STATS all_space_gap_stats(0, MAXSPACING);
|
STATS all_space_gap_stats(0, MAXSPACING - 1);
|
||||||
STATS small_gap_stats(0, MAXSPACING);
|
STATS small_gap_stats(0, MAXSPACING - 1);
|
||||||
TBOX blob_box;
|
TBOX blob_box;
|
||||||
TBOX prev_blob_box;
|
TBOX prev_blob_box;
|
||||||
int16_t gap_width;
|
int16_t gap_width;
|
||||||
@ -636,9 +636,9 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta
|
|||||||
int16_t total;
|
int16_t total;
|
||||||
// iterator
|
// iterator
|
||||||
BLOBNBOX_IT blob_it = row->blob_list();
|
BLOBNBOX_IT blob_it = row->blob_list();
|
||||||
STATS cert_space_gap_stats(0, MAXSPACING);
|
STATS cert_space_gap_stats(0, MAXSPACING - 1);
|
||||||
STATS all_space_gap_stats(0, MAXSPACING);
|
STATS all_space_gap_stats(0, MAXSPACING - 1);
|
||||||
STATS small_gap_stats(0, MAXSPACING);
|
STATS small_gap_stats(0, MAXSPACING - 1);
|
||||||
TBOX blob_box;
|
TBOX blob_box;
|
||||||
TBOX prev_blob_box;
|
TBOX prev_blob_box;
|
||||||
int16_t gap_width;
|
int16_t gap_width;
|
||||||
|
@ -166,9 +166,9 @@ void find_underlined_blobs( // get chop points
|
|||||||
TBOX blob_box = u_line->bounding_box();
|
TBOX blob_box = u_line->bounding_box();
|
||||||
// cell iterator
|
// cell iterator
|
||||||
ICOORDELT_IT cell_it = chop_cells;
|
ICOORDELT_IT cell_it = chop_cells;
|
||||||
STATS upper_proj(blob_box.left(), blob_box.right() + 1);
|
STATS upper_proj(blob_box.left(), blob_box.right());
|
||||||
STATS middle_proj(blob_box.left(), blob_box.right() + 1);
|
STATS middle_proj(blob_box.left(), blob_box.right());
|
||||||
STATS lower_proj(blob_box.left(), blob_box.right() + 1);
|
STATS lower_proj(blob_box.left(), blob_box.right());
|
||||||
C_OUTLINE_IT out_it; // outlines of blob
|
C_OUTLINE_IT out_it; // outlines of blob
|
||||||
|
|
||||||
ASSERT_HOST(u_line->cblob() != nullptr);
|
ASSERT_HOST(u_line->cblob() != nullptr);
|
||||||
|
@ -185,7 +185,7 @@ int32_t row_words( // compute space size
|
|||||||
TBOX blob_box; // bounding box
|
TBOX blob_box; // bounding box
|
||||||
// iterator
|
// iterator
|
||||||
BLOBNBOX_IT blob_it = row->blob_list();
|
BLOBNBOX_IT blob_it = row->blob_list();
|
||||||
STATS gap_stats(0, maxwidth);
|
STATS gap_stats(0, maxwidth - 1);
|
||||||
STATS cluster_stats[4]; // clusters
|
STATS cluster_stats[4]; // clusters
|
||||||
|
|
||||||
testpt = ICOORD(textord_test_x, textord_test_y);
|
testpt = ICOORD(textord_test_x, textord_test_y);
|
||||||
@ -341,7 +341,7 @@ int32_t row_words2( // compute space size
|
|||||||
TBOX blob_box; // bounding box
|
TBOX blob_box; // bounding box
|
||||||
// iterator
|
// iterator
|
||||||
BLOBNBOX_IT blob_it = row->blob_list();
|
BLOBNBOX_IT blob_it = row->blob_list();
|
||||||
STATS gap_stats(0, maxwidth);
|
STATS gap_stats(0, maxwidth - 1);
|
||||||
// gap sizes
|
// gap sizes
|
||||||
float gaps[BLOCK_STATS_CLUSTERS];
|
float gaps[BLOCK_STATS_CLUSTERS];
|
||||||
STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
|
STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
|
||||||
|
@ -22,7 +22,7 @@ class STATSTest : public testing::Test {
|
|||||||
public:
|
public:
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
std::locale::global(std::locale(""));
|
std::locale::global(std::locale(""));
|
||||||
stats_.set_range(0, 16);
|
stats_.set_range(0, 15);
|
||||||
for (size_t i = 0; i < countof(kTestData); ++i) {
|
for (size_t i = 0; i < countof(kTestData); ++i) {
|
||||||
stats_.add(i, kTestData[i]);
|
stats_.add(i, kTestData[i]);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user