diff --git a/src/ccmain/applybox.cpp b/src/ccmain/applybox.cpp index 70811423..d550adfe 100644 --- a/src/ccmain/applybox.cpp +++ b/src/ccmain/applybox.cpp @@ -159,7 +159,7 @@ PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation, // Helper computes median xheight in the image. static double MedianXHeight(BLOCK_LIST *block_list) { BLOCK_IT block_it(block_list); - STATS xheights(0, block_it.data()->pdblk.bounding_box().height()); + STATS xheights(0, block_it.data()->pdblk.bounding_box().height() - 1); for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { ROW_IT row_it(block_it.data()->row_list()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index 944ed1ef..6f5fcff4 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -2015,7 +2015,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) { void Tesseract::font_recognition_pass(PAGE_RES *page_res) { PAGE_RES_IT page_res_it(page_res); WERD_RES *word; // current word - STATS doc_fonts(0, font_table_size_); // font counters + STATS doc_fonts(0, font_table_size_ - 1); // font counters // Gather font id statistics. for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) { diff --git a/src/ccmain/fixxht.cpp b/src/ccmain/fixxht.cpp index 80ea0831..9253673d 100644 --- a/src/ccmain/fixxht.cpp +++ b/src/ccmain/fixxht.cpp @@ -103,8 +103,8 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) { // Returns a new x-height maximally compatible with the result in word_res. // See comment above for overall algorithm. 
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift) { - STATS top_stats(0, UINT8_MAX); - STATS shift_stats(-UINT8_MAX, UINT8_MAX); + STATS top_stats(0, UINT8_MAX - 1); + STATS shift_stats(-UINT8_MAX, UINT8_MAX - 1); int bottom_shift = 0; int num_blobs = word_res->rebuild_word->NumBlobs(); do { diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp index 601afe9f..fec08810 100644 --- a/src/ccmain/paragraphs.cpp +++ b/src/ccmain/paragraphs.cpp @@ -1623,8 +1623,8 @@ void RecomputeMarginsAndClearHypotheses(std::vector *rows, UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax); UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax); } - STATS lefts(lmin, lmax + 1); - STATS rights(rmin, rmax + 1); + STATS lefts(lmin, lmax); + STATS rights(rmin, rmax); for (int i = start; i < end; i++) { RowScratchRegisters &sr = (*rows)[i]; if (sr.ri_->num_words == 0) { @@ -1655,7 +1655,7 @@ int InterwordSpace(const std::vector &rows, int row_start, (rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2; int word_width = (rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2; - STATS spacing_widths(0, 5 + word_width); + STATS spacing_widths(0, 4 + word_width); for (int i = row_start; i < row_end; i++) { if (rows[i].ri_->num_words > 1) { spacing_widths.add(rows[i].ri_->average_interword_space, 1); diff --git a/src/ccstruct/blobbox.cpp b/src/ccstruct/blobbox.cpp index 6539a723..6201a31c 100644 --- a/src/ccstruct/blobbox.cpp +++ b/src/ccstruct/blobbox.cpp @@ -810,7 +810,7 @@ void TO_ROW::compute_vertical_projection() { // project whole row row_box += blob_it.data()->bounding_box(); } - projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN); + projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN - 1); projection_left = row_box.left() - PROJECTION_MARGIN; projection_right = row_box.right() + 
PROJECTION_MARGIN; for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h index 5fafb3da..996448e3 100644 --- a/src/ccstruct/blobbox.h +++ b/src/ccstruct/blobbox.h @@ -725,8 +725,8 @@ public: ASSERT_HOST(block->pdblk.poly_block() != nullptr); block->rotate(rotation); // Update the median size statistic from the blobs list. - STATS widths(0, block->pdblk.bounding_box().width()); - STATS heights(0, block->pdblk.bounding_box().height()); + STATS widths(0, block->pdblk.bounding_box().width() - 1); + STATS heights(0, block->pdblk.bounding_box().height() - 1); BLOBNBOX_IT blob_it(&blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { widths.add(blob_it.data()->bounding_box().width(), 1); diff --git a/src/ccstruct/statistc.cpp b/src/ccstruct/statistc.cpp index 70c10fc3..5df60be1 100644 --- a/src/ccstruct/statistc.cpp +++ b/src/ccstruct/statistc.cpp @@ -40,14 +40,14 @@ namespace tesseract { * * Construct a new stats element by allocating and zeroing the memory. **********************************************************************/ -STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) { - if (max_bucket_value_plus_1 <= min_bucket_value) { +STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value) { + if (max_bucket_value < min_bucket_value) { min_bucket_value = 0; - max_bucket_value_plus_1 = 1; + max_bucket_value = 1; } rangemin_ = min_bucket_value; // setup - rangemax_ = max_bucket_value_plus_1; - buckets_ = new int32_t[rangemax_ - rangemin_]; + rangemax_ = max_bucket_value; + buckets_ = new int32_t[1 + rangemax_ - rangemin_]; clear(); } @@ -56,16 +56,16 @@ STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) { * * Alter the range on an existing stats element. 
**********************************************************************/ -bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) { - if (max_bucket_value_plus_1 <= min_bucket_value) { +bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value) { + if (max_bucket_value < min_bucket_value) { return false; } - if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) { + if (buckets_ == nullptr || rangemax_ - rangemin_ != max_bucket_value - min_bucket_value) { delete[] buckets_; - buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value]; + buckets_ = new int32_t[1 + max_bucket_value - min_bucket_value]; } rangemin_ = min_bucket_value; // setup - rangemax_ = max_bucket_value_plus_1; + rangemax_ = max_bucket_value; clear(); // zero it return true; } @@ -78,7 +78,7 @@ bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) void STATS::clear() { // clear out buckets total_count_ = 0; if (buckets_ != nullptr) { - memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0])); + memset(buckets_, 0, (1 + rangemax_ - rangemin_) * sizeof(buckets_[0])); } } @@ -97,12 +97,11 @@ STATS::~STATS() { * Add a set of samples to (or delete from) a pile. 
**********************************************************************/ void STATS::add(int32_t value, int32_t count) { - if (buckets_ == nullptr) { - return; + if (buckets_ != nullptr) { + value = ClipToRange(value, rangemin_, rangemax_); + buckets_[value - rangemin_] += count; + total_count_ += count; // keep count of total } - value = ClipToRange(value, rangemin_, rangemax_ - 1); - buckets_[value - rangemin_] += count; - total_count_ += count; // keep count of total } /********************************************************************** @@ -116,7 +115,7 @@ int32_t STATS::mode() const { // get mode of samples } int32_t max = buckets_[0]; // max cell count int32_t maxindex = 0; // index of max - for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) { + for (int index = rangemax_ - rangemin_; index > 0; --index) { if (buckets_[index] > max) { max = buckets_[index]; // find biggest maxindex = index; @@ -135,7 +134,7 @@ double STATS::mean() const { // get mean of samples return static_cast(rangemin_); } int64_t sum = 0; - for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { + for (int index = rangemax_ - rangemin_; index >= 0; --index) { sum += static_cast(index) * buckets_[index]; } return static_cast(sum) / total_count_ + rangemin_; @@ -152,7 +151,7 @@ double STATS::sd() const { // standard deviation } int64_t sum = 0; double sqsum = 0.0; - for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { + for (int index = rangemax_ - rangemin_; index >= 0; --index) { sum += static_cast(index) * buckets_[index]; sqsum += static_cast(index) * index * buckets_[index]; } @@ -186,7 +185,7 @@ double STATS::ile(double frac) const { #endif int sum = 0; int index = 0; - for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) { + for (index = 0; index <= rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) { ; } if (index > 0) { @@ -207,7 +206,7 @@ int32_t STATS::min_bucket() const { // Find min return 
rangemin_; } int32_t min = 0; - for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) { + for (min = 0; (min <= rangemax_ - rangemin_) && (buckets_[min] == 0); min++) { ; } return rangemin_ + min; @@ -224,7 +223,7 @@ int32_t STATS::max_bucket() const { // Find max return rangemin_; } int32_t max; - for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) { + for (max = rangemax_ - rangemin_; max > 0 && buckets_[max] == 0; max--) { ; } return rangemin_ + max; @@ -270,7 +269,7 @@ bool STATS::local_min(int32_t x) const { if (buckets_ == nullptr) { return false; } - x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_; + x = ClipToRange(x, rangemin_, rangemax_) - rangemin_; if (buckets_[x] == 0) { return true; } @@ -281,10 +280,10 @@ bool STATS::local_min(int32_t x) const { if (index >= 0 && buckets_[index] < buckets_[x]) { return false; } - for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) { + for (index = x + 1; index <= rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) { ; } - if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) { + if (index <= rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) { return false; } else { return true; @@ -304,7 +303,7 @@ void STATS::smooth(int32_t factor) { return; } STATS result(rangemin_, rangemax_); - int entrycount = rangemax_ - rangemin_; + int entrycount = 1 + rangemax_ - rangemin_; for (int entry = 0; entry < entrycount; entry++) { // centre weight int count = buckets_[entry] * factor; @@ -368,7 +367,7 @@ int32_t STATS::cluster(float lower, // thresholds clusters[0].add(entry, count); } } - for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ && + for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ && pile_count(entry) <= pile_count(entry - 1); entry++) { count = pile_count(entry) - clusters[0].pile_count(entry); @@ -386,7 +385,7 @@ 
int32_t STATS::cluster(float lower, // thresholds do { new_cluster = false; new_mode = 0; - for (entry = 0; entry < rangemax_ - rangemin_; entry++) { + for (entry = 0; entry <= rangemax_ - rangemin_; entry++) { count = buckets_[entry] - clusters[0].buckets_[entry]; // remaining pile if (count > 0) { // any to handle @@ -433,7 +432,7 @@ int32_t STATS::cluster(float lower, // thresholds clusters[0].add(entry, count); } } - for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ && + for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ && pile_count(entry) <= pile_count(entry - 1); entry++) { count = pile_count(entry) - clusters[0].pile_count(entry); @@ -482,7 +481,7 @@ int STATS::top_n_modes(int max_modes, std::vector> &modes) if (max_modes <= 0) { return 0; } - int src_count = rangemax_ - rangemin_; + int src_count = 1 + rangemax_ - rangemin_; // Used copies the counts in buckets_ as they get used. STATS used(rangemin_, rangemax_); modes.clear(); @@ -605,7 +604,7 @@ void STATS::plot(ScrollView *window, // to draw in } window->Pen(colour); - for (int index = 0; index < rangemax_ - rangemin_; index++) { + for (int index = 0; index <= rangemax_ - rangemin_; index++) { window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1), yorigin + yscale * buckets_[index]); } @@ -630,7 +629,7 @@ void STATS::plotline(ScrollView *window, // to draw in } window->Pen(colour); window->SetCursor(xorigin, yorigin + yscale * buckets_[0]); - for (int index = 0; index < rangemax_ - rangemin_; index++) { + for (int index = 0; index <= rangemax_ - rangemin_; index++) { window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]); } } diff --git a/src/ccstruct/statistc.h b/src/ccstruct/statistc.h index 4689d3dd..2c135cc1 100644 --- a/src/ccstruct/statistc.h +++ b/src/ccstruct/statistc.h @@ -30,23 +30,20 @@ namespace tesseract { class TESS_API STATS { public: // The histogram buckets 
are in the range - // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e. // [min_bucket_value, max_bucket_value]. // Any data under min_bucket value is silently mapped to min_bucket_value, // and likewise, any data over max_bucket_value is silently mapped to // max_bucket_value. // In the internal array, min_bucket_value maps to 0 and - // max_bucket_value_plus_1 - min_bucket_value to the array size. - // TODO(rays) This is ugly. Convert the second argument to - // max_bucket_value and all the code that uses it. - STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1); + // 1 + max_bucket_value - min_bucket_value to the array size. + STATS(int32_t min_bucket_value, int32_t max_bucket_value); STATS() = default; // empty for arrays ~STATS(); // (Re)Sets the range and clears the counts. // See the constructor for info on max and min values. - bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1); + bool set_range(int32_t min_bucket_value, int32_t max_bucket_value); void clear(); // empty buckets @@ -79,8 +76,8 @@ public: if (value <= rangemin_) { return buckets_[0]; } - if (value >= rangemax_ - 1) { - return buckets_[rangemax_ - rangemin_ - 1]; + if (value >= rangemax_) { + return buckets_[rangemax_ - rangemin_]; } return buckets_[value - rangemin_]; } @@ -142,7 +139,6 @@ public: private: int32_t rangemin_ = 0; // min of range - // rangemax_ is not well named as it is really one past the max. 
int32_t rangemax_ = 0; // max of range int32_t total_count_ = 0; // no of samples int32_t *buckets_ = nullptr; // array of cells diff --git a/src/lstm/lstmrecognizer.cpp b/src/lstm/lstmrecognizer.cpp index 52f8a9f6..658d89e9 100644 --- a/src/lstm/lstmrecognizer.cpp +++ b/src/lstm/lstmrecognizer.cpp @@ -294,7 +294,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo void LSTMRecognizer::OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd) { const int kOutputScale = INT8_MAX; - STATS stats(0, kOutputScale + 1); + STATS stats(0, kOutputScale); for (int t = 0; t < outputs.Width(); ++t) { int best_label = outputs.BestLabel(t, nullptr); if (best_label != null_char_) { diff --git a/src/lstm/networkio.cpp b/src/lstm/networkio.cpp index 080fed0f..a6e7d69a 100644 --- a/src/lstm/networkio.cpp +++ b/src/lstm/networkio.cpp @@ -127,7 +127,7 @@ void NetworkIO::ZeroInvalidElements() { static void ComputeBlackWhite(Image pix, float *black, float *white) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); - STATS mins(0, 256), maxes(0, 256); + STATS mins(0, 255), maxes(0, 255); if (width >= 3) { int y = height / 2; l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y; diff --git a/src/lstm/weightmatrix.cpp b/src/lstm/weightmatrix.cpp index 57a07dcb..86255266 100644 --- a/src/lstm/weightmatrix.cpp +++ b/src/lstm/weightmatrix.cpp @@ -525,7 +525,7 @@ static void HistogramWeight(TFloat weight, STATS *histogram) { } void WeightMatrix::Debug2D(const char *msg) { - STATS histogram(0, kHistogramBuckets); + STATS histogram(0, kHistogramBuckets - 1); if (int_mode_) { for (int i = 0; i < wi_.dim1(); ++i) { for (int j = 0; j < wi_.dim2(); ++j) { diff --git a/src/textord/baselinedetect.cpp b/src/textord/baselinedetect.cpp index 309ee51b..82df673b 100644 --- a/src/textord/baselinedetect.cpp +++ b/src/textord/baselinedetect.cpp @@ -320,7 +320,7 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) { } 
// Set up a histogram using disp_quant_factor_ as the bucket size. STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), - IntCastRounded(max_dist / disp_quant_factor_) + 1); + IntCastRounded(max_dist / disp_quant_factor_)); for (double perp_blob_dist : perp_blob_dists) { dist_stats.add(IntCastRounded(perp_blob_dist / disp_quant_factor_), 1); } diff --git a/src/textord/blkocc.cpp b/src/textord/blkocc.cpp index 26fe308c..69e18f6a 100644 --- a/src/textord/blkocc.cpp +++ b/src/textord/blkocc.cpp @@ -55,7 +55,7 @@ bool test_underline( // look for underlines auto blob_box = blob->bounding_box(); auto blob_width = blob->bounding_box().width(); - projection.set_range(blob_box.bottom(), blob_box.top() + 1); + projection.set_range(blob_box.bottom(), blob_box.top()); if (testing_on) { // blob->plot(to_win,GOLDENROD,GOLDENROD); // line_color_index(to_win,GOLDENROD); diff --git a/src/textord/colfind.cpp b/src/textord/colfind.cpp index 1d4f5d61..9e8558bf 100644 --- a/src/textord/colfind.cpp +++ b/src/textord/colfind.cpp @@ -808,7 +808,7 @@ bool ColumnFinder::BiggestUnassignedRange(int set_count, const bool *any_columns int ColumnFinder::RangeModalColumnSet(int **column_set_costs, const int *assigned_costs, int start, int end) { int column_count = column_sets_.size(); - STATS column_stats(0, column_count); + STATS column_stats(0, column_count - 1); for (int part_i = start; part_i < end; ++part_i) { for (int col_j = 0; col_j < column_count; ++col_j) { if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) { @@ -1577,8 +1577,8 @@ void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST *block FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); // Rotate all the blobs if needed and recompute the bounding boxes. // Compute the block median blob width and height as we go. 
- STATS widths(0, block->pdblk.bounding_box().width()); - STATS heights(0, block->pdblk.bounding_box().height()); + STATS widths(0, block->pdblk.bounding_box().width() - 1); + STATS heights(0, block->pdblk.bounding_box().height() - 1); RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, &widths, &heights); TO_ROW_IT row_it(to_block->get_rows()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { diff --git a/src/textord/colpartition.cpp b/src/textord/colpartition.cpp index 7ddb88ed..27ca2c91 100644 --- a/src/textord/colpartition.cpp +++ b/src/textord/colpartition.cpp @@ -949,12 +949,12 @@ void ColPartition::ComputeLimits() { median_right_ = bounding_box_.right(); median_width_ = bounding_box_.width(); } else { - STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1); - STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1); - STATS height_stats(0, bounding_box_.height() + 1); - STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1); - STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1); - STATS width_stats(0, bounding_box_.width() + 1); + STATS top_stats(bounding_box_.bottom(), bounding_box_.top()); + STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top()); + STATS height_stats(0, bounding_box_.height()); + STATS left_stats(bounding_box_.left(), bounding_box_.right()); + STATS right_stats(bounding_box_.left(), bounding_box_.right()); + STATS width_stats(0, bounding_box_.width()); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) { @@ -1132,8 +1132,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { bool result = false; // Gather statistics on the gaps between blobs and the widths of the blobs. 
int part_width = bounding_box_.width(); - STATS gap_stats(0, part_width); - STATS width_stats(0, part_width); + STATS gap_stats(0, part_width - 1); + STATS width_stats(0, part_width - 1); BLOBNBOX_C_IT it(&boxes_); BLOBNBOX *prev_blob = it.data(); prev_blob->set_flow(BTFT_NEIGHBOURS); @@ -1489,7 +1489,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition *part = it.data(); ASSERT_HOST(!part->boxes()->empty()); - STATS side_steps(0, part->bounding_box().height()); + STATS side_steps(0, part->bounding_box().height() - 1); if (part->bounding_box().height() > max_line_height) { max_line_height = part->bounding_box().height(); } @@ -1626,7 +1626,7 @@ static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, // that have have to continue to exist until the part grid is deleted. // Compute the median blob size as we go, as the block needs to know. TBOX block_box(block->pdblk.bounding_box()); - STATS sizes(0, std::max(block_box.width(), block_box.height())); + STATS sizes(0, std::max(block_box.width(), block_box.height()) - 1); bool text_type = block->pdblk.poly_block()->IsText(); ColPartition_IT it(block_parts); auto *to_block = new TO_BLOCK(block); @@ -1884,8 +1884,8 @@ void ColPartition::PrintColors() { // Sets the types of all partitions in the run to be the max of the types. void ColPartition::SmoothPartnerRun(int working_set_count) { - STATS left_stats(0, working_set_count); - STATS right_stats(0, working_set_count); + STATS left_stats(0, working_set_count - 1); + STATS right_stats(0, working_set_count - 1); PolyBlockType max_type = type_; ColPartition *partner; for (partner = SingletonPartner(false); partner != nullptr; @@ -2250,7 +2250,7 @@ bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, // The iterator is passed by value so the iteration does not modify the // caller's iterator. 
static int MedianSpacing(int page_height, ColPartition_IT it) { - STATS stats(0, page_height); + STATS stats(0, page_height - 1); while (!it.cycled_list()) { ColPartition *part = it.data(); it.forward(); diff --git a/src/textord/devanagari_processing.cpp b/src/textord/devanagari_processing.cpp index 4ee9253c..5deccea6 100644 --- a/src/textord/devanagari_processing.cpp +++ b/src/textord/devanagari_processing.cpp @@ -3,7 +3,6 @@ * Description: Methods to process images containing devanagari symbols, * prior to classification. * Author: Shobhit Saxena - * Created: Mon Nov 17 20:26:01 IST 2008 * * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -387,7 +386,7 @@ Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const { // It also prunes very small blobs from calculation. int ShiroRekhaSplitter::GetModeHeight(Image pix) { Boxa *boxa = pixConnComp(pix, nullptr, 8); - STATS heights(0, pixGetHeight(pix)); + STATS heights(0, pixGetHeight(pix) - 1); heights.clear(); for (int i = 0; i < boxaGetCount(boxa); ++i) { Box *box = boxaGetBox(boxa, i, L_CLONE); diff --git a/src/textord/gap_map.cpp b/src/textord/gap_map.cpp index cc93d6c9..4c20d117 100644 --- a/src/textord/gap_map.cpp +++ b/src/textord/gap_map.cpp @@ -43,7 +43,7 @@ GAPMAP::GAPMAP( // Constructor int16_t gap_width; int16_t start_of_row; int16_t end_of_row; - STATS xht_stats(0, 128); + STATS xht_stats(0, 127); int16_t min_quantum; int16_t max_quantum; int16_t i; diff --git a/src/textord/imagefind.cpp b/src/textord/imagefind.cpp index 94e80807..ac769226 100644 --- a/src/textord/imagefind.cpp +++ b/src/textord/imagefind.cpp @@ -451,9 +451,9 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Image pix, int factor, Image scaled = pixClipRectangle(pix, scaled_box, nullptr); // Compute stats over the whole image. 
- STATS red_stats(0, 256); - STATS green_stats(0, 256); - STATS blue_stats(0, 256); + STATS red_stats(0, 255); + STATS green_stats(0, 255); + STATS blue_stats(0, 255); uint32_t *data = pixGetData(scaled); ASSERT_HOST(pixGetWpl(scaled) == width_pad); for (int y = 0; y < height_pad; ++y) { diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp index 61d8fde9..984a9d0d 100644 --- a/src/textord/makerow.cpp +++ b/src/textord/makerow.cpp @@ -518,7 +518,7 @@ void vigorous_noise_removal(TO_BLOCK *block) { max_height = blob->bounding_box().height(); } } - STATS hstats(0, max_height + 1); + STATS hstats(0, max_height); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOBNBOX *blob = b_it.data(); int height = blob->bounding_box().height(); @@ -1288,16 +1288,16 @@ void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { // Compute the best guess of xheight of each row individually. // Use xheight and ascrise values of the rows where ascenders were found. get_min_max_xheight(block->line_size, &min_height, &max_height); - STATS row_asc_xheights(min_height, max_height + 1); + STATS row_asc_xheights(min_height, max_height); STATS row_asc_ascrise(static_cast(min_height * asc_frac_xheight), - static_cast(max_height * asc_frac_xheight) + 1); + static_cast(max_height * asc_frac_xheight)); int min_desc_height = static_cast(min_height * desc_frac_xheight); int max_desc_height = static_cast(max_height * desc_frac_xheight); - STATS row_asc_descdrop(min_desc_height, max_desc_height + 1); - STATS row_desc_xheights(min_height, max_height + 1); - STATS row_desc_descdrop(min_desc_height, max_desc_height + 1); - STATS row_cap_xheights(min_height, max_height + 1); - STATS row_cap_floating_xheights(min_height, max_height + 1); + STATS row_asc_descdrop(min_desc_height, max_desc_height); + STATS row_desc_xheights(min_height, max_height); + STATS row_desc_descdrop(min_desc_height, max_desc_height); + STATS row_cap_xheights(min_height, max_height); + STATS 
row_cap_floating_xheights(min_height, max_height); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { row = row_it.data(); // Compute the xheight of this row if it has not been computed before. @@ -1394,8 +1394,8 @@ void Textord::compute_row_xheight(TO_ROW *row, // row to do int min_height, max_height; get_min_max_xheight(block_line_size, &min_height, &max_height); - STATS heights(min_height, max_height + 1); - STATS floating_heights(min_height, max_height + 1); + STATS heights(min_height, max_height); + STATS floating_heights(min_height, max_height); fill_heights(row, gradient, min_height, max_height, &heights, &floating_heights); row->ascrise = 0.0f; row->xheight = 0.0f; @@ -1594,7 +1594,7 @@ int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count float height; // height of blob BLOBNBOX_IT blob_it = row->blob_list(); BLOBNBOX *blob; // current blob - STATS heights(min_height, max_height + 1); + STATS heights(min_height, max_height); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { blob = blob_it.data(); if (!blob->joined_to_prev()) { diff --git a/src/textord/oldbasel.cpp b/src/textord/oldbasel.cpp index d5291fd4..fe26591f 100644 --- a/src/textord/oldbasel.cpp +++ b/src/textord/oldbasel.cpp @@ -426,7 +426,7 @@ int get_blob_coords( // get boxes int losscount; // lost blobs int maxlosscount; // greatest lost blobs /*height stat collection */ - STATS heightstat(0, MAXHEIGHT); + STATS heightstat(0, MAXHEIGHT - 1); if (blob_it.empty()) { return 0; // none @@ -1347,7 +1347,7 @@ void old_first_xheight( // the wiseowl way ) { int blobindex; /*current blob */ /*height statistics */ - STATS heightstat(0, MAXHEIGHT); + STATS heightstat(0, MAXHEIGHT - 1); int height; /*height of blob */ int xcentre; /*centre of blob */ int lineheight; /*approx xheight */ @@ -1427,7 +1427,7 @@ void make_first_xheight( // find xheight QSPLINE *baseline, /*established */ float jumplimit /*min ascender height */ ) { - STATS 
heightstat(0, HEIGHTBUCKETS); + STATS heightstat(0, HEIGHTBUCKETS - 1); int lefts[HEIGHTBUCKETS]; int rights[HEIGHTBUCKETS]; int modelist[MODENUM]; diff --git a/src/textord/strokewidth.cpp b/src/textord/strokewidth.cpp index c1fa3c69..8cf23db1 100644 --- a/src/textord/strokewidth.cpp +++ b/src/textord/strokewidth.cpp @@ -567,7 +567,7 @@ void StrokeWidth::MarkLeaderNeighbours(const ColPartition *part, LeftOrRight sid // Helper to compute the UQ of the square-ish CJK characters. static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST *blobs) { - STATS sizes(0, gridsize * kMaxCJKSizeRatio); + STATS sizes(0, gridsize * kMaxCJKSizeRatio - 1); BLOBNBOX_IT it(blobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX *blob = it.data(); diff --git a/src/textord/tabfind.cpp b/src/textord/tabfind.cpp index bb0d15d9..462d0ff9 100644 --- a/src/textord/tabfind.cpp +++ b/src/textord/tabfind.cpp @@ -971,7 +971,7 @@ void TabFind::ComputeColumnWidths(ScrollView *tab_win, ColPartitionGrid *part_gr #endif // !GRAPHICS_DISABLED // Accumulate column sections into a STATS int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor; - STATS col_widths(0, col_widths_size + 1); + STATS col_widths(0, col_widths_size); ApplyPartitionsToColumnWidths(part_grid, &col_widths); #ifndef GRAPHICS_DISABLED if (tab_win != nullptr) { @@ -1099,8 +1099,8 @@ int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) { TabVector_IT it(lines); int prev_right = -1; int max_gap = static_cast(kMaxGutterWidthAbsolute * resolution_); - STATS gaps(0, max_gap); - STATS heights(0, max_gap); + STATS gaps(0, max_gap - 1); + STATS heights(0, max_gap - 1); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TabVector *v = it.data(); TabVector *partner = v->GetSinglePartner(); diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp index c0eed61d..d67f1824 100644 --- a/src/textord/tablefind.cpp +++ b/src/textord/tablefind.cpp @@ -714,9 +714,9 @@ void 
TableFinder::SetVerticalSpacing(ColPartition *part) { // Set global spacing and x-height estimates void TableFinder::SetGlobalSpacings(ColPartitionGrid *grid) { - STATS xheight_stats(0, kMaxVerticalSpacing + 1); - STATS width_stats(0, kMaxBlobWidth + 1); - STATS ledding_stats(0, kMaxVerticalSpacing + 1); + STATS xheight_stats(0, kMaxVerticalSpacing); + STATS width_stats(0, kMaxBlobWidth); + STATS ledding_stats(0, kMaxVerticalSpacing); // Iterate the ColPartitions in the grid. ColPartitionGridSearch gsearch(grid); gsearch.SetUniqueMode(true); diff --git a/src/textord/tablerecog.cpp b/src/textord/tablerecog.cpp index 2bcb4c07..70547339 100644 --- a/src/textord/tablerecog.cpp +++ b/src/textord/tablerecog.cpp @@ -529,8 +529,8 @@ int StructuredTable::FindHorizontalMargin(ColPartitionGrid *grid, int border, bo void StructuredTable::CalculateStats() { const int kMaxCellHeight = 1000; const int kMaxCellWidth = 1000; - STATS height_stats(0, kMaxCellHeight + 1); - STATS width_stats(0, kMaxCellWidth + 1); + STATS height_stats(0, kMaxCellHeight); + STATS width_stats(0, kMaxCellWidth); for (unsigned i = 0; i < row_count(); ++i) { height_stats.add(row_height(i), column_count()); diff --git a/src/textord/tabvector.cpp b/src/textord/tabvector.cpp index 1d82397d..8ef12b27 100644 --- a/src/textord/tabvector.cpp +++ b/src/textord/tabvector.cpp @@ -608,7 +608,7 @@ void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) { max_gutter = kGutterToNeighbourRatio * mean_height; } - STATS gutters(0, max_gutter + 1); + STATS gutters(0, max_gutter); // Evaluate the boxes for their goodness, calculating the coverage as we go. // Remove boxes that are not good and shorten the list to the first and // last good boxes. 
@@ -901,7 +901,7 @@ TabVector *TabVector::VerticalTextlinePartner() { if (width < 0) { width = -width; } - STATS gaps(0, width * 2); + STATS gaps(0, width * 2 - 1); BLOBNBOX *prev_bbox = nullptr; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { diff --git a/src/textord/topitch.cpp b/src/textord/topitch.cpp index 53f9322d..b5fcacb7 100644 --- a/src/textord/topitch.cpp +++ b/src/textord/topitch.cpp @@ -163,8 +163,8 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix block_votes = like_votes = other_votes = 0; maxwidth = static_cast(ceil(bad_row->xheight * textord_words_maxspace)); if (bad_row->pitch_decision != PITCH_DEF_FIXED && bad_row->pitch_decision != PITCH_DEF_PROP) { - block_stats.set_range(0, maxwidth); - like_stats.set_range(0, maxwidth); + block_stats.set_range(0, maxwidth - 1); + like_stats.set_range(0, maxwidth - 1); block_index = 1; for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { block = block_it.data(); @@ -392,7 +392,7 @@ bool try_doc_fixed( // determine pitch float final_pitch; // output pitch float row_y; // baseline STATS projection; // entire page - STATS pitches(0, MAX_ALLOWED_PITCH); + STATS pitches(0, MAX_ALLOWED_PITCH - 1); // for median float sp_sd; // space sd int16_t mid_cuts; // no of cheap cuts @@ -438,7 +438,7 @@ bool try_doc_fixed( // determine pitch if (pitches.get_total() == 0) { return false; } - projection.set_range(projection_left, projection_right); + projection.set_range(projection_left, projection_right - 1); for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { block = block_it.data(); @@ -663,7 +663,7 @@ bool row_pitch_stats( // find line stats float gaps[BLOCK_STATS_CLUSTERS]; // blobs BLOBNBOX_IT blob_it = row->blob_list(); - STATS gap_stats(0, maxwidth); + STATS gap_stats(0, maxwidth - 1); STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; // clusters @@ -801,9 +801,9 @@ bool find_row_pitch( // find lines float dm_pitch; // 
pitch with dm on float pitch; // revised estimate float initial_pitch; // guess at pitch - STATS gap_stats(0, maxwidth); + STATS gap_stats(0, maxwidth - 1); // centre-centre - STATS pitch_stats(0, maxwidth); + STATS pitch_stats(0, maxwidth - 1); row->fixed_pitch = 0.0f; initial_pitch = row->fp_space; @@ -1225,7 +1225,7 @@ float tune_row_pitch2( // find fp cells std::unique_ptr sum_proj(new STATS[textord_pitch_range * 2 + 1]); // summed projection for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) { - sum_proj[textord_pitch_range + pitch_delta].set_range(0, best_pitch + pitch_delta + 1); + sum_proj[textord_pitch_range + pitch_delta].set_range(0, best_pitch + pitch_delta); } for (pixel = projection_left; pixel <= projection_right; pixel++) { for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) { diff --git a/src/textord/tordmain.cpp b/src/textord/tordmain.cpp index 3305a4e4..a7f2a168 100644 --- a/src/textord/tordmain.cpp +++ b/src/textord/tordmain.cpp @@ -80,7 +80,7 @@ void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) { uint32_t *data = pixGetData(dist_pix); int wpl = pixGetWpl(dist_pix); // Horizontal width of stroke. - STATS h_stats(0, width + 1); + STATS h_stats(0, width); for (int y = 0; y < height; ++y) { uint32_t *pixels = data + y * wpl; int prev_pixel = 0; @@ -104,7 +104,7 @@ void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) { } } // Vertical width of stroke. 
- STATS v_stats(0, height + 1); + STATS v_stats(0, height); for (int x = 0; x < width; ++x) { int prev_pixel = 0; int pixel = GET_DATA_BYTE(data, x); @@ -300,7 +300,7 @@ float Textord::filter_noise_blobs(BLOBNBOX_LIST *src_list, // original list BLOBNBOX_IT noise_it = noise_list; BLOBNBOX_IT small_it = small_list; BLOBNBOX_IT large_it = large_list; - STATS size_stats(0, MAX_NEAREST_DIST); + STATS size_stats(0, MAX_NEAREST_DIST - 1); // blob heights float min_y; // size limits float max_y; diff --git a/src/textord/tospace.cpp b/src/textord/tospace.cpp index 75b3aed0..440f1c58 100644 --- a/src/textord/tospace.cpp +++ b/src/textord/tospace.cpp @@ -112,10 +112,10 @@ void Textord::block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, bool &old_tex TO_ROW *row; // current row BLOBNBOX_IT blob_it; // iterator - STATS centre_to_centre_stats(0, MAXSPACING); + STATS centre_to_centre_stats(0, MAXSPACING - 1); // DEBUG USE ONLY - STATS all_gap_stats(0, MAXSPACING); - STATS space_gap_stats(0, MAXSPACING); + STATS all_gap_stats(0, MAXSPACING - 1); + STATS space_gap_stats(0, MAXSPACING - 1); int16_t minwidth = MAXSPACING; // narrowest blob TBOX blob_box; TBOX prev_blob_box; @@ -274,10 +274,10 @@ void Textord::row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, ) { // iterator BLOBNBOX_IT blob_it = row->blob_list(); - STATS all_gap_stats(0, MAXSPACING); - STATS cert_space_gap_stats(0, MAXSPACING); - STATS all_space_gap_stats(0, MAXSPACING); - STATS small_gap_stats(0, MAXSPACING); + STATS all_gap_stats(0, MAXSPACING - 1); + STATS cert_space_gap_stats(0, MAXSPACING - 1); + STATS all_space_gap_stats(0, MAXSPACING - 1); + STATS small_gap_stats(0, MAXSPACING - 1); TBOX blob_box; TBOX prev_blob_box; int16_t gap_width; @@ -636,9 +636,9 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta int16_t total; // iterator BLOBNBOX_IT blob_it = row->blob_list(); - STATS cert_space_gap_stats(0, MAXSPACING); - STATS all_space_gap_stats(0, MAXSPACING); - 
STATS small_gap_stats(0, MAXSPACING); + STATS cert_space_gap_stats(0, MAXSPACING - 1); + STATS all_space_gap_stats(0, MAXSPACING - 1); + STATS small_gap_stats(0, MAXSPACING - 1); TBOX blob_box; TBOX prev_blob_box; int16_t gap_width; diff --git a/src/textord/underlin.cpp b/src/textord/underlin.cpp index 8d5abe24..112d5fdd 100644 --- a/src/textord/underlin.cpp +++ b/src/textord/underlin.cpp @@ -166,9 +166,9 @@ void find_underlined_blobs( // get chop points TBOX blob_box = u_line->bounding_box(); // cell iterator ICOORDELT_IT cell_it = chop_cells; - STATS upper_proj(blob_box.left(), blob_box.right() + 1); - STATS middle_proj(blob_box.left(), blob_box.right() + 1); - STATS lower_proj(blob_box.left(), blob_box.right() + 1); + STATS upper_proj(blob_box.left(), blob_box.right()); + STATS middle_proj(blob_box.left(), blob_box.right()); + STATS lower_proj(blob_box.left(), blob_box.right()); C_OUTLINE_IT out_it; // outlines of blob ASSERT_HOST(u_line->cblob() != nullptr); diff --git a/src/textord/wordseg.cpp b/src/textord/wordseg.cpp index 3dd741a8..cb9202eb 100644 --- a/src/textord/wordseg.cpp +++ b/src/textord/wordseg.cpp @@ -185,7 +185,7 @@ int32_t row_words( // compute space size TBOX blob_box; // bounding box // iterator BLOBNBOX_IT blob_it = row->blob_list(); - STATS gap_stats(0, maxwidth); + STATS gap_stats(0, maxwidth - 1); STATS cluster_stats[4]; // clusters testpt = ICOORD(textord_test_x, textord_test_y); @@ -341,7 +341,7 @@ int32_t row_words2( // compute space size TBOX blob_box; // bounding box // iterator BLOBNBOX_IT blob_it = row->blob_list(); - STATS gap_stats(0, maxwidth); + STATS gap_stats(0, maxwidth - 1); // gap sizes float gaps[BLOCK_STATS_CLUSTERS]; STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; diff --git a/unittest/stats_test.cc b/unittest/stats_test.cc index b0e03ab3..5c5d76dd 100644 --- a/unittest/stats_test.cc +++ b/unittest/stats_test.cc @@ -22,7 +22,7 @@ class STATSTest : public testing::Test { public: void SetUp() override { 
std::locale::global(std::locale("")); - stats_.set_range(0, 16); + stats_.set_range(0, 15); for (size_t i = 0; i < countof(kTestData); ++i) { stats_.add(i, kTestData[i]); }