Replace remaining GenericVector by std::vector for src/ccmain

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Stefan Weil 2021-03-15 17:53:11 +01:00
parent bf42f8313d
commit 1f94d79c81
13 changed files with 239 additions and 237 deletions
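
Most of the hunks below apply the same handful of mechanical substitutions: GenericVector::sort() becomes std::sort over the iterator range, remove(index) becomes erase() at an iterator offset, truncate(0) becomes clear(), and so on. A minimal, self-contained sketch of these idioms with illustrative values (none of this code is part of the commit):

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> values = {3, 1, 2};

  // GenericVector::sort() -> std::sort over the iterator range.
  std::sort(values.begin(), values.end());

  // GenericVector::remove(index) -> erase() at an iterator offset.
  values.erase(values.begin() + 1);

  // GenericVector::truncate(0) -> clear().
  values.clear();

  return values.empty() ? 0 : 1;
}

The ownership- and semantics-sensitive calls (delete_data_pointers, init_to_size, push_back_new, the binary searches) need slightly more care; short sketches accompany the hunks that replace them.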


@ -24,7 +24,6 @@
# include "boxread.h"
#endif // ndef DISABLED_LEGACY_ENGINE
#include <tesseract/unichar.h>
#include "genericvector.h"
#include "pageres.h"
#include "tesseractclass.h"
#include "unicharset.h"
@ -489,7 +488,7 @@ void Tesseract::ReSegmentByClassification(PAGE_RES *page_res) {
if (word->text() == nullptr || word->text()[0] == '\0')
continue; // Ignore words that have no text.
// Convert the correct text to a vector of UNICHAR_ID
GenericVector<UNICHAR_ID> target_text;
std::vector<UNICHAR_ID> target_text;
if (!ConvertStringToUnichars(word->text(), &target_text)) {
tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n", word->text());
pr_it.DeleteCurrentWord();
@ -505,7 +504,7 @@ void Tesseract::ReSegmentByClassification(PAGE_RES *page_res) {
/// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
/// @return false if an invalid UNICHAR_ID is encountered.
bool Tesseract::ConvertStringToUnichars(const char *utf8, GenericVector<UNICHAR_ID> *class_ids) {
bool Tesseract::ConvertStringToUnichars(const char *utf8, std::vector<UNICHAR_ID> *class_ids) {
for (int step = 0; *utf8 != '\0'; utf8 += step) {
const char *next_space = strchr(utf8, ' ');
if (next_space == nullptr)
@ -528,10 +527,10 @@ bool Tesseract::ConvertStringToUnichars(const char *utf8, GenericVector<UNICHAR_
/// applies a full search on the classifier results to find the best classified
/// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
/// substitutions ARE used.
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID> &target_text, WERD_RES *word_res) {
bool Tesseract::FindSegmentation(const std::vector<UNICHAR_ID> &target_text, WERD_RES *word_res) {
// Classify all required combinations of blobs and save results in choices.
const int word_length = word_res->box_word->length();
auto *choices = new GenericVector<BLOB_CHOICE_LIST *>[word_length];
auto *choices = new std::vector<BLOB_CHOICE_LIST *>[word_length];
for (int i = 0; i < word_length; ++i) {
for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) {
BLOB_CHOICE_LIST *match_result =
@ -552,8 +551,11 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID> &target_text, W
float best_rating = 0.0f;
SearchForText(choices, 0, word_length, target_text, 0, 0.0f, &search_segmentation, &best_rating,
&word_res->best_state);
for (int i = 0; i < word_length; ++i)
choices[i].delete_data_pointers();
for (int i = 0; i < word_length; ++i) {
for (auto choice : choices[i]) {
delete choice;
}
}
delete[] choices;
if (word_res->best_state.empty()) {
// Build the original segmentation and if it is the same length as the
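
std::vector, unlike GenericVector with its delete_data_pointers(), does not delete the pointers it stores, so the FindSegmentation cleanup above now frees every BLOB_CHOICE_LIST explicitly before releasing the array. A minimal sketch of the same idiom with a placeholder element type (not the commit's types):

#include <vector>

struct Choice {};  // placeholder for a heap-allocated element type

int main() {
  auto *choices = new std::vector<Choice *>[2];
  choices[0].push_back(new Choice);
  choices[1].push_back(new Choice);

  // Replacement for delete_data_pointers(): delete the elements, then the arrays.
  for (int i = 0; i < 2; ++i) {
    for (auto *choice : choices[i]) {
      delete choice;
    }
  }
  delete[] choices;
  return 0;
}
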
@ -583,9 +585,9 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID> &target_text, W
/// Recursive helper to find a match to the target_text (from text_index
/// position) in the choices (from choices_pos position).
/// @param choices is an array of GenericVectors, of length choices_length,
/// @param choices is an array of vectors of length choices_length,
/// with each element representing a starting position in the word, and the
/// #GenericVector holding classification results for a sequence of consecutive
/// #vector holding classification results for a sequence of consecutive
/// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
/// @param choices_pos
/// @param choices_length
@ -595,8 +597,8 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID> &target_text, W
/// @param segmentation
/// @param best_rating
/// @param best_segmentation
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST *> *choices, int choices_pos,
int choices_length, const GenericVector<UNICHAR_ID> &target_text,
void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, int choices_pos,
int choices_length, const std::vector<UNICHAR_ID> &target_text,
int text_index, float rating, std::vector<int> *segmentation,
float *best_rating, std::vector<int> *best_segmentation) {
const UnicharAmbigsVector &table = getDict().getUnicharAmbigs().dang_ambigs();


@ -461,8 +461,8 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
continue;
}
// Two words sharing the same language model, excellent!
GenericVector<WERD_CHOICE *> overrides_word1;
GenericVector<WERD_CHOICE *> overrides_word2;
std::vector<WERD_CHOICE *> overrides_word1;
std::vector<WERD_CHOICE *> overrides_word2;
const auto orig_w1_str = w_prev->best_choice->unichar_string();
const auto orig_w2_str = w->best_choice->unichar_string();
@ -768,7 +768,7 @@ static int SelectBestWords(double rating_ratio, double certainty_margin, bool de
PointerVector<WERD_RES> *best_words) {
// Process the smallest groups of words that have an overlapping word
// boundary at the end.
GenericVector<WERD_RES *> out_words;
std::vector<WERD_RES *> out_words;
// Index into each word vector (best, new).
int b = 0, n = 0;
int num_best = 0, num_new = 0;
@ -893,19 +893,19 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
return false;
real_word->rej_cblob_list()->sort(&C_BLOB::SortByXMiddle);
// Get the noise outlines into a vector with matching bool map.
GenericVector<C_OUTLINE *> outlines;
std::vector<C_OUTLINE *> outlines;
real_word->GetNoiseOutlines(&outlines);
GenericVector<bool> word_wanted;
GenericVector<bool> overlapped_any_blob;
GenericVector<C_BLOB *> target_blobs;
std::vector<bool> word_wanted;
std::vector<bool> overlapped_any_blob;
std::vector<C_BLOB *> target_blobs;
AssignDiacriticsToOverlappingBlobs(outlines, pass, real_word, pr_it, &word_wanted,
&overlapped_any_blob, &target_blobs);
// Filter the outlines that overlapped any blob and put them into the word
// now. This simplifies the remaining task and also makes it more accurate
// as it has more completed blobs to work on.
GenericVector<bool> wanted;
GenericVector<C_BLOB *> wanted_blobs;
GenericVector<C_OUTLINE *> wanted_outlines;
std::vector<bool> wanted;
std::vector<C_BLOB *> wanted_blobs;
std::vector<C_OUTLINE *> wanted_outlines;
int num_overlapped = 0;
int num_overlapped_used = 0;
for (int i = 0; i < overlapped_any_blob.size(); ++i) {
@ -948,11 +948,11 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
// Output: word_wanted indicates which outlines are to be assigned to a blob,
// target_blobs indicates which to assign to, and overlapped_any_blob is
// true for all outlines that overlapped a blob.
void Tesseract::AssignDiacriticsToOverlappingBlobs(const GenericVector<C_OUTLINE *> &outlines,
void Tesseract::AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *> &outlines,
int pass, WERD *real_word, PAGE_RES_IT *pr_it,
GenericVector<bool> *word_wanted,
GenericVector<bool> *overlapped_any_blob,
GenericVector<C_BLOB *> *target_blobs) {
std::vector<bool> *word_wanted,
std::vector<bool> *overlapped_any_blob,
std::vector<C_BLOB *> *target_blobs) {
std::vector<bool> blob_wanted;
word_wanted->resize(outlines.size(), false);
overlapped_any_blob->resize(outlines.size(), false);
@ -999,10 +999,10 @@ void Tesseract::AssignDiacriticsToOverlappingBlobs(const GenericVector<C_OUTLINE
// Attempts to assign non-overlapping outlines to their nearest blobs or
// make new blobs out of them.
void Tesseract::AssignDiacriticsToNewBlobs(const GenericVector<C_OUTLINE *> &outlines, int pass,
void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outlines, int pass,
WERD *real_word, PAGE_RES_IT *pr_it,
GenericVector<bool> *word_wanted,
GenericVector<C_BLOB *> *target_blobs) {
std::vector<bool> *word_wanted,
std::vector<C_BLOB *> *target_blobs) {
std::vector<bool> blob_wanted;
word_wanted->resize(outlines.size(), false);
target_blobs->resize(outlines.size(), nullptr);
@ -1077,7 +1077,7 @@ void Tesseract::AssignDiacriticsToNewBlobs(const GenericVector<C_OUTLINE *> &out
// are desired, in which case ok_outlines indicates which ones.
bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold, PAGE_RES_IT *pr_it,
C_BLOB *blob,
const GenericVector<C_OUTLINE *> &outlines,
const std::vector<C_OUTLINE *> &outlines,
int num_outlines, std::vector<bool> *ok_outlines) {
std::string best_str;
float target_cert = certainty_threshold;
@ -1161,7 +1161,7 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
// Classifies the given blob plus the outlines flagged by ok_outlines, undoes
// the inclusion of the outlines, and returns the certainty of the raw choice.
float Tesseract::ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
const GenericVector<C_OUTLINE *> &outlines, int pass_n,
const std::vector<C_OUTLINE *> &outlines, int pass_n,
PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str) {
C_OUTLINE_IT ol_it;
C_OUTLINE *first_to_keep = nullptr;
@ -1865,8 +1865,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
const int fontinfo_size = get_fontinfo_table().size();
if (fontinfo_size == 0)
return;
GenericVector<int> font_total_score;
font_total_score.init_to_size(fontinfo_size, 0);
std::vector<int> font_total_score(fontinfo_size);
// Compute the font scores for the word
if (tessedit_debug_fonts) {


@ -131,7 +131,7 @@ int EquationDetect::LabelSpecialText(TO_BLOCK *to_block) {
return -1;
}
GenericVector<BLOBNBOX_LIST *> blob_lists;
std::vector<BLOBNBOX_LIST *> blob_lists;
blob_lists.push_back(&(to_block->blobs));
blob_lists.push_back(&(to_block->large_blobs));
for (int i = 0; i < blob_lists.size(); ++i) {
@ -223,16 +223,17 @@ BlobSpecialTextType EquationDetect::EstimateTypeForUnichar(const UNICHARSET &uni
if (unicharset.get_ispunctuation(id)) {
// Exclude some special texts that are likely to be confused as math symbol.
static GenericVector<UNICHAR_ID> ids_to_exclude;
static std::vector<UNICHAR_ID> ids_to_exclude;
if (ids_to_exclude.empty()) {
static const char *kCharsToEx[] = {"'", "`", "\"", "\\", ",", ".",
"", "", "", "", "", ""};
for (auto i = 0; i < countof(kCharsToEx); i++) {
ids_to_exclude.push_back(unicharset.unichar_to_id(kCharsToEx[i]));
}
ids_to_exclude.sort();
std::sort(ids_to_exclude.begin(), ids_to_exclude.end());
}
return ids_to_exclude.bool_binary_search(id) ? BSTT_NONE : BSTT_MATH;
auto found = std::binary_search(ids_to_exclude.begin(), ids_to_exclude.end(), id);
return found ? BSTT_NONE : BSTT_MATH;
}
// Check if it is digit. In addition to the isdigit attribute, we also check
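
The EstimateTypeForUnichar hunk above keeps the lazily built static exclusion table but now sorts it with std::sort and probes it with std::binary_search, which presumes a sorted range. A minimal sketch of that fill-once / sort-once / binary-search pattern with placeholder ids (not the commit's character set):

#include <algorithm>
#include <vector>

// Fill once, sort once, then answer membership queries with std::binary_search.
static bool IsExcludedId(int id) {
  static std::vector<int> ids_to_exclude;
  if (ids_to_exclude.empty()) {
    ids_to_exclude = {42, 7, 19};  // placeholder ids
    std::sort(ids_to_exclude.begin(), ids_to_exclude.end());
  }
  return std::binary_search(ids_to_exclude.begin(), ids_to_exclude.end(), id);
}

int main() {
  return IsExcludedId(7) && !IsExcludedId(8) ? 0 : 1;
}
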
@ -266,13 +267,13 @@ void EquationDetect::IdentifySpecialText() {
IdentifyBlobsToSkip(part);
BLOBNBOX_C_IT bbox_it(part->boxes());
// Compute the height threshold.
GenericVector<int> blob_heights;
std::vector<int> blob_heights;
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
if (bbox_it.data()->special_text_type() != BSTT_SKIP) {
blob_heights.push_back(bbox_it.data()->bounding_box().height());
}
}
blob_heights.sort();
std::sort(blob_heights.begin(), blob_heights.end());
const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2;
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
if (bbox_it.data()->special_text_type() != BSTT_SKIP) {
@ -377,7 +378,7 @@ int EquationDetect::FindEquationParts(ColPartitionGrid *part_grid, ColPartitionS
// Pass 3: expand block equation seeds.
while (!cp_seeds_.empty()) {
GenericVector<ColPartition *> seeds_expanded;
std::vector<ColPartition *> seeds_expanded;
for (int i = 0; i < cp_seeds_.size(); ++i) {
if (ExpandSeed(cp_seeds_[i])) {
// If this seed is expanded, then we add it into seeds_expanded. Note
@ -407,14 +408,14 @@ void EquationDetect::MergePartsByLocation() {
while (true) {
ColPartition *part = nullptr;
// partitions that have been updated.
GenericVector<ColPartition *> parts_updated;
std::vector<ColPartition *> parts_updated;
ColPartitionGridSearch gsearch(part_grid_);
gsearch.StartFullSearch();
while ((part = gsearch.NextFullSearch()) != nullptr) {
if (!IsTextOrEquationType(part->type())) {
continue;
}
GenericVector<ColPartition *> parts_to_merge;
std::vector<ColPartition *> parts_to_merge;
SearchByOverlap(part, &parts_to_merge);
if (parts_to_merge.empty()) {
continue;
@ -443,7 +444,7 @@ void EquationDetect::MergePartsByLocation() {
}
void EquationDetect::SearchByOverlap(ColPartition *seed,
GenericVector<ColPartition *> *parts_overlap) {
std::vector<ColPartition *> *parts_overlap) {
ASSERT_HOST(seed != nullptr && parts_overlap != nullptr);
if (!IsTextOrEquationType(seed->type())) {
return;
@ -457,7 +458,7 @@ void EquationDetect::SearchByOverlap(ColPartition *seed,
// Search iteratively.
ColPartition *part;
GenericVector<ColPartition *> parts;
std::vector<ColPartition *> parts;
const float kLargeOverlapTh = 0.95;
const float kEquXOverlap = 0.4, kEquYOverlap = 0.5;
while ((part = search.NextRadSearch()) != nullptr) {
@ -518,11 +519,11 @@ void EquationDetect::IdentifySeedParts() {
ColPartition *part = nullptr;
gsearch.StartFullSearch();
GenericVector<ColPartition *> seeds1, seeds2;
std::vector<ColPartition *> seeds1, seeds2;
// The left coordinates of indented text partitions.
GenericVector<int> indented_texts_left;
std::vector<int> indented_texts_left;
// The foreground density of text partitions.
GenericVector<float> texts_foreground_density;
std::vector<float> texts_foreground_density;
while ((part = gsearch.NextFullSearch()) != nullptr) {
if (!IsTextOrEquationType(part->type())) {
continue;
@ -552,8 +553,8 @@ void EquationDetect::IdentifySeedParts() {
}
// Sort the features collected from text regions.
indented_texts_left.sort();
texts_foreground_density.sort();
std::sort(indented_texts_left.begin(), indented_texts_left.end());
std::sort(texts_foreground_density.begin(), texts_foreground_density.end());
float foreground_density_th = 0.15; // Default value.
if (!texts_foreground_density.empty()) {
// Use the median of the texts_foreground_density.
@ -598,7 +599,7 @@ bool EquationDetect::CheckSeedFgDensity(const float density_th, ColPartition *pa
ASSERT_HOST(part);
// Split part horizontall, and check for each sub part.
GenericVector<TBOX> sub_boxes;
std::vector<TBOX> sub_boxes;
SplitCPHorLite(part, &sub_boxes);
float parts_passed = 0.0;
for (int i = 0; i < sub_boxes.size(); ++i) {
@ -615,7 +616,7 @@ bool EquationDetect::CheckSeedFgDensity(const float density_th, ColPartition *pa
return retval;
}
void EquationDetect::SplitCPHor(ColPartition *part, GenericVector<ColPartition *> *parts_splitted) {
void EquationDetect::SplitCPHor(ColPartition *part, std::vector<ColPartition *> *parts_splitted) {
ASSERT_HOST(part && parts_splitted);
if (part->median_width() == 0 || part->boxes_count() == 0) {
return;
@ -623,7 +624,9 @@ void EquationDetect::SplitCPHor(ColPartition *part, GenericVector<ColPartition *
// Make a copy of part, and reset parts_splitted.
ColPartition *right_part = part->CopyButDontOwnBlobs();
parts_splitted->delete_data_pointers();
for (auto part : *parts_splitted) {
delete part;
}
parts_splitted->clear();
const double kThreshold = part->median_width() * 3.0;
@ -663,7 +666,7 @@ void EquationDetect::SplitCPHor(ColPartition *part, GenericVector<ColPartition *
parts_splitted->push_back(right_part);
}
void EquationDetect::SplitCPHorLite(ColPartition *part, GenericVector<TBOX> *splitted_boxes) {
void EquationDetect::SplitCPHorLite(ColPartition *part, std::vector<TBOX> *splitted_boxes) {
ASSERT_HOST(part && splitted_boxes);
splitted_boxes->clear();
if (part->median_width() == 0) {
@ -701,7 +704,7 @@ void EquationDetect::SplitCPHorLite(ColPartition *part, GenericVector<TBOX> *spl
}
}
bool EquationDetect::CheckForSeed2(const GenericVector<int> &indented_texts_left,
bool EquationDetect::CheckForSeed2(const std::vector<int> &indented_texts_left,
const float foreground_density_th, ColPartition *part) {
ASSERT_HOST(part);
const TBOX &box = part->bounding_box();
@ -720,22 +723,25 @@ bool EquationDetect::CheckForSeed2(const GenericVector<int> &indented_texts_left
return true;
}
int EquationDetect::CountAlignment(const GenericVector<int> &sorted_vec, const int val) const {
int EquationDetect::CountAlignment(const std::vector<int> &sorted_vec, const int val) const {
if (sorted_vec.empty()) {
return 0;
}
const int kDistTh = static_cast<int>(roundf(0.03 * resolution_));
const int pos = sorted_vec.binary_search(val);
const int kDistTh = static_cast<int>(round(0.03f * resolution_));
auto pos = std::upper_bound(sorted_vec.begin(), sorted_vec.end(), val);
if (pos > sorted_vec.begin()) {
--pos;
}
int count = 0;
// Search left side.
int index = pos;
auto index = pos - sorted_vec.begin();
while (index >= 0 && abs(val - sorted_vec[index--]) < kDistTh) {
count++;
}
// Search right side.
index = pos + 1;
index = pos + 1 - sorted_vec.begin();
while (index < sorted_vec.size() && sorted_vec[index++] - val < kDistTh) {
count++;
}
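
In CountAlignment, the index previously returned by GenericVector::binary_search is reconstructed from std::upper_bound: the iterator points at the first element greater than val, stepping back once (when possible) lands on the last element not greater than val, and subtracting begin() turns the iterator into an index for the two scan loops. A minimal sketch of just that seeding step, with placeholder values:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Index of the last element <= val, clamped to 0 when val is below the front.
static std::ptrdiff_t IndexAtOrBefore(const std::vector<int> &sorted_vec, int val) {
  auto pos = std::upper_bound(sorted_vec.begin(), sorted_vec.end(), val);
  if (pos > sorted_vec.begin()) {
    --pos;
  }
  return pos - sorted_vec.begin();
}

int main() {
  std::vector<int> sorted_vec = {10, 20, 30};
  assert(IndexAtOrBefore(sorted_vec, 25) == 1);  // last element <= 25 is 20
  assert(IndexAtOrBefore(sorted_vec, 5) == 0);   // below the front: stays at 0
  return 0;
}
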
@ -764,9 +770,9 @@ void EquationDetect::ComputeCPsSuperBBox() {
void EquationDetect::IdentifyInlinePartsHorizontal() {
ASSERT_HOST(cps_super_bbox_);
GenericVector<ColPartition *> new_seeds;
std::vector<ColPartition *> new_seeds;
const int kMarginDiffTh = IntCastRounded(0.5 * lang_tesseract_->source_resolution());
const int kGapTh = static_cast<int>(roundf(1.0 * lang_tesseract_->source_resolution()));
const int kGapTh = static_cast<int>(round(1.0f * lang_tesseract_->source_resolution()));
ColPartitionGridSearch search(part_grid_);
search.SetUniqueMode(true);
// The center x coordinate of the cp_super_bbox_.
@ -826,7 +832,7 @@ int EquationDetect::EstimateTextPartLineSpacing() {
// Get the y gap between text partitions;
ColPartition *current = nullptr, *prev = nullptr;
gsearch.StartFullSearch();
GenericVector<int> ygaps;
std::vector<int> ygaps;
while ((current = gsearch.NextFullSearch()) != nullptr) {
if (!PTIsTextType(current->type())) {
continue;
@ -851,7 +857,7 @@ int EquationDetect::EstimateTextPartLineSpacing() {
}
// Compute the line spacing from ygaps: use the mean of the first half.
ygaps.sort();
std::sort(ygaps.begin(), ygaps.end());
int spacing = 0, count;
for (count = 0; count < ygaps.size() / 2; count++) {
spacing += ygaps[count];
@ -867,12 +873,12 @@ void EquationDetect::IdentifyInlinePartsVertical(const bool top_to_bottom,
// Sort cp_seeds_.
if (top_to_bottom) { // From top to bottom.
cp_seeds_.sort(&SortCPByTopReverse);
std::sort(cp_seeds_.begin(), cp_seeds_.end(), &SortCPByTopReverse);
} else { // From bottom to top.
cp_seeds_.sort(&SortCPByBottom);
std::sort(cp_seeds_.begin(), cp_seeds_.end(), &SortCPByBottom);
}
GenericVector<ColPartition *> new_seeds;
std::vector<ColPartition *> new_seeds;
for (int i = 0; i < cp_seeds_.size(); ++i) {
ColPartition *part = cp_seeds_[i];
// If we sort cp_seeds_ from top to bottom, then for each cp_seeds_, we look
@ -918,8 +924,8 @@ bool EquationDetect::IsInline(const bool search_bottom, const int textparts_line
// Check if neighbor and part is inline similar.
const float kHeightRatioTh = 0.5;
const int kYGapTh = textparts_linespacing > 0
? textparts_linespacing + static_cast<int>(roundf(0.02 * resolution_))
: static_cast<int>(roundf(0.05 * resolution_)); // Default value.
? textparts_linespacing + static_cast<int>(round(0.02f * resolution_))
: static_cast<int>(round(0.05f * resolution_)); // Default value.
if (part_box.x_overlap(neighbor_box) && // Location feature.
part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing.
// Geo feature.
@ -973,9 +979,9 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition *part) {
ColPartitionGridSearch search(part_grid_);
ColPartition *neighbor = nullptr;
const TBOX &part_box(part->bounding_box());
const int kXGapTh = static_cast<int>(roundf(0.5 * resolution_));
const int kRadiusTh = static_cast<int>(roundf(3.0 * resolution_));
const int kYGapTh = static_cast<int>(roundf(0.5 * resolution_));
const int kXGapTh = static_cast<int>(round(0.5f * resolution_));
const int kRadiusTh = static_cast<int>(round(3.0f * resolution_));
const int kYGapTh = static_cast<int>(round(0.5f * resolution_));
// Here we use a simple approximation algorithm: from the center of part, We
// perform the radius search, and check if we can find a neighboring partition
@ -1036,7 +1042,7 @@ bool EquationDetect::ExpandSeed(ColPartition *seed) {
}
// Expand in four directions.
GenericVector<ColPartition *> parts_to_merge;
std::vector<ColPartition *> parts_to_merge;
ExpandSeedHorizontal(true, seed, &parts_to_merge);
ExpandSeedHorizontal(false, seed, &parts_to_merge);
ExpandSeedVertical(true, seed, &parts_to_merge);
@ -1073,10 +1079,10 @@ bool EquationDetect::ExpandSeed(ColPartition *seed) {
}
void EquationDetect::ExpandSeedHorizontal(const bool search_left, ColPartition *seed,
GenericVector<ColPartition *> *parts_to_merge) {
std::vector<ColPartition *> *parts_to_merge) {
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr);
const float kYOverlapTh = 0.6;
const int kXGapTh = static_cast<int>(roundf(0.2 * resolution_));
const int kXGapTh = static_cast<int>(round(0.2f * resolution_));
ColPartitionGridSearch search(part_grid_);
const TBOX &seed_box(seed->bounding_box());
@ -1125,10 +1131,10 @@ void EquationDetect::ExpandSeedHorizontal(const bool search_left, ColPartition *
}
void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *seed,
GenericVector<ColPartition *> *parts_to_merge) {
std::vector<ColPartition *> *parts_to_merge) {
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr && cps_super_bbox_ != nullptr);
const float kXOverlapTh = 0.4;
const int kYGapTh = static_cast<int>(roundf(0.2 * resolution_));
const int kYGapTh = static_cast<int>(round(0.2f * resolution_));
ColPartitionGridSearch search(part_grid_);
const TBOX &seed_box(seed->bounding_box());
@ -1138,7 +1144,7 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
// Search iteratively.
ColPartition *part = nullptr;
GenericVector<ColPartition *> parts;
std::vector<ColPartition *> parts;
int skipped_min_top = std::numeric_limits<int>::max(), skipped_max_bottom = -1;
while ((part = search.NextVerticalSearch(search_bottom)) != nullptr) {
if (part == seed) {
@ -1206,8 +1212,8 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
}
bool EquationDetect::IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const {
const int kXGapTh = static_cast<int>(roundf(0.25 * resolution_));
const int kYGapTh = static_cast<int>(roundf(0.05 * resolution_));
const int kXGapTh = static_cast<int>(round(0.25f * resolution_));
const int kYGapTh = static_cast<int>(round(0.05f * resolution_));
// Check geometric feature.
if (part_box.height() > seed_box.height() || part_box.width() > seed_box.width()) {
@ -1244,7 +1250,7 @@ void EquationDetect::ProcessMathBlockSatelliteParts() {
// Iterate over part_grid_, and find all parts that are text type but not
// equation type.
ColPartition *part = nullptr;
GenericVector<ColPartition *> text_parts;
std::vector<ColPartition *> text_parts;
ColPartitionGridSearch gsearch(part_grid_);
gsearch.StartFullSearch();
while ((part = gsearch.NextFullSearch()) != nullptr) {
@ -1257,12 +1263,12 @@ void EquationDetect::ProcessMathBlockSatelliteParts() {
}
// Compute the medium height of the text_parts.
text_parts.sort(&SortCPByHeight);
std::sort(text_parts.begin(), text_parts.end(), &SortCPByHeight);
const TBOX &text_box = text_parts[text_parts.size() / 2]->bounding_box();
int med_height = text_box.height();
if (text_parts.size() % 2 == 0 && text_parts.size() > 1) {
const TBOX &text_box = text_parts[text_parts.size() / 2 - 1]->bounding_box();
med_height = static_cast<int>(roundf(0.5 * (text_box.height() + med_height)));
med_height = static_cast<int>(round(0.5f * (text_box.height() + med_height)));
}
// Iterate every text_parts and check if it is a math block satellite.
@ -1271,7 +1277,7 @@ void EquationDetect::ProcessMathBlockSatelliteParts() {
if (text_box.height() > med_height) {
continue;
}
GenericVector<ColPartition *> math_blocks;
std::vector<ColPartition *> math_blocks;
if (!IsMathBlockSatellite(text_parts[i], &math_blocks)) {
continue;
}
@ -1288,7 +1294,7 @@ void EquationDetect::ProcessMathBlockSatelliteParts() {
}
bool EquationDetect::IsMathBlockSatellite(ColPartition *part,
GenericVector<ColPartition *> *math_blocks) {
std::vector<ColPartition *> *math_blocks) {
ASSERT_HOST(part != nullptr && math_blocks != nullptr);
math_blocks->clear();
const TBOX &part_box(part->bounding_box());
@ -1344,7 +1350,7 @@ bool EquationDetect::IsMathBlockSatellite(ColPartition *part,
ColPartition *EquationDetect::SearchNNVertical(const bool search_bottom, const ColPartition *part) {
ASSERT_HOST(part);
ColPartition *nearest_neighbor = nullptr, *neighbor = nullptr;
const int kYGapTh = static_cast<int>(roundf(resolution_ * 0.5));
const int kYGapTh = static_cast<int>(round(resolution_ * 0.5f));
ColPartitionGridSearch search(part_grid_);
search.SetUniqueMode(true);
@ -1379,7 +1385,7 @@ bool EquationDetect::IsNearMathNeighbor(const int y_gap, const ColPartition *nei
if (!neighbor) {
return false;
}
const int kYGapTh = static_cast<int>(roundf(resolution_ * 0.1));
const int kYGapTh = static_cast<int>(round(resolution_ * 0.1f));
return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh;
}


@ -22,7 +22,6 @@
#include <tesseract/unichar.h> // for UNICHAR_ID
#include "blobbox.h" // for BLOBNBOX (ptr only), BlobSpecialText...
#include "equationdetectbase.h" // for EquationDetectBase
#include "genericvector.h" // for GenericVector
#include "tesseractclass.h" // for Tesseract
class TBOX;
@ -86,7 +85,7 @@ protected:
// parts_overlap. Note: this function may update the part_grid_, so if the
// caller is also running ColPartitionGridSearch, use the RepositionIterator
// to continue.
void SearchByOverlap(ColPartition *seed, GenericVector<ColPartition *> *parts_overlap);
void SearchByOverlap(ColPartition *seed, std::vector<ColPartition *> *parts_overlap);
// Insert part back into part_grid_, after it absorbs some other parts.
void InsertPartAfterAbsorb(ColPartition *part);
@ -106,12 +105,12 @@ protected:
// 1. If its left is aligned with any coordinates in indented_texts_left,
// which we assume have been sorted.
// 2. If its foreground density is over foreground_density_th.
bool CheckForSeed2(const GenericVector<int> &indented_texts_left,
bool CheckForSeed2(const std::vector<int> &indented_texts_left,
const float foreground_density_th, ColPartition *part);
// Count the number of values in sorted_vec that is close to val, used to
// check if a partition is aligned with text partitions.
int CountAlignment(const GenericVector<int> &sorted_vec, const int val) const;
int CountAlignment(const std::vector<int> &sorted_vec, const int val) const;
// Check for a seed candidate using the foreground pixel density. And we
// return true if the density is below a certain threshold, because characters
@ -120,14 +119,14 @@ protected:
// A light version of SplitCPHor: instead of really doing the part split, we
// simply compute the union bounding box of each split part.
void SplitCPHorLite(ColPartition *part, GenericVector<TBOX> *splitted_boxes);
void SplitCPHorLite(ColPartition *part, std::vector<TBOX> *splitted_boxes);
// Split the part (horizontally), and save the split result into
// parts_splitted. Note that it is caller's responsibility to release the
// memory owns by parts_splitted. On the other hand, the part is unchanged
// during this process and still owns the blobs, so do NOT call DeleteBoxes
// when freeing the colpartitions in parts_splitted.
void SplitCPHor(ColPartition *part, GenericVector<ColPartition *> *parts_splitted);
void SplitCPHor(ColPartition *part, std::vector<ColPartition *> *parts_splitted);
// Check the density for a seed candidate (part) using its math density and
// italic density, returns true if the check passed.
@ -167,9 +166,9 @@ protected:
// merged with seed, remove them from part_grid_, and put them into
// parts_to_merge.
void ExpandSeedHorizontal(const bool search_left, ColPartition *seed,
GenericVector<ColPartition *> *parts_to_merge);
std::vector<ColPartition *> *parts_to_merge);
void ExpandSeedVertical(const bool search_bottom, ColPartition *seed,
GenericVector<ColPartition *> *parts_to_merge);
std::vector<ColPartition *> *parts_to_merge);
// Check if a part_box is the small neighbor of seed_box.
bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const;
@ -190,7 +189,7 @@ protected:
// Check if part is the satellite of one/two math blocks. If it is, we return
// true, and save the blocks into math_blocks.
bool IsMathBlockSatellite(ColPartition *part, GenericVector<ColPartition *> *math_blocks);
bool IsMathBlockSatellite(ColPartition *part, std::vector<ColPartition *> *math_blocks);
// Search the nearest neighbor of part in one vertical direction as defined in
// search_bottom. It returns the neighbor found that major x overlap with it,
@ -237,7 +236,7 @@ protected:
TBOX *cps_super_bbox_;
// The seed ColPartition for equation region.
GenericVector<ColPartition *> cp_seeds_;
std::vector<ColPartition *> cp_seeds_;
// The resolution (dpi) of the processing image.
int resolution_;


@ -18,7 +18,6 @@
#include "paragraphs.h"
#include "genericvector.h" // for GenericVector, GenericVectorEqEq
#include "helpers.h" // for UpdateRange, ClipToRange
#include "host.h" // for NearlyEqual
#include "mutableiterator.h" // for MutableIterator
@ -72,7 +71,7 @@ static int Epsilon(int space_pix) {
}
static bool AcceptableRowArgs(int debug_level, int min_num_rows, const char *function_name,
const GenericVector<RowScratchRegisters> *rows, int row_start,
const std::vector<RowScratchRegisters> *rows, int row_start,
int row_end) {
if (row_start < 0 || row_end > rows->size() || row_start > row_end) {
tprintf("Invalid arguments rows[%d, %d) while rows is of size %d.\n", row_start, row_end,
@ -134,7 +133,7 @@ static std::string RtlEmbed(const std::string &word, bool rtlify) {
// Print the current thoughts of the paragraph detector.
static void PrintDetectorState(const ParagraphTheory &theory,
const GenericVector<RowScratchRegisters> &rows) {
const std::vector<RowScratchRegisters> &rows) {
std::vector<std::vector<std::string>> output;
output.push_back(std::vector<std::string>());
output.back().push_back("#row");
@ -173,7 +172,7 @@ static void PrintDetectorState(const ParagraphTheory &theory,
}
static void DebugDump(bool should_print, const char *phase, const ParagraphTheory &theory,
const GenericVector<RowScratchRegisters> &rows) {
const std::vector<RowScratchRegisters> &rows) {
if (!should_print)
return;
tprintf("# %s\n", phase);
@ -181,7 +180,7 @@ static void DebugDump(bool should_print, const char *phase, const ParagraphTheor
}
// Print out the text for rows[row_start, row_end)
static void PrintRowRange(const GenericVector<RowScratchRegisters> &rows, int row_start,
static void PrintRowRange(const std::vector<RowScratchRegisters> &rows, int row_start,
int row_end) {
tprintf("======================================\n");
for (int row = row_start; row < row_end; row++) {
@ -398,6 +397,13 @@ static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {
return pos == werd->length();
}
template<class T>
void push_back_new(std::vector<T> &vector, const T &data) {
if (std::find(vector.begin(), vector.end(), data) == vector.end()) {
vector.push_back(data);
}
}
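
push_back_new takes over from the GenericVector::push_back_new member used throughout this file: it appends a value only if it is not already present. A minimal usage sketch (the helper is repeated only so the snippet compiles on its own; the values are placeholders):

#include <algorithm>
#include <vector>

// Same helper as above, repeated so this sketch is self-contained.
template <class T>
void push_back_new(std::vector<T> &vector, const T &data) {
  if (std::find(vector.begin(), vector.end(), data) == vector.end()) {
    vector.push_back(data);
  }
}

int main() {
  std::vector<int> models;
  push_back_new(models, 1);
  push_back_new(models, 2);
  push_back_new(models, 1);  // already present, ignored
  return models.size() == 2 ? 0 : 1;
}
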
// ========= Brain Dead Language Model (combined entry points) ================
// Given the leftmost word of a line either as a Tesseract unicharset + werd
@ -581,7 +587,7 @@ void RowScratchRegisters::SetStartLine() {
tprintf("Trying to set a line to be START when it's already BODY.\n");
}
if (current_lt == LT_UNKNOWN || current_lt == LT_BODY) {
hypotheses_.push_back_new(LineHypothesis(LT_START, nullptr));
push_back_new(hypotheses_, LineHypothesis(LT_START, nullptr));
}
}
@ -591,42 +597,44 @@ void RowScratchRegisters::SetBodyLine() {
tprintf("Trying to set a line to be BODY when it's already START.\n");
}
if (current_lt == LT_UNKNOWN || current_lt == LT_START) {
hypotheses_.push_back_new(LineHypothesis(LT_BODY, nullptr));
push_back_new(hypotheses_, LineHypothesis(LT_BODY, nullptr));
}
}
void RowScratchRegisters::AddStartLine(const ParagraphModel *model) {
hypotheses_.push_back_new(LineHypothesis(LT_START, model));
int old_idx = hypotheses_.get_index(LineHypothesis(LT_START, nullptr));
if (old_idx >= 0)
hypotheses_.remove(old_idx);
push_back_new(hypotheses_, LineHypothesis(LT_START, model));
auto found = std::find(hypotheses_.begin(), hypotheses_.end(), LineHypothesis(LT_START, nullptr));
if (found != hypotheses_.end()) {
hypotheses_.erase(found);
}
}
void RowScratchRegisters::AddBodyLine(const ParagraphModel *model) {
hypotheses_.push_back_new(LineHypothesis(LT_BODY, model));
int old_idx = hypotheses_.get_index(LineHypothesis(LT_BODY, nullptr));
if (old_idx >= 0)
hypotheses_.remove(old_idx);
push_back_new(hypotheses_, LineHypothesis(LT_BODY, model));
auto found = std::find(hypotheses_.begin(), hypotheses_.end(), LineHypothesis(LT_BODY, nullptr));
if (found != hypotheses_.end()) {
hypotheses_.erase(found);
}
}
void RowScratchRegisters::StartHypotheses(SetOfModels *models) const {
for (int h = 0; h < hypotheses_.size(); h++) {
if (hypotheses_[h].ty == LT_START && StrongModel(hypotheses_[h].model))
models->push_back_new(hypotheses_[h].model);
push_back_new(*models, hypotheses_[h].model);
}
}
void RowScratchRegisters::StrongHypotheses(SetOfModels *models) const {
for (int h = 0; h < hypotheses_.size(); h++) {
if (StrongModel(hypotheses_[h].model))
models->push_back_new(hypotheses_[h].model);
push_back_new(*models, hypotheses_[h].model);
}
}
void RowScratchRegisters::NonNullHypotheses(SetOfModels *models) const {
for (int h = 0; h < hypotheses_.size(); h++) {
if (hypotheses_[h].model != nullptr)
models->push_back_new(hypotheses_[h].model);
push_back_new(*models, hypotheses_[h].model);
}
}
@ -647,8 +655,8 @@ void RowScratchRegisters::DiscardNonMatchingHypotheses(const SetOfModels &models
if (models.empty())
return;
for (int h = hypotheses_.size() - 1; h >= 0; h--) {
if (!models.contains(hypotheses_[h].model)) {
hypotheses_.remove(h);
if (!contains(models, hypotheses_[h].model)) {
hypotheses_.erase(hypotheses_.begin() + h);
}
}
}
@ -672,15 +680,15 @@ public:
int size() const {
return values_.size();
}
void GetClusters(GenericVector<Cluster> *clusters);
void GetClusters(std::vector<Cluster> *clusters);
private:
int max_cluster_width_;
GenericVector<int> values_;
std::vector<int> values_;
};
// Return the index of the cluster closest to value.
static int ClosestCluster(const GenericVector<Cluster> &clusters, int value) {
static int ClosestCluster(const std::vector<Cluster> &clusters, int value) {
int best_index = 0;
for (int i = 0; i < clusters.size(); i++) {
if (abs(value - clusters[i].center) < abs(value - clusters[best_index].center))
@ -689,9 +697,9 @@ static int ClosestCluster(const GenericVector<Cluster> &clusters, int value) {
return best_index;
}
void SimpleClusterer::GetClusters(GenericVector<Cluster> *clusters) {
void SimpleClusterer::GetClusters(std::vector<Cluster> *clusters) {
clusters->clear();
values_.sort();
std::sort(values_.begin(), values_.end());
for (int i = 0; i < values_.size();) {
int orig_i = i;
int lo = values_[i];
@ -705,16 +713,16 @@ void SimpleClusterer::GetClusters(GenericVector<Cluster> *clusters) {
// Calculate left- and right-indent tab stop values seen in
// rows[row_start, row_end) given a tolerance of tolerance.
static void CalculateTabStops(GenericVector<RowScratchRegisters> *rows, int row_start, int row_end,
int tolerance, GenericVector<Cluster> *left_tabs,
GenericVector<Cluster> *right_tabs) {
static void CalculateTabStops(std::vector<RowScratchRegisters> *rows, int row_start, int row_end,
int tolerance, std::vector<Cluster> *left_tabs,
std::vector<Cluster> *right_tabs) {
if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end))
return;
// First pass: toss all left and right indents into clusterers.
SimpleClusterer initial_lefts(tolerance);
SimpleClusterer initial_rights(tolerance);
GenericVector<Cluster> initial_left_tabs;
GenericVector<Cluster> initial_right_tabs;
std::vector<Cluster> initial_left_tabs;
std::vector<Cluster> initial_right_tabs;
for (int i = row_start; i < row_end; i++) {
initial_lefts.Add((*rows)[i].lindent_);
initial_rights.Add((*rows)[i].rindent_);
@ -782,7 +790,7 @@ static void CalculateTabStops(GenericVector<RowScratchRegisters> *rows, int row_
}
}
if (to_prune >= 0 && (*left_tabs)[to_prune].count <= infrequent_enough_to_ignore) {
left_tabs->remove(to_prune);
left_tabs->erase(left_tabs->begin() + to_prune);
}
}
if (right_tabs->size() == 3 && left_tabs->size() >= 4) {
@ -793,7 +801,7 @@ static void CalculateTabStops(GenericVector<RowScratchRegisters> *rows, int row_
}
}
if (to_prune >= 0 && (*right_tabs)[to_prune].count <= infrequent_enough_to_ignore) {
right_tabs->remove(to_prune);
right_tabs->erase(right_tabs->begin() + to_prune);
}
}
}
@ -817,7 +825,7 @@ static void CalculateTabStops(GenericVector<RowScratchRegisters> *rows, int row_
// Case 2b: Fully Justified. (eop_threshold > 0)
// We mark a line as short (end of paragraph) if the offside indent
// is greater than eop_threshold.
static void MarkRowsWithModel(GenericVector<RowScratchRegisters> *rows, int row_start, int row_end,
static void MarkRowsWithModel(std::vector<RowScratchRegisters> *rows, int row_start, int row_end,
const ParagraphModel *model, bool ltr, int eop_threshold) {
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end))
return;
@ -861,7 +869,7 @@ static void MarkRowsWithModel(GenericVector<RowScratchRegisters> *rows, int row_
// Further, this struct holds the data we amass for the (single) ParagraphModel
// we'll assign to the text lines (assuming we get that far).
struct GeometricClassifierState {
GeometricClassifierState(int dbg_level, GenericVector<RowScratchRegisters> *r, int r_start,
GeometricClassifierState(int dbg_level, std::vector<RowScratchRegisters> *r, int r_start,
int r_end)
: debug_level(dbg_level), rows(r), row_start(r_start), row_end(r_end) {
tolerance = InterwordSpace(*r, r_start, r_end);
@ -886,7 +894,7 @@ struct GeometricClassifierState {
}
// Align tabs are the tab stops the text is aligned to.
const GenericVector<Cluster> &AlignTabs() const {
const std::vector<Cluster> &AlignTabs() const {
if (just == tesseract::JUSTIFICATION_RIGHT)
return right_tabs;
return left_tabs;
@ -897,7 +905,7 @@ struct GeometricClassifierState {
// Note that for a left-to-right text which is aligned to the right such as
// this function comment, the offside tabs are the horizontal tab stops
// marking the beginning of ("Note", "this" and "marking").
const GenericVector<Cluster> &OffsideTabs() const {
const std::vector<Cluster> &OffsideTabs() const {
if (just == tesseract::JUSTIFICATION_RIGHT)
return left_tabs;
return right_tabs;
@ -940,7 +948,7 @@ struct GeometricClassifierState {
// The Geometric Classifier was asked to find a single paragraph model
// to fit the text rows (*rows)[row_start, row_end)
GenericVector<RowScratchRegisters> *rows;
std::vector<RowScratchRegisters> *rows;
int row_start = 0;
int row_end = 0;
@ -953,8 +961,8 @@ struct GeometricClassifierState {
// These left and right tab stops were determined to be the common tab
// stops for the given text.
GenericVector<Cluster> left_tabs;
GenericVector<Cluster> right_tabs;
std::vector<Cluster> left_tabs;
std::vector<Cluster> right_tabs;
// These are parameters we must determine to create a ParagraphModel.
tesseract::ParagraphJustification just = JUSTIFICATION_UNKNOWN;
@ -1083,7 +1091,7 @@ static void GeometricClassifyThreeTabStopTextBlock(int debug_level, GeometricCla
// have capital letters to go on (e.g. Hebrew, Arabic, Hindi, Chinese),
// it's worth guessing that (A1b) is the correct interpretation if there are
// far more "full" lines than "short" lines.
static void GeometricClassify(int debug_level, GenericVector<RowScratchRegisters> *rows,
static void GeometricClassify(int debug_level, std::vector<RowScratchRegisters> *rows,
int row_start, int row_end, ParagraphTheory *theory) {
if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end))
return;
@ -1223,7 +1231,7 @@ const ParagraphModel *ParagraphTheory::AddModel(const ParagraphModel &model) {
}
auto *m = new ParagraphModel(model);
models_->push_back(m);
models_we_added_.push_back_new(m);
push_back_new(models_we_added_, m);
return m;
}
@ -1231,7 +1239,7 @@ void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
size_t w = 0;
for (size_t r = 0; r < models_->size(); r++) {
ParagraphModel *m = (*models_)[r];
if (!used_models.contains(m) && models_we_added_.contains(m)) {
if (!contains(used_models, static_cast<const ParagraphModel *>(m)) && contains(models_we_added_, m)) {
delete m;
} else {
if (r > w) {
@ -1246,7 +1254,7 @@ void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
// Examine rows[start, end) and try to determine if an existing non-centered
// paragraph model would fit them perfectly. If so, return a pointer to it.
// If not, return nullptr.
const ParagraphModel *ParagraphTheory::Fits(const GenericVector<RowScratchRegisters> *rows,
const ParagraphModel *ParagraphTheory::Fits(const std::vector<RowScratchRegisters> *rows,
int start, int end) const {
for (const auto *model : *models_) {
if (model->justification() != JUSTIFICATION_CENTER && RowsFitModel(rows, start, end, model))
@ -1258,7 +1266,7 @@ const ParagraphModel *ParagraphTheory::Fits(const GenericVector<RowScratchRegist
void ParagraphTheory::NonCenteredModels(SetOfModels *models) {
for (const auto *model : *models_) {
if (model->justification() != JUSTIFICATION_CENTER)
models->push_back_new(model);
push_back_new(*models, model);
}
}
@ -1272,7 +1280,7 @@ int ParagraphTheory::IndexOf(const ParagraphModel *model) const {
return -1;
}
bool ValidFirstLine(const GenericVector<RowScratchRegisters> *rows, int row,
bool ValidFirstLine(const std::vector<RowScratchRegisters> *rows, int row,
const ParagraphModel *model) {
if (!StrongModel(model)) {
tprintf("ValidFirstLine() should only be called with strong models!\n");
@ -1281,7 +1289,7 @@ bool ValidFirstLine(const GenericVector<RowScratchRegisters> *rows, int row,
(*rows)[row].rindent_, (*rows)[row].rmargin_);
}
bool ValidBodyLine(const GenericVector<RowScratchRegisters> *rows, int row,
bool ValidBodyLine(const std::vector<RowScratchRegisters> *rows, int row,
const ParagraphModel *model) {
if (!StrongModel(model)) {
tprintf("ValidBodyLine() should only be called with strong models!\n");
@ -1290,7 +1298,7 @@ bool ValidBodyLine(const GenericVector<RowScratchRegisters> *rows, int row,
(*rows)[row].rindent_, (*rows)[row].rmargin_);
}
bool CrownCompatible(const GenericVector<RowScratchRegisters> *rows, int a, int b,
bool CrownCompatible(const std::vector<RowScratchRegisters> *rows, int a, int b,
const ParagraphModel *model) {
if (model != kCrownRight && model != kCrownLeft) {
tprintf("CrownCompatible() should only be called with crown models!\n");
@ -1308,7 +1316,7 @@ bool CrownCompatible(const GenericVector<RowScratchRegisters> *rows, int a, int
// =============== Implementation of ParagraphModelSmearer ====================
ParagraphModelSmearer::ParagraphModelSmearer(GenericVector<RowScratchRegisters> *rows,
ParagraphModelSmearer::ParagraphModelSmearer(std::vector<RowScratchRegisters> *rows,
int row_start, int row_end, ParagraphTheory *theory)
: theory_(theory), rows_(rows), row_start_(row_start), row_end_(row_end) {
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
@ -1341,7 +1349,7 @@ void ParagraphModelSmearer::CalculateOpenModels(int row_start, int row_end) {
// This is basic filtering; we check likely paragraph starty-ness down
// below in Smear() -- you know, whether the first word would have fit
// and such.
still_open.push_back_new(opened[m]);
push_back_new(still_open, opened[m]);
}
}
OpenModels(row + 1) = still_open;
@ -1449,7 +1457,7 @@ void ParagraphModelSmearer::Smear() {
// Find out what ParagraphModels are actually used, and discard any
// that are not.
static void DiscardUnusedModels(const GenericVector<RowScratchRegisters> &rows,
static void DiscardUnusedModels(const std::vector<RowScratchRegisters> &rows,
ParagraphTheory *theory) {
SetOfModels used_models;
for (int i = 0; i < rows.size(); i++) {
@ -1483,7 +1491,7 @@ static void DiscardUnusedModels(const GenericVector<RowScratchRegisters> &rows,
// sequences of body lines of equivalent type abutted against the beginning
// or a body or start line of a different type into a crown paragraph.
static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
GenericVector<RowScratchRegisters> *rows) {
std::vector<RowScratchRegisters> *rows) {
int start;
for (int end = rows->size(); end > 0; end = start) {
// Search back for a body line of a unique type.
@ -1546,7 +1554,7 @@ static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
// really just ignore it as an outlier. To express this, we allow the
// user to specify the percentile (0..100) of indent values to use as
// the common margin for each row in the run of rows[start, end).
void RecomputeMarginsAndClearHypotheses(GenericVector<RowScratchRegisters> *rows, int start,
void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows, int start,
int end, int percentile) {
if (!AcceptableRowArgs(0, 0, __func__, rows, start, end))
return;
@ -1585,7 +1593,7 @@ void RecomputeMarginsAndClearHypotheses(GenericVector<RowScratchRegisters> *rows
}
// Return the median inter-word space in rows[row_start, row_end).
int InterwordSpace(const GenericVector<RowScratchRegisters> &rows, int row_start, int row_end) {
int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start, int row_end) {
if (row_end < row_start + 1)
return 1;
int word_height =
@ -1666,7 +1674,7 @@ static bool LikelyParagraphStart(const RowScratchRegisters &before,
// If the rows given could be a consistent start to a paragraph, set *consistent
// true.
static ParagraphModel InternalParagraphModelByOutline(
const GenericVector<RowScratchRegisters> *rows, int start, int end, int tolerance,
const std::vector<RowScratchRegisters> *rows, int start, int end, int tolerance,
bool *consistent) {
int ltr_line_count = 0;
for (int i = start; i < end; i++) {
@ -1763,7 +1771,7 @@ static ParagraphModel InternalParagraphModelByOutline(
// justification_ = JUSTIFICATION_UNKNOWN and print the paragraph to debug
// output if we're debugging.
static ParagraphModel ParagraphModelByOutline(int debug_level,
const GenericVector<RowScratchRegisters> *rows,
const std::vector<RowScratchRegisters> *rows,
int start, int end, int tolerance) {
bool unused_consistent;
ParagraphModel retval =
@ -1776,7 +1784,7 @@ static ParagraphModel ParagraphModelByOutline(int debug_level,
}
// Do rows[start, end) form a single instance of the given paragraph model?
bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows, int start, int end,
bool RowsFitModel(const std::vector<RowScratchRegisters> *rows, int start, int end,
const ParagraphModel *model) {
if (!AcceptableRowArgs(0, 1, __func__, rows, start, end))
return false;
@ -1800,7 +1808,7 @@ bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows, int start, int
// We only take the very strongest signals, as we don't want to get
// confused and marking up centered text, poetry, or source code as
// clearly part of a typical paragraph.
static void MarkStrongEvidence(GenericVector<RowScratchRegisters> *rows, int row_start,
static void MarkStrongEvidence(std::vector<RowScratchRegisters> *rows, int row_start,
int row_end) {
// Record patently obvious body text.
for (int i = row_start + 1; i < row_end; i++) {
@ -1862,7 +1870,7 @@ static void MarkStrongEvidence(GenericVector<RowScratchRegisters> *rows, int row
// Look for sequences of a start line followed by some body lines in
// rows[row_start, row_end) and create ParagraphModels for them if
// they seem coherent.
static void ModelStrongEvidence(int debug_level, GenericVector<RowScratchRegisters> *rows,
static void ModelStrongEvidence(int debug_level, std::vector<RowScratchRegisters> *rows,
int row_start, int row_end, bool allow_flush_models,
ParagraphTheory *theory) {
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))
@ -1951,7 +1959,7 @@ static void ModelStrongEvidence(int debug_level, GenericVector<RowScratchRegiste
// clues.
// (3) Form models for any sequence of start + continuation lines.
// (4) Smear the paragraph models to cover surrounding text.
static void StrongEvidenceClassify(int debug_level, GenericVector<RowScratchRegisters> *rows,
static void StrongEvidenceClassify(int debug_level, std::vector<RowScratchRegisters> *rows,
int row_start, int row_end, ParagraphTheory *theory) {
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))
return;
@ -1979,7 +1987,7 @@ static void StrongEvidenceClassify(int debug_level, GenericVector<RowScratchRegi
smearer.Smear();
}
static void SeparateSimpleLeaderLines(GenericVector<RowScratchRegisters> *rows, int row_start,
static void SeparateSimpleLeaderLines(std::vector<RowScratchRegisters> *rows, int row_start,
int row_end, ParagraphTheory *theory) {
for (int i = row_start + 1; i < row_end - 1; i++) {
if ((*rows)[i - 1].ri_->has_leaders && (*rows)[i].ri_->has_leaders &&
@ -1994,8 +2002,8 @@ static void SeparateSimpleLeaderLines(GenericVector<RowScratchRegisters> *rows,
// Collect sequences of unique hypotheses in row registers and create proper
// paragraphs for them, referencing the paragraphs in row_owners.
static void ConvertHypothesizedModelRunsToParagraphs(int debug_level,
GenericVector<RowScratchRegisters> &rows,
GenericVector<PARA *> *row_owners,
std::vector<RowScratchRegisters> &rows,
std::vector<PARA *> *row_owners,
ParagraphTheory *theory) {
int end = rows.size();
int start;
@ -2090,7 +2098,7 @@ struct Interval {
// (1) If a line is surrounded by lines of unknown type, it's weak.
// (2) If two lines in a row are start lines for a given paragraph type, but
// after that the same paragraph type does not continue, they're weak.
static bool RowIsStranded(const GenericVector<RowScratchRegisters> &rows, int row) {
static bool RowIsStranded(const std::vector<RowScratchRegisters> &rows, int row) {
SetOfModels row_models;
rows[row].StrongHypotheses(&row_models);
@ -2145,8 +2153,8 @@ static bool RowIsStranded(const GenericVector<RowScratchRegisters> &rows, int ro
// + Crown paragraphs not immediately followed by a strongly modeled line.
// + Single line paragraphs surrounded by text that doesn't match the
// model.
static void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,
GenericVector<Interval> *to_fix, int row_start, int row_end) {
static void LeftoverSegments(const std::vector<RowScratchRegisters> &rows,
std::vector<Interval> *to_fix, int row_start, int row_end) {
to_fix->clear();
for (int i = row_start; i < row_end; i++) {
bool needs_fixing = false;
@ -2195,8 +2203,8 @@ static void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,
// Given a set of row_owners pointing to PARAs or nullptr (no paragraph known),
// normalize each row_owner to point to an actual PARA, and output the
// paragraphs in order onto paragraphs.
void CanonicalizeDetectionResults(GenericVector<PARA *> *row_owners, PARA_LIST *paragraphs) {
GenericVector<PARA *> &rows = *row_owners;
void CanonicalizeDetectionResults(std::vector<PARA *> *row_owners, PARA_LIST *paragraphs) {
std::vector<PARA *> &rows = *row_owners;
paragraphs->clear();
PARA_IT out(paragraphs);
PARA *formerly_null = nullptr;
@ -2226,16 +2234,16 @@ void CanonicalizeDetectionResults(GenericVector<PARA *> *row_owners, PARA_LIST *
// models - the list of paragraph models referenced by the PARA objects.
// caller is responsible for deleting the models.
void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners, PARA_LIST *paragraphs,
std::vector<PARA *> *row_owners, PARA_LIST *paragraphs,
std::vector<ParagraphModel *> *models) {
GenericVector<RowScratchRegisters> rows;
std::vector<RowScratchRegisters> rows;
ParagraphTheory theory(models);
// Initialize row_owners to be a bunch of nullptr pointers.
row_owners->init_to_size(row_infos->size(), nullptr);
row_owners->resize(row_infos->size());
// Set up row scratch registers for the main algorithm.
rows.init_to_size(row_infos->size(), RowScratchRegisters());
rows.resize(row_infos->size(), RowScratchRegisters());
for (int i = 0; i < row_infos->size(); i++) {
rows[i].Init((*row_infos)[i]);
}
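
The resize calls above rely on std::vector value-initializing appended elements, so row_owners comes back filled with nullptr just as init_to_size(size, nullptr) did; the same reasoning applies to the sized construction of font_total_score in control.cpp earlier in this commit. A minimal check of that assumption with placeholder types and sizes:

#include <cassert>
#include <vector>

struct Para {};  // placeholder standing in for PARA

int main() {
  std::vector<Para *> row_owners;
  row_owners.resize(4);                  // like init_to_size(4, nullptr)
  assert(row_owners[3] == nullptr);      // appended elements are value-initialized

  std::vector<int> font_total_score(8);  // like init_to_size(8, 0)
  assert(font_total_score[7] == 0);
  return 0;
}
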
@ -2249,7 +2257,7 @@ void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
DebugDump(debug_level > 1, "End of Pass 1", theory, rows);
GenericVector<Interval> leftovers;
std::vector<Interval> leftovers;
LeftoverSegments(rows, &leftovers, 0, rows.size());
for (int i = 0; i < leftovers.size(); i++) {
// Pass 2a:
@ -2263,7 +2271,7 @@ void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
// If we had any luck in pass 2a, we got part of the page and didn't
// know how to classify a few runs of rows. Take the segments that
// didn't find a model and reprocess them individually.
GenericVector<Interval> leftovers2;
std::vector<Interval> leftovers2;
LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end);
bool pass2a_was_useful =
leftovers2.size() > 1 ||
@ -2422,7 +2430,7 @@ static void InitializeRowInfo(bool after_recognition, const MutableIterator &it,
}
PAGE_RES_IT page_res_it = *it.PageResIt();
GenericVector<WERD_RES *> werds;
std::vector<WERD_RES *> werds;
WERD_RES *word_res = page_res_it.restart_row();
ROW_RES *this_row = page_res_it.row();
int num_leaders = 0;
@ -2505,12 +2513,12 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
}
// Run the paragraph detection algorithm.
GenericVector<PARA *> row_owners;
GenericVector<PARA *> the_paragraphs;
std::vector<PARA *> row_owners;
std::vector<PARA *> the_paragraphs;
if (!is_image_block) {
DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(), models);
} else {
row_owners.init_to_size(row_infos.size(), nullptr);
row_owners.resize(row_infos.size());
CanonicalizeDetectionResults(&row_owners, block->para_list());
}


@ -31,9 +31,6 @@ class ParagraphModel;
class PARA_LIST;
struct PARA;
template <typename T>
class GenericVector;
// This structure captures all information needed about a text line for the
// purposes of paragraph detection. It is meant to be exceedingly light-weight
// so that we can easily test paragraph detection independent of the rest of
@ -90,7 +87,7 @@ public:
// caller is responsible for deleting the models.
TESS_API
void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners, PARA_LIST *paragraphs,
std::vector<PARA *> *row_owners, PARA_LIST *paragraphs,
std::vector<ParagraphModel *> *models);
// Given a MutableIterator to the start of a block, run DetectParagraphs on


@ -95,7 +95,7 @@ struct LineHypothesis {
class ParagraphTheory; // Forward Declaration
using SetOfModels = GenericVector<const ParagraphModel *>;
using SetOfModels = std::vector<const ParagraphModel *>;
// Row Scratch Registers are data generated by the paragraph detection
// algorithm based on a RowInfo input.
@ -123,7 +123,7 @@ public:
// Clear all hypotheses about this line.
void SetUnknown() {
hypotheses_.truncate(0);
hypotheses_.clear();
}
// Append all hypotheses of strong models that match this row as a start.
@ -190,7 +190,7 @@ public:
private:
// Hypotheses of either LT_START or LT_BODY
GenericVector<LineHypothesis> hypotheses_;
std::vector<LineHypothesis> hypotheses_;
};
// A collection of convenience functions for wrapping the set of
@ -219,21 +219,21 @@ public:
// If any of the non-centered paragraph models we know about fit
// rows[start, end), return it. Else nullptr.
const ParagraphModel *Fits(const GenericVector<RowScratchRegisters> *rows, int start,
const ParagraphModel *Fits(const std::vector<RowScratchRegisters> *rows, int start,
int end) const;
int IndexOf(const ParagraphModel *model) const;
private:
std::vector<ParagraphModel *> *models_;
GenericVector<ParagraphModel *> models_we_added_;
std::vector<ParagraphModel *> models_we_added_;
};
bool ValidFirstLine(const GenericVector<RowScratchRegisters> *rows, int row,
bool ValidFirstLine(const std::vector<RowScratchRegisters> *rows, int row,
const ParagraphModel *model);
bool ValidBodyLine(const GenericVector<RowScratchRegisters> *rows, int row,
bool ValidBodyLine(const std::vector<RowScratchRegisters> *rows, int row,
const ParagraphModel *model);
bool CrownCompatible(const GenericVector<RowScratchRegisters> *rows, int a, int b,
bool CrownCompatible(const std::vector<RowScratchRegisters> *rows, int a, int b,
const ParagraphModel *model);
// A class for smearing Paragraph Model hypotheses to surrounding rows.
@ -245,7 +245,7 @@ bool CrownCompatible(const GenericVector<RowScratchRegisters> *rows, int a, int
// "smear" our models over the text.
class ParagraphModelSmearer {
public:
ParagraphModelSmearer(GenericVector<RowScratchRegisters> *rows, int row_start, int row_end,
ParagraphModelSmearer(std::vector<RowScratchRegisters> *rows, int row_start, int row_end,
ParagraphTheory *theory);
// Smear forward paragraph models from existing row markings to subsequent
@ -266,7 +266,7 @@ private:
}
ParagraphTheory *theory_;
GenericVector<RowScratchRegisters> *rows_;
std::vector<RowScratchRegisters> *rows_;
int row_start_;
int row_end_;
@ -284,11 +284,11 @@ private:
// Clear all hypotheses about lines [start, end) and reset the margins to the
// percentile (0..100) value of the left and right row edges for this run of
// rows.
void RecomputeMarginsAndClearHypotheses(GenericVector<RowScratchRegisters> *rows, int start,
void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows, int start,
int end, int percentile);
// Return the median inter-word space in rows[row_start, row_end).
int InterwordSpace(const GenericVector<RowScratchRegisters> &rows, int row_start, int row_end);
int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start, int row_end);
// Return whether the first word on the after line can fit in the space at
// the end of the before line (knowing which way the text is aligned and read).
@ -300,13 +300,13 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRe
bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRegisters &after);
// Do rows[start, end) form a single instance of the given paragraph model?
bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows, int start, int end,
bool RowsFitModel(const std::vector<RowScratchRegisters> *rows, int start, int end,
const ParagraphModel *model);
// Given a set of row_owners pointing to PARAs or nullptr (no paragraph known),
// normalize each row_owner to point to an actual PARA, and output the
// paragraphs in order onto paragraphs.
void CanonicalizeDetectionResults(GenericVector<PARA *> *row_owners, PARA_LIST *paragraphs);
void CanonicalizeDetectionResults(std::vector<PARA *> *row_owners, PARA_LIST *paragraphs);
} // namespace tesseract

View File

@ -45,7 +45,7 @@
#include <tesseract/publictypes.h> // for OcrEngineMode, PageSegMode, OEM_L...
#include <tesseract/unichar.h> // for UNICHAR_ID
#include "genericvector.h" // for GenericVector, PointerVector
#include "genericvector.h" // for PointerVector
#include <allheaders.h> // for pixDestroy, pixGetWidth, pixGetHe...
@ -398,27 +398,27 @@ public:
// Input: a set of noisy outlines that probably belong to the real_word.
// Output: outlines that overlapped blobs are set to nullptr and put back into
// the word, either in the blobs or in the reject list.
void AssignDiacriticsToOverlappingBlobs(const GenericVector<C_OUTLINE *> &outlines, int pass,
void AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *> &outlines, int pass,
WERD *real_word, PAGE_RES_IT *pr_it,
GenericVector<bool> *word_wanted,
GenericVector<bool> *overlapped_any_blob,
GenericVector<C_BLOB *> *target_blobs);
std::vector<bool> *word_wanted,
std::vector<bool> *overlapped_any_blob,
std::vector<C_BLOB *> *target_blobs);
// Attempts to assign non-overlapping outlines to their nearest blobs or
// make new blobs out of them.
void AssignDiacriticsToNewBlobs(const GenericVector<C_OUTLINE *> &outlines, int pass,
void AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outlines, int pass,
WERD *real_word, PAGE_RES_IT *pr_it,
GenericVector<bool> *word_wanted,
GenericVector<C_BLOB *> *target_blobs);
std::vector<bool> *word_wanted,
std::vector<C_BLOB *> *target_blobs);
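One caveat with the GenericVector<bool> to std::vector<bool> switch in these signatures: std::vector<bool> is a packed specialization, so indexing returns a proxy rather than a bool&, and there is no bool* data(). Routines that only read and write individual flags are unaffected; code needing real bool references would have to use something like std::vector<char>. A brief standalone sketch of the difference (illustrative only):

#include <cassert>
#include <vector>

int main() {
  std::vector<bool> word_wanted(4, false);
  word_wanted[2] = true;   // proxy assignment behaves like a plain bool
  assert(word_wanted[2]);
  // Not available on the packed specialization:
  //   bool *raw = word_wanted.data();   // std::vector<bool> has no bool* data()
}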
// Starting with ok_outlines set to indicate which outlines overlap the blob,
// chooses the optimal set (approximately) and returns true if any outlines
// are desired, in which case ok_outlines indicates which ones.
bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold, PAGE_RES_IT *pr_it,
C_BLOB *blob, const GenericVector<C_OUTLINE *> &outlines,
C_BLOB *blob, const std::vector<C_OUTLINE *> &outlines,
int num_outlines, std::vector<bool> *ok_outlines);
// Classifies the given blob plus the outlines flagged by ok_outlines, undoes
// the inclusion of the outlines, and returns the certainty of the raw choice.
float ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
const GenericVector<C_OUTLINE *> &outlines, int pass_n,
const std::vector<C_OUTLINE *> &outlines, int pass_n,
PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str);
// Classifies the given blob (part of word_data->word->word) as an individual
// word, using languages, chopper etc, returning only the certainty of the
@ -703,22 +703,22 @@ public:
void ReSegmentByClassification(PAGE_RES *page_res);
// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
// Returns false if an invalid UNICHAR_ID is encountered.
bool ConvertStringToUnichars(const char *utf8, GenericVector<UNICHAR_ID> *class_ids);
bool ConvertStringToUnichars(const char *utf8, std::vector<UNICHAR_ID> *class_ids);
// Resegments the word to achieve the target_text from the classifier.
// Returns false if the re-segmentation fails.
// Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and
// applies a full search on the classifier results to find the best classified
// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
// substitutions ARE used.
bool FindSegmentation(const GenericVector<UNICHAR_ID> &target_text, WERD_RES *word_res);
bool FindSegmentation(const std::vector<UNICHAR_ID> &target_text, WERD_RES *word_res);
// Recursive helper to find a match to the target_text (from text_index
// position) in the choices (from choices_pos position).
// Choices is an array of GenericVectors, of length choices_length, with each
// Choices is an array of vectors of length choices_length, with each
// element representing a starting position in the word, and the
// GenericVector holding classification results for a sequence of consecutive
// vector holding classification results for a sequence of consecutive
// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
void SearchForText(const GenericVector<BLOB_CHOICE_LIST *> *choices, int choices_pos,
int choices_length, const GenericVector<UNICHAR_ID> &target_text,
void SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, int choices_pos,
int choices_length, const std::vector<UNICHAR_ID> &target_text,
int text_index, float rating, std::vector<int> *segmentation,
float *best_rating, std::vector<int> *best_segmentation);
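The indexing convention described in the comment above is easy to misread, so here is a small standalone illustration using strings in place of BLOB_CHOICE_LIST (purely for demonstration): choices[i][j - 1] describes the group of j consecutive blobs starting at blob i.

#include <iostream>
#include <string>
#include <vector>

int main() {
  const std::string blobs = "word";  // four pretend blobs
  const int length = static_cast<int>(blobs.size());
  std::vector<std::vector<std::string>> choices(length);
  for (int i = 0; i < length; ++i) {
    for (int j = 1; i + j <= length; ++j) {
      choices[i].push_back(blobs.substr(i, j));  // choices[i][j - 1] covers blobs [i, i + j)
    }
  }
  std::cout << choices[1][2] << "\n";  // blobs 1..3 -> "ord"
}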
// Counts up the labelled words and the blobs within.

View File

@ -502,7 +502,7 @@ void WERD::CleanNoise(float size_threshold) {
// Extracts all the noise outlines and stuffs the pointers into the given
// vector of outlines. Afterwards, the outlines vector owns the pointers.
void WERD::GetNoiseOutlines(GenericVector<C_OUTLINE *> *outlines) {
void WERD::GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines) {
C_BLOB_IT rej_it(&rej_cblobs);
for (rej_it.mark_cycle_pt(); !rej_it.empty(); rej_it.forward()) {
C_BLOB *blob = rej_it.extract();
@ -516,13 +516,13 @@ void WERD::GetNoiseOutlines(GenericVector<C_OUTLINE *> *outlines) {
// back in rej_cblobs where they came from. Where the target_blobs entry is
// nullptr, a run of wanted outlines is put into a single new blob.
// Ownership of the outlines is transferred back to the word. (Hence
// GenericVector and not PointerVector.)
// vector and not PointerVector.)
// Returns true if any new blob was added to the start of the word, which
// suggests that it might need joining to the word before it, and likewise
// sets make_next_word_fuzzy true if any new blob was added to the end.
bool WERD::AddSelectedOutlines(const GenericVector<bool> &wanted,
const GenericVector<C_BLOB *> &target_blobs,
const GenericVector<C_OUTLINE *> &outlines,
bool WERD::AddSelectedOutlines(const std::vector<bool> &wanted,
const std::vector<C_BLOB *> &target_blobs,
const std::vector<C_OUTLINE *> &outlines,
bool *make_next_word_fuzzy) {
bool outline_added_to_start = false;
if (make_next_word_fuzzy != nullptr)

View File

@ -21,7 +21,6 @@
#include "bits16.h"
#include "elst2.h"
#include "genericvector.h" // GenericVector
#include "params.h"
#include "stepblob.h"
@ -173,18 +172,18 @@ public:
// Extracts all the noise outlines and stuffs the pointers into the given
// vector of outlines. Afterwards, the outlines vector owns the pointers.
void GetNoiseOutlines(GenericVector<C_OUTLINE *> *outlines);
void GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines);
// Adds the selected outlines to the indicated real blobs, and puts the rest
// back in rej_cblobs where they came from. Where the target_blobs entry is
// nullptr, a run of wanted outlines is put into a single new blob.
// Ownership of the outlines is transferred back to the word. (Hence
// GenericVector and not PointerVector.)
// vector and not PointerVector.)
// Returns true if any new blob was added to the start of the word, which
// suggests that it might need joining to the word before it, and likewise
// sets make_next_word_fuzzy true if any new blob was added to the end.
bool AddSelectedOutlines(const GenericVector<bool> &wanted,
const GenericVector<C_BLOB *> &target_blobs,
const GenericVector<C_OUTLINE *> &outlines, bool *make_next_word_fuzzy);
bool AddSelectedOutlines(const std::vector<bool> &wanted,
const std::vector<C_BLOB *> &target_blobs,
const std::vector<C_OUTLINE *> &outlines, bool *make_next_word_fuzzy);
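The "(Hence vector and not PointerVector.)" remark deserves a gloss: PointerVector deletes its elements when destroyed, while a plain std::vector of raw pointers never does, which is exactly what is wanted once ownership of the outlines has been handed back to the WERD. A minimal sketch of the distinction, using a stand-in type rather than C_OUTLINE:

#include <vector>

struct Outline {};  // stand-in for C_OUTLINE

int main() {
  std::vector<Outline *> outlines;  // non-owning: the vector never deletes its elements
  outlines.push_back(new Outline);

  // A PointerVector would free these on destruction; with std::vector the
  // actual owner of the pointers must do it explicitly.
  for (Outline *o : outlines) {
    delete o;
  }
}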
private:
uint8_t blanks = 0; // no of blanks

View File

@ -225,16 +225,6 @@ public:
qsort(data_, size_used_, sizeof(*data_), comparator);
}
// Searches the array (assuming sorted in ascending order, using sort()) for
// an element equal to target and returns true if it is present.
// Use binary_search to get the index of target, or its nearest candidate.
bool bool_binary_search(const T &target) const {
int index = binary_search(target);
if (index >= size_used_) {
return false;
}
return data_[index] == target;
}
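The deleted bool_binary_search has a direct standard-library equivalent, so callers can switch to std::binary_search on the same sorted data. A short sketch of the replacement, assuming the container is sorted ascending as the removed comment required:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> sorted_vec{1, 3, 5, 7};
  // Equivalent of the removed bool_binary_search(target):
  assert(std::binary_search(sorted_vec.begin(), sorted_vec.end(), 5));
  assert(!std::binary_search(sorted_vec.begin(), sorted_vec.end(), 4));
}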
// Searches the array (assuming sorted in ascending order, using sort()) for
// an element equal to target and returns the index of the best candidate.
// The return value is conceptually the largest index i such that

View File

@ -92,15 +92,15 @@ public:
return ComputeForegroundDensity(tbox);
}
int RunCountAlignment(const GenericVector<int> &sorted_vec, const int val) {
int RunCountAlignment(const std::vector<int> &sorted_vec, const int val) {
return CountAlignment(sorted_vec, val);
}
void RunSplitCPHorLite(ColPartition *part, GenericVector<TBOX> *splitted_boxes) {
void RunSplitCPHorLite(ColPartition *part, std::vector<TBOX> *splitted_boxes) {
SplitCPHorLite(part, splitted_boxes);
}
void RunSplitCPHor(ColPartition *part, GenericVector<ColPartition *> *parts_splitted) {
void RunSplitCPHor(ColPartition *part, std::vector<ColPartition *> *parts_splitted) {
SplitCPHor(part, parts_splitted);
}
@ -377,7 +377,7 @@ TEST_F(EquationFinderTest, ComputeForegroundDensity) {
}
TEST_F(EquationFinderTest, CountAlignment) {
GenericVector<int> vec;
std::vector<int> vec;
vec.push_back(1);
vec.push_back(1);
vec.push_back(1);
@ -452,7 +452,7 @@ TEST_F(EquationFinderTest, SplitCPHorLite) {
ColPartition *part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
part->DeleteBoxes();
part->set_median_width(10);
GenericVector<TBOX> splitted_boxes;
std::vector<TBOX> splitted_boxes;
// Test an empty part.
equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
@ -486,7 +486,7 @@ TEST_F(EquationFinderTest, SplitCPHor) {
ColPartition *part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
part->DeleteBoxes();
part->set_median_width(10);
GenericVector<ColPartition *> parts_splitted;
std::vector<ColPartition *> parts_splitted;
// Test an empty part.
equation_det_->RunSplitCPHor(part, &parts_splitted);
@ -512,7 +512,9 @@ TEST_F(EquationFinderTest, SplitCPHor) {
EXPECT_TRUE(TBOX(100, 0, 140, 45) == parts_splitted[1]->bounding_box());
EXPECT_TRUE(TBOX(500, 0, 540, 35) == parts_splitted[2]->bounding_box());
parts_splitted.delete_data_pointers();
for (auto part_splitted : parts_splitted) {
delete part_splitted;
}
part->DeleteBoxes();
delete (part);
}
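GenericVector::delete_data_pointers() has no std::vector counterpart, so each such site becomes an explicit delete loop, as above. Where a container is the sole owner of its elements, a vector of std::unique_ptr avoids the loop entirely; a hedged sketch of that alternative (not what the patch does, and using a stand-in type):

#include <memory>
#include <vector>

struct ColPartition {};  // stand-in for the real class

int main() {
  std::vector<std::unique_ptr<ColPartition>> parts_splitted;
  parts_splitted.emplace_back(std::make_unique<ColPartition>());
  parts_splitted.emplace_back(std::make_unique<ColPartition>());
  // No delete loop needed: elements are freed when the vector goes out of scope.
}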

View File

@ -107,7 +107,7 @@ void MakeAsciiRowInfos(const TextAndModel *row_infos, int n, std::vector<RowInfo
// Given n rows of reference ground truth, evaluate whether the n rows
// of PARA * pointers yield the same paragraph breakpoints.
void EvaluateParagraphDetection(const TextAndModel *correct, int n,
const GenericVector<PARA *> &detector_output) {
const std::vector<PARA *> &detector_output) {
int incorrect_breaks = 0;
int missed_breaks = 0;
int poorly_matched_models = 0;
@ -186,7 +186,7 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
void TestParagraphDetection(const TextAndModel *correct, int num_rows) {
std::vector<RowInfo> row_infos;
GenericVector<PARA *> row_owners;
std::vector<PARA *> row_owners;
PARA_LIST paragraphs;
std::vector<ParagraphModel *> models;
@ -312,7 +312,7 @@ TEST(ParagraphsTest, TestSingleFullPageContinuation) {
const TextAndModel *correct = kSingleFullPageContinuation;
int num_rows = countof(kSingleFullPageContinuation);
std::vector<RowInfo> row_infos;
GenericVector<PARA *> row_owners;
std::vector<PARA *> row_owners;
PARA_LIST paragraphs;
std::vector<ParagraphModel *> models;
models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));