From d1eed6a82157b333979a1961c022c29e78a7a0eb Mon Sep 17 00:00:00 2001 From: Karthick J Date: Fri, 5 Jul 2024 19:27:07 +0530 Subject: [PATCH] Fix the crash issue in LSTM multithreading Changed the WERD_RES linked list to use shared pointers instead of raw pointers. This is needed so that even if one thread deletes a WERD_RES object, other threads that need to iterate through the list can still access it safely. In terms of LSTM processing, only one thread processes any given WERD_RES. This change is needed because all the threads can iterate through every element, due to the singly linked list data structure. --- src/ccmain/control.cpp | 33 +++--- src/ccmain/fixspace.cpp | 40 +++---- src/ccmain/tesseractclass.h | 2 +- src/ccstruct/pageres.cpp | 41 +++++-- src/ccstruct/pageres.h | 34 ++++-- src/ccutil/elst.cpp | 80 ++++++++----- src/ccutil/elst.h | 222 +++++++++++++++++++++++------------- src/ccutil/list.h | 50 ++++++++ 8 files changed, 338 insertions(+), 164 deletions(-) diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index 05da662b..a35d539e 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -203,8 +203,8 @@ bool Tesseract::RecogWordsSegment(std::vector::iterator start, LSTMRecognizer *lstm_recognizer, std::atomic& words_done, int total_words, - std::mutex& monitor_mutex) { - PAGE_RES_IT pr_it(page_res); + std::shared_ptr recog_words_mutex) { + PAGE_RES_IT pr_it(page_res, recog_words_mutex); // Process a segment of the words vector pr_it.restart_page(); @@ -214,7 +214,7 @@ bool Tesseract::RecogWordsSegment(std::vector::iterator start, word->prev_word = &(*(it - 1)); } if (monitor != nullptr) { - std::lock_guard lock(monitor_mutex); + std::lock_guard lock(*recog_words_mutex); monitor->ocr_alive = true; if (pass_n == 1) { monitor->progress = 70 * words_done / total_words; @@ -245,9 +245,7 @@ bool Tesseract::RecogWordsSegment(std::vector::iterator start, } } // Sync pr_it with the WordData. 
- while (pr_it.word() != nullptr && pr_it.word() != word->word) { - pr_it.forward(); - } + pr_it.forward_to_word(word->word); ASSERT_HOST(pr_it.word() != nullptr); bool make_next_word_fuzzy = false; #ifndef DISABLED_LEGACY_ENGINE @@ -274,19 +272,24 @@ bool Tesseract::RecogWordsSegment(std::vector::iterator start, bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES *page_res, std::vector *words) { int total_words = words->size(); - int segment_size = total_words / lstm_num_threads; + int segment_size = std::max(total_words / lstm_num_threads, 1); std::atomic words_done(0); - std::mutex monitor_mutex; + std::shared_ptr recog_words_mutex = std::make_shared(); std::vector> futures; // Launch multiple threads to recognize the words in parallel auto segment_start = words->begin() + segment_size; - for (int i = 1; i < lstm_num_threads; ++i) { - auto segment_end = (i == lstm_num_threads - 1) ? words->end() : segment_start + segment_size; - futures.push_back(std::async(std::launch::async, &Tesseract::RecogWordsSegment, - this, segment_start, segment_end, pass_n, monitor, page_res, - lstm_recognizers_[i], std::ref(words_done), total_words, std::ref(monitor_mutex))); - segment_start = segment_end; + for (int i = 1; i < lstm_num_threads && segment_start != words->end(); ++i) { + auto segment_end = segment_start + segment_size; + if (i == lstm_num_threads - 1 || + std::distance(segment_start, words->end()) < segment_size) { + segment_end = words->end(); + } + futures.push_back(std::async( + std::launch::async, &Tesseract::RecogWordsSegment, this, segment_start, + segment_end, pass_n, monitor, page_res, lstm_recognizers_[i], + std::ref(words_done), total_words, recog_words_mutex)); + segment_start = segment_end; } // Process the first segment in this thread @@ -298,7 +301,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES *pa lstm_recognizers_[0], std::ref(words_done), total_words, - std::ref(monitor_mutex)); + 
recog_words_mutex); // Wait for all threads to complete and aggregate results for (auto &f : futures) { diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp index 7f03ff03..96931c0c 100644 --- a/src/ccmain/fixspace.cpp +++ b/src/ccmain/fixspace.cpp @@ -79,7 +79,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R ROW_RES_IT row_res_it; WERD_RES_IT word_res_it_from; WERD_RES_IT word_res_it_to; - WERD_RES *word_res; + std::shared_ptr word_res; WERD_RES_LIST fuzzy_space_words; int16_t new_length; bool prevent_null_wd_fixsp; // DON'T process blobless wds @@ -114,7 +114,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R if (!word_res_it_from.at_last()) { word_res_it_to = word_res_it_from; prevent_null_wd_fixsp = word_res->word->cblob_list()->empty(); - if (check_debug_pt(word_res, 60)) { + if (check_debug_pt(word_res.get(), 60)) { debug_fix_space_level.set_value(10); } word_res_it_to.forward(); @@ -131,7 +131,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R while (!word_res_it_to.at_last() && (word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) || word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) { - if (check_debug_pt(word_res, 60)) { + if (check_debug_pt(word_res.get(), 60)) { debug_fix_space_level.set_value(10); } if (word_res->word->cblob_list()->empty()) { @@ -139,7 +139,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R } word_res = word_res_it_to.forward(); } - if (check_debug_pt(word_res, 60)) { + if (check_debug_pt(word_res.get(), 60)) { debug_fix_space_level.set_value(10); } if (word_res->word->cblob_list()->empty()) { @@ -203,12 +203,12 @@ void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) { WERD_RES *new_wd; for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { - WERD_RES *src_wd = src_it.data(); + WERD_RES *src_wd = src_it.data().get(); if 
(!src_wd->combination) { new_wd = WERD_RES::deep_copy(src_wd); new_wd->combination = false; new_wd->part_of_combo = false; - new_it.add_after_then_move(new_wd); + new_it.add_after_then_move(std::shared_ptr(new_wd)); } } } @@ -220,7 +220,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK *block // prev_word_best_choice_ before calling classify_word_pass2(). prev_word_best_choice_ = nullptr; for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); + word = word_it.data().get(); if ((!word->part_of_combo) && (word->box_word == nullptr)) { WordData word_data(block, row, word); SetupWordPassN(2, &word_data); @@ -269,7 +269,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { const char *punct_chars = "!\"`',.:;"; do { // current word - WERD_RES *word = word_res_it.data(); + WERD_RES *word = word_res_it.data().get(); bool word_done = fixspace_thinks_word_done(word); word_count++; if (word->tess_failed) { @@ -396,7 +396,7 @@ void transform_to_next_perm(WERD_RES_LIST &words) { int16_t min_gap = INT16_MAX; for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); + word = word_it.data().get(); if (!word->part_of_combo) { box = word->word->bounding_box(); if (prev_right > -INT16_MAX) { @@ -413,13 +413,13 @@ void transform_to_next_perm(WERD_RES_LIST &words) { word_it.set_to_list(&words); // Note: we can't use cycle_pt due to inserted combos at start of list. 
for (; (prev_right == -INT16_MAX) || !word_it.at_first(); word_it.forward()) { - word = word_it.data(); + word = word_it.data().get(); if (!word->part_of_combo) { box = word->word->bounding_box(); if (prev_right > -INT16_MAX) { gap = box.left() - prev_right; if (gap <= min_gap) { - prev_word = prev_word_it.data(); + prev_word = prev_word_it.data().get(); WERD_RES *combo; if (prev_word->combination) { combo = prev_word; @@ -433,14 +433,14 @@ void transform_to_next_perm(WERD_RES_LIST &words) { combo->combination = true; combo->x_height = prev_word->x_height; prev_word->part_of_combo = true; - prev_word_it.add_before_then_move(combo); + prev_word_it.add_before_then_move(std::shared_ptr(combo)); } combo->word->set_flag(W_EOL, word->word->flag(W_EOL)); if (word->combination) { combo->word->join_on(word->word); // Move blobs to combo // old combo no longer needed - delete word_it.extract(); + word_it.extract(); } else { // Copy current wd to combo combo->copy_on(word); @@ -545,7 +545,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block) int16_t new_length; float junk; - word_res = word_res_it.data(); + word_res = word_res_it.data().get(); if (word_res->word->flag(W_REP_CHAR) || word_res->combination || word_res->part_of_combo || !word_res->word->flag(W_DONT_CHOP)) { return; @@ -582,11 +582,11 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK * dump_words(best_perm, best_score, 1, improved); - old_word_res = best_perm_it.data(); + old_word_res = best_perm_it.data().get(); // Even deep_copy doesn't copy the underlying WERD unless its combination // flag is true!. 
old_word_res->combination = true; // Kludge to force deep copy - current_perm_it.add_to_end(WERD_RES::deep_copy(old_word_res)); + current_perm_it.add_to_end(std::shared_ptr(WERD_RES::deep_copy(old_word_res))); old_word_res->combination = false; // Undo kludge break_noisiest_blob_word(current_perm); @@ -630,7 +630,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { int16_t i; for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - auto blob_index = worst_noise_blob(word_it.data(), &noise_score); + auto blob_index = worst_noise_blob(word_it.data().get(), &noise_score); if (blob_index > -1 && worst_noise_score > noise_score) { worst_noise_score = noise_score; worst_blob_index = blob_index; @@ -644,7 +644,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { /* Now split the worst_word_it */ - word_res = worst_word_it.data(); + word_res = worst_word_it.data().get(); /* Move blobs before noise blob to a new bloblist */ @@ -671,7 +671,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) { auto *new_word_res = new WERD_RES(new_word); new_word_res->combination = true; - worst_word_it.add_before_then_move(new_word_res); + worst_word_it.add_before_then_move(std::shared_ptr(new_word_res)); word_res->ClearResults(); } @@ -834,7 +834,7 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) { float small_limit = kBlnXHeight * fixsp_small_outlines_size; for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { - word = word_it.data(); + word = word_it.data().get(); if (word->rebuild_word == nullptr) { continue; // Can't handle cube words. 
} diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 9388bd05..93e5486b 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -387,7 +387,7 @@ public: bool RecogWordsSegment(std::vector::iterator start, std::vector::iterator end, int pass_n, ETEXT_DESC *monitor, PAGE_RES *page_res, LSTMRecognizer *lstm_recognizer, std::atomic& words_done, - int total_words, std::mutex& monitor_mutex); + int total_words, std::shared_ptr recog_words_mutex); // Runs word recognition on all the words. bool RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES *page_res, std::vector *words); diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp index 65ea748f..865364c7 100644 --- a/src/ccstruct/pageres.cpp +++ b/src/ccstruct/pageres.cpp @@ -173,18 +173,18 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { combo = new WERD_RES(copy_word); combo->x_height = the_row->x_height(); combo->combination = true; - word_res_it.add_to_end(combo); + word_res_it.add_to_end(std::shared_ptr(combo)); } word_res->part_of_combo = true; } else { combo = nullptr; } - word_res_it.add_to_end(word_res); + word_res_it.add_to_end(std::shared_ptr(word_res)); } } WERD_RES &WERD_RES::operator=(const WERD_RES &source) { - this->ELIST_LINK::operator=(source); + this->ELIST_LINK_SP::operator=(source); Clear(); if (source.combination) { word = new WERD; @@ -1251,6 +1251,7 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const { // with best_choice etc. WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word) { + std::lock_guard guard(*mutex_res); // Make a WERD_RES for the new_word. auto *new_res = new WERD_RES(new_word); new_res->CopySimpleFields(clone_res); @@ -1258,13 +1259,13 @@ WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, // Insert into the appropriate place in the ROW_RES. 
WERD_RES_IT wr_it(&row()->word_res_list); for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { - WERD_RES *word = wr_it.data(); - if (word == word_res) { + WERD_RES *word = wr_it.data().get(); + if (word == word_res.get()) { break; } } ASSERT_HOST(!wr_it.cycled_list()); - wr_it.add_before_then_move(new_res); + wr_it.add_before_then_move(std::shared_ptr(new_res)); if (wr_it.at_first()) { // This is the new first word, so reset the member iterator so it // detects the cycled_list state correctly. @@ -1381,6 +1382,7 @@ void PAGE_RES_IT::ReplaceCurrentWord( DeleteCurrentWord(); return; } + std::lock_guard guard(*mutex_res); WERD_RES *input_word = word(); // Set the BOL/EOL flags on the words from the input word. if (input_word->word->flag(W_BOL)) { @@ -1410,7 +1412,7 @@ void PAGE_RES_IT::ReplaceCurrentWord( // Insert into the appropriate place in the ROW_RES. WERD_RES_IT wr_it(&row()->word_res_list); for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) { - WERD_RES *word = wr_it.data(); + WERD_RES *word = wr_it.data().get(); if (word == input_word) { break; } @@ -1470,7 +1472,7 @@ void PAGE_RES_IT::ReplaceCurrentWord( word_w->combination = false; } (*words)[w] = nullptr; // We are taking ownership. - wr_it.add_before_stay_put(word_w); + wr_it.add_before_stay_put(std::shared_ptr(word_w)); } // We have taken ownership of the words. words->clear(); @@ -1480,12 +1482,13 @@ void PAGE_RES_IT::ReplaceCurrentWord( if (!input_word->combination) { delete w_it.extract(); } - delete wr_it.extract(); + wr_it.extract_mt(); ResetWordIterator(); } // Deletes the current WERD_RES and its underlying WERD. void PAGE_RES_IT::DeleteCurrentWord() { + std::lock_guard guard(*mutex_res); // Check that this word is as we expect. part_of_combos are NEVER iterated // by the normal iterator, so we should never be trying to delete them. 
ASSERT_HOST(!word_res->part_of_combo); @@ -1512,13 +1515,14 @@ void PAGE_RES_IT::DeleteCurrentWord() { } } ASSERT_HOST(!wr_it.cycled_list()); - delete wr_it.extract(); + wr_it.extract_mt(); ResetWordIterator(); } // Makes the current word a fuzzy space if not already fuzzy. Updates // corresponding part of combo if required. void PAGE_RES_IT::MakeCurrentWordFuzzy() { + std::lock_guard guard(*mutex_res); WERD *real_word = word_res->word; if (!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)) { real_word->set_flag(W_FUZZY_SP, true); @@ -1583,7 +1587,10 @@ void PAGE_RES_IT::ResetWordIterator() { word_res = word_res_it.data(); } } - ASSERT_HOST(!word_res_it.cycled_list()); + if (word_res_it.cycled_list()) { + // We didn't find next_word_res. Maybe it has been deleted by some other thread. + return; + } wr_it_of_next_word = word_res_it; word_res_it.forward(); } else { @@ -1672,7 +1679,7 @@ foundword: ? nullptr : prev_word_res->best_choice; } - return word_res; + return word_res.get(); } /************************************************************************* @@ -1698,6 +1705,7 @@ WERD_RES *PAGE_RES_IT::restart_row() { *************************************************************************/ WERD_RES *PAGE_RES_IT::forward_paragraph() { + std::lock_guard guard(*mutex_res); while (block_res == next_block_res && (next_row_res != nullptr && next_row_res->row != nullptr && row_res->row->para() == next_row_res->row->para())) { @@ -1713,12 +1721,21 @@ WERD_RES *PAGE_RES_IT::forward_paragraph() { *************************************************************************/ WERD_RES *PAGE_RES_IT::forward_block() { + std::lock_guard guard(*mutex_res); while (block_res == next_block_res) { internal_forward(false, true); } return internal_forward(false, true); } +bool PAGE_RES_IT::forward_to_word(const WERD_RES* word_res) { + std::lock_guard guard(*mutex_res); + while (word() != nullptr && word() != word_res) { + internal_forward(false, false); + } + return word() != 
nullptr; +} + void PAGE_RES_IT::rej_stat_word() { int16_t chars_in_word; int16_t rejects_in_word = 0; diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 48e70b73..ede51671 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -37,6 +37,7 @@ #include // for std::function #include // for std::pair #include // for std::vector +#include // for std::shared_ptr #include // for int8_t @@ -69,7 +70,7 @@ class ROW_RES; ELISTIZEH(ROW_RES) class WERD_RES; -ELISTIZEH(WERD_RES) +ELISTIZEH_SP(WERD_RES) /************************************************************************* * PAGE_RES - Page results @@ -161,7 +162,7 @@ enum CRUNCH_MODE { CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE }; // WERD_RES is a collection of publicly accessible members that gathers // information about a word result. -class TESS_API WERD_RES : public ELIST_LINK { +class TESS_API WERD_RES : public ELIST_LINK_SP { public: // Which word is which? // There are 3 coordinate spaces in use here: a possibly rotated pixel space, @@ -345,7 +346,7 @@ public: } // Deep copies everything except the ratings MATRIX. // To get that use deep_copy below. - WERD_RES(const WERD_RES &source) : ELIST_LINK(source) { + WERD_RES(const WERD_RES &source) : ELIST_LINK_SP(source) { // combination is used in function Clear which is called from operator=. combination = false; *this = source; // see operator= @@ -685,8 +686,11 @@ public: PAGE_RES_IT() = default; - PAGE_RES_IT(PAGE_RES *the_page_res) { // page result + // If multiple instances of PAGE_RES_IT needs to be used concurrently, + // then they must be created using the same mutex_res. 
+ PAGE_RES_IT(PAGE_RES *the_page_res, std::shared_ptr the_mutex_res = std::make_shared()) { // page result page_res = the_page_res; + mutex_res = the_mutex_res; restart_page(); // ready to scan } @@ -720,8 +724,8 @@ public: // ============ Methods that mutate the underling structures =========== // Note that these methods will potentially invalidate other PAGE_RES_ITs // and are intended to be used only while a single PAGE_RES_IT is active. - // This problem needs to be taken into account if these mutation operators - // are ever provided to PageIterator or its subclasses. + // To use these methods safely, from multiple PAGE_RES_ITs concurrently, it + // is needed to construct all these iterators with the same mutex_res. // Inserts the new_word and a corresponding WERD_RES before the current // position. The simple fields of the WERD_RES are copied from clone_res and @@ -741,18 +745,22 @@ public: void MakeCurrentWordFuzzy(); WERD_RES *forward() { // Get next word. + std::lock_guard guard(*mutex_res); return internal_forward(false, false); } // Move forward, but allow empty blocks to show as single nullptr words. 
WERD_RES *forward_with_empties() { + std::lock_guard guard(*mutex_res); return internal_forward(false, true); } WERD_RES *forward_paragraph(); // get first word in next non-empty paragraph WERD_RES *forward_block(); // get first word in next non-empty block + bool forward_to_word(const WERD_RES *word_res); // go to this word + WERD_RES *prev_word() const { // previous word - return prev_word_res; + return prev_word_res.get(); } ROW_RES *prev_row() const { // row of prev word return prev_row_res; @@ -761,7 +769,7 @@ public: return prev_block_res; } WERD_RES *word() const { // current word - return word_res; + return word_res.get(); } ROW_RES *row() const { // row of current word return row_res; @@ -770,7 +778,7 @@ public: return block_res; } WERD_RES *next_word() const { // next word - return next_word_res; + return next_word_res.get(); } ROW_RES *next_row() const { // row of next word return next_row_res; @@ -784,15 +792,15 @@ public: private: WERD_RES *internal_forward(bool new_block, bool empty_ok); - WERD_RES *prev_word_res; // previous word + std::shared_ptr prev_word_res; // previous word ROW_RES *prev_row_res; // row of prev word BLOCK_RES *prev_block_res; // block of prev word - WERD_RES *word_res; // current word + std::shared_ptr word_res; // current word ROW_RES *row_res; // row of current word BLOCK_RES *block_res; // block of cur. word - WERD_RES *next_word_res; // next word + std::shared_ptr next_word_res; // next word ROW_RES *next_row_res; // row of next word BLOCK_RES *next_block_res; // block of next word @@ -803,6 +811,8 @@ private: // Since word_res_it is 2 words further on, this is otherwise hard to do. WERD_RES_IT wr_it_of_current_word; WERD_RES_IT wr_it_of_next_word; + + std::shared_ptr mutex_res; }; } // namespace tesseract diff --git a/src/ccutil/elst.cpp b/src/ccutil/elst.cpp index 2cac5fd1..046fbd66 100644 --- a/src/ccutil/elst.cpp +++ b/src/ccutil/elst.cpp @@ -33,11 +33,12 @@ namespace tesseract { * the consequential memory overhead. 
**********************************************************************/ -void ELIST::internal_clear( // destroy all links +template +inline void ELIST_T::internal_clear( // destroy all links void (*zapper)(void *)) { // ptr to zapper functn - ELIST_LINK *ptr; - ELIST_LINK *next; + T ptr; + T next; if (!empty()) { ptr = last->next; // set to first @@ -45,7 +46,10 @@ void ELIST::internal_clear( // destroy all links last = nullptr; // set list empty while (ptr) { next = ptr->next; - zapper(ptr); + if constexpr (!is_shared_ptr::value) { + // We need not delete the shared pointer objects as it will be deleted automatically + zapper(ptr); + } ptr = next; } } @@ -64,9 +68,10 @@ void ELIST::internal_clear( // destroy all links * end point is always the end_it position. **********************************************************************/ -void ELIST::assign_to_sublist( // to this list - ELIST_ITERATOR *start_it, // from list start - ELIST_ITERATOR *end_it) { // from list end +template +void ELIST_T::assign_to_sublist( // to this list + ELIST_ITERATOR_T *start_it, // from list start + ELIST_ITERATOR_T *end_it) { // from list end constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist"); if (!empty()) { @@ -84,7 +89,8 @@ void ELIST::assign_to_sublist( // to this list * ( int (*)(const void *, const void *) **********************************************************************/ -void ELIST::sort( // sort elements +template +void ELIST_T::sort( // sort elements int comparator( // comparison routine const void *, const void *)) { // Allocate an array of pointers, one per list element. @@ -92,10 +98,10 @@ void ELIST::sort( // sort elements if (count > 0) { // ptr array to sort - std::vector base; + std::vector base; base.reserve(count); - ELIST_ITERATOR it(this); + ELIST_ITERATOR_T it(this); // Extract all elements, putting the pointers in the array. 
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { @@ -122,8 +128,9 @@ void ELIST::sort( // sort elements // list) - new_link is not added to the list and the function returns the // pointer to the identical entry that already exists in the list // (otherwise the function returns new_link). -ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void *), bool unique, - ELIST_LINK *new_link) { +template +T ELIST_T::add_sorted_and_find(int comparator(const void *, const void *), bool unique, + T new_link) { // Check for adding at the end. if (last == nullptr || comparator(&last, &new_link) < 0) { if (last == nullptr) { @@ -135,9 +142,9 @@ ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void * last = new_link; } else { // Need to use an iterator. - ELIST_ITERATOR it(this); + ELIST_ITERATOR_T it(this); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ELIST_LINK *link = it.data(); + T link = it.data(); int compare = comparator(&link, &new_link); if (compare > 0) { break; @@ -165,8 +172,8 @@ ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void * * Move the iterator to the next element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. 
**********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::forward() { +template +inline T ELIST_ITERATOR_T::forward() { #ifndef NDEBUG if (!list) NO_LIST.error("ELIST_ITERATOR::forward", ABORT); @@ -195,10 +202,17 @@ ELIST_LINK *ELIST_ITERATOR::forward() { #ifndef NDEBUG if (!next) { + void *current_ptr = nullptr; + if constexpr(is_shared_ptr::value) { + current_ptr = static_cast(current.get()); + } + else { + current_ptr = static_cast(current); + } NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT, "This is: %p Current is: %p", static_cast(this), - static_cast(current)); + current_ptr); } #endif return current; @@ -212,9 +226,10 @@ ELIST_LINK *ELIST_ITERATOR::forward() { * (This function can't be INLINEd because it contains a loop) **********************************************************************/ -ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ... +template +T ELIST_ITERATOR_T::data_relative( // get data + or - ... int8_t offset) { // offset from current - ELIST_LINK *ptr; + T ptr; #ifndef NDEBUG if (!list) @@ -249,7 +264,8 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ... * (This function can't be INLINEd because it contains a loop) **********************************************************************/ -ELIST_LINK *ELIST_ITERATOR::move_to_last() { +template +T ELIST_ITERATOR_T::move_to_last() { #ifndef NDEBUG if (!list) NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT); @@ -272,11 +288,12 @@ ELIST_LINK *ELIST_ITERATOR::move_to_last() { * (This function hasn't been in-lined because its a bit big!) 
**********************************************************************/ -void ELIST_ITERATOR::exchange( // positions of 2 links - ELIST_ITERATOR *other_it) { // other iterator +template +void ELIST_ITERATOR_T::exchange( // positions of 2 links + ELIST_ITERATOR_T *other_it) { // other iterator constexpr ERRCODE DONT_EXCHANGE_DELETED("Can't exchange deleted elements of lists"); - ELIST_LINK *old_current; + T old_current; #ifndef NDEBUG if (!list) @@ -368,16 +385,17 @@ non-adjacent elements. */ * (Can't inline this function because it contains a loop) **********************************************************************/ -ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current - ELIST_ITERATOR *other_it) { // to other current +template +T ELIST_ITERATOR_T::extract_sublist( // from this current + ELIST_ITERATOR_T *other_it) { // to other current #ifndef NDEBUG constexpr ERRCODE BAD_EXTRACTION_PTS("Can't extract sublist from points on different lists"); constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points"); #endif constexpr ERRCODE BAD_SUBLIST("Can't find sublist end point in original list"); - ELIST_ITERATOR temp_it = *this; - ELIST_LINK *end_of_new_list; + ELIST_ITERATOR_T temp_it = *this; + T end_of_new_list; #ifndef NDEBUG if (!other_it) @@ -437,4 +455,12 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current return end_of_new_list; } +template class ELIST_LINK_T; +template class ELIST_T; +template class ELIST_ITERATOR_T; + +template class ELIST_LINK_T>; +template class ELIST_T>; +template class ELIST_ITERATOR_T>; + } // namespace tesseract diff --git a/src/ccutil/elst.h b/src/ccutil/elst.h index 040ce2a4..68953c69 100644 --- a/src/ccutil/elst.h +++ b/src/ccutil/elst.h @@ -24,11 +24,10 @@ #include "serialis.h" #include +#include namespace tesseract { -class ELIST_ITERATOR; - /********************************************************************** This module implements list classes and iterators. 
The following list types and iterators are provided: @@ -79,25 +78,43 @@ lists. * walks the list. **********************************************************************/ -class ELIST_LINK { - friend class ELIST_ITERATOR; - friend class ELIST; +// Forward declarations +template +class ELIST_ITERATOR_T; - ELIST_LINK *next; +template +class ELIST_LINK_T; + +template +class ELIST_T; + +template +struct is_shared_ptr : std::false_type {}; + +template +struct is_shared_ptr> : std::true_type {}; + +template +class ELIST_LINK_T { + friend class ELIST_ITERATOR_T; + friend class ELIST_T; + +protected: + T next; public: - ELIST_LINK() { + ELIST_LINK_T() { next = nullptr; } // constructor // The special copy constructor is used by lots of classes. - ELIST_LINK(const ELIST_LINK &) { + ELIST_LINK_T(const ELIST_LINK_T &) { next = nullptr; } // The special assignment operator is used by lots of classes. - void operator=(const ELIST_LINK &) { + void operator=(const ELIST_LINK_T &) { next = nullptr; } }; @@ -108,16 +125,19 @@ public: * Generic list class for singly linked lists with embedded links **********************************************************************/ -class TESS_API ELIST { - friend class ELIST_ITERATOR; - - ELIST_LINK *last = nullptr; // End of list +template +class TESS_API ELIST_T { + friend class ELIST_ITERATOR_T; +protected: + T last; // End of list //(Points to head) - ELIST_LINK *First() { // return first + T First() { // return first return last ? last->next : nullptr; } public: + ELIST_T(): last(nullptr) { // constructor + } // destroy all links void internal_clear(void (*zapper)(void *)); @@ -130,17 +150,13 @@ public: } void shallow_copy( // dangerous!! - ELIST *from_list) { // beware destructors!! + ELIST_T *from_list) { // beware destructors!! 
last = from_list->last; } - // ptr to copier functn - void internal_deep_copy(ELIST_LINK *(*copier)(ELIST_LINK *), - const ELIST *list); // list being copied - void assign_to_sublist( // to this list - ELIST_ITERATOR *start_it, // from list start - ELIST_ITERATOR *end_it); // from list end + ELIST_ITERATOR_T *start_it, // from list start + ELIST_ITERATOR_T *end_it); // from list end // # elements in list int32_t length() const { @@ -168,12 +184,12 @@ public: // list) - new_link is not added to the list and the function returns the // pointer to the identical entry that already exists in the list // (otherwise the function returns new_link). - ELIST_LINK *add_sorted_and_find(int comparator(const void *, const void *), bool unique, - ELIST_LINK *new_link); + T add_sorted_and_find(int comparator(const void *, const void *), bool unique, + T new_link); // Same as above, but returns true if the new entry was inserted, false // if the identical entry already existed in the list. - bool add_sorted(int comparator(const void *, const void *), bool unique, ELIST_LINK *new_link) { + bool add_sorted(int comparator(const void *, const void *), bool unique, T new_link) { return (add_sorted_and_find(comparator, unique, new_link) == new_link); } }; @@ -185,50 +201,52 @@ public: *embedded links **********************************************************************/ -class TESS_API ELIST_ITERATOR { - friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); - - ELIST *list; // List being iterated - ELIST_LINK *prev; // prev element - ELIST_LINK *current; // current element - ELIST_LINK *next; // next element - ELIST_LINK *cycle_pt; // point we are cycling the list to. 
+template +class TESS_API ELIST_ITERATOR_T { + friend void ELIST_T::assign_to_sublist(ELIST_ITERATOR_T *, ELIST_ITERATOR_T *); +protected: + ELIST_T *list; // List being iterated + T prev; // prev element + T current; // current element + T next; // next element + T cycle_pt; // point we are cycling the list to. bool ex_current_was_last; // current extracted was end of list bool ex_current_was_cycle_pt; // current extracted was cycle point bool started_cycling; // Have we moved off the start? - ELIST_LINK *extract_sublist( // from this current... - ELIST_ITERATOR *other_it); // to other current - + T extract_sublist( // from this current... + ELIST_ITERATOR_T *other_it); // to other current +private: + T extract_internal(); public: - ELIST_ITERATOR() { // constructor + ELIST_ITERATOR_T() { // constructor list = nullptr; } // unassigned list - explicit ELIST_ITERATOR(ELIST *list_to_iterate); + explicit ELIST_ITERATOR_T(ELIST_T *list_to_iterate); void set_to_list( // change list - ELIST *list_to_iterate); + ELIST_T *list_to_iterate); void add_after_then_move( // add after current & - ELIST_LINK *new_link); // move to new + T new_link); // move to new void add_after_stay_put( // add after current & - ELIST_LINK *new_link); // stay at current + T new_link); // stay at current void add_before_then_move( // add before current & - ELIST_LINK *new_link); // move to new + T new_link); // move to new void add_before_stay_put( // add before current & - ELIST_LINK *new_link); // stay at current + T new_link); // stay at current void add_list_after( // add a list & - ELIST *list_to_add); // stay at current + ELIST_T *list_to_add); // stay at current void add_list_before( // add a list & - ELIST *list_to_add); // move to it 1st item + ELIST_T *list_to_add); // move to it 1st item - ELIST_LINK *data() { // get current data + T data() { // get current data #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::data", ABORT); @@ -240,16 +258,17 @@ public: return current; } - 
ELIST_LINK *data_relative( // get data + or - ... + T data_relative( // get data + or - ... int8_t offset); // offset from current - ELIST_LINK *forward(); // move to next element + T forward(); // move to next element - ELIST_LINK *extract(); // remove from list + T extract(); // remove from list + T extract_mt(); // remove from list (multithreaded version) - ELIST_LINK *move_to_first(); // go to start of list + T move_to_first(); // go to start of list - ELIST_LINK *move_to_last(); // go to end of list + T move_to_last(); // go to end of list void mark_cycle_pt(); // remember current @@ -263,7 +282,7 @@ public: } bool current_extracted() const { // current extracted? - return !current; + return current == nullptr; } bool at_first() const; // Current is first? @@ -273,10 +292,10 @@ public: bool cycled_list() const; // Completed a cycle? void add_to_end( // add at end & - ELIST_LINK *new_link); // don't move + T new_link); // don't move void exchange( // positions of 2 links - ELIST_ITERATOR *other_it); // other iterator + ELIST_ITERATOR_T *other_it); // other iterator //# elements in list int32_t length() const { @@ -295,8 +314,9 @@ public: * over. 
**********************************************************************/ -inline void ELIST_ITERATOR::set_to_list( // change list - ELIST *list_to_iterate) { +template +inline void ELIST_ITERATOR_T::set_to_list( // change list + ELIST_T *list_to_iterate) { #ifndef NDEBUG if (!list_to_iterate) { BAD_PARAMETER.error("ELIST_ITERATOR::set_to_list", ABORT, "list_to_iterate is nullptr"); @@ -319,7 +339,8 @@ inline void ELIST_ITERATOR::set_to_list( // change list * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ -inline ELIST_ITERATOR::ELIST_ITERATOR(ELIST *list_to_iterate) { +template +inline ELIST_ITERATOR_T::ELIST_ITERATOR_T(ELIST_T *list_to_iterate) { set_to_list(list_to_iterate); } @@ -330,8 +351,9 @@ inline ELIST_ITERATOR::ELIST_ITERATOR(ELIST *list_to_iterate) { * iterator to the new element. **********************************************************************/ -inline void ELIST_ITERATOR::add_after_then_move( // element to add - ELIST_LINK *new_element) { +template +inline void ELIST_ITERATOR_T::add_after_then_move( // element to add + T new_element) { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT); @@ -377,8 +399,9 @@ inline void ELIST_ITERATOR::add_after_then_move( // element to add * the iterator to the new element. **********************************************************************/ -inline void ELIST_ITERATOR::add_after_stay_put( // element to add - ELIST_LINK *new_element) { +template +inline void ELIST_ITERATOR_T::add_after_stay_put( // element to add + T new_element) { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT); @@ -426,8 +449,9 @@ inline void ELIST_ITERATOR::add_after_stay_put( // element to add * iterator to the new element. 
**********************************************************************/ -inline void ELIST_ITERATOR::add_before_then_move( // element to add - ELIST_LINK *new_element) { +template +inline void ELIST_ITERATOR_T::add_before_then_move( // element to add + T new_element) { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT); @@ -469,8 +493,9 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add * iterator to the new element. **********************************************************************/ -inline void ELIST_ITERATOR::add_before_stay_put( // element to add - ELIST_LINK *new_element) { +template +inline void ELIST_ITERATOR_T::add_before_stay_put( // element to add + T new_element) { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT); @@ -514,7 +539,8 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add * iterator. **********************************************************************/ -inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) { +template +inline void ELIST_ITERATOR_T::add_list_after(ELIST_T *list_to_add) { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT); @@ -561,7 +587,8 @@ inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) { * iterator. **********************************************************************/ -inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { +template +inline void ELIST_ITERATOR_T::add_list_before(ELIST_T *list_to_add) { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT); @@ -607,8 +634,9 @@ inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { * is to be deleted, this is the callers responsibility. 
**********************************************************************/ -inline ELIST_LINK *ELIST_ITERATOR::extract() { - ELIST_LINK *extracted_link; +template +inline T ELIST_ITERATOR_T::extract_internal() { + T extracted_link; #ifndef NDEBUG if (!list) { @@ -634,11 +662,23 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() { // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. ex_current_was_cycle_pt = (current == cycle_pt); extracted_link = current; - extracted_link->next = nullptr; // for safety current = nullptr; return extracted_link; } + +template +inline T ELIST_ITERATOR_T::extract() { + T extracted_link = extract_internal(); + extracted_link->next = nullptr; // for safety + return extracted_link; +} + +template +inline T ELIST_ITERATOR_T::extract_mt() { + return extract_internal(); +} + /*********************************************************************** * ELIST_ITERATOR::move_to_first() * @@ -646,7 +686,8 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() { * Return data just in case anyone wants it. **********************************************************************/ -inline ELIST_LINK *ELIST_ITERATOR::move_to_first() { +template +inline T ELIST_ITERATOR_T::move_to_first() { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT); @@ -670,7 +711,8 @@ inline ELIST_LINK *ELIST_ITERATOR::move_to_first() { * by a forward, add_after_then_move or add_after_then_move. 
**********************************************************************/ -inline void ELIST_ITERATOR::mark_cycle_pt() { +template +inline void ELIST_ITERATOR_T::mark_cycle_pt() { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT); @@ -692,7 +734,8 @@ inline void ELIST_ITERATOR::mark_cycle_pt() { * **********************************************************************/ -inline bool ELIST_ITERATOR::at_first() const { +template +inline bool ELIST_ITERATOR_T::at_first() const { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::at_first", ABORT); @@ -712,7 +755,8 @@ inline bool ELIST_ITERATOR::at_first() const { * **********************************************************************/ -inline bool ELIST_ITERATOR::at_last() const { +template +inline bool ELIST_ITERATOR_T::at_last() const { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::at_last", ABORT); @@ -732,7 +776,8 @@ inline bool ELIST_ITERATOR::at_last() const { * **********************************************************************/ -inline bool ELIST_ITERATOR::cycled_list() const { +template +inline bool ELIST_ITERATOR_T::cycled_list() const { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT); @@ -749,7 +794,8 @@ inline bool ELIST_ITERATOR::cycled_list() const { * **********************************************************************/ -inline void ELIST_ITERATOR::sort( // sort elements +template +inline void ELIST_ITERATOR_T::sort( // sort elements int comparator( // comparison routine const void *, const void *)) { #ifndef NDEBUG @@ -772,8 +818,9 @@ inline void ELIST_ITERATOR::sort( // sort elements queues. 
**********************************************************************/ -inline void ELIST_ITERATOR::add_to_end( // element to add - ELIST_LINK *new_element) { +template +inline void ELIST_ITERATOR_T::add_to_end( // element to add + T new_element) { #ifndef NDEBUG if (!list) { NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT); @@ -800,6 +847,18 @@ inline void ELIST_ITERATOR::add_to_end( // element to add } } + +// Type aliases for raw pointer versions +class ELIST_LINK : public ELIST_LINK_T {}; +using ELIST = ELIST_T; +using ELIST_ITERATOR = ELIST_ITERATOR_T; + + // Type aliases for shared_ptr versions +class ELIST_LINK_SP : public ELIST_LINK_T> {}; +using ELIST_SP = ELIST_T>; +using ELIST_ITERATOR_SP = ELIST_ITERATOR_T>; + + #define ELISTIZEH(CLASSNAME) \ class CLASSNAME##_LIST : public X_LIST { \ using X_LIST::X_LIST; \ @@ -808,6 +867,15 @@ inline void ELIST_ITERATOR::add_to_end( // element to add using X_ITER::X_ITER; \ }; +#define ELISTIZEH_SP(CLASSNAME) \ + class CLASSNAME##_LIST \ + : public X_LIST_SP { \ + using X_LIST_SP::X_LIST_SP; \ + }; \ + class CLASSNAME##_IT : public X_ITER_SP { \ + using X_ITER_SP::X_ITER_SP; \ + }; + } // namespace tesseract #endif diff --git a/src/ccutil/list.h b/src/ccutil/list.h index d9dfe095..2379cc77 100644 --- a/src/ccutil/list.h +++ b/src/ccutil/list.h @@ -15,6 +15,7 @@ #define LIST_ITERATOR_H #include +#include namespace tesseract { @@ -65,6 +66,55 @@ public: } }; +template +class X_ITER_SP : public ITERATOR { +public: + X_ITER_SP() = default; + template + X_ITER_SP(U *list) : ITERATOR(list) {} + + std::shared_ptr data() { + return std::static_pointer_cast(ITERATOR::data()); + } + std::shared_ptr data_relative(int8_t offset) { + return std::static_pointer_cast(ITERATOR::data_relative(offset)); + } + std::shared_ptr forward() { + return std::static_pointer_cast(ITERATOR::forward()); + } + std::shared_ptr extract() { + return std::static_pointer_cast(ITERATOR::extract()); + } +}; + +template +class X_LIST_SP : public 
CONTAINER { +public: + X_LIST_SP() = default; + X_LIST_SP(const X_LIST_SP &) = delete; + X_LIST_SP &operator=(const X_LIST_SP &) = delete; + ~X_LIST_SP() { + clear(); + } + + /* delete elements */ + void clear() { + CONTAINER::internal_clear( + [](void *link) { delete reinterpret_cast(link); }); + } + + /* Become a deep copy of src_list */ + template + void deep_copy(const U *src_list, CLASSNAME *(*copier)(const CLASSNAME *)) { + X_ITER_SP from_it(const_cast(src_list)); + X_ITER_SP to_it(this); + + for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) + to_it.add_after_then_move( + std::shared_ptr((*copier)(from_it.data().get()))); + } +}; + } // namespace tesseract #endif