From 4175679da6bfe8ff63f6119131ecacf3bfee20ef Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Mon, 28 Dec 2020 02:31:45 +0300 Subject: [PATCH] Revert kdpair, genericheap changes. --- src/ccutil/genericheap.h | 28 ++++----- src/ccutil/kdpair.h | 133 ++++++++++++++++++++++++--------------- src/lstm/recodebeam.cpp | 10 +-- src/lstm/recodebeam.h | 21 ++++--- 4 files changed, 114 insertions(+), 78 deletions(-) diff --git a/src/ccutil/genericheap.h b/src/ccutil/genericheap.h index d7c77265..b2f9234c 100644 --- a/src/ccutil/genericheap.h +++ b/src/ccutil/genericheap.h @@ -54,7 +54,7 @@ namespace tesseract { // index and pointer can be changed arbitrarily by heap operations. // Revaluation can be done by making the Data type in the Pair derived from or // contain a DoublePtr as its first data element, making it possible to convert -// the pointer to a Pair using reinterpret_cast. +// the pointer to a Pair using KDPairInc::RecastDataPointer. template class GenericHeap { public: @@ -99,10 +99,10 @@ class GenericHeap { // location for the new *entry. To avoid needing a default constructor // for primitive types, and to allow for use of DoublePtr in the Pair // somewhere, we have to incur a double copy here. - heap_.push_back(std::move(*entry)); - *entry = std::move(heap_.back()); + heap_.push_back(*entry); + *entry = heap_.back(); hole_index = SiftUp(hole_index, *entry); - heap_[hole_index] = std::move(*entry); + heap_[hole_index] = *entry; } // Get the value of the top (smallest, defined by operator< ) element. @@ -121,14 +121,14 @@ class GenericHeap { if (new_size < 0) return false; // Already empty. if (entry != nullptr) - *entry = std::move(heap_[0]); + *entry = heap_[0]; if (new_size > 0) { // Sift the hole at the start of the heap_ downwards to match the last // element. - auto hole_pair = std::move(heap_[new_size]); + Pair hole_pair = heap_[new_size]; heap_.truncate(new_size); int hole_index = SiftDown(0, hole_pair); - heap_[hole_index] = std::move(hole_pair); + heap_[hole_index] = hole_pair; } else { heap_.truncate(new_size); } @@ -143,13 +143,13 @@ class GenericHeap { if (worst_index < 0) return false; // It cannot be empty! // Extract the worst element from the heap, leaving a hole at worst_index. if (entry != nullptr) - *entry = std::move(heap_[worst_index]); + *entry = heap_[worst_index]; int heap_size = heap_.size() - 1; if (heap_size > 0) { // Sift the hole upwards to match the last element of the heap_ - auto hole_pair = std::move(heap_[heap_size]); + Pair hole_pair = heap_[heap_size]; int hole_index = SiftUp(worst_index, hole_pair); - heap_[hole_index] = std::move(hole_pair); + heap_[hole_index] = hole_pair; } heap_.truncate(heap_size); return true; @@ -182,10 +182,10 @@ class GenericHeap { // Time = O(log n). void Reshuffle(Pair* pair) { int index = pair - &heap_[0]; - auto hole_pair = std::move(heap_[index]); + Pair hole_pair = heap_[index]; index = SiftDown(index, hole_pair); index = SiftUp(index, hole_pair); - heap_[index] = std::move(hole_pair); + heap_[index] = hole_pair; } private: @@ -195,7 +195,7 @@ class GenericHeap { int SiftUp(int hole_index, const Pair& pair) { int parent; while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) { - heap_[hole_index] = std::move(heap_[parent]); + heap_[hole_index] = heap_[parent]; hole_index = parent; } return hole_index; @@ -211,7 +211,7 @@ class GenericHeap { if (child + 1 < heap_size && heap_[child + 1] < heap_[child]) ++child; if (heap_[child] < pair) { - heap_[hole_index] = std::move(heap_[child]); + heap_[hole_index] = heap_[child]; hole_index = child; } else { break; diff --git a/src/ccutil/kdpair.h b/src/ccutil/kdpair.h index db859429..58e26530 100644 --- a/src/ccutil/kdpair.h +++ b/src/ccutil/kdpair.h @@ -26,65 +26,62 @@ #include -#include -#include - namespace tesseract { // A useful base struct to facilitate the common operation of sorting a vector // of simple or smart-pointer data using a separate key. Similar to STL pair. template -class KDPair : public std::pair { - using base = std::pair; - public: - using std::pair::pair; - - using base::first; - using base::second; +struct KDPair { + KDPair() = default; + KDPair(Key k, Data d) : data_(d), key_(k) {} int operator==(const KDPair& other) const { - return first == other.first; + return key_ == other.key_; } - Key& key() { - return first; - } - Data& data() { - return second; - } - const Key& key() const { - return first; - } - const Data& data() const { - return second; - } + Data &data() { return data_; } + const Data &data() const { return data_; } + Key &key() { return key_; } + const Key &key() const { return key_; } + + // WARNING! Keep data as the first element! KDPairInc and KDPairDec depend + // on the order of these elements so they can downcast pointers appropriately + // for use by GenericHeap::Reshuffle. + Data data_; + Key key_; }; - // Specialization of KDPair to provide operator< for sorting in increasing order // and recasting of data pointers for use with DoublePtr. template -class KDPairInc : public KDPair { - public: - using KDPair::KDPair; - +struct KDPairInc : public KDPair { + KDPairInc() = default; + KDPairInc(Key k, Data d) : KDPair(k, d) {} // Operator< facilitates sorting in increasing order. int operator<(const KDPairInc& other) const { return this->key() < other.key(); } + // Returns the input Data pointer recast to a KDPairInc pointer. + // Just casts a pointer to the first element to a pointer to the whole struct. + static KDPairInc* RecastDataPointer(Data* data_ptr) { + return reinterpret_cast(data_ptr); + } }; - // Specialization of KDPair to provide operator< for sorting in decreasing order // and recasting of data pointers for use with DoublePtr. template -class KDPairDec : public KDPair { - public: - using KDPair::KDPair; - +struct KDPairDec : public KDPair { + KDPairDec() = default; + KDPairDec(Key k, Data d) : KDPair(k, d) {} // Operator< facilitates sorting in decreasing order by using operator> on // the key values. int operator<(const KDPairDec& other) const { return this->key() > other.key(); } + // Returns the input Data pointer recast to a KDPairDec pointer. + // Just casts a pointer to the first element to a pointer to the whole struct. + static KDPairDec* RecastDataPointer(Data* data_ptr) { + return reinterpret_cast(data_ptr); + } }; // A useful base class to facilitate the common operation of sorting a vector @@ -93,57 +90,89 @@ class KDPairDec : public KDPair { // operator= that have move semantics so that the data does not get copied and // only a single instance of KDPtrPair holds a specific data pointer. template -class KDPtrPair : public std::pair> { - using base = std::pair>; +class KDPtrPair { public: - using base::first; - using base::second; - - KDPtrPair() = default; - KDPtrPair(Key k, Data* d) : std::pair>(k, d) {} - KDPtrPair(KDPtrPair &&src) = default; - KDPtrPair &operator=(KDPtrPair &&src) = default; + KDPtrPair() : data_(nullptr) {} + KDPtrPair(Key k, Data* d) : data_(d), key_(k) {} + // Copy constructor steals the pointer from src and nulls it in src, thereby + // moving the (single) ownership of the data. + KDPtrPair(const KDPtrPair& src) : data_(src.data_), key_(src.key_) { + ((KDPtrPair&)src).data_ = nullptr; + } + // Destructor deletes data, assuming it is the sole owner. + ~KDPtrPair() { + delete this->data_; + this->data_ = nullptr; + } + // Operator= steals the pointer from src and nulls it in src, thereby + // moving the (single) ownership of the data. + void operator=(const KDPtrPair& src) { + delete this->data_; + this->data_ = src.data_; + ((KDPtrPair&)src).data_ = nullptr; + this->key_ = src.key_; + } int operator==(const KDPtrPair& other) const { - return key() == other.key(); + return key_ == other.key_; } // Accessors. const Key& key() const { - return first; + return key_; } void set_key(const Key& new_key) { - first = new_key; + key_ = new_key; } const Data* data() const { - return second.get(); + return data_; } // Sets the data pointer, taking ownership of the data. void set_data(Data* new_data) { - second.reset(new_data); + delete data_; + data_ = new_data; } // Relinquishes ownership of the data pointer (setting it to nullptr). Data* extract_data() { - return second.release(); + Data* result = data_; + data_ = nullptr; + return result; } -}; + private: + // Data members are private to keep deletion of data_ encapsulated. + Data* data_; + Key key_; +}; // Specialization of KDPtrPair to provide operator< for sorting in increasing // order. template struct KDPtrPairInc : public KDPtrPair { - using KDPtrPair::KDPtrPair; + // Since we are doing non-standard stuff we have to duplicate *all* the + // constructors and operator=. + KDPtrPairInc() : KDPtrPair() {} + KDPtrPairInc(Key k, Data* d) : KDPtrPair(k, d) {} + KDPtrPairInc(const KDPtrPairInc& src) : KDPtrPair(src) {} + void operator=(const KDPtrPairInc& src) { + KDPtrPair::operator=(src); + } // Operator< facilitates sorting in increasing order. int operator<(const KDPtrPairInc& other) const { return this->key() < other.key(); } }; - // Specialization of KDPtrPair to provide operator< for sorting in decreasing // order. template struct KDPtrPairDec : public KDPtrPair { - using KDPtrPair::KDPtrPair; + // Since we are doing non-standard stuff we have to duplicate *all* the + // constructors and operator=. + KDPtrPairDec() : KDPtrPair() {} + KDPtrPairDec(Key k, Data* d) : KDPtrPair(k, d) {} + KDPtrPairDec(const KDPtrPairDec& src) : KDPtrPair(src) {} + void operator=(const KDPtrPairDec& src) { + KDPtrPair::operator=(src); + } // Operator< facilitates sorting in decreasing order by using operator> on // the key values. int operator<(const KDPtrPairDec& other) const { diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp index 3d958e60..fea655a7 100644 --- a/src/lstm/recodebeam.cpp +++ b/src/lstm/recodebeam.cpp @@ -1054,7 +1054,7 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, word_start = true; } else if (uni_prev->dawgs != nullptr) { // Continuing a previous dict word. - dawg_args.active_dawgs = uni_prev->dawgs.get(); + dawg_args.active_dawgs = uni_prev->dawgs; word_start = uni_prev->start_of_dawg; } else { return; // Can't continue if not a dict word. @@ -1099,7 +1099,7 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, score, prev, initial_dawgs, ComputeCodeHash(code, false, prev)); - *best_initial_dawg = std::move(node); + *best_initial_dawg = node; } } @@ -1144,7 +1144,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id, RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, dup, cert, score, prev, d, hash); if (UpdateHeapIfMatched(&node, heap)) return; - RecodePair entry(score, std::move(node)); + RecodePair entry(score, node); heap->Push(&entry); ASSERT_HOST(entry.data().dawgs == nullptr); if (heap->size() > max_size) heap->Pop(&entry); @@ -1161,7 +1161,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node, if (UpdateHeapIfMatched(node, heap)) { return; } - RecodePair entry(node->score, std::move(*node)); + RecodePair entry(node->score, *node); heap->Push(&entry); ASSERT_HOST(entry.data().dawgs == nullptr); if (heap->size() > max_size) heap->Pop(&entry); @@ -1184,7 +1184,7 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node, if (new_node->score > node.score) { // The new one is better. Update the entire node in the heap and // reshuffle. - node = std::move(*new_node); + node = *new_node; (*nodes)[i].key() = node.score; heap->Reshuffle(&(*nodes)[i]); } diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h index afd83493..88e8e87b 100644 --- a/src/lstm/recodebeam.h +++ b/src/lstm/recodebeam.h @@ -27,9 +27,7 @@ #include "networkio.h" #include "ratngs.h" #include "unicharcompress.h" - #include -#include #include #include #include @@ -126,9 +124,17 @@ struct RecodeNode { // don't want to copy the whole DawgPositionVector each time, and true // copying isn't necessary for this struct. It does get moved around a lot // though inside the heap and during heap push, hence the move semantics. - RecodeNode(RecodeNode &&) = default; - RecodeNode& operator=(RecodeNode &&) = default; - + RecodeNode(const RecodeNode& src) : dawgs(nullptr) { + *this = src; + ASSERT_HOST(src.dawgs == nullptr); + } + RecodeNode& operator=(const RecodeNode& src) { + delete dawgs; + memcpy(this, &src, sizeof(src)); + ((RecodeNode&)src).dawgs = nullptr; + return *this; + } + ~RecodeNode() { delete dawgs; } // Prints details of the node. void Print(int null_char, const UNICHARSET& unicharset, int depth) const; @@ -161,7 +167,7 @@ struct RecodeNode { // The previous node in this chain. Borrowed pointer. const RecodeNode* prev; // The currently active dawgs at this position. Owned pointer. - std::unique_ptr dawgs; + DawgPositionVector* dawgs; // A hash of all codes in the prefix and this->code as well. Used for // duplicate path removal. uint64_t code_hash; @@ -272,8 +278,9 @@ class RecodeBeamSearch { for (auto & beam : beams_) { beam.clear(); } + RecodeNode empty; for (auto & best_initial_dawg : best_initial_dawgs_) { - best_initial_dawg = {}; + best_initial_dawg = empty; } }