Revert kdpair, genericheap changes.

This commit is contained in:
Egor Pugin 2020-12-28 02:31:45 +03:00
parent 6cc00aa332
commit 4175679da6
4 changed files with 114 additions and 78 deletions

View File

@ -54,7 +54,7 @@ namespace tesseract {
// index and pointer can be changed arbitrarily by heap operations. // index and pointer can be changed arbitrarily by heap operations.
// Revaluation can be done by making the Data type in the Pair derived from or // Revaluation can be done by making the Data type in the Pair derived from or
// contain a DoublePtr as its first data element, making it possible to convert // contain a DoublePtr as its first data element, making it possible to convert
// the pointer to a Pair using reinterpret_cast<KDPairDec*>. // the pointer to a Pair using KDPairInc::RecastDataPointer.
template <typename Pair> template <typename Pair>
class GenericHeap { class GenericHeap {
public: public:
@ -99,10 +99,10 @@ class GenericHeap {
// location for the new *entry. To avoid needing a default constructor // location for the new *entry. To avoid needing a default constructor
// for primitive types, and to allow for use of DoublePtr in the Pair // for primitive types, and to allow for use of DoublePtr in the Pair
// somewhere, we have to incur a double copy here. // somewhere, we have to incur a double copy here.
heap_.push_back(std::move(*entry)); heap_.push_back(*entry);
*entry = std::move(heap_.back()); *entry = heap_.back();
hole_index = SiftUp(hole_index, *entry); hole_index = SiftUp(hole_index, *entry);
heap_[hole_index] = std::move(*entry); heap_[hole_index] = *entry;
} }
// Get the value of the top (smallest, defined by operator< ) element. // Get the value of the top (smallest, defined by operator< ) element.
@ -121,14 +121,14 @@ class GenericHeap {
if (new_size < 0) if (new_size < 0)
return false; // Already empty. return false; // Already empty.
if (entry != nullptr) if (entry != nullptr)
*entry = std::move(heap_[0]); *entry = heap_[0];
if (new_size > 0) { if (new_size > 0) {
// Sift the hole at the start of the heap_ downwards to match the last // Sift the hole at the start of the heap_ downwards to match the last
// element. // element.
auto hole_pair = std::move(heap_[new_size]); Pair hole_pair = heap_[new_size];
heap_.truncate(new_size); heap_.truncate(new_size);
int hole_index = SiftDown(0, hole_pair); int hole_index = SiftDown(0, hole_pair);
heap_[hole_index] = std::move(hole_pair); heap_[hole_index] = hole_pair;
} else { } else {
heap_.truncate(new_size); heap_.truncate(new_size);
} }
@ -143,13 +143,13 @@ class GenericHeap {
if (worst_index < 0) return false; // It cannot be empty! if (worst_index < 0) return false; // It cannot be empty!
// Extract the worst element from the heap, leaving a hole at worst_index. // Extract the worst element from the heap, leaving a hole at worst_index.
if (entry != nullptr) if (entry != nullptr)
*entry = std::move(heap_[worst_index]); *entry = heap_[worst_index];
int heap_size = heap_.size() - 1; int heap_size = heap_.size() - 1;
if (heap_size > 0) { if (heap_size > 0) {
// Sift the hole upwards to match the last element of the heap_ // Sift the hole upwards to match the last element of the heap_
auto hole_pair = std::move(heap_[heap_size]); Pair hole_pair = heap_[heap_size];
int hole_index = SiftUp(worst_index, hole_pair); int hole_index = SiftUp(worst_index, hole_pair);
heap_[hole_index] = std::move(hole_pair); heap_[hole_index] = hole_pair;
} }
heap_.truncate(heap_size); heap_.truncate(heap_size);
return true; return true;
@ -182,10 +182,10 @@ class GenericHeap {
// Time = O(log n). // Time = O(log n).
void Reshuffle(Pair* pair) { void Reshuffle(Pair* pair) {
int index = pair - &heap_[0]; int index = pair - &heap_[0];
auto hole_pair = std::move(heap_[index]); Pair hole_pair = heap_[index];
index = SiftDown(index, hole_pair); index = SiftDown(index, hole_pair);
index = SiftUp(index, hole_pair); index = SiftUp(index, hole_pair);
heap_[index] = std::move(hole_pair); heap_[index] = hole_pair;
} }
private: private:
@ -195,7 +195,7 @@ class GenericHeap {
int SiftUp(int hole_index, const Pair& pair) { int SiftUp(int hole_index, const Pair& pair) {
int parent; int parent;
while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) { while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) {
heap_[hole_index] = std::move(heap_[parent]); heap_[hole_index] = heap_[parent];
hole_index = parent; hole_index = parent;
} }
return hole_index; return hole_index;
@ -211,7 +211,7 @@ class GenericHeap {
if (child + 1 < heap_size && heap_[child + 1] < heap_[child]) if (child + 1 < heap_size && heap_[child + 1] < heap_[child])
++child; ++child;
if (heap_[child] < pair) { if (heap_[child] < pair) {
heap_[hole_index] = std::move(heap_[child]); heap_[hole_index] = heap_[child];
hole_index = child; hole_index = child;
} else { } else {
break; break;

View File

@ -26,65 +26,62 @@
#include <tesseract/genericvector.h> #include <tesseract/genericvector.h>
#include <memory>
#include <utility>
namespace tesseract { namespace tesseract {
// A useful base struct to facilitate the common operation of sorting a vector // A useful base struct to facilitate the common operation of sorting a vector
// of simple or smart-pointer data using a separate key. Similar to STL pair. // of simple or smart-pointer data using a separate key. Similar to STL pair.
template <typename Key, typename Data> template <typename Key, typename Data>
class KDPair : public std::pair<Key, Data> { struct KDPair {
using base = std::pair<Key, Data>; KDPair() = default;
public: KDPair(Key k, Data d) : data_(d), key_(k) {}
using std::pair<Key, Data>::pair;
using base::first;
using base::second;
int operator==(const KDPair<Key, Data>& other) const { int operator==(const KDPair<Key, Data>& other) const {
return first == other.first; return key_ == other.key_;
} }
Key& key() { Data &data() { return data_; }
return first; const Data &data() const { return data_; }
} Key &key() { return key_; }
Data& data() { const Key &key() const { return key_; }
return second;
} // WARNING! Keep data as the first element! KDPairInc and KDPairDec depend
const Key& key() const { // on the order of these elements so they can downcast pointers appropriately
return first; // for use by GenericHeap::Reshuffle.
} Data data_;
const Data& data() const { Key key_;
return second;
}
}; };
// Specialization of KDPair to provide operator< for sorting in increasing order // Specialization of KDPair to provide operator< for sorting in increasing order
// and recasting of data pointers for use with DoublePtr. // and recasting of data pointers for use with DoublePtr.
template <typename Key, typename Data> template <typename Key, typename Data>
class KDPairInc : public KDPair<Key, Data> { struct KDPairInc : public KDPair<Key, Data> {
public: KDPairInc() = default;
using KDPair<Key, Data>::KDPair; KDPairInc(Key k, Data d) : KDPair<Key, Data>(k, d) {}
// Operator< facilitates sorting in increasing order. // Operator< facilitates sorting in increasing order.
int operator<(const KDPairInc<Key, Data>& other) const { int operator<(const KDPairInc<Key, Data>& other) const {
return this->key() < other.key(); return this->key() < other.key();
} }
// Converts a pointer to a pair's Data member back into a pointer to the
// KDPairInc that holds it. This is a plain pointer reinterpretation, so it is
// only meaningful while the data member is the first element of the pair.
static KDPairInc* RecastDataPointer(Data* data_ptr) {
  KDPairInc* owning_pair = reinterpret_cast<KDPairInc*>(data_ptr);
  return owning_pair;
}
}; };
// Specialization of KDPair to provide operator< for sorting in decreasing order // Specialization of KDPair to provide operator< for sorting in decreasing order
// and recasting of data pointers for use with DoublePtr. // and recasting of data pointers for use with DoublePtr.
template <typename Key, typename Data> template <typename Key, typename Data>
class KDPairDec : public KDPair<Key, Data> { struct KDPairDec : public KDPair<Key, Data> {
public: KDPairDec() = default;
using KDPair<Key, Data>::KDPair; KDPairDec(Key k, Data d) : KDPair<Key, Data>(k, d) {}
// Operator< facilitates sorting in decreasing order by using operator> on // Operator< facilitates sorting in decreasing order by using operator> on
// the key values. // the key values.
int operator<(const KDPairDec<Key, Data>& other) const { int operator<(const KDPairDec<Key, Data>& other) const {
return this->key() > other.key(); return this->key() > other.key();
} }
// Converts a pointer to a pair's Data member back into a pointer to the
// KDPairDec that holds it. This is a plain pointer reinterpretation, so it is
// only meaningful while the data member is the first element of the pair.
static KDPairDec* RecastDataPointer(Data* data_ptr) {
  KDPairDec* owning_pair = reinterpret_cast<KDPairDec*>(data_ptr);
  return owning_pair;
}
}; };
// A useful base class to facilitate the common operation of sorting a vector // A useful base class to facilitate the common operation of sorting a vector
@ -93,57 +90,89 @@ class KDPairDec : public KDPair<Key, Data> {
// operator= that have move semantics so that the data does not get copied and // operator= that have move semantics so that the data does not get copied and
// only a single instance of KDPtrPair holds a specific data pointer. // only a single instance of KDPtrPair holds a specific data pointer.
template <typename Key, typename Data> template <typename Key, typename Data>
class KDPtrPair : public std::pair<Key, std::unique_ptr<Data>> { class KDPtrPair {
using base = std::pair<Key, std::unique_ptr<Data>>;
public: public:
using base::first; KDPtrPair() : data_(nullptr) {}
using base::second; KDPtrPair(Key k, Data* d) : data_(d), key_(k) {}
// Copy constructor steals the pointer from src and nulls it in src, thereby
KDPtrPair() = default; // moving the (single) ownership of the data.
KDPtrPair(Key k, Data* d) : std::pair<Key, std::unique_ptr<Data>>(k, d) {} KDPtrPair(const KDPtrPair& src) : data_(src.data_), key_(src.key_) {
KDPtrPair(KDPtrPair &&src) = default; ((KDPtrPair&)src).data_ = nullptr;
KDPtrPair &operator=(KDPtrPair &&src) = default; }
// Frees the data object held by this pair, which is assumed to be its only
// owner, and clears the pointer afterwards.
~KDPtrPair() {
  delete data_;
  data_ = nullptr;
}
// Transfer-assignment: deletes any data this pair currently holds, then takes
// over src's data pointer and key. src is left with a null data pointer, so
// exactly one pair owns the data afterwards. src is taken by const reference
// but is modified nonetheless.
// Assigning a pair to itself leaves it unchanged.
void operator=(const KDPtrPair& src) {
  // Without this guard, self-assignment would delete the data and then null
  // the pointer, silently destroying the only copy.
  if (this == &src) {
    return;
  }
  delete data_;
  data_ = src.data_;
  key_ = src.key_;
  // Ownership has moved to this pair; make sure src cannot delete the data.
  const_cast<KDPtrPair&>(src).data_ = nullptr;
}
int operator==(const KDPtrPair<Key, Data>& other) const { int operator==(const KDPtrPair<Key, Data>& other) const {
return key() == other.key(); return key_ == other.key_;
} }
// Accessors. // Accessors.
const Key& key() const { const Key& key() const {
return first; return key_;
} }
void set_key(const Key& new_key) { void set_key(const Key& new_key) {
first = new_key; key_ = new_key;
} }
const Data* data() const { const Data* data() const {
return second.get(); return data_;
} }
// Sets the data pointer, taking ownership of the data. // Sets the data pointer, taking ownership of the data.
void set_data(Data* new_data) { void set_data(Data* new_data) {
second.reset(new_data); delete data_;
data_ = new_data;
} }
// Relinquishes ownership of the data pointer (setting it to nullptr). // Relinquishes ownership of the data pointer (setting it to nullptr).
Data* extract_data() { Data* extract_data() {
return second.release(); Data* result = data_;
data_ = nullptr;
return result;
} }
};
private:
// Data members are private to keep deletion of data_ encapsulated.
Data* data_;
Key key_;
};
// Specialization of KDPtrPair to provide operator< for sorting in increasing // Specialization of KDPtrPair to provide operator< for sorting in increasing
// order. // order.
template <typename Key, typename Data> template <typename Key, typename Data>
struct KDPtrPairInc : public KDPtrPair<Key, Data> { struct KDPtrPairInc : public KDPtrPair<Key, Data> {
using KDPtrPair<Key, Data>::KDPtrPair; // Since we are doing non-standard stuff we have to duplicate *all* the
// constructors and operator=.
// Default constructor: delegates to the KDPtrPair default constructor.
KDPtrPairInc() : KDPtrPair<Key, Data>() {}
// Builds a pair from key k and data pointer d by passing both to KDPtrPair.
KDPtrPairInc(Key k, Data* d) : KDPtrPair<Key, Data>(k, d) {}
// Copy constructor: delegates to the KDPtrPair copy constructor.
KDPtrPairInc(const KDPtrPairInc& src) : KDPtrPair<Key, Data>(src) {}
// Assignment: delegates to KDPtrPair::operator=. Returns nothing, so
// assignments cannot be chained.
void operator=(const KDPtrPairInc& src) {
KDPtrPair<Key, Data>::operator=(src);
}
// Operator< facilitates sorting in increasing order. // Operator< facilitates sorting in increasing order.
int operator<(const KDPtrPairInc<Key, Data>& other) const { int operator<(const KDPtrPairInc<Key, Data>& other) const {
return this->key() < other.key(); return this->key() < other.key();
} }
}; };
// Specialization of KDPtrPair to provide operator< for sorting in decreasing // Specialization of KDPtrPair to provide operator< for sorting in decreasing
// order. // order.
template <typename Key, typename Data> template <typename Key, typename Data>
struct KDPtrPairDec : public KDPtrPair<Key, Data> { struct KDPtrPairDec : public KDPtrPair<Key, Data> {
using KDPtrPair<Key, Data>::KDPtrPair; // Since we are doing non-standard stuff we have to duplicate *all* the
// constructors and operator=.
// Default constructor: delegates to the KDPtrPair default constructor.
KDPtrPairDec() : KDPtrPair<Key, Data>() {}
// Builds a pair from key k and data pointer d by passing both to KDPtrPair.
KDPtrPairDec(Key k, Data* d) : KDPtrPair<Key, Data>(k, d) {}
// Copy constructor: delegates to the KDPtrPair copy constructor.
KDPtrPairDec(const KDPtrPairDec& src) : KDPtrPair<Key, Data>(src) {}
// Assignment: delegates to KDPtrPair::operator=. Returns nothing, so
// assignments cannot be chained.
void operator=(const KDPtrPairDec& src) {
KDPtrPair<Key, Data>::operator=(src);
}
// Operator< facilitates sorting in decreasing order by using operator> on // Operator< facilitates sorting in decreasing order by using operator> on
// the key values. // the key values.
int operator<(const KDPtrPairDec<Key, Data>& other) const { int operator<(const KDPtrPairDec<Key, Data>& other) const {

View File

@ -1054,7 +1054,7 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
word_start = true; word_start = true;
} else if (uni_prev->dawgs != nullptr) { } else if (uni_prev->dawgs != nullptr) {
// Continuing a previous dict word. // Continuing a previous dict word.
dawg_args.active_dawgs = uni_prev->dawgs.get(); dawg_args.active_dawgs = uni_prev->dawgs;
word_start = uni_prev->start_of_dawg; word_start = uni_prev->start_of_dawg;
} else { } else {
return; // Can't continue if not a dict word. return; // Can't continue if not a dict word.
@ -1099,7 +1099,7 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
score, prev, initial_dawgs, score, prev, initial_dawgs,
ComputeCodeHash(code, false, prev)); ComputeCodeHash(code, false, prev));
*best_initial_dawg = std::move(node); *best_initial_dawg = node;
} }
} }
@ -1144,7 +1144,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
dup, cert, score, prev, d, hash); dup, cert, score, prev, d, hash);
if (UpdateHeapIfMatched(&node, heap)) return; if (UpdateHeapIfMatched(&node, heap)) return;
RecodePair entry(score, std::move(node)); RecodePair entry(score, node);
heap->Push(&entry); heap->Push(&entry);
ASSERT_HOST(entry.data().dawgs == nullptr); ASSERT_HOST(entry.data().dawgs == nullptr);
if (heap->size() > max_size) heap->Pop(&entry); if (heap->size() > max_size) heap->Pop(&entry);
@ -1161,7 +1161,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node,
if (UpdateHeapIfMatched(node, heap)) { if (UpdateHeapIfMatched(node, heap)) {
return; return;
} }
RecodePair entry(node->score, std::move(*node)); RecodePair entry(node->score, *node);
heap->Push(&entry); heap->Push(&entry);
ASSERT_HOST(entry.data().dawgs == nullptr); ASSERT_HOST(entry.data().dawgs == nullptr);
if (heap->size() > max_size) heap->Pop(&entry); if (heap->size() > max_size) heap->Pop(&entry);
@ -1184,7 +1184,7 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
if (new_node->score > node.score) { if (new_node->score > node.score) {
// The new one is better. Update the entire node in the heap and // The new one is better. Update the entire node in the heap and
// reshuffle. // reshuffle.
node = std::move(*new_node); node = *new_node;
(*nodes)[i].key() = node.score; (*nodes)[i].key() = node.score;
heap->Reshuffle(&(*nodes)[i]); heap->Reshuffle(&(*nodes)[i]);
} }

View File

@ -27,9 +27,7 @@
#include "networkio.h" #include "networkio.h"
#include "ratngs.h" #include "ratngs.h"
#include "unicharcompress.h" #include "unicharcompress.h"
#include <deque> #include <deque>
#include <memory>
#include <set> #include <set>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
@ -126,9 +124,17 @@ struct RecodeNode {
// don't want to copy the whole DawgPositionVector each time, and true // don't want to copy the whole DawgPositionVector each time, and true
// copying isn't necessary for this struct. It does get moved around a lot // copying isn't necessary for this struct. It does get moved around a lot
// though inside the heap and during heap push, hence the move semantics. // though inside the heap and during heap push, hence the move semantics.
RecodeNode(RecodeNode &&) = default; RecodeNode(const RecodeNode& src) : dawgs(nullptr) {
RecodeNode& operator=(RecodeNode &&) = default; *this = src;
ASSERT_HOST(src.dawgs == nullptr);
}
// Transfer-assignment: deletes the dawgs currently held by this node, copies
// every field of src bitwise (including its dawgs pointer), and then nulls
// src.dawgs so that only this node refers to the vector. src is taken by
// const reference but is modified nonetheless.
// Assigning a node to itself leaves it unchanged.
// Returns *this.
RecodeNode& operator=(const RecodeNode& src) {
  // Without this guard, self-assignment would delete dawgs, call memcpy with
  // identical source and destination (undefined for overlapping objects), and
  // then null the pointer, losing the vector.
  if (this != &src) {
    delete dawgs;
    memcpy(this, &src, sizeof(src));
    // The bitwise copy duplicated the pointer; clear it in src so it is not
    // deleted twice.
    const_cast<RecodeNode&>(src).dawgs = nullptr;
  }
  return *this;
}
// Frees the dawgs vector held by this node; deleting a null pointer does
// nothing.
~RecodeNode() {
  delete dawgs;
}
// Prints details of the node. // Prints details of the node.
void Print(int null_char, const UNICHARSET& unicharset, int depth) const; void Print(int null_char, const UNICHARSET& unicharset, int depth) const;
@ -161,7 +167,7 @@ struct RecodeNode {
// The previous node in this chain. Borrowed pointer. // The previous node in this chain. Borrowed pointer.
const RecodeNode* prev; const RecodeNode* prev;
// The currently active dawgs at this position. Owned pointer. // The currently active dawgs at this position. Owned pointer.
std::unique_ptr<DawgPositionVector> dawgs; DawgPositionVector* dawgs;
// A hash of all codes in the prefix and this->code as well. Used for // A hash of all codes in the prefix and this->code as well. Used for
// duplicate path removal. // duplicate path removal.
uint64_t code_hash; uint64_t code_hash;
@ -272,8 +278,9 @@ class RecodeBeamSearch {
for (auto & beam : beams_) { for (auto & beam : beams_) {
beam.clear(); beam.clear();
} }
RecodeNode empty;
for (auto & best_initial_dawg : best_initial_dawgs_) { for (auto & best_initial_dawg : best_initial_dawgs_) {
best_initial_dawg = {}; best_initial_dawg = empty;
} }
} }