Revert kdpair, genericheap changes.

This commit is contained in:
Egor Pugin 2020-12-28 02:31:45 +03:00
parent 6cc00aa332
commit 4175679da6
4 changed files with 114 additions and 78 deletions

View File

@ -54,7 +54,7 @@ namespace tesseract {
// index and pointer can be changed arbitrarily by heap operations.
// Revaluation can be done by making the Data type in the Pair derived from or
// contain a DoublePtr as its first data element, making it possible to convert
// the pointer to a Pair using reinterpret_cast<KDPairDec*>.
// the pointer to a Pair using KDPairInc::RecastDataPointer.
template <typename Pair>
class GenericHeap {
public:
@ -99,10 +99,10 @@ class GenericHeap {
// location for the new *entry. To avoid needing a default constructor
// for primitive types, and to allow for use of DoublePtr in the Pair
// somewhere, we have to incur a double copy here.
heap_.push_back(std::move(*entry));
*entry = std::move(heap_.back());
heap_.push_back(*entry);
*entry = heap_.back();
hole_index = SiftUp(hole_index, *entry);
heap_[hole_index] = std::move(*entry);
heap_[hole_index] = *entry;
}
// Get the value of the top (smallest, defined by operator< ) element.
@ -121,14 +121,14 @@ class GenericHeap {
if (new_size < 0)
return false; // Already empty.
if (entry != nullptr)
*entry = std::move(heap_[0]);
*entry = heap_[0];
if (new_size > 0) {
// Sift the hole at the start of the heap_ downwards to match the last
// element.
auto hole_pair = std::move(heap_[new_size]);
Pair hole_pair = heap_[new_size];
heap_.truncate(new_size);
int hole_index = SiftDown(0, hole_pair);
heap_[hole_index] = std::move(hole_pair);
heap_[hole_index] = hole_pair;
} else {
heap_.truncate(new_size);
}
@ -143,13 +143,13 @@ class GenericHeap {
if (worst_index < 0) return false; // It cannot be empty!
// Extract the worst element from the heap, leaving a hole at worst_index.
if (entry != nullptr)
*entry = std::move(heap_[worst_index]);
*entry = heap_[worst_index];
int heap_size = heap_.size() - 1;
if (heap_size > 0) {
// Sift the hole upwards to match the last element of the heap_
auto hole_pair = std::move(heap_[heap_size]);
Pair hole_pair = heap_[heap_size];
int hole_index = SiftUp(worst_index, hole_pair);
heap_[hole_index] = std::move(hole_pair);
heap_[hole_index] = hole_pair;
}
heap_.truncate(heap_size);
return true;
@ -182,10 +182,10 @@ class GenericHeap {
// Time = O(log n).
void Reshuffle(Pair* pair) {
int index = pair - &heap_[0];
auto hole_pair = std::move(heap_[index]);
Pair hole_pair = heap_[index];
index = SiftDown(index, hole_pair);
index = SiftUp(index, hole_pair);
heap_[index] = std::move(hole_pair);
heap_[index] = hole_pair;
}
private:
@ -195,7 +195,7 @@ class GenericHeap {
int SiftUp(int hole_index, const Pair& pair) {
int parent;
while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) {
heap_[hole_index] = std::move(heap_[parent]);
heap_[hole_index] = heap_[parent];
hole_index = parent;
}
return hole_index;
@ -211,7 +211,7 @@ class GenericHeap {
if (child + 1 < heap_size && heap_[child + 1] < heap_[child])
++child;
if (heap_[child] < pair) {
heap_[hole_index] = std::move(heap_[child]);
heap_[hole_index] = heap_[child];
hole_index = child;
} else {
break;

View File

@ -26,65 +26,62 @@
#include <tesseract/genericvector.h>
#include <memory>
#include <utility>
namespace tesseract {
// A useful base struct to facilitate the common operation of sorting a vector
// of simple or smart-pointer data using a separate key. Similar to STL pair.
template <typename Key, typename Data>
class KDPair : public std::pair<Key, Data> {
using base = std::pair<Key, Data>;
public:
using std::pair<Key, Data>::pair;
using base::first;
using base::second;
struct KDPair {
KDPair() = default;
KDPair(Key k, Data d) : data_(d), key_(k) {}
int operator==(const KDPair<Key, Data>& other) const {
return first == other.first;
return key_ == other.key_;
}
Key& key() {
return first;
}
Data& data() {
return second;
}
const Key& key() const {
return first;
}
const Data& data() const {
return second;
}
Data &data() { return data_; }
const Data &data() const { return data_; }
Key &key() { return key_; }
const Key &key() const { return key_; }
// WARNING! Keep data as the first element! KDPairInc and KDPairDec depend
// on the order of these elements so they can downcast pointers appropriately
// for use by GenericHeap::Reshuffle.
Data data_;
Key key_;
};
// Specialization of KDPair to provide operator< for sorting in increasing order
// and recasting of data pointers for use with DoublePtr.
template <typename Key, typename Data>
class KDPairInc : public KDPair<Key, Data> {
public:
using KDPair<Key, Data>::KDPair;
struct KDPairInc : public KDPair<Key, Data> {
KDPairInc() = default;
KDPairInc(Key k, Data d) : KDPair<Key, Data>(k, d) {}
// Operator< facilitates sorting in increasing order.
int operator<(const KDPairInc<Key, Data>& other) const {
return this->key() < other.key();
}
// Returns the input Data pointer recast to a KDPairInc pointer.
// Just casts a pointer to the first element to a pointer to the whole struct.
static KDPairInc* RecastDataPointer(Data* data_ptr) {
return reinterpret_cast<KDPairInc*>(data_ptr);
}
};
// Specialization of KDPair to provide operator< for sorting in decreasing order
// and recasting of data pointers for use with DoublePtr.
template <typename Key, typename Data>
class KDPairDec : public KDPair<Key, Data> {
public:
using KDPair<Key, Data>::KDPair;
struct KDPairDec : public KDPair<Key, Data> {
KDPairDec() = default;
KDPairDec(Key k, Data d) : KDPair<Key, Data>(k, d) {}
// Operator< facilitates sorting in decreasing order by using operator> on
// the key values.
int operator<(const KDPairDec<Key, Data>& other) const {
return this->key() > other.key();
}
// Returns the input Data pointer recast to a KDPairDec pointer.
// Just casts a pointer to the first element to a pointer to the whole struct.
static KDPairDec* RecastDataPointer(Data* data_ptr) {
return reinterpret_cast<KDPairDec*>(data_ptr);
}
};
// A useful base class to facilitate the common operation of sorting a vector
@ -93,57 +90,89 @@ class KDPairDec : public KDPair<Key, Data> {
// operator= that have move semantics so that the data does not get copied and
// only a single instance of KDPtrPair holds a specific data pointer.
template <typename Key, typename Data>
class KDPtrPair : public std::pair<Key, std::unique_ptr<Data>> {
using base = std::pair<Key, std::unique_ptr<Data>>;
class KDPtrPair {
public:
using base::first;
using base::second;
KDPtrPair() = default;
KDPtrPair(Key k, Data* d) : std::pair<Key, std::unique_ptr<Data>>(k, d) {}
KDPtrPair(KDPtrPair &&src) = default;
KDPtrPair &operator=(KDPtrPair &&src) = default;
KDPtrPair() : data_(nullptr) {}
KDPtrPair(Key k, Data* d) : data_(d), key_(k) {}
// Copy constructor steals the pointer from src and nulls it in src, thereby
// moving the (single) ownership of the data.
KDPtrPair(const KDPtrPair& src) : data_(src.data_), key_(src.key_) {
((KDPtrPair&)src).data_ = nullptr;
}
// Destructor deletes data, assuming it is the sole owner.
~KDPtrPair() {
delete this->data_;
this->data_ = nullptr;
}
// Operator= steals the pointer from src and nulls it in src, thereby
// moving the (single) ownership of the data.
void operator=(const KDPtrPair& src) {
delete this->data_;
this->data_ = src.data_;
((KDPtrPair&)src).data_ = nullptr;
this->key_ = src.key_;
}
int operator==(const KDPtrPair<Key, Data>& other) const {
return key() == other.key();
return key_ == other.key_;
}
// Accessors.
const Key& key() const {
return first;
return key_;
}
void set_key(const Key& new_key) {
first = new_key;
key_ = new_key;
}
const Data* data() const {
return second.get();
return data_;
}
// Sets the data pointer, taking ownership of the data.
void set_data(Data* new_data) {
second.reset(new_data);
delete data_;
data_ = new_data;
}
// Relinquishes ownership of the data pointer (setting it to nullptr).
Data* extract_data() {
return second.release();
Data* result = data_;
data_ = nullptr;
return result;
}
};
private:
// Data members are private to keep deletion of data_ encapsulated.
Data* data_;
Key key_;
};
// Specialization of KDPtrPair to provide operator< for sorting in increasing
// order.
template <typename Key, typename Data>
struct KDPtrPairInc : public KDPtrPair<Key, Data> {
using KDPtrPair<Key, Data>::KDPtrPair;
// Since we are doing non-standard stuff we have to duplicate *all* the
// constructors and operator=.
KDPtrPairInc() : KDPtrPair<Key, Data>() {}
KDPtrPairInc(Key k, Data* d) : KDPtrPair<Key, Data>(k, d) {}
KDPtrPairInc(const KDPtrPairInc& src) : KDPtrPair<Key, Data>(src) {}
void operator=(const KDPtrPairInc& src) {
KDPtrPair<Key, Data>::operator=(src);
}
// Operator< facilitates sorting in increasing order.
int operator<(const KDPtrPairInc<Key, Data>& other) const {
return this->key() < other.key();
}
};
// Specialization of KDPtrPair to provide operator< for sorting in decreasing
// order.
template <typename Key, typename Data>
struct KDPtrPairDec : public KDPtrPair<Key, Data> {
using KDPtrPair<Key, Data>::KDPtrPair;
// Since we are doing non-standard stuff we have to duplicate *all* the
// constructors and operator=.
KDPtrPairDec() : KDPtrPair<Key, Data>() {}
KDPtrPairDec(Key k, Data* d) : KDPtrPair<Key, Data>(k, d) {}
KDPtrPairDec(const KDPtrPairDec& src) : KDPtrPair<Key, Data>(src) {}
void operator=(const KDPtrPairDec& src) {
KDPtrPair<Key, Data>::operator=(src);
}
// Operator< facilitates sorting in decreasing order by using operator> on
// the key values.
int operator<(const KDPtrPairDec<Key, Data>& other) const {

View File

@ -1054,7 +1054,7 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
word_start = true;
} else if (uni_prev->dawgs != nullptr) {
// Continuing a previous dict word.
dawg_args.active_dawgs = uni_prev->dawgs.get();
dawg_args.active_dawgs = uni_prev->dawgs;
word_start = uni_prev->start_of_dawg;
} else {
return; // Can't continue if not a dict word.
@ -1099,7 +1099,7 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
score, prev, initial_dawgs,
ComputeCodeHash(code, false, prev));
*best_initial_dawg = std::move(node);
*best_initial_dawg = node;
}
}
@ -1144,7 +1144,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
dup, cert, score, prev, d, hash);
if (UpdateHeapIfMatched(&node, heap)) return;
RecodePair entry(score, std::move(node));
RecodePair entry(score, node);
heap->Push(&entry);
ASSERT_HOST(entry.data().dawgs == nullptr);
if (heap->size() > max_size) heap->Pop(&entry);
@ -1161,7 +1161,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node,
if (UpdateHeapIfMatched(node, heap)) {
return;
}
RecodePair entry(node->score, std::move(*node));
RecodePair entry(node->score, *node);
heap->Push(&entry);
ASSERT_HOST(entry.data().dawgs == nullptr);
if (heap->size() > max_size) heap->Pop(&entry);
@ -1184,7 +1184,7 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
if (new_node->score > node.score) {
// The new one is better. Update the entire node in the heap and
// reshuffle.
node = std::move(*new_node);
node = *new_node;
(*nodes)[i].key() = node.score;
heap->Reshuffle(&(*nodes)[i]);
}

View File

@ -27,9 +27,7 @@
#include "networkio.h"
#include "ratngs.h"
#include "unicharcompress.h"
#include <deque>
#include <memory>
#include <set>
#include <tuple>
#include <vector>
@ -126,9 +124,17 @@ struct RecodeNode {
// don't want to copy the whole DawgPositionVector each time, and true
// copying isn't necessary for this struct. It does get moved around a lot
// though inside the heap and during heap push, hence the move semantics.
RecodeNode(RecodeNode &&) = default;
RecodeNode& operator=(RecodeNode &&) = default;
RecodeNode(const RecodeNode& src) : dawgs(nullptr) {
*this = src;
ASSERT_HOST(src.dawgs == nullptr);
}
RecodeNode& operator=(const RecodeNode& src) {
delete dawgs;
memcpy(this, &src, sizeof(src));
((RecodeNode&)src).dawgs = nullptr;
return *this;
}
~RecodeNode() { delete dawgs; }
// Prints details of the node.
void Print(int null_char, const UNICHARSET& unicharset, int depth) const;
@ -161,7 +167,7 @@ struct RecodeNode {
// The previous node in this chain. Borrowed pointer.
const RecodeNode* prev;
// The currently active dawgs at this position. Owned pointer.
std::unique_ptr<DawgPositionVector> dawgs;
DawgPositionVector* dawgs;
// A hash of all codes in the prefix and this->code as well. Used for
// duplicate path removal.
uint64_t code_hash;
@ -272,8 +278,9 @@ class RecodeBeamSearch {
for (auto & beam : beams_) {
beam.clear();
}
RecodeNode empty;
for (auto & best_initial_dawg : best_initial_dawgs_) {
best_initial_dawg = {};
best_initial_dawg = empty;
}
}