mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-05 02:47:00 +08:00
Revert kdpair, genericheap changes.
This commit is contained in:
parent
6cc00aa332
commit
4175679da6
@ -54,7 +54,7 @@ namespace tesseract {
|
||||
// index and pointer can be changed arbitrarily by heap operations.
|
||||
// Revaluation can be done by making the Data type in the Pair derived from or
|
||||
// contain a DoublePtr as its first data element, making it possible to convert
|
||||
// the pointer to a Pair using reinterpret_cast<KDPairDec*>.
|
||||
// the pointer to a Pair using KDPairInc::RecastDataPointer.
|
||||
template <typename Pair>
|
||||
class GenericHeap {
|
||||
public:
|
||||
@ -99,10 +99,10 @@ class GenericHeap {
|
||||
// location for the new *entry. To avoid needing a default constructor
|
||||
// for primitive types, and to allow for use of DoublePtr in the Pair
|
||||
// somewhere, we have to incur a double copy here.
|
||||
heap_.push_back(std::move(*entry));
|
||||
*entry = std::move(heap_.back());
|
||||
heap_.push_back(*entry);
|
||||
*entry = heap_.back();
|
||||
hole_index = SiftUp(hole_index, *entry);
|
||||
heap_[hole_index] = std::move(*entry);
|
||||
heap_[hole_index] = *entry;
|
||||
}
|
||||
|
||||
// Get the value of the top (smallest, defined by operator< ) element.
|
||||
@ -121,14 +121,14 @@ class GenericHeap {
|
||||
if (new_size < 0)
|
||||
return false; // Already empty.
|
||||
if (entry != nullptr)
|
||||
*entry = std::move(heap_[0]);
|
||||
*entry = heap_[0];
|
||||
if (new_size > 0) {
|
||||
// Sift the hole at the start of the heap_ downwards to match the last
|
||||
// element.
|
||||
auto hole_pair = std::move(heap_[new_size]);
|
||||
Pair hole_pair = heap_[new_size];
|
||||
heap_.truncate(new_size);
|
||||
int hole_index = SiftDown(0, hole_pair);
|
||||
heap_[hole_index] = std::move(hole_pair);
|
||||
heap_[hole_index] = hole_pair;
|
||||
} else {
|
||||
heap_.truncate(new_size);
|
||||
}
|
||||
@ -143,13 +143,13 @@ class GenericHeap {
|
||||
if (worst_index < 0) return false; // It cannot be empty!
|
||||
// Extract the worst element from the heap, leaving a hole at worst_index.
|
||||
if (entry != nullptr)
|
||||
*entry = std::move(heap_[worst_index]);
|
||||
*entry = heap_[worst_index];
|
||||
int heap_size = heap_.size() - 1;
|
||||
if (heap_size > 0) {
|
||||
// Sift the hole upwards to match the last element of the heap_
|
||||
auto hole_pair = std::move(heap_[heap_size]);
|
||||
Pair hole_pair = heap_[heap_size];
|
||||
int hole_index = SiftUp(worst_index, hole_pair);
|
||||
heap_[hole_index] = std::move(hole_pair);
|
||||
heap_[hole_index] = hole_pair;
|
||||
}
|
||||
heap_.truncate(heap_size);
|
||||
return true;
|
||||
@ -182,10 +182,10 @@ class GenericHeap {
|
||||
// Time = O(log n).
|
||||
void Reshuffle(Pair* pair) {
|
||||
int index = pair - &heap_[0];
|
||||
auto hole_pair = std::move(heap_[index]);
|
||||
Pair hole_pair = heap_[index];
|
||||
index = SiftDown(index, hole_pair);
|
||||
index = SiftUp(index, hole_pair);
|
||||
heap_[index] = std::move(hole_pair);
|
||||
heap_[index] = hole_pair;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -195,7 +195,7 @@ class GenericHeap {
|
||||
int SiftUp(int hole_index, const Pair& pair) {
|
||||
int parent;
|
||||
while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) {
|
||||
heap_[hole_index] = std::move(heap_[parent]);
|
||||
heap_[hole_index] = heap_[parent];
|
||||
hole_index = parent;
|
||||
}
|
||||
return hole_index;
|
||||
@ -211,7 +211,7 @@ class GenericHeap {
|
||||
if (child + 1 < heap_size && heap_[child + 1] < heap_[child])
|
||||
++child;
|
||||
if (heap_[child] < pair) {
|
||||
heap_[hole_index] = std::move(heap_[child]);
|
||||
heap_[hole_index] = heap_[child];
|
||||
hole_index = child;
|
||||
} else {
|
||||
break;
|
||||
|
@ -26,65 +26,62 @@
|
||||
|
||||
#include <tesseract/genericvector.h>
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// A useful base struct to facilitate the common operation of sorting a vector
|
||||
// of simple or smart-pointer data using a separate key. Similar to STL pair.
|
||||
template <typename Key, typename Data>
|
||||
class KDPair : public std::pair<Key, Data> {
|
||||
using base = std::pair<Key, Data>;
|
||||
public:
|
||||
using std::pair<Key, Data>::pair;
|
||||
|
||||
using base::first;
|
||||
using base::second;
|
||||
struct KDPair {
|
||||
KDPair() = default;
|
||||
KDPair(Key k, Data d) : data_(d), key_(k) {}
|
||||
|
||||
int operator==(const KDPair<Key, Data>& other) const {
|
||||
return first == other.first;
|
||||
return key_ == other.key_;
|
||||
}
|
||||
|
||||
Key& key() {
|
||||
return first;
|
||||
}
|
||||
Data& data() {
|
||||
return second;
|
||||
}
|
||||
const Key& key() const {
|
||||
return first;
|
||||
}
|
||||
const Data& data() const {
|
||||
return second;
|
||||
}
|
||||
Data &data() { return data_; }
|
||||
const Data &data() const { return data_; }
|
||||
Key &key() { return key_; }
|
||||
const Key &key() const { return key_; }
|
||||
|
||||
// WARNING! Keep data as the first element! KDPairInc and KDPairDec depend
|
||||
// on the order of these elements so they can downcast pointers appropriately
|
||||
// for use by GenericHeap::Reshuffle.
|
||||
Data data_;
|
||||
Key key_;
|
||||
};
|
||||
|
||||
// Specialization of KDPair to provide operator< for sorting in increasing order
|
||||
// and recasting of data pointers for use with DoublePtr.
|
||||
template <typename Key, typename Data>
|
||||
class KDPairInc : public KDPair<Key, Data> {
|
||||
public:
|
||||
using KDPair<Key, Data>::KDPair;
|
||||
|
||||
struct KDPairInc : public KDPair<Key, Data> {
|
||||
KDPairInc() = default;
|
||||
KDPairInc(Key k, Data d) : KDPair<Key, Data>(k, d) {}
|
||||
// Operator< facilitates sorting in increasing order.
|
||||
int operator<(const KDPairInc<Key, Data>& other) const {
|
||||
return this->key() < other.key();
|
||||
}
|
||||
// Returns the input Data pointer recast to a KDPairInc pointer.
|
||||
// Just casts a pointer to the first element to a pointer to the whole struct.
|
||||
static KDPairInc* RecastDataPointer(Data* data_ptr) {
|
||||
return reinterpret_cast<KDPairInc*>(data_ptr);
|
||||
}
|
||||
};
|
||||
|
||||
// Specialization of KDPair to provide operator< for sorting in decreasing order
|
||||
// and recasting of data pointers for use with DoublePtr.
|
||||
template <typename Key, typename Data>
|
||||
class KDPairDec : public KDPair<Key, Data> {
|
||||
public:
|
||||
using KDPair<Key, Data>::KDPair;
|
||||
|
||||
struct KDPairDec : public KDPair<Key, Data> {
|
||||
KDPairDec() = default;
|
||||
KDPairDec(Key k, Data d) : KDPair<Key, Data>(k, d) {}
|
||||
// Operator< facilitates sorting in decreasing order by using operator> on
|
||||
// the key values.
|
||||
int operator<(const KDPairDec<Key, Data>& other) const {
|
||||
return this->key() > other.key();
|
||||
}
|
||||
// Returns the input Data pointer recast to a KDPairDec pointer.
|
||||
// Just casts a pointer to the first element to a pointer to the whole struct.
|
||||
static KDPairDec* RecastDataPointer(Data* data_ptr) {
|
||||
return reinterpret_cast<KDPairDec*>(data_ptr);
|
||||
}
|
||||
};
|
||||
|
||||
// A useful base class to facilitate the common operation of sorting a vector
|
||||
@ -93,57 +90,89 @@ class KDPairDec : public KDPair<Key, Data> {
|
||||
// operator= that have move semantics so that the data does not get copied and
|
||||
// only a single instance of KDPtrPair holds a specific data pointer.
|
||||
template <typename Key, typename Data>
|
||||
class KDPtrPair : public std::pair<Key, std::unique_ptr<Data>> {
|
||||
using base = std::pair<Key, std::unique_ptr<Data>>;
|
||||
class KDPtrPair {
|
||||
public:
|
||||
using base::first;
|
||||
using base::second;
|
||||
|
||||
KDPtrPair() = default;
|
||||
KDPtrPair(Key k, Data* d) : std::pair<Key, std::unique_ptr<Data>>(k, d) {}
|
||||
KDPtrPair(KDPtrPair &&src) = default;
|
||||
KDPtrPair &operator=(KDPtrPair &&src) = default;
|
||||
KDPtrPair() : data_(nullptr) {}
|
||||
KDPtrPair(Key k, Data* d) : data_(d), key_(k) {}
|
||||
// Copy constructor steals the pointer from src and nulls it in src, thereby
|
||||
// moving the (single) ownership of the data.
|
||||
KDPtrPair(const KDPtrPair& src) : data_(src.data_), key_(src.key_) {
|
||||
((KDPtrPair&)src).data_ = nullptr;
|
||||
}
|
||||
// Destructor deletes data, assuming it is the sole owner.
|
||||
~KDPtrPair() {
|
||||
delete this->data_;
|
||||
this->data_ = nullptr;
|
||||
}
|
||||
// Operator= steals the pointer from src and nulls it in src, thereby
|
||||
// moving the (single) ownership of the data.
|
||||
void operator=(const KDPtrPair& src) {
|
||||
delete this->data_;
|
||||
this->data_ = src.data_;
|
||||
((KDPtrPair&)src).data_ = nullptr;
|
||||
this->key_ = src.key_;
|
||||
}
|
||||
|
||||
int operator==(const KDPtrPair<Key, Data>& other) const {
|
||||
return key() == other.key();
|
||||
return key_ == other.key_;
|
||||
}
|
||||
|
||||
// Accessors.
|
||||
const Key& key() const {
|
||||
return first;
|
||||
return key_;
|
||||
}
|
||||
void set_key(const Key& new_key) {
|
||||
first = new_key;
|
||||
key_ = new_key;
|
||||
}
|
||||
const Data* data() const {
|
||||
return second.get();
|
||||
return data_;
|
||||
}
|
||||
// Sets the data pointer, taking ownership of the data.
|
||||
void set_data(Data* new_data) {
|
||||
second.reset(new_data);
|
||||
delete data_;
|
||||
data_ = new_data;
|
||||
}
|
||||
// Relinquishes ownership of the data pointer (setting it to nullptr).
|
||||
Data* extract_data() {
|
||||
return second.release();
|
||||
Data* result = data_;
|
||||
data_ = nullptr;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
// Data members are private to keep deletion of data_ encapsulated.
|
||||
Data* data_;
|
||||
Key key_;
|
||||
};
|
||||
// Specialization of KDPtrPair to provide operator< for sorting in increasing
|
||||
// order.
|
||||
template <typename Key, typename Data>
|
||||
struct KDPtrPairInc : public KDPtrPair<Key, Data> {
|
||||
using KDPtrPair<Key, Data>::KDPtrPair;
|
||||
// Since we are doing non-standard stuff we have to duplicate *all* the
|
||||
// constructors and operator=.
|
||||
KDPtrPairInc() : KDPtrPair<Key, Data>() {}
|
||||
KDPtrPairInc(Key k, Data* d) : KDPtrPair<Key, Data>(k, d) {}
|
||||
KDPtrPairInc(const KDPtrPairInc& src) : KDPtrPair<Key, Data>(src) {}
|
||||
void operator=(const KDPtrPairInc& src) {
|
||||
KDPtrPair<Key, Data>::operator=(src);
|
||||
}
|
||||
// Operator< facilitates sorting in increasing order.
|
||||
int operator<(const KDPtrPairInc<Key, Data>& other) const {
|
||||
return this->key() < other.key();
|
||||
}
|
||||
};
|
||||
|
||||
// Specialization of KDPtrPair to provide operator< for sorting in decreasing
|
||||
// order.
|
||||
template <typename Key, typename Data>
|
||||
struct KDPtrPairDec : public KDPtrPair<Key, Data> {
|
||||
using KDPtrPair<Key, Data>::KDPtrPair;
|
||||
// Since we are doing non-standard stuff we have to duplicate *all* the
|
||||
// constructors and operator=.
|
||||
KDPtrPairDec() : KDPtrPair<Key, Data>() {}
|
||||
KDPtrPairDec(Key k, Data* d) : KDPtrPair<Key, Data>(k, d) {}
|
||||
KDPtrPairDec(const KDPtrPairDec& src) : KDPtrPair<Key, Data>(src) {}
|
||||
void operator=(const KDPtrPairDec& src) {
|
||||
KDPtrPair<Key, Data>::operator=(src);
|
||||
}
|
||||
// Operator< facilitates sorting in decreasing order by using operator> on
|
||||
// the key values.
|
||||
int operator<(const KDPtrPairDec<Key, Data>& other) const {
|
||||
|
@ -1054,7 +1054,7 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
|
||||
word_start = true;
|
||||
} else if (uni_prev->dawgs != nullptr) {
|
||||
// Continuing a previous dict word.
|
||||
dawg_args.active_dawgs = uni_prev->dawgs.get();
|
||||
dawg_args.active_dawgs = uni_prev->dawgs;
|
||||
word_start = uni_prev->start_of_dawg;
|
||||
} else {
|
||||
return; // Can't continue if not a dict word.
|
||||
@ -1099,7 +1099,7 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
|
||||
RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
|
||||
score, prev, initial_dawgs,
|
||||
ComputeCodeHash(code, false, prev));
|
||||
*best_initial_dawg = std::move(node);
|
||||
*best_initial_dawg = node;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1144,7 +1144,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
|
||||
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
|
||||
dup, cert, score, prev, d, hash);
|
||||
if (UpdateHeapIfMatched(&node, heap)) return;
|
||||
RecodePair entry(score, std::move(node));
|
||||
RecodePair entry(score, node);
|
||||
heap->Push(&entry);
|
||||
ASSERT_HOST(entry.data().dawgs == nullptr);
|
||||
if (heap->size() > max_size) heap->Pop(&entry);
|
||||
@ -1161,7 +1161,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node,
|
||||
if (UpdateHeapIfMatched(node, heap)) {
|
||||
return;
|
||||
}
|
||||
RecodePair entry(node->score, std::move(*node));
|
||||
RecodePair entry(node->score, *node);
|
||||
heap->Push(&entry);
|
||||
ASSERT_HOST(entry.data().dawgs == nullptr);
|
||||
if (heap->size() > max_size) heap->Pop(&entry);
|
||||
@ -1184,7 +1184,7 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
|
||||
if (new_node->score > node.score) {
|
||||
// The new one is better. Update the entire node in the heap and
|
||||
// reshuffle.
|
||||
node = std::move(*new_node);
|
||||
node = *new_node;
|
||||
(*nodes)[i].key() = node.score;
|
||||
heap->Reshuffle(&(*nodes)[i]);
|
||||
}
|
||||
|
@ -27,9 +27,7 @@
|
||||
#include "networkio.h"
|
||||
#include "ratngs.h"
|
||||
#include "unicharcompress.h"
|
||||
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
@ -126,9 +124,17 @@ struct RecodeNode {
|
||||
// don't want to copy the whole DawgPositionVector each time, and true
|
||||
// copying isn't necessary for this struct. It does get moved around a lot
|
||||
// though inside the heap and during heap push, hence the move semantics.
|
||||
RecodeNode(RecodeNode &&) = default;
|
||||
RecodeNode& operator=(RecodeNode &&) = default;
|
||||
|
||||
RecodeNode(const RecodeNode& src) : dawgs(nullptr) {
|
||||
*this = src;
|
||||
ASSERT_HOST(src.dawgs == nullptr);
|
||||
}
|
||||
RecodeNode& operator=(const RecodeNode& src) {
|
||||
delete dawgs;
|
||||
memcpy(this, &src, sizeof(src));
|
||||
((RecodeNode&)src).dawgs = nullptr;
|
||||
return *this;
|
||||
}
|
||||
~RecodeNode() { delete dawgs; }
|
||||
// Prints details of the node.
|
||||
void Print(int null_char, const UNICHARSET& unicharset, int depth) const;
|
||||
|
||||
@ -161,7 +167,7 @@ struct RecodeNode {
|
||||
// The previous node in this chain. Borrowed pointer.
|
||||
const RecodeNode* prev;
|
||||
// The currently active dawgs at this position. Owned pointer.
|
||||
std::unique_ptr<DawgPositionVector> dawgs;
|
||||
DawgPositionVector* dawgs;
|
||||
// A hash of all codes in the prefix and this->code as well. Used for
|
||||
// duplicate path removal.
|
||||
uint64_t code_hash;
|
||||
@ -272,8 +278,9 @@ class RecodeBeamSearch {
|
||||
for (auto & beam : beams_) {
|
||||
beam.clear();
|
||||
}
|
||||
RecodeNode empty;
|
||||
for (auto & best_initial_dawg : best_initial_dawgs_) {
|
||||
best_initial_dawg = {};
|
||||
best_initial_dawg = empty;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user