Fixed issue 1245

This commit is contained in:
Ray Smith 2014-08-13 18:51:28 -07:00
parent 3adb03b5c8
commit 3c21c14949
3 changed files with 18 additions and 9 deletions

View File

@ -181,8 +181,9 @@ void Dawg::init(DawgType type, const STRING &lang,
perm_ = perm;
ASSERT_HOST(unicharset_size > 0);
unicharset_size_ = unicharset_size;
// Set bit masks.
flag_start_bit_ = ceil(log(static_cast<double>(unicharset_size_)) / log(2.0));
// Set bit masks. We will use the value unicharset_size_ as a null char, so
// the actual number of unichars is unicharset_size_ + 1.
flag_start_bit_ = ceil(log(unicharset_size_ + 1.0) / log(2.0));
next_node_start_bit_ = flag_start_bit_ + NUM_FLAG_BITS;
letter_mask_ = ~(~0ull << flag_start_bit_);
next_node_mask_ = ~0ull << (flag_start_bit_ + NUM_FLAG_BITS);

View File

@ -631,8 +631,8 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index,
// Find the first edge that can be eliminated.
UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID;
while (i < backward_edges->size()) {
curr_unichar_id = unichar_id_from_edge_rec((*backward_edges)[i]);
if (curr_unichar_id != 0) {
if (!DeadEdge((*backward_edges)[i])) {
curr_unichar_id = unichar_id_from_edge_rec((*backward_edges)[i]);
if (curr_unichar_id != unichar_id) return did_something;
if (can_be_eliminated((*backward_edges)[i])) break;
}
@ -643,8 +643,8 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index,
// Compare it to the rest of the edges with the given unichar_id.
for (int j = i + 1; j < backward_edges->size(); ++j) {
const EDGE_RECORD &next_edge_rec = (*backward_edges)[j];
if (DeadEdge(next_edge_rec)) continue;
UNICHAR_ID next_id = unichar_id_from_edge_rec(next_edge_rec);
if (next_id == 0) continue;
if (next_id != unichar_id) break;
if (end_of_word_from_edge_rec(next_edge_rec) ==
end_of_word_from_edge_rec(edge_rec) &&
@ -675,22 +675,23 @@ void Trie::sort_edges(EDGE_VECTOR *edges) {
void Trie::reduce_node_input(NODE_REF node,
NODE_MARKER reduced_nodes) {
EDGE_VECTOR &backward_edges = nodes_[node]->backward_edges;
sort_edges(&backward_edges);
if (debug_level_ > 1) {
tprintf("reduce_node_input(node=" REFFORMAT ")\n", node);
print_node(node, MAX_NODE_EDGES_DISPLAY);
}
EDGE_VECTOR &backward_edges = nodes_[node]->backward_edges;
sort_edges(&backward_edges);
EDGE_INDEX edge_index = 0;
while (edge_index < backward_edges.size()) {
if (DeadEdge(backward_edges[edge_index])) continue;
UNICHAR_ID unichar_id =
unichar_id_from_edge_rec(backward_edges[edge_index]);
while (reduce_lettered_edges(edge_index, unichar_id, node,
&backward_edges, reduced_nodes));
while (++edge_index < backward_edges.size()) {
UNICHAR_ID id = unichar_id_from_edge_rec(backward_edges[edge_index]);
if (id != 0 && id != unichar_id) break;
if (!DeadEdge(backward_edges[edge_index]) && id != unichar_id) break;
}
}
reduced_nodes[node] = true; // mark as reduced
@ -701,6 +702,7 @@ void Trie::reduce_node_input(NODE_REF node,
}
for (int i = 0; i < backward_edges.size(); ++i) {
if (DeadEdge(backward_edges[i])) continue;
NODE_REF next_node = next_node_from_edge_rec(backward_edges[i]);
if (next_node != 0 && !reduced_nodes[next_node]) {
reduce_node_input(next_node, reduced_nodes);
@ -725,6 +727,7 @@ void Trie::print_node(NODE_REF node, int max_num_edges) const {
int i;
for (i = 0; (dir == 0 ? i < num_fwd : i < num_bkw) &&
i < max_num_edges; ++i) {
if (DeadEdge((*vec)[i])) continue;
print_edge_rec((*vec)[i]);
tprintf(" ");
}

View File

@ -148,9 +148,14 @@ class Trie : public Dawg {
if (edge_ref == NO_EDGE || num_edges_ == 0) return INVALID_UNICHAR_ID;
return unichar_id_from_edge_rec(*deref_edge_ref(edge_ref));
}
// Sets the UNICHAR_ID in the given edge_rec to 0, marking the edge dead.
// Sets the UNICHAR_ID in the given edge_rec to unicharset_size_, marking
// the edge dead.
void KillEdge(EDGE_RECORD* edge_rec) const {
  // Wipe the bits covered by letter_mask_ so that nothing of the previous
  // UNICHAR_ID survives; bits outside the mask are left as they were.
  *edge_rec &= ~letter_mask_;
  // Store unicharset_size_ as the "dead" marker (the value DeadEdge() tests
  // for). Widen to EDGE_RECORD before shifting so the shift is carried out
  // at the record's width rather than at the width of unicharset_size_.
  *edge_rec |= (static_cast<EDGE_RECORD>(unicharset_size_) << LETTER_START_BIT);
}
bool DeadEdge(const EDGE_RECORD& edge_rec) const {
return unichar_id_from_edge_rec(edge_rec) == unicharset_size_;
}
// Prints the contents of the node indicated by the given NODE_REF.