mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Fixed issue 1245
This commit is contained in:
parent
3adb03b5c8
commit
3c21c14949
@ -181,8 +181,9 @@ void Dawg::init(DawgType type, const STRING &lang,
|
||||
perm_ = perm;
|
||||
ASSERT_HOST(unicharset_size > 0);
|
||||
unicharset_size_ = unicharset_size;
|
||||
// Set bit masks.
|
||||
flag_start_bit_ = ceil(log(static_cast<double>(unicharset_size_)) / log(2.0));
|
||||
// Set bit masks. We will use the value unicharset_size_ as a null char, so
|
||||
// the actual number of unichars is unicharset_size_ + 1.
|
||||
flag_start_bit_ = ceil(log(unicharset_size_ + 1.0) / log(2.0));
|
||||
next_node_start_bit_ = flag_start_bit_ + NUM_FLAG_BITS;
|
||||
letter_mask_ = ~(~0ull << flag_start_bit_);
|
||||
next_node_mask_ = ~0ull << (flag_start_bit_ + NUM_FLAG_BITS);
|
||||
|
@ -631,8 +631,8 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index,
|
||||
// Find the first edge that can be eliminated.
|
||||
UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID;
|
||||
while (i < backward_edges->size()) {
|
||||
curr_unichar_id = unichar_id_from_edge_rec((*backward_edges)[i]);
|
||||
if (curr_unichar_id != 0) {
|
||||
if (!DeadEdge((*backward_edges)[i])) {
|
||||
curr_unichar_id = unichar_id_from_edge_rec((*backward_edges)[i]);
|
||||
if (curr_unichar_id != unichar_id) return did_something;
|
||||
if (can_be_eliminated((*backward_edges)[i])) break;
|
||||
}
|
||||
@ -643,8 +643,8 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index,
|
||||
// Compare it to the rest of the edges with the given unichar_id.
|
||||
for (int j = i + 1; j < backward_edges->size(); ++j) {
|
||||
const EDGE_RECORD &next_edge_rec = (*backward_edges)[j];
|
||||
if (DeadEdge(next_edge_rec)) continue;
|
||||
UNICHAR_ID next_id = unichar_id_from_edge_rec(next_edge_rec);
|
||||
if (next_id == 0) continue;
|
||||
if (next_id != unichar_id) break;
|
||||
if (end_of_word_from_edge_rec(next_edge_rec) ==
|
||||
end_of_word_from_edge_rec(edge_rec) &&
|
||||
@ -675,22 +675,23 @@ void Trie::sort_edges(EDGE_VECTOR *edges) {
|
||||
|
||||
void Trie::reduce_node_input(NODE_REF node,
|
||||
NODE_MARKER reduced_nodes) {
|
||||
EDGE_VECTOR &backward_edges = nodes_[node]->backward_edges;
|
||||
sort_edges(&backward_edges);
|
||||
if (debug_level_ > 1) {
|
||||
tprintf("reduce_node_input(node=" REFFORMAT ")\n", node);
|
||||
print_node(node, MAX_NODE_EDGES_DISPLAY);
|
||||
}
|
||||
|
||||
EDGE_VECTOR &backward_edges = nodes_[node]->backward_edges;
|
||||
sort_edges(&backward_edges);
|
||||
EDGE_INDEX edge_index = 0;
|
||||
while (edge_index < backward_edges.size()) {
|
||||
if (DeadEdge(backward_edges[edge_index])) continue;
|
||||
UNICHAR_ID unichar_id =
|
||||
unichar_id_from_edge_rec(backward_edges[edge_index]);
|
||||
while (reduce_lettered_edges(edge_index, unichar_id, node,
|
||||
&backward_edges, reduced_nodes));
|
||||
while (++edge_index < backward_edges.size()) {
|
||||
UNICHAR_ID id = unichar_id_from_edge_rec(backward_edges[edge_index]);
|
||||
if (id != 0 && id != unichar_id) break;
|
||||
if (!DeadEdge(backward_edges[edge_index]) && id != unichar_id) break;
|
||||
}
|
||||
}
|
||||
reduced_nodes[node] = true; // mark as reduced
|
||||
@ -701,6 +702,7 @@ void Trie::reduce_node_input(NODE_REF node,
|
||||
}
|
||||
|
||||
for (int i = 0; i < backward_edges.size(); ++i) {
|
||||
if (DeadEdge(backward_edges[i])) continue;
|
||||
NODE_REF next_node = next_node_from_edge_rec(backward_edges[i]);
|
||||
if (next_node != 0 && !reduced_nodes[next_node]) {
|
||||
reduce_node_input(next_node, reduced_nodes);
|
||||
@ -725,6 +727,7 @@ void Trie::print_node(NODE_REF node, int max_num_edges) const {
|
||||
int i;
|
||||
for (i = 0; (dir == 0 ? i < num_fwd : i < num_bkw) &&
|
||||
i < max_num_edges; ++i) {
|
||||
if (DeadEdge((*vec)[i])) continue;
|
||||
print_edge_rec((*vec)[i]);
|
||||
tprintf(" ");
|
||||
}
|
||||
|
@ -148,9 +148,14 @@ class Trie : public Dawg {
|
||||
if (edge_ref == NO_EDGE || num_edges_ == 0) return INVALID_UNICHAR_ID;
|
||||
return unichar_id_from_edge_rec(*deref_edge_ref(edge_ref));
|
||||
}
|
||||
// Sets the UNICHAR_ID in the given edge_rec to 0, marking the edge dead.
|
||||
// Sets the UNICHAR_ID in the given edge_rec to unicharset_size_, marking
|
||||
// the edge dead.
|
||||
void KillEdge(EDGE_RECORD* edge_rec) const {
  // Value that marks an edge as dead: unicharset_size_ positioned at
  // LETTER_START_BIT (the same id DeadEdge() compares against).
  const EDGE_RECORD dead_letter = (unicharset_size_ << LETTER_START_BIT);
  // Clear the bits selected by letter_mask_, then OR in the dead marker.
  *edge_rec = (*edge_rec & ~letter_mask_) | dead_letter;
}
|
||||
bool DeadEdge(const EDGE_RECORD& edge_rec) const {
|
||||
return unichar_id_from_edge_rec(edge_rec) == unicharset_size_;
|
||||
}
|
||||
|
||||
// Prints the contents of the node indicated by the given NODE_REF.
|
||||
|
Loading…
Reference in New Issue
Block a user