This commit is contained in:
woodjohndavid 2025-05-26 10:14:18 +02:00 committed by GitHub
commit 93998816c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 67 additions and 5 deletions

View File

@ -34,6 +34,11 @@ const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"}; static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
// Threshold on a network output score (key): an output must score above this
// value to be treated as a likely 'real' character when looking for a
// possible diplopia.
static constexpr float kMinDiplopiaKey = 0.25f;
// Prints debug details of the node. // Prints debug details of the node.
void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, void RecodeNode::Print(int null_char, const UNICHARSET &unicharset,
int depth) const { int depth) const {
@ -188,7 +193,7 @@ void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts,
std::vector<int> *ends, std::vector<int> *ends,
std::vector<int> *char_bounds_, std::vector<int> *char_bounds_,
int maxWidth) { int maxWidth) {
char_bounds_->push_back(0); char_bounds_->push_back((*starts)[0]);
for (unsigned i = 0; i < ends->size(); ++i) { for (unsigned i = 0; i < ends->size(); ++i) {
int middle = ((*starts)[i + 1] - (*ends)[i]) / 2; int middle = ((*starts)[i + 1] - (*ends)[i]) / 2;
char_bounds_->push_back((*ends)[i] + middle); char_bounds_->push_back((*ends)[i] + middle);
@ -588,8 +593,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
} }
rating -= cert; rating -= cert;
} }
starts.push_back(t);
if (t < width) { if (t < width) {
starts.push_back(t);
int unichar_id = best_nodes[t]->unichar_id; int unichar_id = best_nodes[t]->unichar_id;
if (unichar_id == UNICHAR_SPACE && !certs->empty() && if (unichar_id == UNICHAR_SPACE && !certs->empty() &&
best_nodes[t]->permuter != NO_PERM) { best_nodes[t]->permuter != NO_PERM) {
@ -604,8 +609,9 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
} }
unichar_ids->push_back(unichar_id); unichar_ids->push_back(unichar_id);
xcoords->push_back(t); xcoords->push_back(t);
do { t++;
double cert = best_nodes[t++]->certainty; while (t < width && best_nodes[t]->duplicate) {
double cert = best_nodes[t]->certainty;
// Special-case NO-PERM space to forget the certainty of the previous // Special-case NO-PERM space to forget the certainty of the previous
// nulls. See long comment in ContinueContext. // nulls. See long comment in ContinueContext.
if (cert < certainty || (unichar_id == UNICHAR_SPACE && if (cert < certainty || (unichar_id == UNICHAR_SPACE &&
@ -613,7 +619,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
certainty = cert; certainty = cert;
} }
rating -= cert; rating -= cert;
} while (t < width && best_nodes[t]->duplicate); t++;
}
ends.push_back(t); ends.push_back(t);
certs->push_back(certainty); certs->push_back(certainty);
ratings->push_back(rating); ratings->push_back(rating);
@ -685,20 +692,48 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs,
} }
} }
} }
float top_key = 0.0f;
float second_key = 0.0f;
bool found_first_code = false;
bool found_second_code = false;
while (!top_heap_.empty()) { while (!top_heap_.empty()) {
TopPair entry; TopPair entry;
top_heap_.Pop(&entry); top_heap_.Pop(&entry);
if (in_possible_diplopia_ && entry.data() == first_diplopia_code_) {
found_first_code = true;
}
if (in_possible_diplopia_ && entry.data() == second_diplopia_code_) {
found_second_code = true;
}
if (top_heap_.size() > 1) { if (top_heap_.size() > 1) {
top_n_flags_[entry.data()] = TN_TOPN; top_n_flags_[entry.data()] = TN_TOPN;
} else { } else {
top_n_flags_[entry.data()] = TN_TOP2; top_n_flags_[entry.data()] = TN_TOP2;
if (top_heap_.empty()) { if (top_heap_.empty()) {
top_code_ = entry.data(); top_code_ = entry.data();
top_key = entry.key();
} else { } else {
second_code_ = entry.data(); second_code_ = entry.data();
second_key = entry.key();
} }
} }
} }
// Need to identify if we are in a potential diplopia situation
// or if we already are, then determine if it is ended.
if (in_possible_diplopia_) {
if (!found_first_code && !found_second_code) {
in_possible_diplopia_ = false;
first_diplopia_code_ = -1;
second_diplopia_code_ = -1;
}
}
if (!in_possible_diplopia_) {
if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > kMinDiplopiaKey && second_key > kMinDiplopiaKey) {
in_possible_diplopia_ = true;
first_diplopia_code_ = top_code_;
second_diplopia_code_ = second_code_;
}
}
top_n_flags_[null_char_] = TN_TOP2; top_n_flags_[null_char_] = TN_TOP2;
} }
@ -1204,6 +1239,10 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
if (UpdateHeapIfMatched(&node, heap)) { if (UpdateHeapIfMatched(&node, heap)) {
return; return;
} }
// Check to see if node is possible diplopia.
if (!AddToHeapIsAllowed(&node)) {
return;
}
RecodePair entry(score, node); RecodePair entry(score, node);
heap->Push(&entry); heap->Push(&entry);
ASSERT_HOST(entry.data().dawgs == nullptr); ASSERT_HOST(entry.data().dawgs == nullptr);
@ -1258,6 +1297,21 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node,
return false; return false;
} }
// Decides whether new_node may be pushed onto a beam heap. When no possible
// diplopia is being tracked, or new_node has no predecessor, the node is
// always accepted. Otherwise it is refused exactly when the predecessor and
// new_node carry the two tracked diplopia codes, in either order.
bool RecodeBeamSearch::AddToHeapIsAllowed(RecodeNode *new_node) {
  if (!in_possible_diplopia_) {
    return true;
  }
  const RecodeNode *parent = new_node->prev;
  if (parent == nullptr) {
    return true;
  }
  const auto parent_code = parent->code;
  const auto node_code = new_node->code;
  const bool first_then_second =
      parent_code == first_diplopia_code_ && node_code == second_diplopia_code_;
  const bool second_then_first =
      parent_code == second_diplopia_code_ && node_code == first_diplopia_code_;
  return !(first_then_second || second_then_first);
}
// Computes and returns the code-hash for the given code and prev. // Computes and returns the code-hash for the given code and prev.
uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
const RecodeNode *prev) const { const RecodeNode *prev) const {

View File

@ -374,6 +374,9 @@ private:
// Searches the heap for an entry matching new_node, and updates the entry // Searches the heap for an entry matching new_node, and updates the entry
// with reshuffle if needed. Returns true if there was a match. // with reshuffle if needed. Returns true if there was a match.
bool UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap); bool UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap);
// Determines if new_node can be added to the heap for the current beam.
// Returns true whenever no possible diplopia is being tracked. While one is
// being tracked, returns false only if new_node and its prev node carry the
// two tracked diplopia codes (in either order); otherwise returns true.
bool AddToHeapIsAllowed(RecodeNode *new_node);
// Computes and returns the code-hash for the given code and prev. // Computes and returns the code-hash for the given code and prev.
uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const; uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const;
// Backtracks to extract the best path through the lattice that was built // Backtracks to extract the best path through the lattice that was built
@ -420,6 +423,11 @@ private:
// True if the input is simple text, ie adjacent equal chars are not to be // True if the input is simple text, ie adjacent equal chars are not to be
// eliminated. // eliminated.
bool is_simple_text_; bool is_simple_text_;
// Variables used in tracking a possible diplopia case.
// Refer to ComputeTopN routine for use of these variables.
// True while a possible diplopia is being tracked.
bool in_possible_diplopia_ = false;
// The top code and the second code recorded when tracking of the possible
// diplopia started; both are -1 while nothing is being tracked.
int first_diplopia_code_ = -1;
int second_diplopia_code_ = -1;
// The encoded (class label) of the null/reject character. // The encoded (class label) of the null/reject character.
int null_char_; int null_char_;
}; };