LSTM char_whitelist/blacklist (6ac2ff0): more robust

- unicharset can be null too
This commit is contained in:
Robert Schubert 2019-03-09 10:40:40 +01:00
parent b45999088c
commit 3912cb1c33
2 changed files with 4 additions and 1 deletions

View File

@@ -871,6 +871,7 @@ class UNICHARSET {
// Reports whether the given unichar is enabled, i.e. the value of the
// `enabled` flag stored in that unichar's properties.
// The id is first checked with contains_unichar_id() under ASSERT_HOST, so the
// lookup below is only reached for ids this set contains.
bool get_enabled(UNICHAR_ID unichar_id) const {
  ASSERT_HOST(contains_unichar_id(unichar_id));
  // Bind the entry by reference; nothing is copied or modified.
  const auto& slot = unichars[unichar_id];
  return slot.properties.enabled;
}

View File

@@ -626,7 +626,9 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode* prev, int index,
int unichar_id = recoder_.DecodeUnichar(full_code);
// Map the null char to INVALID.
if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
if (unichar_id != INVALID_UNICHAR_ID && !charset->get_enabled(unichar_id))
if (unichar_id != INVALID_UNICHAR_ID &&
charset != nullptr &&
!charset->get_enabled(unichar_id))
continue; // disabled by whitelist/blacklist
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
use_dawgs, NC_ANYTHING, prev, step);