mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
LSTM char_whitelist/blacklist (6ac2ff0): more robust
- unicharset can be null too
This commit is contained in:
parent
b45999088c
commit
3912cb1c33
@ -871,6 +871,7 @@ class UNICHARSET {
|
||||
|
||||
// Return the enabled property of the given unichar.
|
||||
bool get_enabled(UNICHAR_ID unichar_id) const {
|
||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||
return unichars[unichar_id].properties.enabled;
|
||||
}
|
||||
|
||||
|
@ -626,7 +626,9 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode* prev, int index,
|
||||
int unichar_id = recoder_.DecodeUnichar(full_code);
|
||||
// Map the null char to INVALID.
|
||||
if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
|
||||
- if (unichar_id != INVALID_UNICHAR_ID && !charset->get_enabled(unichar_id))
|
||||
+ if (unichar_id != INVALID_UNICHAR_ID &&
|
||||
+ charset != nullptr &&
|
||||
+ !charset->get_enabled(unichar_id))
|
||||
continue; // disabled by whitelist/blacklist
|
||||
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
|
||||
use_dawgs, NC_ANYTHING, prev, step);
|
||||
|
Loading…
Reference in New Issue
Block a user