mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-23 18:49:08 +08:00
backport dict.cpp and dict.h changes from commit da03e4e910
to fix issue #1253
This commit is contained in:
parent
200886ee56
commit
f907620a1a
@ -241,7 +241,8 @@ void Dict::Load(const char *data_file_name, const STRING &lang) {
|
||||
if (load_bigram_dawg) {
|
||||
bigram_dawg_ = dawg_cache_->GetSquishedDawg(
|
||||
lang, data_file_name, TESSDATA_BIGRAM_DAWG, dawg_debug_level);
|
||||
if (bigram_dawg_) dawgs_ += bigram_dawg_;
|
||||
// The bigram_dawg_ is NOT used like the other dawgs! DO NOT add to the
|
||||
// dawgs_!!
|
||||
}
|
||||
if (load_freq_dawg) {
|
||||
freq_dawg_ = dawg_cache_->GetSquishedDawg(
|
||||
@ -332,6 +333,7 @@ void Dict::End() {
|
||||
delete dawgs_[i];
|
||||
}
|
||||
}
|
||||
dawg_cache_->FreeDawg(bigram_dawg_);
|
||||
if (dawg_cache_is_ours_) {
|
||||
delete dawg_cache_;
|
||||
dawg_cache_ = NULL;
|
||||
@ -350,7 +352,7 @@ void Dict::End() {
|
||||
int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
UNICHAR_ID unichar_id,
|
||||
bool word_end) const {
|
||||
DawgArgs *dawg_args = reinterpret_cast<DawgArgs*>(void_dawg_args);
|
||||
DawgArgs *dawg_args = reinterpret_cast<DawgArgs *>(void_dawg_args);
|
||||
|
||||
if (dawg_debug_level >= 3) {
|
||||
tprintf("def_letter_is_okay: current unichar=%s word_end=%d"
|
||||
|
@ -528,14 +528,14 @@ class Dict {
|
||||
DawgVector dawgs_;
|
||||
SuccessorListsVector successors_;
|
||||
Trie *pending_words_;
|
||||
/// The following pointers are only cached for convenience.
|
||||
/// The dawgs will be deleted when dawgs_ vector is destroyed.
|
||||
// bigram_dawg_ points to a dawg of two-word bigrams which always supersede if
|
||||
// any of them are present on the best choices list for a word pair.
|
||||
// The bigrams are stored as space-separated words where:
|
||||
// (1) leading and trailing punctuation has been removed from each word and
|
||||
// (2) any digits have been replaced with '?' marks.
|
||||
Dawg *bigram_dawg_;
|
||||
/// The following pointers are only cached for convenience.
|
||||
/// The dawgs will be deleted when dawgs_ vector is destroyed.
|
||||
// TODO(daria): need to support multiple languages in the future,
|
||||
// so maybe will need to maintain a list of dawgs of each kind.
|
||||
Dawg *freq_dawg_;
|
||||
|
Loading…
Reference in New Issue
Block a user