fix langdata (user words/patterns) file suffixes for LSTMs:

- add another constructor for LSTMRecognizer
  which takes the language_data_path_prefix configured/selected
  at runtime and passes it to the internal CCUtil
- use this in Tesseract::init_tesseract_lang_data when LSTMs
  are available

(this was missing from 297d7d86ce)
This commit is contained in:
Robert Schubert 2019-09-19 19:30:54 +02:00
parent 3b030b4aeb
commit 5b976bfb55
3 changed files with 7 additions and 1 deletions

View File

@ -175,7 +175,7 @@ bool Tesseract::init_tesseract_lang_data(
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
# endif // ndef DISABLED_LEGACY_ENGINE
if (mgr->IsComponentAvailable(TESSDATA_LSTM)) {
lstm_recognizer_ = new LSTMRecognizer;
lstm_recognizer_ = new LSTMRecognizer(language_data_path_prefix);
ASSERT_HOST(lstm_recognizer_->Load(
this->params(), lstm_use_matrix ? language : nullptr, mgr));
} else {

View File

@ -49,6 +49,11 @@ const double kDictRatio = 2.25;
// Default certainty offset to give the dictionary a chance.
const double kCertOffset = -0.085;
// Builds a recognizer through the default constructor, then stores the
// supplied language data path prefix on the internal CCUtil member so it
// is available to code that reads it from ccutil_.
LSTMRecognizer::LSTMRecognizer(const STRING language_data_path_prefix)
    : LSTMRecognizer() {
  ccutil_.language_data_path_prefix = language_data_path_prefix;
}
LSTMRecognizer::LSTMRecognizer()
: network_(nullptr),
training_flags_(0),

View File

@ -53,6 +53,7 @@ enum TrainingFlags {
class LSTMRecognizer {
public:
LSTMRecognizer();
LSTMRecognizer(const STRING language_data_path_prefix);
~LSTMRecognizer();
int NumOutputs() const { return network_->NumOutputs(); }