Fixed multilang for LSTM, pushed cube to one side without actually deleting it

This commit is contained in:
Ray Smith 2016-12-05 14:41:43 -08:00
parent 798d79aaa5
commit 5deebe6c27
14 changed files with 139 additions and 124 deletions

View File

@ -123,10 +123,9 @@ void PrintHelpForOEM() {
const char* msg =
"OCR Engine modes:\n"
" 0 Original Tesseract only.\n"
" 1 Cube only.\n"
" 2 Tesseract + cube.\n"
" 3 Default, based on what is available.\n"
" 4 Neural nets (LSTM) only.\n";
" 1 Neural nets LSTM only.\n"
" 2 Tesseract + LSTM.\n"
" 3 Default, based on what is available.\n";
printf("%s", msg);
}

View File

@ -31,21 +31,22 @@
#include <errno.h>
#endif
#include <ctype.h>
#include "ocrclass.h"
#include "werdit.h"
#include "callcpp.h"
#include "control.h"
#include "docqual.h"
#include "drawfx.h"
#include "tessbox.h"
#include "tessvars.h"
#include "fixspace.h"
#include "globals.h"
#include "lstmrecognizer.h"
#include "ocrclass.h"
#include "output.h"
#include "pgedit.h"
#include "reject.h"
#include "fixspace.h"
#include "docqual.h"
#include "control.h"
#include "output.h"
#include "callcpp.h"
#include "globals.h"
#include "sorthelper.h"
#include "tessbox.h"
#include "tesseractclass.h"
#include "tessvars.h"
#include "werdit.h"
#define MIN_FONT_ROW_COUNT 8
#define MAX_XHEIGHT_DIFF 3
@ -192,8 +193,8 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
WERD_RES* word_res = new WERD_RES;
word_res->InitForRetryRecognition(*word->word);
word->lang_words.push_back(word_res);
// Cube doesn't get setup for pass2.
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
// LSTM doesn't get setup for pass2.
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
word_res->SetupForRecognition(
lang_t->unicharset, lang_t, BestPix(),
lang_t->tessedit_ocr_engine_mode, NULL,
@ -301,16 +302,6 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
const TBOX* target_word_box,
const char* word_config,
int dopasses) {
// PSM_RAW_LINE is a special-case mode in which the layout analysis is
// completely ignored and LSTM is run on the raw image. There is no hope
// of running normal tesseract in this situation or of integrating output.
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY &&
tessedit_pageseg_mode == PSM_RAW_LINE) {
RecogRawLine(page_res);
return true;
}
#endif
PAGE_RES_IT page_res_it(page_res);
if (tessedit_minimal_rej_pass1) {
@ -397,8 +388,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false;
}
// The next passes can only be run if tesseract has been used, as cube
// doesn't set all the necessary outputs in WERD_RES.
// The next passes are only required for Tess-only.
if (AnyTessLang() && !AnyLSTMLang()) {
// ****************** Pass 3 *******************
// Fix fuzzy spaces.
@ -451,8 +441,13 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
WERD_RES* word = page_res_it.word();
if (word->best_choice == NULL || word->best_choice->length() == 0)
POLY_BLOCK* pb = page_res_it.block()->block != NULL
? page_res_it.block()->block->poly_block()
: NULL;
if (word->best_choice == NULL || word->best_choice->length() == 0 ||
(word->best_choice->IsAllSpaces() && (pb == NULL || pb->IsText()))) {
page_res_it.DeleteCurrentWord();
}
}
if (monitor != NULL) {
@ -1376,12 +1371,20 @@ void Tesseract::classify_word_pass1(const WordData& word_data,
cube_word_pass1(block, row, *in_word);
return;
}
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
if (!(*in_word)->odd_size) {
#endif
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
LSTMRecognizeWord(*block, row, *in_word, out_words);
if (!out_words->empty())
return; // Successful lstm recognition.
}
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
// No fallback allowed, so use a fake.
(*in_word)->SetupFake(lstm_recognizer_->GetUnicharset());
return;
}
// Fall back to tesseract for failed words or odd words.
(*in_word)->SetupForRecognition(unicharset, this, BestPix(),
OEM_TESSERACT_ONLY, NULL,
@ -1523,7 +1526,7 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words) {
// Return if we do not want to run Tesseract.
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
return;
}
ROW* row = word_data.row;
@ -1908,7 +1911,7 @@ static void find_modal_font( //good chars in word
* Get the fonts for the word.
*/
void Tesseract::set_word_fonts(WERD_RES *word) {
// Don't try to set the word fonts for a cube word, as the configs
// Don't try to set the word fonts for an lstm word, as the configs
// will be meaningless.
if (word->chopped_word == NULL) return;
ASSERT_HOST(word->best_choice != NULL);

View File

@ -219,19 +219,6 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
}
#ifndef ANDROID_BUILD
// Entry point for recognizing one raw line: hands the first word of page_res
// to LSTMRecognizeWord and substitutes the resulting words for it.
void Tesseract::RecogRawLine(PAGE_RES* page_res) {
  PAGE_RES_IT page_it(page_res);
  PointerVector<WERD_RES> recognized;
  LSTMRecognizeWord(*page_it.block()->block, page_it.row()->row,
                    page_it.word(), &recognized);
  // Dump the choices of every produced word when stopper debugging is on.
  if (getDict().stopper_debug_level >= 1) {
    int idx = 0;
    while (idx < recognized.size()) {
      recognized[idx]->DebugWordChoices(true, NULL);
      ++idx;
    }
  }
  page_it.ReplaceCurrentWord(&recognized);
}
// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
@ -268,7 +255,17 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
// for each of the output words.
// If we drop a word as junk, then there is always a space in front of the
// next.
bool deleted_prev = false;
const Dict* stopper_dict = lstm_recognizer_->GetDict();
if (stopper_dict == nullptr) stopper_dict = &getDict();
bool any_nonspace_delimited = false;
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word = (*words)[w];
if (word->best_choice != nullptr &&
word->best_choice->ContainsAnyNonSpaceDelimited()) {
any_nonspace_delimited = true;
break;
}
}
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word = (*words)[w];
if (word->best_choice == NULL) {
@ -284,9 +281,7 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
}
if (word->best_choice == NULL) {
// It is a dud.
words->remove(w);
--w;
deleted_prev = true;
word->SetupFake(lstm_recognizer_->GetUnicharset());
} else {
// Set the best state.
for (int i = 0; i < word->best_choice->length(); ++i) {
@ -314,22 +309,21 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
word->best_choice->print();
}
// Discard words that are impossibly bad, but allow a bit more for
// dictionary words.
// dictionary words, and keep bad words in non-space-delimited langs.
if (word_certainty >= RecodeBeamSearch::kMinCertainty ||
any_nonspace_delimited ||
(word_certainty >= kWorstDictCertainty &&
Dict::valid_word_permuter(word->best_choice->permuter(), true))) {
word->best_choice->set_certainty(word_certainty);
if (deleted_prev) word->word->set_blanks(1);
word->tess_accepted = stopper_dict->AcceptableResult(word);
} else {
if (getDict().stopper_debug_level >= 1) {
tprintf("Deleting word with certainty %g\n", word_certainty);
word->best_choice->print();
}
// It is a dud.
words->remove(w);
--w;
deleted_prev = true;
word->SetupFake(lstm_recognizer_->GetUnicharset());
}
word->best_choice->set_certainty(word_certainty);
}
}
}

View File

@ -161,7 +161,7 @@ bool Tesseract::init_tesseract_lang_data(
// Determine which ocr engine(s) should be loaded and used for recognition.
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
if (tessdata_manager_debug_level) {
tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
tprintf("Loading Tesseract/LSTM with tessedit_ocr_engine_mode %d\n",
static_cast<int>(tessedit_ocr_engine_mode));
}
@ -174,9 +174,37 @@ bool Tesseract::init_tesseract_lang_data(
return true;
}
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as LSTM are optional.
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
if (tessdata_manager.swap()) {
tprintf("Error: LSTM requested on big-endian hardware!!\n");
tprintf("Big-endian not yet supported! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
lstm_recognizer_ = new LSTMRecognizer;
TFile fp;
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
if (lstm_use_matrix)
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
}
}
#endif
// Load the unicharset
if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
// Avoid requiring a unicharset when we aren't running base tesseract.
unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
} else if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
return false;
}
if (unicharset.size() > MAX_NUM_CLASSES) {
@ -203,11 +231,6 @@ bool Tesseract::init_tesseract_lang_data(
ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
}
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as cube and LSTM are optional.
#ifndef NO_CUBE_BUILD
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
@ -217,22 +240,6 @@ bool Tesseract::init_tesseract_lang_data(
ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
if (tessdata_manager_debug_level)
tprintf("Loaded Cube with combiner\n");
} else if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
if (tessdata_manager.swap()) {
tprintf("Error: LSTM requested on big-endian hardware!!\n");
tprintf("Big-endian not yet supported! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
lstm_recognizer_ = new LSTMRecognizer;
TFile fp;
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
if (lstm_use_matrix)
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
}
}
#endif
// Init ParamsModel.
@ -425,16 +432,16 @@ int Tesseract::init_tesseract_internal(
tessdata_manager.End();
return 0;
}
// If only Cube will be used, skip loading Tesseract classifier's
// pre-trained templates.
bool init_tesseract_classifier =
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
// If only Cube will be used and if it has its own Unicharset,
// skip initializing permuter and loading Tesseract Dawgs.
bool init_dict =
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
program_editup(textbase, init_tesseract_classifier, init_dict);
// If only LSTM will be used, skip loading Tesseract classifier's
// pre-trained templates and dictionary.
bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY &&
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
bool init_dict = init_tesseract;
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
!tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
init_dict = true;
}
program_editup(textbase, init_tesseract, init_dict);
tessdata_manager.End();
return 0; //Normal exit
}

View File

@ -21,6 +21,8 @@
// the recognition results of Tesseract and Cube at the word level
#include <algorithm>
#include <string>
#include <vector>
#include <wctype.h>
#include "tesseract_cube_combiner.h"
@ -125,12 +127,10 @@ bool TesseractCubeCombiner::ValidWord(const string &str) {
// Public method for computing the combiner features. The agreement
// output parameter will be true if both answers are identical,
// and false otherwise.
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
int tess_confidence,
CubeObject *cube_obj,
WordAltList *cube_alt_list,
vector<double> *features,
bool *agreement) {
bool TesseractCubeCombiner::ComputeCombinerFeatures(
const string &tess_str, int tess_confidence, CubeObject *cube_obj,
WordAltList *cube_alt_list, std::vector<double> *features,
bool *agreement) {
features->clear();
*agreement = false;
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)

View File

@ -81,9 +81,9 @@ Tesseract::Tesseract()
" (Values from PageSegMode enum in publictypes.h)",
this->params()),
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
"Which OCR engine(s) to run (Tesseract, Cube, both)."
"Which OCR engine(s) to run (Tesseract, LSTM, both)."
" Defaults to loading and running only Tesseract"
" (no Cube,no combiner)."
" (no LSTM,no combiner)."
" Values from OcrEngineMode enum in tesseractclass.h)",
this->params()),
STRING_MEMBER(tessedit_char_blacklist, "",

View File

@ -210,6 +210,9 @@ class Tesseract : public Wordrec {
// Replaces the stored original image with original_pix, destroying the
// previously held one. The pointer is stored as given (no copy is made), and
// every sub-language receives its own pixClone() of it.
// original_pix may be NULL to clear the image; in that case NULL is forwarded
// to the sub-languages as well.
void set_pix_original(Pix* original_pix) {
  pixDestroy(&pix_original_);
  pix_original_ = original_pix;
  // Clone to sublangs as well. pixClone() reports an error for a NULL source,
  // so a NULL image is passed through directly instead of being cloned.
  for (int i = 0; i < sub_langs_.size(); ++i) {
    sub_langs_[i]->set_pix_original(
        original_pix != NULL ? pixClone(original_pix) : NULL);
  }
}
// Returns a pointer to a Pix representing the best available (original) image
// of the page. Can be of any bit depth, but never color-mapped, as that has
@ -261,20 +264,19 @@ class Tesseract : public Wordrec {
Tesseract* get_sub_lang(int index) const {
return sub_langs_[index];
}
// Returns true if any language uses Tesseract (as opposed to cube).
// Returns true if any language uses Tesseract (as opposed to LSTM).
bool AnyTessLang() const {
if (tessedit_ocr_engine_mode != OEM_CUBE_ONLY) return true;
if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
for (int i = 0; i < sub_langs_.size(); ++i) {
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_CUBE_ONLY)
return true;
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
}
return false;
}
// Returns true if any language uses the LSTM.
bool AnyLSTMLang() const {
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) return true;
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) return true;
for (int i = 0; i < sub_langs_.size(); ++i) {
if (sub_langs_[i]->tessedit_ocr_engine_mode == OEM_LSTM_ONLY)
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY)
return true;
}
return false;
@ -340,8 +342,6 @@ class Tesseract : public Wordrec {
// is also returned to enable calculation of output bounding boxes.
ImageData* GetRectImage(const TBOX& box, const BLOCK& block, int padding,
TBOX* revised_box) const;
// Top-level function recognizes a single raw line.
void RecogRawLine(PAGE_RES* page_res);
// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
@ -850,8 +850,8 @@ class Tesseract : public Wordrec {
" 5=line, 6=word, 7=char"
" (Values from PageSegMode enum in publictypes.h)");
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults"
" to loading and running only Tesseract (no Cube, no combiner)."
"Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
" to loading and running only Tesseract (no LSTM, no combiner)."
" (Values from OcrEngineMode enum in tesseractclass.h)");
STRING_VAR_H(tessedit_char_blacklist, "",
"Blacklist of chars not to recognize");

View File

@ -884,6 +884,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
}
FakeWordFromRatings(TOP_CHOICE_PERM);
reject_map.initialise(blob_count);
best_state.init_to_size(blob_count, 1);
done = true;
}

View File

@ -255,8 +255,9 @@ enum ParagraphJustification {
*/
enum OcrEngineMode {
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
// to Tesseract when things get difficult.
OEM_DEFAULT, // Specify this mode when calling init_*(),
// to indicate that any of the above modes
// should be automatically inferred from the
@ -264,14 +265,8 @@ enum OcrEngineMode {
// command-line configs, or if not specified
// in any of the above should be set to the
// default OEM_TESSERACT_ONLY.
// OEM_LSTM_ONLY will fall back (with a warning) to OEM_TESSERACT_ONLY where
// there is no network model available. This allows use of a mix of languages,
// some of which contain a network model, and some of which do not. Since the
// tesseract model is required for the LSTM to fall back to for "difficult"
// words anyway, this seems like a reasonable approach, but leaves the danger
// of not noticing that it is using the wrong engine if the warning is
// ignored.
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy
};
} // namespace tesseract.

View File

@ -508,6 +508,20 @@ class WERD_CHOICE : public ELIST_LINK {
}
return word_str;
}
// Reports whether at least one unichar_id of this word is rejected by
// unicharset_->IsSpaceDelimited(). Scanning stops at the first such id.
bool ContainsAnyNonSpaceDelimited() const {
  bool found = false;
  for (int pos = 0; pos < length_ && !found; ++pos) {
    found = !unicharset_->IsSpaceDelimited(unichar_ids_[pos]);
  }
  return found;
}
// Reports whether every unichar_id in the word equals UNICHAR_SPACE.
// A word with no unichar_ids also yields true.
bool IsAllSpaces() const {
  int pos = 0;
  // Advance over the leading run of spaces; reaching the end means that the
  // word contains nothing else.
  while (pos < length_ && unichar_ids_[pos] == UNICHAR_SPACE) {
    ++pos;
  }
  return pos >= length_;
}
// Call this to override the default (strict left to right graphemes)
// with the fact that some engine produces a "reading order" set of

View File

@ -49,7 +49,7 @@ const int case_state_table[6][4] = {
5, -1, 2, -1},
};
int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) {
int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const {
int state = 0;
int x;
for (x = 0; x < word.length(); ++x) {

View File

@ -260,7 +260,7 @@ class Dict {
MATRIX *ratings);
/// Returns the length of the shortest alpha run in WordChoice.
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice);
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const;
/// Returns true if the certainty of the BestChoice word is within a
/// reasonable range of the average certainties for the best choices for
/// each character in the segmentation. This test is used to catch words
@ -275,7 +275,7 @@ class Dict {
/// Returns false if the best choice for the current word is questionable
/// and should be tried again on the second pass or should be flagged to
/// the user.
bool AcceptableResult(WERD_RES* word);
bool AcceptableResult(WERD_RES *word) const;
void EndDangerousAmbigs();
/// Prints the current choices for this word to stdout.
void DebugWordChoices();
@ -285,7 +285,7 @@ class Dict {
void SettupStopperPass2();
/* context.cpp *************************************************************/
/// Check a string to see if it matches a set of lexical rules.
int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset);
int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const;
/// Returns true if the word looks like an absolute garbage
/// (e.g. image mistakenly recognized as text).
bool absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset);

View File

@ -107,7 +107,7 @@ bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
}
}
bool Dict::AcceptableResult(WERD_RES* word) {
bool Dict::AcceptableResult(WERD_RES *word) const {
if (word->best_choice == NULL) return false;
float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
int WordSize;
@ -448,7 +448,7 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
}
}
int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) {
int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
int shortest = MAX_INT32;
int curr_len = 0;
for (int w = 0; w < WordChoice.length(); ++w) {

View File

@ -141,6 +141,8 @@ class LSTMRecognizer {
bool IsUsingAdaGrad() const { return network_->TestFlag(NF_ADA_GRAD); }
// Provides access to the UNICHARSET that this classifier works with.
const UNICHARSET& GetUnicharset() const { return ccutil_.unicharset; }
// Accessor for the Dict held by this recognizer. The stored pointer is
// returned unchanged; Tesseract::SearchWords compares it against nullptr and
// substitutes its own Dict in that case.
const Dict* GetDict() const {
  return dict_;
}
// Sets the sample iteration to the given value. The sample_iteration_
// determines the seed for the random number generator. The training
// iteration is incremented only by a successful training iteration.