mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
Fixed multilang for LSTM, pushed cube to one side without actually deleting it
This commit is contained in:
parent
798d79aaa5
commit
5deebe6c27
@ -123,10 +123,9 @@ void PrintHelpForOEM() {
|
||||
const char* msg =
|
||||
"OCR Engine modes:\n"
|
||||
" 0 Original Tesseract only.\n"
|
||||
" 1 Cube only.\n"
|
||||
" 2 Tesseract + cube.\n"
|
||||
" 3 Default, based on what is available.\n"
|
||||
" 4 Neural nets (LSTM) only.\n";
|
||||
" 1 Neural nets LSTM only.\n"
|
||||
" 2 Tesseract + LSTM.\n"
|
||||
" 3 Default, based on what is available.\n";
|
||||
|
||||
printf("%s", msg);
|
||||
}
|
||||
|
@ -31,21 +31,22 @@
|
||||
#include <errno.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
#include "ocrclass.h"
|
||||
#include "werdit.h"
|
||||
#include "callcpp.h"
|
||||
#include "control.h"
|
||||
#include "docqual.h"
|
||||
#include "drawfx.h"
|
||||
#include "tessbox.h"
|
||||
#include "tessvars.h"
|
||||
#include "fixspace.h"
|
||||
#include "globals.h"
|
||||
#include "lstmrecognizer.h"
|
||||
#include "ocrclass.h"
|
||||
#include "output.h"
|
||||
#include "pgedit.h"
|
||||
#include "reject.h"
|
||||
#include "fixspace.h"
|
||||
#include "docqual.h"
|
||||
#include "control.h"
|
||||
#include "output.h"
|
||||
#include "callcpp.h"
|
||||
#include "globals.h"
|
||||
#include "sorthelper.h"
|
||||
#include "tessbox.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tessvars.h"
|
||||
#include "werdit.h"
|
||||
|
||||
#define MIN_FONT_ROW_COUNT 8
|
||||
#define MAX_XHEIGHT_DIFF 3
|
||||
@ -192,8 +193,8 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
|
||||
WERD_RES* word_res = new WERD_RES;
|
||||
word_res->InitForRetryRecognition(*word->word);
|
||||
word->lang_words.push_back(word_res);
|
||||
// Cube doesn't get setup for pass2.
|
||||
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
|
||||
// LSTM doesn't get setup for pass2.
|
||||
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
|
||||
word_res->SetupForRecognition(
|
||||
lang_t->unicharset, lang_t, BestPix(),
|
||||
lang_t->tessedit_ocr_engine_mode, NULL,
|
||||
@ -301,16 +302,6 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
|
||||
const TBOX* target_word_box,
|
||||
const char* word_config,
|
||||
int dopasses) {
|
||||
// PSM_RAW_LINE is a special-case mode in which the layout analysis is
|
||||
// completely ignored and LSTM is run on the raw image. There is no hope
|
||||
// of running normal tesseract in this situation or of integrating output.
|
||||
#ifndef ANDROID_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY &&
|
||||
tessedit_pageseg_mode == PSM_RAW_LINE) {
|
||||
RecogRawLine(page_res);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
|
||||
if (tessedit_minimal_rej_pass1) {
|
||||
@ -397,8 +388,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
|
||||
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false;
|
||||
}
|
||||
|
||||
// The next passes can only be run if tesseract has been used, as cube
|
||||
// doesn't set all the necessary outputs in WERD_RES.
|
||||
// The next passes are only required for Tess-only.
|
||||
if (AnyTessLang() && !AnyLSTMLang()) {
|
||||
// ****************** Pass 3 *******************
|
||||
// Fix fuzzy spaces.
|
||||
@ -451,8 +441,13 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
|
||||
for (page_res_it.restart_page(); page_res_it.word() != NULL;
|
||||
page_res_it.forward()) {
|
||||
WERD_RES* word = page_res_it.word();
|
||||
if (word->best_choice == NULL || word->best_choice->length() == 0)
|
||||
POLY_BLOCK* pb = page_res_it.block()->block != NULL
|
||||
? page_res_it.block()->block->poly_block()
|
||||
: NULL;
|
||||
if (word->best_choice == NULL || word->best_choice->length() == 0 ||
|
||||
(word->best_choice->IsAllSpaces() && (pb == NULL || pb->IsText()))) {
|
||||
page_res_it.DeleteCurrentWord();
|
||||
}
|
||||
}
|
||||
|
||||
if (monitor != NULL) {
|
||||
@ -1376,12 +1371,20 @@ void Tesseract::classify_word_pass1(const WordData& word_data,
|
||||
cube_word_pass1(block, row, *in_word);
|
||||
return;
|
||||
}
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
if (!(*in_word)->odd_size) {
|
||||
#endif
|
||||
#ifndef ANDROID_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
|
||||
if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
LSTMRecognizeWord(*block, row, *in_word, out_words);
|
||||
if (!out_words->empty())
|
||||
return; // Successful lstm recognition.
|
||||
}
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
// No fallback allowed, so use a fake.
|
||||
(*in_word)->SetupFake(lstm_recognizer_->GetUnicharset());
|
||||
return;
|
||||
}
|
||||
// Fall back to tesseract for failed words or odd words.
|
||||
(*in_word)->SetupForRecognition(unicharset, this, BestPix(),
|
||||
OEM_TESSERACT_ONLY, NULL,
|
||||
@ -1523,7 +1526,7 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
|
||||
WERD_RES** in_word,
|
||||
PointerVector<WERD_RES>* out_words) {
|
||||
// Return if we do not want to run Tesseract.
|
||||
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
return;
|
||||
}
|
||||
ROW* row = word_data.row;
|
||||
@ -1908,7 +1911,7 @@ static void find_modal_font( //good chars in word
|
||||
* Get the fonts for the word.
|
||||
*/
|
||||
void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
// Don't try to set the word fonts for a cube word, as the configs
|
||||
// Don't try to set the word fonts for an lstm word, as the configs
|
||||
// will be meaningless.
|
||||
if (word->chopped_word == NULL) return;
|
||||
ASSERT_HOST(word->best_choice != NULL);
|
||||
|
@ -219,19 +219,6 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
|
||||
}
|
||||
|
||||
#ifndef ANDROID_BUILD
|
||||
// Top-level function recognizes a single raw line.
|
||||
void Tesseract::RecogRawLine(PAGE_RES* page_res) {
|
||||
PAGE_RES_IT it(page_res);
|
||||
PointerVector<WERD_RES> words;
|
||||
LSTMRecognizeWord(*it.block()->block, it.row()->row, it.word(), &words);
|
||||
if (getDict().stopper_debug_level >= 1) {
|
||||
for (int w = 0; w < words.size(); ++w) {
|
||||
words[w]->DebugWordChoices(true, NULL);
|
||||
}
|
||||
}
|
||||
it.ReplaceCurrentWord(&words);
|
||||
}
|
||||
|
||||
// Recognizes a word or group of words, converting to WERD_RES in *words.
|
||||
// Analogous to classify_word_pass1, but can handle a group of words as well.
|
||||
void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
|
||||
@ -268,7 +255,17 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
|
||||
// for each of the output words.
|
||||
// If we drop a word as junk, then there is always a space in front of the
|
||||
// next.
|
||||
bool deleted_prev = false;
|
||||
const Dict* stopper_dict = lstm_recognizer_->GetDict();
|
||||
if (stopper_dict == nullptr) stopper_dict = &getDict();
|
||||
bool any_nonspace_delimited = false;
|
||||
for (int w = 0; w < words->size(); ++w) {
|
||||
WERD_RES* word = (*words)[w];
|
||||
if (word->best_choice != nullptr &&
|
||||
word->best_choice->ContainsAnyNonSpaceDelimited()) {
|
||||
any_nonspace_delimited = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int w = 0; w < words->size(); ++w) {
|
||||
WERD_RES* word = (*words)[w];
|
||||
if (word->best_choice == NULL) {
|
||||
@ -284,9 +281,7 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
|
||||
}
|
||||
if (word->best_choice == NULL) {
|
||||
// It is a dud.
|
||||
words->remove(w);
|
||||
--w;
|
||||
deleted_prev = true;
|
||||
word->SetupFake(lstm_recognizer_->GetUnicharset());
|
||||
} else {
|
||||
// Set the best state.
|
||||
for (int i = 0; i < word->best_choice->length(); ++i) {
|
||||
@ -314,22 +309,21 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
|
||||
word->best_choice->print();
|
||||
}
|
||||
// Discard words that are impossibly bad, but allow a bit more for
|
||||
// dictionary words.
|
||||
// dictionary words, and keep bad words in non-space-delimited langs.
|
||||
if (word_certainty >= RecodeBeamSearch::kMinCertainty ||
|
||||
any_nonspace_delimited ||
|
||||
(word_certainty >= kWorstDictCertainty &&
|
||||
Dict::valid_word_permuter(word->best_choice->permuter(), true))) {
|
||||
word->best_choice->set_certainty(word_certainty);
|
||||
if (deleted_prev) word->word->set_blanks(1);
|
||||
word->tess_accepted = stopper_dict->AcceptableResult(word);
|
||||
} else {
|
||||
if (getDict().stopper_debug_level >= 1) {
|
||||
tprintf("Deleting word with certainty %g\n", word_certainty);
|
||||
word->best_choice->print();
|
||||
}
|
||||
// It is a dud.
|
||||
words->remove(w);
|
||||
--w;
|
||||
deleted_prev = true;
|
||||
word->SetupFake(lstm_recognizer_->GetUnicharset());
|
||||
}
|
||||
word->best_choice->set_certainty(word_certainty);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -161,7 +161,7 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
// Determine which ocr engine(s) should be loaded and used for recognition.
|
||||
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
|
||||
if (tessdata_manager_debug_level) {
|
||||
tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
|
||||
tprintf("Loading Tesseract/LSTM with tessedit_ocr_engine_mode %d\n",
|
||||
static_cast<int>(tessedit_ocr_engine_mode));
|
||||
}
|
||||
|
||||
@ -174,9 +174,37 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
return true;
|
||||
}
|
||||
|
||||
// The various OcrEngineMode settings (see publictypes.h) determine which
|
||||
// engine-specific data files need to be loaded. Currently everything needs
|
||||
// the base tesseract data, which supplies other useful information, but
|
||||
// alternative engines, such as LSTM, are optional.
|
||||
#ifndef ANDROID_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
|
||||
if (tessdata_manager.swap()) {
|
||||
tprintf("Error: LSTM requested on big-endian hardware!!\n");
|
||||
tprintf("Big-endian not yet supported! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
|
||||
lstm_recognizer_ = new LSTMRecognizer;
|
||||
TFile fp;
|
||||
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
|
||||
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
|
||||
if (lstm_use_matrix)
|
||||
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
|
||||
} else {
|
||||
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Load the unicharset
|
||||
if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
|
||||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
// Avoid requiring a unicharset when we aren't running base tesseract.
|
||||
unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
|
||||
} else if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
|
||||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
|
||||
return false;
|
||||
}
|
||||
if (unicharset.size() > MAX_NUM_CLASSES) {
|
||||
@ -203,11 +231,6 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
|
||||
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
|
||||
}
|
||||
|
||||
// The various OcrEngineMode settings (see publictypes.h) determine which
|
||||
// engine-specific data files need to be loaded. Currently everything needs
|
||||
// the base tesseract data, which supplies other useful information, but
|
||||
// alternative engines, such as cube and LSTM are optional.
|
||||
#ifndef NO_CUBE_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
|
||||
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
|
||||
@ -217,22 +240,6 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Loaded Cube with combiner\n");
|
||||
} else if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
if (tessdata_manager.swap()) {
|
||||
tprintf("Error: LSTM requested on big-endian hardware!!\n");
|
||||
tprintf("Big-endian not yet supported! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
|
||||
lstm_recognizer_ = new LSTMRecognizer;
|
||||
TFile fp;
|
||||
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
|
||||
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
|
||||
if (lstm_use_matrix)
|
||||
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
|
||||
} else {
|
||||
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Init ParamsModel.
|
||||
@ -425,16 +432,16 @@ int Tesseract::init_tesseract_internal(
|
||||
tessdata_manager.End();
|
||||
return 0;
|
||||
}
|
||||
// If only Cube will be used, skip loading Tesseract classifier's
|
||||
// pre-trained templates.
|
||||
bool init_tesseract_classifier =
|
||||
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
|
||||
// If only Cube will be used and if it has its own Unicharset,
|
||||
// skip initializing permuter and loading Tesseract Dawgs.
|
||||
bool init_dict =
|
||||
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
|
||||
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
|
||||
program_editup(textbase, init_tesseract_classifier, init_dict);
|
||||
// If only LSTM will be used, skip loading Tesseract classifier's
|
||||
// pre-trained templates and dictionary.
|
||||
bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY &&
|
||||
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
|
||||
bool init_dict = init_tesseract;
|
||||
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
|
||||
!tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
|
||||
init_dict = true;
|
||||
}
|
||||
program_editup(textbase, init_tesseract, init_dict);
|
||||
tessdata_manager.End();
|
||||
return 0; //Normal exit
|
||||
}
|
||||
|
@ -21,6 +21,8 @@
|
||||
// the recognition results of Tesseract and Cube at the word level
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "tesseract_cube_combiner.h"
|
||||
@ -125,12 +127,10 @@ bool TesseractCubeCombiner::ValidWord(const string &str) {
|
||||
// Public method for computing the combiner features. The agreement
|
||||
// output parameter will be true if both answers are identical,
|
||||
// and false otherwise.
|
||||
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
|
||||
int tess_confidence,
|
||||
CubeObject *cube_obj,
|
||||
WordAltList *cube_alt_list,
|
||||
vector<double> *features,
|
||||
bool *agreement) {
|
||||
bool TesseractCubeCombiner::ComputeCombinerFeatures(
|
||||
const string &tess_str, int tess_confidence, CubeObject *cube_obj,
|
||||
WordAltList *cube_alt_list, std::vector<double> *features,
|
||||
bool *agreement) {
|
||||
features->clear();
|
||||
*agreement = false;
|
||||
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
|
||||
|
@ -81,9 +81,9 @@ Tesseract::Tesseract()
|
||||
" (Values from PageSegMode enum in publictypes.h)",
|
||||
this->params()),
|
||||
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
|
||||
"Which OCR engine(s) to run (Tesseract, Cube, both)."
|
||||
"Which OCR engine(s) to run (Tesseract, LSTM, both)."
|
||||
" Defaults to loading and running only Tesseract"
|
||||
" (no Cube,no combiner)."
|
||||
" (no LSTM,no combiner)."
|
||||
" Values from OcrEngineMode enum in tesseractclass.h)",
|
||||
this->params()),
|
||||
STRING_MEMBER(tessedit_char_blacklist, "",
|
||||
|
@ -210,6 +210,9 @@ class Tesseract : public Wordrec {
|
||||
void set_pix_original(Pix* original_pix) {
  // Release the image held so far, then store the new one (which may be
  // null when the caller is clearing the image).
  pixDestroy(&pix_original_);
  pix_original_ = original_pix;
  // Give every sub-language its own clone as well. pixClone() is not meant to
  // be called with a null Pix (Leptonica reports an error for it), so a null
  // image is forwarded as a plain null, which simply clears the sub-language's
  // image too.
  for (int i = 0; i < sub_langs_.size(); ++i) {
    sub_langs_[i]->set_pix_original(
        original_pix != nullptr ? pixClone(original_pix) : nullptr);
  }
}
|
||||
// Returns a pointer to a Pix representing the best available (original) image
|
||||
// of the page. Can be of any bit depth, but never color-mapped, as that has
|
||||
@ -261,20 +264,19 @@ class Tesseract : public Wordrec {
|
||||
Tesseract* get_sub_lang(int index) const {
  // Look up the sub-language recognizer stored at position index in
  // sub_langs_; no range check is performed here.
  Tesseract* const sub_lang = sub_langs_[index];
  return sub_lang;
}
|
||||
// Returns true if any language uses Tesseract (as opposed to cube).
|
||||
// Returns true if any language uses Tesseract (as opposed to LSTM).
|
||||
bool AnyTessLang() const {
|
||||
if (tessedit_ocr_engine_mode != OEM_CUBE_ONLY) return true;
|
||||
if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_CUBE_ONLY)
|
||||
return true;
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if any language uses the LSTM.
|
||||
bool AnyLSTMLang() const {
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) return true;
|
||||
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) return true;
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode == OEM_LSTM_ONLY)
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -340,8 +342,6 @@ class Tesseract : public Wordrec {
|
||||
// is also returned to enable calculation of output bounding boxes.
|
||||
ImageData* GetRectImage(const TBOX& box, const BLOCK& block, int padding,
|
||||
TBOX* revised_box) const;
|
||||
// Top-level function recognizes a single raw line.
|
||||
void RecogRawLine(PAGE_RES* page_res);
|
||||
// Recognizes a word or group of words, converting to WERD_RES in *words.
|
||||
// Analogous to classify_word_pass1, but can handle a group of words as well.
|
||||
void LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
|
||||
@ -850,8 +850,8 @@ class Tesseract : public Wordrec {
|
||||
" 5=line, 6=word, 7=char"
|
||||
" (Values from PageSegMode enum in publictypes.h)");
|
||||
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
|
||||
"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults"
|
||||
" to loading and running only Tesseract (no Cube, no combiner)."
|
||||
"Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
|
||||
" to loading and running only Tesseract (no LSTM, no combiner)."
|
||||
" (Values from OcrEngineMode enum in tesseractclass.h)");
|
||||
STRING_VAR_H(tessedit_char_blacklist, "",
|
||||
"Blacklist of chars not to recognize");
|
||||
|
@ -884,6 +884,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
|
||||
}
|
||||
FakeWordFromRatings(TOP_CHOICE_PERM);
|
||||
reject_map.initialise(blob_count);
|
||||
best_state.init_to_size(blob_count, 1);
|
||||
done = true;
|
||||
}
|
||||
|
||||
|
@ -255,8 +255,9 @@ enum ParagraphJustification {
|
||||
*/
|
||||
enum OcrEngineMode {
|
||||
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest
|
||||
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower
|
||||
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy
|
||||
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
|
||||
OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
|
||||
// to Tesseract when things get difficult.
|
||||
OEM_DEFAULT, // Specify this mode when calling init_*(),
|
||||
// to indicate that any of the above modes
|
||||
// should be automatically inferred from the
|
||||
@ -264,14 +265,8 @@ enum OcrEngineMode {
|
||||
// command-line configs, or if not specified
|
||||
// in any of the above should be set to the
|
||||
// default OEM_TESSERACT_ONLY.
|
||||
// OEM_LSTM_ONLY will fall back (with a warning) to OEM_TESSERACT_ONLY where
|
||||
// there is no network model available. This allows use of a mix of languages,
|
||||
// some of which contain a network model, and some of which do not. Since the
|
||||
// tesseract model is required for the LSTM to fall back to for "difficult"
|
||||
// words anyway, this seems like a reasonable approach, but leaves the danger
|
||||
// of not noticing that it is using the wrong engine if the warning is
|
||||
// ignored.
|
||||
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
|
||||
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower
|
||||
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -508,6 +508,20 @@ class WERD_CHOICE : public ELIST_LINK {
|
||||
}
|
||||
return word_str;
|
||||
}
|
||||
// Returns true if any unichar_id in the word is a non-space-delimited char.
|
||||
bool ContainsAnyNonSpaceDelimited() const {
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if the word is all spaces.
|
||||
bool IsAllSpaces() const {
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
if (unichar_ids_[i] != UNICHAR_SPACE) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Call this to override the default (strict left to right graphemes)
|
||||
// with the fact that some engine produces a "reading order" set of
|
||||
|
@ -49,7 +49,7 @@ const int case_state_table[6][4] = {
|
||||
5, -1, 2, -1},
|
||||
};
|
||||
|
||||
int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) {
|
||||
int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const {
|
||||
int state = 0;
|
||||
int x;
|
||||
for (x = 0; x < word.length(); ++x) {
|
||||
|
@ -260,7 +260,7 @@ class Dict {
|
||||
MATRIX *ratings);
|
||||
|
||||
/// Returns the length of the shortest alpha run in WordChoice.
|
||||
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice);
|
||||
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const;
|
||||
/// Returns true if the certainty of the BestChoice word is within a
|
||||
/// reasonable range of the average certainties for the best choices for
|
||||
/// each character in the segmentation. This test is used to catch words
|
||||
@ -275,7 +275,7 @@ class Dict {
|
||||
/// Returns false if the best choice for the current word is questionable
|
||||
/// and should be tried again on the second pass or should be flagged to
|
||||
/// the user.
|
||||
bool AcceptableResult(WERD_RES* word);
|
||||
bool AcceptableResult(WERD_RES *word) const;
|
||||
void EndDangerousAmbigs();
|
||||
/// Prints the current choices for this word to stdout.
|
||||
void DebugWordChoices();
|
||||
@ -285,7 +285,7 @@ class Dict {
|
||||
void SettupStopperPass2();
|
||||
/* context.cpp *************************************************************/
|
||||
/// Check a string to see if it matches a set of lexical rules.
|
||||
int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset);
|
||||
int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const;
|
||||
/// Returns true if the word looks like an absolute garbage
|
||||
/// (e.g. image mistakenly recognized as text).
|
||||
bool absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset);
|
||||
|
@ -107,7 +107,7 @@ bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
|
||||
}
|
||||
}
|
||||
|
||||
bool Dict::AcceptableResult(WERD_RES* word) {
|
||||
bool Dict::AcceptableResult(WERD_RES *word) const {
|
||||
if (word->best_choice == NULL) return false;
|
||||
float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
|
||||
int WordSize;
|
||||
@ -448,7 +448,7 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
|
||||
}
|
||||
}
|
||||
|
||||
int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) {
|
||||
int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
|
||||
int shortest = MAX_INT32;
|
||||
int curr_len = 0;
|
||||
for (int w = 0; w < WordChoice.length(); ++w) {
|
||||
|
@ -141,6 +141,8 @@ class LSTMRecognizer {
|
||||
bool IsUsingAdaGrad() const {
  // True when the network has the NF_ADA_GRAD flag set.
  const bool ada_grad_enabled = network_->TestFlag(NF_ADA_GRAD);
  return ada_grad_enabled;
}
|
||||
// Provides access to the UNICHARSET that this classifier works with.
|
||||
const UNICHARSET& GetUnicharset() const { return ccutil_.unicharset; }
|
||||
// Provides access to the Dict that this classifier works with.
|
||||
const Dict* GetDict() const { return dict_; }
|
||||
// Sets the sample iteration to the given value. The sample_iteration_
|
||||
// determines the seed for the random number generator. The training
|
||||
// iteration is incremented only by a successful training iteration.
|
||||
|
Loading…
Reference in New Issue
Block a user