mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 05:39:35 +08:00
Format modified code with clang-format
Format the files which were changed in
commit 297d7d86ce
.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
06acbaf99c
commit
91e2b253c0
@ -21,42 +21,42 @@
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "basedir.h"
|
||||
#include "tessvars.h"
|
||||
#include "control.h"
|
||||
#include "reject.h"
|
||||
#include "pageres.h"
|
||||
#include "pgedit.h"
|
||||
#include "tprintf.h"
|
||||
#include "tessedit.h"
|
||||
#include "reject.h"
|
||||
#include "stopper.h"
|
||||
#include "tessedit.h"
|
||||
#include "tessvars.h"
|
||||
#include "tprintf.h"
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
#include "intmatcher.h"
|
||||
#include "chop.h"
|
||||
# include "chop.h"
|
||||
# include "intmatcher.h"
|
||||
#endif
|
||||
#ifndef ANDROID_BUILD
|
||||
#include "lstmrecognizer.h"
|
||||
# include "lstmrecognizer.h"
|
||||
#endif
|
||||
#include "tesseractclass.h"
|
||||
#include "params.h"
|
||||
#include "tesseractclass.h"
|
||||
#ifdef DISABLED_LEGACY_ENGINE
|
||||
#include "matchdefs.h"
|
||||
# include "matchdefs.h"
|
||||
#endif
|
||||
|
||||
// config under api
|
||||
#define API_CONFIG "configs/api_config"
|
||||
// config under api
|
||||
#define API_CONFIG "configs/api_config"
|
||||
|
||||
ETEXT_DESC *global_monitor = nullptr; // progress monitor
|
||||
ETEXT_DESC* global_monitor = nullptr; // progress monitor
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Read a "config" file containing a set of variable, value pairs.
|
||||
// Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
// and also accepts a relative or absolute path name.
|
||||
void Tesseract::read_config_file(const char *filename,
|
||||
void Tesseract::read_config_file(const char* filename,
|
||||
SetParamConstraint constraint) {
|
||||
STRING path = datadir;
|
||||
path += "configs/";
|
||||
@ -88,11 +88,11 @@ void Tesseract::read_config_file(const char *filename,
|
||||
// the config files specified on the command line or left as the default
|
||||
// OEM_TESSERACT_ONLY if none of the configs specify this variable.
|
||||
bool Tesseract::init_tesseract_lang_data(
|
||||
const char *arg0, const char *textbase, const char *language,
|
||||
OcrEngineMode oem, char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values, bool set_only_non_debug_params,
|
||||
TessdataManager *mgr) {
|
||||
const char* arg0, const char* textbase, const char* language,
|
||||
OcrEngineMode oem, char** configs, int configs_size,
|
||||
const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values, bool set_only_non_debug_params,
|
||||
TessdataManager* mgr) {
|
||||
// Set the basename, compute the data directory.
|
||||
main_setup(arg0, textbase);
|
||||
|
||||
@ -106,8 +106,9 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
|
||||
if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string())) {
|
||||
tprintf("Error opening data file %s\n", tessdata_path.string());
|
||||
tprintf("Please make sure the TESSDATA_PREFIX environment variable is set"
|
||||
" to your \"tessdata\" directory.\n");
|
||||
tprintf(
|
||||
"Please make sure the TESSDATA_PREFIX environment variable is set"
|
||||
" to your \"tessdata\" directory.\n");
|
||||
return false;
|
||||
}
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -131,8 +132,9 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
this->params());
|
||||
}
|
||||
|
||||
SetParamConstraint set_params_constraint = set_only_non_debug_params ?
|
||||
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY : SET_PARAM_CONSTRAINT_NONE;
|
||||
SetParamConstraint set_params_constraint =
|
||||
set_only_non_debug_params ? SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY
|
||||
: SET_PARAM_CONSTRAINT_NONE;
|
||||
// Load tesseract variables from config files. This is done after loading
|
||||
// language-specific variables from [lang].traineddata file, so that custom
|
||||
// config files can override values in [lang].traineddata file.
|
||||
@ -153,8 +155,8 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
}
|
||||
}
|
||||
|
||||
if (((STRING &)tessedit_write_params_to_file).length() > 0) {
|
||||
FILE *params_file = fopen(tessedit_write_params_to_file.string(), "wb");
|
||||
if (((STRING&)tessedit_write_params_to_file).length() > 0) {
|
||||
FILE* params_file = fopen(tessedit_write_params_to_file.string(), "wb");
|
||||
if (params_file != nullptr) {
|
||||
ParamUtils::PrintParams(params_file, this->params());
|
||||
fclose(params_file);
|
||||
@ -177,16 +179,16 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
// engine-specific data files need to be loaded.
|
||||
// If LSTM_ONLY is requested, the base Tesseract files are *Not* required.
|
||||
#ifndef ANDROID_BUILD
|
||||
#ifdef DISABLED_LEGACY_ENGINE
|
||||
# ifdef DISABLED_LEGACY_ENGINE
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
#else
|
||||
# else
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
# endif // ndef DISABLED_LEGACY_ENGINE
|
||||
if (mgr->IsComponentAvailable(TESSDATA_LSTM)) {
|
||||
lstm_recognizer_ = new LSTMRecognizer;
|
||||
ASSERT_HOST(
|
||||
lstm_recognizer_->Load(this->params(), lstm_use_matrix ? language : nullptr, mgr));
|
||||
ASSERT_HOST(lstm_recognizer_->Load(
|
||||
this->params(), lstm_use_matrix ? language : nullptr, mgr));
|
||||
} else {
|
||||
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
@ -203,7 +205,7 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
}
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) ||
|
||||
!unicharset.load_from_file(&fp, false)) {
|
||||
!unicharset.load_from_file(&fp, false)) {
|
||||
return false;
|
||||
}
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
@ -228,8 +230,8 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
// Init ParamsModel.
|
||||
// Load pass1 and pass2 weights (for now these two sets are the same, but in
|
||||
// the future separate sets of weights can be generated).
|
||||
for (int p = ParamsModel::PTRAIN_PASS1;
|
||||
p < ParamsModel::PTRAIN_NUM_PASSES; ++p) {
|
||||
for (int p = ParamsModel::PTRAIN_PASS1; p < ParamsModel::PTRAIN_NUM_PASSES;
|
||||
++p) {
|
||||
language_model_->getParamsModel().SetPass(
|
||||
static_cast<ParamsModel::PassEnum>(p));
|
||||
if (mgr->GetComponent(TESSDATA_PARAMS_MODEL, &fp)) {
|
||||
@ -247,8 +249,7 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
static bool IsStrInList(const STRING& str,
|
||||
const GenericVector<STRING>& str_list) {
|
||||
for (int i = 0; i < str_list.size(); ++i) {
|
||||
if (str_list[i] == str)
|
||||
return true;
|
||||
if (str_list[i] == str) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -265,8 +266,7 @@ void Tesseract::ParseLanguageString(const char* lang_str,
|
||||
while (remains.length() > 0) {
|
||||
// Find the start of the lang code and which vector to add to.
|
||||
const char* start = remains.string();
|
||||
while (*start == '+')
|
||||
++start;
|
||||
while (*start == '+') ++start;
|
||||
GenericVector<STRING>* target = to_load;
|
||||
if (*start == '~') {
|
||||
target = not_to_load;
|
||||
@ -275,8 +275,7 @@ void Tesseract::ParseLanguageString(const char* lang_str,
|
||||
// Find the index of the end of the lang code in string start.
|
||||
int end = strlen(start);
|
||||
const char* plus = strchr(start, '+');
|
||||
if (plus != nullptr && plus - start < end)
|
||||
end = plus - start;
|
||||
if (plus != nullptr && plus - start < end) end = plus - start;
|
||||
STRING lang_code(start);
|
||||
lang_code.truncate_at(end);
|
||||
STRING next(start + end);
|
||||
@ -292,13 +291,13 @@ void Tesseract::ParseLanguageString(const char* lang_str,
|
||||
// string and recursively any additional languages required by any language
|
||||
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
|
||||
// See init_tesseract_internal for args.
|
||||
int Tesseract::init_tesseract(const char *arg0, const char *textbase,
|
||||
const char *language, OcrEngineMode oem,
|
||||
char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
int Tesseract::init_tesseract(const char* arg0, const char* textbase,
|
||||
const char* language, OcrEngineMode oem,
|
||||
char** configs, int configs_size,
|
||||
const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values,
|
||||
bool set_only_non_debug_params,
|
||||
TessdataManager *mgr) {
|
||||
TessdataManager* mgr) {
|
||||
GenericVector<STRING> langs_to_load;
|
||||
GenericVector<STRING> langs_not_to_load;
|
||||
ParseLanguageString(language, &langs_to_load, &langs_not_to_load);
|
||||
@ -311,8 +310,8 @@ int Tesseract::init_tesseract(const char *arg0, const char *textbase,
|
||||
// Load the rest into sub_langs_.
|
||||
for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
|
||||
if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
|
||||
const char *lang_str = langs_to_load[lang_index].string();
|
||||
Tesseract *tess_to_init;
|
||||
const char* lang_str = langs_to_load[lang_index].string();
|
||||
Tesseract* tess_to_init;
|
||||
if (!loaded_primary) {
|
||||
tess_to_init = this;
|
||||
} else {
|
||||
@ -392,13 +391,13 @@ int Tesseract::init_tesseract(const char *arg0, const char *textbase,
|
||||
// in vars_vec.
|
||||
// If set_only_init_params is true, then only the initialization variables
|
||||
// will be set.
|
||||
int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase,
|
||||
const char *language, OcrEngineMode oem,
|
||||
char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
int Tesseract::init_tesseract_internal(const char* arg0, const char* textbase,
|
||||
const char* language, OcrEngineMode oem,
|
||||
char** configs, int configs_size,
|
||||
const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values,
|
||||
bool set_only_non_debug_params,
|
||||
TessdataManager *mgr) {
|
||||
TessdataManager* mgr) {
|
||||
if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
|
||||
configs_size, vars_vec, vars_values,
|
||||
set_only_non_debug_params, mgr)) {
|
||||
@ -412,7 +411,7 @@ int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase,
|
||||
bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
|
||||
program_editup(textbase, init_tesseract ? mgr : nullptr,
|
||||
init_tesseract ? mgr : nullptr);
|
||||
return 0; //Normal exit
|
||||
return 0; // Normal exit
|
||||
}
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -458,8 +457,8 @@ void Tesseract::SetupUniversalFontIds() {
|
||||
}
|
||||
|
||||
// init the LM component
|
||||
int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase,
|
||||
const char *language, TessdataManager *mgr) {
|
||||
int Tesseract::init_tesseract_lm(const char* arg0, const char* textbase,
|
||||
const char* language, TessdataManager* mgr) {
|
||||
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
|
||||
nullptr, 0, nullptr, nullptr, false, mgr))
|
||||
return -1;
|
||||
@ -471,14 +470,11 @@ int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase,
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
void Tesseract::end_tesseract() {
|
||||
end_recog();
|
||||
}
|
||||
void Tesseract::end_tesseract() { end_recog(); }
|
||||
|
||||
/* Define command type identifiers */
|
||||
|
||||
enum CMD_EVENTS
|
||||
{
|
||||
enum CMD_EVENTS {
|
||||
ACTION_1_CMD_EVENT,
|
||||
RECOG_WERDS,
|
||||
RECOG_PSEUDO,
|
||||
|
@ -17,13 +17,13 @@
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef PARAMS_H
|
||||
#define PARAMS_H
|
||||
#ifndef PARAMS_H
|
||||
#define PARAMS_H
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdio>
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "strngs.h"
|
||||
#include "genericvector.h"
|
||||
#include "strngs.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -41,10 +41,10 @@ enum SetParamConstraint {
|
||||
};
|
||||
|
||||
struct ParamsVectors {
|
||||
GenericVector<IntParam *> int_params;
|
||||
GenericVector<BoolParam *> bool_params;
|
||||
GenericVector<StringParam *> string_params;
|
||||
GenericVector<DoubleParam *> double_params;
|
||||
GenericVector<IntParam*> int_params;
|
||||
GenericVector<BoolParam*> bool_params;
|
||||
GenericVector<StringParam*> string_params;
|
||||
GenericVector<DoubleParam*> double_params;
|
||||
};
|
||||
|
||||
// Utility functions for working with Tesseract parameters.
|
||||
@ -55,27 +55,25 @@ class ParamUtils {
|
||||
// ORed or ANDed with any current values.
|
||||
// Blank lines and lines beginning # are ignored.
|
||||
// Values may have any whitespace after the name and are the rest of line.
|
||||
static bool ReadParamsFile(
|
||||
const char *file, // filename to read
|
||||
SetParamConstraint constraint,
|
||||
ParamsVectors *member_params);
|
||||
static bool ReadParamsFile(const char* file, // filename to read
|
||||
SetParamConstraint constraint,
|
||||
ParamsVectors* member_params);
|
||||
|
||||
// Read parameters from the given file pointer.
|
||||
static bool ReadParamsFromFp(SetParamConstraint constraint, TFile *fp,
|
||||
ParamsVectors *member_params);
|
||||
static bool ReadParamsFromFp(SetParamConstraint constraint, TFile* fp,
|
||||
ParamsVectors* member_params);
|
||||
|
||||
// Set a parameters to have the given value.
|
||||
static bool SetParam(const char *name, const char* value,
|
||||
static bool SetParam(const char* name, const char* value,
|
||||
SetParamConstraint constraint,
|
||||
ParamsVectors *member_params);
|
||||
ParamsVectors* member_params);
|
||||
|
||||
// Returns the pointer to the parameter with the given name (of the
|
||||
// appropriate type) if it was found in the vector obtained from
|
||||
// GlobalParams() or in the given member_params.
|
||||
template<class T>
|
||||
static T *FindParam(const char *name,
|
||||
const GenericVector<T *> &global_vec,
|
||||
const GenericVector<T *> &member_vec) {
|
||||
template <class T>
|
||||
static T* FindParam(const char* name, const GenericVector<T*>& global_vec,
|
||||
const GenericVector<T*>& member_vec) {
|
||||
int i;
|
||||
for (i = 0; i < global_vec.size(); ++i) {
|
||||
if (strcmp(global_vec[i]->name_str(), name) == 0) return global_vec[i];
|
||||
@ -86,8 +84,8 @@ class ParamUtils {
|
||||
return nullptr;
|
||||
}
|
||||
// Removes the given pointer to the param from the given vector.
|
||||
template<class T>
|
||||
static void RemoveParam(T *param_ptr, GenericVector<T *> *vec) {
|
||||
template <class T>
|
||||
static void RemoveParam(T* param_ptr, GenericVector<T*>* vec) {
|
||||
for (int i = 0; i < vec->size(); ++i) {
|
||||
if ((*vec)[i] == param_ptr) {
|
||||
vec->remove(i);
|
||||
@ -97,12 +95,12 @@ class ParamUtils {
|
||||
}
|
||||
// Fetches the value of the named param as a STRING. Returns false if not
|
||||
// found.
|
||||
static bool GetParamAsString(const char *name,
|
||||
static bool GetParamAsString(const char* name,
|
||||
const ParamsVectors* member_params,
|
||||
STRING *value);
|
||||
STRING* value);
|
||||
|
||||
// Print parameters to the given file.
|
||||
static void PrintParams(FILE *fp, const ParamsVectors *member_params);
|
||||
static void PrintParams(FILE* fp, const ParamsVectors* member_params);
|
||||
|
||||
// Resets all parameters back to default values;
|
||||
static void ResetToDefaults(ParamsVectors* member_params);
|
||||
@ -113,36 +111,36 @@ class Param {
|
||||
public:
|
||||
~Param() = default;
|
||||
|
||||
const char *name_str() const { return name_; }
|
||||
const char *info_str() const { return info_; }
|
||||
const char* name_str() const { return name_; }
|
||||
const char* info_str() const { return info_; }
|
||||
bool is_init() const { return init_; }
|
||||
bool is_debug() const { return debug_; }
|
||||
bool constraint_ok(SetParamConstraint constraint) const {
|
||||
return (constraint == SET_PARAM_CONSTRAINT_NONE ||
|
||||
(constraint == SET_PARAM_CONSTRAINT_DEBUG_ONLY &&
|
||||
this->is_debug()) ||
|
||||
(constraint == SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY &&
|
||||
!this->is_debug()) ||
|
||||
(constraint == SET_PARAM_CONSTRAINT_NON_INIT_ONLY &&
|
||||
!this->is_init()));
|
||||
return (
|
||||
constraint == SET_PARAM_CONSTRAINT_NONE ||
|
||||
(constraint == SET_PARAM_CONSTRAINT_DEBUG_ONLY && this->is_debug()) ||
|
||||
(constraint == SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY &&
|
||||
!this->is_debug()) ||
|
||||
(constraint == SET_PARAM_CONSTRAINT_NON_INIT_ONLY && !this->is_init()));
|
||||
}
|
||||
|
||||
protected:
|
||||
Param(const char *name, const char *comment, bool init) :
|
||||
name_(name), info_(comment), init_(init) {
|
||||
Param(const char* name, const char* comment, bool init)
|
||||
: name_(name), info_(comment), init_(init) {
|
||||
debug_ = (strstr(name, "debug") != nullptr) || (strstr(name, "display"));
|
||||
}
|
||||
|
||||
const char *name_; // name of this parameter
|
||||
const char *info_; // for menus
|
||||
bool init_; // needs to be set before init
|
||||
const char* name_; // name of this parameter
|
||||
const char* info_; // for menus
|
||||
bool init_; // needs to be set before init
|
||||
bool debug_;
|
||||
};
|
||||
|
||||
class IntParam : public Param {
|
||||
public:
|
||||
IntParam(int32_t value, const char *name, const char *comment, bool init,
|
||||
ParamsVectors *vec) : Param(name, comment, init) {
|
||||
public:
|
||||
IntParam(int32_t value, const char* name, const char* comment, bool init,
|
||||
ParamsVectors* vec)
|
||||
: Param(name, comment, init) {
|
||||
value_ = value;
|
||||
default_ = value;
|
||||
params_vec_ = &(vec->int_params);
|
||||
@ -152,29 +150,29 @@ class IntParam : public Param {
|
||||
operator int32_t() const { return value_; }
|
||||
void operator=(int32_t value) { value_ = value; }
|
||||
void set_value(int32_t value) { value_ = value; }
|
||||
void ResetToDefault() {
|
||||
value_ = default_;
|
||||
}
|
||||
void ResetToDefault() { value_ = default_; }
|
||||
void ResetFrom(const ParamsVectors* vec) {
|
||||
for (int i = 0; i < vec->int_params.size(); ++i) {
|
||||
if (strcmp(vec->int_params[i]->name_str(), name_) == 0) {
|
||||
//printf("overriding param %s=%d by =%d\n", name_, value_, *vec->int_params[i]);
|
||||
// printf("overriding param %s=%d by =%d\n", name_, value_,
|
||||
// *vec->int_params[i]);
|
||||
value_ = *vec->int_params[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
int32_t value_;
|
||||
int32_t default_;
|
||||
// Pointer to the vector that contains this param (not owned by this class).
|
||||
GenericVector<IntParam *> *params_vec_;
|
||||
GenericVector<IntParam*>* params_vec_;
|
||||
};
|
||||
|
||||
class BoolParam : public Param {
|
||||
public:
|
||||
BoolParam(bool value, const char *name, const char *comment, bool init,
|
||||
ParamsVectors *vec) : Param(name, comment, init) {
|
||||
BoolParam(bool value, const char* name, const char* comment, bool init,
|
||||
ParamsVectors* vec)
|
||||
: Param(name, comment, init) {
|
||||
value_ = value;
|
||||
default_ = value;
|
||||
params_vec_ = &(vec->bool_params);
|
||||
@ -184,13 +182,12 @@ class BoolParam : public Param {
|
||||
operator BOOL8() const { return value_; }
|
||||
void operator=(BOOL8 value) { value_ = value; }
|
||||
void set_value(BOOL8 value) { value_ = value; }
|
||||
void ResetToDefault() {
|
||||
value_ = default_;
|
||||
}
|
||||
void ResetToDefault() { value_ = default_; }
|
||||
void ResetFrom(const ParamsVectors* vec) {
|
||||
for (int i = 0; i < vec->bool_params.size(); ++i) {
|
||||
if (strcmp(vec->bool_params[i]->name_str(), name_) == 0) {
|
||||
//printf("overriding param %s=%s by =%s\n", name_, value_ ? "true" : "false", *vec->bool_params[i] ? "true" : "false");
|
||||
// printf("overriding param %s=%s by =%s\n", name_, value_ ? "true" :
|
||||
// "false", *vec->bool_params[i] ? "true" : "false");
|
||||
value_ = *vec->bool_params[i];
|
||||
}
|
||||
}
|
||||
@ -200,34 +197,33 @@ class BoolParam : public Param {
|
||||
BOOL8 value_;
|
||||
BOOL8 default_;
|
||||
// Pointer to the vector that contains this param (not owned by this class).
|
||||
GenericVector<BoolParam *> *params_vec_;
|
||||
GenericVector<BoolParam*>* params_vec_;
|
||||
};
|
||||
|
||||
class StringParam : public Param {
|
||||
public:
|
||||
StringParam(const char *value, const char *name,
|
||||
const char *comment, bool init,
|
||||
ParamsVectors *vec) : Param(name, comment, init) {
|
||||
StringParam(const char* value, const char* name, const char* comment,
|
||||
bool init, ParamsVectors* vec)
|
||||
: Param(name, comment, init) {
|
||||
value_ = value;
|
||||
default_ = value;
|
||||
params_vec_ = &(vec->string_params);
|
||||
vec->string_params.push_back(this);
|
||||
}
|
||||
~StringParam() { ParamUtils::RemoveParam<StringParam>(this, params_vec_); }
|
||||
operator STRING &() { return value_; }
|
||||
const char *string() const { return value_.string(); }
|
||||
const char *c_str() const { return value_.string(); }
|
||||
operator STRING&() { return value_; }
|
||||
const char* string() const { return value_.string(); }
|
||||
const char* c_str() const { return value_.string(); }
|
||||
bool empty() { return value_.length() <= 0; }
|
||||
bool operator==(const STRING& other) { return value_ == other; }
|
||||
void operator=(const STRING& value) { value_ = value; }
|
||||
void set_value(const STRING& value) { value_ = value; }
|
||||
void ResetToDefault() {
|
||||
value_ = default_;
|
||||
}
|
||||
void ResetToDefault() { value_ = default_; }
|
||||
void ResetFrom(const ParamsVectors* vec) {
|
||||
for (int i = 0; i < vec->string_params.size(); ++i) {
|
||||
if (strcmp(vec->string_params[i]->name_str(), name_) == 0) {
|
||||
//printf("overriding param %s=%s by =%s\n", name_, value_, vec->string_params[i]->c_str());
|
||||
// printf("overriding param %s=%s by =%s\n", name_, value_,
|
||||
// vec->string_params[i]->c_str());
|
||||
value_ = *vec->string_params[i];
|
||||
}
|
||||
}
|
||||
@ -237,13 +233,14 @@ class StringParam : public Param {
|
||||
STRING value_;
|
||||
STRING default_;
|
||||
// Pointer to the vector that contains this param (not owned by this class).
|
||||
GenericVector<StringParam *> *params_vec_;
|
||||
GenericVector<StringParam*>* params_vec_;
|
||||
};
|
||||
|
||||
class DoubleParam : public Param {
|
||||
public:
|
||||
DoubleParam(double value, const char *name, const char *comment,
|
||||
bool init, ParamsVectors *vec) : Param(name, comment, init) {
|
||||
DoubleParam(double value, const char* name, const char* comment, bool init,
|
||||
ParamsVectors* vec)
|
||||
: Param(name, comment, init) {
|
||||
value_ = value;
|
||||
default_ = value;
|
||||
params_vec_ = &(vec->double_params);
|
||||
@ -253,13 +250,12 @@ class DoubleParam : public Param {
|
||||
operator double() const { return value_; }
|
||||
void operator=(double value) { value_ = value; }
|
||||
void set_value(double value) { value_ = value; }
|
||||
void ResetToDefault() {
|
||||
value_ = default_;
|
||||
}
|
||||
void ResetToDefault() { value_ = default_; }
|
||||
void ResetFrom(const ParamsVectors* vec) {
|
||||
for (int i = 0; i < vec->double_params.size(); ++i) {
|
||||
if (strcmp(vec->double_params[i]->name_str(), name_) == 0) {
|
||||
//printf("overriding param %s=%f by =%f\n", name_, value_, *vec->double_params[i]);
|
||||
// printf("overriding param %s=%f by =%f\n", name_, value_,
|
||||
// *vec->double_params[i]);
|
||||
value_ = *vec->double_params[i];
|
||||
}
|
||||
}
|
||||
@ -269,7 +265,7 @@ class DoubleParam : public Param {
|
||||
double value_;
|
||||
double default_;
|
||||
// Pointer to the vector that contains this param (not owned by this class).
|
||||
GenericVector<DoubleParam *> *params_vec_;
|
||||
GenericVector<DoubleParam*>* params_vec_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
@ -283,7 +279,7 @@ class DoubleParam : public Param {
|
||||
//
|
||||
// TODO(daria): remove GlobalParams() when all global Tesseract
|
||||
// parameters are converted to members.
|
||||
tesseract::ParamsVectors *GlobalParams();
|
||||
tesseract::ParamsVectors* GlobalParams();
|
||||
|
||||
/*************************************************************************
|
||||
* Note on defining parameters.
|
||||
@ -293,52 +289,48 @@ tesseract::ParamsVectors *GlobalParams();
|
||||
* (there is no such guarantee for parameters defined with the other macros).
|
||||
*************************************************************************/
|
||||
|
||||
#define INT_VAR_H(name,val,comment)\
|
||||
tesseract::IntParam name
|
||||
#define INT_VAR_H(name, val, comment) tesseract::IntParam name
|
||||
|
||||
#define BOOL_VAR_H(name,val,comment)\
|
||||
tesseract::BoolParam name
|
||||
#define BOOL_VAR_H(name, val, comment) tesseract::BoolParam name
|
||||
|
||||
#define STRING_VAR_H(name,val,comment)\
|
||||
tesseract::StringParam name
|
||||
#define STRING_VAR_H(name, val, comment) tesseract::StringParam name
|
||||
|
||||
#define double_VAR_H(name,val,comment)\
|
||||
tesseract::DoubleParam name
|
||||
#define double_VAR_H(name, val, comment) tesseract::DoubleParam name
|
||||
|
||||
#define INT_VAR(name,val,comment)\
|
||||
tesseract::IntParam name(val,#name,comment,false,GlobalParams())
|
||||
#define INT_VAR(name, val, comment) \
|
||||
tesseract::IntParam name(val, #name, comment, false, GlobalParams())
|
||||
|
||||
#define BOOL_VAR(name,val,comment)\
|
||||
tesseract::BoolParam name(val,#name,comment,false,GlobalParams())
|
||||
#define BOOL_VAR(name, val, comment) \
|
||||
tesseract::BoolParam name(val, #name, comment, false, GlobalParams())
|
||||
|
||||
#define STRING_VAR(name,val,comment)\
|
||||
tesseract::StringParam name(val,#name,comment,false,GlobalParams())
|
||||
#define STRING_VAR(name, val, comment) \
|
||||
tesseract::StringParam name(val, #name, comment, false, GlobalParams())
|
||||
|
||||
#define double_VAR(name,val,comment)\
|
||||
tesseract::DoubleParam name(val,#name,comment,false,GlobalParams())
|
||||
#define double_VAR(name, val, comment) \
|
||||
tesseract::DoubleParam name(val, #name, comment, false, GlobalParams())
|
||||
|
||||
#define INT_MEMBER(name, val, comment, vec)\
|
||||
#define INT_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, false, vec)
|
||||
|
||||
#define BOOL_MEMBER(name, val, comment, vec)\
|
||||
#define BOOL_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, false, vec)
|
||||
|
||||
#define STRING_MEMBER(name, val, comment, vec)\
|
||||
#define STRING_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, false, vec)
|
||||
|
||||
#define double_MEMBER(name, val, comment, vec)\
|
||||
#define double_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, false, vec)
|
||||
|
||||
#define INT_INIT_MEMBER(name, val, comment, vec)\
|
||||
#define INT_INIT_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, true, vec)
|
||||
|
||||
#define BOOL_INIT_MEMBER(name, val, comment, vec)\
|
||||
#define BOOL_INIT_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, true, vec)
|
||||
|
||||
#define STRING_INIT_MEMBER(name, val, comment, vec)\
|
||||
#define STRING_INIT_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, true, vec)
|
||||
|
||||
#define double_INIT_MEMBER(name, val, comment, vec)\
|
||||
#define double_INIT_MEMBER(name, val, comment, vec) \
|
||||
name(val, #name, comment, true, vec)
|
||||
|
||||
#endif
|
||||
|
@ -27,7 +27,7 @@ namespace tesseract {
|
||||
|
||||
class Image;
|
||||
|
||||
Dict::Dict(CCUtil *ccutil)
|
||||
Dict::Dict(CCUtil* ccutil)
|
||||
: letter_is_okay_(&tesseract::Dict::def_letter_is_okay),
|
||||
probability_in_context_(&tesseract::Dict::def_probability_in_context),
|
||||
params_model_classify_(nullptr),
|
||||
@ -190,7 +190,7 @@ Dict::~Dict() {
|
||||
if (output_ambig_words_file_ != nullptr) fclose(output_ambig_words_file_);
|
||||
}
|
||||
|
||||
DawgCache *Dict::GlobalDawgCache() {
|
||||
DawgCache* Dict::GlobalDawgCache() {
|
||||
// This global cache (a singleton) will outlive every Tesseract instance
|
||||
// (even those that someone else might declare as global statics).
|
||||
static DawgCache cache;
|
||||
@ -198,7 +198,7 @@ DawgCache *Dict::GlobalDawgCache() {
|
||||
}
|
||||
|
||||
// Sets up ready for a Load or LoadLSTM.
|
||||
void Dict::SetupForLoad(DawgCache *dawg_cache) {
|
||||
void Dict::SetupForLoad(DawgCache* dawg_cache) {
|
||||
if (dawgs_.length() != 0) this->End();
|
||||
|
||||
apostrophe_unichar_id_ = getUnicharset().unichar_to_id(kApostropheSymbol);
|
||||
@ -216,7 +216,7 @@ void Dict::SetupForLoad(DawgCache *dawg_cache) {
|
||||
}
|
||||
|
||||
// Loads the dawgs needed by Tesseract. Call FinishLoad() after.
|
||||
void Dict::Load(const STRING &lang, TessdataManager *data_file) {
|
||||
void Dict::Load(const STRING& lang, TessdataManager* data_file) {
|
||||
// Load dawgs_.
|
||||
if (load_punc_dawg) {
|
||||
punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_PUNC_DAWG,
|
||||
@ -224,12 +224,12 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) {
|
||||
if (punc_dawg_) dawgs_ += punc_dawg_;
|
||||
}
|
||||
if (load_system_dawg) {
|
||||
Dawg *system_dawg = dawg_cache_->GetSquishedDawg(
|
||||
Dawg* system_dawg = dawg_cache_->GetSquishedDawg(
|
||||
lang, TESSDATA_SYSTEM_DAWG, dawg_debug_level, data_file);
|
||||
if (system_dawg) dawgs_ += system_dawg;
|
||||
}
|
||||
if (load_number_dawg) {
|
||||
Dawg *number_dawg = dawg_cache_->GetSquishedDawg(
|
||||
Dawg* number_dawg = dawg_cache_->GetSquishedDawg(
|
||||
lang, TESSDATA_NUMBER_DAWG, dawg_debug_level, data_file);
|
||||
if (number_dawg) dawgs_ += number_dawg;
|
||||
}
|
||||
@ -251,15 +251,15 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) {
|
||||
}
|
||||
|
||||
STRING name;
|
||||
if (((STRING &)user_words_suffix).length() > 0 ||
|
||||
((STRING &)user_words_file).length() > 0) {
|
||||
Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM,
|
||||
if (((STRING&)user_words_suffix).length() > 0 ||
|
||||
((STRING&)user_words_file).length() > 0) {
|
||||
Trie* trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM,
|
||||
getUnicharset().size(), dawg_debug_level);
|
||||
if (((STRING &)user_words_file).length() > 0) {
|
||||
name = user_words_file;
|
||||
if (((STRING&)user_words_file).length() > 0) {
|
||||
name = user_words_file;
|
||||
} else {
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_words_suffix;
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_words_suffix;
|
||||
}
|
||||
if (!trie_ptr->read_and_add_word_list(name.string(), getUnicharset(),
|
||||
Trie::RRP_REVERSE_IF_HAS_RTL)) {
|
||||
@ -270,16 +270,16 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) {
|
||||
}
|
||||
}
|
||||
|
||||
if (((STRING &)user_patterns_suffix).length() > 0 ||
|
||||
((STRING &)user_patterns_file).length() > 0) {
|
||||
Trie *trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM,
|
||||
if (((STRING&)user_patterns_suffix).length() > 0 ||
|
||||
((STRING&)user_patterns_file).length() > 0) {
|
||||
Trie* trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM,
|
||||
getUnicharset().size(), dawg_debug_level);
|
||||
trie_ptr->initialize_patterns(&(getUnicharset()));
|
||||
if (((STRING &)user_patterns_file).length() > 0) {
|
||||
name = user_patterns_file;
|
||||
if (((STRING&)user_patterns_file).length() > 0) {
|
||||
name = user_patterns_file;
|
||||
} else {
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_patterns_suffix;
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_patterns_suffix;
|
||||
}
|
||||
if (!trie_ptr->read_pattern_list(name.string(), getUnicharset())) {
|
||||
tprintf("Error: failed to load %s\n", name.string());
|
||||
@ -299,7 +299,7 @@ void Dict::Load(const STRING &lang, TessdataManager *data_file) {
|
||||
}
|
||||
|
||||
// Loads the dawgs needed by the LSTM model. Call FinishLoad() after.
|
||||
void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) {
|
||||
void Dict::LoadLSTM(const STRING& lang, TessdataManager* data_file) {
|
||||
// Load dawgs_.
|
||||
if (load_punc_dawg) {
|
||||
punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_LSTM_PUNC_DAWG,
|
||||
@ -307,27 +307,28 @@ void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) {
|
||||
if (punc_dawg_) dawgs_ += punc_dawg_;
|
||||
}
|
||||
if (load_system_dawg) {
|
||||
Dawg *system_dawg = dawg_cache_->GetSquishedDawg(
|
||||
Dawg* system_dawg = dawg_cache_->GetSquishedDawg(
|
||||
lang, TESSDATA_LSTM_SYSTEM_DAWG, dawg_debug_level, data_file);
|
||||
if (system_dawg) dawgs_ += system_dawg;
|
||||
}
|
||||
if (load_number_dawg) {
|
||||
Dawg *number_dawg = dawg_cache_->GetSquishedDawg(
|
||||
Dawg* number_dawg = dawg_cache_->GetSquishedDawg(
|
||||
lang, TESSDATA_LSTM_NUMBER_DAWG, dawg_debug_level, data_file);
|
||||
if (number_dawg) dawgs_ += number_dawg;
|
||||
}
|
||||
|
||||
// stolen from Dict::Load (but needs params_ from Tesseract langdata/config/api):
|
||||
// stolen from Dict::Load (but needs params_ from Tesseract
|
||||
// langdata/config/api):
|
||||
STRING name;
|
||||
if (((STRING &)user_words_suffix).length() > 0 ||
|
||||
((STRING &)user_words_file).length() > 0) {
|
||||
Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM,
|
||||
if (((STRING&)user_words_suffix).length() > 0 ||
|
||||
((STRING&)user_words_file).length() > 0) {
|
||||
Trie* trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM,
|
||||
getUnicharset().size(), dawg_debug_level);
|
||||
if (((STRING &)user_words_file).length() > 0) {
|
||||
name = user_words_file;
|
||||
if (((STRING&)user_words_file).length() > 0) {
|
||||
name = user_words_file;
|
||||
} else {
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_words_suffix;
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_words_suffix;
|
||||
}
|
||||
if (!trie_ptr->read_and_add_word_list(name.string(), getUnicharset(),
|
||||
Trie::RRP_REVERSE_IF_HAS_RTL)) {
|
||||
@ -338,16 +339,16 @@ void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) {
|
||||
}
|
||||
}
|
||||
|
||||
if (((STRING &)user_patterns_suffix).length() > 0 ||
|
||||
((STRING &)user_patterns_file).length() > 0) {
|
||||
Trie *trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM,
|
||||
if (((STRING&)user_patterns_suffix).length() > 0 ||
|
||||
((STRING&)user_patterns_file).length() > 0) {
|
||||
Trie* trie_ptr = new Trie(DAWG_TYPE_PATTERN, lang, USER_PATTERN_PERM,
|
||||
getUnicharset().size(), dawg_debug_level);
|
||||
trie_ptr->initialize_patterns(&(getUnicharset()));
|
||||
if (((STRING &)user_patterns_file).length() > 0) {
|
||||
name = user_patterns_file;
|
||||
if (((STRING&)user_patterns_file).length() > 0) {
|
||||
name = user_patterns_file;
|
||||
} else {
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_patterns_suffix;
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_patterns_suffix;
|
||||
}
|
||||
if (!trie_ptr->read_pattern_list(name.string(), getUnicharset())) {
|
||||
tprintf("Error: failed to load %s\n", name.string());
|
||||
@ -356,7 +357,6 @@ void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) {
|
||||
dawgs_ += trie_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Completes the loading process after Load() and/or LoadLSTM().
|
||||
@ -368,13 +368,14 @@ bool Dict::FinishLoad() {
|
||||
// indices into the dawgs_ vector of the successors for dawg i.
|
||||
successors_.reserve(dawgs_.length());
|
||||
for (int i = 0; i < dawgs_.length(); ++i) {
|
||||
const Dawg *dawg = dawgs_[i];
|
||||
SuccessorList *lst = new SuccessorList();
|
||||
const Dawg* dawg = dawgs_[i];
|
||||
SuccessorList* lst = new SuccessorList();
|
||||
for (int j = 0; j < dawgs_.length(); ++j) {
|
||||
const Dawg *other = dawgs_[j];
|
||||
const Dawg* other = dawgs_[j];
|
||||
if (dawg != nullptr && other != nullptr &&
|
||||
(dawg->lang() == other->lang()) &&
|
||||
kDawgSuccessors[dawg->type()][other->type()]) *lst += j;
|
||||
kDawgSuccessors[dawg->type()][other->type()])
|
||||
*lst += j;
|
||||
}
|
||||
successors_ += lst;
|
||||
}
|
||||
@ -382,8 +383,7 @@ bool Dict::FinishLoad() {
|
||||
}
|
||||
|
||||
void Dict::End() {
|
||||
if (dawgs_.length() == 0)
|
||||
return; // Not safe to call twice.
|
||||
if (dawgs_.length() == 0) return; // Not safe to call twice.
|
||||
for (int i = 0; i < dawgs_.size(); i++) {
|
||||
if (!dawg_cache_->FreeDawg(dawgs_[i])) {
|
||||
delete dawgs_[i];
|
||||
@ -405,19 +405,18 @@ void Dict::End() {
|
||||
// Returns true if in light of the current state unichar_id is allowed
|
||||
// according to at least one of the dawgs in the dawgs_ vector.
|
||||
// See more extensive comments in dict.h where this function is declared.
|
||||
int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
const UNICHARSET& unicharset,
|
||||
UNICHAR_ID unichar_id,
|
||||
bool word_end) const {
|
||||
DawgArgs *dawg_args = static_cast<DawgArgs *>(void_dawg_args);
|
||||
int Dict::def_letter_is_okay(void* void_dawg_args, const UNICHARSET& unicharset,
|
||||
UNICHAR_ID unichar_id, bool word_end) const {
|
||||
DawgArgs* dawg_args = static_cast<DawgArgs*>(void_dawg_args);
|
||||
|
||||
ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
|
||||
|
||||
if (dawg_debug_level >= 3) {
|
||||
tprintf("def_letter_is_okay: current unichar=%s word_end=%d"
|
||||
" num active dawgs=%d\n",
|
||||
getUnicharset().debug_str(unichar_id).string(), word_end,
|
||||
dawg_args->active_dawgs->length());
|
||||
tprintf(
|
||||
"def_letter_is_okay: current unichar=%s word_end=%d"
|
||||
" num active dawgs=%d\n",
|
||||
getUnicharset().debug_str(unichar_id).string(), word_end,
|
||||
dawg_args->active_dawgs->length());
|
||||
}
|
||||
|
||||
// Do not accept words that contain kPatternUnicharID.
|
||||
@ -438,9 +437,10 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
// with the updated ref (an edge with the corresponding unichar id) into
|
||||
// dawg_args->updated_pos.
|
||||
for (int a = 0; a < dawg_args->active_dawgs->length(); ++a) {
|
||||
const DawgPosition &pos = (*dawg_args->active_dawgs)[a];
|
||||
const Dawg *punc_dawg = pos.punc_index >= 0 ? dawgs_[pos.punc_index] : nullptr;
|
||||
const Dawg *dawg = pos.dawg_index >= 0 ? dawgs_[pos.dawg_index] : nullptr;
|
||||
const DawgPosition& pos = (*dawg_args->active_dawgs)[a];
|
||||
const Dawg* punc_dawg =
|
||||
pos.punc_index >= 0 ? dawgs_[pos.punc_index] : nullptr;
|
||||
const Dawg* dawg = pos.dawg_index >= 0 ? dawgs_[pos.dawg_index] : nullptr;
|
||||
|
||||
if (!dawg && !punc_dawg) {
|
||||
// shouldn't happen.
|
||||
@ -450,23 +450,23 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
if (!dawg) {
|
||||
// We're in the punctuation dawg. A core dawg has not been chosen.
|
||||
NODE_REF punc_node = GetStartingNode(punc_dawg, pos.punc_ref);
|
||||
EDGE_REF punc_transition_edge = punc_dawg->edge_char_of(
|
||||
punc_node, Dawg::kPatternUnicharID, word_end);
|
||||
EDGE_REF punc_transition_edge =
|
||||
punc_dawg->edge_char_of(punc_node, Dawg::kPatternUnicharID, word_end);
|
||||
if (punc_transition_edge != NO_EDGE) {
|
||||
// Find all successors, and see which can transition.
|
||||
const SuccessorList &slist = *(successors_[pos.punc_index]);
|
||||
const SuccessorList& slist = *(successors_[pos.punc_index]);
|
||||
for (int s = 0; s < slist.length(); ++s) {
|
||||
int sdawg_index = slist[s];
|
||||
const Dawg *sdawg = dawgs_[sdawg_index];
|
||||
const Dawg* sdawg = dawgs_[sdawg_index];
|
||||
UNICHAR_ID ch = char_for_dawg(unicharset, unichar_id, sdawg);
|
||||
EDGE_REF dawg_edge = sdawg->edge_char_of(0, ch, word_end);
|
||||
if (dawg_edge != NO_EDGE) {
|
||||
if (dawg_debug_level >=3) {
|
||||
if (dawg_debug_level >= 3) {
|
||||
tprintf("Letter found in dawg %d\n", sdawg_index);
|
||||
}
|
||||
dawg_args->updated_dawgs->add_unique(
|
||||
DawgPosition(sdawg_index, dawg_edge,
|
||||
pos.punc_index, punc_transition_edge, false),
|
||||
DawgPosition(sdawg_index, dawg_edge, pos.punc_index,
|
||||
punc_transition_edge, false),
|
||||
dawg_debug_level > 0,
|
||||
"Append transition from punc dawg to current dawgs: ");
|
||||
if (sdawg->permuter() > curr_perm) curr_perm = sdawg->permuter();
|
||||
@ -476,16 +476,15 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
}
|
||||
}
|
||||
}
|
||||
EDGE_REF punc_edge = punc_dawg->edge_char_of(punc_node, unichar_id,
|
||||
word_end);
|
||||
EDGE_REF punc_edge =
|
||||
punc_dawg->edge_char_of(punc_node, unichar_id, word_end);
|
||||
if (punc_edge != NO_EDGE) {
|
||||
if (dawg_debug_level >=3) {
|
||||
if (dawg_debug_level >= 3) {
|
||||
tprintf("Letter found in punctuation dawg\n");
|
||||
}
|
||||
dawg_args->updated_dawgs->add_unique(
|
||||
DawgPosition(-1, NO_EDGE, pos.punc_index, punc_edge, false),
|
||||
dawg_debug_level > 0,
|
||||
"Extend punctuation dawg: ");
|
||||
dawg_debug_level > 0, "Extend punctuation dawg: ");
|
||||
if (PUNC_PERM > curr_perm) curr_perm = PUNC_PERM;
|
||||
if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true;
|
||||
}
|
||||
@ -496,14 +495,15 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
// We can end the main word here.
|
||||
// If we can continue on the punc ref, add that possibility.
|
||||
NODE_REF punc_node = GetStartingNode(punc_dawg, pos.punc_ref);
|
||||
EDGE_REF punc_edge = punc_node == NO_EDGE ? NO_EDGE
|
||||
: punc_dawg->edge_char_of(punc_node, unichar_id, word_end);
|
||||
EDGE_REF punc_edge =
|
||||
punc_node == NO_EDGE
|
||||
? NO_EDGE
|
||||
: punc_dawg->edge_char_of(punc_node, unichar_id, word_end);
|
||||
if (punc_edge != NO_EDGE) {
|
||||
dawg_args->updated_dawgs->add_unique(
|
||||
DawgPosition(pos.dawg_index, pos.dawg_ref,
|
||||
pos.punc_index, punc_edge, true),
|
||||
dawg_debug_level > 0,
|
||||
"Return to punctuation dawg: ");
|
||||
DawgPosition(pos.dawg_index, pos.dawg_ref, pos.punc_index,
|
||||
punc_edge, true),
|
||||
dawg_debug_level > 0, "Return to punctuation dawg: ");
|
||||
if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter();
|
||||
if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true;
|
||||
}
|
||||
@ -524,9 +524,11 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
|
||||
// Find the edge out of the node for the unichar_id.
|
||||
NODE_REF node = GetStartingNode(dawg, pos.dawg_ref);
|
||||
EDGE_REF edge = (node == NO_EDGE) ? NO_EDGE
|
||||
: dawg->edge_char_of(node, char_for_dawg(unicharset, unichar_id, dawg),
|
||||
word_end);
|
||||
EDGE_REF edge =
|
||||
(node == NO_EDGE)
|
||||
? NO_EDGE
|
||||
: dawg->edge_char_of(
|
||||
node, char_for_dawg(unicharset, unichar_id, dawg), word_end);
|
||||
|
||||
if (dawg_debug_level >= 3) {
|
||||
tprintf("Active dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n",
|
||||
@ -534,7 +536,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
}
|
||||
|
||||
if (edge != NO_EDGE) { // the unichar was found in the current dawg
|
||||
if (dawg_debug_level >=3) {
|
||||
if (dawg_debug_level >= 3) {
|
||||
tprintf("Letter found in dawg %d\n", pos.dawg_index);
|
||||
}
|
||||
if (word_end && punc_dawg && !punc_dawg->end_of_word(pos.punc_ref)) {
|
||||
@ -569,10 +571,10 @@ int Dict::def_letter_is_okay(void* void_dawg_args,
|
||||
return dawg_args->permuter;
|
||||
}
|
||||
|
||||
void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos,
|
||||
void Dict::ProcessPatternEdges(const Dawg* dawg, const DawgPosition& pos,
|
||||
UNICHAR_ID unichar_id, bool word_end,
|
||||
DawgArgs *dawg_args,
|
||||
PermuterType *curr_perm) const {
|
||||
DawgArgs* dawg_args,
|
||||
PermuterType* curr_perm) const {
|
||||
NODE_REF node = GetStartingNode(dawg, pos.dawg_ref);
|
||||
// Try to find the edge corresponding to the exact unichar_id and to all the
|
||||
// edges corresponding to the character class of unichar_id.
|
||||
@ -584,9 +586,10 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos,
|
||||
// On the first iteration check all the outgoing edges.
|
||||
// On the second iteration check all self-loops.
|
||||
for (int k = 0; k < 2; ++k) {
|
||||
EDGE_REF edge = (k == 0)
|
||||
? dawg->edge_char_of(node, unichar_id_patterns[i], word_end)
|
||||
: dawg->pattern_loop_edge(pos.dawg_ref, unichar_id_patterns[i], word_end);
|
||||
EDGE_REF edge =
|
||||
(k == 0) ? dawg->edge_char_of(node, unichar_id_patterns[i], word_end)
|
||||
: dawg->pattern_loop_edge(pos.dawg_ref,
|
||||
unichar_id_patterns[i], word_end);
|
||||
if (edge == NO_EDGE) continue;
|
||||
if (dawg_debug_level >= 3) {
|
||||
tprintf("Pattern dawg: [%d, " REFFORMAT "] edge=" REFFORMAT "\n",
|
||||
@ -607,7 +610,7 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos,
|
||||
// Fill the given active_dawgs vector with dawgs that could contain the
|
||||
// beginning of the word. If hyphenated() returns true, copy the entries
|
||||
// from hyphen_active_dawgs_ instead.
|
||||
void Dict::init_active_dawgs(DawgPositionVector *active_dawgs,
|
||||
void Dict::init_active_dawgs(DawgPositionVector* active_dawgs,
|
||||
bool ambigs_mode) const {
|
||||
int i;
|
||||
if (hyphenated()) {
|
||||
@ -624,11 +627,11 @@ void Dict::init_active_dawgs(DawgPositionVector *active_dawgs,
|
||||
}
|
||||
}
|
||||
|
||||
void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec,
|
||||
void Dict::default_dawgs(DawgPositionVector* dawg_pos_vec,
|
||||
bool suppress_patterns) const {
|
||||
bool punc_dawg_available =
|
||||
(punc_dawg_ != nullptr) &&
|
||||
punc_dawg_->edge_char_of(0, Dawg::kPatternUnicharID, true) != NO_EDGE;
|
||||
(punc_dawg_ != nullptr) &&
|
||||
punc_dawg_->edge_char_of(0, Dawg::kPatternUnicharID, true) != NO_EDGE;
|
||||
|
||||
for (int i = 0; i < dawgs_.length(); i++) {
|
||||
if (dawgs_[i] != nullptr &&
|
||||
@ -651,7 +654,7 @@ void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec,
|
||||
}
|
||||
}
|
||||
|
||||
void Dict::add_document_word(const WERD_CHOICE &best_choice) {
|
||||
void Dict::add_document_word(const WERD_CHOICE& best_choice) {
|
||||
// Do not add hyphenated word parts to the document dawg.
|
||||
// hyphen_word_ will be non-nullptr after the set_hyphen_word() is
|
||||
// called when the first part of the hyphenated word is
|
||||
@ -662,8 +665,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) {
|
||||
|
||||
int stringlen = best_choice.length();
|
||||
|
||||
if (valid_word(best_choice) || stringlen < 2)
|
||||
return;
|
||||
if (valid_word(best_choice) || stringlen < 2) return;
|
||||
|
||||
// Discard words that contain >= kDocDictMaxRepChars repeating unichars.
|
||||
if (best_choice.length() >= kDocDictMaxRepChars) {
|
||||
@ -682,8 +684,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) {
|
||||
|
||||
if (best_choice.certainty() < doc_dict_certainty_threshold ||
|
||||
stringlen == 2) {
|
||||
if (best_choice.certainty() < doc_dict_pending_threshold)
|
||||
return;
|
||||
if (best_choice.certainty() < doc_dict_pending_threshold) return;
|
||||
|
||||
if (!pending_words_->word_in_dawg(best_choice)) {
|
||||
if (stringlen > 2 ||
|
||||
@ -699,23 +700,20 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) {
|
||||
if (save_doc_words) {
|
||||
STRING filename(getCCUtil()->imagefile);
|
||||
filename += ".doc";
|
||||
FILE *doc_word_file = fopen(filename.string(), "a");
|
||||
FILE* doc_word_file = fopen(filename.string(), "a");
|
||||
if (doc_word_file == nullptr) {
|
||||
tprintf("Error: Could not open file %s\n", filename.string());
|
||||
ASSERT_HOST(doc_word_file);
|
||||
}
|
||||
fprintf(doc_word_file, "%s\n",
|
||||
best_choice.debug_string().string());
|
||||
fprintf(doc_word_file, "%s\n", best_choice.debug_string().string());
|
||||
fclose(doc_word_file);
|
||||
}
|
||||
document_words_->add_word_to_dawg(best_choice);
|
||||
}
|
||||
|
||||
void Dict::adjust_word(WERD_CHOICE *word,
|
||||
bool nonword,
|
||||
void Dict::adjust_word(WERD_CHOICE* word, bool nonword,
|
||||
XHeightConsistencyEnum xheight_consistency,
|
||||
float additional_adjust,
|
||||
bool modify_rating,
|
||||
float additional_adjust, bool modify_rating,
|
||||
bool debug) {
|
||||
bool is_han = (getUnicharset().han_sid() != getUnicharset().null_sid() &&
|
||||
word->GetTopScriptID() == getUnicharset().han_sid());
|
||||
@ -725,7 +723,7 @@ void Dict::adjust_word(WERD_CHOICE *word,
|
||||
float adjust_factor = additional_adjust;
|
||||
float new_rating = word->rating();
|
||||
new_rating += kRatingPad;
|
||||
const char *xheight_triggered = "";
|
||||
const char* xheight_triggered = "";
|
||||
if (word->length() > 1) {
|
||||
// Calculate x-height and y-offset consistency penalties.
|
||||
switch (xheight_consistency) {
|
||||
@ -750,8 +748,7 @@ void Dict::adjust_word(WERD_CHOICE *word,
|
||||
}
|
||||
if (debug) {
|
||||
tprintf("%sWord: %s %4.2f%s", nonword ? "Non-" : "",
|
||||
word->unichar_string().string(), word->rating(),
|
||||
xheight_triggered);
|
||||
word->unichar_string().string(), word->rating(), xheight_triggered);
|
||||
}
|
||||
|
||||
if (nonword) { // non-dictionary word
|
||||
@ -791,8 +788,8 @@ void Dict::adjust_word(WERD_CHOICE *word,
|
||||
word->set_adjust_factor(adjust_factor);
|
||||
}
|
||||
|
||||
int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
|
||||
const WERD_CHOICE *word_ptr = &word;
|
||||
int Dict::valid_word(const WERD_CHOICE& word, bool numbers_ok) const {
|
||||
const WERD_CHOICE* word_ptr = &word;
|
||||
WERD_CHOICE temp_word(word.unicharset());
|
||||
if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) {
|
||||
copy_hyphen_info(&temp_word);
|
||||
@ -802,15 +799,15 @@ int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
|
||||
if (word_ptr->length() == 0) return NO_PERM;
|
||||
// Allocate vectors for holding current and updated
|
||||
// active_dawgs and initialize them.
|
||||
DawgPositionVector *active_dawgs = new DawgPositionVector[2];
|
||||
DawgPositionVector* active_dawgs = new DawgPositionVector[2];
|
||||
init_active_dawgs(&(active_dawgs[0]), false);
|
||||
DawgArgs dawg_args(&(active_dawgs[0]), &(active_dawgs[1]), NO_PERM);
|
||||
int last_index = word_ptr->length() - 1;
|
||||
// Call letter_is_okay for each letter in the word.
|
||||
for (int i = hyphen_base_size(); i <= last_index; ++i) {
|
||||
if (!((this->*letter_is_okay_)(&dawg_args, *word_ptr->unicharset(),
|
||||
word_ptr->unichar_id(i),
|
||||
i == last_index))) break;
|
||||
word_ptr->unichar_id(i), i == last_index)))
|
||||
break;
|
||||
// Swap active_dawgs, constraints with the corresponding updated vector.
|
||||
if (dawg_args.updated_dawgs == &(active_dawgs[1])) {
|
||||
dawg_args.updated_dawgs = &(active_dawgs[0]);
|
||||
@ -821,12 +818,13 @@ int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
|
||||
}
|
||||
}
|
||||
delete[] active_dawgs;
|
||||
return valid_word_permuter(dawg_args.permuter, numbers_ok) ?
|
||||
dawg_args.permuter : NO_PERM;
|
||||
return valid_word_permuter(dawg_args.permuter, numbers_ok)
|
||||
? dawg_args.permuter
|
||||
: NO_PERM;
|
||||
}
|
||||
|
||||
bool Dict::valid_bigram(const WERD_CHOICE &word1,
|
||||
const WERD_CHOICE &word2) const {
|
||||
bool Dict::valid_bigram(const WERD_CHOICE& word1,
|
||||
const WERD_CHOICE& word2) const {
|
||||
if (bigram_dawg_ == nullptr) return false;
|
||||
|
||||
// Extract the core word from the middle of each word with any digits
|
||||
@ -862,13 +860,13 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1,
|
||||
}
|
||||
WERD_CHOICE normalized_word(&uchset, bigram_string.size());
|
||||
for (int i = 0; i < bigram_string.size(); ++i) {
|
||||
normalized_word.append_unichar_id_space_allocated(bigram_string[i], 1,
|
||||
0.0f, 0.0f);
|
||||
normalized_word.append_unichar_id_space_allocated(bigram_string[i], 1, 0.0f,
|
||||
0.0f);
|
||||
}
|
||||
return bigram_dawg_->word_in_dawg(normalized_word);
|
||||
}
|
||||
|
||||
bool Dict::valid_punctuation(const WERD_CHOICE &word) {
|
||||
bool Dict::valid_punctuation(const WERD_CHOICE& word) {
|
||||
if (word.length() == 0) return NO_PERM;
|
||||
int i;
|
||||
WERD_CHOICE new_word(word.unicharset());
|
||||
@ -882,21 +880,21 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) {
|
||||
!getUnicharset().get_isdigit(unichar_id)) {
|
||||
return false; // neither punc, nor alpha, nor digit
|
||||
} else if ((new_len = new_word.length()) == 0 ||
|
||||
new_word.unichar_id(new_len-1) != Dawg::kPatternUnicharID) {
|
||||
new_word.unichar_id(new_len - 1) != Dawg::kPatternUnicharID) {
|
||||
new_word.append_unichar_id(Dawg::kPatternUnicharID, 1, 0.0, 0.0);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < dawgs_.size(); ++i) {
|
||||
if (dawgs_[i] != nullptr &&
|
||||
dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION &&
|
||||
dawgs_[i]->word_in_dawg(new_word)) return true;
|
||||
if (dawgs_[i] != nullptr && dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION &&
|
||||
dawgs_[i]->word_in_dawg(new_word))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true if the language is space-delimited (not CJ, or T).
|
||||
bool Dict::IsSpaceDelimitedLang() const {
|
||||
const UNICHARSET &u_set = getUnicharset();
|
||||
const UNICHARSET& u_set = getUnicharset();
|
||||
if (u_set.han_sid() > 0) return false;
|
||||
if (u_set.katakana_sid() > 0) return false;
|
||||
if (u_set.thai_sid() > 0) return false;
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "lstmrecognizer.h"
|
||||
@ -66,7 +66,8 @@ LSTMRecognizer::~LSTMRecognizer() {
|
||||
}
|
||||
|
||||
// Loads a model from mgr, including the dictionary only if lang is not null.
|
||||
bool LSTMRecognizer::Load(const ParamsVectors* params, const char* lang, TessdataManager* mgr) {
|
||||
bool LSTMRecognizer::Load(const ParamsVectors* params, const char* lang,
|
||||
TessdataManager* mgr) {
|
||||
TFile fp;
|
||||
if (!mgr->GetComponent(TESSDATA_LSTM, &fp)) return false;
|
||||
if (!DeSerialize(mgr, &fp)) return false;
|
||||
@ -155,7 +156,8 @@ bool LSTMRecognizer::LoadRecoder(TFile* fp) {
|
||||
// from checkpoint or restore without having to go back and reload the
|
||||
// dictionary.
|
||||
// Some parameters have to be passed in (from langdata/config/api via Tesseract)
|
||||
bool LSTMRecognizer::LoadDictionary(const ParamsVectors* params, const char* lang, TessdataManager* mgr) {
|
||||
bool LSTMRecognizer::LoadDictionary(const ParamsVectors* params,
|
||||
const char* lang, TessdataManager* mgr) {
|
||||
delete dict_;
|
||||
dict_ = new Dict(&ccutil_);
|
||||
dict_->user_words_file.ResetFrom(params);
|
||||
@ -261,7 +263,8 @@ bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
|
||||
pixInvert(pix, pix);
|
||||
Input::PreparePixInput(network_->InputShape(), pix, &randomizer_,
|
||||
&inv_inputs);
|
||||
network_->Forward(debug, inv_inputs, nullptr, &scratch_space_, &inv_outputs);
|
||||
network_->Forward(debug, inv_inputs, nullptr, &scratch_space_,
|
||||
&inv_outputs);
|
||||
float inv_min, inv_mean, inv_sd;
|
||||
OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
|
||||
if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
|
||||
@ -405,7 +408,7 @@ void LSTMRecognizer::DebugActivationRange(const NetworkIO& outputs,
|
||||
// Helper returns true if the null_char is the winner at t, and it beats the
|
||||
// null_threshold, or the next choice is space, in which case we will use the
|
||||
// null anyway.
|
||||
#if 0 // TODO: unused, remove if still unused after 2020.
|
||||
#if 0 // TODO: unused, remove if still unused after 2020.
|
||||
static bool NullIsBest(const NetworkIO& output, float null_thr,
|
||||
int null_char, int t) {
|
||||
if (output.f(t)[null_char] >= null_thr) return true;
|
||||
|
@ -56,18 +56,10 @@ class LSTMRecognizer {
|
||||
LSTMRecognizer();
|
||||
~LSTMRecognizer();
|
||||
|
||||
int NumOutputs() const {
|
||||
return network_->NumOutputs();
|
||||
}
|
||||
int training_iteration() const {
|
||||
return training_iteration_;
|
||||
}
|
||||
int sample_iteration() const {
|
||||
return sample_iteration_;
|
||||
}
|
||||
double learning_rate() const {
|
||||
return learning_rate_;
|
||||
}
|
||||
int NumOutputs() const { return network_->NumOutputs(); }
|
||||
int training_iteration() const { return training_iteration_; }
|
||||
int sample_iteration() const { return sample_iteration_; }
|
||||
double learning_rate() const { return learning_rate_; }
|
||||
LossType OutputLossType() const {
|
||||
if (network_ == nullptr) return LT_NONE;
|
||||
StaticShape shape;
|
||||
@ -145,17 +137,14 @@ class LSTMRecognizer {
|
||||
// Sets the sample iteration to the given value. The sample_iteration_
|
||||
// determines the seed for the random number generator. The training
|
||||
// iteration is incremented only by a successful training iteration.
|
||||
void SetIteration(int iteration) {
|
||||
sample_iteration_ = iteration;
|
||||
}
|
||||
void SetIteration(int iteration) { sample_iteration_ = iteration; }
|
||||
// Accessors for textline image normalization.
|
||||
int NumInputs() const {
|
||||
return network_->NumInputs();
|
||||
}
|
||||
int NumInputs() const { return network_->NumInputs(); }
|
||||
int null_char() const { return null_char_; }
|
||||
|
||||
// Loads a model from mgr, including the dictionary only if lang is not null.
|
||||
bool Load(const ParamsVectors* params, const char* lang, TessdataManager* mgr);
|
||||
bool Load(const ParamsVectors* params, const char* lang,
|
||||
TessdataManager* mgr);
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
// If mgr contains a unicharset and recoder, then they are not encoded to fp.
|
||||
@ -175,7 +164,8 @@ class LSTMRecognizer {
|
||||
// on the unicharset matching. This enables training to deserialize a model
|
||||
// from checkpoint or restore without having to go back and reload the
|
||||
// dictionary.
|
||||
bool LoadDictionary(const ParamsVectors* params, const char* lang, TessdataManager* mgr);
|
||||
bool LoadDictionary(const ParamsVectors* params, const char* lang,
|
||||
TessdataManager* mgr);
|
||||
|
||||
// Recognizes the line image, contained within image_data, returning the
|
||||
// recognized tesseract WERD_RES for the words.
|
||||
@ -188,8 +178,8 @@ class LSTMRecognizer {
|
||||
PointerVector<WERD_RES>* words, int lstm_choice_mode = 0);
|
||||
|
||||
// Helper computes min and mean best results in the output.
|
||||
void OutputStats(const NetworkIO& outputs,
|
||||
float* min_output, float* mean_output, float* sd);
|
||||
void OutputStats(const NetworkIO& outputs, float* min_output,
|
||||
float* mean_output, float* sd);
|
||||
// Recognizes the image_data, returning the labels,
|
||||
// scores, and corresponding pairs of start, end x-coords in coords.
|
||||
// Returned in scale_factor is the reduction factor
|
||||
@ -209,11 +199,9 @@ class LSTMRecognizer {
|
||||
|
||||
// Displays the forward results in a window with the characters and
|
||||
// boundaries as determined by the labels and label_coords.
|
||||
void DisplayForward(const NetworkIO& inputs,
|
||||
const GenericVector<int>& labels,
|
||||
void DisplayForward(const NetworkIO& inputs, const GenericVector<int>& labels,
|
||||
const GenericVector<int>& label_coords,
|
||||
const char* window_name,
|
||||
ScrollView** window);
|
||||
const char* window_name, ScrollView** window);
|
||||
// Converts the network output to a sequence of labels. Outputs labels, scores
|
||||
// and start xcoords of each char, and each null_char_, with an additional
|
||||
// final xcoord for the end of the output.
|
||||
@ -232,8 +220,8 @@ class LSTMRecognizer {
|
||||
// Displays the labels and cuts at the corresponding xcoords.
|
||||
// Size of labels should match xcoords.
|
||||
void DisplayLSTMOutput(const GenericVector<int>& labels,
|
||||
const GenericVector<int>& xcoords,
|
||||
int height, ScrollView* window);
|
||||
const GenericVector<int>& xcoords, int height,
|
||||
ScrollView* window);
|
||||
|
||||
// Prints debug output detailing the activation path that is implied by the
|
||||
// xcoords.
|
||||
@ -253,8 +241,7 @@ class LSTMRecognizer {
|
||||
// Converts the network output to a sequence of labels, with scores, using
|
||||
// the simple character model (each position is a char, and the null_char_ is
|
||||
// mainly intended for tail padding.)
|
||||
void LabelsViaSimpleText(const NetworkIO& output,
|
||||
GenericVector<int>* labels,
|
||||
void LabelsViaSimpleText(const NetworkIO& output, GenericVector<int>* labels,
|
||||
GenericVector<int>* xcoords);
|
||||
|
||||
// Returns a string corresponding to the label starting at start. Sets *end
|
||||
|
Loading…
Reference in New Issue
Block a user