mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-19 23:19:07 +08:00
Adding --print-fonts-table parameter & tessedit_font_id configuration option
This commit is contained in:
parent
2e2a5b3ef4
commit
b852d658cb
@ -150,6 +150,11 @@ public:
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
/**
|
||||
* Print the Tesseract fonts table (one line per font: ID, name and style flags) to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE* fp) const;
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
|
@ -330,6 +330,22 @@ bool TessBaseAPI::GetVariableAsString(const char *name, std::string *val) const
|
||||
return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
|
||||
}
|
||||
|
||||
/** Print Tesseract fonts table to the given file. */
|
||||
void TessBaseAPI::PrintFontsTable(FILE *fp) const {
|
||||
const int fontinfo_size = tesseract_->get_fontinfo_table().size();
|
||||
for (int font_index = 1; font_index < fontinfo_size; ++font_index) {
|
||||
FontInfo font = tesseract_->get_fontinfo_table().at(font_index);
|
||||
fprintf(fp, "ID=%3d: %s is_italic=%s is_bold=%s"
|
||||
" is_fixed_pitch=%s is_serif=%s is_fraktur=%s\n",
|
||||
font_index, font.name,
|
||||
font.is_italic() ? "true" : "false",
|
||||
font.is_bold() ? "true" : "false",
|
||||
font.is_fixed_pitch() ? "true" : "false",
|
||||
font.is_serif() ? "true" : "false",
|
||||
font.is_fraktur() ? "true" : "false");
|
||||
}
|
||||
}
|
||||
|
||||
/** Print Tesseract parameters to the given file. */
|
||||
void TessBaseAPI::PrintVariables(FILE *fp) const {
|
||||
ParamUtils::PrintParams(fp, tesseract_->params());
|
||||
|
@ -226,6 +226,9 @@ static void PrintHelpExtra(const char *program) {
|
||||
#endif
|
||||
"--version\n"
|
||||
" %s --list-langs [--tessdata-dir PATH]\n"
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
" %s --print-fonts-table [options...] [configfile...]\n"
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
" %s --print-parameters [options...] [configfile...]\n"
|
||||
" %s imagename|imagelist|stdin outputbase|stdout [options...] "
|
||||
"[configfile...]\n"
|
||||
@ -244,7 +247,11 @@ static void PrintHelpExtra(const char *program) {
|
||||
#endif
|
||||
"NOTE: These options must occur before any configfile.\n"
|
||||
"\n",
|
||||
program, program, program, program);
|
||||
program, program, program, program
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
, program
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
);
|
||||
|
||||
PrintHelpForPSM();
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -263,6 +270,9 @@ static void PrintHelpExtra(const char *program) {
|
||||
#endif
|
||||
" -v, --version Show version information.\n"
|
||||
" --list-langs List available languages for tesseract engine.\n"
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
" --print-fonts-table Print tesseract fonts table.\n"
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
" --print-parameters Print tesseract parameters.\n");
|
||||
}
|
||||
|
||||
@ -358,7 +368,7 @@ static bool checkArgValues(int arg, const char *mode, int count) {
|
||||
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
|
||||
static bool ParseArgs(int argc, char **argv, const char **lang, const char **image,
|
||||
const char **outputbase, const char **datapath, l_int32 *dpi,
|
||||
bool *list_langs, bool *print_parameters, std::vector<std::string> *vars_vec,
|
||||
bool *list_langs, bool *print_parameters, bool* print_fonts_table, std::vector<std::string> *vars_vec,
|
||||
std::vector<std::string> *vars_values, l_int32 *arg_i,
|
||||
tesseract::PageSegMode *pagesegmode, tesseract::OcrEngineMode *enginemode) {
|
||||
bool noocr = false;
|
||||
@ -422,6 +432,11 @@ static bool ParseArgs(int argc, char **argv, const char **lang, const char **ima
|
||||
} else if (strcmp(argv[i], "--print-parameters") == 0) {
|
||||
noocr = true;
|
||||
*print_parameters = true;
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
} else if (strcmp(argv[i], "--print-fonts-table") == 0) {
|
||||
noocr = true;
|
||||
*print_fonts_table = true;
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
} else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
|
||||
// handled properly after api init
|
||||
++i;
|
||||
@ -608,6 +623,7 @@ int main(int argc, char **argv) {
|
||||
const char *datapath = nullptr;
|
||||
bool list_langs = false;
|
||||
bool print_parameters = false;
|
||||
bool print_fonts_table = false;
|
||||
l_int32 dpi = 0;
|
||||
int arg_i = 1;
|
||||
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
|
||||
@ -634,7 +650,7 @@ int main(int argc, char **argv) {
|
||||
#endif // HAVE_TIFFIO_H && _WIN32
|
||||
|
||||
if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
|
||||
&print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode, &enginemode)) {
|
||||
&print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i, &pagesegmode, &enginemode)) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
@ -643,7 +659,7 @@ int main(int argc, char **argv) {
|
||||
lang = "eng";
|
||||
}
|
||||
|
||||
if (image == nullptr && !list_langs && !print_parameters) {
|
||||
if (image == nullptr && !list_langs && !print_parameters && !print_fonts_table) {
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
@ -684,6 +700,16 @@ int main(int argc, char **argv) {
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
if (print_fonts_table) {
|
||||
FILE* fout = stdout;
|
||||
fprintf(stdout, "Tesseract fonts table:\n");
|
||||
api.PrintFontsTable(fout);
|
||||
api.End();
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
FixPageSegMode(api, pagesegmode);
|
||||
|
||||
if (dpi) {
|
||||
|
@ -1922,10 +1922,22 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
ASSERT_HOST(word->best_choice != nullptr);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
const int fontinfo_size = get_fontinfo_table().size();
|
||||
const int fontinfo_size = fontinfo_table_.size();
|
||||
if (fontinfo_size == 0) {
|
||||
return;
|
||||
}
|
||||
if (tessedit_font_id > 0) {
|
||||
if (tessedit_font_id >= fontinfo_size) {
|
||||
tprintf("Error, invalid font ID provided: must be below %d.\n"
|
||||
"Falling back to font auto-detection.\n", fontinfo_size);
|
||||
} else {
|
||||
word->fontinfo = &fontinfo_table_.at(tessedit_font_id);
|
||||
word->fontinfo2 = nullptr;
|
||||
word->fontinfo_id_count = INT8_MAX;
|
||||
word->fontinfo_id2_count = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::vector<int> font_total_score(fontinfo_size);
|
||||
|
||||
// Compute the font scores for the word
|
||||
|
@ -129,6 +129,7 @@ Tesseract::Tesseract()
|
||||
, BOOL_MEMBER(tessedit_enable_doc_dict, true, "Add words to the document dictionary",
|
||||
this->params())
|
||||
, BOOL_MEMBER(tessedit_debug_fonts, false, "Output font info per char", this->params())
|
||||
, INT_MEMBER(tessedit_font_id, 0, "Font ID to use or zero", this->params())
|
||||
, BOOL_MEMBER(tessedit_debug_block_rejection, false, "Block and Row stats", this->params())
|
||||
, BOOL_MEMBER(tessedit_enable_bigram_correction, true,
|
||||
"Enable correction based on the word bigram dictionary.", this->params())
|
||||
|
@ -798,6 +798,8 @@ public:
|
||||
BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?");
|
||||
BOOL_VAR_H(tessedit_enable_doc_dict, true, "Add words to the document dictionary");
|
||||
BOOL_VAR_H(tessedit_debug_fonts, false, "Output font info per char");
|
||||
INT_VAR_H(tessedit_font_id, 0, "Disable font detection and use the font"
|
||||
" corresponding to the ID specified instead");
|
||||
BOOL_VAR_H(tessedit_debug_block_rejection, false, "Block and Row stats");
|
||||
BOOL_VAR_H(tessedit_enable_bigram_correction, true,
|
||||
"Enable correction based on the word bigram dictionary.");
|
||||
|
Loading…
Reference in New Issue
Block a user