tesseract/src/tesseract.cpp
Stefan Weil ade0dfaa8c
Some checks failed
CodeQL / Analyze (cpp) (push) Has been cancelled
vcpkg / build (windows-2019) (push) Has been cancelled
unittest / ${{ matrix.config.name }} (map[cxx:clang++ cxxflags:-g -O2 -fsanitize=address,undefined -stdlib=libc++ name:ubuntu-22.04-clang-unittest os:ubuntu-22.04]) (push) Has been cancelled
unittest / ${{ matrix.config.name }} (map[cxx:g++ cxxflags:-g -O2 -fsanitize=address,undefined name:ubuntu-24.04-gcc-unittest os:ubuntu-24.04]) (push) Has been cancelled
unittest-macos / ${{ matrix.config.name }} (map[cxx:clang++ name:macos-arm-14-clang-unittest os:macos-14]) (push) Has been cancelled
unittest-macos / ${{ matrix.config.name }} (map[cxx:clang++ name:macos-latest-clang-unittest os:macos-latest]) (push) Has been cancelled
unittest-macos / ${{ matrix.config.name }} (map[cxx:g++ name:macos-latest-gcc-unittest os:macos-latest]) (push) Has been cancelled
sw / build (fedora:latest, ubuntu-22.04) (push) Has been cancelled
sw / build (macos-latest) (push) Has been cancelled
sw / build (windows-2022) (push) Has been cancelled
msys2 / windows (mingw-w64-x86_64, MINGW64) (push) Has been cancelled
cmake / ${{ matrix.config.name }} (map[cxx:clang++ name:macos-14-clang-15-cmake os:macos-14]) (push) Has been cancelled
cmake / ${{ matrix.config.name }} (map[cxx:clang++ name:macos-15-clang-cmake os:macos-15]) (push) Has been cancelled
cmake / ${{ matrix.config.name }} (map[cxx:clang++-15 name:ubuntu-22.04-clang-15-cmake os:ubuntu-22.04]) (push) Has been cancelled
cmake / ${{ matrix.config.name }} (map[cxx:g++-12 name:ubuntu-22.04-gcc-12-cmake os:ubuntu-22.04]) (push) Has been cancelled
cmake / ${{ matrix.config.name }} (map[cxx:g++-14 name:macos-14-gcc-14-cmake os:macos-14]) (push) Has been cancelled
cmake / ${{ matrix.config.name }} (map[cxx:g++-14 name:ubuntu-24.04-gcc-12-cmake os:ubuntu-24.04]) (push) Has been cancelled
cmake-win64 / cmake-win64 (push) Has been cancelled
autotools / linux (map[cxx:clang++-15 name:ubuntu-22.04-clang-15-autotools os:ubuntu-22.04]) (push) Has been cancelled
autotools / linux (map[cxx:g++-11 name:ubuntu-22.04-gcc-11-autotools os:ubuntu-22.04]) (push) Has been cancelled
autotools / linux (map[cxx:g++-12 name:ubuntu-22.04-gcc-12-autotools os:ubuntu-22.04]) (push) Has been cancelled
autotools / linux (map[cxx:g++-14 name:ubuntu-24.04-gcc-14-autotools os:ubuntu-24.04]) (push) Has been cancelled
autotools-macos / brew (map[cxx:clang++ name:macos-latest-clang-autotools os:macos-latest]) (push) Has been cancelled
autotools-macos / ports (map[cxx:clang++ name:macos-latest-clang-autotools os:macos-latest]) (push) Has been cancelled
unittest-disablelegacy / linux (clang++-18, ubuntu-24.04) (push) Has been cancelled
unittest-disablelegacy / linux (g++, ubuntu-24.04) (push) Has been cancelled
Fix two compiler warnings in tesseract.cpp
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2025-05-02 07:26:38 +02:00

866 lines
28 KiB
C++

/**********************************************************************
* File: tesseract.cpp
* Description: Main program for merge of tess and editor.
* Author: Ray Smith
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include <cerrno> // for errno
#if defined(__USE_GNU)
# include <cfenv> // for feenableexcept
#endif
#include <climits> // for INT_MIN, INT_MAX
#include <cstdlib> // for std::getenv
#include <iostream>
#include <map> // for std::map
#include <memory> // std::unique_ptr
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include "dict.h"
#include <tesseract/renderer.h>
#include "simddetect.h"
#include "tesseractclass.h" // for AnyTessLang
#include "tprintf.h" // for tprintf
#ifdef _OPENMP
# include <omp.h>
#endif
#if defined(HAVE_LIBARCHIVE)
# include <archive.h>
#endif
#if defined(HAVE_LIBCURL)
# include <curl/curl.h>
#endif
#if defined(_WIN32)
# include <fcntl.h>
# include <io.h>
# if defined(HAVE_TIFFIO_H)
# include <tiffio.h>
static void Win32ErrorHandler(const char *module, const char *fmt, va_list ap) {
if (module != nullptr) {
fprintf(stderr, "%s: ", module);
}
vfprintf(stderr, fmt, ap);
fprintf(stderr, ".\n");
}
static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) {
if (module != nullptr) {
fprintf(stderr, "%s: ", module);
}
fprintf(stderr, "Warning, ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, ".\n");
}
# endif /* HAVE_TIFFIO_H */
class AutoWin32ConsoleOutputCP {
public:
explicit AutoWin32ConsoleOutputCP(UINT codeCP) :
oldCP_(GetConsoleOutputCP()) {
SetConsoleOutputCP(codeCP);
}
~AutoWin32ConsoleOutputCP() {
SetConsoleOutputCP(oldCP_);
}
private:
UINT oldCP_;
};
static AutoWin32ConsoleOutputCP autoWin32ConsoleOutputCP(CP_UTF8);
#endif // _WIN32
using namespace tesseract;
static void PrintVersionInfo() {
char *versionStrP;
printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
versionStrP = getLeptonicaVersion();
printf(" %s\n", versionStrP);
lept_free(versionStrP);
versionStrP = getImagelibVersions();
printf(" %s\n", versionStrP);
lept_free(versionStrP);
#if defined(HAVE_NEON) || defined(__aarch64__)
if (tesseract::SIMDDetect::IsNEONAvailable())
printf(" Found NEON\n");
#elif defined(HAVE_RVV)
if (tesseract::SIMDDetect::IsRVVAvailable())
printf(" Found RVV\n");
#else
if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
printf(" Found AVX512BW\n");
}
if (tesseract::SIMDDetect::IsAVX512FAvailable()) {
printf(" Found AVX512F\n");
}
if (tesseract::SIMDDetect::IsAVX512VNNIAvailable()) {
printf(" Found AVX512VNNI\n");
}
if (tesseract::SIMDDetect::IsAVX2Available()) {
printf(" Found AVX2\n");
}
if (tesseract::SIMDDetect::IsAVXAvailable()) {
printf(" Found AVX\n");
}
if (tesseract::SIMDDetect::IsFMAAvailable()) {
printf(" Found FMA\n");
}
if (tesseract::SIMDDetect::IsSSEAvailable()) {
printf(" Found SSE4.1\n");
}
#endif
#ifdef _OPENMP
printf(" Found OpenMP %d\n", _OPENMP);
#endif
#if defined(HAVE_LIBARCHIVE)
# if ARCHIVE_VERSION_NUMBER >= 3002000
printf(" Found %s\n", archive_version_details());
# else
printf(" Found %s\n", archive_version_string());
# endif // ARCHIVE_VERSION_NUMBER
#endif // HAVE_LIBARCHIVE
#if defined(HAVE_LIBCURL)
printf(" Found %s\n", curl_version());
#endif
}
static void PrintHelpForPSM() {
printf(
"Page segmentation modes (PSM):\n"
" 0|osd_only Orientation and script detection (OSD) only.\n"
" 1|auto_osd Automatic page segmentation with OSD.\n"
" 2|auto_only Automatic page segmentation, but no OSD, or OCR. (not "
"implemented)\n"
" 3|auto Fully automatic page segmentation, but no OSD. (Default)\n"
" 4|single_column Assume a single column of text of variable sizes.\n"
" 5|single_block_vert_text Assume a single uniform block of vertically aligned text.\n"
" 6|single_block Assume a single uniform block of text.\n"
" 7|single_line Treat the image as a single text line.\n"
" 8|single_word Treat the image as a single word.\n"
" 9|circle_word Treat the image as a single word in a circle.\n"
" 10|single_char Treat the image as a single character.\n"
" 11|sparse_text Sparse text. Find as much text as possible in no"
" particular order.\n"
" 12|sparse_text_osd Sparse text with OSD.\n"
" 13|raw_line Raw line. Treat the image as a single text line,\n"
" bypassing hacks that are Tesseract-specific.\n"
);
#ifdef DISABLED_LEGACY_ENGINE
printf("\nNOTE: The OSD modes are currently disabled.\n");
#endif
}
#ifndef DISABLED_LEGACY_ENGINE
static void PrintHelpForOEM() {
printf(
"OCR Engine modes (OEM):\n"
" 0|tesseract_only Legacy engine only.\n"
" 1|lstm_only Neural nets LSTM engine only.\n"
" 2|tesseract_lstm_combined Legacy + LSTM engines.\n"
" 3|default Default, based on what is available.\n"
);
}
#endif // ndef DISABLED_LEGACY_ENGINE
static void PrintHelpExtra(const char *program) {
printf(
"Usage:\n"
" %s --help | --help-extra | --help-psm | "
#ifndef DISABLED_LEGACY_ENGINE
"--help-oem | "
#endif
"--version\n"
" %s --list-langs [--tessdata-dir PATH]\n"
#ifndef DISABLED_LEGACY_ENGINE
" %s --print-fonts-table [options...] [configfile...]\n"
#endif // ndef DISABLED_LEGACY_ENGINE
" %s --print-parameters [options...] [configfile...]\n"
" %s imagename|imagelist|stdin outputbase|stdout [options...] "
"[configfile...]\n"
"\n"
"OCR options:\n"
" --tessdata-dir PATH Specify the location of tessdata path.\n"
" --user-words PATH Specify the location of user words file.\n"
" --user-patterns PATH Specify the location of user patterns file.\n"
" --dpi VALUE Specify DPI for input image.\n"
" --loglevel LEVEL Specify logging level. LEVEL can be\n"
" ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL or OFF.\n"
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
" -c VAR=VALUE Set value for config variables.\n"
" Multiple -c arguments are allowed.\n"
" --psm PSM|NUM Specify page segmentation mode.\n"
#ifndef DISABLED_LEGACY_ENGINE
" --oem OEM|NUM Specify OCR Engine mode.\n"
#endif
"NOTE: These options must occur before any configfile.\n"
"\n",
program, program, program, program
#ifndef DISABLED_LEGACY_ENGINE
, program
#endif // ndef DISABLED_LEGACY_ENGINE
);
PrintHelpForPSM();
#ifndef DISABLED_LEGACY_ENGINE
printf("\n");
PrintHelpForOEM();
#endif
printf(
"\n"
"Single options:\n"
" -h, --help Show minimal help message.\n"
" --help-extra Show extra help for advanced users.\n"
" --help-psm Show page segmentation modes.\n"
#ifndef DISABLED_LEGACY_ENGINE
" --help-oem Show OCR Engine modes.\n"
#endif
" -v, --version Show version information.\n"
" --list-langs List available languages for tesseract engine.\n"
#ifndef DISABLED_LEGACY_ENGINE
" --print-fonts-table Print tesseract fonts table.\n"
#endif // ndef DISABLED_LEGACY_ENGINE
" --print-parameters Print tesseract parameters.\n");
}
static void PrintHelpMessage(const char *program) {
printf(
"Usage:\n"
" %s --help | --help-extra | --version\n"
" %s --list-langs\n"
" %s imagename outputbase [options...] [configfile...]\n"
"\n"
"OCR options:\n"
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
"NOTE: These options must occur before any configfile.\n"
"\n"
"Single options:\n"
" --help Show this help message.\n"
" --help-extra Show extra help for advanced users.\n"
" --version Show version information.\n"
" --list-langs List available languages for tesseract "
"engine.\n",
program, program, program);
}
static void PrintLangsList(tesseract::TessBaseAPI &api) {
std::vector<std::string> languages;
api.GetAvailableLanguagesAsVector(&languages);
printf("List of available languages in \"%s\" (%zu):\n",
api.GetDatapath(), languages.size());
for (const auto &language : languages) {
printf("%s\n", language.c_str());
}
api.End();
}
/**
* We have 2 possible sources of pagesegmode: a config file and
* the command line. For backwards compatibility reasons, the
* default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
* default for this program is tesseract::PSM_AUTO. We will let
* the config file take priority, so the command-line default
* can take priority over the tesseract default, so we use the
* value from the command line only if the retrieved mode
* is still tesseract::PSM_SINGLE_BLOCK, indicating no change
* in any config file. Therefore the only way to force
* tesseract::PSM_SINGLE_BLOCK is from the command line.
* It would be simpler if we could set the value before Init,
* but that doesn't work.
*/
static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode pagesegmode) {
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) {
api.SetPageSegMode(pagesegmode);
}
}
static bool checkArgValues(int arg, const char *mode, int count) {
if (arg >= count || arg < 0) {
printf("Invalid %s value, please enter a symbolic %s value or a number between 0-%d\n", mode, mode, count - 1);
return false;
}
return true;
}
// Convert a symbolic or numeric string to an OEM value.
static int stringToOEM(const std::string arg) {
std::map<std::string, int> oem_map = {
{"0", 0},
{"1", 1},
{"2", 2},
{"3", 3},
{"tesseract_only", 0},
{"lstm_only", 1},
{"tesseract_lstm_combined", 2},
{"default", 3},
};
auto it = oem_map.find(arg);
return it == oem_map.end() ? -1 : it->second;
}
static int stringToPSM(const std::string arg) {
std::map<std::string, int> psm_map = {
{"0", 0},
{"1", 1},
{"2", 2},
{"3", 3},
{"4", 4},
{"5", 5},
{"6", 6},
{"7", 7},
{"8", 8},
{"9", 9},
{"10", 10},
{"11", 11},
{"12", 12},
{"13", 13},
{"osd_only", 0},
{"auto_osd", 1},
{"auto_only", 2},
{"auto", 3},
{"single_column", 4},
{"single_block_vert_text", 5},
{"single_block", 6},
{"single_line", 7},
{"single_word", 8},
{"circle_word", 9},
{"single_char", 10},
{"sparse_text", 11},
{"sparse_text_osd", 12},
{"raw_line", 13},
};
auto it = psm_map.find(arg);
return it == psm_map.end() ? -1 : it->second;
}
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
static bool ParseArgs(int argc, char **argv, const char **lang, const char **image,
const char **outputbase, const char **datapath, l_int32 *dpi,
bool *list_langs, bool *print_parameters, bool *print_fonts_table,
std::vector<std::string> *vars_vec, std::vector<std::string> *vars_values,
l_int32 *arg_i, tesseract::PageSegMode *pagesegmode,
tesseract::OcrEngineMode *enginemode) {
bool noocr = false;
int i;
for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
if (*image != nullptr && *outputbase == nullptr) {
// outputbase follows image, don't allow options at that position.
*outputbase = argv[i];
} else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
PrintHelpMessage(argv[0]);
noocr = true;
} else if (strcmp(argv[i], "--help-extra") == 0) {
PrintHelpExtra(argv[0]);
noocr = true;
} else if ((strcmp(argv[i], "--help-psm") == 0)) {
PrintHelpForPSM();
noocr = true;
#ifndef DISABLED_LEGACY_ENGINE
} else if ((strcmp(argv[i], "--help-oem") == 0)) {
PrintHelpForOEM();
noocr = true;
#endif
} else if ((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) {
PrintVersionInfo();
noocr = true;
} else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
*lang = argv[i + 1];
++i;
} else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
*datapath = argv[i + 1];
++i;
} else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
*dpi = atoi(argv[i + 1]);
++i;
} else if (strcmp(argv[i], "--loglevel") == 0 && i + 1 < argc) {
// Allow the log levels which are used by log4cxx.
const std::string loglevel_string = argv[++i];
static const std::map<const std::string, int> loglevels {
{"ALL", INT_MIN},
{"TRACE", 5000},
{"DEBUG", 10000},
{"INFO", 20000},
{"WARN", 30000},
{"ERROR", 40000},
{"FATAL", 50000},
{"OFF", INT_MAX},
};
try {
auto loglevel = loglevels.at(loglevel_string);
log_level = loglevel;
} catch (const std::out_of_range &) {
// TODO: Allow numeric argument?
tprintf("Error, unsupported --loglevel %s\n", loglevel_string.c_str());
return false;
}
} else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
vars_vec->push_back("user_words_file");
vars_values->push_back(argv[i + 1]);
++i;
} else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
vars_vec->push_back("user_patterns_file");
vars_values->push_back(argv[i + 1]);
++i;
} else if (strcmp(argv[i], "--list-langs") == 0) {
noocr = true;
*list_langs = true;
} else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
int psm = stringToPSM(argv[i + 1]);
if (!checkArgValues(psm, "PSM", tesseract::PSM_COUNT)) {
return false;
}
*pagesegmode = static_cast<tesseract::PageSegMode>(psm);
++i;
} else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
#ifndef DISABLED_LEGACY_ENGINE
int oem = stringToOEM(argv[i + 1]);
if (!checkArgValues(oem, "OEM", tesseract::OEM_COUNT)) {
return false;
}
*enginemode = static_cast<tesseract::OcrEngineMode>(oem);
#endif
++i;
} else if (strcmp(argv[i], "--print-parameters") == 0) {
noocr = true;
*print_parameters = true;
#ifndef DISABLED_LEGACY_ENGINE
} else if (strcmp(argv[i], "--print-fonts-table") == 0) {
noocr = true;
*print_fonts_table = true;
#endif // ndef DISABLED_LEGACY_ENGINE
} else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
const std::string argument(argv[i + 1]);
const auto equal_pos = argument.find('=');
if (equal_pos == std::string::npos) {
throw std::invalid_argument("Missing '=' in configvar assignment");
}
// Extract key and value
const std::string key = argument.substr(0, equal_pos);
const std::string value = argument.substr(equal_pos + 1);
vars_vec->push_back(key);
vars_values->push_back(value);
++i;
} else if (*image == nullptr) {
*image = argv[i];
} else {
// Unexpected argument.
fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
return false;
}
}
*arg_i = i;
if (*pagesegmode == tesseract::PSM_OSD_ONLY) {
// OSD = orientation and script detection.
if (*lang != nullptr && strcmp(*lang, "osd")) {
// If the user explicitly specifies a language (other than osd)
// or a script, only orientation can be detected.
fprintf(stderr, "Warning, detects only orientation with -l %s\n", *lang);
} else {
// That mode requires osd.traineddata to detect orientation and script.
*lang = "osd";
}
}
if (*outputbase == nullptr && noocr == false) {
PrintHelpMessage(argv[0]);
return false;
}
return true;
}
static void PreloadRenderers(tesseract::TessBaseAPI &api,
std::vector<std::unique_ptr<TessResultRenderer>> &renderers,
tesseract::PageSegMode pagesegmode, const char *outputbase) {
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
#ifndef DISABLED_LEGACY_ENGINE
renderers.push_back(std::make_unique<tesseract::TessOsdRenderer>(outputbase));
#endif // ndef DISABLED_LEGACY_ENGINE
} else {
bool error = false;
bool b;
api.GetBoolVariable("tessedit_create_hocr", &b);
if (b) {
bool font_info;
api.GetBoolVariable("hocr_font_info", &font_info);
auto renderer = std::make_unique<tesseract::TessHOcrRenderer>(outputbase, font_info);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create hOCR output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_alto", &b);
if (b) {
auto renderer = std::make_unique<tesseract::TessAltoRenderer>(outputbase);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create ALTO output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_page_xml", &b);
if (b) {
auto renderer = std::make_unique<tesseract::TessPAGERenderer>(outputbase);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create PAGE output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_tsv", &b);
if (b) {
bool font_info;
api.GetBoolVariable("hocr_font_info", &font_info);
auto renderer = std::make_unique<tesseract::TessTsvRenderer>(outputbase, font_info);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create TSV output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_pdf", &b);
if (b) {
#ifdef WIN32
if (_setmode(_fileno(stdout), _O_BINARY) == -1)
tprintf("ERROR: cin to binary: %s", strerror(errno));
#endif // WIN32
bool textonly;
api.GetBoolVariable("textonly_pdf", &textonly);
auto renderer = std::make_unique<tesseract::TessPDFRenderer>(outputbase, api.GetDatapath(), textonly);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create PDF output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_write_unlv", &b);
if (b) {
api.SetVariable("unlv_tilde_crunching", "true");
auto renderer = std::make_unique<tesseract::TessUnlvRenderer>(outputbase);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create UNLV output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_lstmbox", &b);
if (b) {
auto renderer = std::make_unique<tesseract::TessLSTMBoxRenderer>(outputbase);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create LSTM BOX output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_boxfile", &b);
if (b) {
auto renderer = std::make_unique<tesseract::TessBoxTextRenderer>(outputbase);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create BOX output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_wordstrbox", &b);
if (b) {
auto renderer = std::make_unique<tesseract::TessWordStrBoxRenderer>(outputbase);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create WordStr BOX output file: %s\n", strerror(errno));
error = true;
}
}
api.GetBoolVariable("tessedit_create_txt", &b);
if (b || (!error && renderers.empty())) {
// Create text output if no other output was requested
// even if text output was not explicitly requested unless
// there was an error.
auto renderer = std::make_unique<tesseract::TessTextRenderer>(outputbase);
if (renderer->happy()) {
renderers.push_back(std::move(renderer));
} else {
tprintf("Error, could not create TXT output file: %s\n", strerror(errno));
}
}
}
// Null-out the renderers that are
// added to the root, and leave the root in the vector.
for (size_t r = 1; r < renderers.size(); ++r) {
renderers[0]->insert(renderers[r].get());
renderers[r].release(); // at the moment insert() is owning
}
}
/**********************************************************************
* main()
*
**********************************************************************/
static int main1(int argc, char **argv) {
#if defined(__USE_GNU) && defined(HAVE_FEENABLEEXCEPT)
// Raise SIGFPE.
# if defined(__clang__)
// clang creates code which causes some FP exceptions, so don't enable those.
feenableexcept(FE_DIVBYZERO);
# else
feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
# endif
#endif
const char *lang = nullptr;
const char *image = nullptr;
const char *outputbase = nullptr;
const char *datapath = nullptr;
bool list_langs = false;
bool print_parameters = false;
bool print_fonts_table = false;
l_int32 dpi = 0;
int arg_i = 1;
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
#ifdef DISABLED_LEGACY_ENGINE
auto enginemode = tesseract::OEM_LSTM_ONLY;
#else
tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
#endif
std::vector<std::string> vars_vec;
std::vector<std::string> vars_values;
if (std::getenv("LEPT_MSG_SEVERITY")) {
// Get Leptonica message level from environment variable.
setMsgSeverity(L_SEVERITY_EXTERNAL);
} else {
// Disable debugging and informational messages from Leptonica.
setMsgSeverity(L_SEVERITY_ERROR);
}
#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
/* Show libtiff errors and warnings on console (not in GUI). */
TIFFSetErrorHandler(Win32ErrorHandler);
TIFFSetWarningHandler(Win32WarningHandler);
#endif // HAVE_TIFFIO_H && _WIN32
if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
&print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i,
&pagesegmode, &enginemode)) {
return EXIT_FAILURE;
}
bool in_recognition_mode = !list_langs && !print_parameters && !print_fonts_table;
if (lang == nullptr && in_recognition_mode) {
// Set default language model if none was given and a model file is needed.
lang = "eng";
}
if (image == nullptr && in_recognition_mode) {
return EXIT_SUCCESS;
}
// Call GlobalDawgCache here to create the global DawgCache object before
// the TessBaseAPI object. This fixes the order of destructor calls:
// first TessBaseAPI must be destructed, DawgCache must be the last object.
tesseract::Dict::GlobalDawgCache();
TessBaseAPI api;
api.SetOutputName(outputbase);
const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
&vars_vec, &vars_values, false);
// SIMD settings might be overridden by config variable.
tesseract::SIMDDetect::Update();
if (list_langs) {
PrintLangsList(api);
return EXIT_SUCCESS;
}
if (init_failed) {
fprintf(stderr, "Could not initialize tesseract.\n");
return EXIT_FAILURE;
}
if (print_parameters) {
FILE *fout = stdout;
fprintf(stdout, "Tesseract parameters:\n");
api.PrintVariables(fout);
api.End();
return EXIT_SUCCESS;
}
#ifndef DISABLED_LEGACY_ENGINE
if (print_fonts_table) {
FILE *fout = stdout;
fprintf(stdout, "Tesseract fonts table:\n");
api.PrintFontsTable(fout);
api.End();
return EXIT_SUCCESS;
}
#endif // ndef DISABLED_LEGACY_ENGINE
FixPageSegMode(api, pagesegmode);
if (dpi) {
auto dpi_string = std::to_string(dpi);
api.SetVariable("user_defined_dpi", dpi_string.c_str());
}
int ret_val = EXIT_SUCCESS;
if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
Pix *pixs = pixRead(image);
if (!pixs) {
fprintf(stderr, "Leptonica can't process input file: %s\n", image);
return 2;
}
api.SetImage(pixs);
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;
const std::unique_ptr<const tesseract::PageIterator> it(api.AnalyseLayout());
if (it) {
// TODO: Implement output of page segmentation, see documentation
// ("Automatic page segmentation, but no OSD, or OCR").
it->Orientation(&orientation, &direction, &order, &deskew_angle);
tprintf(
"Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
} else {
ret_val = EXIT_FAILURE;
}
pixDestroy(&pixs);
return ret_val;
}
// Set in_training_mode to true when using one of these configs:
// ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train.
// In this mode no other OCR result files are written.
bool b = false;
bool in_training_mode = (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY) {
if (!api.tesseract()->AnyTessLang()) {
fprintf(stderr, "Error, OSD requires a model for the legacy engine\n");
return EXIT_FAILURE;
}
}
#ifdef DISABLED_LEGACY_ENGINE
auto cur_psm = api.GetPageSegMode();
auto osd_warning = std::string("");
if (cur_psm == tesseract::PSM_OSD_ONLY) {
const char *disabled_osd_msg =
"\nERROR: The page segmentation mode 0 (OSD Only) is currently "
"disabled.\n\n";
fprintf(stderr, "%s", disabled_osd_msg);
return EXIT_FAILURE;
} else if (cur_psm == tesseract::PSM_AUTO_OSD) {
api.SetPageSegMode(tesseract::PSM_AUTO);
osd_warning +=
"\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
"disabled. "
"Using PSM 3 (Auto) instead.\n\n";
} else if (cur_psm == tesseract::PSM_SPARSE_TEXT_OSD) {
api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT);
osd_warning +=
"\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
"currently disabled. "
"Using PSM 11 (Sparse text) instead.\n\n";
}
#endif // def DISABLED_LEGACY_ENGINE
std::vector<std::unique_ptr<TessResultRenderer>> renderers;
if (in_training_mode) {
renderers.push_back(nullptr);
} else if (outputbase != nullptr) {
PreloadRenderers(api, renderers, pagesegmode, outputbase);
}
if (!renderers.empty()) {
#ifdef DISABLED_LEGACY_ENGINE
if (!osd_warning.empty()) {
fprintf(stderr, "%s", osd_warning.c_str());
}
#endif
bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0].get());
if (!succeed) {
fprintf(stderr, "Error during processing.\n");
ret_val = EXIT_FAILURE;
}
}
return ret_val;
}
int main(int argc, char **argv) {
try {
return main1(argc, argv);
} catch (std::exception &e) {
std::cerr << "exception: " << e.what() << "\n";
} catch (...) {
std::cerr << "unknown exception\n";
}
return 1;
}