This commit is contained in:
zdenop 2025-06-02 18:22:29 -05:00 committed by GitHub
commit affad4acaf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 97 additions and 96 deletions

View File

@ -346,7 +346,7 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
// Update datapath and language requested for the last valid initialization.
datapath_ = std::move(datapath);
if (datapath_.empty() && !tesseract_->datadir.empty()) {
datapath_ = tesseract_->datadir;
datapath_ = tesseract_->datadir.string();
}
language_ = language;
@ -395,7 +395,7 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector<std::string> *langs) co
/**
 * Replaces the contents of *langs with the entries reported by
 * addAvailableLanguages() for the engine's data directory, sorted in
 * ascending order. If no engine instance exists (tesseract_ is nullptr),
 * *langs is left empty.
 *
 * @param langs output vector; must not be nullptr (it is dereferenced
 *              unconditionally).
 */
void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const {
  langs->clear();
  if (tesseract_ == nullptr) {
    return;
  }
  // datadir is a std::filesystem::path. Convert it explicitly and scan the
  // directory exactly once; a second scan would append every entry twice.
  addAvailableLanguages(tesseract_->datadir.string(), langs);
  std::sort(langs->begin(), langs->end());
}
@ -857,7 +857,7 @@ const char *TessBaseAPI::GetInputName() {
}
/**
 * Returns the data path stored in datapath_ as a C string.
 *
 * The pointer refers to datapath_'s internal buffer, so it is only valid
 * until datapath_ is next modified or destroyed.
 *
 * datapath_ is returned rather than tesseract_->datadir.c_str() because
 * std::filesystem::path::c_str() yields `const value_type *`, which is
 * `const wchar_t *` on Windows and therefore not a portable `const char *`.
 */
const char *TessBaseAPI::GetDatapath() {
  return datapath_.c_str();
}
int TessBaseAPI::GetSourceYResolution() {

View File

@ -298,7 +298,7 @@ ParamsEditor::ParamsEditor(tesseract::Tesseract *tess, ScrollView *sv) {
SVMenuNode *svMenuRoot = BuildListOfAllLeaves(tess);
std::string paramfile;
paramfile = tess->datadir;
paramfile = tess->datadir.string();
paramfile += VARDIR; // parameters dir
paramfile += "edited"; // actual name

View File

@ -29,6 +29,7 @@
#include "params.h"
#include "stopper.h"
#include "tesseractclass.h"
#include "tesserrstream.h" // for tesserr
#include "tessvars.h"
#include "tprintf.h"
#ifndef DISABLED_LEGACY_ENGINE
@ -43,24 +44,25 @@ namespace tesseract {
// Read a "config" file containing a set of variable, value pairs.
// Candidate locations are probed in this order:
//   1. <datadir>/configs/<filename>
//   2. <datadir>/tessconfigs/<filename>
//   3. <filename> itself (relative or absolute path name)
// The first candidate that exists is handed to ParamUtils::ReadParamsFile.
// If none exists, <filename> is passed through unchanged.
void Tesseract::read_config_file(const char *filename, SetParamConstraint constraint) {
  // The candidate set is fixed, so a plain array avoids a heap allocation.
  const std::filesystem::path candidates[] = {
      datadir / "configs" / filename,
      datadir / "tessconfigs" / filename,
      std::filesystem::path(filename)};

  // Fall back to the last candidate (the bare filename) when nothing exists.
  const std::filesystem::path *selected = &candidates[2];
  for (const auto &candidate : candidates) {
    // Non-throwing overload: a filesystem error is treated as "not found".
    std::error_code ec;
    if (std::filesystem::exists(candidate, ec)) {
      selected = &candidate;
      break;
    }
  }

  ParamUtils::ReadParamsFile(selected->string().c_str(), constraint, this->params());
}
// Returns false if a unicharset file for the specified language was not found
@ -81,17 +83,14 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0,
bool set_only_non_debug_params, TessdataManager *mgr) {
// Set the language data path prefix
lang = !language.empty() ? language : "eng";
language_data_path_prefix = datadir;
language_data_path_prefix += lang;
language_data_path_prefix += ".";
language_data_path_prefix = datadir.string();
std::filesystem::path tessdata_path = datadir / (lang + "." + kTrainedDataSuffix);
// Initialize TessdataManager.
std::string tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
if (!mgr->is_loaded() && !mgr->Init(tessdata_path.c_str())) {
tprintf("Error opening data file %s\n", tessdata_path.c_str());
tprintf(
if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string().c_str())) {
tesserr << "Error opening data file " << tessdata_path.string() << '\n' <<
"Please make sure the TESSDATA_PREFIX environment variable is set"
" to your \"tessdata\" directory.\n");
" to your \"tessdata\" directory.\n";
return false;
}
#ifdef DISABLED_LEGACY_ENGINE
@ -184,10 +183,8 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0,
}
#ifndef DISABLED_LEGACY_ENGINE
else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) || !unicharset.load_from_file(&fp, false)) {
tprintf(
"Error: Tesseract (legacy) engine requested, but components are "
"not present in %s!!\n",
tessdata_path.c_str());
tesserr << "Error: Tesseract (legacy) engine requested, but components are "
"not present in " << tessdata_path.string() << "!!\n";
return false;
}
#endif // ndef DISABLED_LEGACY_ENGINE

View File

@ -11,11 +11,10 @@
// limitations under the License.
#include "ccutil.h"
#include "tesserrstream.h" // for tesserr
#include "tprintf.h" // for tprintf
#include <cstdlib>
#include <cstring> // for std::strrchr
#include <filesystem> // for std::filesystem
namespace tesseract {
@ -32,69 +31,73 @@ CCUtil::CCUtil()
// instead of weak vtables in every compilation unit.
CCUtil::~CCUtil() = default;
/**
 * @brief Finds the path to the tessdata directory.
 *
 * This function determines the location of the tessdata directory based on the
 * following order of precedence:
 * 1. If `argv0` is not empty, use it as is (a warning is printed when it is
 *    not a directory, but it is still returned).
 * 2. If the `TESSDATA_PREFIX` environment variable is set and the path exists,
 *    use it; if it is set but does not exist, print a warning and continue.
 * 3. On Windows, use a "tessdata" directory located next to the executable if
 *    it exists.
 * 4. If `TESSDATA_PREFIX` is defined at compile time, use
 *    `TESSDATA_PREFIX/tessdata`.
 * 5. Otherwise, use the current working directory ("." if it cannot be
 *    determined).
 *
 * All filesystem queries use the non-throwing `std::error_code` overloads, so
 * an OS-level error (for example a permission problem) is treated like a
 * missing entry instead of raising `std::filesystem::filesystem_error`.
 *
 * @param argv0 argument to be considered as the data directory path.
 * @return The path to the tessdata directory or current directory.
 */
static std::filesystem::path find_data_path(const std::string &argv0) {
  // Receives (and deliberately discards) error details from the queries below.
  std::error_code ec;

  // If argv0 is set, always use it even if it is not a valid directory
  if (!argv0.empty()) {
    std::filesystem::path path(argv0);
    if (!std::filesystem::is_directory(path, ec)) {
      tesserr << "Warning (tessdata): '" << argv0 << "' is not a valid directory.\n";
    }
    return path;
  }

  // Check environment variable if argv0 is not specified
  if (const char *tessdata_prefix = std::getenv("TESSDATA_PREFIX")) {
    std::filesystem::path path(tessdata_prefix);
    if (std::filesystem::exists(path, ec)) {
      return path;
    }
    tprintf("Warning: TESSDATA_PREFIX %s does not exist, ignoring.\n",
            tessdata_prefix);
  }

#ifdef _WIN32
  // Windows-specific: check for an existing 'tessdata' directory next to the
  // executable
  wchar_t path[MAX_PATH];
  if (DWORD length = GetModuleFileNameW(nullptr, path, MAX_PATH);
      length > 0 && length < MAX_PATH) {
    std::filesystem::path exe_path(path);
    auto tessdata_subdir = exe_path.parent_path() / "tessdata";
    if (std::filesystem::exists(tessdata_subdir, ec)) {
      return tessdata_subdir;
    }
  }
#endif

  // Fallback to compile-time or current directory
#ifdef TESSDATA_PREFIX
  return std::filesystem::path(TESSDATA_PREFIX) / "tessdata";
#else
  std::filesystem::path cwd = std::filesystem::current_path(ec);
  if (ec) {
    // The working directory could not be queried; a relative "." still lets
    // callers resolve files against wherever the process is running.
    return std::filesystem::path(".");
  }
  return cwd;
#endif
}
/**
 * @brief CCUtil::main_setup - set location of tessdata and name of image
 *
 * @param argv0 - path to the directory with language files and config files.
 * A non-empty argv0 is used as the data directory. Otherwise the directory is
 * chosen by find_data_path(): the TESSDATA_PREFIX environment variable if it
 * names an existing path, then (on Windows) a "tessdata" directory next to
 * the executable, then the compiled in -DTESSDATA_PREFIX, and finally the
 * current directory.
 * @param basename - name of image
 */
void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
  imagebasename = basename; /**< name of image */
  // datadir is a std::filesystem::path, so the whole lookup (including the
  // TESSDATA_PREFIX warning) lives in find_data_path(). No trailing separator
  // is appended here; path components are joined with operator/ by callers.
  datadir = find_data_path(argv0);
}
} // namespace tesseract

View File

@ -19,6 +19,8 @@
#ifndef TESSERACT_CCUTIL_CCUTIL_H_
#define TESSERACT_CCUTIL_CCUTIL_H_
#include <filesystem> // for std::filesystem
#ifndef _WIN32
# include <pthread.h>
# include <semaphore.h>
@ -53,9 +55,8 @@ public:
// Returns a pointer to this object's params_ member.
ParamsVectors *params() {
return &params_;
}
// Each member is declared exactly once. datadir is a filesystem path so that
// components can be joined with operator/ instead of manual '/' handling.
std::filesystem::path datadir; // dir for data files
std::string imagebasename;     // name of image
std::string lang;
std::string language_data_path_prefix;
UNICHARSET unicharset;