mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 09:52:40 +08:00
Merge 2a296fabd8
into de095fc074
This commit is contained in:
commit
affad4acaf
@ -346,7 +346,7 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
|
||||
// Update datapath and language requested for the last valid initialization.
|
||||
datapath_ = std::move(datapath);
|
||||
if (datapath_.empty() && !tesseract_->datadir.empty()) {
|
||||
datapath_ = tesseract_->datadir;
|
||||
datapath_ = tesseract_->datadir.string();
|
||||
}
|
||||
|
||||
language_ = language;
|
||||
@ -395,7 +395,7 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector<std::string> *langs) co
|
||||
void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const {
|
||||
langs->clear();
|
||||
if (tesseract_ != nullptr) {
|
||||
addAvailableLanguages(tesseract_->datadir, langs);
|
||||
addAvailableLanguages(tesseract_->datadir.string(), langs);
|
||||
std::sort(langs->begin(), langs->end());
|
||||
}
|
||||
}
|
||||
@ -857,7 +857,7 @@ const char *TessBaseAPI::GetInputName() {
|
||||
}
|
||||
|
||||
const char *TessBaseAPI::GetDatapath() {
|
||||
return tesseract_->datadir.c_str();
|
||||
return datapath_.c_str();
|
||||
}
|
||||
|
||||
int TessBaseAPI::GetSourceYResolution() {
|
||||
|
@ -298,7 +298,7 @@ ParamsEditor::ParamsEditor(tesseract::Tesseract *tess, ScrollView *sv) {
|
||||
SVMenuNode *svMenuRoot = BuildListOfAllLeaves(tess);
|
||||
|
||||
std::string paramfile;
|
||||
paramfile = tess->datadir;
|
||||
paramfile = tess->datadir.string();
|
||||
paramfile += VARDIR; // parameters dir
|
||||
paramfile += "edited"; // actual name
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "params.h"
|
||||
#include "stopper.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tesserrstream.h" // for tesserr
|
||||
#include "tessvars.h"
|
||||
#include "tprintf.h"
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -43,24 +44,25 @@ namespace tesseract {
|
||||
// Read a "config" file containing a set of variable, value pairs.
|
||||
// Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
// and also accepts a relative or absolute path name.
|
||||
void Tesseract::read_config_file(const char *filename, SetParamConstraint constraint) {
|
||||
std::string path = datadir;
|
||||
path += "configs/";
|
||||
path += filename;
|
||||
FILE *fp;
|
||||
if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
|
||||
fclose(fp);
|
||||
} else {
|
||||
path = datadir;
|
||||
path += "tessconfigs/";
|
||||
path += filename;
|
||||
if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
|
||||
fclose(fp);
|
||||
} else {
|
||||
path = filename;
|
||||
}
|
||||
}
|
||||
ParamUtils::ReadParamsFile(path.c_str(), constraint, this->params());
|
||||
void Tesseract::read_config_file(const char *filename,
|
||||
SetParamConstraint constraint) {
|
||||
// Construct potential config file paths
|
||||
std::vector<std::filesystem::path> config_paths = {
|
||||
datadir / "configs" / filename,
|
||||
datadir / "tessconfigs" / filename,
|
||||
std::filesystem::path(filename)};
|
||||
|
||||
// Use the first existing file or fallback to the last (filename)
|
||||
auto config_file = std::find_if(config_paths.begin(), config_paths.end(),
|
||||
[](const std::filesystem::path &path) {
|
||||
std::error_code ec;
|
||||
return std::filesystem::exists(path, ec);
|
||||
});
|
||||
const std::filesystem::path &selected_path =
|
||||
(config_file != config_paths.end()) ? *config_file : config_paths.back();
|
||||
|
||||
ParamUtils::ReadParamsFile(selected_path.string().c_str(), constraint,
|
||||
this->params());
|
||||
}
|
||||
|
||||
// Returns false if a unicharset file for the specified language was not found
|
||||
@ -81,17 +83,14 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0,
|
||||
bool set_only_non_debug_params, TessdataManager *mgr) {
|
||||
// Set the language data path prefix
|
||||
lang = !language.empty() ? language : "eng";
|
||||
language_data_path_prefix = datadir;
|
||||
language_data_path_prefix += lang;
|
||||
language_data_path_prefix += ".";
|
||||
language_data_path_prefix = datadir.string();
|
||||
std::filesystem::path tessdata_path = datadir / (lang + "." + kTrainedDataSuffix);
|
||||
|
||||
// Initialize TessdataManager.
|
||||
std::string tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
|
||||
if (!mgr->is_loaded() && !mgr->Init(tessdata_path.c_str())) {
|
||||
tprintf("Error opening data file %s\n", tessdata_path.c_str());
|
||||
tprintf(
|
||||
if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string().c_str())) {
|
||||
tesserr << "Error opening data file " << tessdata_path.string() << '\n' <<
|
||||
"Please make sure the TESSDATA_PREFIX environment variable is set"
|
||||
" to your \"tessdata\" directory.\n");
|
||||
" to your \"tessdata\" directory.\n";
|
||||
return false;
|
||||
}
|
||||
#ifdef DISABLED_LEGACY_ENGINE
|
||||
@ -184,10 +183,8 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0,
|
||||
}
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) || !unicharset.load_from_file(&fp, false)) {
|
||||
tprintf(
|
||||
"Error: Tesseract (legacy) engine requested, but components are "
|
||||
"not present in %s!!\n",
|
||||
tessdata_path.c_str());
|
||||
tesserr << "Error: Tesseract (legacy) engine requested, but components are "
|
||||
"not present in " << tessdata_path.string() << "!!\n";
|
||||
return false;
|
||||
}
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
@ -11,11 +11,10 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "ccutil.h"
|
||||
#include "tesserrstream.h" // for tesserr
|
||||
#include "tprintf.h" // for tprintf
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring> // for std::strrchrA
|
||||
#include <filesystem> // for std::filesystem
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -32,69 +31,73 @@ CCUtil::CCUtil()
|
||||
// instead of weak vtables in every compilation unit.
|
||||
CCUtil::~CCUtil() = default;
|
||||
|
||||
/**
|
||||
* @brief Finds the path to the tessdata directory.
|
||||
*
|
||||
* This function determines the location of the tessdata directory based on the
|
||||
* following order of precedence:
|
||||
* 1. If `argv0` is provided, use it.
|
||||
* 2. If `TESSDATA_PREFIX` environment variable is set and the path exists, use
|
||||
* it.
|
||||
* 3. On Windows, check for a "tessdata" directory in the executable's directory
|
||||
* and use it.
|
||||
* 4. If `TESSDATA_PREFIX` is defined at compile time, use it.
|
||||
* 5. Otherwise, use the current working directory.
|
||||
*
|
||||
* @param argv0 argument to be considered as the data directory path.
|
||||
* @return The path to the tessdata directory or current directory.
|
||||
*/
|
||||
static std::filesystem::path find_data_path(const std::string &argv0) {
|
||||
// If argv0 is set, always use it even if it is not a valid directory
|
||||
if (!argv0.empty()) {
|
||||
std::filesystem::path path(argv0);
|
||||
if (!std::filesystem::is_directory(path)) {
|
||||
tesserr << "Warning (tessdata): '" << argv0 << "' is not a valid directory.\n";
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
// Check environment variable if argv0 is not specified
|
||||
if (const char *tessdata_prefix = std::getenv("TESSDATA_PREFIX")) {
|
||||
std::filesystem::path path(tessdata_prefix);
|
||||
if (std::filesystem::exists(path)) {
|
||||
return path;
|
||||
} else {
|
||||
tprintf("Warning: TESSDATA_PREFIX %s does not exist, ignoring.\n",
|
||||
tessdata_prefix);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// Windows-specific: check for 'tessdata' not existing in the executable
|
||||
// directory
|
||||
wchar_t path[MAX_PATH];
|
||||
if (DWORD length = GetModuleFileNameW(nullptr, path, MAX_PATH);
|
||||
length > 0 && length < MAX_PATH) {
|
||||
std::filesystem::path exe_path(path);
|
||||
auto tessdata_subdir = exe_path.parent_path() / "tessdata";
|
||||
if (std::filesystem::exists(tessdata_subdir)) {
|
||||
return tessdata_subdir;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Fallback to compile-time or current directory
|
||||
#ifdef TESSDATA_PREFIX
|
||||
return std::filesystem::path(TESSDATA_PREFIX) / "tessdata";
|
||||
#else
|
||||
return std::filesystem::current_path();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief CCUtil::main_setup - set location of tessdata and name of image
|
||||
*
|
||||
* @param argv0 - path to the directory with language files and config files.
|
||||
* The value of argv0 is used if it is not empty; otherwise the TESSDATA_PREFIX
|
||||
* environment variable is used if it is set and exists; next, the compiled-in
|
||||
* -DTESSDATA_PREFIX is tried. If none of these apply, the current directory is used.
|
||||
* @param basename - name of image
|
||||
*/
|
||||
void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
|
||||
imagebasename = basename; /**< name of image */
|
||||
|
||||
const char *tessdata_prefix = getenv("TESSDATA_PREFIX");
|
||||
|
||||
// Ignore TESSDATA_PREFIX if there is no matching filesystem entry.
|
||||
if (tessdata_prefix != nullptr && !std::filesystem::exists(tessdata_prefix)) {
|
||||
tprintf("Warning: TESSDATA_PREFIX %s does not exist, ignore it\n", tessdata_prefix);
|
||||
tessdata_prefix = nullptr;
|
||||
}
|
||||
|
||||
if (!argv0.empty()) {
|
||||
/* Use tessdata prefix from the command line. */
|
||||
datadir = argv0;
|
||||
} else if (tessdata_prefix) {
|
||||
/* Use tessdata prefix from the environment. */
|
||||
datadir = tessdata_prefix;
|
||||
#if defined(_WIN32)
|
||||
} else if (datadir.empty() || !std::filesystem::exists(datadir)) {
|
||||
/* Look for tessdata in directory of executable. */
|
||||
char path[_MAX_PATH];
|
||||
DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
|
||||
if (length > 0 && length < sizeof(path)) {
|
||||
char *separator = std::strrchr(path, '\\');
|
||||
if (separator != nullptr) {
|
||||
*separator = '\0';
|
||||
std::string subdir = path;
|
||||
subdir += "/tessdata";
|
||||
if (std::filesystem::exists(subdir)) {
|
||||
datadir = subdir;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* _WIN32 */
|
||||
}
|
||||
|
||||
// datadir may still be empty:
|
||||
if (datadir.empty()) {
|
||||
#if defined(TESSDATA_PREFIX)
|
||||
// Use tessdata prefix which was compiled in.
|
||||
datadir = TESSDATA_PREFIX "/tessdata/";
|
||||
// Note that some software (for example conda) patches TESSDATA_PREFIX
|
||||
// in the binary, so it might be shorter. Recalculate its length.
|
||||
datadir.resize(std::strlen(datadir.c_str()));
|
||||
#else
|
||||
datadir = "./";
|
||||
#endif /* TESSDATA_PREFIX */
|
||||
}
|
||||
|
||||
// check for missing directory separator
|
||||
const char lastchar = datadir.back();
|
||||
if (lastchar != '/' && lastchar != '\\') {
|
||||
datadir += '/';
|
||||
}
|
||||
datadir = find_data_path(argv0);
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -19,6 +19,8 @@
|
||||
#ifndef TESSERACT_CCUTIL_CCUTIL_H_
|
||||
#define TESSERACT_CCUTIL_CCUTIL_H_
|
||||
|
||||
#include <filesystem> // for std::filesystem
|
||||
|
||||
#ifndef _WIN32
|
||||
# include <pthread.h>
|
||||
# include <semaphore.h>
|
||||
@ -53,9 +55,8 @@ public:
|
||||
ParamsVectors *params() {
|
||||
return ¶ms_;
|
||||
}
|
||||
|
||||
std::string datadir; // dir for data files
|
||||
std::string imagebasename; // name of image
|
||||
std::filesystem::path datadir; // dir for data files
|
||||
std::string imagebasename; // name of image
|
||||
std::string lang;
|
||||
std::string language_data_path_prefix;
|
||||
UNICHARSET unicharset;
|
||||
|
Loading…
Reference in New Issue
Block a user