mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-19 15:03:45 +08:00
Use the model prefix for submodels as well
Also fix a regression in the for loop which handles submodels.
Fixes: 0d91c700c0
("Modernize code in Tesseract::init_tesseract")
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
7cfcfe1101
commit
391e713ae8
@ -23,6 +23,8 @@
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include <regex> // for std::regex_match
|
||||
|
||||
#include "control.h"
|
||||
#include "matchdefs.h"
|
||||
#include "pageres.h"
|
||||
@ -247,6 +249,15 @@ static bool IsStrInList(const std::string &str, const std::vector<std::string> &
|
||||
void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std::string> *to_load,
|
||||
std::vector<std::string> *not_to_load) {
|
||||
std::string remains(lang_str);
|
||||
// Look whether the model file uses a prefix which must be applied to
|
||||
// included model files as well.
|
||||
std::regex e("(.*)/[^/]*");
|
||||
std::cmatch cm;
|
||||
std::string prefix;
|
||||
if (std::regex_match(lang.c_str(), cm, e, std::regex_constants::match_default)) {
|
||||
// A prefix was found.
|
||||
prefix = cm[1].str() + "/";
|
||||
}
|
||||
while (!remains.empty()) {
|
||||
// Find the start of the lang code and which vector to add to.
|
||||
const char *start = remains.c_str();
|
||||
@ -268,6 +279,7 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
|
||||
lang_code.resize(end);
|
||||
std::string next(start + end);
|
||||
remains = next;
|
||||
lang_code = prefix + lang_code;
|
||||
// Check whether lang_code is already in the target vector and add.
|
||||
if (!IsStrInList(lang_code, *target)) {
|
||||
target->push_back(lang_code);
|
||||
@ -296,7 +308,10 @@ int Tesseract::init_tesseract(const std::string &arg0, const std::string &textba
|
||||
// Add any languages that this language requires
|
||||
bool loaded_primary = false;
|
||||
// Load the rest into sub_langs_.
|
||||
for (auto &lang_to_load : langs_to_load) {
|
||||
// A range based for loop does not work here because langs_to_load
|
||||
// might be changed in the loop when a new submodel is found.
|
||||
for (size_t lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
|
||||
auto &lang_to_load = langs_to_load[lang_index];
|
||||
if (!IsStrInList(lang_to_load, langs_not_to_load)) {
|
||||
const char *lang_str = lang_to_load.c_str();
|
||||
Tesseract *tess_to_init;
|
||||
|
Loading…
Reference in New Issue
Block a user