Replace std::regex with std::string functions (fixes issue #3830)

On Windows with UCRT and a UTF-8 locale, std::regex takes a very long time (several minutes!). Replacing it with std::string functions avoids that bottleneck.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
2024-11-24 02:59:07 +08:00 · 2022-05-29 10:21:42 +02:00 · 2022-05-29 10:21:42 +02:00 · 64bcdce607
commit 64bcdce607
parent f36c0d019b
1 changed file with 3 additions and 6 deletions
--- a/src/ccmain/tessedit.cpp
+++ b/src/ccmain/tessedit.cpp
@@ -23,8 +23,6 @@
 #  include "config_auto.h"
 #endif

-#include <regex> // for std::regex_match
-
 #include "control.h"
 #include "matchdefs.h"
 #include "pageres.h"
@@ -248,12 +246,11 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
  std::string remains(lang_str);
  // Look whether the model file uses a prefix which must be applied to
  // included model files as well.
-  std::regex e("(.*)/[^/]*");
-  std::cmatch cm;
  std::string prefix;
-  if (std::regex_match(lang.c_str(), cm, e, std::regex_constants::match_default)) {
+  size_t found = lang.find_last_of('/');
+  if (found != std::string::npos) {
    // A prefix was found.
-    prefix = cm[1].str() + "/";
+    prefix = lang.substr(0, found + 1);
  }
  while (!remains.empty()) {
    // Find the start of the lang code and which vector to add to.