This commit is contained in:
林博仁 Buo-ren Lin 2025-05-26 10:14:18 +02:00 committed by GitHub
commit a67d87cef8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 1991 additions and 10 deletions

1
.gitignore vendored
View File

@ -54,6 +54,7 @@ config_auto.h
/src/training/wordlist2dawg
*.patch
!/snap/local/*.patch
# files generated by libtool
/src/training/combine_lang_model

View File

@ -0,0 +1,47 @@
Implement fallback config loading mechanism
This patch fixes configuration files in the read-only snap filesystem
not being loadable by Tesseract due to missing datadir fallback logic.
If a config file with the given name doesn't exist in the
user-specified tessdata prefix directory, the one in the compiled-in
directory will be loaded instead, allowing Tesseract to work properly.
Signed-off-by: 林博仁(Buo-ren Lin) <buo.ren.lin@gmail.com>
diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp
index c7518883..d0031eb5 100644
--- a/src/ccmain/tessedit.cpp
+++ b/src/ccmain/tessedit.cpp
@@ -37,6 +37,7 @@
# include "reject.h"
#endif
#include "lstmrecognizer.h"
+#include <cstdlib>
namespace tesseract {
@@ -57,7 +58,22 @@ void Tesseract::read_config_file(const char *filename, SetParamConstraint constr
if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
fclose(fp);
} else {
- path = filename;
+ std::string datadir_readonly = TESSDATA_PREFIX "/tessdata/";
+ path = datadir_readonly;
+ path += "configs/";
+ path += filename;
+ if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
+ fclose(fp);
+ } else {
+ path = datadir_readonly;
+ path += "tessconfigs/";
+ path += filename;
+ if ((fp = fopen(path.c_str(), "rb")) != nullptr) {
+ fclose(fp);
+ } else {
+ path = filename;
+ }
+ }
}
}
ParamUtils::ReadParamsFile(path.c_str(), constraint, this->params());

1866
snap/local/selective-checkout Executable file

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
name: tesseract
version: git
adopt-info: tesseract
summary: open source optical character recognition engine
description: |
Tesseract has unicode (UTF-8) support, and can recognize more than 100
@ -19,32 +19,99 @@ apps:
tesseract:
command: usr/local/bin/tesseract
environment:
TESSDATA_PREFIX: $SNAP_USER_COMMON
LD_LIBRARY_PATH: $LD_LIBRARY_PATH:$SNAP/usr/local/lib
plugs:
- home
- removable-media
# For recognition of remote images
- network
parts:
tesseract:
after: [leptonica]
source: .
override-pull: |
craftctl default
"${CRAFT_PROJECT_DIR}/snap/local/selective-checkout" \
--beta-tag-pattern='-beta-[[:digit:]]+$'
plugin: autotools
build-packages:
- pkg-config
- libpng-dev
- libjpeg-dev
- libarchive-dev
- libcurl4-gnutls-dev
- libtiff-dev
- zlib1g-dev
- libicu-dev
- libpango1.0-dev
- libcairo2-dev
# Dependencies of the selective-checkout scriptlet
- curl
- git
- jq
stage-packages:
- libarchive13
- libcurl3-gnutls
- libgomp1
after: [leptonica]
- libtiff5
autotools-configure-parameters:
# Disable unused features to reduce snap size and build time
- --disable-dependency-tracking
- --disable-doc
- --disable-static
- --disable-training
# Enable core features
- --with-archive
- --with-curl
# Fix incorrect compiled-in datadir
- CPPFLAGS=-DTESSDATA_PREFIX=\"\\\"/snap/$CRAFT_PROJECT_NAME/current/usr/local/share\\\"\"
override-build: |
patch -p1 < "${CRAFT_PROJECT_DIR}/snap/local/implement-fallback-config-loading.patch"
craftctl default
prime:
- -usr/lib/$CRAFT_ARCH_TRIPLET_BUILD_FOR/libicuio.so*
- -usr/lib/$CRAFT_ARCH_TRIPLET_BUILD_FOR/libicui18n.so*
- -usr/lib/$CRAFT_ARCH_TRIPLET_BUILD_FOR/libicutest.so*
- -usr/lib/$CRAFT_ARCH_TRIPLET_BUILD_FOR/libicutu.so*
- -usr/local/include
- -usr/local/lib/pkgconfig
- -usr/share/lintian
- -usr/share/man
leptonica:
source: https://github.com/DanBloomberg/leptonica/archive/1.83.1.tar.gz
source: https://github.com/DanBloomberg/leptonica.git
source-tag: 1.84.1
source-depth: 1
plugin: autotools
build-packages:
- libjpeg-dev
- libopenjp2-7-dev
- libpng-dev
- libtiff-dev
- libwebp-dev
- pkg-config
- zlib1g-dev
stage-packages:
- libjbig0
- libjpeg-turbo8
- libopenjp2-7
- libpng16-16
- libtiff5
- libwebp7
- libwebpmux3
- zlib1g
autotools-configure-parameters:
# Reduce part build time
- --disable-programs
- --disable-static
- --disable-dependency-tracking
# Ensure features are built
- --with-libpng
- --with-jpeg
- --with-libtiff
- --with-libwebp
- --with-libopenjpeg
prime:
- usr/lib/$CRAFT_ARCH_TRIPLET_BUILD_FOR/*.so*
- usr/local/lib/*.so*
- usr/share/doc/*/copyright*