Merge branch 'main' into improve-build-for-android-integrate-with-cpu_features-library

This commit is contained in:
zdenop 2022-06-23 16:43:37 +02:00 committed by GitHub
commit a96b2abb1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
150 changed files with 2440 additions and 2350 deletions

View File

@ -5,6 +5,7 @@ on:
#push:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
brew:
@ -42,7 +43,7 @@ jobs:
- name: Configure Tesseract
run: |
./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' "PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
- name: Make and Install Tesseract
run: |
@ -130,6 +131,9 @@ jobs:
- name: Install Macports
run: |
curl -LO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install
# --remove-brew does not remove the Homebrew entries in bin,
# so remove them now.
rm -v $(brew --prefix)/bin/*
- name: Install Dependencies
run: |

83
.github/workflows/autotools-openmp.yml vendored Normal file
View File

@ -0,0 +1,83 @@
name: autotools-openmp
# Autotools build of Tesseract on Ubuntu with '--enable-openmp', followed by
# timing (benchmark) runs of the installed tesseract binary.
# The configure step enables no training tools.
on:
# Only manual runs (workflow_dispatch) are active; the push and schedule
# triggers below are commented out.
#push:
#schedule:
# - cron: 0 20 * * *
workflow_dispatch:
jobs:
linux:
runs-on: ${{ matrix.config.os }}
strategy:
# fail-fast is disabled so a failure on one Ubuntu release does not cancel
# the job for the other release.
fail-fast: false
matrix:
config:
- { name: 18.04-openmp, os: ubuntu-18.04 }
- { name: 20.04-openmp, os: ubuntu-20.04 }
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
# Clones the test-data repository, copies its fonts into test/testing/ and
# moves everything else into the parent directory of the checkout.
# NOTE(review): the benchmark steps below read ../tessdata_fast,
# ../tessdata_best and ../tessdata - presumably these directories come from
# this clone; confirm against the repository layout.
- name: Download fonts, tessdata and langdata required for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install autoconf libleptonica-dev -y
sudo apt-get install libpango1.0-dev -y
sudo apt-get install cabextract libarchive-dev -y
sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
# Creates the m4 directory (if missing) and runs autogen.sh.
- name: Setup Tesseract
run: |
mkdir -p m4
./autogen.sh
# Configures with shared libraries and documentation disabled and OpenMP
# enabled, then prints the config.log lines that mention OpenMP.
# NOTE(review): grep exits non-zero when nothing matches, which presumably
# fails this step if configure did not mention OpenMP - confirm this is the
# intended check.
- name: Configure Tesseract
run: |
./configure '--disable-shared' '--enable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2'
grep -i OpenMP config.log
- name: Make and Install Tesseract
run: |
make
sudo make install
# Downloads the sample image and writes benchmarks.list containing the image
# file name fifteen times, one per line (the '%.0s' in the printf format
# consumes each of the 15 brace-expansion arguments without printing it).
- name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
run: |
wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list
# Prints CPU, memory, compiler and tesseract version information, then times
# one run over benchmarks.list without an OpenMP thread limit.
# The OCR output and tesseract's stderr are discarded.
- name: Run Tesseract using image from issue 263 with tessdata_fast
run: |
lscpu
free
g++ --version
tesseract -v
time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
echo "tessdata_fast"
# The three steps below repeat the timed run with OMP_THREAD_LIMIT set to
# 1, 2 and 3, once per tessdata directory. The label is echoed only when
# the tesseract run succeeds (&&).
- name: Run Tesseract using image from issue 263 with tessdata_fast and OpenMP Thread Limit
run: |
for lmt in {1..3}; do
time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_fast"
done
- name: Run Tesseract using image from issue 263 with tessdata_best and OpenMP Thread Limit
run: |
for lmt in {1..3}; do
time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_best"
done
- name: Run Tesseract using image from issue 263 with tessdata and OpenMP Thread Limit
run: |
for lmt in {1..3}; do
time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata"
done

View File

@ -5,93 +5,81 @@ on:
#push:
schedule:
- cron: 0 23 * * *
workflow_dispatch:
env:
ILOC: d:/a/local
jobs:
build:
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- {
name: "Windows Latest MSVC - cmake",
os: windows-latest,
cc: "cl",
cxx: "cl",
environment_script: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat"
}
name: cmake-win64
runs-on: windows-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- uses: ilammy/setup-nasm@v1
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
uses: actions/checkout@v3
with:
submodules: recursive
- name: Build and Install leptonica dependencies
- name: Get the version
id: get_version
run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//}
- name: Setup Instalation Location
run: |
mkdir ${{env.ILOC}}
- name: Build and Install zlib-ng
shell: cmd
run: |
mkdir d:/a/local
set PKG_CONFIG_PATH=d:/a/local/lib/pkgconfig
echo "PKG_CONFIG_PATH=d:/a/local/lib/pkgconfig" >> $GITHUB_ENV
- name: Build and Install zlib
shell: cmd
run: |
curl -sSL -o zlib1211.zip https://zlib.net/zlib1211.zip
unzip.exe zlib1211.zip
cd zlib-1.2.11
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git
cd zlib-ng
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
cmake --build build --config Release --target install
cd ..
- name: Build and Install libpng
shell: cmd
run: |
curl -sSL -o lpng1637.zip https://download.sourceforge.net/libpng/lpng1637.zip
unzip.exe lpng1637.zip
unzip.exe -qq lpng1637.zip
cd lpng1637
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
cmake --build build --config Release --target install
- name: Build and Install webp
shell: cmd
run: |
git clone --depth 1 https://github.com/webmproject/libwebp.git && cd libwebp
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF
cmake --build build --config Release --target install
cd ..
- name: Build and Install giflib
shell: cmd
run: |
curl -sSL -o giflib-master.zip https://codeload.github.com/xbmc/giflib/zip/master
unzip giflib-master.zip
unzip -qq giflib-master.zip
cd giflib-master
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}}
cmake --build build --config Release --target install
cd ..
- name: Build and Install libjpeg
shell: cmd
run: |
git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git
cd libjpeg-turbo
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF
cmake --build build --config Release --target install
cd ..
- name: Build and Install openjpeg
- name: Build and Install webp
shell: cmd
run: |
git clone --depth 1 https://github.com/uclouvain/openjpeg.git
cd openjpeg
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
git clone --depth 1 https://github.com/webmproject/libwebp.git
cd libwebp
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWEBP_BUILD_ANIM_UTILS=OFF -DWEBP_BUILD_CWEBP=OFF -DWEBP_BUILD_DWEBP=OFF -DWEBP_BUILD_GIF2WEBP=OFF -DWEBP_BUILD_IMG2WEBP=OFF -DWEBP_BUILD_VWEBP=OFF -DWEBP_BUILD_WEBPMUX=OFF -DWEBP_BUILD_WEBPINFO=OFF -DWEBP_BUILD_EXTRAS=OFF
cmake --build build --config Release --target install
cd ..
- name: Build and Install jbigkit
shell: cmd
run: |
git clone --depth 1 https://github.com/zdenop/jbigkit
cd jbigkit-2.1
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
git clone --depth 1 https://github.com/zdenop/jbigkit.git
cd jbigkit
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF
cmake --build build --config Release --target install
cd ..
@ -99,17 +87,26 @@ jobs:
shell: cmd
run: |
git clone --depth 1 https://github.com/facebook/zstd.git
cd zstd
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
cd zstd/build/cmake
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DZSTD_BUILD_PROGRAMS=OFF -DBUILD_TESTING=OFF
cmake --build build --config Release --target install
cd ..
- name: Build and Install libtiff
shell: cmd
run: |
git clone --depth 1 https://gitlab.com/libtiff/libtiff
git clone --depth 1 https://gitlab.com/libtiff/libtiff.git
cd libtiff
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF
cmake --build build --config Release --target install
cd ..
- name: Build and Install openjpeg
shell: cmd
run: |
git clone --depth 1 https://github.com/uclouvain/openjpeg.git
cd openjpeg
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_CODEC=OFF -DBUILD_TESTING=OFF -DBUILD_DOC=OFF -DCMAKE_WARN_DEPRECATED=OFF
cmake --build build --config Release --target install
cd ..
@ -117,26 +114,45 @@ jobs:
shell: cmd
run: |
echo "Building leptonica..."
git clone --depth 1 https://github.com/DanBloomberg/leptonica.git && cd leptonica
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DBUILD_PROG=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
git clone --depth 1 https://github.com/DanBloomberg/leptonica.git
cd leptonica
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON
cmake --build build --config Release --target install
- name: Build and Install libarchive
shell: cmd
run: |
git clone --depth 1 https://github.com/libarchive/libarchive.git
cd libarchive
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DENABLE_TEST=OFF
cmake --build build --config Release --target install
- name: Remove not needed tools Before building tesseract
shell: cmd
run: >
rm -Rf ${{env.ILOC}}/bin/*.exe
- name: Build and Install tesseract
shell: cmd
run: |
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DBUILD_TRAINING_TOOLS=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DOPENMP_BUILD=OFF -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
REM cmake -E env CXXFLAGS="/Qpar /fp:fast"
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=ON
cmake --build build --config Release --target install
- name: Display Tesseract Version and Test Command Line Usage
shell: cmd
run: |
git clone --depth 1 https://github.com/tesseract-ocr/tessconfigs
mkdir d:/a/local/share
move tessconfigs d:/a/local/share
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output d:/a/local/share/tessconfigs/eng.traineddata
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output d:/a/local/share/tessconfigs/osd.traineddata
set TESSDATA_PREFIX=d:/a/local/share/tessconfigs
set PATH=d:/a/local/bin;%PATH%
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata
set TESSDATA_PREFIX=${{env.ILOC}}/share/tessdata
set PATH=${{env.ILOC}}/bin;%PATH%
tesseract -v
tesseract --list-langs
tesseract test/testing/phototest.tif -
- name: Upload Build Results
uses: actions/upload-artifact@v2
with:
name: tesseract-${{ steps.get_version.outputs.VERSION }}-VS2019_win64
path: ${{env.ILOC}}
retention-days: 5

View File

@ -50,23 +50,26 @@ jobs:
steps:
- name: Install compilers on Linux
run: |
sudo apt-get update
sudo apt-get install ${{ matrix.config.cxx }} -y
if: runner.os == 'Linux'
# sudo apt-get install libarchive-dev libcurl4-openssl-dev libcurl4 curl -y
- name: Install dependencies on Linux
run: |
sudo apt-get install autoconf libleptonica-dev -y
sudo apt-get install libarchive-dev libcurl4-openssl-dev -y
sudo apt-get install libpango1.0-dev -y
sudo apt-get install cabextract -y
sudo apt-get install ninja-build -y
cmake --version
if: runner.os == 'Linux'
- name: Install dependencies on macOS
run: |
brew install autoconf automake
brew install leptonica
brew install cairo pango icu4c
brew install libarchive
brew install pango
brew install cabextract
brew install ninja
ninja --version
@ -147,15 +150,15 @@ jobs:
run: |
export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
cd test
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++11
./basicapitest
if: runner.os == 'Linux'
- name: Build and run basicapitest (macOS)
run: |
export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH"
cd test
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" `pkg-config --cflags --libs tesseract lept ` -lcurl -pthread -std=c++11
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++11
./basicapitest
if: runner.os == 'macOS'

View File

@ -13,7 +13,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [windows-latest, windows-2022, ubuntu-20.04, macOS-latest]
os: [windows-2022, windows-2019, ubuntu-22.04, ubuntu-20.04, macOS-latest]
steps:
- uses: actions/checkout@v2
@ -22,50 +22,50 @@ jobs:
- uses: egorpugin/sw-action@master
- name: build
if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
run: ./sw -static -shared -platform x86,x64 -config d,r build
- name: build
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
run: ./sw -static -shared -config d,r build -Dwith-tests=1
- name: download test data
run: git clone https://github.com/egorpugin/tessdata tessdata_unittest
- name: copy fonts
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
run: cp tessdata_unittest/fonts/* test/testing/
- name: copy fonts
if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
run: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse
shell: pwsh
- name: test
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
continue-on-error: true
- name: test-nightly
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022' && github.event.schedule=='0 0 * * *'
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019' && github.event.schedule=='0 0 * * *'
run: ./sw -static -shared -config "d,r" test -Dwith-tests=1
continue-on-error: true
# windows tests hang here for some reason, investigate
#- name: test
#if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
#if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
#run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
#continue-on-error: true
- name: Upload Unit Test Results
if: always() && matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
if: always() && matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
uses: actions/upload-artifact@v2
with:
name: Test Results (${{ matrix.os }})
path: .sw/test/results.xml
- name: Publish Test Report
if: always() && matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
if: always() && matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
uses: mikepenz/action-junit-report@v1
with:
check_name: test (${{ matrix.os }})

View File

@ -24,10 +24,11 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
sudo apt-get install cabextract -y
#sudo apt-get install libc++-7-dev libc++abi-7-dev -y
- name: Setup
run: |
mkdir -p m4

View File

@ -36,8 +36,7 @@ jobs:
run: |
./configure '--disable-shared' '--with-pic' \
'CXX=${{ matrix.config.cxx }}' \
'CXXFLAGS=-g -O2 -fsanitize=address,undefined' \
"PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
'CXXFLAGS=-g -O2 -fsanitize=address,undefined'
- name: Make and Install Tesseract
run: |

View File

@ -24,6 +24,7 @@ jobs:
- name: Install dependencies (Linux)
run: |
sudo apt-get update
sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
sudo apt-get install cabextract -y

1
.gitignore vendored
View File

@ -63,6 +63,7 @@ config_auto.h
# ignore compilation files
build/*
/bin
/cmake-*
.deps
.dirstamp
/.libs

View File

@ -1,40 +0,0 @@
# Travis CI configuration for Tesseract
sudo: false
notifications:
email: false
language: cpp
os: linux
dist: focal
arch:
- amd64
- arm64
- ppc64le
- s390x
compiler:
- gcc
- clang
env:
cache:
directories:
before_install:
- sudo apt-get install libleptonica-dev libpango1.0-dev libtiff5-dev -y
install:
script:
- mkdir build
- cd build
- cmake .. -DSW_BUILD=OFF
- make
- sudo make install
#after_script: # let those commands trigger build errors
- tesseract -v
- text2image -v
- lstmtraining -v
- ls /home/travis/build/tesseract-ocr/tesseract/test/testing/*.tif
- wget https://github.com/egorpugin/tessdata/raw/master/tessdata/eng.traineddata
- tesseract /home/travis/build/tesseract-ocr/tesseract/test/testing/phototest.tif - -l eng --tessdata-dir ./

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,22 @@
2022-03-01 - V5.1.0
* Handle image and line regions in output formats ALTO, hOCR and text.
* New parameter curl_timeout for curl_easy_setopt.
* Build fixes and improvements.
* Catch nullptr in PageIterator::Orientation to improve robustness.
* Remove unused code.
2022-01-06 - V5.0.1
* Add SPDX-License-Identifier to public include files.
* Support redirections when running OCR on a URL.
* Lots of fixes and improvements for cmake builds.
Distributions should use the autoconf build.
* Fix broken msys2 build with gcc 11.
* Fix parameter certainty_scale (was duplicated).
* Fix some compiler warnings and clean code.
* Correctly detect amd64 and i386 on FreeBSD.
* Add libarchive and libcurl in continuous integration actions.
* Update submodule googletest to release v1.11.0.
2021-11-22 - V5.0.0
* Faster training and recognition by default (float instead of
double calculations)

View File

@ -107,7 +107,7 @@ libtesseract_la_LDFLAGS += $(libarchive_LIBS)
libtesseract_la_LDFLAGS += $(libcurl_LIBS)
libtesseract_la_LDFLAGS += $(TENSORFLOW_LIBS)
if T_WIN
libtesseract_la_LDFLAGS += -no-undefined -Wl,--as-needed -lws2_32
libtesseract_la_LDFLAGS += -no-undefined -lws2_32
else
libtesseract_la_LDFLAGS += $(NOUNDEFINED)
endif
@ -160,6 +160,14 @@ libtesseract_la_LIBADD += libtesseract_avx2.la
noinst_LTLIBRARIES += libtesseract_avx2.la
endif
if HAVE_AVX512F
libtesseract_avx512_la_CXXFLAGS = -mavx512f
libtesseract_avx512_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_avx512_la_SOURCES = src/arch/dotproductavx512.cpp
libtesseract_la_LIBADD += libtesseract_avx512.la
noinst_LTLIBRARIES += libtesseract_avx512.la
endif
if HAVE_FMA
libtesseract_fma_la_CXXFLAGS = -mfma
libtesseract_fma_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
@ -379,7 +387,6 @@ libtesseract_ccutil_la_SOURCES += src/ccutil/clst.cpp
libtesseract_ccutil_la_SOURCES += src/ccutil/elst2.cpp
libtesseract_ccutil_la_SOURCES += src/ccutil/elst.cpp
libtesseract_ccutil_la_SOURCES += src/ccutil/errcode.cpp
libtesseract_ccutil_la_SOURCES += src/ccutil/mainblk.cpp
libtesseract_ccutil_la_SOURCES += src/ccutil/serialis.cpp
libtesseract_ccutil_la_SOURCES += src/ccutil/scanutils.cpp
libtesseract_ccutil_la_SOURCES += src/ccutil/tessdatamanager.cpp

View File

@ -1,8 +1,7 @@
# Tesseract OCR
[![Build Status](https://travis-ci.org/tesseract-ocr/tesseract.svg?branch=master)](https://travis-ci.org/tesseract-ocr/tesseract)
[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)<br>
[![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)<br>
[![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr)
[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/cpp/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/context:cpp)
[![Total Alerts](https://img.shields.io/lgtm/alerts/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/alerts)
@ -34,7 +33,7 @@ on line recognition, but also still supports the legacy Tesseract OCR engine of
Tesseract 3 which works by recognizing character patterns. Compatibility with
Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0).
It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example
those from the tessdata repository.
those from the [tessdata](https://github.com/tesseract-ocr/tessdata) repository.
The lead developer is Ray Smith. The maintainer is Zdenko Podobny.
For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS)
@ -42,7 +41,9 @@ and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/gr
Tesseract has **unicode (UTF-8) support**, and can **recognize more than 100 languages** "out of the box".
Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV. The main branch also has experimental support for ALTO (XML) output.
Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF.
Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO (XML, supported since version 4.1.0).
You should note that in many cases, in order to get better OCR results,
you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract.
@ -60,7 +61,11 @@ at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some
more changes made in 1996 to port to Windows, and some C++izing in 1998.
In 2005 Tesseract was open sourced by HP. From 2006 until November 2018 it was developed by Google.
The latest stable version is **[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0)**, released on November 30, 2021.
Major version 5 is the current stable version and started with release
[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0) on November 30, 2021.
Newer minor versions and bugfix versions are available from
[GitHub](https://github.com/tesseract-ocr/tesseract/releases/).
Latest source code is available from [main branch on GitHub](https://github.com/tesseract-ocr/tesseract/tree/main).
Open issues can be found in the [issue tracker](https://github.com/tesseract-ocr/tesseract/issues),
and in the [planning documentation](https://tesseract-ocr.github.io/tessdoc/Planning.html).

View File

@ -1 +1 @@
5.0.0
5.1.0

View File

@ -96,13 +96,20 @@ set(include_files_list
pango-1.0/pango/pango-features.h
unicode/uchar.h
)
check_includes(include_files_list)
# check_includes(include_files_list)
set(types_list
"long long int"
wchar_t
)
check_types(types_list)
# check_types(types_list)
list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
list(APPEND CMAKE_REQUIRED_LIBRARIES -lm)
set(functions_list
feenableexcept
)
check_functions(functions_list)
file(APPEND ${AUTOCONFIG_SRC} "
/* Version number */
@ -113,6 +120,7 @@ file(APPEND ${AUTOCONFIG_SRC} "
#cmakedefine HAVE_TIFFIO_H ${HAVE_TIFFIO_H}
#cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE}
#cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL}
#cmakedefine USE_OPENCL ${USE_OPENCL}
")
if(TESSDATA_PREFIX)

View File

@ -7,7 +7,7 @@
# ----------------------------------------
AC_PREREQ([2.69])
AC_INIT([tesseract],
[m4_esyscmd_s([test -d .git && git describe --abbrev=4 || cat VERSION])],
[m4_esyscmd_s([git describe --abbrev=4 2>/dev/null || cat VERSION])],
[https://github.com/tesseract-ocr/tesseract/issues],,
[https://github.com/tesseract-ocr/tesseract/])
@ -28,8 +28,8 @@ AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc])
# Define date of package, etc. Could be useful in auto-generated
# documentation.
PACKAGE_YEAR=2021
PACKAGE_DATE="11/30"
PACKAGE_YEAR=2022
PACKAGE_DATE="03/01"
abs_top_srcdir=`AS_DIRNAME([$0])`
@ -91,7 +91,7 @@ case "${host_os}" in
mingw*)
AM_CONDITIONAL([T_WIN], true)
AM_CONDITIONAL([ADD_RT], false)
AC_SUBST([AM_LDFLAGS], ['-Wl,-no-undefined -Wl,--as-needed'])
AC_SUBST([AM_LDFLAGS], ['-no-undefined'])
;;
cygwin*)
AM_CONDITIONAL([ADD_RT], false)
@ -129,6 +129,7 @@ AX_CHECK_COMPILE_FLAG([-Werror=unused-command-line-argument], [WERROR=-Werror=un
AM_CONDITIONAL([HAVE_AVX], false)
AM_CONDITIONAL([HAVE_AVX2], false)
AM_CONDITIONAL([HAVE_AVX512F], false)
AM_CONDITIONAL([HAVE_FMA], false)
AM_CONDITIONAL([HAVE_SSE4_1], false)
AM_CONDITIONAL([HAVE_NEON], false)
@ -149,6 +150,12 @@ case "${host_cpu}" in
AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
fi
AX_CHECK_COMPILE_FLAG([-mavx512f], [avx512f=true], [avx512f=false], [$WERROR])
AM_CONDITIONAL([HAVE_AVX512F], $avx512f)
if $avx512f; then
AC_DEFINE([HAVE_AVX512F], [1], [Enable AVX512F instructions])
fi
AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
AM_CONDITIONAL([HAVE_FMA], $fma)
if $fma; then
@ -163,7 +170,7 @@ case "${host_cpu}" in
;;
aarch64)
aarch64|arm64)
# ARMv8 always has NEON and does not need special compiler flags.
AM_CONDITIONAL([HAVE_NEON], true)
@ -178,6 +185,7 @@ case "${host_cpu}" in
AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
NEON_CXXFLAGS="-mfpu=neon"
AC_SUBST([NEON_CXXFLAGS])
check_for_neon=1
fi
;;
@ -188,6 +196,19 @@ case "${host_cpu}" in
esac
# Check whether feenableexcept is supported. Some C libraries (e.g. uClibc) do not provide it.
AC_CHECK_FUNCS([feenableexcept])
# additional checks for NEON targets
if test x$check_for_neon = x1; then
AC_MSG_NOTICE([checking how to detect NEON availability])
AC_CHECK_FUNCS([getauxval elf_aux_info android_getCpuFamily])
if test $ac_cv_func_getauxval = no && test $ac_cv_func_elf_aux_info = no && test $ac_cv_func_android_getCpuFamily = no; then
AC_MSG_WARN([NEON is available, but we don't know how to check for it. Will not be able to use NEON.])
fi
fi
AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)
@ -443,6 +464,15 @@ esac
AC_SEARCH_LIBS([pthread_create], [pthread])
# Set PKG_CONFIG_PATH for MacOS with Homebrew unless it is already set.
AC_CHECK_PROG([have_brew], brew, true, false)
if $have_brew; then
brew_prefix=$(brew --prefix)
if test -z "$PKG_CONFIG_PATH"; then
PKG_CONFIG_PATH=$brew_prefix/opt/icu4c/lib/pkgconfig:$brew_prefix/opt/libarchive/lib/pkgconfig
export PKG_CONFIG_PATH
fi
fi
# ----------------------------------------
# Check for programs needed to build documentation.
@ -462,9 +492,7 @@ AS_IF([test "$enable_doc" != "no"], [
if $have_asciidoc && $have_xsltproc; then
AM_CONDITIONAL([ASCIIDOC], true)
XML_CATALOG_FILES=
AC_CHECK_PROG([have_brew], brew, true, false)
if $have_brew; then
brew_prefix=$(brew --prefix)
catalog_file=$brew_prefix/etc/xml/catalog
if test -f $catalog_file; then
AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], true)

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
@ -13,8 +13,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
@ -12,8 +12,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef API_CAPI_H_
#define API_CAPI_H_
@ -233,6 +231,12 @@ TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Place holder
//
@ -12,8 +12,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
@ -14,8 +14,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
@ -183,7 +181,7 @@ class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that is is useless.
// choices for this symbol and after that it is useless.
explicit ChoiceIterator(const LTRResultIterator &result_it);
~ChoiceIterator();

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
@ -14,8 +14,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
@ -14,8 +14,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
@ -13,8 +13,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
@ -12,8 +12,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
@ -23,6 +21,7 @@
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <string> // for std::string
#include <vector> // for std::vector
@ -232,7 +231,7 @@ private:
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<long int> offsets_; // offset of every PDF object in bytes
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
@ -15,8 +15,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
@ -13,8 +13,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
@ -99,10 +97,10 @@ public:
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
// it != UNICHAR::end(str, len);
// ++it) {
// tprintf("UCS-4 symbol code = %d\n", *it);
// printf("UCS-4 symbol code = %d\n", *it);
// char buf[5];
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
// tprintf("Char = %s\n", buf);
// printf("Char = %s\n", buf);
// }
class TESS_API const_iterator {
using CI = const_iterator;

View File

@ -1,4 +1,4 @@
///////////////////////////////////////////////////////////////////////
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
@ -12,8 +12,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_

View File

@ -13,9 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "errcode.h" // for ASSERT_HOST
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
#include "tprintf.h" // for tprintf
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
@ -174,6 +176,36 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
continue;
}
int left, top, right, bottom;
auto block_type = res_it->BlockType();
switch (block_type) {
case PT_FLOWING_IMAGE:
case PT_HEADING_IMAGE:
case PT_PULLOUT_IMAGE: {
// Handle all kinds of images.
// TODO: optionally add TYPE, for example TYPE="photo".
alto_str << "\t\t\t\t<Illustration ID=\"cblock_" << bcnt++ << "\"";
AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
alto_str << "</Illustration>\n";
res_it->Next(RIL_BLOCK);
continue;
}
case PT_HORZ_LINE:
case PT_VERT_LINE:
// Handle horizontal and vertical lines.
alto_str << "\t\t\t\t<GraphicalElement ID=\"cblock_" << bcnt++ << "\"";
AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
alto_str << "</GraphicalElement >\n";
res_it->Next(RIL_BLOCK);
continue;
case PT_NOISE:
tprintf("TODO: Please report image which triggers the noise case.\n");
ASSERT_HOST(false);
default:
break;
}
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
@ -200,7 +232,6 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
int left, top, right, bottom;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
do {

View File

@ -99,6 +99,9 @@ namespace tesseract {
static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin");
static STRING_VAR(document_title, "", "Title of output document (used for hOCR and PDF output)");
#ifdef HAVE_LIBCURL
static INT_VAR(curl_timeout, 0, "Timeout for curl in seconds");
#endif
/** Minimum sensible image size to be worth running tesseract. */
const int kMinRectSize = 10;
@ -1150,6 +1153,17 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
if (curlcode != CURLE_OK) {
return error("curl_easy_setopt");
}
int timeout = curl_timeout;
if (timeout > 0) {
curlcode = curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
if (curlcode != CURLE_OK) {
return error("curl_easy_setopt");
}
curlcode = curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
if (curlcode != CURLE_OK) {
return error("curl_easy_setopt");
}
}
curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
if (curlcode != CURLE_OK) {
return error("curl_easy_setopt");
@ -1357,6 +1371,22 @@ char *TessBaseAPI::GetUTF8Text() {
if (it->Empty(RIL_PARA)) {
continue;
}
auto block_type = it->BlockType();
switch (block_type) {
case PT_FLOWING_IMAGE:
case PT_HEADING_IMAGE:
case PT_PULLOUT_IMAGE:
case PT_HORZ_LINE:
case PT_VERT_LINE:
// Ignore images and lines for text output.
continue;
case PT_NOISE:
tprintf("TODO: Please report image which triggers the noise case.\n");
ASSERT_HOST(false);
default:
break;
}
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
text += para_text.get();
} while (it->Next(RIL_PARA));

View File

@ -228,6 +228,22 @@ int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath, const char *lang
return handle->Init(datapath, language);
}
// C-API wrapper which forwards to the TessBaseAPI::Init overload taking a
// data buffer (data / data_size) instead of a datapath.
//
// vars_vec and vars_values are parallel C arrays holding vars_vec_size
// variable names and values. They are copied into std::string vectors
// before the call; if either array pointer is null, both vectors stay empty.
// set_only_non_debug_params is converted from the C BOOL to a C++ bool.
// Returns whatever TessBaseAPI::Init returns.
int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size, const char *language,
                     TessOcrEngineMode mode, char **configs, int configs_size, char **vars_vec,
                     char **vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params) {
  std::vector<std::string> names;
  std::vector<std::string> values;
  // Only walk the arrays when both of them were supplied.
  const bool have_vars = (vars_vec != nullptr) && (vars_values != nullptr);
  for (size_t idx = 0; have_vars && idx < vars_vec_size; ++idx) {
    names.emplace_back(vars_vec[idx]);
    values.emplace_back(vars_values[idx]);
  }
  const bool non_debug_only = set_only_non_debug_params != 0;
  return handle->Init(data, data_size, language, mode, configs, configs_size, &names, &values,
                      non_debug_only, nullptr);
}
const char *TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI *handle) {
return handle->GetInitLanguagesAsString();
}

View File

@ -189,6 +189,36 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
std::unique_ptr<ResultIterator> res_it(GetIterator());
while (!res_it->Empty(RIL_BLOCK)) {
int left, top, right, bottom;
auto block_type = res_it->BlockType();
switch (block_type) {
case PT_FLOWING_IMAGE:
case PT_HEADING_IMAGE:
case PT_PULLOUT_IMAGE: {
// Handle all kinds of images.
res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
hocr_str << " <div class='ocr_photo' id='block_" << page_id << '_'
<< bcnt++ << "' title=\"bbox " << left << " " << top << " "
<< right << " " << bottom << "\"></div>\n";
res_it->Next(RIL_BLOCK);
continue;
}
case PT_HORZ_LINE:
case PT_VERT_LINE:
// Handle horizontal and vertical lines.
res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
hocr_str << " <div class='ocr_separator' id='block_" << page_id << '_'
<< bcnt++ << "' title=\"bbox " << left << " " << top << " "
<< right << " " << bottom << "\"></div>\n";
res_it->Next(RIL_BLOCK);
continue;
case PT_NOISE:
tprintf("TODO: Please report image which triggers the noise case.\n");
ASSERT_HOST(false);
default:
break;
}
if (res_it->Empty(RIL_WORD)) {
res_it->Next(RIL_WORD);
continue;
@ -218,7 +248,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
hocr_str << "\n <span class='";
switch (res_it->BlockType()) {
switch (block_type) {
case PT_HEADING_TEXT:
hocr_str << "ocr_header";
break;
@ -228,6 +258,11 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
case PT_CAPTION_TEXT:
hocr_str << "ocr_caption";
break;
case PT_FLOWING_IMAGE:
case PT_HEADING_IMAGE:
case PT_PULLOUT_IMAGE:
ASSERT_HOST(false);
break;
default:
hocr_str << "ocr_line";
}
@ -248,12 +283,10 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
hocr_str << "\n <span class='ocrx_word'"
<< " id='"
<< "word_" << page_id << "_" << wcnt << "'";
int left, top, right, bottom;
bool bold, italic, underlined, monospace, serif, smallcaps;
int pointsize, font_id;
const char *font_name;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
font_name =
const char *font_name =
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
&serif, &smallcaps, &pointsize, &font_id);
hocr_str << " title='bbox " << left << " " << top << " " << right << " "

View File

@ -109,6 +109,9 @@ bool TessResultRenderer::EndDocument() {
}
// Appends the NUL-terminated string s to the output by handing it, together
// with its strlen() length, to AppendData. A null pointer is ignored.
void TessResultRenderer::AppendString(const char *s) {
  if (s != nullptr) {
    AppendData(s, strlen(s));
  }
}

View File

@ -27,6 +27,9 @@ TFloat DotProductNative(const TFloat *u, const TFloat *v, int n);
// Uses Intel AVX intrinsics to access the SIMD instruction set.
TFloat DotProductAVX(const TFloat *u, const TFloat *v, int n);
// Uses Intel AVX512F intrinsics to access the SIMD instruction set.
TFloat DotProductAVX512F(const TFloat *u, const TFloat *v, int n);
// Use Intel FMA.
TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);

View File

@ -0,0 +1,70 @@
///////////////////////////////////////////////////////////////////////
// File: dotproductavx512.cpp
// Description: Architecture-specific dot-product function.
// Author: Stefan Weil
//
// (C) Copyright 2022
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#if !defined(__AVX__)
# if defined(__i686__) || defined(__x86_64__)
# error Implementation only for AVX capable architectures
# endif
#else
# include <immintrin.h>
# include <cstdint>
# include "dotproduct.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel AVX intrinsics to access the SIMD instruction set.
# if defined(FAST_FLOAT)
float DotProductAVX512F(const float *u, const float *v, int n) {
const unsigned quot = n / 16;
const unsigned rem = n % 16;
__m512 t0 = _mm512_setzero_ps();
for (unsigned k = 0; k < quot; k++) {
__m512 f0 = _mm512_loadu_ps(u);
__m512 f1 = _mm512_loadu_ps(v);
t0 = _mm512_fmadd_ps(f0, f1, t0);
u += 16;
v += 16;
}
float result = _mm512_reduce_add_ps(t0);
for (unsigned k = 0; k < rem; k++) {
result += *u++ * *v++;
}
return result;
}
# else
double DotProductAVX512F(const double *u, const double *v, int n) {
const unsigned quot = n / 8;
const unsigned rem = n % 8;
__m512d t0 = _mm512_setzero_pd();
for (unsigned k = 0; k < quot; k++) {
t0 = _mm512_fmadd_pd(_mm512_loadu_pd(u), _mm512_loadu_pd(v), t0);
u += 8;
v += 8;
}
double result = _mm512_reduce_add_pd(t0);
for (unsigned k = 0; k < rem; k++) {
result += *u++ * *v++;
}
return result;
}
# endif
} // namespace tesseract.
#endif

View File

@ -27,6 +27,14 @@
# include <cstdint>
# include <vector>
# if defined(_MSC_VER) && _MSC_VER >= 1925 && _MSC_VER <= 1929 && \
defined(_WIN32) && !defined(_WIN64)
// Optimize for size (/Os) instead of using the default optimization for some
// versions of the 32 bit Visual Studio compiler which generate buggy code.
# pragma optimize("", off)
# pragma optimize("s", on)
# endif
namespace tesseract {
// Number of outputs held in each register. 8 x 32 bit ints.

View File

@ -53,12 +53,14 @@
#endif
#if defined(HAVE_NEON) && !defined(__aarch64__)
# ifdef ANDROID
# if defined(HAVE_ANDROID_GETCPUFAMILY)
# include <cpu-features.h>
# else
/* Assume linux */
# elif defined(HAVE_GETAUXVAL)
# include <asm/hwcap.h>
# include <sys/auxv.h>
# elif defined(HAVE_ELF_AUX_INFO)
# include <sys/auxv.h>
# include <sys/elf.h>
# endif
#endif
@ -210,21 +212,29 @@ SIMDDetect::SIMDDetect() {
#endif
#if defined(HAVE_NEON) && !defined(__aarch64__)
# ifdef ANDROID
# if defined(HAVE_ANDROID_GETCPUFAMILY)
{
AndroidCpuFamily family = android_getCpuFamily();
if (family == ANDROID_CPU_FAMILY_ARM)
neon_available_ = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON);
}
# else
/* Assume linux */
# elif defined(HAVE_GETAUXVAL)
neon_available_ = getauxval(AT_HWCAP) & HWCAP_NEON;
# elif defined(HAVE_ELF_AUX_INFO)
unsigned long hwcap = 0;
elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
neon_available_ = hwcap & HWCAP_NEON;
# endif
#endif
// Select code for calculation of dot product based on autodetection.
if (false) {
// This is a dummy to support conditional compilation.
#if defined(HAVE_AVX512F)
} else if (avx512F_available_) {
// AVX512F detected.
SetDotProduct(DotProductAVX512F, &IntSimdMatrix::intSimdMatrixAVX2);
#endif
#if defined(HAVE_AVX2)
} else if (avx2_available_) {
// AVX2 detected.

View File

@ -159,7 +159,7 @@ PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
// Helper computes median xheight in the image.
static double MedianXHeight(BLOCK_LIST *block_list) {
BLOCK_IT block_it(block_list);
STATS xheights(0, block_it.data()->pdblk.bounding_box().height());
STATS xheights(0, block_it.data()->pdblk.bounding_box().height() - 1);
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
ROW_IT row_it(block_it.data()->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {

View File

@ -2015,7 +2015,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
PAGE_RES_IT page_res_it(page_res);
WERD_RES *word; // current word
STATS doc_fonts(0, font_table_size_); // font counters
STATS doc_fonts(0, font_table_size_ - 1); // font counters
// Gather font id statistics.
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {

View File

@ -103,8 +103,8 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
// Returns a new x-height maximally compatible with the result in word_res.
// See comment above for overall algorithm.
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift) {
STATS top_stats(0, UINT8_MAX);
STATS shift_stats(-UINT8_MAX, UINT8_MAX);
STATS top_stats(0, UINT8_MAX - 1);
STATS shift_stats(-UINT8_MAX, UINT8_MAX - 1);
int bottom_shift = 0;
int num_blobs = word_res->rebuild_word->NumBlobs();
do {

View File

@ -225,7 +225,7 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
return true; // Already at the end!
}
// The result is true if we step forward by element and find we are
// at the the end of the page or at beginning of *all* levels in:
// at the end of the page or at beginning of *all* levels in:
// [level, element).
// When there is more than one level difference between element and level,
// we could for instance move forward one symbol and still be at the first
@ -566,7 +566,15 @@ void PageIterator::Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const {
BLOCK *block = it_->block()->block;
auto *block_res = it_->block();
if (block_res == nullptr) {
// Nothing can be done, so return default values.
*orientation = ORIENTATION_PAGE_UP;
*writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT;
*textline_order = TEXTLINE_ORDER_TOP_TO_BOTTOM;
return;
}
auto *block = block_res->block;
// Orientation
FCOORD up_in_image(0.0, 1.0);

View File

@ -108,10 +108,8 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
// If a UNLV zone file can be found, use that instead of segmentation.
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file[0] != '\0') {
std::string name = input_file;
const char *lastdot = strrchr(name.c_str(), '.');
if (lastdot != nullptr) {
name[lastdot - name.c_str()] = '\0';
}
std::size_t lastdot = name.find_last_of(".");
name = name.substr(0, lastdot);
read_unlv_file(name, width, height, blocks);
}
if (blocks->empty()) {

View File

@ -1623,8 +1623,8 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
}
STATS lefts(lmin, lmax + 1);
STATS rights(rmin, rmax + 1);
STATS lefts(lmin, lmax);
STATS rights(rmin, rmax);
for (int i = start; i < end; i++) {
RowScratchRegisters &sr = (*rows)[i];
if (sr.ri_->num_words == 0) {
@ -1655,7 +1655,7 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
(rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
int word_width =
(rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2;
STATS spacing_widths(0, 5 + word_width);
STATS spacing_widths(0, 4 + word_width);
for (int i = row_start; i < row_end; i++) {
if (rows[i].ri_->num_words > 1) {
spacing_widths.add(rows[i].ri_->average_interword_space, 1);

View File

@ -616,7 +616,7 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel
return true; // Already at the end!
}
// The result is true if we step forward by element and find we are
// at the the end of the page or at beginning of *all* levels in:
// at the end of the page or at beginning of *all* levels in:
// [level, element).
// When there is more than one level difference between element and level,
// we could for instance move forward one symbol and still be at the first
@ -731,10 +731,12 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) {
std::vector<int> textline_order;
std::vector<StrongScriptDirection> dirs;
CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &dirs, &textline_order);
tprintf("Strong Script dirs [%p/P=%s]: ", it_->row(),
tprintf("Strong Script dirs [%p/P=%s]: ",
static_cast<void *>(it_->row()),
current_paragraph_is_ltr_ ? "ltr" : "rtl");
PrintScriptDirs(dirs);
tprintf("Logical textline order [%p/P=%s]: ", it_->row(),
tprintf("Logical textline order [%p/P=%s]: ",
static_cast<void *>(it_->row()),
current_paragraph_is_ltr_ ? "ltr" : "rtl");
for (int i : textline_order) {
tprintf("%d ", i);

View File

@ -23,8 +23,6 @@
# include "config_auto.h"
#endif
#include <regex> // for std::regex_match
#include "control.h"
#include "matchdefs.h"
#include "pageres.h"
@ -248,12 +246,11 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
std::string remains(lang_str);
// Look whether the model file uses a prefix which must be applied to
// included model files as well.
std::regex e("(.*)/[^/]*");
std::cmatch cm;
std::string prefix;
if (std::regex_match(lang.c_str(), cm, e, std::regex_constants::match_default)) {
size_t found = lang.find_last_of('/');
if (found != std::string::npos) {
// A prefix was found.
prefix = cm[1].str() + "/";
prefix = lang.substr(0, found + 1);
}
while (!remains.empty()) {
// Find the start of the lang code and which vector to add to.

View File

@ -86,27 +86,27 @@ Tesseract::Tesseract()
, double_MEMBER(thresholding_window_size, 0.33,
"Window size for measuring local statistics (to be "
"multiplied by image DPI). "
"This parameter is used by the Sauvola thresolding method",
"This parameter is used by the Sauvola thresholding method",
this->params())
, double_MEMBER(thresholding_kfactor, 0.34,
"Factor for reducing threshold due to variance. "
"This parameter is used by the Sauvola thresolding method."
"This parameter is used by the Sauvola thresholding method."
" Normal range: 0.2-0.5",
this->params())
, double_MEMBER(thresholding_tile_size, 0.33,
"Desired tile size (to be multiplied by image DPI). "
"This parameter is used by the LeptonicaOtsu thresolding "
"This parameter is used by the LeptonicaOtsu thresholding "
"method",
this->params())
, double_MEMBER(thresholding_smooth_kernel_size, 0.0,
"Size of convolution kernel applied to threshold array "
"(to be multiplied by image DPI). Use 0 for no smoothing. "
"This parameter is used by the LeptonicaOtsu thresolding "
"This parameter is used by the LeptonicaOtsu thresholding "
"method",
this->params())
, double_MEMBER(thresholding_score_fraction, 0.1,
"Fraction of the max Otsu score. "
"This parameter is used by the LeptonicaOtsu thresolding "
"This parameter is used by the LeptonicaOtsu thresholding "
"method. "
"For standard Otsu use 0.0, otherwise 0.1 is recommended",
this->params())

View File

@ -16,6 +16,11 @@
//
///////////////////////////////////////////////////////////////////////
// Include automatically generated configuration file
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include "otsuthr.h"
#include "thresholder.h"
#include "tprintf.h" // for tprintf
@ -27,7 +32,8 @@
#include <allheaders.h>
#include <tesseract/baseapi.h> // for api->GetIntVariable()
#include <cstdint> // for uint32_t
#include <algorithm> // for std::max, std::min
#include <cstdint> // for uint32_t
#include <cstring>
#include <tuple>
@ -164,16 +170,7 @@ void ImageThresholder::SetImage(const Image pix) {
// Convert the image as necessary so it is one of binary, plain RGB, or
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
// not just a clone of the input.
if (pixGetColormap(src)) {
Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
depth = pixGetDepth(tmp);
if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(tmp, false);
tmp.destroy();
} else {
pix_ = tmp;
}
} else if (depth > 1 && depth < 8) {
if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(src, false);
} else {
pix_ = src.copy();

View File

@ -810,7 +810,7 @@ void TO_ROW::compute_vertical_projection() { // project whole row
row_box += blob_it.data()->bounding_box();
}
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN - 1);
projection_left = row_box.left() - PROJECTION_MARGIN;
projection_right = row_box.right() + PROJECTION_MARGIN;
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {

View File

@ -725,8 +725,8 @@ public:
ASSERT_HOST(block->pdblk.poly_block() != nullptr);
block->rotate(rotation);
// Update the median size statistic from the blobs list.
STATS widths(0, block->pdblk.bounding_box().width());
STATS heights(0, block->pdblk.bounding_box().height());
STATS widths(0, block->pdblk.bounding_box().width() - 1);
STATS heights(0, block->pdblk.bounding_box().height() - 1);
BLOBNBOX_IT blob_it(&blobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
widths.add(blob_it.data()->bounding_box().width(), 1);
@ -769,7 +769,7 @@ public:
#ifndef GRAPHICS_DISABLED
// Draw the noise blobs from all lists in red.
void plot_noise_blobs(ScrollView *to_win);
// Draw the blobs on on the various lists in the block in different colors.
// Draw the blobs on the various lists in the block in different colors.
void plot_graded_blobs(ScrollView *to_win);
#endif

View File

@ -28,9 +28,4 @@ const double CCStruct::kAscenderFraction = 0.25;
const double CCStruct::kXHeightCapRatio =
CCStruct::kXHeightFraction / (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
// Destructor.
// It is defined here, so the compiler can create a single vtable
// instead of weak vtables in every compilation unit.
CCStruct::~CCStruct() = default;
} // namespace tesseract

View File

@ -22,11 +22,9 @@
#include "ccutil.h" // for CCUtil
namespace tesseract {
class TESS_API CCStruct : public CCUtil {
public:
CCStruct() = default;
~CCStruct() override;
class CCStruct : public CCUtil {
public:
// Globally accessible constants.
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
@ -36,6 +34,7 @@ public:
// Derived value giving the x-height as a fraction of cap-height.
static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_

View File

@ -871,7 +871,7 @@ void C_OUTLINE::ComputeBinaryOffsets() {
increment_step(s, 1, &head_pos, dir_counts, pos_totals);
}
for (int s = 0; s < stepcount; pos += step(s++)) {
// At step s, s in in the middle of [s-2, s+2].
// At step s, s is in the middle of [s-2, s+2].
increment_step(s + 2, 1, &head_pos, dir_counts, pos_totals);
int dir_index = chain_code(s);
ICOORD step_vec = step(s);

View File

@ -81,7 +81,7 @@ void LLSQ::add(const LLSQ &other) {
void LLSQ::remove(double x, double y) { // delete an element
if (total_weight <= 0.0) { // illegal
EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
EMPTY_LLSQ.error("LLSQ::remove", ABORT);
}
total_weight--; // count elements
sigx -= x; // update accumulators

View File

@ -180,7 +180,7 @@ static void ComputeRunlengthImage(const TBOX &box,
++y;
}
}
// Now set the image pixels the the MIN of the x and y runlengths.
// Now set the image pixels to the MIN of the x and y runlengths.
for (int iy = 0; iy < height; ++iy) {
int x = 0;
for (auto x_coord : x_coords[iy]) {

View File

@ -347,7 +347,7 @@ void BLOCK_RECT_IT::forward() { // next rectangle
/**********************************************************************
* BLOCK_LINE_IT::get_line
*
* Get the the start and width of a line in the block.
* Get the start and width of a line in the block.
**********************************************************************/
TDimension BLOCK_LINE_IT::get_line( // get a line

View File

@ -59,8 +59,8 @@ public:
unichar_id_ = UNICHAR_SPACE;
fontinfo_id_ = -1;
fontinfo_id2_ = -1;
rating_ = 10.0;
certainty_ = -1.0;
rating_ = 10.0f;
certainty_ = -1.0f;
script_id_ = -1;
min_xheight_ = 0.0f;
max_xheight_ = 0.0f;
@ -170,13 +170,17 @@ public:
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const;
void print(const UNICHARSET *unicharset) const {
tprintf("r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, max_xheight_,
tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
static_cast<double>(rating_),
static_cast<double>(certainty_),
static_cast<double>(min_xheight_),
static_cast<double>(max_xheight_),
unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str());
}
void print_full() const {
print(nullptr);
tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
fontinfo_id2_, yshift_, classifier_);
fontinfo_id2_, static_cast<double>(yshift_), classifier_);
}
// Sort function for sorting BLOB_CHOICEs in increasing order of rating.
static int SortByRating(const void *p1, const void *p2) {

View File

@ -40,14 +40,14 @@ namespace tesseract {
*
* Construct a new stats element by allocating and zeroing the memory.
**********************************************************************/
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
if (max_bucket_value_plus_1 <= min_bucket_value) {
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value) {
if (max_bucket_value < min_bucket_value) {
min_bucket_value = 0;
max_bucket_value_plus_1 = 1;
max_bucket_value = 1;
}
rangemin_ = min_bucket_value; // setup
rangemax_ = max_bucket_value_plus_1;
buckets_ = new int32_t[rangemax_ - rangemin_];
rangemax_ = max_bucket_value;
buckets_ = new int32_t[1 + rangemax_ - rangemin_];
clear();
}
@ -56,16 +56,16 @@ STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
*
* Alter the range on an existing stats element.
**********************************************************************/
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
if (max_bucket_value_plus_1 <= min_bucket_value) {
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value) {
if (max_bucket_value < min_bucket_value) {
return false;
}
if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
if (rangemax_ - rangemin_ != max_bucket_value - min_bucket_value) {
delete[] buckets_;
buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value];
buckets_ = new int32_t[1 + max_bucket_value - min_bucket_value];
}
rangemin_ = min_bucket_value; // setup
rangemax_ = max_bucket_value_plus_1;
rangemax_ = max_bucket_value;
clear(); // zero it
return true;
}
@ -78,7 +78,7 @@ bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
void STATS::clear() { // clear out buckets
total_count_ = 0;
if (buckets_ != nullptr) {
memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
memset(buckets_, 0, (1 + rangemax_ - rangemin_) * sizeof(buckets_[0]));
}
}
@ -97,12 +97,11 @@ STATS::~STATS() {
* Add a set of samples to (or delete from) a pile.
**********************************************************************/
void STATS::add(int32_t value, int32_t count) {
if (buckets_ == nullptr) {
return;
if (buckets_ != nullptr) {
value = ClipToRange(value, rangemin_, rangemax_);
buckets_[value - rangemin_] += count;
total_count_ += count; // keep count of total
}
value = ClipToRange(value, rangemin_, rangemax_ - 1);
buckets_[value - rangemin_] += count;
total_count_ += count; // keep count of total
}
/**********************************************************************
@ -116,7 +115,7 @@ int32_t STATS::mode() const { // get mode of samples
}
int32_t max = buckets_[0]; // max cell count
int32_t maxindex = 0; // index of max
for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
for (int index = rangemax_ - rangemin_; index > 0; --index) {
if (buckets_[index] > max) {
max = buckets_[index]; // find biggest
maxindex = index;
@ -135,7 +134,7 @@ double STATS::mean() const { // get mean of samples
return static_cast<double>(rangemin_);
}
int64_t sum = 0;
for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
for (int index = rangemax_ - rangemin_; index >= 0; --index) {
sum += static_cast<int64_t>(index) * buckets_[index];
}
return static_cast<double>(sum) / total_count_ + rangemin_;
@ -152,7 +151,7 @@ double STATS::sd() const { // standard deviation
}
int64_t sum = 0;
double sqsum = 0.0;
for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
for (int index = rangemax_ - rangemin_; index >= 0; --index) {
sum += static_cast<int64_t>(index) * buckets_[index];
sqsum += static_cast<double>(index) * index * buckets_[index];
}
@ -186,7 +185,7 @@ double STATS::ile(double frac) const {
#endif
int sum = 0;
int index = 0;
for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
for (index = 0; index <= rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
;
}
if (index > 0) {
@ -207,7 +206,7 @@ int32_t STATS::min_bucket() const { // Find min
return rangemin_;
}
int32_t min = 0;
for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
for (min = 0; (min <= rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
;
}
return rangemin_ + min;
@ -224,7 +223,7 @@ int32_t STATS::max_bucket() const { // Find max
return rangemin_;
}
int32_t max;
for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) {
for (max = rangemax_ - rangemin_; max > 0 && buckets_[max] == 0; max--) {
;
}
return rangemin_ + max;
@ -270,7 +269,7 @@ bool STATS::local_min(int32_t x) const {
if (buckets_ == nullptr) {
return false;
}
x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
x = ClipToRange(x, rangemin_, rangemax_) - rangemin_;
if (buckets_[x] == 0) {
return true;
}
@ -281,10 +280,10 @@ bool STATS::local_min(int32_t x) const {
if (index >= 0 && buckets_[index] < buckets_[x]) {
return false;
}
for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
for (index = x + 1; index <= rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
;
}
if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
if (index <= rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
return false;
} else {
return true;
@ -304,7 +303,7 @@ void STATS::smooth(int32_t factor) {
return;
}
STATS result(rangemin_, rangemax_);
int entrycount = rangemax_ - rangemin_;
int entrycount = 1 + rangemax_ - rangemin_;
for (int entry = 0; entry < entrycount; entry++) {
// centre weight
int count = buckets_[entry] * factor;
@ -368,7 +367,7 @@ int32_t STATS::cluster(float lower, // thresholds
clusters[0].add(entry, count);
}
}
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
pile_count(entry) <= pile_count(entry - 1);
entry++) {
count = pile_count(entry) - clusters[0].pile_count(entry);
@ -386,7 +385,7 @@ int32_t STATS::cluster(float lower, // thresholds
do {
new_cluster = false;
new_mode = 0;
for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
for (entry = 0; entry <= rangemax_ - rangemin_; entry++) {
count = buckets_[entry] - clusters[0].buckets_[entry];
// remaining pile
if (count > 0) { // any to handle
@ -433,7 +432,7 @@ int32_t STATS::cluster(float lower, // thresholds
clusters[0].add(entry, count);
}
}
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
pile_count(entry) <= pile_count(entry - 1);
entry++) {
count = pile_count(entry) - clusters[0].pile_count(entry);
@ -482,7 +481,7 @@ int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes)
if (max_modes <= 0) {
return 0;
}
int src_count = rangemax_ - rangemin_;
int src_count = 1 + rangemax_ - rangemin_;
// Used copies the counts in buckets_ as they get used.
STATS used(rangemin_, rangemax_);
modes.clear();
@ -605,7 +604,7 @@ void STATS::plot(ScrollView *window, // to draw in
}
window->Pen(colour);
for (int index = 0; index < rangemax_ - rangemin_; index++) {
for (int index = 0; index <= rangemax_ - rangemin_; index++) {
window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
yorigin + yscale * buckets_[index]);
}
@ -630,7 +629,7 @@ void STATS::plotline(ScrollView *window, // to draw in
}
window->Pen(colour);
window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
for (int index = 0; index < rangemax_ - rangemin_; index++) {
for (int index = 0; index <= rangemax_ - rangemin_; index++) {
window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
}
}

View File

@ -30,23 +30,20 @@ namespace tesseract {
class TESS_API STATS {
public:
// The histogram buckets are in the range
// [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
// [min_bucket_value, max_bucket_value].
// Any data under min_bucket value is silently mapped to min_bucket_value,
// and likewise, any data over max_bucket_value is silently mapped to
// max_bucket_value.
// In the internal array, min_bucket_value maps to 0 and
// max_bucket_value_plus_1 - min_bucket_value to the array size.
// TODO(rays) This is ugly. Convert the second argument to
// max_bucket_value and all the code that uses it.
STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
// 1 + max_bucket_value - min_bucket_value to the array size.
STATS(int32_t min_bucket_value, int32_t max_bucket_value);
STATS() = default; // empty for arrays
~STATS();
// (Re)Sets the range and clears the counts.
// See the constructor for info on max and min values.
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value);
void clear(); // empty buckets
@ -73,11 +70,14 @@ public:
double median() const; // get median of samples
// Returns the count of the given value.
int32_t pile_count(int32_t value) const {
if (buckets_ == nullptr) {
return 0;
}
if (value <= rangemin_) {
return buckets_[0];
}
if (value >= rangemax_ - 1) {
return buckets_[rangemax_ - rangemin_ - 1];
if (value >= rangemax_) {
return buckets_[rangemax_ - rangemin_];
}
return buckets_[value - rangemin_];
}
@ -139,7 +139,6 @@ public:
private:
int32_t rangemin_ = 0; // min of range
// rangemax_ is not well named as it is really one past the max.
int32_t rangemax_ = 0; // max of range
int32_t total_count_ = 0; // no of samples
int32_t *buckets_ = nullptr; // array of cells

View File

@ -142,7 +142,6 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
for (j = 0;
j < adaption_ambigs_entry->size() && (*adaption_ambigs_entry)[j] > id_to_insert;
++j) {
;
}
if (j < adaption_ambigs_entry->size()) {
if ((*adaption_ambigs_entry)[j] != id_to_insert) {

View File

@ -116,7 +116,7 @@ public:
// Comparator function for sorting AmbigSpec_LISTs. The lists will
// be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors
// in a a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1].
// in a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1].
static int compare_ambig_specs(const void *spec1, const void *spec2) {
const AmbigSpec *s1 = *static_cast<const AmbigSpec *const *>(spec1);
const AmbigSpec *s2 = *static_cast<const AmbigSpec *const *>(spec2);

View File

@ -10,13 +10,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(_WIN32)
# include <io.h> // for _access
#endif
#include "ccutil.h"
#include <cstdlib>
#include <cstring> // for std::strrchr
namespace tesseract {
CCUtil::CCUtil()
: params_()
, INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", &params_)
, BOOL_MEMBER(use_ambigs_for_adaption, false,
, INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", &params_)
, BOOL_MEMBER(use_ambigs_for_adaption, false,
"Use ambigs for deciding"
" whether to adapt to a character",
&params_) {}
@ -26,4 +34,61 @@ CCUtil::CCUtil()
// instead of weak vtables in every compilation unit.
CCUtil::~CCUtil() = default;
/**
 * @brief CCUtil::main_setup - choose the tessdata directory and record the
 * name of the image.
 *
 * The data directory is taken from the first source below that applies:
 *  1. argv0, when it is not empty;
 *  2. the TESSDATA_PREFIX environment variable, when it is set;
 *  3. (Windows only) a "tessdata" directory located next to the executable,
 *     tried when the current datadir is empty or not accessible.
 * If datadir is still empty afterwards, the compiled in -DTESSDATA_PREFIX
 * (with "/tessdata" appended) is used, or the current directory when that
 * macro is not defined. A trailing directory separator is appended to the
 * result if it does not already end with one.
 *
 * @param argv0 - path to the directory with language files and config files
 * @param basename - name of image
 */
void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
  imagebasename = basename; /**< name of image */

  const char *env_prefix = getenv("TESSDATA_PREFIX");

  if (!argv0.empty()) {
    /* An explicitly supplied location always wins. */
    datadir = argv0;
  } else if (env_prefix != nullptr) {
    /* Otherwise fall back to the environment. */
    datadir = env_prefix;
#if defined(_WIN32)
  } else if (datadir.empty() || _access(datadir.c_str(), 0) != 0) {
    /* Probe for a tessdata directory beside the running executable. */
    char exe_path[_MAX_PATH];
    DWORD exe_length = GetModuleFileName(nullptr, exe_path, sizeof(exe_path));
    if (exe_length > 0 && exe_length < sizeof(exe_path)) {
      char *last_separator = std::strrchr(exe_path, '\\');
      if (last_separator != nullptr) {
        /* Cut off the executable name, keeping only its directory. */
        *last_separator = '\0';
        std::string candidate(exe_path);
        candidate += "/tessdata";
        if (_access(candidate.c_str(), 0) == 0) {
          datadir = candidate;
        }
      }
    }
#endif /* _WIN32 */
  }

  // None of the sources above produced a directory.
  if (datadir.empty()) {
#if defined(TESSDATA_PREFIX)
    // Fall back to the prefix chosen at build time.
    datadir = TESSDATA_PREFIX "/tessdata";
#else
    datadir = "./";
#endif /* TESSDATA_PREFIX */
  }

  // datadir is not empty at this point, so back() is safe.
  // Make sure the path ends with a directory separator.
  const char tail = datadir.back();
  if (tail != '/' && tail != '\\') {
    datadir += "/";
  }
}
} // namespace tesseract

View File

@ -89,7 +89,7 @@ void CLIST::assign_to_sublist( // to this list
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
if (!empty()) {
LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT, nullptr);
LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT);
}
last = start_it->extract_sublist(end_it);
@ -246,9 +246,9 @@ void *CLIST_ITERATOR::data_relative( // get data + or - ...
#ifndef NDEBUG
if (!list)
NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr);
NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT);
if (list->empty())
EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr);
EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT);
if (offset < -1)
BAD_PARAMETER.error("CLIST_ITERATOR::data_relative", ABORT, "offset < -l");
#endif
@ -308,7 +308,7 @@ link */
/* Error if either current element is deleted */
if (!current || !other_it->current) {
DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT, nullptr);
DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT);
}
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
@ -389,12 +389,12 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points");
if (list != other_it->list)
BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT);
if (list->empty())
EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT, nullptr);
EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT);
if (!current || !other_it->current)
DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT);
#endif
ex_current_was_last = other_it->ex_current_was_last = false;
@ -404,7 +404,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
temp_it.mark_cycle_pt();
do { // walk sublist
if (temp_it.cycled_list()) { // can't find end pt
BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT);
}
if (temp_it.at_last()) {

View File

@ -190,7 +190,7 @@ public:
void *data() { // get current data
#ifndef NDEBUG
if (!list) {
NO_LIST.error("CLIST_ITERATOR::data", ABORT, nullptr);
NO_LIST.error("CLIST_ITERATOR::data", ABORT);
}
#endif
return current->data;
@ -523,7 +523,7 @@ inline void *CLIST_ITERATOR::extract() {
#ifndef NDEBUG
if (!current) { // list empty or
// element extracted
NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT, nullptr);
NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT);
}
#endif
@ -576,7 +576,7 @@ inline void *CLIST_ITERATOR::move_to_first() {
inline void CLIST_ITERATOR::mark_cycle_pt() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT);
}
#endif
@ -666,7 +666,7 @@ inline void CLIST_ITERATOR::add_to_end( // element to add
void *new_data) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT, nullptr);
NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT);
}
if (!new_data) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_to_end", ABORT, "new_data is nullptr");
@ -702,15 +702,12 @@ public:
}
};
#define CLISTIZEH(CLASSNAME) \
class CLASSNAME##_CLIST : public X_CLIST<CLASSNAME> { \
public: \
using X_CLIST<CLASSNAME>::X_CLIST; \
}; \
class CLASSNAME##_C_IT : public X_ITER<CLIST_ITERATOR, CLASSNAME> { \
public: \
using X_ITER<CLIST_ITERATOR, CLASSNAME>::X_ITER; \
CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : X_ITER(list) {} \
#define CLISTIZEH(CLASSNAME) \
class CLASSNAME##_CLIST : public X_CLIST<CLASSNAME> { \
using X_CLIST<CLASSNAME>::X_CLIST; \
}; \
struct CLASSNAME##_C_IT : X_ITER<CLIST_ITERATOR, CLASSNAME> { \
using X_ITER<CLIST_ITERATOR, CLASSNAME>::X_ITER; \
};
} // namespace tesseract

View File

@ -70,7 +70,7 @@ void ELIST::assign_to_sublist( // to this list
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
if (!empty()) {
LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT, nullptr);
LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT);
}
last = start_it->extract_sublist(end_it);
@ -169,7 +169,7 @@ ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void *
ELIST_LINK *ELIST_ITERATOR::forward() {
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST_ITERATOR::forward", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::forward", ABORT);
#endif
if (list->empty()) {
return nullptr;
@ -189,13 +189,17 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
}
#ifndef NDEBUG
if (!current)
NULL_DATA.error("ELIST_ITERATOR::forward", ABORT, nullptr);
NULL_DATA.error("ELIST_ITERATOR::forward", ABORT);
#endif
next = current->next;
#ifndef NDEBUG
if (!next)
NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT, "This is: %p Current is: %p", this, current);
if (!next) {
NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT,
"This is: %p Current is: %p",
static_cast<void *>(this),
static_cast<void *>(current));
}
#endif
return current;
}
@ -214,9 +218,9 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT);
if (list->empty())
EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT);
if (offset < -1)
BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l");
#endif
@ -231,7 +235,7 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
#ifndef NDEBUG
if (!ptr)
NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT);
#endif
return ptr;
@ -248,7 +252,7 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
ELIST_LINK *ELIST_ITERATOR::move_to_last() {
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT);
#endif
while (current != list->last) {
@ -276,7 +280,7 @@ void ELIST_ITERATOR::exchange( // positions of 2 links
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST_ITERATOR::exchange", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::exchange", ABORT);
if (!other_it)
BAD_PARAMETER.error("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr");
if (!(other_it->list))
@ -293,7 +297,7 @@ link */
/* Error if either current element is deleted */
if (!current || !other_it->current) {
DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT, nullptr);
DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT);
}
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
@ -379,14 +383,14 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
if (!other_it)
BAD_PARAMETER.error("ELIST_ITERATOR::extract_sublist", ABORT, "other_it nullptr");
if (!list)
NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT);
if (list != other_it->list)
BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT);
if (list->empty())
EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr);
EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT);
if (!current || !other_it->current)
DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT);
#endif
ex_current_was_last = other_it->ex_current_was_last = false;
@ -396,7 +400,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
temp_it.mark_cycle_pt();
do { // walk sublist
if (temp_it.cycled_list()) { // can't find end pt
BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT);
}
if (temp_it.at_last()) {

View File

@ -231,10 +231,10 @@ public:
ELIST_LINK *data() { // get current data
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::data", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::data", ABORT);
}
if (!current) {
NULL_DATA.error("ELIST_ITERATOR::data", ABORT, nullptr);
NULL_DATA.error("ELIST_ITERATOR::data", ABORT);
}
#endif
return current;
@ -256,7 +256,7 @@ public:
bool empty() const { // is list empty?
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::empty", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::empty", ABORT);
}
#endif
return list->empty();
@ -334,13 +334,13 @@ inline void ELIST_ITERATOR::add_after_then_move( // element to add
ELIST_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr);
STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT);
}
#endif
@ -381,13 +381,13 @@ inline void ELIST_ITERATOR::add_after_stay_put( // element to add
ELIST_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT);
}
#endif
@ -430,13 +430,13 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add
ELIST_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr);
STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT);
}
#endif
@ -473,13 +473,13 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add
ELIST_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT);
}
#endif
@ -517,7 +517,7 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add
inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT);
}
if (!list_to_add) {
BAD_PARAMETER.error("ELIST_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
@ -564,7 +564,7 @@ inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) {
inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT);
}
if (!list_to_add) {
BAD_PARAMETER.error("ELIST_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
@ -612,11 +612,11 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::extract", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::extract", ABORT);
}
if (!current) { // list empty or
// element extracted
NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT, nullptr);
NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT);
}
#endif
@ -649,7 +649,7 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() {
inline ELIST_LINK *ELIST_ITERATOR::move_to_first() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT);
}
#endif
@ -673,7 +673,7 @@ inline ELIST_LINK *ELIST_ITERATOR::move_to_first() {
inline void ELIST_ITERATOR::mark_cycle_pt() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT);
}
#endif
@ -695,7 +695,7 @@ inline void ELIST_ITERATOR::mark_cycle_pt() {
inline bool ELIST_ITERATOR::at_first() const {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::at_first", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::at_first", ABORT);
}
#endif
@ -715,7 +715,7 @@ inline bool ELIST_ITERATOR::at_first() const {
inline bool ELIST_ITERATOR::at_last() const {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::at_last", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::at_last", ABORT);
}
#endif
@ -735,7 +735,7 @@ inline bool ELIST_ITERATOR::at_last() const {
inline bool ELIST_ITERATOR::cycled_list() const {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT);
}
#endif
@ -754,7 +754,7 @@ inline void ELIST_ITERATOR::sort( // sort elements
const void *, const void *)) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::sort", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::sort", ABORT);
}
#endif
@ -776,13 +776,13 @@ inline void ELIST_ITERATOR::add_to_end( // element to add
ELIST_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr);
NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST_ITERATOR::add_to_end", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr);
STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT);
}
#endif
@ -802,13 +802,10 @@ inline void ELIST_ITERATOR::add_to_end( // element to add
#define ELISTIZEH(CLASSNAME) \
class CLASSNAME##_LIST : public X_LIST<ELIST, ELIST_ITERATOR, CLASSNAME> { \
public: \
using X_LIST<ELIST, ELIST_ITERATOR, CLASSNAME>::X_LIST; \
}; \
class CLASSNAME##_IT : public X_ITER<ELIST_ITERATOR, CLASSNAME> { \
public: \
using X_ITER<ELIST_ITERATOR, CLASSNAME>::X_ITER; \
CLASSNAME##_IT(CLASSNAME##_LIST *list) : X_ITER(list) {} \
};
} // namespace tesseract

View File

@ -71,7 +71,7 @@ void ELIST2::assign_to_sublist( // to this list
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
if (!empty()) {
LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT, nullptr);
LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT);
}
last = start_it->extract_sublist(end_it);
@ -162,7 +162,7 @@ void ELIST2::add_sorted(int comparator(const void *, const void *), ELIST2_LINK
ELIST2_LINK *ELIST2_ITERATOR::forward() {
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST2_ITERATOR::forward", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::forward", ABORT);
#endif
if (list->empty()) {
return nullptr;
@ -183,15 +183,18 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() {
#ifndef NDEBUG
if (!current)
NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT, nullptr);
NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT);
#endif
next = current->next;
#ifndef NDEBUG
if (!next)
NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT, "This is: %p Current is: %p", this,
current);
if (!next) {
NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT,
"This is: %p Current is: %p",
static_cast<void *>(this),
static_cast<void *>(current));
}
#endif
return current;
@ -207,7 +210,7 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() {
ELIST2_LINK *ELIST2_ITERATOR::backward() {
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST2_ITERATOR::backward", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::backward", ABORT);
#endif
if (list->empty()) {
return nullptr;
@ -228,10 +231,13 @@ ELIST2_LINK *ELIST2_ITERATOR::backward() {
#ifndef NDEBUG
if (!current)
NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT, nullptr);
if (!prev)
NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT, "This is: %p Current is: %p", this,
current);
NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT);
if (!prev) {
NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT,
"This is: %p Current is: %p",
static_cast<void *>(this),
static_cast<void *>(current));
}
#endif
prev = current->prev;
@ -251,9 +257,9 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( // get data + or - ..
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT);
if (list->empty())
EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT);
#endif
if (offset < 0) {
@ -268,7 +274,7 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( // get data + or - ..
#ifndef NDEBUG
if (!ptr)
NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT);
#endif
return ptr;
@ -292,7 +298,7 @@ void ELIST2_ITERATOR::exchange( // positions of 2 links
#ifndef NDEBUG
if (!list)
NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT);
if (!other_it)
BAD_PARAMETER.error("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr");
if (!(other_it->list))
@ -309,7 +315,7 @@ link */
/* Error if either current element is deleted */
if (!current || !other_it->current) {
DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT, nullptr);
DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT);
}
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
@ -407,14 +413,14 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( // from this current
if (!other_it)
BAD_PARAMETER.error("ELIST2_ITERATOR::extract_sublist", ABORT, "other_it nullptr");
if (!list)
NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT);
if (list != other_it->list)
BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT);
if (list->empty())
EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr);
EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT);
if (!current || !other_it->current)
DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT);
#endif
ex_current_was_last = other_it->ex_current_was_last = false;
@ -424,7 +430,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( // from this current
temp_it.mark_cycle_pt();
do { // walk sublist
if (temp_it.cycled_list()) { // can't find end pt
BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT);
}
if (temp_it.at_last()) {

View File

@ -191,10 +191,10 @@ public:
ELIST2_LINK *data() { // get current data
#ifndef NDEBUG
if (!current) {
NULL_DATA.error("ELIST2_ITERATOR::data", ABORT, nullptr);
NULL_DATA.error("ELIST2_ITERATOR::data", ABORT);
}
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::data", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::data", ABORT);
}
#endif
return current;
@ -219,7 +219,7 @@ public:
bool empty() const { // is list empty?
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::empty", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::empty", ABORT);
}
#endif
return list->empty();
@ -301,13 +301,13 @@ inline void ELIST2_ITERATOR::add_after_then_move( // element to add
ELIST2_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr);
STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT);
}
#endif
@ -352,13 +352,13 @@ inline void ELIST2_ITERATOR::add_after_stay_put( // element to add
ELIST2_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr);
STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT);
}
#endif
@ -405,13 +405,13 @@ inline void ELIST2_ITERATOR::add_before_then_move( // element to add
ELIST2_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr);
STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT);
}
#endif
@ -453,13 +453,13 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add
ELIST2_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr);
STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT);
}
#endif
@ -502,7 +502,7 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add
inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT);
}
if (!list_to_add) {
BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
@ -553,7 +553,7 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) {
inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT);
}
if (!list_to_add) {
BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
@ -605,11 +605,11 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::extract", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::extract", ABORT);
}
if (!current) { // list empty or
// element extracted
NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT, nullptr);
NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT);
}
#endif
@ -646,7 +646,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() {
inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT);
}
#endif
@ -666,7 +666,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() {
inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT);
}
#endif
@ -690,7 +690,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() {
inline void ELIST2_ITERATOR::mark_cycle_pt() {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT);
}
#endif
@ -712,7 +712,7 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() {
inline bool ELIST2_ITERATOR::at_first() const {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT);
}
#endif
@ -732,7 +732,7 @@ inline bool ELIST2_ITERATOR::at_first() const {
inline bool ELIST2_ITERATOR::at_last() const {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT);
}
#endif
@ -752,7 +752,7 @@ inline bool ELIST2_ITERATOR::at_last() const {
inline bool ELIST2_ITERATOR::cycled_list() const {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT);
}
#endif
@ -771,7 +771,7 @@ inline void ELIST2_ITERATOR::sort( // sort elements
const void *, const void *)) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::sort", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::sort", ABORT);
}
#endif
@ -793,13 +793,13 @@ inline void ELIST2_ITERATOR::add_to_end( // element to add
ELIST2_LINK *new_element) {
#ifndef NDEBUG
if (!list) {
NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr);
NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT);
}
if (!new_element) {
BAD_PARAMETER.error("ELIST2_ITERATOR::add_to_end", ABORT, "new_element is nullptr");
}
if (new_element->next) {
STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr);
STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT);
}
#endif
@ -821,13 +821,10 @@ inline void ELIST2_ITERATOR::add_to_end( // element to add
#define ELIST2IZEH(CLASSNAME) \
class CLASSNAME##_LIST : public X_LIST<ELIST2, ELIST2_ITERATOR, CLASSNAME> { \
public: \
using X_LIST<ELIST2, ELIST2_ITERATOR, CLASSNAME>::X_LIST; \
}; \
class CLASSNAME##_IT : public X_ITER<ELIST2_ITERATOR, CLASSNAME> { \
public: \
struct CLASSNAME##_IT : X_ITER<ELIST2_ITERATOR, CLASSNAME> { \
using X_ITER<ELIST2_ITERATOR, CLASSNAME>::X_ITER; \
CLASSNAME##_IT(CLASSNAME##_LIST *list) : X_ITER(list) {} \
CLASSNAME *backward() { \
return reinterpret_cast<CLASSNAME *>(ELIST2_ITERATOR::backward()); \
} \

View File

@ -91,8 +91,12 @@ void ERRCODE::error( // handle error
#endif
abort();
default:
BADERRACTION.error("error", ABORT, nullptr);
BADERRACTION.error("error", ABORT);
}
}
// Overload without a format string: forwards to the three-argument
// error(caller, action, format, ...) overload, passing nullptr as the format.
void ERRCODE::error(const char *caller, TessErrorLogCode action) const {
error(caller, action, nullptr);
}
} // namespace tesseract

View File

@ -31,12 +31,6 @@ enum TessErrorLogCode {
ABORT = 2 /*abort after error */
};
/* Explicit Error Abort codes */
#define NO_ABORT_CODE 0
#define LIST_ABORT 1
#define MEMORY_ABORT 2
#define FILE_ABORT 3
#if !defined(__GNUC__) && !defined(__attribute__)
# define __attribute__(attr) // compiler without support for __attribute__
#endif
@ -49,6 +43,7 @@ public:
TessErrorLogCode action, // action to take
const char *format, ... // fprintf format
) const __attribute__((format(printf, 4, 5)));
void error(const char *caller, TessErrorLogCode action) const;
constexpr ERRCODE(const char *string) : message(string) {} // initialize with string
};

View File

@ -24,14 +24,6 @@
namespace tesseract {
constexpr ERRCODE CANTOPENFILE("Can't open file");
constexpr ERRCODE CANTCREATEFILE("Can't create file");
constexpr ERRCODE CANTMAKEPIPE("Can't create pipe");
constexpr ERRCODE CANTCONNECTPIPE("Can't reconnect pipes to stdin/stdout");
constexpr ERRCODE READFAILED("Read of file failed");
constexpr ERRCODE WRITEFAILED("Write of file failed");
constexpr ERRCODE SELECTFAILED("Select failed");
constexpr ERRCODE EXECFAILED("Could not exec new process");
} // namespace tesseract

View File

@ -41,10 +41,6 @@ public:
GenericVector() {
init(kDefaultVectorSize);
}
// Constructs a vector from a size and an initial value by calling init(size)
// followed by init_to_size(size, init_val).
// NOTE(review): presumably the result holds `size` copies of init_val --
// confirm against init_to_size().
GenericVector(int size, const T &init_val) {
init(size);
init_to_size(size, init_val);
}
// Copy
GenericVector(const GenericVector &other) {
@ -107,14 +103,6 @@ public:
int push_back(T object);
void operator+=(const T &t);
// Push an element in the end of the array if the same
// element is not already contained in the array.
int push_back_new(const T &object);
// Push an element in the front of the array
// Note: This function is O(n)
int push_front(const T &object);
// Set the value at the given index
void set(const T &t, int index);
@ -178,27 +166,13 @@ public:
// bool T::Serialize(FILE* fp) const that returns false in case of error.
// Returns false in case of error.
bool SerializeClasses(FILE *fp) const;
bool SerializeClasses(TFile *fp) const;
// Reads a vector of classes from the given file. Assumes the existence of
// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
// error. Also needs T::T() and T::T(constT&), as init_to_size is used in
// this function. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeClasses(bool swap, FILE *fp);
bool DeSerializeClasses(TFile *fp);
// Grows a raw array to twice its current size.
// A new array of 2 * current_size elements is allocated, the first
// current_size elements of data are copied into it bytewise with memcpy
// (operator=() is not invoked per element, unlike double_the_size()),
// data is deleted, and the pointer to the new array is returned.
static T *double_the_size_memcpy(int current_size, T *data) {
  T *grown = new T[current_size * 2];
  const size_t bytes_to_copy = sizeof(T) * current_size;
  memcpy(grown, data, bytes_to_copy);
  delete[] data;
  return grown;
}
// Reverses the elements of the vector.
void reverse() {
for (int i = 0; i < size_used_ / 2; ++i) {
@ -221,26 +195,6 @@ public:
qsort(data_, size_used_, sizeof(*data_), comparator);
}
// Searches the array (assuming sorted in ascending order, using sort()) for
// an element equal to target and returns the index of the best candidate.
// The return value is conceptually the largest index i such that
// data_[i] <= target or 0 if target < the whole vector (an empty vector
// also yields 0, because the loop never runs).
// NOTE that this function uses operator> so really the return value is
// the largest index i such that data_[i] > target is false.
int binary_search(const T &target) const {
  int bottom = 0;
  int top = size_used_;
  while (top - bottom > 1) {
    // Same value as (bottom + top) / 2 for 0 <= bottom < top, but the
    // intermediate result cannot overflow int when the vector is huge.
    const int middle = bottom + (top - bottom) / 2;
    if (data_[middle] > target) {
      top = middle;
    } else {
      bottom = middle;
    }
  }
  return bottom;
}
// Swaps the elements with the given indices.
void swap(int index1, int index2) {
if (index1 != index2) {
@ -307,11 +261,6 @@ inline bool SaveDataToFile(const GenericVector<char> &data, const char *filename
return result;
}
// Equality predicate: returns the result of t1 == t2 for the given type.
template <typename T>
bool cmp_eq(T const &t1, T const &t2) {
  const bool same = (t1 == t2);
  return same;
}
// Used by sort()
// return < 0 if t1 < t2
// return 0 if t1 == t2
@ -632,29 +581,6 @@ int GenericVector<T>::push_back(T object) {
return index;
}
// Appends object unless get_index() already finds it in the vector.
// Returns the index reported by get_index() when it is non-negative,
// otherwise the value returned by push_back().
template <typename T>
int GenericVector<T>::push_back_new(const T &object) {
  const int existing = get_index(object);
  return existing >= 0 ? existing : push_back(object);
}
// Inserts object at the front of the array and returns its index (always 0).
// Every existing element is moved one slot towards the back, so the cost is
// linear in the number of stored elements.
template <typename T>
int GenericVector<T>::push_front(const T &object) {
  if (size_used_ == size_reserved_) {
    double_the_size();
  }
  // Shift from the back so that no element is overwritten before it is moved.
  int slot = size_used_;
  while (slot > 0) {
    data_[slot] = data_[slot - 1];
    --slot;
  }
  data_[0] = object;
  ++size_used_;
  return 0;
}
template <typename T>
void GenericVector<T>::operator+=(const T &t) {
push_back(t);
@ -831,18 +757,6 @@ bool GenericVector<T>::SerializeClasses(FILE *fp) const {
}
return true;
}
// Writes the element count followed by every element (through the element's
// own Serialize(TFile*)) to fp. Stops at the first failure.
// Returns false in case of error.
template <typename T>
bool GenericVector<T>::SerializeClasses(TFile *fp) const {
  bool ok = fp->FWrite(&size_used_, sizeof(size_used_), 1) == 1;
  for (int i = 0; ok && i < size_used_; ++i) {
    ok = data_[i].Serialize(fp);
  }
  return ok;
}
// Reads a vector of classes from the given file. Assumes the existence of
// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
@ -850,24 +764,6 @@ bool GenericVector<T>::SerializeClasses(TFile *fp) const {
// this function. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Reads a 32 bit element count from fp (byte-reversed first when swap is
// true), resizes the vector to that many default-constructed elements via
// init_to_size, then lets each element read itself with DeSerialize(swap, fp).
// Stops at the first failure. Returns false in case of error.
// NOTE(review): the count read from the file is passed to init_to_size
// without a range check -- confirm how a negative value is handled there.
template <typename T>
bool GenericVector<T>::DeSerializeClasses(bool swap, FILE *fp) {
  int32_t count;
  if (fread(&count, sizeof(count), 1, fp) != 1) {
    return false;
  }
  if (swap) {
    Reverse32(&count);
  }
  T prototype;
  init_to_size(count, prototype);
  bool ok = true;
  for (int i = 0; ok && i < count; ++i) {
    ok = data_[i].DeSerialize(swap, fp);
  }
  return ok;
}
template <typename T>
bool GenericVector<T>::DeSerializeClasses(TFile *fp) {
int32_t reserved;
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {

View File

@ -93,13 +93,6 @@ inline void chomp_string(char *str) {
}
}
// Advance the current pointer of the file if it points to a newline character.
// If the next character is anything else, the file position is restored so
// that the character can be read again. At end of file (or on a read error)
// nothing was consumed, so the position is left untouched instead of being
// moved back over a character that had already been read.
inline void SkipNewline(FILE *file) {
  const int next = fgetc(file);
  if (next != '\n' && next != EOF) {
    fseek(file, -1, SEEK_CUR);
  }
}
// return the smallest multiple of block_size greater than or equal to n.
inline int RoundUp(int n, int block_size) {
return block_size * ((n + block_size - 1) / block_size);
@ -197,21 +190,11 @@ inline void ReverseN(void *ptr, int num_bytes) {
}
}
// Byte-swaps a 16 bit quantity in place for big/little-endian conversion.
inline void Reverse16(void *ptr) {
  const int kNumBytes = 2;
  ReverseN(ptr, kNumBytes);
}
// Byte-swaps a 32 bit quantity in place for big/little-endian conversion.
inline void Reverse32(void *ptr) {
  const int kNumBytes = 4;
  ReverseN(ptr, kNumBytes);
}
// Byte-swaps a 64 bit quantity in place for big/little-endian conversion.
inline void Reverse64(void *ptr) {
  const int kNumBytes = 8;
  ReverseN(ptr, kNumBytes);
}
// Reads a vector of simple types from the given file. Assumes that bitwise
// read/write will work with ReverseN according to sizeof(T).
// Returns false in case of error.

View File

@ -23,12 +23,9 @@
namespace tesseract {
constexpr ERRCODE SERIALISE_LINKS("Attempted to (de)serialise a link element");
#ifndef NDEBUG
constexpr ERRCODE NO_LIST("Iterator not set to a list");
constexpr ERRCODE NULL_OBJECT("List found this = nullptr!");
constexpr ERRCODE NULL_DATA("List would have returned a nullptr data pointer");
constexpr ERRCODE NULL_CURRENT("List current position is nullptr");
constexpr ERRCODE NULL_NEXT("Next element on the list is nullptr");

View File

@ -1,82 +0,0 @@
/**********************************************************************
* File: mainblk.cpp (Formerly main.c)
* Description: Function to call from main() to setup.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <cstdlib>
#include <cstring> // for std::strrchr
#if defined(_WIN32)
# include <io.h> // for _access
#endif
#include "ccutil.h"
#include "fileerr.h"
namespace tesseract {
/**
* @brief CCUtil::main_setup - set location of tessdata and name of image
*
* Stores basename in imagebasename and chooses datadir from the first
* source that applies:
*   1. argv0, if it is not empty;
*   2. the TESSDATA_PREFIX environment variable, if it is set;
*   3. (Windows only) "<directory of the executable>/tessdata", tried when
*      datadir is empty or not accessible;
*   4. the compiled in -DTESSDATA_PREFIX followed by "/tessdata", if that
*      macro is defined;
*   5. "./" if datadir is still empty after the steps above.
* Finally a "/" is appended unless datadir already ends in "/" or "\".
*
* @param argv0 - path to the directory with language files and config files;
* an empty string means "not given".
* @param basename - name of image
*/
void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
imagebasename = basename; /**< name of image */
char *tessdata_prefix = getenv("TESSDATA_PREFIX");
// NOTE: the if/else chain below is stitched together by the preprocessor;
// the Windows branch and the compiled-in branch only exist when the
// corresponding macros are defined.
if (!argv0.empty()) {
/* Use tessdata prefix from the command line. */
datadir = argv0;
} else if (tessdata_prefix) {
/* Use tessdata prefix from the environment. */
datadir = tessdata_prefix;
#if defined(_WIN32)
} else if (datadir.empty() || _access(datadir.c_str(), 0) != 0) {
/* Look for tessdata in directory of executable. */
char path[_MAX_PATH];
DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
// Only use the result if a non-empty path was returned and it is shorter
// than the buffer.
if (length > 0 && length < sizeof(path)) {
// Cut the executable name off at the last backslash, keeping its directory.
char *separator = std::strrchr(path, '\\');
if (separator != nullptr) {
*separator = '\0';
datadir = path;
datadir += "/tessdata";
}
}
#endif /* _WIN32 */
#if defined(TESSDATA_PREFIX)
} else {
// Use tessdata prefix which was compiled in.
datadir = TESSDATA_PREFIX "/tessdata";
#endif
}
// datadir may still be empty:
if (datadir.empty()) {
datadir = "./";
}
// check for missing directory separator
// lastchar points at the final character of datadir (non-empty at this
// point), so each strcmp against a one-character string tests that character.
const char *lastchar = datadir.c_str();
lastchar += datadir.length() - 1;
if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) {
datadir += "/";
}
}
} // namespace tesseract

View File

@ -43,7 +43,8 @@ public:
tprintf(
"ObjectCache(%p)::~ObjectCache(): WARNING! LEAK! object %p "
"still has count %d (id %s)\n",
this, it.object, it.count, it.id.c_str());
static_cast<void *>(this), static_cast<void *>(it.object),
it.count, it.id.c_str());
} else {
delete it.object;
it.object = nullptr;

View File

@ -29,14 +29,6 @@
namespace tesseract {
/***********************************************************************
QUOTE_IT MACRO DEFINITION
===========================
Replace <parm> with "<parm>". <parm> may be an arbitrary number of tokens
***********************************************************************/
#define QUOTE_IT(parm) #parm
// Return number of elements of an array.
template <typename T, size_t N>
constexpr size_t countof(T const (&)[N]) noexcept {

View File

@ -48,7 +48,7 @@ TessdataManager::TessdataManager(FileReader reader)
SetVersionString(TESSERACT_VERSION_STR);
}
// Lazily loads from the the given filename. Won't actually read the file
// Lazily loads from the given filename. Won't actually read the file
// until it needs it.
void TessdataManager::LoadFileLater(const char *data_file_name) {
Clear();

View File

@ -138,7 +138,7 @@ public:
return is_loaded_;
}
// Lazily loads from the the given filename. Won't actually read the file
// Lazily loads from the given filename. Won't actually read the file
// until it needs it.
void LoadFileLater(const char *data_file_name);
/**

View File

@ -38,4 +38,6 @@ extern TESS_API void tprintf( // Trace printf
} // namespace tesseract
#undef __attribute__
#endif // define TESSERACT_CCUTIL_TPRINTF_H

View File

@ -614,7 +614,6 @@ public:
if (INVALID_UNICHAR_ID == unichar_id) {
*width = 0.0f;
*width_sd = 0.0f;
;
return;
}
ASSERT_HOST(contains_unichar_id(unichar_id));

View File

@ -276,7 +276,7 @@ void Classify::LearnWord(const char *fontname, WERD_RES *word) {
tprintf("\n\nAdapting to word = %s\n", word->best_choice->debug_string().c_str());
}
thresholds = new float[word_len];
word->ComputeAdaptionThresholds(certainty_scale, matcher_perfect_threshold,
word->ComputeAdaptionThresholds(getDict().certainty_scale, matcher_perfect_threshold,
matcher_good_threshold, matcher_rating_margin, thresholds);
}
int start_blob = 0;

View File

@ -101,7 +101,6 @@ Classify::Classify()
"its expected textline position",
this->params())
, double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params())
, double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", this->params())
, double_MEMBER(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used",
this->params())
, double_MEMBER(classify_adapted_pruning_factor, 2.5,

View File

@ -394,7 +394,6 @@ public:
double_VAR_H(matcher_clustering_max_angle_delta);
double_VAR_H(classify_misfit_junk_penalty);
double_VAR_H(rating_scale);
double_VAR_H(certainty_scale);
double_VAR_H(tessedit_class_miss_scale);
double_VAR_H(classify_adapted_pruning_factor);
double_VAR_H(classify_adapted_pruning_threshold);

View File

@ -28,7 +28,6 @@
#include <cfloat> // for FLT_MAX
#include <cmath> // for M_PI
#include <array> // for std::array
#include <vector> // for std::vector
namespace tesseract {

View File

@ -66,7 +66,7 @@ class FCOORD;
/* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the
* 3 axes of the quantized feature space.
* The position of the the bits recorded for each class in the
* The position of the bits recorded for each class in the
* 4th dimension is determined by using CPrunerWordIndexFor(c),
* where c is the corresponding class id. */
struct CLASS_PRUNER_STRUCT {

View File

@ -44,7 +44,7 @@ struct UnicharRating {
tprintf(
"Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%u,"
" %zu fonts\n",
unichar_id, rating, adapted, config, feature_misses, fonts.size());
unichar_id, static_cast<double>(rating), adapted, config, feature_misses, fonts.size());
}
// Helper function to get the index of the first result with the required

View File

@ -333,7 +333,7 @@ protected:
// Finds the edge with the given direction, word_end and unichar_id
// in the node indicated by node_ref. Fills in the pointer to the
// EDGE_RECORD and the index of the edge with the the values
// EDGE_RECORD and the index of the edge with the values
// corresponding to the edge found. Returns true if an edge was found.
bool edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end,
UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const;

View File

@ -1,4 +1,4 @@
// Generated code with lookup tables
// Generated code with lookup tables (see generate_lut.py)
#include "functions.h"
namespace tesseract {
const TFloat TanhTable[] = {

View File

@ -42,13 +42,13 @@ extern const TFloat LogisticTable[];
// Non-linearity (sigmoid) functions with cache tables and clipping.
inline TFloat Tanh(TFloat x) {
if (x < 0.0) {
if (x < 0) {
return -Tanh(-x);
}
x *= kScaleFactor;
auto index = static_cast<unsigned>(x);
if (index >= (kTableSize - 1)) {
return 1.0;
return 1;
}
TFloat tanh_i0 = TanhTable[index];
TFloat tanh_i1 = TanhTable[index + 1];
@ -57,13 +57,13 @@ inline TFloat Tanh(TFloat x) {
}
inline TFloat Logistic(TFloat x) {
if (x < 0.0) {
return 1.0 - Logistic(-x);
if (x < 0) {
return 1 - Logistic(-x);
}
x *= kScaleFactor;
auto index = static_cast<unsigned>(x);
if (index >= (kTableSize - 1)) {
return 1.0;
return 1;
}
TFloat l0 = LogisticTable[index];
TFloat l1 = LogisticTable[index + 1];
@ -79,36 +79,36 @@ struct FFunc {
};
struct FPrime {
inline TFloat operator()(TFloat y) const {
return y * (1.0 - y);
return y * (1 - y);
}
};
struct ClipFFunc {
inline TFloat operator()(TFloat x) const {
if (x <= 0.0) {
return 0.0;
if (x <= 0) {
return 0;
}
if (x >= 1.0) {
return 1.0;
if (x >= 1) {
return 1;
}
return x;
}
};
struct ClipFPrime {
inline TFloat operator()(TFloat y) const {
return 0.0 < y && y < 1.0 ? 1.0 : 0.0;
return 0 < y && y < 1 ? 1 : 0;
}
};
struct Relu {
inline TFloat operator()(TFloat x) const {
if (x <= 0.0) {
return 0.0;
if (x <= 0) {
return 0;
}
return x;
}
};
struct ReluPrime {
inline TFloat operator()(TFloat y) const {
return 0.0 < y ? 1.0 : 0.0;
return 0 < y ? 1 : 0;
}
};
struct GFunc {
@ -118,23 +118,23 @@ struct GFunc {
};
struct GPrime {
inline TFloat operator()(TFloat y) const {
return 1.0 - y * y;
return 1 - y * y;
}
};
struct ClipGFunc {
inline TFloat operator()(TFloat x) const {
if (x <= -1.0) {
return -1.0;
if (x <= -1) {
return -1;
}
if (x >= 1.0) {
return 1.0;
if (x >= 1) {
return 1;
}
return x;
}
};
struct ClipGPrime {
inline TFloat operator()(TFloat y) const {
return -1.0 < y && y < 1.0 ? 1.0 : 0.0;
return -1 < y && y < 1 ? 1 : 0;
}
};
struct HFunc {
@ -183,7 +183,7 @@ inline void SoftmaxInPlace(int n, T *inout) {
return;
}
// A limit on the negative range input to exp to guarantee non-zero output.
const T kMaxSoftmaxActivation = 86.0f;
const T kMaxSoftmaxActivation = 86;
T max_output = inout[0];
for (int i = 1; i < n; i++) {
@ -192,14 +192,14 @@ inline void SoftmaxInPlace(int n, T *inout) {
max_output = output;
}
}
T prob_total = 0.0;
T prob_total = 0;
for (int i = 0; i < n; i++) {
T prob = inout[i] - max_output;
prob = exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
prob = std::exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
prob_total += prob;
inout[i] = prob;
}
if (prob_total > 0.0) {
if (prob_total > 0) {
for (int i = 0; i < n; i++) {
inout[i] /= prob_total;
}
@ -207,7 +207,7 @@ inline void SoftmaxInPlace(int n, T *inout) {
}
// Copies n values of the given src vector to dest.
inline void CopyVector(int n, const TFloat *src, TFloat *dest) {
inline void CopyVector(unsigned n, const TFloat *src, TFloat *dest) {
memcpy(dest, src, n * sizeof(dest[0]));
}
@ -242,7 +242,7 @@ inline void SumVectors(int n, const TFloat *v1, const TFloat *v2, const TFloat *
// Sets the given n-vector vec to 0.
template <typename T>
inline void ZeroVector(int n, T *vec) {
inline void ZeroVector(unsigned n, T *vec) {
memset(vec, 0, n * sizeof(*vec));
}

View File

@ -4,22 +4,24 @@
import math
# kTableSize and kScaleFactor must match the values in functions.h.
# Size of static tables.
kTableSize = 4096
# Scale factor for float arg to int index.
kScaleFactor = 256.0
print("// Generated code with lookup tables")
print("// Generated code with lookup tables (see generate_lut.py)")
print('#include "functions.h"')
print("namespace tesseract {")
print("const double TanhTable[] = {")
print("const TFloat TanhTable[] = {")
for i in range(kTableSize):
print(" %a," % math.tanh(i / kScaleFactor))
print(" %a," % math.tanh(i / kScaleFactor))
print("};")
print("const double LogisticTable[] = {")
print("const TFloat LogisticTable[] = {")
for i in range(kTableSize):
print(" %a," % (1 / (1 + math.exp(-i / kScaleFactor))))
print(" %a," % (1 / (1 + math.exp(-i / kScaleFactor))))
print("};")
print("} // namespace tesseract.")
print("} // namespace tesseract.")

View File

@ -28,7 +28,7 @@
#include <cstdlib>
#include <sstream> // for std::ostringstream
#if !defined(__GNUC__) && defined(_MSC_VER)
#if defined(_MSC_VER) && !defined(__clang__)
# include <intrin.h> // _BitScanReverse
#endif

View File

@ -294,7 +294,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
void LSTMRecognizer::OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output,
float *sd) {
const int kOutputScale = INT8_MAX;
STATS stats(0, kOutputScale + 1);
STATS stats(0, kOutputScale);
for (int t = 0; t < outputs.Width(); ++t) {
int best_label = outputs.BestLabel(t, nullptr);
if (best_label != null_char_) {

View File

@ -127,7 +127,7 @@ void NetworkIO::ZeroInvalidElements() {
static void ComputeBlackWhite(Image pix, float *black, float *white) {
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
STATS mins(0, 256), maxes(0, 256);
STATS mins(0, 255), maxes(0, 255);
if (width >= 3) {
int y = height / 2;
l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y;
@ -412,15 +412,6 @@ void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_feature
}
}
// Zeroes num_features values of time step t, starting at the given offset,
// in whichever buffer is active: i_ when int_mode_ is set, f_ otherwise.
void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
  if (!int_mode_) {
    ZeroVector(num_features, f_[t] + offset);
    return;
  }
  ZeroVector(num_features, i_[t] + offset);
}
// Sets the given range to random values.
void NetworkIO::Randomize(int t, int offset, int num_features, TRand *randomizer) {
if (int_mode_) {

View File

@ -2,7 +2,6 @@
// File: networkio.h
// Description: Network input/output data, allowing float/int implementations.
// Author: Ray Smith
// Created: Tue Jun 17 08:43:11 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -146,9 +145,12 @@ public:
int src_t, int src_offset);
// Zeroes a single time step.
void ZeroTimeStep(int t) {
ZeroTimeStepGeneral(t, 0, NumFeatures());
if (int_mode_) {
memset(i_[t], 0, sizeof(*i_[t]) * NumFeatures());
} else {
memset(f_[t], 0, sizeof(*f_[t]) * NumFeatures());
}
}
void ZeroTimeStepGeneral(int t, int offset, int num_features);
// Sets the given range to random values.
void Randomize(int t, int offset, int num_features, TRand *randomizer);

View File

@ -28,7 +28,6 @@ public:
// ni_ and no_ will be set by AddToStack.
TESS_API
Parallel(const char *name, NetworkType type);
~Parallel() override = default;
// Returns the shape output from the network given an input shape (which may
// be partially unknown ie zero).

View File

@ -25,7 +25,7 @@
namespace tesseract {
// Holds a collection of other networks and forwards calls to each of them.
class Plumbing : public Network {
class TESS_API Plumbing : public Network {
public:
// ni_ and no_ will be set by AddToStack.
explicit Plumbing(const std::string &name);
@ -103,10 +103,8 @@ public:
return stack_;
}
// Returns a set of strings representing the layer-ids of all layers below.
TESS_API
void EnumerateLayers(const std::string *prefix, std::vector<std::string> &layers) const;
// Returns a pointer to the network layer corresponding to the given id.
TESS_API
Network *GetLayer(const char *id) const;
// Returns the learning rate for a specific layer of the stack.
float LayerLearningRate(const char *id) {
@ -129,7 +127,6 @@ public:
}
// Returns a pointer to the learning rate for the given layer id.
TESS_API
float *LayerLearningRatePtr(const char *id);
// Writes to the given file. Returns false in case of error.

View File

@ -24,12 +24,6 @@
#include "unicharcompress.h"
#include <algorithm> // for std::reverse
#include <deque>
#include <map>
#include <set>
#include <tuple>
#include <unordered_set>
#include <vector>
namespace tesseract {

View File

@ -29,11 +29,8 @@
#include "ratngs.h"
#include "unicharcompress.h"
#include <deque>
#include <set>
#include <tuple>
#include <unordered_set>
#include <vector>
#include <unordered_set> // for std::unordered_set
#include <vector> // for std::vector
namespace tesseract {

View File

@ -525,7 +525,7 @@ static void HistogramWeight(TFloat weight, STATS *histogram) {
}
void WeightMatrix::Debug2D(const char *msg) {
STATS histogram(0, kHistogramBuckets);
STATS histogram(0, kHistogramBuckets - 1);
if (int_mode_) {
for (int i = 0; i < wi_.dim1(); ++i) {
for (int j = 0; j < wi_.dim2(); ++j) {

Some files were not shown because too many files have changed in this diff Show More