mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Merge branch 'main' into improve-build-for-android-integrate-with-cpu_features-library
This commit is contained in:
commit
a96b2abb1a
6
.github/workflows/autotools-macos.yml
vendored
6
.github/workflows/autotools-macos.yml
vendored
@ -5,6 +5,7 @@ on:
|
||||
#push:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
|
||||
brew:
|
||||
@ -42,7 +43,7 @@ jobs:
|
||||
|
||||
- name: Configure Tesseract
|
||||
run: |
|
||||
./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' "PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
|
||||
./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
|
||||
|
||||
- name: Make and Install Tesseract
|
||||
run: |
|
||||
@ -130,6 +131,9 @@ jobs:
|
||||
- name: Install Macports
|
||||
run: |
|
||||
curl -LO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install
|
||||
# --remove-brew does not remove the Homebrew entries in bin,
|
||||
# so remove them now.
|
||||
rm -v $(brew --prefix)/bin/*
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
|
83
.github/workflows/autotools-openmp.yml
vendored
Normal file
83
.github/workflows/autotools-openmp.yml
vendored
Normal file
@ -0,0 +1,83 @@
|
||||
name: autotools-openmp
|
||||
# autotools on Ubuntu - run benchmark test with '--enable-openmp', without training tools
|
||||
on:
|
||||
#push:
|
||||
#schedule:
|
||||
# - cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
|
||||
linux:
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
config:
|
||||
- { name: 18.04-openmp, os: ubuntu-18.04 }
|
||||
- { name: 20.04-openmp, os: ubuntu-20.04 }
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Download fonts, tessdata and langdata required for tests
|
||||
run: |
|
||||
git clone https://github.com/egorpugin/tessdata tessdata_unittest
|
||||
cp tessdata_unittest/fonts/* test/testing/
|
||||
mv tessdata_unittest/* ../
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf libleptonica-dev -y
|
||||
sudo apt-get install libpango1.0-dev -y
|
||||
sudo apt-get install cabextract libarchive-dev -y
|
||||
sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
|
||||
|
||||
- name: Setup Tesseract
|
||||
run: |
|
||||
mkdir -p m4
|
||||
./autogen.sh
|
||||
|
||||
- name: Configure Tesseract
|
||||
run: |
|
||||
./configure '--disable-shared' '--enable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2'
|
||||
grep -i OpenMP config.log
|
||||
|
||||
- name: Make and Install Tesseract
|
||||
run: |
|
||||
make
|
||||
sudo make install
|
||||
|
||||
- name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
|
||||
run: |
|
||||
wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
|
||||
printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list
|
||||
|
||||
- name: Run Tesseract using image from issue 263 with tessdata_fast
|
||||
run: |
|
||||
lscpu
|
||||
free
|
||||
g++ --version
|
||||
tesseract -v
|
||||
time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
|
||||
echo "tessdata_fast"
|
||||
|
||||
- name: Run Tesseract using image from issue 263 with tessdata_fast and OpenMP Thread Limit
|
||||
run: |
|
||||
for lmt in {1..3}; do
|
||||
time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_fast"
|
||||
done
|
||||
|
||||
- name: Run Tesseract using image from issue 263 with tessdata_best and OpenMP Thread Limit
|
||||
run: |
|
||||
for lmt in {1..3}; do
|
||||
time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_best"
|
||||
done
|
||||
|
||||
- name: Run Tesseract using image from issue 263 with tessdata and OpenMP Thread Limit
|
||||
run: |
|
||||
for lmt in {1..3}; do
|
||||
time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata"
|
||||
done
|
140
.github/workflows/cmake-win64.yml
vendored
140
.github/workflows/cmake-win64.yml
vendored
@ -5,93 +5,81 @@ on:
|
||||
#push:
|
||||
schedule:
|
||||
- cron: 0 23 * * *
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
ILOC: d:/a/local
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: ${{ matrix.config.name }}
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
config:
|
||||
- {
|
||||
name: "Windows Latest MSVC - cmake",
|
||||
os: windows-latest,
|
||||
cc: "cl",
|
||||
cxx: "cl",
|
||||
environment_script: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat"
|
||||
}
|
||||
|
||||
name: cmake-win64
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
- uses: ilammy/setup-nasm@v1
|
||||
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Build and Install leptonica dependencies
|
||||
- name: Get the version
|
||||
id: get_version
|
||||
run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//}
|
||||
- name: Setup Installation Location
|
||||
run: |
|
||||
mkdir ${{env.ILOC}}
|
||||
- name: Build and Install zlib-ng
|
||||
shell: cmd
|
||||
run: |
|
||||
mkdir d:/a/local
|
||||
set PKG_CONFIG_PATH=d:/a/local/lib/pkgconfig
|
||||
echo "PKG_CONFIG_PATH=d:/a/local/lib/pkgconfig" >> $GITHUB_ENV
|
||||
|
||||
- name: Build and Install zlib
|
||||
shell: cmd
|
||||
run: |
|
||||
curl -sSL -o zlib1211.zip https://zlib.net/zlib1211.zip
|
||||
unzip.exe zlib1211.zip
|
||||
cd zlib-1.2.11
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git
|
||||
cd zlib-ng
|
||||
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
|
||||
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
- name: Build and Install libpng
|
||||
shell: cmd
|
||||
run: |
|
||||
curl -sSL -o lpng1637.zip https://download.sourceforge.net/libpng/lpng1637.zip
|
||||
unzip.exe lpng1637.zip
|
||||
unzip.exe -qq lpng1637.zip
|
||||
cd lpng1637
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
cmake --build build --config Release --target install
|
||||
|
||||
- name: Build and Install webp
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/webmproject/libwebp.git && cd libwebp
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
- name: Build and Install giflib
|
||||
shell: cmd
|
||||
run: |
|
||||
curl -sSL -o giflib-master.zip https://codeload.github.com/xbmc/giflib/zip/master
|
||||
unzip giflib-master.zip
|
||||
unzip -qq giflib-master.zip
|
||||
cd giflib-master
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}}
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
- name: Build and Install libjpeg
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git
|
||||
cd libjpeg-turbo
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
- name: Build and Install openjpeg
|
||||
- name: Build and Install webp
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/uclouvain/openjpeg.git
|
||||
cd openjpeg
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
git clone --depth 1 https://github.com/webmproject/libwebp.git
|
||||
cd libwebp
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWEBP_BUILD_ANIM_UTILS=OFF -DWEBP_BUILD_CWEBP=OFF -DWEBP_BUILD_DWEBP=OFF -DWEBP_BUILD_GIF2WEBP=OFF -DWEBP_BUILD_IMG2WEBP=OFF -DWEBP_BUILD_VWEBP=OFF -DWEBP_BUILD_WEBPMUX=OFF -DWEBP_BUILD_WEBPINFO=OFF -DWEBP_BUILD_EXTRAS=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
- name: Build and Install jbigkit
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/zdenop/jbigkit
|
||||
cd jbigkit-2.1
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
git clone --depth 1 https://github.com/zdenop/jbigkit.git
|
||||
cd jbigkit
|
||||
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
@ -99,17 +87,26 @@ jobs:
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/facebook/zstd.git
|
||||
cd zstd
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
cd zstd/build/cmake
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DZSTD_BUILD_PROGRAMS=OFF -DBUILD_TESTING=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
- name: Build and Install libtiff
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://gitlab.com/libtiff/libtiff
|
||||
git clone --depth 1 https://gitlab.com/libtiff/libtiff.git
|
||||
cd libtiff
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
- name: Build and Install openjpeg
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/uclouvain/openjpeg.git
|
||||
cd openjpeg
|
||||
cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_CODEC=OFF -DBUILD_TESTING=OFF -DBUILD_DOC=OFF -DCMAKE_WARN_DEPRECATED=OFF
|
||||
cmake --build build --config Release --target install
|
||||
cd ..
|
||||
|
||||
@ -117,26 +114,45 @@ jobs:
|
||||
shell: cmd
|
||||
run: |
|
||||
echo "Building leptonica..."
|
||||
git clone --depth 1 https://github.com/DanBloomberg/leptonica.git && cd leptonica
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DBUILD_PROG=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
git clone --depth 1 https://github.com/DanBloomberg/leptonica.git
|
||||
cd leptonica
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON
|
||||
cmake --build build --config Release --target install
|
||||
|
||||
- name: Build and Install libarchive
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/libarchive/libarchive.git
|
||||
cd libarchive
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DENABLE_TEST=OFF
|
||||
cmake --build build --config Release --target install
|
||||
|
||||
- name: Remove not needed tools Before building tesseract
|
||||
shell: cmd
|
||||
run: >
|
||||
rm -Rf ${{env.ILOC}}/bin/*.exe
|
||||
|
||||
- name: Build and Install tesseract
|
||||
shell: cmd
|
||||
run: |
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DBUILD_TRAINING_TOOLS=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DOPENMP_BUILD=OFF -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
|
||||
REM cmake -E env CXXFLAGS="/Qpar /fp:fast"
|
||||
cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=ON
|
||||
cmake --build build --config Release --target install
|
||||
|
||||
- name: Display Tesseract Version and Test Command Line Usage
|
||||
shell: cmd
|
||||
run: |
|
||||
git clone --depth 1 https://github.com/tesseract-ocr/tessconfigs
|
||||
mkdir d:/a/local/share
|
||||
move tessconfigs d:/a/local/share
|
||||
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output d:/a/local/share/tessconfigs/eng.traineddata
|
||||
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output d:/a/local/share/tessconfigs/osd.traineddata
|
||||
set TESSDATA_PREFIX=d:/a/local/share/tessconfigs
|
||||
set PATH=d:/a/local/bin;%PATH%
|
||||
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata
|
||||
curl -L https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata
|
||||
set TESSDATA_PREFIX=${{env.ILOC}}/share/tessdata
|
||||
set PATH=${{env.ILOC}}/bin;%PATH%
|
||||
tesseract -v
|
||||
tesseract --list-langs
|
||||
tesseract test/testing/phototest.tif -
|
||||
|
||||
- name: Upload Build Results
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: tesseract-${{ steps.get_version.outputs.VERSION }}-VS2019_win64
|
||||
path: ${{env.ILOC}}
|
||||
retention-days: 5
|
||||
|
13
.github/workflows/cmake.yml
vendored
13
.github/workflows/cmake.yml
vendored
@ -50,23 +50,26 @@ jobs:
|
||||
steps:
|
||||
- name: Install compilers on Linux
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install ${{ matrix.config.cxx }} -y
|
||||
if: runner.os == 'Linux'
|
||||
|
||||
# sudo apt-get install libarchive-dev libcurl4-openssl-dev libcurl4 curl -y
|
||||
- name: Install dependencies on Linux
|
||||
run: |
|
||||
sudo apt-get install autoconf libleptonica-dev -y
|
||||
sudo apt-get install libarchive-dev libcurl4-openssl-dev -y
|
||||
sudo apt-get install libpango1.0-dev -y
|
||||
sudo apt-get install cabextract -y
|
||||
sudo apt-get install ninja-build -y
|
||||
cmake --version
|
||||
if: runner.os == 'Linux'
|
||||
|
||||
- name: Install dependencies on macOS
|
||||
run: |
|
||||
brew install autoconf automake
|
||||
brew install leptonica
|
||||
brew install cairo pango icu4c
|
||||
brew install libarchive
|
||||
brew install pango
|
||||
brew install cabextract
|
||||
brew install ninja
|
||||
ninja --version
|
||||
@ -147,15 +150,15 @@ jobs:
|
||||
run: |
|
||||
export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
|
||||
cd test
|
||||
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11
|
||||
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++11
|
||||
./basicapitest
|
||||
if: runner.os == 'Linux'
|
||||
|
||||
- name: Build and run basicapitest (macOS)
|
||||
run: |
|
||||
export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
|
||||
export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH"
|
||||
cd test
|
||||
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" `pkg-config --cflags --libs tesseract lept ` -lcurl -pthread -std=c++11
|
||||
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++11
|
||||
./basicapitest
|
||||
if: runner.os == 'macOS'
|
||||
|
||||
|
20
.github/workflows/sw.yml
vendored
20
.github/workflows/sw.yml
vendored
@ -13,7 +13,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [windows-latest, windows-2022, ubuntu-20.04, macOS-latest]
|
||||
os: [windows-2022, windows-2019, ubuntu-22.04, ubuntu-20.04, macOS-latest]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@ -22,50 +22,50 @@ jobs:
|
||||
- uses: egorpugin/sw-action@master
|
||||
|
||||
- name: build
|
||||
if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
|
||||
if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
|
||||
run: ./sw -static -shared -platform x86,x64 -config d,r build
|
||||
|
||||
- name: build
|
||||
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
|
||||
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
|
||||
run: ./sw -static -shared -config d,r build -Dwith-tests=1
|
||||
|
||||
- name: download test data
|
||||
run: git clone https://github.com/egorpugin/tessdata tessdata_unittest
|
||||
|
||||
- name: copy fonts
|
||||
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
|
||||
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
|
||||
run: cp tessdata_unittest/fonts/* test/testing/
|
||||
|
||||
- name: copy fonts
|
||||
if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
|
||||
if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
|
||||
run: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse
|
||||
shell: pwsh
|
||||
|
||||
- name: test
|
||||
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
|
||||
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
|
||||
run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
|
||||
continue-on-error: true
|
||||
|
||||
- name: test-nightly
|
||||
if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022' && github.event.schedule=='0 0 * * *'
|
||||
if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019' && github.event.schedule=='0 0 * * *'
|
||||
run: ./sw -static -shared -config "d,r" test -Dwith-tests=1
|
||||
continue-on-error: true
|
||||
|
||||
# windows tests hang here for some reason, investigate
|
||||
#- name: test
|
||||
#if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
|
||||
#if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
|
||||
#run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
|
||||
#continue-on-error: true
|
||||
|
||||
- name: Upload Unit Test Results
|
||||
if: always() && matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
|
||||
if: always() && matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: Test Results (${{ matrix.os }})
|
||||
path: .sw/test/results.xml
|
||||
|
||||
- name: Publish Test Report
|
||||
if: always() && matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
|
||||
if: always() && matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
|
||||
uses: mikepenz/action-junit-report@v1
|
||||
with:
|
||||
check_name: test (${{ matrix.os }})
|
||||
|
1
.github/workflows/unittest-disablelegacy.yml
vendored
1
.github/workflows/unittest-disablelegacy.yml
vendored
@ -24,6 +24,7 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
|
||||
sudo apt-get install cabextract -y
|
||||
#sudo apt-get install libc++-7-dev libc++abi-7-dev -y
|
||||
|
3
.github/workflows/unittest-macos.yml
vendored
3
.github/workflows/unittest-macos.yml
vendored
@ -36,8 +36,7 @@ jobs:
|
||||
run: |
|
||||
./configure '--disable-shared' '--with-pic' \
|
||||
'CXX=${{ matrix.config.cxx }}' \
|
||||
'CXXFLAGS=-g -O2 -fsanitize=address,undefined' \
|
||||
"PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
|
||||
'CXXFLAGS=-g -O2 -fsanitize=address,undefined'
|
||||
|
||||
- name: Make and Install Tesseract
|
||||
run: |
|
||||
|
1
.github/workflows/unittest.yml
vendored
1
.github/workflows/unittest.yml
vendored
@ -24,6 +24,7 @@ jobs:
|
||||
|
||||
- name: Install dependencies (Linux)
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
|
||||
sudo apt-get install cabextract -y
|
||||
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -63,6 +63,7 @@ config_auto.h
|
||||
# ignore compilation files
|
||||
build/*
|
||||
/bin
|
||||
/cmake-*
|
||||
.deps
|
||||
.dirstamp
|
||||
/.libs
|
||||
|
40
.travis.yml
40
.travis.yml
@ -1,40 +0,0 @@
|
||||
# Travis CI configuration for Tesseract
|
||||
|
||||
sudo: false
|
||||
notifications:
|
||||
email: false
|
||||
language: cpp
|
||||
|
||||
os: linux
|
||||
dist: focal
|
||||
arch:
|
||||
- amd64
|
||||
- arm64
|
||||
- ppc64le
|
||||
- s390x
|
||||
compiler:
|
||||
- gcc
|
||||
- clang
|
||||
env:
|
||||
cache:
|
||||
directories:
|
||||
|
||||
before_install:
|
||||
- sudo apt-get install libleptonica-dev libpango1.0-dev libtiff5-dev -y
|
||||
|
||||
install:
|
||||
|
||||
script:
|
||||
- mkdir build
|
||||
- cd build
|
||||
- cmake .. -DSW_BUILD=OFF
|
||||
- make
|
||||
- sudo make install
|
||||
|
||||
#after_script: # let those commands trigger build errors
|
||||
- tesseract -v
|
||||
- text2image -v
|
||||
- lstmtraining -v
|
||||
- ls /home/travis/build/tesseract-ocr/tesseract/test/testing/*.tif
|
||||
- wget https://github.com/egorpugin/tessdata/raw/master/tessdata/eng.traineddata
|
||||
- tesseract /home/travis/build/tesseract-ocr/tesseract/test/testing/phototest.tif - -l eng --tessdata-dir ./
|
697
CMakeLists.txt
697
CMakeLists.txt
File diff suppressed because it is too large
Load Diff
19
ChangeLog
19
ChangeLog
@ -1,3 +1,22 @@
|
||||
2022-03-01 - V5.1.0
|
||||
* Handle image and line regions in output formats ALTO, hOCR and text.
|
||||
* New parameter curl_timeout for curl_easy_setopt.
|
||||
* Build fixes and improvements.
|
||||
* Catch nullptr in PageIterator::Orientation to improve robustness.
|
||||
* Remove unused code.
|
||||
|
||||
2022-01-06 - V5.0.1
|
||||
* Add SPDX-License-Identifier to public include files.
|
||||
* Support redirections when running OCR on a URL.
|
||||
* Lots of fixes and improvements for cmake builds.
|
||||
Distributions should use the autoconf build.
|
||||
* Fix broken msys2 build with gcc 11.
|
||||
* Fix parameter certainty_scale (was duplicated).
|
||||
* Fix some compiler warnings and clean code.
|
||||
* Correctly detect amd64 and i386 on FreeBSD.
|
||||
* Add libarchive and libcurl in continuous integration actions.
|
||||
* Update submodule googletest to release v1.11.0.
|
||||
|
||||
2021-11-22 - V5.0.0
|
||||
* Faster training and recognition by default (float instead of
|
||||
double calculations)
|
||||
|
11
Makefile.am
11
Makefile.am
@ -107,7 +107,7 @@ libtesseract_la_LDFLAGS += $(libarchive_LIBS)
|
||||
libtesseract_la_LDFLAGS += $(libcurl_LIBS)
|
||||
libtesseract_la_LDFLAGS += $(TENSORFLOW_LIBS)
|
||||
if T_WIN
|
||||
libtesseract_la_LDFLAGS += -no-undefined -Wl,--as-needed -lws2_32
|
||||
libtesseract_la_LDFLAGS += -no-undefined -lws2_32
|
||||
else
|
||||
libtesseract_la_LDFLAGS += $(NOUNDEFINED)
|
||||
endif
|
||||
@ -160,6 +160,14 @@ libtesseract_la_LIBADD += libtesseract_avx2.la
|
||||
noinst_LTLIBRARIES += libtesseract_avx2.la
|
||||
endif
|
||||
|
||||
if HAVE_AVX512F
|
||||
libtesseract_avx512_la_CXXFLAGS = -mavx512f
|
||||
libtesseract_avx512_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_avx512_la_SOURCES = src/arch/dotproductavx512.cpp
|
||||
libtesseract_la_LIBADD += libtesseract_avx512.la
|
||||
noinst_LTLIBRARIES += libtesseract_avx512.la
|
||||
endif
|
||||
|
||||
if HAVE_FMA
|
||||
libtesseract_fma_la_CXXFLAGS = -mfma
|
||||
libtesseract_fma_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
@ -379,7 +387,6 @@ libtesseract_ccutil_la_SOURCES += src/ccutil/clst.cpp
|
||||
libtesseract_ccutil_la_SOURCES += src/ccutil/elst2.cpp
|
||||
libtesseract_ccutil_la_SOURCES += src/ccutil/elst.cpp
|
||||
libtesseract_ccutil_la_SOURCES += src/ccutil/errcode.cpp
|
||||
libtesseract_ccutil_la_SOURCES += src/ccutil/mainblk.cpp
|
||||
libtesseract_ccutil_la_SOURCES += src/ccutil/serialis.cpp
|
||||
libtesseract_ccutil_la_SOURCES += src/ccutil/scanutils.cpp
|
||||
libtesseract_ccutil_la_SOURCES += src/ccutil/tessdatamanager.cpp
|
||||
|
15
README.md
15
README.md
@ -1,8 +1,7 @@
|
||||
# Tesseract OCR
|
||||
|
||||
[![Build Status](https://travis-ci.org/tesseract-ocr/tesseract.svg?branch=master)](https://travis-ci.org/tesseract-ocr/tesseract)
|
||||
[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
|
||||
![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)<br>
|
||||
[![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)<br>
|
||||
[![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr)
|
||||
[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/cpp/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/context:cpp)
|
||||
[![Total Alerts](https://img.shields.io/lgtm/alerts/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/alerts)
|
||||
@ -34,7 +33,7 @@ on line recognition, but also still supports the legacy Tesseract OCR engine of
|
||||
Tesseract 3 which works by recognizing character patterns. Compatibility with
|
||||
Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0).
|
||||
It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example
|
||||
those from the tessdata repository.
|
||||
those from the [tessdata](https://github.com/tesseract-ocr/tessdata) repository.
|
||||
|
||||
The lead developer is Ray Smith. The maintainer is Zdenko Podobny.
|
||||
For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS)
|
||||
@ -42,7 +41,9 @@ and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/gr
|
||||
|
||||
Tesseract has **unicode (UTF-8) support**, and can **recognize more than 100 languages** "out of the box".
|
||||
|
||||
Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV. The main branch also has experimental support for ALTO (XML) output.
|
||||
Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF.
|
||||
|
||||
Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO (the last one - since version 4.1.0).
|
||||
|
||||
You should note that in many cases, in order to get better OCR results,
|
||||
you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract.
|
||||
@ -60,7 +61,11 @@ at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some
|
||||
more changes made in 1996 to port to Windows, and some C++izing in 1998.
|
||||
In 2005 Tesseract was open sourced by HP. From 2006 until November 2018 it was developed by Google.
|
||||
|
||||
The latest stable version is **[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0)**, released on November 30, 2021.
|
||||
Major version 5 is the current stable version and started with release
|
||||
[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0) on November 30, 2021.
|
||||
Newer minor versions and bugfix versions are available from
|
||||
[GitHub](https://github.com/tesseract-ocr/tesseract/releases/).
|
||||
|
||||
Latest source code is available from [main branch on GitHub](https://github.com/tesseract-ocr/tesseract/tree/main).
|
||||
Open issues can be found in the [issue tracker](https://github.com/tesseract-ocr/tesseract/issues),
|
||||
and [planning documentation](https://tesseract-ocr.github.io/tessdoc/Planning.html).
|
||||
|
@ -96,13 +96,20 @@ set(include_files_list
|
||||
pango-1.0/pango/pango-features.h
|
||||
unicode/uchar.h
|
||||
)
|
||||
check_includes(include_files_list)
|
||||
# check_includes(include_files_list)
|
||||
|
||||
set(types_list
|
||||
"long long int"
|
||||
wchar_t
|
||||
)
|
||||
check_types(types_list)
|
||||
# check_types(types_list)
|
||||
|
||||
list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES -lm)
|
||||
set(functions_list
|
||||
feenableexcept
|
||||
)
|
||||
check_functions(functions_list)
|
||||
|
||||
file(APPEND ${AUTOCONFIG_SRC} "
|
||||
/* Version number */
|
||||
@ -113,6 +120,7 @@ file(APPEND ${AUTOCONFIG_SRC} "
|
||||
#cmakedefine HAVE_TIFFIO_H ${HAVE_TIFFIO_H}
|
||||
#cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE}
|
||||
#cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL}
|
||||
#cmakedefine USE_OPENCL ${USE_OPENCL}
|
||||
")
|
||||
|
||||
if(TESSDATA_PREFIX)
|
||||
|
42
configure.ac
42
configure.ac
@ -7,7 +7,7 @@
|
||||
# ----------------------------------------
|
||||
AC_PREREQ([2.69])
|
||||
AC_INIT([tesseract],
|
||||
[m4_esyscmd_s([test -d .git && git describe --abbrev=4 || cat VERSION])],
|
||||
[m4_esyscmd_s([git describe --abbrev=4 2>/dev/null || cat VERSION])],
|
||||
[https://github.com/tesseract-ocr/tesseract/issues],,
|
||||
[https://github.com/tesseract-ocr/tesseract/])
|
||||
|
||||
@ -28,8 +28,8 @@ AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc])
|
||||
|
||||
# Define date of package, etc. Could be useful in auto-generated
|
||||
# documentation.
|
||||
PACKAGE_YEAR=2021
|
||||
PACKAGE_DATE="11/30"
|
||||
PACKAGE_YEAR=2022
|
||||
PACKAGE_DATE="03/01"
|
||||
|
||||
abs_top_srcdir=`AS_DIRNAME([$0])`
|
||||
|
||||
@ -91,7 +91,7 @@ case "${host_os}" in
|
||||
mingw*)
|
||||
AM_CONDITIONAL([T_WIN], true)
|
||||
AM_CONDITIONAL([ADD_RT], false)
|
||||
AC_SUBST([AM_LDFLAGS], ['-Wl,-no-undefined -Wl,--as-needed'])
|
||||
AC_SUBST([AM_LDFLAGS], ['-no-undefined'])
|
||||
;;
|
||||
cygwin*)
|
||||
AM_CONDITIONAL([ADD_RT], false)
|
||||
@ -129,6 +129,7 @@ AX_CHECK_COMPILE_FLAG([-Werror=unused-command-line-argument], [WERROR=-Werror=un
|
||||
|
||||
AM_CONDITIONAL([HAVE_AVX], false)
|
||||
AM_CONDITIONAL([HAVE_AVX2], false)
|
||||
AM_CONDITIONAL([HAVE_AVX512F], false)
|
||||
AM_CONDITIONAL([HAVE_FMA], false)
|
||||
AM_CONDITIONAL([HAVE_SSE4_1], false)
|
||||
AM_CONDITIONAL([HAVE_NEON], false)
|
||||
@ -149,6 +150,12 @@ case "${host_cpu}" in
|
||||
AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
|
||||
fi
|
||||
|
||||
AX_CHECK_COMPILE_FLAG([-mavx512f], [avx512f=true], [avx512f=false], [$WERROR])
|
||||
AM_CONDITIONAL([HAVE_AVX512F], $avx512f)
|
||||
if $avx512f; then
|
||||
AC_DEFINE([HAVE_AVX512F], [1], [Enable AVX512F instructions])
|
||||
fi
|
||||
|
||||
AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
|
||||
AM_CONDITIONAL([HAVE_FMA], $fma)
|
||||
if $fma; then
|
||||
@ -163,7 +170,7 @@ case "${host_cpu}" in
|
||||
|
||||
;;
|
||||
|
||||
aarch64)
|
||||
aarch64|arm64)
|
||||
|
||||
# ARMv8 always has NEON and does not need special compiler flags.
|
||||
AM_CONDITIONAL([HAVE_NEON], true)
|
||||
@ -178,6 +185,7 @@ case "${host_cpu}" in
|
||||
AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
|
||||
NEON_CXXFLAGS="-mfpu=neon"
|
||||
AC_SUBST([NEON_CXXFLAGS])
|
||||
check_for_neon=1
|
||||
fi
|
||||
|
||||
;;
|
||||
@ -188,6 +196,19 @@ case "${host_cpu}" in
|
||||
|
||||
esac
|
||||
|
||||
# Check whether feenableexcept is supported. Some C libraries (e.g. uclibc) don't provide it.
|
||||
AC_CHECK_FUNCS([feenableexcept])
|
||||
|
||||
# additional checks for NEON targets
|
||||
if test x$check_for_neon = x1; then
|
||||
AC_MSG_NOTICE([checking how to detect NEON availability])
|
||||
AC_CHECK_FUNCS([getauxval elf_aux_info android_getCpuFamily])
|
||||
|
||||
if test $ac_cv_func_getauxval = no && test $ac_cv_func_elf_aux_info = no && test $ac_cv_func_android_getCpuFamily = no; then
|
||||
AC_MSG_WARN([NEON is available, but we don't know how to check for it. Will not be able to use NEON.])
|
||||
fi
|
||||
fi
|
||||
|
||||
AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
|
||||
AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)
|
||||
|
||||
@ -443,6 +464,15 @@ esac
|
||||
|
||||
AC_SEARCH_LIBS([pthread_create], [pthread])
|
||||
|
||||
# Set PKG_CONFIG_PATH for MacOS with Homebrew unless it is already set.
|
||||
AC_CHECK_PROG([have_brew], brew, true, false)
|
||||
if $have_brew; then
|
||||
brew_prefix=$(brew --prefix)
|
||||
if test -z "$PKG_CONFIG_PATH"; then
|
||||
PKG_CONFIG_PATH=$brew_prefix/opt/icu4c/lib/pkgconfig:$brew_prefix/opt/libarchive/lib/pkgconfig
|
||||
export PKG_CONFIG_PATH
|
||||
fi
|
||||
fi
|
||||
|
||||
# ----------------------------------------
|
||||
# Check for programs needed to build documentation.
|
||||
@ -462,9 +492,7 @@ AS_IF([test "$enable_doc" != "no"], [
|
||||
if $have_asciidoc && $have_xsltproc; then
|
||||
AM_CONDITIONAL([ASCIIDOC], true)
|
||||
XML_CATALOG_FILES=
|
||||
AC_CHECK_PROG([have_brew], brew, true, false)
|
||||
if $have_brew; then
|
||||
brew_prefix=$(brew --prefix)
|
||||
catalog_file=$brew_prefix/etc/xml/catalog
|
||||
if test -f $catalog_file; then
|
||||
AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], true)
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
@ -13,8 +13,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
@ -12,8 +12,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
@ -233,6 +231,12 @@ TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
@ -12,8 +12,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
@ -14,8 +14,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
@ -183,7 +181,7 @@ class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that is is useless.
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
@ -14,8 +14,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
@ -14,8 +14,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
@ -13,8 +13,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
@ -12,8 +12,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
@ -23,6 +21,7 @@
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
@ -232,7 +231,7 @@ private:
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
std::vector<long int> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
@ -15,8 +15,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
@ -13,8 +13,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
@ -99,10 +97,10 @@ public:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, len);
|
||||
// ++it) {
|
||||
// tprintf("UCS-4 symbol code = %d\n", *it);
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// tprintf("Char = %s\n", buf);
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
@ -1,4 +1,4 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
@ -12,8 +12,6 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
@ -13,9 +13,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
#ifdef _WIN32
|
||||
# include "host.h" // windows.h for MultiByteToWideChar, ...
|
||||
#endif
|
||||
#include "tprintf.h" // for tprintf
|
||||
|
||||
#include <tesseract/baseapi.h>
|
||||
#include <tesseract/renderer.h>
|
||||
@ -174,6 +176,36 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int left, top, right, bottom;
|
||||
auto block_type = res_it->BlockType();
|
||||
|
||||
switch (block_type) {
|
||||
case PT_FLOWING_IMAGE:
|
||||
case PT_HEADING_IMAGE:
|
||||
case PT_PULLOUT_IMAGE: {
|
||||
// Handle all kinds of images.
|
||||
// TODO: optionally add TYPE, for example TYPE="photo".
|
||||
alto_str << "\t\t\t\t<Illustration ID=\"cblock_" << bcnt++ << "\"";
|
||||
AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
|
||||
alto_str << "</Illustration>\n";
|
||||
res_it->Next(RIL_BLOCK);
|
||||
continue;
|
||||
}
|
||||
case PT_HORZ_LINE:
|
||||
case PT_VERT_LINE:
|
||||
// Handle horizontal and vertical lines.
|
||||
alto_str << "\t\t\t\t<GraphicalElement ID=\"cblock_" << bcnt++ << "\"";
|
||||
AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
|
||||
alto_str << "</GraphicalElement >\n";
|
||||
res_it->Next(RIL_BLOCK);
|
||||
continue;
|
||||
case PT_NOISE:
|
||||
tprintf("TODO: Please report image which triggers the noise case.\n");
|
||||
ASSERT_HOST(false);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
|
||||
alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
|
||||
AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
|
||||
@ -200,7 +232,6 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
|
||||
bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
|
||||
bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
|
||||
|
||||
int left, top, right, bottom;
|
||||
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
|
||||
|
||||
do {
|
||||
|
@ -99,6 +99,9 @@ namespace tesseract {
|
||||
|
||||
static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin");
|
||||
static STRING_VAR(document_title, "", "Title of output document (used for hOCR and PDF output)");
|
||||
#ifdef HAVE_LIBCURL
|
||||
static INT_VAR(curl_timeout, 0, "Timeout for curl in seconds");
|
||||
#endif
|
||||
|
||||
/** Minimum sensible image size to be worth running tesseract. */
|
||||
const int kMinRectSize = 10;
|
||||
@ -1150,6 +1153,17 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
|
||||
if (curlcode != CURLE_OK) {
|
||||
return error("curl_easy_setopt");
|
||||
}
|
||||
int timeout = curl_timeout;
|
||||
if (timeout > 0) {
|
||||
curlcode = curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
|
||||
if (curlcode != CURLE_OK) {
|
||||
return error("curl_easy_setopt");
|
||||
}
|
||||
curlcode = curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||
if (curlcode != CURLE_OK) {
|
||||
return error("curl_easy_setopt");
|
||||
}
|
||||
}
|
||||
curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
||||
if (curlcode != CURLE_OK) {
|
||||
return error("curl_easy_setopt");
|
||||
@ -1357,6 +1371,22 @@ char *TessBaseAPI::GetUTF8Text() {
|
||||
if (it->Empty(RIL_PARA)) {
|
||||
continue;
|
||||
}
|
||||
auto block_type = it->BlockType();
|
||||
switch (block_type) {
|
||||
case PT_FLOWING_IMAGE:
|
||||
case PT_HEADING_IMAGE:
|
||||
case PT_PULLOUT_IMAGE:
|
||||
case PT_HORZ_LINE:
|
||||
case PT_VERT_LINE:
|
||||
// Ignore images and lines for text output.
|
||||
continue;
|
||||
case PT_NOISE:
|
||||
tprintf("TODO: Please report image which triggers the noise case.\n");
|
||||
ASSERT_HOST(false);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
|
||||
text += para_text.get();
|
||||
} while (it->Next(RIL_PARA));
|
||||
|
@ -228,6 +228,22 @@ int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath, const char *lang
|
||||
return handle->Init(datapath, language);
|
||||
}
|
||||
|
||||
int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size, const char *language,
|
||||
TessOcrEngineMode mode, char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params) {
|
||||
std::vector<std::string> varNames;
|
||||
std::vector<std::string> varValues;
|
||||
if (vars_vec != nullptr && vars_values != nullptr) {
|
||||
for (size_t i = 0; i < vars_vec_size; i++) {
|
||||
varNames.emplace_back(vars_vec[i]);
|
||||
varValues.emplace_back(vars_values[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return handle->Init(data, data_size, language, mode, configs, configs_size, &varNames, &varValues,
|
||||
set_only_non_debug_params != 0, nullptr);
|
||||
}
|
||||
|
||||
const char *TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI *handle) {
|
||||
return handle->GetInitLanguagesAsString();
|
||||
}
|
||||
|
@ -189,6 +189,36 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
|
||||
std::unique_ptr<ResultIterator> res_it(GetIterator());
|
||||
while (!res_it->Empty(RIL_BLOCK)) {
|
||||
int left, top, right, bottom;
|
||||
auto block_type = res_it->BlockType();
|
||||
switch (block_type) {
|
||||
case PT_FLOWING_IMAGE:
|
||||
case PT_HEADING_IMAGE:
|
||||
case PT_PULLOUT_IMAGE: {
|
||||
// Handle all kinds of images.
|
||||
res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
|
||||
hocr_str << " <div class='ocr_photo' id='block_" << page_id << '_'
|
||||
<< bcnt++ << "' title=\"bbox " << left << " " << top << " "
|
||||
<< right << " " << bottom << "\"></div>\n";
|
||||
res_it->Next(RIL_BLOCK);
|
||||
continue;
|
||||
}
|
||||
case PT_HORZ_LINE:
|
||||
case PT_VERT_LINE:
|
||||
// Handle horizontal and vertical lines.
|
||||
res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
|
||||
hocr_str << " <div class='ocr_separator' id='block_" << page_id << '_'
|
||||
<< bcnt++ << "' title=\"bbox " << left << " " << top << " "
|
||||
<< right << " " << bottom << "\"></div>\n";
|
||||
res_it->Next(RIL_BLOCK);
|
||||
continue;
|
||||
case PT_NOISE:
|
||||
tprintf("TODO: Please report image which triggers the noise case.\n");
|
||||
ASSERT_HOST(false);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (res_it->Empty(RIL_WORD)) {
|
||||
res_it->Next(RIL_WORD);
|
||||
continue;
|
||||
@ -218,7 +248,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
}
|
||||
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
|
||||
hocr_str << "\n <span class='";
|
||||
switch (res_it->BlockType()) {
|
||||
switch (block_type) {
|
||||
case PT_HEADING_TEXT:
|
||||
hocr_str << "ocr_header";
|
||||
break;
|
||||
@ -228,6 +258,11 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
case PT_CAPTION_TEXT:
|
||||
hocr_str << "ocr_caption";
|
||||
break;
|
||||
case PT_FLOWING_IMAGE:
|
||||
case PT_HEADING_IMAGE:
|
||||
case PT_PULLOUT_IMAGE:
|
||||
ASSERT_HOST(false);
|
||||
break;
|
||||
default:
|
||||
hocr_str << "ocr_line";
|
||||
}
|
||||
@ -248,12 +283,10 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
hocr_str << "\n <span class='ocrx_word'"
|
||||
<< " id='"
|
||||
<< "word_" << page_id << "_" << wcnt << "'";
|
||||
int left, top, right, bottom;
|
||||
bool bold, italic, underlined, monospace, serif, smallcaps;
|
||||
int pointsize, font_id;
|
||||
const char *font_name;
|
||||
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
|
||||
font_name =
|
||||
const char *font_name =
|
||||
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
|
||||
&serif, &smallcaps, &pointsize, &font_id);
|
||||
hocr_str << " title='bbox " << left << " " << top << " " << right << " "
|
||||
|
@ -109,6 +109,9 @@ bool TessResultRenderer::EndDocument() {
|
||||
}
|
||||
|
||||
void TessResultRenderer::AppendString(const char *s) {
|
||||
if (s == nullptr) {
|
||||
return;
|
||||
}
|
||||
AppendData(s, strlen(s));
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,9 @@ TFloat DotProductNative(const TFloat *u, const TFloat *v, int n);
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
TFloat DotProductAVX(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
// Uses Intel AVX512F intrinsics to access the SIMD instruction set.
|
||||
TFloat DotProductAVX512F(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
// Use Intel FMA.
|
||||
TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
|
70
src/arch/dotproductavx512.cpp
Normal file
70
src/arch/dotproductavx512.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductavx512.cpp
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Stefan Weil
|
||||
//
|
||||
// (C) Copyright 2022
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if !defined(__AVX__)
|
||||
# if defined(__i686__) || defined(__x86_64__)
|
||||
# error Implementation only for AVX capable architectures
|
||||
# endif
|
||||
#else
|
||||
|
||||
# include <immintrin.h>
|
||||
# include <cstdint>
|
||||
# include "dotproduct.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel AVX512F intrinsics to access the SIMD instruction set.
|
||||
# if defined(FAST_FLOAT)
|
||||
float DotProductAVX512F(const float *u, const float *v, int n) {
|
||||
const unsigned quot = n / 16;
|
||||
const unsigned rem = n % 16;
|
||||
__m512 t0 = _mm512_setzero_ps();
|
||||
for (unsigned k = 0; k < quot; k++) {
|
||||
__m512 f0 = _mm512_loadu_ps(u);
|
||||
__m512 f1 = _mm512_loadu_ps(v);
|
||||
t0 = _mm512_fmadd_ps(f0, f1, t0);
|
||||
u += 16;
|
||||
v += 16;
|
||||
}
|
||||
float result = _mm512_reduce_add_ps(t0);
|
||||
for (unsigned k = 0; k < rem; k++) {
|
||||
result += *u++ * *v++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
# else
|
||||
double DotProductAVX512F(const double *u, const double *v, int n) {
|
||||
const unsigned quot = n / 8;
|
||||
const unsigned rem = n % 8;
|
||||
__m512d t0 = _mm512_setzero_pd();
|
||||
for (unsigned k = 0; k < quot; k++) {
|
||||
t0 = _mm512_fmadd_pd(_mm512_loadu_pd(u), _mm512_loadu_pd(v), t0);
|
||||
u += 8;
|
||||
v += 8;
|
||||
}
|
||||
double result = _mm512_reduce_add_pd(t0);
|
||||
for (unsigned k = 0; k < rem; k++) {
|
||||
result += *u++ * *v++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
# endif
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif
|
@ -27,6 +27,14 @@
|
||||
# include <cstdint>
|
||||
# include <vector>
|
||||
|
||||
# if defined(_MSC_VER) && _MSC_VER >= 1925 && _MSC_VER <= 1929 && \
|
||||
defined(_WIN32) && !defined(_WIN64)
|
||||
// Optimize for size (/Os) instead of using the default optimization for some
|
||||
// versions of the 32 bit Visual Studio compiler which generate buggy code.
|
||||
# pragma optimize("", off)
|
||||
# pragma optimize("s", on)
|
||||
# endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Number of outputs held in each register. 8 x 32 bit ints.
|
||||
|
@ -53,12 +53,14 @@
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON) && !defined(__aarch64__)
|
||||
# ifdef ANDROID
|
||||
# if defined(HAVE_ANDROID_GETCPUFAMILY)
|
||||
# include <cpu-features.h>
|
||||
# else
|
||||
/* Assume linux */
|
||||
# elif defined(HAVE_GETAUXVAL)
|
||||
# include <asm/hwcap.h>
|
||||
# include <sys/auxv.h>
|
||||
# elif defined(HAVE_ELF_AUX_INFO)
|
||||
# include <sys/auxv.h>
|
||||
# include <sys/elf.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -210,21 +212,29 @@ SIMDDetect::SIMDDetect() {
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_NEON) && !defined(__aarch64__)
|
||||
# ifdef ANDROID
|
||||
# if defined(HAVE_ANDROID_GETCPUFAMILY)
|
||||
{
|
||||
AndroidCpuFamily family = android_getCpuFamily();
|
||||
if (family == ANDROID_CPU_FAMILY_ARM)
|
||||
neon_available_ = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON);
|
||||
}
|
||||
# else
|
||||
/* Assume linux */
|
||||
# elif defined(HAVE_GETAUXVAL)
|
||||
neon_available_ = getauxval(AT_HWCAP) & HWCAP_NEON;
|
||||
# elif defined(HAVE_ELF_AUX_INFO)
|
||||
unsigned long hwcap = 0;
|
||||
elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
|
||||
neon_available_ = hwcap & HWCAP_NEON;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Select code for calculation of dot product based on autodetection.
|
||||
if (false) {
|
||||
// This is a dummy to support conditional compilation.
|
||||
#if defined(HAVE_AVX512F)
|
||||
} else if (avx512F_available_) {
|
||||
// AVX512F detected.
|
||||
SetDotProduct(DotProductAVX512F, &IntSimdMatrix::intSimdMatrixAVX2);
|
||||
#endif
|
||||
#if defined(HAVE_AVX2)
|
||||
} else if (avx2_available_) {
|
||||
// AVX2 detected.
|
||||
|
@ -159,7 +159,7 @@ PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
|
||||
// Helper computes median xheight in the image.
|
||||
static double MedianXHeight(BLOCK_LIST *block_list) {
|
||||
BLOCK_IT block_it(block_list);
|
||||
STATS xheights(0, block_it.data()->pdblk.bounding_box().height());
|
||||
STATS xheights(0, block_it.data()->pdblk.bounding_box().height() - 1);
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||
ROW_IT row_it(block_it.data()->row_list());
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
|
@ -2015,7 +2015,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
WERD_RES *word; // current word
|
||||
STATS doc_fonts(0, font_table_size_); // font counters
|
||||
STATS doc_fonts(0, font_table_size_ - 1); // font counters
|
||||
|
||||
// Gather font id statistics.
|
||||
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
|
||||
|
@ -103,8 +103,8 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
|
||||
// Returns a new x-height maximally compatible with the result in word_res.
|
||||
// See comment above for overall algorithm.
|
||||
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift) {
|
||||
STATS top_stats(0, UINT8_MAX);
|
||||
STATS shift_stats(-UINT8_MAX, UINT8_MAX);
|
||||
STATS top_stats(0, UINT8_MAX - 1);
|
||||
STATS shift_stats(-UINT8_MAX, UINT8_MAX - 1);
|
||||
int bottom_shift = 0;
|
||||
int num_blobs = word_res->rebuild_word->NumBlobs();
|
||||
do {
|
||||
|
@ -225,7 +225,7 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
|
||||
return true; // Already at the end!
|
||||
}
|
||||
// The result is true if we step forward by element and find we are
|
||||
// at the the end of the page or at beginning of *all* levels in:
|
||||
// at the end of the page or at beginning of *all* levels in:
|
||||
// [level, element).
|
||||
// When there is more than one level difference between element and level,
|
||||
// we could for instance move forward one symbol and still be at the first
|
||||
@ -566,7 +566,15 @@ void PageIterator::Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const {
|
||||
BLOCK *block = it_->block()->block;
|
||||
auto *block_res = it_->block();
|
||||
if (block_res == nullptr) {
|
||||
// Nothing can be done, so return default values.
|
||||
*orientation = ORIENTATION_PAGE_UP;
|
||||
*writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT;
|
||||
*textline_order = TEXTLINE_ORDER_TOP_TO_BOTTOM;
|
||||
return;
|
||||
}
|
||||
auto *block = block_res->block;
|
||||
|
||||
// Orientation
|
||||
FCOORD up_in_image(0.0, 1.0);
|
||||
|
@ -108,10 +108,8 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
|
||||
// If a UNLV zone file can be found, use that instead of segmentation.
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file[0] != '\0') {
|
||||
std::string name = input_file;
|
||||
const char *lastdot = strrchr(name.c_str(), '.');
|
||||
if (lastdot != nullptr) {
|
||||
name[lastdot - name.c_str()] = '\0';
|
||||
}
|
||||
std::size_t lastdot = name.find_last_of(".");
|
||||
name = name.substr(0, lastdot);
|
||||
read_unlv_file(name, width, height, blocks);
|
||||
}
|
||||
if (blocks->empty()) {
|
||||
|
@ -1623,8 +1623,8 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
|
||||
UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
|
||||
UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
|
||||
}
|
||||
STATS lefts(lmin, lmax + 1);
|
||||
STATS rights(rmin, rmax + 1);
|
||||
STATS lefts(lmin, lmax);
|
||||
STATS rights(rmin, rmax);
|
||||
for (int i = start; i < end; i++) {
|
||||
RowScratchRegisters &sr = (*rows)[i];
|
||||
if (sr.ri_->num_words == 0) {
|
||||
@ -1655,7 +1655,7 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
|
||||
(rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
|
||||
int word_width =
|
||||
(rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2;
|
||||
STATS spacing_widths(0, 5 + word_width);
|
||||
STATS spacing_widths(0, 4 + word_width);
|
||||
for (int i = row_start; i < row_end; i++) {
|
||||
if (rows[i].ri_->num_words > 1) {
|
||||
spacing_widths.add(rows[i].ri_->average_interword_space, 1);
|
||||
|
@ -616,7 +616,7 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel
|
||||
return true; // Already at the end!
|
||||
}
|
||||
// The result is true if we step forward by element and find we are
|
||||
// at the the end of the page or at beginning of *all* levels in:
|
||||
// at the end of the page or at beginning of *all* levels in:
|
||||
// [level, element).
|
||||
// When there is more than one level difference between element and level,
|
||||
// we could for instance move forward one symbol and still be at the first
|
||||
@ -731,10 +731,12 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) {
|
||||
std::vector<int> textline_order;
|
||||
std::vector<StrongScriptDirection> dirs;
|
||||
CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &dirs, &textline_order);
|
||||
tprintf("Strong Script dirs [%p/P=%s]: ", it_->row(),
|
||||
tprintf("Strong Script dirs [%p/P=%s]: ",
|
||||
static_cast<void *>(it_->row()),
|
||||
current_paragraph_is_ltr_ ? "ltr" : "rtl");
|
||||
PrintScriptDirs(dirs);
|
||||
tprintf("Logical textline order [%p/P=%s]: ", it_->row(),
|
||||
tprintf("Logical textline order [%p/P=%s]: ",
|
||||
static_cast<void *>(it_->row()),
|
||||
current_paragraph_is_ltr_ ? "ltr" : "rtl");
|
||||
for (int i : textline_order) {
|
||||
tprintf("%d ", i);
|
||||
|
@ -23,8 +23,6 @@
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include <regex> // for std::regex_match
|
||||
|
||||
#include "control.h"
|
||||
#include "matchdefs.h"
|
||||
#include "pageres.h"
|
||||
@ -248,12 +246,11 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
|
||||
std::string remains(lang_str);
|
||||
// Look whether the model file uses a prefix which must be applied to
|
||||
// included model files as well.
|
||||
std::regex e("(.*)/[^/]*");
|
||||
std::cmatch cm;
|
||||
std::string prefix;
|
||||
if (std::regex_match(lang.c_str(), cm, e, std::regex_constants::match_default)) {
|
||||
size_t found = lang.find_last_of('/');
|
||||
if (found != std::string::npos) {
|
||||
// A prefix was found.
|
||||
prefix = cm[1].str() + "/";
|
||||
prefix = lang.substr(0, found + 1);
|
||||
}
|
||||
while (!remains.empty()) {
|
||||
// Find the start of the lang code and which vector to add to.
|
||||
|
@ -86,27 +86,27 @@ Tesseract::Tesseract()
|
||||
, double_MEMBER(thresholding_window_size, 0.33,
|
||||
"Window size for measuring local statistics (to be "
|
||||
"multiplied by image DPI). "
|
||||
"This parameter is used by the Sauvola thresolding method",
|
||||
"This parameter is used by the Sauvola thresholding method",
|
||||
this->params())
|
||||
, double_MEMBER(thresholding_kfactor, 0.34,
|
||||
"Factor for reducing threshold due to variance. "
|
||||
"This parameter is used by the Sauvola thresolding method."
|
||||
"This parameter is used by the Sauvola thresholding method."
|
||||
" Normal range: 0.2-0.5",
|
||||
this->params())
|
||||
, double_MEMBER(thresholding_tile_size, 0.33,
|
||||
"Desired tile size (to be multiplied by image DPI). "
|
||||
"This parameter is used by the LeptonicaOtsu thresolding "
|
||||
"This parameter is used by the LeptonicaOtsu thresholding "
|
||||
"method",
|
||||
this->params())
|
||||
, double_MEMBER(thresholding_smooth_kernel_size, 0.0,
|
||||
"Size of convolution kernel applied to threshold array "
|
||||
"(to be multiplied by image DPI). Use 0 for no smoothing. "
|
||||
"This parameter is used by the LeptonicaOtsu thresolding "
|
||||
"This parameter is used by the LeptonicaOtsu thresholding "
|
||||
"method",
|
||||
this->params())
|
||||
, double_MEMBER(thresholding_score_fraction, 0.1,
|
||||
"Fraction of the max Otsu score. "
|
||||
"This parameter is used by the LeptonicaOtsu thresolding "
|
||||
"This parameter is used by the LeptonicaOtsu thresholding "
|
||||
"method. "
|
||||
"For standard Otsu use 0.0, otherwise 0.1 is recommended",
|
||||
this->params())
|
||||
|
@ -16,6 +16,11 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Include automatically generated configuration file
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "otsuthr.h"
|
||||
#include "thresholder.h"
|
||||
#include "tprintf.h" // for tprintf
|
||||
@ -27,6 +32,7 @@
|
||||
#include <allheaders.h>
|
||||
#include <tesseract/baseapi.h> // for api->GetIntVariable()
|
||||
|
||||
#include <algorithm> // for std::max, std::min
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <cstring>
|
||||
#include <tuple>
|
||||
@ -164,16 +170,7 @@ void ImageThresholder::SetImage(const Image pix) {
|
||||
// Convert the image as necessary so it is one of binary, plain RGB, or
|
||||
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
|
||||
// not just a clone of the input.
|
||||
if (pixGetColormap(src)) {
|
||||
Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
|
||||
depth = pixGetDepth(tmp);
|
||||
if (depth > 1 && depth < 8) {
|
||||
pix_ = pixConvertTo8(tmp, false);
|
||||
tmp.destroy();
|
||||
} else {
|
||||
pix_ = tmp;
|
||||
}
|
||||
} else if (depth > 1 && depth < 8) {
|
||||
pix_ = pixConvertTo8(src, false);
|
||||
} else {
|
||||
pix_ = src.copy();
|
||||
|
@ -810,7 +810,7 @@ void TO_ROW::compute_vertical_projection() { // project whole row
|
||||
row_box += blob_it.data()->bounding_box();
|
||||
}
|
||||
|
||||
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
|
||||
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN - 1);
|
||||
projection_left = row_box.left() - PROJECTION_MARGIN;
|
||||
projection_right = row_box.right() + PROJECTION_MARGIN;
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
|
@ -725,8 +725,8 @@ public:
|
||||
ASSERT_HOST(block->pdblk.poly_block() != nullptr);
|
||||
block->rotate(rotation);
|
||||
// Update the median size statistic from the blobs list.
|
||||
STATS widths(0, block->pdblk.bounding_box().width());
|
||||
STATS heights(0, block->pdblk.bounding_box().height());
|
||||
STATS widths(0, block->pdblk.bounding_box().width() - 1);
|
||||
STATS heights(0, block->pdblk.bounding_box().height() - 1);
|
||||
BLOBNBOX_IT blob_it(&blobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
widths.add(blob_it.data()->bounding_box().width(), 1);
|
||||
@ -769,7 +769,7 @@ public:
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
// Draw the noise blobs from all lists in red.
|
||||
void plot_noise_blobs(ScrollView *to_win);
|
||||
// Draw the blobs on on the various lists in the block in different colors.
|
||||
// Draw the blobs on the various lists in the block in different colors.
|
||||
void plot_graded_blobs(ScrollView *to_win);
|
||||
#endif
|
||||
|
||||
|
@ -28,9 +28,4 @@ const double CCStruct::kAscenderFraction = 0.25;
|
||||
const double CCStruct::kXHeightCapRatio =
|
||||
CCStruct::kXHeightFraction / (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
|
||||
|
||||
// Destructor.
|
||||
// It is defined here, so the compiler can create a single vtable
|
||||
// instead of weak vtables in every compilation unit.
|
||||
CCStruct::~CCStruct() = default;
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -22,11 +22,9 @@
|
||||
#include "ccutil.h" // for CCUtil
|
||||
|
||||
namespace tesseract {
|
||||
class TESS_API CCStruct : public CCUtil {
|
||||
public:
|
||||
CCStruct() = default;
|
||||
~CCStruct() override;
|
||||
|
||||
class CCStruct : public CCUtil {
|
||||
public:
|
||||
// Globally accessible constants.
|
||||
// APPROXIMATIONS of the fractions of the character cell taken by
|
||||
// the descenders, ascenders, and x-height.
|
||||
@ -36,6 +34,7 @@ public:
|
||||
// Derived value giving the x-height as a fraction of cap-height.
|
||||
static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
|
@ -871,7 +871,7 @@ void C_OUTLINE::ComputeBinaryOffsets() {
|
||||
increment_step(s, 1, &head_pos, dir_counts, pos_totals);
|
||||
}
|
||||
for (int s = 0; s < stepcount; pos += step(s++)) {
|
||||
// At step s, s in in the middle of [s-2, s+2].
|
||||
// At step s, s is in the middle of [s-2, s+2].
|
||||
increment_step(s + 2, 1, &head_pos, dir_counts, pos_totals);
|
||||
int dir_index = chain_code(s);
|
||||
ICOORD step_vec = step(s);
|
||||
|
@ -81,7 +81,7 @@ void LLSQ::add(const LLSQ &other) {
|
||||
|
||||
void LLSQ::remove(double x, double y) { // delete an element
|
||||
if (total_weight <= 0.0) { // illegal
|
||||
EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
|
||||
EMPTY_LLSQ.error("LLSQ::remove", ABORT);
|
||||
}
|
||||
total_weight--; // count elements
|
||||
sigx -= x; // update accumulators
|
||||
|
@ -180,7 +180,7 @@ static void ComputeRunlengthImage(const TBOX &box,
|
||||
++y;
|
||||
}
|
||||
}
|
||||
// Now set the image pixels the the MIN of the x and y runlengths.
|
||||
// Now set the image pixels to the MIN of the x and y runlengths.
|
||||
for (int iy = 0; iy < height; ++iy) {
|
||||
int x = 0;
|
||||
for (auto x_coord : x_coords[iy]) {
|
||||
|
@ -347,7 +347,7 @@ void BLOCK_RECT_IT::forward() { // next rectangle
|
||||
/**********************************************************************
|
||||
* BLOCK_LINE_IT::get_line
|
||||
*
|
||||
* Get the the start and width of a line in the block.
|
||||
* Get the start and width of a line in the block.
|
||||
**********************************************************************/
|
||||
|
||||
TDimension BLOCK_LINE_IT::get_line( // get a line
|
||||
|
@ -59,8 +59,8 @@ public:
|
||||
unichar_id_ = UNICHAR_SPACE;
|
||||
fontinfo_id_ = -1;
|
||||
fontinfo_id2_ = -1;
|
||||
rating_ = 10.0;
|
||||
certainty_ = -1.0;
|
||||
rating_ = 10.0f;
|
||||
certainty_ = -1.0f;
|
||||
script_id_ = -1;
|
||||
min_xheight_ = 0.0f;
|
||||
max_xheight_ = 0.0f;
|
||||
@ -170,13 +170,17 @@ public:
|
||||
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const;
|
||||
|
||||
void print(const UNICHARSET *unicharset) const {
|
||||
tprintf("r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, max_xheight_,
|
||||
tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
|
||||
static_cast<double>(rating_),
|
||||
static_cast<double>(certainty_),
|
||||
static_cast<double>(min_xheight_),
|
||||
static_cast<double>(max_xheight_),
|
||||
unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str());
|
||||
}
|
||||
void print_full() const {
|
||||
print(nullptr);
|
||||
tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
|
||||
fontinfo_id2_, yshift_, classifier_);
|
||||
fontinfo_id2_, static_cast<double>(yshift_), classifier_);
|
||||
}
|
||||
// Sort function for sorting BLOB_CHOICEs in increasing order of rating.
|
||||
static int SortByRating(const void *p1, const void *p2) {
|
||||
|
@ -40,14 +40,14 @@ namespace tesseract {
|
||||
*
|
||||
* Construct a new stats element by allocating and zeroing the memory.
|
||||
**********************************************************************/
|
||||
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
|
||||
if (max_bucket_value_plus_1 <= min_bucket_value) {
|
||||
STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value) {
|
||||
if (max_bucket_value < min_bucket_value) {
|
||||
min_bucket_value = 0;
|
||||
max_bucket_value_plus_1 = 1;
|
||||
max_bucket_value = 1;
|
||||
}
|
||||
rangemin_ = min_bucket_value; // setup
|
||||
rangemax_ = max_bucket_value_plus_1;
|
||||
buckets_ = new int32_t[rangemax_ - rangemin_];
|
||||
rangemax_ = max_bucket_value;
|
||||
buckets_ = new int32_t[1 + rangemax_ - rangemin_];
|
||||
clear();
|
||||
}
|
||||
|
||||
@ -56,16 +56,16 @@ STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
|
||||
*
|
||||
* Alter the range on an existing stats element.
|
||||
**********************************************************************/
|
||||
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
|
||||
if (max_bucket_value_plus_1 <= min_bucket_value) {
|
||||
bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value) {
|
||||
if (max_bucket_value < min_bucket_value) {
|
||||
return false;
|
||||
}
|
||||
if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
|
||||
if (rangemax_ - rangemin_ != max_bucket_value - min_bucket_value) {
|
||||
delete[] buckets_;
|
||||
buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value];
|
||||
buckets_ = new int32_t[1 + max_bucket_value - min_bucket_value];
|
||||
}
|
||||
rangemin_ = min_bucket_value; // setup
|
||||
rangemax_ = max_bucket_value_plus_1;
|
||||
rangemax_ = max_bucket_value;
|
||||
clear(); // zero it
|
||||
return true;
|
||||
}
|
||||
@ -78,7 +78,7 @@ bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
|
||||
void STATS::clear() { // clear out buckets
|
||||
total_count_ = 0;
|
||||
if (buckets_ != nullptr) {
|
||||
memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
|
||||
memset(buckets_, 0, (1 + rangemax_ - rangemin_) * sizeof(buckets_[0]));
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,12 +97,11 @@ STATS::~STATS() {
|
||||
* Add a set of samples to (or delete from) a pile.
|
||||
**********************************************************************/
|
||||
void STATS::add(int32_t value, int32_t count) {
|
||||
if (buckets_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
value = ClipToRange(value, rangemin_, rangemax_ - 1);
|
||||
if (buckets_ != nullptr) {
|
||||
value = ClipToRange(value, rangemin_, rangemax_);
|
||||
buckets_[value - rangemin_] += count;
|
||||
total_count_ += count; // keep count of total
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
@ -116,7 +115,7 @@ int32_t STATS::mode() const { // get mode of samples
|
||||
}
|
||||
int32_t max = buckets_[0]; // max cell count
|
||||
int32_t maxindex = 0; // index of max
|
||||
for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
|
||||
for (int index = rangemax_ - rangemin_; index > 0; --index) {
|
||||
if (buckets_[index] > max) {
|
||||
max = buckets_[index]; // find biggest
|
||||
maxindex = index;
|
||||
@ -135,7 +134,7 @@ double STATS::mean() const { // get mean of samples
|
||||
return static_cast<double>(rangemin_);
|
||||
}
|
||||
int64_t sum = 0;
|
||||
for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
|
||||
for (int index = rangemax_ - rangemin_; index >= 0; --index) {
|
||||
sum += static_cast<int64_t>(index) * buckets_[index];
|
||||
}
|
||||
return static_cast<double>(sum) / total_count_ + rangemin_;
|
||||
@ -152,7 +151,7 @@ double STATS::sd() const { // standard deviation
|
||||
}
|
||||
int64_t sum = 0;
|
||||
double sqsum = 0.0;
|
||||
for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
|
||||
for (int index = rangemax_ - rangemin_; index >= 0; --index) {
|
||||
sum += static_cast<int64_t>(index) * buckets_[index];
|
||||
sqsum += static_cast<double>(index) * index * buckets_[index];
|
||||
}
|
||||
@ -186,7 +185,7 @@ double STATS::ile(double frac) const {
|
||||
#endif
|
||||
int sum = 0;
|
||||
int index = 0;
|
||||
for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
|
||||
for (index = 0; index <= rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
|
||||
;
|
||||
}
|
||||
if (index > 0) {
|
||||
@ -207,7 +206,7 @@ int32_t STATS::min_bucket() const { // Find min
|
||||
return rangemin_;
|
||||
}
|
||||
int32_t min = 0;
|
||||
for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
|
||||
for (min = 0; (min <= rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
|
||||
;
|
||||
}
|
||||
return rangemin_ + min;
|
||||
@ -224,7 +223,7 @@ int32_t STATS::max_bucket() const { // Find max
|
||||
return rangemin_;
|
||||
}
|
||||
int32_t max;
|
||||
for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) {
|
||||
for (max = rangemax_ - rangemin_; max > 0 && buckets_[max] == 0; max--) {
|
||||
;
|
||||
}
|
||||
return rangemin_ + max;
|
||||
@ -270,7 +269,7 @@ bool STATS::local_min(int32_t x) const {
|
||||
if (buckets_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
|
||||
x = ClipToRange(x, rangemin_, rangemax_) - rangemin_;
|
||||
if (buckets_[x] == 0) {
|
||||
return true;
|
||||
}
|
||||
@ -281,10 +280,10 @@ bool STATS::local_min(int32_t x) const {
|
||||
if (index >= 0 && buckets_[index] < buckets_[x]) {
|
||||
return false;
|
||||
}
|
||||
for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
|
||||
for (index = x + 1; index <= rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
|
||||
;
|
||||
}
|
||||
if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
|
||||
if (index <= rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
@ -304,7 +303,7 @@ void STATS::smooth(int32_t factor) {
|
||||
return;
|
||||
}
|
||||
STATS result(rangemin_, rangemax_);
|
||||
int entrycount = rangemax_ - rangemin_;
|
||||
int entrycount = 1 + rangemax_ - rangemin_;
|
||||
for (int entry = 0; entry < entrycount; entry++) {
|
||||
// centre weight
|
||||
int count = buckets_[entry] * factor;
|
||||
@ -368,7 +367,7 @@ int32_t STATS::cluster(float lower, // thresholds
|
||||
clusters[0].add(entry, count);
|
||||
}
|
||||
}
|
||||
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
|
||||
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
|
||||
pile_count(entry) <= pile_count(entry - 1);
|
||||
entry++) {
|
||||
count = pile_count(entry) - clusters[0].pile_count(entry);
|
||||
@ -386,7 +385,7 @@ int32_t STATS::cluster(float lower, // thresholds
|
||||
do {
|
||||
new_cluster = false;
|
||||
new_mode = 0;
|
||||
for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
|
||||
for (entry = 0; entry <= rangemax_ - rangemin_; entry++) {
|
||||
count = buckets_[entry] - clusters[0].buckets_[entry];
|
||||
// remaining pile
|
||||
if (count > 0) { // any to handle
|
||||
@ -433,7 +432,7 @@ int32_t STATS::cluster(float lower, // thresholds
|
||||
clusters[0].add(entry, count);
|
||||
}
|
||||
}
|
||||
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
|
||||
for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
|
||||
pile_count(entry) <= pile_count(entry - 1);
|
||||
entry++) {
|
||||
count = pile_count(entry) - clusters[0].pile_count(entry);
|
||||
@ -482,7 +481,7 @@ int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes)
|
||||
if (max_modes <= 0) {
|
||||
return 0;
|
||||
}
|
||||
int src_count = rangemax_ - rangemin_;
|
||||
int src_count = 1 + rangemax_ - rangemin_;
|
||||
// Used copies the counts in buckets_ as they get used.
|
||||
STATS used(rangemin_, rangemax_);
|
||||
modes.clear();
|
||||
@ -605,7 +604,7 @@ void STATS::plot(ScrollView *window, // to draw in
|
||||
}
|
||||
window->Pen(colour);
|
||||
|
||||
for (int index = 0; index < rangemax_ - rangemin_; index++) {
|
||||
for (int index = 0; index <= rangemax_ - rangemin_; index++) {
|
||||
window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
|
||||
yorigin + yscale * buckets_[index]);
|
||||
}
|
||||
@ -630,7 +629,7 @@ void STATS::plotline(ScrollView *window, // to draw in
|
||||
}
|
||||
window->Pen(colour);
|
||||
window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
|
||||
for (int index = 0; index < rangemax_ - rangemin_; index++) {
|
||||
for (int index = 0; index <= rangemax_ - rangemin_; index++) {
|
||||
window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
|
||||
}
|
||||
}
|
||||
|
@ -30,23 +30,20 @@ namespace tesseract {
|
||||
class TESS_API STATS {
|
||||
public:
|
||||
// The histogram buckets are in the range
|
||||
// [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
|
||||
// [min_bucket_value, max_bucket_value].
|
||||
// Any data under min_bucket value is silently mapped to min_bucket_value,
|
||||
// and likewise, any data over max_bucket_value is silently mapped to
|
||||
// max_bucket_value.
|
||||
// In the internal array, min_bucket_value maps to 0 and
|
||||
// max_bucket_value_plus_1 - min_bucket_value to the array size.
|
||||
// TODO(rays) This is ugly. Convert the second argument to
|
||||
// max_bucket_value and all the code that uses it.
|
||||
STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
|
||||
// 1 + max_bucket_value - min_bucket_value to the array size.
|
||||
STATS(int32_t min_bucket_value, int32_t max_bucket_value);
|
||||
STATS() = default; // empty for arrays
|
||||
|
||||
~STATS();
|
||||
|
||||
// (Re)Sets the range and clears the counts.
|
||||
// See the constructor for info on max and min values.
|
||||
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
|
||||
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value);
|
||||
|
||||
void clear(); // empty buckets
|
||||
|
||||
@ -73,11 +70,14 @@ public:
|
||||
double median() const; // get median of samples
|
||||
// Returns the count of the given value.
|
||||
int32_t pile_count(int32_t value) const {
|
||||
if (buckets_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
if (value <= rangemin_) {
|
||||
return buckets_[0];
|
||||
}
|
||||
if (value >= rangemax_ - 1) {
|
||||
return buckets_[rangemax_ - rangemin_ - 1];
|
||||
if (value >= rangemax_) {
|
||||
return buckets_[rangemax_ - rangemin_];
|
||||
}
|
||||
return buckets_[value - rangemin_];
|
||||
}
|
||||
@ -139,7 +139,6 @@ public:
|
||||
|
||||
private:
|
||||
int32_t rangemin_ = 0; // min of range
|
||||
// rangemax_ is not well named as it is really one past the max.
|
||||
int32_t rangemax_ = 0; // max of range
|
||||
int32_t total_count_ = 0; // no of samples
|
||||
int32_t *buckets_ = nullptr; // array of cells
|
||||
|
@ -142,7 +142,6 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
for (j = 0;
|
||||
j < adaption_ambigs_entry->size() && (*adaption_ambigs_entry)[j] > id_to_insert;
|
||||
++j) {
|
||||
;
|
||||
}
|
||||
if (j < adaption_ambigs_entry->size()) {
|
||||
if ((*adaption_ambigs_entry)[j] != id_to_insert) {
|
||||
|
@ -116,7 +116,7 @@ public:
|
||||
|
||||
// Comparator function for sorting AmbigSpec_LISTs. The lists will
|
||||
// be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors
|
||||
// in a a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1].
|
||||
// in a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1].
|
||||
static int compare_ambig_specs(const void *spec1, const void *spec2) {
|
||||
const AmbigSpec *s1 = *static_cast<const AmbigSpec *const *>(spec1);
|
||||
const AmbigSpec *s2 = *static_cast<const AmbigSpec *const *>(spec2);
|
||||
|
@ -10,9 +10,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#if defined(_WIN32)
|
||||
# include <io.h> // for _access
|
||||
#endif
|
||||
|
||||
#include "ccutil.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring> // for std::strrchr
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CCUtil::CCUtil()
|
||||
: params_()
|
||||
, INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", ¶ms_)
|
||||
@ -26,4 +34,61 @@ CCUtil::CCUtil()
|
||||
// instead of weak vtables in every compilation unit.
|
||||
CCUtil::~CCUtil() = default;
|
||||
|
||||
/**
|
||||
* @brief CCUtil::main_setup - set location of tessdata and name of image
|
||||
*
|
||||
* @param argv0 - path to the directory with language files and config files.
|
||||
* The value of argv0 is used if it is not empty, otherwise the TESSDATA_PREFIX
|
||||
* environment variable is used if it is set, next try to use compiled in -DTESSDATA_PREFIX. If
|
||||
* previous is not successful - use current directory.
|
||||
* @param basename - name of image
|
||||
*/
|
||||
void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
|
||||
imagebasename = basename; /**< name of image */
|
||||
|
||||
char *tessdata_prefix = getenv("TESSDATA_PREFIX"); // null pointer when the variable is not set
|
||||
|
||||
if (!argv0.empty()) {
|
||||
/* Highest priority: a non-empty argv0 is taken as the data directory. */
|
||||
datadir = argv0;
|
||||
} else if (tessdata_prefix) {
|
||||
/* Next: use the tessdata prefix from the TESSDATA_PREFIX environment variable. */
|
||||
datadir = tessdata_prefix;
|
||||
#if defined(_WIN32)
|
||||
} else if (datadir.empty() || _access(datadir.c_str(), 0) != 0) { // Windows only: no usable datadir yet
|
||||
/* Look for a "tessdata" directory next to the running executable. */
|
||||
char path[_MAX_PATH];
|
||||
DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
|
||||
if (length > 0 && length < sizeof(path)) { // skip on failure or when the buffer was completely filled
|
||||
char *separator = std::strrchr(path, '\\');
|
||||
if (separator != nullptr) {
|
||||
*separator = '\0'; // cut the path at its last backslash, keeping only the directory part
|
||||
std::string subdir = path;
|
||||
subdir += "/tessdata";
|
||||
if (_access(subdir.c_str(), 0) == 0) { // only adopt the candidate if _access succeeds on it
|
||||
datadir = subdir;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* _WIN32 */
|
||||
}
|
||||
|
||||
// datadir may still be empty: fall back to the compiled-in prefix or the current directory.
|
||||
if (datadir.empty()) {
|
||||
#if defined(TESSDATA_PREFIX)
|
||||
// Use tessdata prefix which was compiled in (the macro is concatenated with "/tessdata").
|
||||
datadir = TESSDATA_PREFIX "/tessdata";
|
||||
#else
|
||||
datadir = "./";
|
||||
#endif /* TESSDATA_PREFIX */
|
||||
}
|
||||
|
||||
// Check for a missing trailing directory separator (slash or backslash) and append "/" if needed.
|
||||
const char *lastchar = datadir.c_str();
|
||||
lastchar += datadir.length() - 1; // now points at the last character; datadir is non-empty here
|
||||
if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) {
|
||||
datadir += "/";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -89,7 +89,7 @@ void CLIST::assign_to_sublist( // to this list
|
||||
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
|
||||
|
||||
if (!empty()) {
|
||||
LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT, nullptr);
|
||||
LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT);
|
||||
}
|
||||
|
||||
last = start_it->extract_sublist(end_it);
|
||||
@ -246,9 +246,9 @@ void *CLIST_ITERATOR::data_relative( // get data + or - ...
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr);
|
||||
NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT);
|
||||
if (list->empty())
|
||||
EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr);
|
||||
EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT);
|
||||
if (offset < -1)
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::data_relative", ABORT, "offset < -l");
|
||||
#endif
|
||||
@ -308,7 +308,7 @@ link */
|
||||
/* Error if either current element is deleted */
|
||||
|
||||
if (!current || !other_it->current) {
|
||||
DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT, nullptr);
|
||||
DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT);
|
||||
}
|
||||
|
||||
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
|
||||
@ -389,12 +389,12 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
|
||||
constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points");
|
||||
|
||||
if (list != other_it->list)
|
||||
BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT);
|
||||
if (list->empty())
|
||||
EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT, nullptr);
|
||||
EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT);
|
||||
|
||||
if (!current || !other_it->current)
|
||||
DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT);
|
||||
#endif
|
||||
|
||||
ex_current_was_last = other_it->ex_current_was_last = false;
|
||||
@ -404,7 +404,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
|
||||
temp_it.mark_cycle_pt();
|
||||
do { // walk sublist
|
||||
if (temp_it.cycled_list()) { // can't find end pt
|
||||
BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT);
|
||||
}
|
||||
|
||||
if (temp_it.at_last()) {
|
||||
|
@ -190,7 +190,7 @@ public:
|
||||
void *data() { // get current data
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::data", ABORT, nullptr);
|
||||
NO_LIST.error("CLIST_ITERATOR::data", ABORT);
|
||||
}
|
||||
#endif
|
||||
return current->data;
|
||||
@ -523,7 +523,7 @@ inline void *CLIST_ITERATOR::extract() {
|
||||
#ifndef NDEBUG
|
||||
if (!current) { // list empty or
|
||||
// element extracted
|
||||
NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT, nullptr);
|
||||
NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -576,7 +576,7 @@ inline void *CLIST_ITERATOR::move_to_first() {
|
||||
inline void CLIST_ITERATOR::mark_cycle_pt() {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
|
||||
NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -666,7 +666,7 @@ inline void CLIST_ITERATOR::add_to_end( // element to add
|
||||
void *new_data) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT, nullptr);
|
||||
NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT);
|
||||
}
|
||||
if (!new_data) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_to_end", ABORT, "new_data is nullptr");
|
||||
@ -704,13 +704,10 @@ public:
|
||||
|
||||
#define CLISTIZEH(CLASSNAME) \
|
||||
class CLASSNAME##_CLIST : public X_CLIST<CLASSNAME> { \
|
||||
public: \
|
||||
using X_CLIST<CLASSNAME>::X_CLIST; \
|
||||
}; \
|
||||
class CLASSNAME##_C_IT : public X_ITER<CLIST_ITERATOR, CLASSNAME> { \
|
||||
public: \
|
||||
struct CLASSNAME##_C_IT : X_ITER<CLIST_ITERATOR, CLASSNAME> { \
|
||||
using X_ITER<CLIST_ITERATOR, CLASSNAME>::X_ITER; \
|
||||
CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : X_ITER(list) {} \
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -70,7 +70,7 @@ void ELIST::assign_to_sublist( // to this list
|
||||
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
|
||||
|
||||
if (!empty()) {
|
||||
LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT, nullptr);
|
||||
LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT);
|
||||
}
|
||||
|
||||
last = start_it->extract_sublist(end_it);
|
||||
@ -169,7 +169,7 @@ ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void *
|
||||
ELIST_LINK *ELIST_ITERATOR::forward() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST_ITERATOR::forward", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::forward", ABORT);
|
||||
#endif
|
||||
if (list->empty()) {
|
||||
return nullptr;
|
||||
@ -189,13 +189,17 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
if (!current)
|
||||
NULL_DATA.error("ELIST_ITERATOR::forward", ABORT, nullptr);
|
||||
NULL_DATA.error("ELIST_ITERATOR::forward", ABORT);
|
||||
#endif
|
||||
next = current->next;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!next)
|
||||
NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT, "This is: %p Current is: %p", this, current);
|
||||
if (!next) {
|
||||
NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT,
|
||||
"This is: %p Current is: %p",
|
||||
static_cast<void *>(this),
|
||||
static_cast<void *>(current));
|
||||
}
|
||||
#endif
|
||||
return current;
|
||||
}
|
||||
@ -214,9 +218,9 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT);
|
||||
if (list->empty())
|
||||
EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
|
||||
EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT);
|
||||
if (offset < -1)
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l");
|
||||
#endif
|
||||
@ -231,7 +235,7 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!ptr)
|
||||
NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
|
||||
NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT);
|
||||
#endif
|
||||
|
||||
return ptr;
|
||||
@ -248,7 +252,7 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
|
||||
ELIST_LINK *ELIST_ITERATOR::move_to_last() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT);
|
||||
#endif
|
||||
|
||||
while (current != list->last) {
|
||||
@ -276,7 +280,7 @@ void ELIST_ITERATOR::exchange( // positions of 2 links
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST_ITERATOR::exchange", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::exchange", ABORT);
|
||||
if (!other_it)
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr");
|
||||
if (!(other_it->list))
|
||||
@ -293,7 +297,7 @@ link */
|
||||
/* Error if either current element is deleted */
|
||||
|
||||
if (!current || !other_it->current) {
|
||||
DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT, nullptr);
|
||||
DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT);
|
||||
}
|
||||
|
||||
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
|
||||
@ -379,14 +383,14 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
|
||||
if (!other_it)
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::extract_sublist", ABORT, "other_it nullptr");
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT);
|
||||
if (list != other_it->list)
|
||||
BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT);
|
||||
if (list->empty())
|
||||
EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr);
|
||||
EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT);
|
||||
|
||||
if (!current || !other_it->current)
|
||||
DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT);
|
||||
#endif
|
||||
|
||||
ex_current_was_last = other_it->ex_current_was_last = false;
|
||||
@ -396,7 +400,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
|
||||
temp_it.mark_cycle_pt();
|
||||
do { // walk sublist
|
||||
if (temp_it.cycled_list()) { // can't find end pt
|
||||
BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT);
|
||||
}
|
||||
|
||||
if (temp_it.at_last()) {
|
||||
|
@ -231,10 +231,10 @@ public:
|
||||
ELIST_LINK *data() { // get current data
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::data", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::data", ABORT);
|
||||
}
|
||||
if (!current) {
|
||||
NULL_DATA.error("ELIST_ITERATOR::data", ABORT, nullptr);
|
||||
NULL_DATA.error("ELIST_ITERATOR::data", ABORT);
|
||||
}
|
||||
#endif
|
||||
return current;
|
||||
@ -256,7 +256,7 @@ public:
|
||||
bool empty() const { // is list empty?
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::empty", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::empty", ABORT);
|
||||
}
|
||||
#endif
|
||||
return list->empty();
|
||||
@ -334,13 +334,13 @@ inline void ELIST_ITERATOR::add_after_then_move( // element to add
|
||||
ELIST_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -381,13 +381,13 @@ inline void ELIST_ITERATOR::add_after_stay_put( // element to add
|
||||
ELIST_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -430,13 +430,13 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add
|
||||
ELIST_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -473,13 +473,13 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add
|
||||
ELIST_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -517,7 +517,7 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add
|
||||
inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT);
|
||||
}
|
||||
if (!list_to_add) {
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
|
||||
@ -564,7 +564,7 @@ inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) {
|
||||
inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT);
|
||||
}
|
||||
if (!list_to_add) {
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
|
||||
@ -612,11 +612,11 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() {
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::extract", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::extract", ABORT);
|
||||
}
|
||||
if (!current) { // list empty or
|
||||
// element extracted
|
||||
NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT, nullptr);
|
||||
NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -649,7 +649,7 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() {
|
||||
inline ELIST_LINK *ELIST_ITERATOR::move_to_first() {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -673,7 +673,7 @@ inline ELIST_LINK *ELIST_ITERATOR::move_to_first() {
|
||||
inline void ELIST_ITERATOR::mark_cycle_pt() {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -695,7 +695,7 @@ inline void ELIST_ITERATOR::mark_cycle_pt() {
|
||||
inline bool ELIST_ITERATOR::at_first() const {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::at_first", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::at_first", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -715,7 +715,7 @@ inline bool ELIST_ITERATOR::at_first() const {
|
||||
inline bool ELIST_ITERATOR::at_last() const {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::at_last", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::at_last", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -735,7 +735,7 @@ inline bool ELIST_ITERATOR::at_last() const {
|
||||
inline bool ELIST_ITERATOR::cycled_list() const {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -754,7 +754,7 @@ inline void ELIST_ITERATOR::sort( // sort elements
|
||||
const void *, const void *)) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::sort", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::sort", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -776,13 +776,13 @@ inline void ELIST_ITERATOR::add_to_end( // element to add
|
||||
ELIST_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::add_to_end", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -802,13 +802,10 @@ inline void ELIST_ITERATOR::add_to_end( // element to add
|
||||
|
||||
// ELISTIZEH(CLASSNAME) declares the two types used for lists of CLASSNAME:
//  - CLASSNAME_LIST, derived from X_LIST<ELIST, ELIST_ITERATOR, CLASSNAME>
//    and inheriting its constructors;
//  - CLASSNAME_IT, derived from X_ITER<ELIST_ITERATOR, CLASSNAME>, inheriting
//    its constructors and adding one that takes a CLASSNAME_LIST pointer.
#define ELISTIZEH(CLASSNAME) \
|
||||
class CLASSNAME##_LIST : public X_LIST<ELIST, ELIST_ITERATOR, CLASSNAME> { \
|
||||
public: \
|
||||
using X_LIST<ELIST, ELIST_ITERATOR, CLASSNAME>::X_LIST; \
|
||||
}; \
|
||||
class CLASSNAME##_IT : public X_ITER<ELIST_ITERATOR, CLASSNAME> { \
|
||||
public: \
|
||||
using X_ITER<ELIST_ITERATOR, CLASSNAME>::X_ITER; \
|
||||
CLASSNAME##_IT(CLASSNAME##_LIST *list) : X_ITER(list) {} \
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -71,7 +71,7 @@ void ELIST2::assign_to_sublist( // to this list
|
||||
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
|
||||
|
||||
if (!empty()) {
|
||||
LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT, nullptr);
|
||||
LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT);
|
||||
}
|
||||
|
||||
last = start_it->extract_sublist(end_it);
|
||||
@ -162,7 +162,7 @@ void ELIST2::add_sorted(int comparator(const void *, const void *), ELIST2_LINK
|
||||
ELIST2_LINK *ELIST2_ITERATOR::forward() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST2_ITERATOR::forward", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::forward", ABORT);
|
||||
#endif
|
||||
if (list->empty()) {
|
||||
return nullptr;
|
||||
@ -183,15 +183,18 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() {
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!current)
|
||||
NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT, nullptr);
|
||||
NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT);
|
||||
#endif
|
||||
|
||||
next = current->next;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!next)
|
||||
NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT, "This is: %p Current is: %p", this,
|
||||
current);
|
||||
if (!next) {
|
||||
NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT,
|
||||
"This is: %p Current is: %p",
|
||||
static_cast<void *>(this),
|
||||
static_cast<void *>(current));
|
||||
}
|
||||
#endif
|
||||
|
||||
return current;
|
||||
@ -207,7 +210,7 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() {
|
||||
ELIST2_LINK *ELIST2_ITERATOR::backward() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST2_ITERATOR::backward", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::backward", ABORT);
|
||||
#endif
|
||||
if (list->empty()) {
|
||||
return nullptr;
|
||||
@ -228,10 +231,13 @@ ELIST2_LINK *ELIST2_ITERATOR::backward() {
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!current)
|
||||
NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT, nullptr);
|
||||
if (!prev)
|
||||
NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT, "This is: %p Current is: %p", this,
|
||||
current);
|
||||
NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT);
|
||||
if (!prev) {
|
||||
NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT,
|
||||
"This is: %p Current is: %p",
|
||||
static_cast<void *>(this),
|
||||
static_cast<void *>(current));
|
||||
}
|
||||
#endif
|
||||
|
||||
prev = current->prev;
|
||||
@ -251,9 +257,9 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( // get data + or - ..
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT);
|
||||
if (list->empty())
|
||||
EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
|
||||
EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT);
|
||||
#endif
|
||||
|
||||
if (offset < 0) {
|
||||
@ -268,7 +274,7 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( // get data + or - ..
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!ptr)
|
||||
NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
|
||||
NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT);
|
||||
#endif
|
||||
|
||||
return ptr;
|
||||
@ -292,7 +298,7 @@ void ELIST2_ITERATOR::exchange( // positions of 2 links
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT);
|
||||
if (!other_it)
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr");
|
||||
if (!(other_it->list))
|
||||
@ -309,7 +315,7 @@ link */
|
||||
/* Error if either current element is deleted */
|
||||
|
||||
if (!current || !other_it->current) {
|
||||
DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT, nullptr);
|
||||
DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT);
|
||||
}
|
||||
|
||||
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
|
||||
@ -407,14 +413,14 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( // from this current
|
||||
if (!other_it)
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::extract_sublist", ABORT, "other_it nullptr");
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT);
|
||||
if (list != other_it->list)
|
||||
BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT);
|
||||
if (list->empty())
|
||||
EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr);
|
||||
EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT);
|
||||
|
||||
if (!current || !other_it->current)
|
||||
DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT);
|
||||
#endif
|
||||
|
||||
ex_current_was_last = other_it->ex_current_was_last = false;
|
||||
@ -424,7 +430,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( // from this current
|
||||
temp_it.mark_cycle_pt();
|
||||
do { // walk sublist
|
||||
if (temp_it.cycled_list()) { // can't find end pt
|
||||
BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT);
|
||||
}
|
||||
|
||||
if (temp_it.at_last()) {
|
||||
|
@ -191,10 +191,10 @@ public:
|
||||
ELIST2_LINK *data() { // get current data
|
||||
#ifndef NDEBUG
|
||||
if (!current) {
|
||||
NULL_DATA.error("ELIST2_ITERATOR::data", ABORT, nullptr);
|
||||
NULL_DATA.error("ELIST2_ITERATOR::data", ABORT);
|
||||
}
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::data", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::data", ABORT);
|
||||
}
|
||||
#endif
|
||||
return current;
|
||||
@ -219,7 +219,7 @@ public:
|
||||
bool empty() const { // is list empty?
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::empty", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::empty", ABORT);
|
||||
}
|
||||
#endif
|
||||
return list->empty();
|
||||
@ -301,13 +301,13 @@ inline void ELIST2_ITERATOR::add_after_then_move( // element to add
|
||||
ELIST2_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -352,13 +352,13 @@ inline void ELIST2_ITERATOR::add_after_stay_put( // element to add
|
||||
ELIST2_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -405,13 +405,13 @@ inline void ELIST2_ITERATOR::add_before_then_move( // element to add
|
||||
ELIST2_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -453,13 +453,13 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add
|
||||
ELIST2_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -502,7 +502,7 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add
|
||||
inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT);
|
||||
}
|
||||
if (!list_to_add) {
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
|
||||
@ -553,7 +553,7 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) {
|
||||
inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT);
|
||||
}
|
||||
if (!list_to_add) {
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
|
||||
@ -605,11 +605,11 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() {
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::extract", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::extract", ABORT);
|
||||
}
|
||||
if (!current) { // list empty or
|
||||
// element extracted
|
||||
NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT, nullptr);
|
||||
NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -646,7 +646,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() {
|
||||
inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -666,7 +666,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() {
|
||||
inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -690,7 +690,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() {
|
||||
inline void ELIST2_ITERATOR::mark_cycle_pt() {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -712,7 +712,7 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() {
|
||||
inline bool ELIST2_ITERATOR::at_first() const {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -732,7 +732,7 @@ inline bool ELIST2_ITERATOR::at_first() const {
|
||||
inline bool ELIST2_ITERATOR::at_last() const {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -752,7 +752,7 @@ inline bool ELIST2_ITERATOR::at_last() const {
|
||||
inline bool ELIST2_ITERATOR::cycled_list() const {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -771,7 +771,7 @@ inline void ELIST2_ITERATOR::sort( // sort elements
|
||||
const void *, const void *)) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::sort", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::sort", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -793,13 +793,13 @@ inline void ELIST2_ITERATOR::add_to_end( // element to add
|
||||
ELIST2_LINK *new_element) {
|
||||
#ifndef NDEBUG
|
||||
if (!list) {
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr);
|
||||
NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT);
|
||||
}
|
||||
if (!new_element) {
|
||||
BAD_PARAMETER.error("ELIST2_ITERATOR::add_to_end", ABORT, "new_element is nullptr");
|
||||
}
|
||||
if (new_element->next) {
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr);
|
||||
STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -821,13 +821,10 @@ inline void ELIST2_ITERATOR::add_to_end( // element to add
|
||||
|
||||
#define ELIST2IZEH(CLASSNAME) \
|
||||
class CLASSNAME##_LIST : public X_LIST<ELIST2, ELIST2_ITERATOR, CLASSNAME> { \
|
||||
public: \
|
||||
using X_LIST<ELIST2, ELIST2_ITERATOR, CLASSNAME>::X_LIST; \
|
||||
}; \
|
||||
class CLASSNAME##_IT : public X_ITER<ELIST2_ITERATOR, CLASSNAME> { \
|
||||
public: \
|
||||
struct CLASSNAME##_IT : X_ITER<ELIST2_ITERATOR, CLASSNAME> { \
|
||||
using X_ITER<ELIST2_ITERATOR, CLASSNAME>::X_ITER; \
|
||||
CLASSNAME##_IT(CLASSNAME##_LIST *list) : X_ITER(list) {} \
|
||||
CLASSNAME *backward() { \
|
||||
return reinterpret_cast<CLASSNAME *>(ELIST2_ITERATOR::backward()); \
|
||||
} \
|
||||
|
@ -91,8 +91,12 @@ void ERRCODE::error( // handle error
|
||||
#endif
|
||||
abort();
|
||||
default:
|
||||
BADERRACTION.error("error", ABORT, nullptr);
|
||||
BADERRACTION.error("error", ABORT);
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload for callers that have no extra message text:
// forwards caller and action to the format-taking error() overload with a
// null format pointer.
void ERRCODE::error(const char *caller, TessErrorLogCode action) const {
|
||||
error(caller, action, nullptr);
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -31,12 +31,6 @@ enum TessErrorLogCode {
|
||||
ABORT = 2 /*abort after error */
|
||||
};
|
||||
|
||||
/* Explicit Error Abort codes */
|
||||
#define NO_ABORT_CODE 0
|
||||
#define LIST_ABORT 1
|
||||
#define MEMORY_ABORT 2
|
||||
#define FILE_ABORT 3
|
||||
|
||||
#if !defined(__GNUC__) && !defined(__attribute__)
|
||||
# define __attribute__(attr) // compiler without support for __attribute__
|
||||
#endif
|
||||
@ -49,6 +43,7 @@ public:
|
||||
TessErrorLogCode action, // action to take
|
||||
const char *format, ... // fprintf format
|
||||
) const __attribute__((format(printf, 4, 5)));
|
||||
void error(const char *caller, TessErrorLogCode action) const;
|
||||
constexpr ERRCODE(const char *string) : message(string) {} // initialize with string
|
||||
};
|
||||
|
||||
|
@ -24,14 +24,6 @@
|
||||
namespace tesseract {
|
||||
|
||||
// Predefined ERRCODE message constants for file, pipe, select and exec
// failures. Each constant only wraps the message string shown.
constexpr ERRCODE CANTOPENFILE("Can't open file");
|
||||
constexpr ERRCODE CANTCREATEFILE("Can't create file");
|
||||
constexpr ERRCODE CANTMAKEPIPE("Can't create pipe");
|
||||
constexpr ERRCODE CANTCONNECTPIPE("Can't reconnect pipes to stdin/stdout");
|
||||
constexpr ERRCODE READFAILED("Read of file failed");
|
||||
constexpr ERRCODE WRITEFAILED("Write of file failed");
|
||||
constexpr ERRCODE SELECTFAILED("Select failed");
|
||||
|
||||
constexpr ERRCODE EXECFAILED("Could not exec new process");
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
@ -41,10 +41,6 @@ public:
|
||||
// Default constructor: sets the vector up through init() with
// kDefaultVectorSize.
GenericVector() {
|
||||
init(kDefaultVectorSize);
|
||||
}
|
||||
// Sized constructor: calls init(size) and then init_to_size(size, init_val).
// NOTE(review): presumably this leaves `size` elements equal to init_val -
// confirm against init_to_size().
GenericVector(int size, const T &init_val) {
|
||||
init(size);
|
||||
init_to_size(size, init_val);
|
||||
}
|
||||
|
||||
// Copy
|
||||
GenericVector(const GenericVector &other) {
|
||||
@ -107,14 +103,6 @@ public:
|
||||
int push_back(T object);
|
||||
void operator+=(const T &t);
|
||||
|
||||
// Push an element in the end of the array if the same
|
||||
// element is not already contained in the array.
|
||||
int push_back_new(const T &object);
|
||||
|
||||
// Push an element in the front of the array
|
||||
// Note: This function is O(n)
|
||||
int push_front(const T &object);
|
||||
|
||||
// Set the value at the given index
|
||||
void set(const T &t, int index);
|
||||
|
||||
@ -178,27 +166,13 @@ public:
|
||||
// bool T::Serialize(FILE* fp) const that returns false in case of error.
|
||||
// Returns false in case of error.
|
||||
bool SerializeClasses(FILE *fp) const;
|
||||
bool SerializeClasses(TFile *fp) const;
|
||||
// Reads a vector of classes from the given file. Assumes the existence of
|
||||
// bool T::DeSerialize(bool swap, FILE* fp) that returns false in case of
|
||||
// error. Also needs T::T() and T::T(const T&), as init_to_size is used in
|
||||
// this function. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerializeClasses(bool swap, FILE *fp);
|
||||
bool DeSerializeClasses(TFile *fp);
|
||||
|
||||
// Allocates a new array of double the current_size, copies over the
|
||||
// information from data to the new location, deletes data and returns
|
||||
// the pointer to the new larger array.
|
||||
// This function uses memcpy to copy the data, instead of invoking
|
||||
// operator=() for each element like double_the_size() does.
// The caller's old `data` pointer is invalid after the call (it is deleted
// here with delete[]); only the returned pointer may be used.
// NOTE(review): a raw memcpy bypasses T's copy operations, so this looks
// safe only for trivially copyable T - confirm at call sites.
|
||||
static T *double_the_size_memcpy(int current_size, T *data) {
|
||||
T *data_new = new T[current_size * 2];
|
||||
memcpy(data_new, data, sizeof(T) * current_size);
|
||||
delete[] data;
|
||||
return data_new;
|
||||
}
|
||||
|
||||
// Reverses the elements of the vector.
|
||||
void reverse() {
|
||||
for (int i = 0; i < size_used_ / 2; ++i) {
|
||||
@ -221,26 +195,6 @@ public:
|
||||
qsort(data_, size_used_, sizeof(*data_), comparator);
|
||||
}
|
||||
|
||||
// Searches the array (assuming sorted in ascending order, using sort()) for
|
||||
// an element equal to target and returns the index of the best candidate.
|
||||
// The return value is conceptually the largest index i such that
|
||||
// data_[i] <= target or 0 if target < the whole vector.
|
||||
// NOTE that this function uses operator> so really the return value is
|
||||
// the largest index i such that data_[i] > target is false.
|
||||
int binary_search(const T &target) const {
|
||||
int bottom = 0;
|
||||
int top = size_used_;
|
||||
while (top - bottom > 1) {
|
||||
int middle = (bottom + top) / 2;
|
||||
if (data_[middle] > target) {
|
||||
top = middle;
|
||||
} else {
|
||||
bottom = middle;
|
||||
}
|
||||
}
|
||||
return bottom;
|
||||
}
|
||||
|
||||
// Swaps the elements with the given indices.
|
||||
void swap(int index1, int index2) {
|
||||
if (index1 != index2) {
|
||||
@ -307,11 +261,6 @@ inline bool SaveDataToFile(const GenericVector<char> &data, const char *filename
|
||||
return result;
|
||||
}
|
||||
|
||||
// Equality predicate: returns the result of t1 == t2 using T's operator==.
template <typename T>
|
||||
bool cmp_eq(T const &t1, T const &t2) {
|
||||
return t1 == t2;
|
||||
}
|
||||
|
||||
// Used by sort()
|
||||
// return < 0 if t1 < t2
|
||||
// return 0 if t1 == t2
|
||||
@ -632,29 +581,6 @@ int GenericVector<T>::push_back(T object) {
|
||||
return index;
|
||||
}
|
||||
|
||||
// Appends object only when get_index(object) does not find it (negative
// index). Returns the index get_index() reported for an existing element,
// otherwise whatever push_back(object) returns for the new one.
template <typename T>
|
||||
int GenericVector<T>::push_back_new(const T &object) {
|
||||
int index = get_index(object);
|
||||
if (index >= 0) {
|
||||
return index;
|
||||
}
|
||||
return push_back(object);
|
||||
}
|
||||
|
||||
// Add an element in the array (front)
// Grows the storage via double_the_size() when it is full, shifts every
// existing element up by one slot, stores object at index 0 and always
// returns 0. The shift touches all size_used_ elements.
|
||||
template <typename T>
|
||||
int GenericVector<T>::push_front(const T &object) {
|
||||
if (size_used_ == size_reserved_) {
|
||||
double_the_size();
|
||||
}
|
||||
// Walk from the back so each element is copied before it is overwritten.
for (int i = size_used_; i > 0; --i) {
|
||||
data_[i] = data_[i - 1];
|
||||
}
|
||||
data_[0] = object;
|
||||
++size_used_;
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void GenericVector<T>::operator+=(const T &t) {
|
||||
push_back(t);
|
||||
@ -831,18 +757,6 @@ bool GenericVector<T>::SerializeClasses(FILE *fp) const {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Writes the vector to fp: first size_used_ itself, then each of the
// size_used_ elements through its own Serialize(fp). Returns false as soon
// as the count write or any element's Serialize() fails, true otherwise.
template <typename T>
|
||||
bool GenericVector<T>::SerializeClasses(TFile *fp) const {
|
||||
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < size_used_; ++i) {
|
||||
if (!data_[i].Serialize(fp)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads a vector of classes from the given file. Assumes the existence of
|
||||
// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
|
||||
@ -850,24 +764,6 @@ bool GenericVector<T>::SerializeClasses(TFile *fp) const {
|
||||
// this function. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerializeClasses(bool swap, FILE *fp) {
|
||||
int32_t reserved;
|
||||
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
Reverse32(&reserved);
|
||||
}
|
||||
T empty;
|
||||
init_to_size(reserved, empty);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
if (!data_[i].DeSerialize(swap, fp)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerializeClasses(TFile *fp) {
|
||||
int32_t reserved;
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
|
||||
|
@ -93,13 +93,6 @@ inline void chomp_string(char *str) {
|
||||
}
|
||||
}
|
||||
|
||||
// Advance the current pointer of the file if it points to a newline character.
// Any other character is left unread by seeking back over it. At end of file
// nothing was consumed, so the position is not moved (seeking back there
// would make the last byte of the file readable a second time).
inline void SkipNewline(FILE *file) {
  const int c = fgetc(file);
  if (c != EOF && c != '\n') {
    fseek(file, -1, SEEK_CUR);
  }
}
|
||||
|
||||
// return the smallest multiple of block_size greater than or equal to n.
|
||||
inline int RoundUp(int n, int block_size) {
|
||||
return block_size * ((n + block_size - 1) / block_size);
|
||||
@ -197,21 +190,11 @@ inline void ReverseN(void *ptr, int num_bytes) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reverse the order of bytes in a 16 bit quantity for big/little-endian switch.
|
||||
inline void Reverse16(void *ptr) {
|
||||
ReverseN(ptr, 2);
|
||||
}
|
||||
|
||||
// Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
|
||||
inline void Reverse32(void *ptr) {
|
||||
ReverseN(ptr, 4);
|
||||
}
|
||||
|
||||
// Reverse the order of bytes in a 64 bit quantity for big/little-endian switch.
|
||||
inline void Reverse64(void *ptr) {
|
||||
ReverseN(ptr, 8);
|
||||
}
|
||||
|
||||
// Reads a vector of simple types from the given file. Assumes that bitwise
|
||||
// read/write will work with ReverseN according to sizeof(T).
|
||||
// Returns false in case of error.
|
||||
|
@ -23,12 +23,9 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
constexpr ERRCODE SERIALISE_LINKS("Attempted to (de)serialise a link element");
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
||||
constexpr ERRCODE NO_LIST("Iterator not set to a list");
|
||||
constexpr ERRCODE NULL_OBJECT("List found this = nullptr!");
|
||||
constexpr ERRCODE NULL_DATA("List would have returned a nullptr data pointer");
|
||||
constexpr ERRCODE NULL_CURRENT("List current position is nullptr");
|
||||
constexpr ERRCODE NULL_NEXT("Next element on the list is nullptr");
|
||||
|
@ -1,82 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: mainblk.cpp (Formerly main.c)
|
||||
* Description: Function to call from main() to setup.
|
||||
* Author: Ray Smith
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring> // for std::strrchr
|
||||
#if defined(_WIN32)
|
||||
# include <io.h> // for _access
|
||||
#endif
|
||||
|
||||
#include "ccutil.h"
|
||||
#include "fileerr.h"
|
||||
|
||||
namespace tesseract {
|
||||
/**
|
||||
* @brief CCUtil::main_setup - set location of tessdata and name of image
|
||||
*
|
||||
* @param argv0 - paths to the directory with language files and config files.
|
||||
* An actual value of argv0 is used if not nullptr, otherwise TESSDATA_PREFIX is
|
||||
* used if not nullptr, next try to use compiled in -DTESSDATA_PREFIX. If
|
||||
* previous is not successful - use current directory.
|
||||
* @param basename - name of image
|
||||
*/
|
||||
void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
|
||||
imagebasename = basename; /**< name of image */
|
||||
|
||||
char *tessdata_prefix = getenv("TESSDATA_PREFIX");
|
||||
|
||||
if (!argv0.empty()) {
|
||||
/* Use tessdata prefix from the command line. */
|
||||
datadir = argv0;
|
||||
} else if (tessdata_prefix) {
|
||||
/* Use tessdata prefix from the environment. */
|
||||
datadir = tessdata_prefix;
|
||||
#if defined(_WIN32)
|
||||
} else if (datadir.empty() || _access(datadir.c_str(), 0) != 0) {
|
||||
/* Look for tessdata in directory of executable. */
|
||||
char path[_MAX_PATH];
|
||||
DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
|
||||
if (length > 0 && length < sizeof(path)) {
|
||||
char *separator = std::strrchr(path, '\\');
|
||||
if (separator != nullptr) {
|
||||
*separator = '\0';
|
||||
datadir = path;
|
||||
datadir += "/tessdata";
|
||||
}
|
||||
}
|
||||
#endif /* _WIN32 */
|
||||
#if defined(TESSDATA_PREFIX)
|
||||
} else {
|
||||
// Use tessdata prefix which was compiled in.
|
||||
datadir = TESSDATA_PREFIX "/tessdata";
|
||||
#endif
|
||||
}
|
||||
|
||||
// datadir may still be empty:
|
||||
if (datadir.empty()) {
|
||||
datadir = "./";
|
||||
}
|
||||
|
||||
// check for missing directory separator
|
||||
const char *lastchar = datadir.c_str();
|
||||
lastchar += datadir.length() - 1;
|
||||
if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) {
|
||||
datadir += "/";
|
||||
}
|
||||
}
|
||||
} // namespace tesseract
|
@ -43,7 +43,8 @@ public:
|
||||
tprintf(
|
||||
"ObjectCache(%p)::~ObjectCache(): WARNING! LEAK! object %p "
|
||||
"still has count %d (id %s)\n",
|
||||
this, it.object, it.count, it.id.c_str());
|
||||
static_cast<void *>(this), static_cast<void *>(it.object),
|
||||
it.count, it.id.c_str());
|
||||
} else {
|
||||
delete it.object;
|
||||
it.object = nullptr;
|
||||
|
@ -29,14 +29,6 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/***********************************************************************
|
||||
QUOTE_IT MACRO DEFINITION
|
||||
===========================
|
||||
Replace <parm> with "<parm>". <parm> may be an arbitrary number of tokens
|
||||
***********************************************************************/
|
||||
|
||||
#define QUOTE_IT(parm) #parm
|
||||
|
||||
// Return number of elements of an array.
|
||||
template <typename T, size_t N>
|
||||
constexpr size_t countof(T const (&)[N]) noexcept {
|
||||
|
@ -48,7 +48,7 @@ TessdataManager::TessdataManager(FileReader reader)
|
||||
SetVersionString(TESSERACT_VERSION_STR);
|
||||
}
|
||||
|
||||
// Lazily loads from the the given filename. Won't actually read the file
|
||||
// Lazily loads from the given filename. Won't actually read the file
|
||||
// until it needs it.
|
||||
void TessdataManager::LoadFileLater(const char *data_file_name) {
|
||||
Clear();
|
||||
|
@ -138,7 +138,7 @@ public:
|
||||
return is_loaded_;
|
||||
}
|
||||
|
||||
// Lazily loads from the the given filename. Won't actually read the file
|
||||
// Lazily loads from the given filename. Won't actually read the file
|
||||
// until it needs it.
|
||||
void LoadFileLater(const char *data_file_name);
|
||||
/**
|
||||
|
@ -38,4 +38,6 @@ extern TESS_API void tprintf( // Trace printf
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#undef __attribute__
|
||||
|
||||
#endif // define TESSERACT_CCUTIL_TPRINTF_H
|
||||
|
@ -614,7 +614,6 @@ public:
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*width = 0.0f;
|
||||
*width_sd = 0.0f;
|
||||
;
|
||||
return;
|
||||
}
|
||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||
|
@ -276,7 +276,7 @@ void Classify::LearnWord(const char *fontname, WERD_RES *word) {
|
||||
tprintf("\n\nAdapting to word = %s\n", word->best_choice->debug_string().c_str());
|
||||
}
|
||||
thresholds = new float[word_len];
|
||||
word->ComputeAdaptionThresholds(certainty_scale, matcher_perfect_threshold,
|
||||
word->ComputeAdaptionThresholds(getDict().certainty_scale, matcher_perfect_threshold,
|
||||
matcher_good_threshold, matcher_rating_margin, thresholds);
|
||||
}
|
||||
int start_blob = 0;
|
||||
|
@ -101,7 +101,6 @@ Classify::Classify()
|
||||
"its expected textline position",
|
||||
this->params())
|
||||
, double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params())
|
||||
, double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", this->params())
|
||||
, double_MEMBER(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used",
|
||||
this->params())
|
||||
, double_MEMBER(classify_adapted_pruning_factor, 2.5,
|
||||
|
@ -394,7 +394,6 @@ public:
|
||||
double_VAR_H(matcher_clustering_max_angle_delta);
|
||||
double_VAR_H(classify_misfit_junk_penalty);
|
||||
double_VAR_H(rating_scale);
|
||||
double_VAR_H(certainty_scale);
|
||||
double_VAR_H(tessedit_class_miss_scale);
|
||||
double_VAR_H(classify_adapted_pruning_factor);
|
||||
double_VAR_H(classify_adapted_pruning_threshold);
|
||||
|
@ -28,7 +28,6 @@
|
||||
|
||||
#include <cfloat> // for FLT_MAX
|
||||
#include <cmath> // for M_PI
|
||||
#include <array> // for std::array
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
@ -66,7 +66,7 @@ class FCOORD;
|
||||
|
||||
/* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the
|
||||
* 3 axes of the quantized feature space.
|
||||
* The position of the the bits recorded for each class in the
|
||||
* The position of the bits recorded for each class in the
|
||||
* 4th dimension is determined by using CPrunerWordIndexFor(c),
|
||||
* where c is the corresponding class id. */
|
||||
struct CLASS_PRUNER_STRUCT {
|
||||
|
@ -44,7 +44,7 @@ struct UnicharRating {
|
||||
tprintf(
|
||||
"Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%u,"
|
||||
" %zu fonts\n",
|
||||
unichar_id, rating, adapted, config, feature_misses, fonts.size());
|
||||
unichar_id, static_cast<double>(rating), adapted, config, feature_misses, fonts.size());
|
||||
}
|
||||
|
||||
// Helper function to get the index of the first result with the required
|
||||
|
@ -333,7 +333,7 @@ protected:
|
||||
|
||||
// Finds the edge with the given direction, word_end and unichar_id
|
||||
// in the node indicated by node_ref. Fills in the pointer to the
|
||||
// EDGE_RECORD and the index of the edge with the the values
|
||||
// EDGE_RECORD and the index of the edge with the values
|
||||
// corresponding to the edge found. Returns true if an edge was found.
|
||||
bool edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end,
|
||||
UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const;
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Generated code with lookup tables
|
||||
// Generated code with lookup tables (see generate_lut.py)
|
||||
#include "functions.h"
|
||||
namespace tesseract {
|
||||
const TFloat TanhTable[] = {
|
||||
|
@ -42,13 +42,13 @@ extern const TFloat LogisticTable[];
|
||||
|
||||
// Non-linearity (sigmoid) functions with cache tables and clipping.
|
||||
inline TFloat Tanh(TFloat x) {
|
||||
if (x < 0.0) {
|
||||
if (x < 0) {
|
||||
return -Tanh(-x);
|
||||
}
|
||||
x *= kScaleFactor;
|
||||
auto index = static_cast<unsigned>(x);
|
||||
if (index >= (kTableSize - 1)) {
|
||||
return 1.0;
|
||||
return 1;
|
||||
}
|
||||
TFloat tanh_i0 = TanhTable[index];
|
||||
TFloat tanh_i1 = TanhTable[index + 1];
|
||||
@ -57,13 +57,13 @@ inline TFloat Tanh(TFloat x) {
|
||||
}
|
||||
|
||||
inline TFloat Logistic(TFloat x) {
|
||||
if (x < 0.0) {
|
||||
return 1.0 - Logistic(-x);
|
||||
if (x < 0) {
|
||||
return 1 - Logistic(-x);
|
||||
}
|
||||
x *= kScaleFactor;
|
||||
auto index = static_cast<unsigned>(x);
|
||||
if (index >= (kTableSize - 1)) {
|
||||
return 1.0;
|
||||
return 1;
|
||||
}
|
||||
TFloat l0 = LogisticTable[index];
|
||||
TFloat l1 = LogisticTable[index + 1];
|
||||
@ -79,36 +79,36 @@ struct FFunc {
|
||||
};
|
||||
struct FPrime {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return y * (1.0 - y);
|
||||
return y * (1 - y);
|
||||
}
|
||||
};
|
||||
struct ClipFFunc {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
if (x <= 0.0) {
|
||||
return 0.0;
|
||||
if (x <= 0) {
|
||||
return 0;
|
||||
}
|
||||
if (x >= 1.0) {
|
||||
return 1.0;
|
||||
if (x >= 1) {
|
||||
return 1;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
struct ClipFPrime {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return 0.0 < y && y < 1.0 ? 1.0 : 0.0;
|
||||
return 0 < y && y < 1 ? 1 : 0;
|
||||
}
|
||||
};
|
||||
struct Relu {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
if (x <= 0.0) {
|
||||
return 0.0;
|
||||
if (x <= 0) {
|
||||
return 0;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
struct ReluPrime {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return 0.0 < y ? 1.0 : 0.0;
|
||||
return 0 < y ? 1 : 0;
|
||||
}
|
||||
};
|
||||
struct GFunc {
|
||||
@ -118,23 +118,23 @@ struct GFunc {
|
||||
};
|
||||
struct GPrime {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return 1.0 - y * y;
|
||||
return 1 - y * y;
|
||||
}
|
||||
};
|
||||
struct ClipGFunc {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
if (x <= -1.0) {
|
||||
return -1.0;
|
||||
if (x <= -1) {
|
||||
return -1;
|
||||
}
|
||||
if (x >= 1.0) {
|
||||
return 1.0;
|
||||
if (x >= 1) {
|
||||
return 1;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
struct ClipGPrime {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return -1.0 < y && y < 1.0 ? 1.0 : 0.0;
|
||||
return -1 < y && y < 1 ? 1 : 0;
|
||||
}
|
||||
};
|
||||
struct HFunc {
|
||||
@ -183,7 +183,7 @@ inline void SoftmaxInPlace(int n, T *inout) {
|
||||
return;
|
||||
}
|
||||
// A limit on the negative range input to exp to guarantee non-zero output.
|
||||
const T kMaxSoftmaxActivation = 86.0f;
|
||||
const T kMaxSoftmaxActivation = 86;
|
||||
|
||||
T max_output = inout[0];
|
||||
for (int i = 1; i < n; i++) {
|
||||
@ -192,14 +192,14 @@ inline void SoftmaxInPlace(int n, T *inout) {
|
||||
max_output = output;
|
||||
}
|
||||
}
|
||||
T prob_total = 0.0;
|
||||
T prob_total = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
T prob = inout[i] - max_output;
|
||||
prob = exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
|
||||
prob = std::exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
|
||||
prob_total += prob;
|
||||
inout[i] = prob;
|
||||
}
|
||||
if (prob_total > 0.0) {
|
||||
if (prob_total > 0) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
inout[i] /= prob_total;
|
||||
}
|
||||
@ -207,7 +207,7 @@ inline void SoftmaxInPlace(int n, T *inout) {
|
||||
}
|
||||
|
||||
// Copies n values of the given src vector to dest.
|
||||
inline void CopyVector(int n, const TFloat *src, TFloat *dest) {
|
||||
inline void CopyVector(unsigned n, const TFloat *src, TFloat *dest) {
|
||||
memcpy(dest, src, n * sizeof(dest[0]));
|
||||
}
|
||||
|
||||
@ -242,7 +242,7 @@ inline void SumVectors(int n, const TFloat *v1, const TFloat *v2, const TFloat *
|
||||
|
||||
// Sets the given n-vector vec to 0.
|
||||
template <typename T>
|
||||
inline void ZeroVector(int n, T *vec) {
|
||||
inline void ZeroVector(unsigned n, T *vec) {
|
||||
memset(vec, 0, n * sizeof(*vec));
|
||||
}
|
||||
|
||||
|
@ -4,21 +4,23 @@
|
||||
|
||||
import math
|
||||
|
||||
# kTableSize and kScaleFactor must match the values in functions.h.
|
||||
|
||||
# Size of static tables.
|
||||
kTableSize = 4096
|
||||
# Scale factor for float arg to int index.
|
||||
kScaleFactor = 256.0
|
||||
|
||||
print("// Generated code with lookup tables")
|
||||
print("// Generated code with lookup tables (see generate_lut.py)")
|
||||
print('#include "functions.h"')
|
||||
print("namespace tesseract {")
|
||||
|
||||
print("const double TanhTable[] = {")
|
||||
print("const TFloat TanhTable[] = {")
|
||||
for i in range(kTableSize):
|
||||
print(" %a," % math.tanh(i / kScaleFactor))
|
||||
print("};")
|
||||
|
||||
print("const double LogisticTable[] = {")
|
||||
print("const TFloat LogisticTable[] = {")
|
||||
for i in range(kTableSize):
|
||||
print(" %a," % (1 / (1 + math.exp(-i / kScaleFactor))))
|
||||
print("};")
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include <cstdlib>
|
||||
#include <sstream> // for std::ostringstream
|
||||
|
||||
#if !defined(__GNUC__) && defined(_MSC_VER)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
# include <intrin.h> // _BitScanReverse
|
||||
#endif
|
||||
|
||||
|
@ -294,7 +294,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
|
||||
void LSTMRecognizer::OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output,
|
||||
float *sd) {
|
||||
const int kOutputScale = INT8_MAX;
|
||||
STATS stats(0, kOutputScale + 1);
|
||||
STATS stats(0, kOutputScale);
|
||||
for (int t = 0; t < outputs.Width(); ++t) {
|
||||
int best_label = outputs.BestLabel(t, nullptr);
|
||||
if (best_label != null_char_) {
|
||||
|
@ -127,7 +127,7 @@ void NetworkIO::ZeroInvalidElements() {
|
||||
static void ComputeBlackWhite(Image pix, float *black, float *white) {
|
||||
int width = pixGetWidth(pix);
|
||||
int height = pixGetHeight(pix);
|
||||
STATS mins(0, 256), maxes(0, 256);
|
||||
STATS mins(0, 255), maxes(0, 255);
|
||||
if (width >= 3) {
|
||||
int y = height / 2;
|
||||
l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y;
|
||||
@ -412,15 +412,6 @@ void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_feature
|
||||
}
|
||||
}
|
||||
|
||||
// Zeroes a single time step.
|
||||
void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
|
||||
if (int_mode_) {
|
||||
ZeroVector(num_features, i_[t] + offset);
|
||||
} else {
|
||||
ZeroVector(num_features, f_[t] + offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the given range to random values.
|
||||
void NetworkIO::Randomize(int t, int offset, int num_features, TRand *randomizer) {
|
||||
if (int_mode_) {
|
||||
|
@ -2,7 +2,6 @@
|
||||
// File: networkio.h
|
||||
// Description: Network input/output data, allowing float/int implementations.
|
||||
// Author: Ray Smith
|
||||
// Created: Tue Jun 17 08:43:11 PST 2014
|
||||
//
|
||||
// (C) Copyright 2014, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -146,9 +145,12 @@ public:
|
||||
int src_t, int src_offset);
|
||||
// Zeroes a single time step.
|
||||
void ZeroTimeStep(int t) {
|
||||
ZeroTimeStepGeneral(t, 0, NumFeatures());
|
||||
if (int_mode_) {
|
||||
memset(i_[t], 0, sizeof(*i_[t]) * NumFeatures());
|
||||
} else {
|
||||
memset(f_[t], 0, sizeof(*f_[t]) * NumFeatures());
|
||||
}
|
||||
}
|
||||
void ZeroTimeStepGeneral(int t, int offset, int num_features);
|
||||
// Sets the given range to random values.
|
||||
void Randomize(int t, int offset, int num_features, TRand *randomizer);
|
||||
|
||||
|
@ -28,7 +28,6 @@ public:
|
||||
// ni_ and no_ will be set by AddToStack.
|
||||
TESS_API
|
||||
Parallel(const char *name, NetworkType type);
|
||||
~Parallel() override = default;
|
||||
|
||||
// Returns the shape output from the network given an input shape (which may
|
||||
// be partially unknown ie zero).
|
||||
|
@ -25,7 +25,7 @@
|
||||
namespace tesseract {
|
||||
|
||||
// Holds a collection of other networks and forwards calls to each of them.
|
||||
class Plumbing : public Network {
|
||||
class TESS_API Plumbing : public Network {
|
||||
public:
|
||||
// ni_ and no_ will be set by AddToStack.
|
||||
explicit Plumbing(const std::string &name);
|
||||
@ -103,10 +103,8 @@ public:
|
||||
return stack_;
|
||||
}
|
||||
// Returns a set of strings representing the layer-ids of all layers below.
|
||||
TESS_API
|
||||
void EnumerateLayers(const std::string *prefix, std::vector<std::string> &layers) const;
|
||||
// Returns a pointer to the network layer corresponding to the given id.
|
||||
TESS_API
|
||||
Network *GetLayer(const char *id) const;
|
||||
// Returns the learning rate for a specific layer of the stack.
|
||||
float LayerLearningRate(const char *id) {
|
||||
@ -129,7 +127,6 @@ public:
|
||||
}
|
||||
|
||||
// Returns a pointer to the learning rate for the given layer id.
|
||||
TESS_API
|
||||
float *LayerLearningRatePtr(const char *id);
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
|
@ -24,12 +24,6 @@
|
||||
#include "unicharcompress.h"
|
||||
|
||||
#include <algorithm> // for std::reverse
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
|
@ -29,11 +29,8 @@
|
||||
#include "ratngs.h"
|
||||
#include "unicharcompress.h"
|
||||
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include <unordered_set> // for std::unordered_set
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
|
@ -525,7 +525,7 @@ static void HistogramWeight(TFloat weight, STATS *histogram) {
|
||||
}
|
||||
|
||||
void WeightMatrix::Debug2D(const char *msg) {
|
||||
STATS histogram(0, kHistogramBuckets);
|
||||
STATS histogram(0, kHistogramBuckets - 1);
|
||||
if (int_mode_) {
|
||||
for (int i = 0; i < wi_.dim1(); ++i) {
|
||||
for (int j = 0; j < wi_.dim2(); ++j) {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user