merging with tesseract master in order to create a pull request

This commit is contained in:
Stefan Brechtken 2021-03-15 17:02:19 +01:00
commit 594a000ecd
664 changed files with 80036 additions and 87703 deletions

View File

@ -1,5 +1,4 @@
---
BasedOnStyle: Google
BasedOnStyle: Google
# Only merge empty functions.
AllowShortFunctionsOnASingleLine: Empty
# Do not allow short if statements.
@ -7,3 +6,16 @@ AllowShortIfStatementsOnASingleLine: false
# Enforce always the same pointer alignment.
DerivePointerAlignment: false
IndentPPDirectives: AfterHash
PointerAlignment: Right
IncludeBlocks: Preserve
FixNamespaceComments: true
ColumnLimit: 100
IndentWidth: 2
# IndentAccessModifiers: false  # not accepted at the moment
AccessModifierOffset: -2  # keep equal to minus IndentWidth (-IndentWidth)
SpacesBeforeTrailingComments: 1
# AllowShortIfStatementsOnASingleLine is already set to false earlier in this
# file; it is not repeated here because duplicate mapping keys are invalid YAML.
AllowShortLoopsOnASingleLine: false
BreakConstructorInitializers: BeforeComma
# ConstructorInitializerAllOnOneLineOrOnePerLine: false

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
* text=auto

320
.github/workflows/autotools.yml vendored Normal file
View File

@ -0,0 +1,320 @@
name: autotools
# autotools build of tesseract and training tools on ubuntu, macos homebrew and macports.
# run command line tests, basicapitest and unittests.
on:
#push:
schedule:
- cron: 0 20 * * *
jobs:
# ============================================================================================
linux:
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- { name: ubuntu-18.04-clang-7-autotools, os: ubuntu-18.04, cxx: clang++-7 }
- { name: ubuntu-18.04-clang-8-autotools, os: ubuntu-18.04, cxx: clang++-8 } #installed
- { name: ubuntu-18.04-clang-9-autotools, os: ubuntu-18.04, cxx: clang++-9 } #installed
- { name: ubuntu-18.04-clang-10-autotools, os: ubuntu-18.04, cxx: clang++-10 }
- { name: ubuntu-18.04-gcc-7-autotools, os: ubuntu-18.04, cxx: g++-7 } #installed
- { name: ubuntu-18.04-gcc-8-autotools, os: ubuntu-18.04, cxx: g++-8 } #installed
- { name: ubuntu-18.04-gcc-9-autotools, os: ubuntu-18.04, cxx: g++-9 } #installed
- { name: ubuntu-18.04-gcc-10-autotools, os: ubuntu-18.04, cxx: g++-10 } #installed
- { name: ubuntu-20.04-clang-7-autotools, os: ubuntu-20.04, cxx: clang++-7 }
- { name: ubuntu-20.04-clang-8-autotools, os: ubuntu-20.04, cxx: clang++-8 } #installed
- { name: ubuntu-20.04-clang-9-autotools, os: ubuntu-20.04, cxx: clang++-9 } #installed
- { name: ubuntu-20.04-clang-10-autotools, os: ubuntu-20.04, cxx: clang++-10 } #installed
- { name: ubuntu-20.04-gcc-7-autotools, os: ubuntu-20.04, cxx: g++-7 } #installed
- { name: ubuntu-20.04-gcc-8-autotools, os: ubuntu-20.04, cxx: g++-8 } #installed
- { name: ubuntu-20.04-gcc-9-autotools, os: ubuntu-20.04, cxx: g++-9 } #installed
- { name: ubuntu-20.04-gcc-10-autotools, os: ubuntu-20.04, cxx: g++-10 } #installed
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- name: Download fonts, tessdata and langdata required for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
# Refresh the apt package index before installing: the index shipped with the
# hosted runner image can be stale, which makes `apt-get install` fail.
- name: Install Compiler
  run: |
    sudo apt-get update
    sudo apt-get install -y ${{ matrix.config.cxx }}
- name: Install dependencies
run: |
sudo apt-get install autoconf-archive libleptonica-dev -y
sudo apt-get install libicu-dev libpango1.0-dev libcairo2-dev -y
sudo apt-get install cabextract libarchive-dev -y
sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
- name: Setup Tesseract
run: |
mkdir -p m4
./autogen.sh
- name: Configure Tesseract
run: |
./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
# Build with 8 parallel jobs, then install system-wide. Each make goal is
# named exactly once, as in the unittest workflow.
- name: Make and Install Tesseract
  run: |
    make -j 8
    sudo make install
# Tesseract itself was installed by the previous step, so only the training
# tools are installed here.
- name: Make and Install Training Tools
  run: |
    make training -j 8
    sudo make training-install
- name: Make and run Unit Tests
run: |
make check
- name: Display Version for tesseract, lstmtraining, text2image
run: |
tesseract -v
lstmtraining -v
text2image -v
if: success() || failure()
- name: List languages in different test tessdata-dir
run: |
tesseract --list-langs --tessdata-dir ../tessdata
tesseract --list-langs --tessdata-dir ../tessdata_best
tesseract --list-langs --tessdata-dir ../tessdata_fast
- name: Run Tesseract on test images in different languages
run: |
tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata
- name: Run Tesseract basicapitest
run: |
export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
cd test
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11
./basicapitest
- name: Display Compiler Version
run: |
${{ matrix.config.cxx }} --version
git log -3 --pretty=format:'%h %ad %s | %an'
if: always()
- name: Display Unit Tests Report
run: |
cat test-suite.log
if: always()
# ============================================================================================
brew:
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- { name: macos-10.15-clang-12-autotools, os: macos-10.15, cxx: clang++ }
- { name: macos-10.15-gcc-10-autotools, os: macos-10.15, cxx: g++-10 }
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- name: Get fonts, tessdata and langdata required for unit tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
- name: Install dependencies
run: |
brew install automake autoconf-archive
brew install leptonica
brew install cairo pango icu4c
brew install cabextract
brew install libarchive curl
- name: Setup Tesseract
run: |
mkdir -p m4
./autogen.sh
- name: Configure Tesseract
run: |
./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' "PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
# Build with 8 parallel jobs, then install system-wide. Each make goal is
# named exactly once, as in the unittest workflow.
- name: Make and Install Tesseract
  run: |
    make -j 8
    sudo make install
# Tesseract itself was installed by the previous step, so only the training
# tools are installed here.
- name: Make and Install Training Tools
  run: |
    make training -j 8
    sudo make training-install
- name: Make and run Unit Tests (clang)
if: startsWith(matrix.config.cxx, 'clang')
run: |
make check
- name: Make and run Unit Tests (unset LANG needed for g++-8, g++-9, g++-10 on macOS)
if: startsWith(matrix.config.cxx, 'g')
shell: bash
run: |
unset LANG LC_ALL LC_CTYPE
locale
make check
- name: Display Version for tesseract, lstmtraining, text2image
run: |
tesseract -v
lstmtraining -v
text2image -v
if: success() || failure()
- name: List languages in different test tessdata-dir
run: |
tesseract --list-langs --tessdata-dir ../tessdata
tesseract --list-langs --tessdata-dir ../tessdata_best
tesseract --list-langs --tessdata-dir ../tessdata_fast
- name: Run Tesseract on test images in different languages
run: |
tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata
- name: Run Tesseract basicapitest
run: |
export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
cd test
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11
./basicapitest
- name: Display Compiler Version
run: |
${{ matrix.config.cxx }} --version
git log -3 --pretty=format:'%h %ad %s | %an'
if: always()
- name: Display Unit Tests Report
run: |
cat test-suite.log
if: always()
# ============================================================================================
ports:
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- { name: macos-10.15-clang-12-autotools, os: macos-10.15, cxx: clang++ }
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- name: Get fonts, tessdata and langdata required for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
- name: Install Macports
run: |
curl -LO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install
- name: Install Dependencies
run: |
sudo port install autoconf autoconf-archive automake libtool pkgconfig
sudo port install leptonica
sudo port install cairo pango
sudo port install icu +devel
sudo port install cabextract libarchive curl
- name: Setup Tesseract
run: |
mkdir -p m4
./autogen.sh
- name: Configure Tesseract
run: |
./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
# Build with 8 parallel jobs, then install system-wide. Each make goal is
# named exactly once, as in the unittest workflow.
- name: Make and Install Tesseract
  run: |
    make -j 8
    sudo make install
# Tesseract itself was installed by the previous step, so only the training
# tools are installed here.
- name: Make and Install Training Tools
  run: |
    make training -j 8
    sudo make training-install
- name: Make and run Unit Tests (clang)
if: startsWith(matrix.config.cxx, 'clang')
run: |
make check
- name: Display Version for tesseract, lstmtraining, text2image
run: |
tesseract -v
lstmtraining -v
text2image -v
if: success() || failure()
- name: List languages in different test tessdata-dir
run: |
tesseract --list-langs --tessdata-dir ../tessdata
tesseract --list-langs --tessdata-dir ../tessdata_best
tesseract --list-langs --tessdata-dir ../tessdata_fast
- name: Run Tesseract on test images in different languages
run: |
tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata
- name: Run Tesseract basicapitest
run: |
export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
cd test
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/opt/local/include -L/opt/local/lib -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11
./basicapitest
- name: Display Compiler Version
run: |
${{ matrix.config.cxx }} --version
git log -3 --pretty=format:'%h %ad %s | %an'
if: always()
- name: Display Unit Tests Report
run: |
cat test-suite.log
if: always()

152
.github/workflows/cmake.yml vendored Normal file
View File

@ -0,0 +1,152 @@
name: cmake
# cmake build of tesseract and training tools on ubuntu and macOS homebrew using Ninja.
# test command line version of tesseract. run basicapitest.
on:
#push:
schedule:
- cron: 0 21 * * *
jobs:
basictests:
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- { name: macos-10.15-clang-12-cmake, os: macos-10.15, cxx: clang++ } # default
- { name: macos-10.15-clang-11-cmake, os: macos-10.15, cxx: '$(brew --prefix llvm)/bin/clang++' } #installed
- { name: macos-10.15-gcc-8-cmake, os: macos-10.15, cxx: g++-8 } #installed
- { name: macos-10.15-gcc-9-cmake, os: macos-10.15, cxx: g++-9 } #installed
- { name: macos-10.15-gcc-10-cmake, os: macos-10.15, cxx: g++-10 } #installed
- { name: ubuntu-18.04-clang-7-cmake, os: ubuntu-18.04, cxx: clang++-7 }
- { name: ubuntu-18.04-clang-8-cmake, os: ubuntu-18.04, cxx: clang++-8 } #installed
- { name: ubuntu-18.04-clang-9-cmake, os: ubuntu-18.04, cxx: clang++-9 } #installed
- { name: ubuntu-18.04-clang-10-cmake, os: ubuntu-18.04, cxx: clang++-10 }
- { name: ubuntu-18.04-gcc-7-cmake, os: ubuntu-18.04, cxx: g++-7 } #installed
- { name: ubuntu-18.04-gcc-8-cmake, os: ubuntu-18.04, cxx: g++-8 } #installed
- { name: ubuntu-18.04-gcc-9-cmake, os: ubuntu-18.04, cxx: g++-9 } #installed
- { name: ubuntu-18.04-gcc-10-cmake, os: ubuntu-18.04, cxx: g++-10 } #installed
- { name: ubuntu-20.04-clang-7-cmake, os: ubuntu-20.04, cxx: clang++-7 }
- { name: ubuntu-20.04-clang-8-cmake, os: ubuntu-20.04, cxx: clang++-8 } #installed
- { name: ubuntu-20.04-clang-9-cmake, os: ubuntu-20.04, cxx: clang++-9 } #installed
- { name: ubuntu-20.04-clang-10-cmake, os: ubuntu-20.04, cxx: clang++-10 } #installed
- { name: ubuntu-20.04-gcc-7-cmake, os: ubuntu-20.04, cxx: g++-7 } #installed
- { name: ubuntu-20.04-gcc-8-cmake, os: ubuntu-20.04, cxx: g++-8 } #installed
- { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } #installed
- { name: ubuntu-20.04-gcc-10-cmake, os: ubuntu-20.04, cxx: g++-10 } #installed
steps:
# Linux only. Refresh the apt package index before installing: the index
# shipped with the hosted runner image can be stale, which makes
# `apt-get install` fail.
- name: Install compilers on Linux
  run: |
    sudo apt-get update
    sudo apt-get install ${{ matrix.config.cxx }} -y
  if: runner.os == 'Linux'
# sudo apt-get install libarchive-dev libcurl4-openssl-dev libcurl4 curl -y
- name: Install dependencies on Linux
run: |
sudo apt-get install autoconf-archive libleptonica-dev -y
sudo apt-get install libicu-dev libpango1.0-dev libcairo2-dev -y
sudo apt-get install cabextract -y
sudo apt-get install ninja-build -y
if: runner.os == 'Linux'
- name: Install dependencies on macOS
run: |
brew install automake autoconf-archive
brew install leptonica
brew install cairo pango icu4c
brew install cabextract
brew install ninja
ninja --version
cmake --version
if: runner.os == 'macOS'
- name: Checkout Source
uses: actions/checkout@v2
with:
submodules: recursive
- name: Configure Tesseract (Linux)
run: |
mkdir build
mkdir inst
cmake \
-S . \
-B build \
-G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DOPENMP_BUILD=OFF \
-DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
-DCMAKE_INSTALL_PREFIX:PATH=inst
if: runner.os == 'Linux'
# macOS only: configure a Release build with Ninja using the matrix compiler.
# The Homebrew pkg-config directories of icu4c, libarchive and libffi are
# prepended to PKG_CONFIG_PATH for the cmake run. Every entry is
# "$(brew --prefix)/opt/<formula>/lib/pkgconfig" with no extra leading slash,
# the same form the autotools workflow uses.
- name: Configure Tesseract (macOS)
  shell: bash
  run: |
    set -e
    export PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig:$PKG_CONFIG_PATH
    mkdir build
    mkdir inst
    cmake \
      -S . \
      -B build \
      -G Ninja \
      -DCMAKE_BUILD_TYPE=Release \
      -DOPENMP_BUILD=OFF \
      -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
      -DCMAKE_INSTALL_PREFIX:PATH=inst
  if: runner.os == 'macOS'
- name: Build Tesseract
run: |
cmake --build build --config Release --target install
- name: Display Tesseract Version
run: |
build/inst/bin/tesseract -v
- name: Display Training Tools Version
run: |
build/inst/bin/lstmtraining -v
build/inst/bin/text2image -v
- name: Download fonts, tessdata and langdata required for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
- name: List languages in different tessdata-dir
run: |
build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata
build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata_best
build/inst/bin/tesseract --list-langs --tessdata-dir ../tessdata_fast
- name: Run Tesseract on test images in different languages
run: |
build/inst/bin/tesseract test/testing/phototest.tif - --oem 1 --tessdata-dir ../tessdata
build/inst/bin/tesseract test/testing/raaj.tif - -l hin --oem 1 --tessdata-dir ../tessdata
build/inst/bin/tesseract test/testing/viet.tif - -l vie --oem 1 --tessdata-dir ../tessdata
build/inst/bin/tesseract test/testing/hebrew.png - -l heb --oem 1 --tessdata-dir ../tessdata
build/inst/bin/tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
build/inst/bin/tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ../tessdata
- name: Build and run basicapitest
run: |
export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
cd test
${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11
./basicapitest
- name: Display Compiler Version
run: |
${{ matrix.config.cxx }} --version
git log -3 --pretty=format:'%h %ad %s | %an'
if: always()

View File

@ -1,45 +0,0 @@
name: linux
on: [push]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v1
- name: Download SW
shell: cmake -P {0}
run: |
if (WIN32)
file(DOWNLOAD "https://software-network.org/client/sw-master-windows-client.zip" ./sw.zip)
elseif (APPLE)
file(DOWNLOAD "https://software-network.org/client/sw-master-macos-client.tar.gz" ./sw.zip)
else()
file(DOWNLOAD "https://software-network.org/client/sw-master-linux-client.tar.gz" ./sw.zip)
endif()
- name: Unpack SW
run: |
cmake -E tar xvf sw.zip
chmod 755 sw
- name: gcc
run: |
sudo add-apt-repository ppa:jonathonf/gcc-9.0
sudo apt-get update
sudo apt-get install g++-9
- name: llvm
run: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key 2>/dev/null | sudo apt-key add -
sudo add-apt-repository 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main' -y
sudo apt-get update -q
sudo apt-get install -y clang-9 lld-9 libc++-9-dev libc++abi-9-dev clang-tools-9
- name: build
run: ./sw -static -shared -config d,r build

View File

@ -1,34 +0,0 @@
name: macos
on: [push]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [macOS-latest]
steps:
- uses: actions/checkout@v1
- name: Download SW
shell: cmake -P {0}
run: |
if (WIN32)
file(DOWNLOAD "https://software-network.org/client/sw-master-windows-client.zip" ./sw.zip)
elseif (APPLE)
file(DOWNLOAD "https://software-network.org/client/sw-master-macos-client.tar.gz" ./sw.zip)
else()
file(DOWNLOAD "https://software-network.org/client/sw-master-linux-client.tar.gz" ./sw.zip)
endif()
- name: Unpack SW
run: cmake -E tar xvf sw.zip
- name: chmod
run: chmod 755 sw
shell: sh
- name: build
run: ./sw -static -shared -config d,r build

73
.github/workflows/sw.yml vendored Normal file
View File

@ -0,0 +1,73 @@
name: sw
on:
push:
pull_request:
schedule:
# every day
- cron: 0 0 * * *
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [windows-latest, ubuntu-20.04, macOS-latest]
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: egorpugin/sw-action@master
- name: build
if: matrix.os == 'windows-latest'
run: ./sw -static -shared -platform x86,x64 -config d,r build
- name: build
if: matrix.os != 'windows-latest'
run: ./sw -static -shared -config d,r build -Dwith-tests=1
- name: download test data
run: git clone https://github.com/egorpugin/tessdata tessdata_unittest
- name: copy fonts
if: matrix.os != 'windows-latest'
run: cp tessdata_unittest/fonts/* test/testing/
- name: copy fonts
if: matrix.os == 'windows-latest'
run: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse
shell: pwsh
- name: test
if: matrix.os != 'windows-latest'
run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
continue-on-error: true
- name: test-nightly
if: matrix.os != 'windows-latest' && github.event.schedule=='0 0 * * *'
run: ./sw -static -shared -config "d,r" test -Dwith-tests=1
continue-on-error: true
# windows tests hang here for some reason, investigate
#- name: test
#if: matrix.os == 'windows-latest'
#run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
#continue-on-error: true
- name: Upload Unit Test Results
if: always() && matrix.os != 'windows-latest'
uses: actions/upload-artifact@v2
with:
name: Test Results (${{ matrix.os }})
path: .sw/test/results.xml
- name: Publish Test Report
if: always() && matrix.os != 'windows-latest'
uses: mikepenz/action-junit-report@v1
with:
check_name: test (${{ matrix.os }})
report_paths: .sw/test/results.xml
github_token: ${{ secrets.GITHUB_TOKEN }}

View File

@ -0,0 +1,76 @@
name: unittest-disablelegacy
# autotools build on ubuntu, unittests with disabled legacy engine.
# currently some unittests are failing with disabled legacy engine.
on:
#push:
schedule:
- cron: 0 0 1 * *
jobs:
linux:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
compiler: [ g++, clang++ ]
os: [ ubuntu-18.04, ubuntu-20.04 ]
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
# Refresh the apt package index before installing: the index shipped with the
# hosted runner image can be stale, which makes `apt-get install` fail.
- name: Install dependencies
  run: |
    sudo apt-get update
    sudo apt-get install autoconf-archive libleptonica-dev libpango1.0-dev -y
    sudo apt-get install cabextract -y
- name: Setup
run: |
mkdir -p m4
./autogen.sh
- name: Configure
run: |
./configure '--disable-shared' '--disable-legacy' 'CXX=${{ matrix.compiler }}'
# Build with 8 parallel jobs, then install system-wide. Each make goal is
# named exactly once, as in the unittest workflow.
- name: Make and Install Tesseract
  run: |
    make -j 8
    sudo make install
# Tesseract itself was installed by the previous step, so only the training
# tools are installed here.
- name: Make and Install Training Tools
  run: |
    make training -j 8
    sudo make training-install
- name: Display Version
run: |
${{ matrix.compiler }} --version
tesseract -v
lstmtraining -v
text2image -v
if: success() || failure()
- name: Download fonts, tessdata and langdata required for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
- name: Run Tesseract on phototest.tif and devatest.png
run: |
tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata
tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
- name: Make and run Unit Tests
run: |
make check -j 4
- name: Display Unit Tests Report
run: |
git log -3
${{ matrix.compiler }} --version
cat test-suite.log
if: always()

100
.github/workflows/unittest.yml vendored Normal file
View File

@ -0,0 +1,100 @@
name: unittest
# autotools build on ubuntu and macOS homebrew.
# unittests with address sanitizers.
# [ FAILED ] LSTMTrainerTest.DeterminismTest - clang version 9.0.0-2~ubuntu18.04.2
on:
#push:
schedule:
- cron: 0 19 * * *
jobs:
sanitizers:
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- { name: macos-10.15-clang-unittest, os: macos-10.15, cxx: clang++ }
- { name: ubuntu-18.04-clang-8-unittest, os: ubuntu-18.04, cxx: clang++-8 }
- { name: ubuntu-18.04-gcc-unittest, os: ubuntu-18.04, cxx: g++ }
- { name: ubuntu-20.04-clang-10-unittest, os: ubuntu-20.04, cxx: clang++-10 }
- { name: ubuntu-20.04-gcc-unittest, os: ubuntu-20.04, cxx: g++ }
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
# Linux only. Refresh the apt package index before installing: the index
# shipped with the hosted runner image can be stale, which makes
# `apt-get install` fail.
- name: Install dependencies (Linux)
  run: |
    sudo apt-get update
    sudo apt-get install autoconf-archive libleptonica-dev libpango1.0-dev -y
    sudo apt-get install cabextract -y
  if: runner.os == 'Linux'
- name: Install dependencies (MacOS Homebrew)
run: |
brew install automake autoconf-archive libarchive
brew install leptonica cairo pango
brew install cabextract abseil
if: runner.os == 'macOS'
- name: Setup
run: |
mkdir -p m4
./autogen.sh
- name: Configure (Linux)
run: |
./configure '--disable-shared' \
'CXX=${{ matrix.config.cxx }}' \
'CXXFLAGS=-g -O2 -fsanitize=address,undefined'
if: runner.os == 'Linux'
- name: Configure (MacOS Homebrew)
run: |
./configure '--disable-shared' '--with-pic' \
'CXX=${{ matrix.config.cxx }}' \
'CXXFLAGS=-g -O2 -fsanitize=address,undefined' \
"PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
if: runner.os == 'macOS'
- name: Make and Install Tesseract
run: |
make -j 8
sudo make install
- name: Make and Install Training Tools
run: |
make training -j 8
sudo make training-install
- name: Display Version
run: |
${{ matrix.config.cxx }} --version
tesseract -v
lstmtraining -v
text2image -v
if: success() || failure()
- name: Download fonts, tessdata and langdata required for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
cp tessdata_unittest/fonts/* test/testing/
mv tessdata_unittest/* ../
- name: Run Tesseract on phototest.tif and devatest.png
run: |
tesseract test/testing/phototest.tif - --tessdata-dir ../tessdata
tesseract test/testing/devatest.png - -l hin+eng --tessdata-dir ../tessdata
- name: Make and run Unit Tests
run: |
make check -j 4
- name: Display Unit Tests Report
run: |
cat test-suite.log
${{ matrix.config.cxx }} --version
git log -3 --pretty=format:'%h %ad %s | %an'
if: always()

109
.github/workflows/vcpkg-4.11.yml vendored Normal file
View File

@ -0,0 +1,109 @@
name: vcpkg-4.11
# build tesseract 4.1 using vcpkg and cmake on ubuntu and windows.
# build and run basicapitest on windows.
# macos fails on leptonica build - https://github.com/microsoft/vcpkg/issues/16116
on:
#push:
schedule:
- cron: 0 22 * * *
env:
DEVELOPER_DIR: /Applications/Xcode_11.2.1.app/Contents/Developer
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
steps:
- name: Checkout Tesseract Source (for test images)
uses: actions/checkout@v2
with:
submodules: recursive
- name: Install vcpkg (Linux)
run: |
git clone https://github.com/microsoft/vcpkg
vcpkg/bootstrap-vcpkg.sh
vcpkg/vcpkg integrate install
if: runner.os == 'Linux'
- name: Build Tesseract 4.1.1 (Linux)
run: |
vcpkg/vcpkg install tesseract:x64-linux
if: runner.os == 'Linux'
- name: Visual Studio Setup (Windows)
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
if: runner.os == 'Windows'
- name: Install vcpkg (Windows)
run: |
git clone https://github.com/microsoft/vcpkg
vcpkg/bootstrap-vcpkg.bat
vcpkg/vcpkg integrate install
if: runner.os == 'Windows'
- name: Build and Install Tesseract and dependencies using vcpkg (Windows)
run: |
vcpkg/vcpkg install tesseract:x64-windows
if: runner.os == 'Windows'
- name: Download tessdata used for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
mv tessdata_unittest/* ../
if: runner.os == 'Windows'
- name: Create CMakeLists.txt file for basicapitest
shell: bash
run: |
cd test
cat << "EOF" > CMakeLists.txt
cmake_minimum_required(VERSION 3.19)
project( basicapitest )
find_package( Tesseract REQUIRED )
find_package( Leptonica REQUIRED )
include_directories(${Tesseract_INCLUDE_DIRS})
include_directories(${Leptonica_INCLUDE_DIRS})
add_executable( basicapitest testing/basicapitest.cpp )
target_link_libraries(basicapitest ${Leptonica_LIBRARIES})
target_link_libraries(basicapitest ${Tesseract_LIBRARIES})
target_link_libraries(basicapitest libtesseract)
EOF
cat CMakeLists.txt
if: runner.os == 'Windows'
- name: Configure basicapitest
run: |
cd test
cmake . "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
if: runner.os == 'Windows'
- name: Build basicapitest
run: |
cd test
cmake --build . --config Release
if: runner.os == 'Windows'
# Windows only: run the test program built in the previous step. The path is
# relative to the test directory, so the step does not depend on the runner's
# drive letter or on the repository name.
- name: Run basicapitest (Windows)
  run: |
    cd test
    .\Release\basicapitest.exe
  if: runner.os == 'Windows'
- name: Build Tesseract 4.1.1 (macOS) Fails
run: |
xcode-select --print-path
vcpkg install leptonica:x64-osx
vcpkg install tesseract:x64-osx
if: runner.os == 'macOS'
- name: Display Leptonica error log (macOS) Fails
run: |
cat /usr/local/share/vcpkg/buildtrees/leptonica/install-x64-osx-dbg-out.log
if: runner.os == 'macOS'

100
.github/workflows/vcpkg.yml vendored Normal file
View File

@ -0,0 +1,100 @@
name: vcpkg
# build and test of tesseract on windows using vcpkg and cmake.
# vcpkg with -head does not work. https://github.com/microsoft/vcpkg/issues/16019
on:
#push:
schedule:
- cron: 0 23 * * *
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [windows-2019]
steps:
- name: Checkout Tesseract Source (--head from master branch)
uses: actions/checkout@v2
with:
submodules: recursive
- name: Visual Studio Setup
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
- name: Install vcpkg
run: |
git clone https://github.com/microsoft/vcpkg
vcpkg/bootstrap-vcpkg.bat
vcpkg/vcpkg integrate install
- name: Build and Install Leptonica and image libraries using vcpkg
run: |
vcpkg/vcpkg install leptonica:x64-windows
- name: Configure and Build Tesseract (--head from master branch) with cmake
run: |
cmake . -B build -DCMAKE_BUILD_TYPE=Release -DSW_BUILD=OFF -DOPENMP_BUILD=OFF -DBUILD_TRAINING_TOOLS=OFF "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
cmake --build build --config Release --target install
# Print the version of the freshly built CLI. The executable is located via
# GITHUB_WORKSPACE, not a hard-coded D:\a\<repo>\<repo> path, so the step also
# works in forks or on runners with a different workspace location.
- name: Display Tesseract Version
  run: |
    & "${env:GITHUB_WORKSPACE}\build\bin\Release\tesseract.exe" --version
# Write a minimal CMake project into test/ that builds basicapitest against
# the Tesseract library built by this workflow and the Leptonica from vcpkg.
# The heredoc delimiter is quoted ("EOF"), so bash leaves the ${...} CMake
# variable references untouched.
- name: Create CMakeLists.txt file for basicapitest
  shell: bash
  run: |
    cd test
    cat << "EOF" > CMakeLists.txt
    cmake_minimum_required(VERSION 3.19)
    project( basicapitest )
    find_package( Tesseract REQUIRED )
    find_package( Leptonica REQUIRED )
    include_directories(${Tesseract_INCLUDE_DIRS})
    include_directories(${Leptonica_INCLUDE_DIRS})
    add_executable( basicapitest testing/basicapitest.cpp )
    target_link_libraries(basicapitest ${Leptonica_LIBRARIES})
    target_link_libraries(basicapitest ${Tesseract_LIBRARIES})
    add_library(libtesseract UNKNOWN IMPORTED)
    # This CMakeLists.txt lives in <workspace>/test, so the import library is
    # located relative to it, with no hard-coded runner checkout path.
    set_property(TARGET libtesseract PROPERTY IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/../build/Release/tesseract50.lib)
    target_link_libraries(basicapitest libtesseract)
    EOF
    cat CMakeLists.txt
- name: Configure basicapitest
run: |
cd test
cmake . "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
- name: Build basicapitest
run: |
cd test
cmake --build . --config Release
- name: Download tessdata and image files used for tests
run: |
git clone https://github.com/egorpugin/tessdata tessdata_unittest
mv tessdata_unittest/* ../
# Run the test program built earlier. The path is relative to the test
# directory, so the step does not depend on the runner's drive letter or on
# the repository name.
- name: Run basicapitest
  run: |
    cd test
    .\Release\basicapitest.exe
# OCR a set of sample images in several languages with the freshly built CLI,
# writing the result to stdout ("-"). The executable is located via
# GITHUB_WORKSPACE, not a hard-coded D:\a\<repo>\<repo> path.
- name: Run Tesseract CLI on test images in different languages
  run: |
    $tesseract = "${env:GITHUB_WORKSPACE}\build\bin\Release\tesseract.exe"
    & $tesseract test\testing\phototest.tif - --oem 1 --tessdata-dir ..\tessdata
    & $tesseract test\testing\raaj.tif - -l hin --oem 1 --tessdata-dir ..\tessdata
    & $tesseract test\testing\viet.tif - -l vie --oem 1 --tessdata-dir ..\tessdata
    & $tesseract test\testing\hebrew.png - -l heb --oem 1 --tessdata-dir ..\tessdata
    & $tesseract test\testing\eurotext.tif - -l fra --oem 1 --tessdata-dir ..\tessdata_best
    & $tesseract test\testing\arabic.tif - -l ara --oem 1 --psm 6 --tessdata-dir ..\tessdata
# List the languages available in each of the three test tessdata
# directories. The executable is located via GITHUB_WORKSPACE, not a
# hard-coded D:\a\<repo>\<repo> path.
- name: List languages in different test tessdata-dir
  run: |
    $tesseract = "${env:GITHUB_WORKSPACE}\build\bin\Release\tesseract.exe"
    & $tesseract --list-langs --tessdata-dir ..\tessdata
    & $tesseract --list-langs --tessdata-dir ..\tessdata_best
    & $tesseract --list-langs --tessdata-dir ..\tessdata_fast

View File

@ -1,35 +0,0 @@
name: windows
on: [push]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest]
#os: [windows-latest, ubuntu-latest, macOS-latest]
steps:
- uses: actions/checkout@v1
- name: Download SW
shell: cmake -P {0}
run: |
if (WIN32)
file(DOWNLOAD "https://software-network.org/client/sw-master-windows-client.zip" ./sw.zip)
elseif (APPLE)
file(DOWNLOAD "https://software-network.org/client/sw-master-macos-client.tar.gz" ./sw.zip)
else()
file(DOWNLOAD "https://software-network.org/client/sw-master-linux-client.tar.gz" ./sw.zip)
endif()
- name: Unpack SW
run: cmake -E tar xvf sw.zip
- name: chmod
run: chmod 755 sw
shell: sh
- name: build
run: ./sw -static -shared -platform x86,x64 -config d,r build

3
.gitignore vendored
View File

@ -63,7 +63,7 @@ config_auto.h
# ignore compilation files
build/*
/bin
*/.deps/*
.deps
.dirstamp
/.libs
*/.libs/*
@ -80,6 +80,7 @@ __pycache__
# tessdata
*.traineddata
tessdata_*
# OpenCL
tesseract_opencl_profile_devices.dat

2
.gitmodules vendored
View File

@ -6,4 +6,4 @@
url = https://github.com/google/googletest.git
[submodule "test"]
path = test
url = https://github.com/tesseract-ocr/test
url = https://github.com/tesseract-ocr/test.git

View File

@ -1,51 +1,30 @@
# Travis CI configuration for Tesseract
language: cpp
dist: xenial
env:
- LEPT_VER=1.78.0
sudo: false
notifications:
email: false
language: cpp
sudo: false
os:
- linux
- osx
addons:
apt:
sources:
#- ubuntu-toolchain-r-test
packages:
- libarchive-dev
- libpango1.0-dev
#- g++-6
#matrix:
#include:
#- os: osx
#install:
#script: brew install tesseract --HEAD
#cache:
#directories:
#- $HOME/Library/Caches/Homebrew
#allow_failures:
#- script: brew install tesseract --HEAD
os: linux
dist: focal
arch:
- amd64
- arm64
- ppc64le
- s390x
compiler:
- gcc
- clang
env:
- LEPT_VER=1.80.0
cache:
directories:
- leptonica-$LEPT_VER
- leptonica-$LEPT_VER
before_install:
- if [[ $TRAVIS_OS_NAME == linux ]]; then LINUX=true; fi
- if [[ $TRAVIS_OS_NAME == osx ]]; then OSX=true; fi
- sudo apt-get install libpango1.0-dev libicu-dev libtiff5-dev -y
- rm -rf leptonica-$LEPT_VER/usr
install:
#- if [[ $LINUX && "$CXX" = "g++" ]]; then export CXX="g++-6" CC="gcc-6"; fi
- if test ! -d leptonica-$LEPT_VER/src; then curl -Ls https://github.com/DanBloomberg/leptonica/archive/$LEPT_VER.tar.gz | tar -xz; fi
- if test ! -d leptonica-$LEPT_VER/usr; then cmake -Hleptonica-$LEPT_VER -Bleptonica-$LEPT_VER/build -DCMAKE_INSTALL_PREFIX=leptonica-$LEPT_VER/usr; fi
- if test ! -e leptonica-$LEPT_VER/usr/lib/libleptonica.so; then make -C leptonica-$LEPT_VER/build install; fi
@ -55,3 +34,12 @@ script:
- cd build
- cmake .. -DLeptonica_DIR=leptonica-$LEPT_VER/build -DSW_BUILD=OFF
- make
- sudo make install
#after_script: # let those commands trigger build errors
- tesseract -v
- text2image -v
- lstmtraining -v
- ls /home/travis/build/tesseract-ocr/tesseract/test/testing/*.tif
- wget https://github.com/egorpugin/tessdata/raw/master/tessdata/eng.traineddata
- tesseract /home/travis/build/tesseract-ocr/tesseract/test/testing/phototest.tif - -l eng --tessdata-dir ./

View File

@ -32,6 +32,13 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}")
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "CMake Targets")
# Opt in to policy CMP0091 on Windows when the running CMake is at least 3.15,
# so that CMAKE_MSVC_RUNTIME_LIBRARY (set further down for WIN32_MT_BUILD)
# is honoured when selecting the MSVC runtime.
if(WIN32 AND NOT CMAKE_VERSION VERSION_LESS "3.15.0")
  cmake_policy(SET CMP0091 NEW)
  message(STATUS "Setting policy CMP0091 to NEW")
endif()
###############################################################################
#
# project settings
@ -75,12 +82,17 @@ else()
option(SW_BUILD "Build with sw" OFF)
endif()
option(OPENMP_BUILD "Build with openmp support" OFF) # see issue #1662
option(AUTO_OPTIMIZE "Usage of cmake auto optimize macros (not suitable for portable build)" ON)
# Build without the ScrollView graphics viewer when enabled (default: OFF).
option(GRAPHICS_DISABLED "Disable graphics (ScrollView)" OFF)
option(DISABLED_LEGACY_ENGINE "Disable the legacy OCR engine" OFF)
option(ENABLE_LTO "Enable link-time optimization" OFF)
option(BUILD_TRAINING_TOOLS "Build training tools" ON)
option(BUILD_TESTS "Build tests" OFF)
option(USE_SYSTEM_ICU "Use system ICU" OFF)
# Offer the static-CRT (MT) switch only where it can take effect:
# an MSVC build on Windows with CMake 3.15 or newer.
if(WIN32 AND MSVC AND NOT CMAKE_VERSION VERSION_LESS "3.15.0")
  option(WIN32_MT_BUILD "Build with MT flag for MSVC" OFF)
endif()
###############################################################################
#
@ -88,6 +100,10 @@ option(USE_SYSTEM_ICU "Use system ICU" OFF)
#
###############################################################################
# Flag Clang-family compilers. MATCHES is a regular-expression test, so any
# compiler ID containing "Clang" sets CLANG; later compiler-specific flag
# handling in this file checks that variable.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CLANG 1)
endif()
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "Setting build type to 'Release' as none was specified.")
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
@ -96,44 +112,120 @@ endif()
include(CheckCXXCompilerFlag)
# Check for C++ standard to use
get_property(known_features GLOBAL PROPERTY CMAKE_CXX_KNOWN_FEATURES)
if (cxx_std_17 IN_LIST known_features)
set(CMAKE_CXX_STANDARD 17)
elseif (cxx_std_14 IN_LIST known_features)
set(CMAKE_CXX_STANDARD 14)
else() # minimum required standard
set(CMAKE_CXX_STANDARD 11)
endif()
# Avoid using experimental c++1y (c++1z) standard even if the compiler announces cxx14 (cxx17)
# in CMAKE_CXX_KNOWN_FEATURES and CMAKE_CXX_COMPILE_FEATURES
# It is the case of clang 3.9, 4.0 (announces c++1z) and gcc 4.8 (announces c++1y)
if ("${CMAKE_CXX17_STANDARD_COMPILE_OPTION}" STREQUAL "-std=c++1z")
set(CMAKE_CXX_STANDARD 14)
endif()
if ("${CMAKE_CXX14_STANDARD_COMPILE_OPTION}" STREQUAL "-std=c++1y")
set(CMAKE_CXX_STANDARD 11)
endif()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(LIBRARY_TYPE SHARED)
if (STATIC)
set(LIBRARY_TYPE)
if (BUILD_SHARED_LIBS)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
endif()
# auto optimize
if (AUTO_OPTIMIZE)
include(OptimizeForArchitecture)
AutodetectHostArchitecture()
OptimizeForArchitecture()
# LTO
cmake_policy(SET CMP0069 NEW)
include(CheckIPOSupported)
check_ipo_supported(RESULT LTO_SUPPORTED OUTPUT error)
if(LTO_SUPPORTED)
message(STATUS "IPO / LTO supported")
else()
message(STATUS "IPO / LTO not supported: <${error}>")
endif()
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
if(COMPILER_SUPPORTS_MARCH_NATIVE)
set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -march=native -mtune=native")
set(MARCH_NATIVE_OPT ON)
endif()
# Flags for SIMD support
set(HAVE_AVX FALSE)
set(HAVE_AVX2 FALSE)
set(HAVE_FMA FALSE)
set(HAVE_SSE4_1 FALSE)
set(HAVE_NEON FALSE)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64.*|AARCH64.*")
set(AARCH64 TRUE)
endif()
if(AARCH64)
add_definitions("-DHAVE_NEON")
set(HAVE_NEON TRUE)
else()
CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_AVX)
if(HAVE_AVX)
set(AVX_COMPILE_FLAGS "-mavx")
add_definitions("-DHAVE_AVX")
endif(HAVE_AVX)
CHECK_CXX_COMPILER_FLAG("-mavx2" HAVE_AVX2)
if(HAVE_AVX2)
set(AVX2_COMPILE_FLAGS "-mavx2")
add_definitions("-DHAVE_AVX2")
endif()
CHECK_CXX_COMPILER_FLAG("-mfma" HAVE_FMA)
if(HAVE_FMA)
set(FMA_COMPILE_FLAGS "-mfma")
add_definitions("-DHAVE_FMA")
endif()
CHECK_CXX_COMPILER_FLAG("-msse4.1" HAVE_SSE4_1)
if(HAVE_SSE4_1)
set(SSE4_1_COMPILE_FLAGS "-msse4.1")
add_definitions("-DHAVE_SSE4_1")
endif()
if(NOT APPLE)
# NEON support relies on getauxval, which is not available on OSX, only on Linux and Android
CHECK_CXX_COMPILER_FLAG("-mfpu=neon" HAVE_NEON)
if(HAVE_NEON)
set(NEON_COMPILE_FLAGS "-mfpu=neon")
add_definitions("-DHAVE_NEON")
endif()
endif(NOT APPLE)
if(MSVC)
if(NOT HAVE_AVX)
set(AVX_COMPILE_FLAGS "/arch:AVX")
set(HAVE_AVX ON)
add_definitions("-DHAVE_AVX")
endif()
if(NOT HAVE_AVX2)
set(AVX2_COMPILE_FLAGS "/arch:AVX2")
set(HAVE_AVX2 ON)
add_definitions("-DHAVE_AVX2")
set(FMA_COMPILE_FLAGS "-D__FMA__")
set(HAVE_FMA ON)
add_definitions("-DHAVE_FMA")
endif()
if(NOT HAVE_SSE4_1)
set(SSE4_1_COMPILE_FLAGS "-D__SSE4_1__")
set(HAVE_SSE4_1 ON)
add_definitions("-DHAVE_SSE4_1")
endif()
endif(MSVC)
endif(AARCH64)
# auto optimize - used only for information about available vectors
include(OptimizeForArchitecture)
OptimizeForArchitecture()
# remove global definition to eliminate effect on build
foreach(_flag ${_enable_vector_unit_list})
string(TOUPPER "${_flag}" _flag)
string(REPLACE "." "_" _flag "__${_flag}__")
remove_definitions("-D${_flag}")
endforeach(_flag)
foreach(flag ${Vc_ARCHITECTURE_FLAGS})
set(Vc_CXX_FLAGS "${Vc_CXX_FLAGS} ${flag}")
endforeach()
# Compiler specific environments
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CLANG 1)
endif()
if(CMAKE_COMPILER_IS_GNUCXX OR MINGW)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -pedantic -Og")
elseif(MSVC)
@ -142,8 +234,14 @@ elseif(MSVC)
if (NOT CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
endif()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /wd4244 /wd4305 /wd4267")
# Don't use /Wall because it generates too many warnings.
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /W4 /bigobj")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /W0 /bigobj")
# MT flag
if(WIN32_MT_BUILD)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
message (STATUS "Building with static CRT.")
endif()
endif()
if(CLANG) # clang all platforms
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wno-unused-command-line-argument")
@ -176,7 +274,6 @@ if (OPENMP_BUILD)
endif()
endif()
if (CYGWIN)
add_definitions(-D__CYGWIN__)
elseif(UNIX)
@ -187,6 +284,8 @@ elseif(WIN32)
set(LIB_Ws2_32 Ws2_32)
endif()
add_definitions("-DCMAKE_BUILD")
###############################################################################
#
# packages
@ -195,10 +294,10 @@ endif()
if (SW_BUILD)
find_package(SW REQUIRED)
if (STATIC)
set(SW_BUILD_SHARED_LIBS 0)
else()
if (BUILD_SHARED_LIBS)
set(SW_BUILD_SHARED_LIBS 1)
else()
set(SW_BUILD_SHARED_LIBS 0)
endif()
sw_add_package(
org.sw.demo.danbloomberg.leptonica
@ -223,7 +322,12 @@ else()
message(FATAL_ERROR "Cannot find required library Leptonica. Quitting!")
endif(NOT Leptonica_FOUND)
find_package(LibArchive)
# Check for optional libarchive.
if(PKG_CONFIG_EXECUTABLE)
pkg_check_modules(LibArchive libarchive)
else()
find_package(LibArchive)
endif()
if(LibArchive_FOUND)
set(HAVE_LIBARCHIVE ON)
endif()
@ -238,32 +342,9 @@ find_package(OpenCL QUIET)
#
###############################################################################
foreach(flag ${Vc_ARCHITECTURE_FLAGS})
set(Vc_CXX_FLAGS "${Vc_CXX_FLAGS} ${flag}")
endforeach()
# add definition as expected in src/arch/simddetect.cpp
set(HAVE_AVX OFF)
set(HAVE_AVX2 OFF)
set(HAVE_FMA OFF)
set(HAVE_SSE4_1 OFF)
set(MARCH_NATIVE_OPT OFF)
foreach(flag ${_enable_vector_unit_list}) # from OptimizeForArchitecture()
string(TOUPPER "${flag}" flag)
string(REPLACE "\." "_" flag "${flag}")
if("${flag}" MATCHES "AVX|AVX2|FMA|SSE4_1")
set(simd_flags "${simd_flags} -DHAVE_${flag}")
set("HAVE_${flag}" ON)
endif()
endforeach(flag)
if (NOT MSVC)
set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -O3 -ffast-math")
endif()
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
if(COMPILER_SUPPORTS_MARCH_NATIVE)
set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -march=native -mtune=native")
set(MARCH_NATIVE_OPT ON)
endif()
set(AUTOCONFIG_SRC ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h.in)
set(AUTOCONFIG ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h)
@ -277,29 +358,34 @@ include(Configure)
configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include")
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/include/tesseract/version.h.in
${CMAKE_CURRENT_BINARY_DIR}/include/tesseract/version.h @ONLY)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/tesseract.rc.in
${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/libtesseract.rc.in
${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/libtesseract.rc @ONLY)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/TesseractConfig-version.cmake @ONLY)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/cmake/templates/TesseractConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/TesseractConfig.cmake @ONLY)
include(CMakePackageConfigHelpers)
include(GenerateExportHeader)
configure_package_config_file(
cmake/templates/TesseractConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/cmake/tesseract/TesseractConfig.cmake
INSTALL_DESTINATION lib/cmake/tesseract
PATH_VARS INCLUDE_DIR)
write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/cmake/tesseract/TesseractConfigVersion.cmake
VERSION ${PACKAGE_VERSION}
COMPATIBILITY SameMajorVersion)
# show summary of configuration
# Assemble COMPILER_FLAGS (global flags plus the per-configuration flags) for
# the configuration summary printed below. MATCHES is a regex test against the
# build type name.
if(${CMAKE_BUILD_TYPE} MATCHES Debug)
set(COMPILER_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}")
elseif(${CMAKE_BUILD_TYPE} MATCHES Release)
set(COMPILER_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
# IPO/LTO is decided only in this Release branch: it is switched on when the
# toolchain check succeeded (LTO_SUPPORTED) and the user asked for it
# (ENABLE_LTO), and explicitly switched off otherwise.
if (LTO_SUPPORTED AND ENABLE_LTO)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION FALSE)
endif() # LTO_SUPPORTED
endif()
message( STATUS )
message( STATUS "General configuration for Tesseract ${PACKAGE_VERSION}")
@ -308,6 +394,8 @@ message( STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message( STATUS "Compiler: ${CMAKE_CXX_COMPILER_ID}")
message( STATUS "Used standard: C++${CMAKE_CXX_STANDARD}")
message( STATUS "CXX compiler options: ${COMPILER_FLAGS}")
get_directory_property( DirCompDefs COMPILE_DEFINITIONS)
message( STATUS "Compile definitions = ${DirCompDefs}")
message( STATUS "Linker options: ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UP}}")
message( STATUS "Install directory: ${CMAKE_INSTALL_PREFIX}")
message( STATUS "Architecture flags: ${Vc_ARCHITECTURE_FLAGS}")
@ -317,7 +405,8 @@ message( STATUS "HAVE_AVX2: ${HAVE_AVX2}")
message( STATUS "HAVE_FMA: ${HAVE_FMA}")
message( STATUS "HAVE_SSE4_1: ${HAVE_SSE4_1}")
message( STATUS "MARCH_NATIVE_OPT: ${MARCH_NATIVE_OPT}")
message( STATUS "simd_flags: ${simd_flags}")
message( STATUS "HAVE_NEON: ${HAVE_NEON}")
message( STATUS "Link-time optimization: ${CMAKE_INTERPROCEDURAL_OPTIMIZATION}")
message( STATUS "--------------------------------------------------------")
message( STATUS "Build with sw [SW_BUILD]: ${SW_BUILD}")
message( STATUS "Build with openmp support [OPENMP_BUILD]: ${OPENMP_BUILD}")
@ -345,20 +434,6 @@ include_directories(${LibArchive_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
include_directories(include)
include_directories(src/arch)
include_directories(src/ccmain)
include_directories(src/ccstruct)
include_directories(src/ccutil)
include_directories(src/classify)
include_directories(src/cutil)
include_directories(src/dict)
include_directories(src/lstm)
include_directories(src/opencl)
include_directories(src/textord)
include_directories(src/viewer)
include_directories(src/wordrec)
include_directories(src/training)
if(ANDROID_TOOLCHAIN)
include_directories(${ANDROID_TOOLCHAIN}/sysroot/usr/include)
add_compile_definitions(__ANDROID_API_FUTURE__)
@ -382,32 +457,125 @@ file(GLOB tesseract_src
src/wordrec/*.cpp
)
if (DISABLED_LEGACY_ENGINE)
# prepend_path(<list_var> <path>)
#
# Rewrites the list variable named by <list_var> in the caller's scope so that
# every element becomes "<path>/<element>".
#
#   srcs - NAME of the list variable to rewrite (not its value)
#   path - prefix placed in front of each element, joined with "/"
function(prepend_path srcs path)
  # Start from a genuinely empty accumulator. The previous `set(tmp, "")`
  # created a variable literally named "tmp," and left `tmp` untouched, so a
  # `tmp` inherited from the calling scope leaked into the result.
  set(_prefixed "")
  foreach(src IN LISTS ${srcs})
    list(APPEND _prefixed "${path}/${src}")
  endforeach()
  # Quoted so that an empty input yields a defined, empty variable rather
  # than unsetting it in the caller.
  set(${srcs} "${_prefixed}" PARENT_SCOPE)
endfunction()
SET(tesseract_src_legacy
src/ccmain/adaptions.cpp
src/ccmain/docqual.cpp
src/ccmain/equationdetect.cpp
src/ccmain/fixspace.cpp
src/ccmain/fixxht.cpp
src/ccmain/osdetect.cpp
src/ccmain/par_control.cpp
src/ccmain/recogtraining.cpp
src/ccmain/superscript.cpp
src/ccmain/tessbox.cpp
src/ccmain/tfacepp.cpp
src/ccstruct/fontinfo.cpp
src/ccstruct/params_training_featdef.cpp
src/ccutil/ambigs.cpp
src/ccutil/bitvector.cpp
src/ccutil/indexmapbidi.cpp
src/ccutil/universalambigs.cpp
src/classify/adaptive.cpp
src/classify/adaptmatch.cpp
src/classify/blobclass.cpp
src/classify/cluster.cpp
src/classify/clusttool.cpp
src/classify/cutoffs.cpp
src/classify/featdefs.cpp
src/classify/float2int.cpp
src/classify/fpoint.cpp
src/classify/intfeaturespace.cpp
src/classify/intfx.cpp
src/classify/intmatcher.cpp
src/classify/intproto.cpp
src/classify/kdtree.cpp
src/classify/mf.cpp
src/classify/mfdefs.cpp
src/classify/mfoutline.cpp
src/classify/mfx.cpp
src/classify/normfeat.cpp
src/classify/normmatch.cpp
src/classify/ocrfeatures.cpp
src/classify/outfeat.cpp
src/classify/picofeat.cpp
src/classify/protos.cpp
src/classify/shapeclassifier.cpp
src/classify/shapetable.cpp
src/classify/tessclassifier.cpp
src/classify/trainingsample.cpp
src/dict/permdawg.cpp
src/dict/hyphen.cpp
src/wordrec/associate.cpp
src/wordrec/chop.cpp
src/wordrec/chopper.cpp
src/wordrec/drawfx.cpp
src/wordrec/findseam.cpp
src/wordrec/gradechop.cpp
src/wordrec/language_model.cpp
src/wordrec/lm_consistency.cpp
src/wordrec/lm_pain_points.cpp
src/wordrec/lm_state.cpp
src/wordrec/outlines.cpp
src/wordrec/params_model.cpp
src/wordrec/pieces.cpp
src/wordrec/plotedges.cpp
src/wordrec/render.cpp
src/wordrec/segsearch.cpp
src/wordrec/wordclass.cpp
)
prepend_path(tesseract_src_legacy "${CMAKE_CURRENT_SOURCE_DIR}")
list(REMOVE_ITEM tesseract_src ${tesseract_src_legacy})
endif(DISABLED_LEGACY_ENGINE)
list(APPEND arch_files
src/arch/dotproduct.cpp
src/arch/simddetect.cpp
src/arch/intsimdmatrix.cpp
)
set_source_files_properties(${arch_files} PROPERTIES COMPILE_FLAGS "${simd_flags}")
set_source_files_properties(src/arch/dotproduct.cpp PROPERTIES COMPILE_FLAGS "${MARCH_NATIVE_FLAGS} ${Vc_CXX_FLAGS}")
if(MARCH_NATIVE_FLAGS)
set_source_files_properties(src/arch/dotproduct.cpp
PROPERTIES COMPILE_FLAGS ${MARCH_NATIVE_FLAGS})
endif(MARCH_NATIVE_FLAGS)
if(HAVE_AVX)
list(APPEND arch_files_opt src/arch/dotproductavx.cpp)
set_source_files_properties(src/arch/dotproductavx.cpp PROPERTIES COMPILE_FLAGS "-mavx")
list(APPEND arch_files_opt src/arch/dotproductavx.cpp)
set_source_files_properties(src/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS ${AVX_COMPILE_FLAGS})
endif(HAVE_AVX)
if(HAVE_AVX2)
list(APPEND arch_files_opt src/arch/intsimdmatrixavx2.cpp)
set_source_files_properties(src/arch/intsimdmatrixavx2.cpp PROPERTIES COMPILE_FLAGS "-mavx2")
list(APPEND arch_files_opt src/arch/intsimdmatrixavx2.cpp src/arch/dotproductavx.cpp)
set_source_files_properties(src/arch/intsimdmatrixavx2.cpp
PROPERTIES COMPILE_FLAGS ${AVX2_COMPILE_FLAGS})
endif(HAVE_AVX2)
if(HAVE_FMA)
list(APPEND arch_files_opt src/arch/dotproductfma.cpp)
set_source_files_properties(src/arch/dotproductfma.cpp PROPERTIES COMPILE_FLAGS "-mfma")
list(APPEND arch_files_opt src/arch/dotproductfma.cpp)
set_source_files_properties(src/arch/dotproductfma.cpp
PROPERTIES COMPILE_FLAGS ${FMA_COMPILE_FLAGS})
endif(HAVE_FMA)
if(HAVE_SSE4_1)
list(APPEND arch_files_opt src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp)
set_source_files_properties(src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp PROPERTIES COMPILE_FLAGS "-msse4.1")
list(APPEND arch_files_opt src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp)
set_source_files_properties(src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp
PROPERTIES COMPILE_FLAGS ${SSE4_1_COMPILE_FLAGS})
endif(HAVE_SSE4_1)
set_source_files_properties(${arch_files_opt} PROPERTIES COMPILE_FLAGS "${Vc_CXX_FLAGS}")
if(HAVE_NEON)
list(APPEND arch_files_opt src/arch/intsimdmatrixneon.cpp)
if(NEON_COMPILE_FLAGS)
set_source_files_properties(src/arch/intsimdmatrixneon.cpp
PROPERTIES COMPILE_FLAGS ${NEON_COMPILE_FLAGS})
endif()
endif(HAVE_NEON)
file(GLOB tesseract_hdr
file(GLOB_RECURSE tesseract_hdr
include/*
src/arch/*.h
src/ccmain/*.h
src/ccstruct/*.h
@ -433,25 +601,35 @@ set(tesseract_src ${tesseract_src}
src/api/wordstrboxrenderer.cpp
)
if (WIN32)
if (MSVC)
include_directories(src/vs2010/tesseract)
set(tesseract_hdr
${tesseract_hdr}
${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/resource.h)
set(tesseract_rsc ${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/libtesseract.rc)
endif() # MSVC
endif()
set(libtessfiles ${tesseract_src} ${arch_files} ${arch_files_opt} ${tesseract_hdr})
add_library (libtesseract ${LIBRARY_TYPE} ${tesseract_src} ${arch_files}
${arch_files_opt} ${tesseract_hdr} ${tesseract_rsc}
)
if (NOT STATIC)
source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${libtessfiles})
add_library (libtesseract ${libtessfiles})
target_include_directories (libtesseract
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
PRIVATE src
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/arch>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/ccmain>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/ccstruct>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/ccutil>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/classify>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/cutil>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/dict>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/lstm>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/opencl>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/textord>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/viewer>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/wordrec>
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/training>
)
if (BUILD_SHARED_LIBS)
target_compile_definitions (libtesseract
PRIVATE -DTESS_EXPORTS
INTERFACE -DTESS_IMPORTS
)
set_target_properties (libtesseract PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS True)
#generate_export_header (libtesseract EXPORT_MACRO_NAME TESS_API)
endif()
target_link_libraries (libtesseract PRIVATE ${LIB_Ws2_32} ${LIB_pthread})
if(OpenMP_CXX_FOUND)
@ -472,13 +650,13 @@ if (SW_BUILD)
org.sw.demo.libarchive.libarchive
)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_CURRENT_BINARY_DIR}/cppan.cmake)\n")
export(TARGETS libtesseract APPEND FILE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake)
export(TARGETS libtesseract APPEND FILE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake NAMESPACE Tesseract::)
else()
target_link_libraries (libtesseract PUBLIC
${Leptonica_LIBRARIES}
${LibArchive_LIBRARIES}
)
export(TARGETS libtesseract FILE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake)
export(TARGETS libtesseract FILE ${CMAKE_CURRENT_BINARY_DIR}/TesseractTargets.cmake NAMESPACE Tesseract::)
endif()
if (WIN32 AND CLANG AND OPENMP_BUILD)
@ -491,12 +669,7 @@ endif()
# EXECUTABLE tesseractmain
########################################
set(tesseractmain_src src/api/tesseractmain.cpp)
if (MSVC)
set(tesseractmain_rsc ${CMAKE_CURRENT_BINARY_DIR}/vs2010/tesseract/tesseract.rc)
endif()
add_executable (tesseract ${tesseractmain_src} ${tesseractmain_rsc})
add_executable (tesseract src/api/tesseractmain.cpp)
target_link_libraries (tesseract libtesseract)
if (HAVE_TIFFIO_H)
target_link_libraries(tesseract tiff)
@ -523,37 +696,25 @@ configure_file(tesseract.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc @ONLY
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc DESTINATION lib/pkgconfig)
install(TARGETS tesseract RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install(TARGETS libtesseract EXPORT TesseractTargets RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install(EXPORT TesseractTargets DESTINATION cmake)
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/TesseractConfig.cmake
${CMAKE_CURRENT_BINARY_DIR}/TesseractConfig-version.cmake
DESTINATION cmake)
install(EXPORT TesseractTargets NAMESPACE Tesseract:: DESTINATION lib/cmake/tesseract)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cmake DESTINATION lib)
install(FILES
# from api/makefile.am
include/tesseract/apitypes.h
include/tesseract/baseapi.h
include/tesseract/capi.h
include/tesseract/renderer.h
${CMAKE_CURRENT_BINARY_DIR}/include/tesseract/version.h
#from ccmain/makefile.am
include/tesseract/thresholder.h
include/tesseract/ltrresultiterator.h
include/tesseract/pageiterator.h
include/tesseract/resultiterator.h
include/tesseract/osdetect.h
#from ccstruct/makefile.am
include/tesseract/publictypes.h
#from ccutil/makefile.am
include/tesseract/genericvector.h
include/tesseract/helpers.h
include/tesseract/ocrclass.h
include/tesseract/platform.h
include/tesseract/serialis.h
include/tesseract/strngs.h
include/tesseract/export.h
include/tesseract/unichar.h
#${CMAKE_CURRENT_BINARY_DIR}/src/endianness.h

View File

@ -7,7 +7,7 @@ RUN apt-get update
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y cmake curl git ruby bundler wget unzip \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN gem install bundler travis --no-ri --no-rdoc
# Skip documentation generation with --no-document. The single-dash
# "-no-ri -no-rdoc" spelling is not a valid flag pair (-n is the short form of
# --bindir and would swallow the rest as its argument), and the older
# --no-ri/--no-rdoc options were removed in RubyGems 3.
RUN gem install bundler travis --no-document
RUN git clone --depth 1 https://github.com/travis-ci/travis-build ~/.travis/travis-build
RUN bundle install --gemfile ~/.travis/travis-build/Gemfile

View File

@ -40,8 +40,8 @@ all languages).
You need an Internet connection and [curl](https://curl.haxx.se/) to compile `ScrollView.jar`
because the build will automatically download
[piccolo2d-core-3.0.jar](http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0/piccolo2d-core-3.0.jar > piccolo2d-core-3.0.jar) and
[piccolo2d-extras-3.0.jar](http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0/piccolo2d-extras-3.0.jar) and
[piccolo2d-core-3.0.1.jar](https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0.1/piccolo2d-core-3.0.1.jar) and
[piccolo2d-extras-3.0.1.jar](https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0.1/piccolo2d-extras-3.0.1.jar) and
[jaxb-api-2.3.1.jar](http://search.maven.org/remotecontent?filepath=javax/xml/bind/jaxb-api/2.3.1/jaxb-api-2.3.1.jar) and place them in `tesseract/java`.
Just run:

File diff suppressed because it is too large Load Diff

View File

@ -2,11 +2,11 @@
[![Build Status](https://travis-ci.org/tesseract-ocr/tesseract.svg?branch=master)](https://travis-ci.org/tesseract-ocr/tesseract)
[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
![Build status](https://github.com/tesseract-ocr/tesseract/workflows/windows/badge.svg)<br>
![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)<br>
[![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr)
[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/cpp/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/context:cpp)
[![Total Alerts](https://img.shields.io/lgtm/alerts/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/alerts)
[![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:tesseract-ocr)
[![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=2&q=proj:tesseract-ocr)
<br/>
[![GitHub license](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://raw.githubusercontent.com/tesseract-ocr/tesseract/master/LICENSE)
[![Downloads](https://img.shields.io/badge/download-all%20releases-brightgreen.svg)](https://github.com/tesseract-ocr/tesseract/releases/)
@ -18,7 +18,7 @@ Tesseract 4 adds a new neural net (LSTM) based OCR engine which is focused
on line recognition, but also still supports the legacy Tesseract OCR engine of
Tesseract 3 which works by recognizing character patterns. Compatibility with
Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0).
It also needs traineddata files which support the legacy engine, for example
It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example
those from the tessdata repository.
The lead developer is Ray Smith. The maintainer is Zdenko Podobny.
@ -61,13 +61,7 @@ and **[Change Log](https://github.com/tesseract-ocr/tesseract/blob/master/Change
You can either [Install Tesseract via pre-built binary package](https://tesseract-ocr.github.io/tessdoc/Home.html)
or [build it from source](https://tesseract-ocr.github.io/tessdoc/Compiling.html).
Supported Compilers are:
* GCC 4.8 and above
* Clang 3.4 and above
* MSVC 2015, 2017, 2019
Other compilers might work, but are not officially supported.
C++17 support is required for building.
## Running Tesseract

View File

@ -1 +1 @@
5.0.0-alpha
5.0.0-alpha-20201231

2
abseil

@ -1 +1 @@
Subproject commit daf381e8535a1f1f1b8a75966a74e7cca63dee89
Subproject commit b832dce8489ef7b6231384909fd9b68d5a5ff2b7

View File

@ -3,12 +3,12 @@ environment:
# does not work with sw at the moment
#- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
#platform: Win32
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
platform: Win32
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
platform: Win64
#- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
#- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
#platform: Win32
#- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
#platform: Win64
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
platform: Win64
configuration:
- Release
@ -17,18 +17,30 @@ cache:
- c:/Users/appveyor/.sw -> appveyor.yml
before_build:
- git submodule update --init --recursive
- curl -fsS -L -o dl.zip https://github.com/SoftwareNetwork/binaries/raw/master/sw-master-windows-client.zip
- 7z x dl.zip
- set PATH=%PATH%;%cd%
build_script:
- sw -version
- sw -show-output -platform %platform% build
# -show-output - show command output
# debug build causes long builds (> 1h), appveyor drops them
- sw -platform %platform% -config r build -Dwith-tests=1
# test
- git clone https://github.com/egorpugin/tessdata tessdata_unittest
- ps: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse
- sw -platform %platform% -config r test -Dwith-tests=1 -Dskip-tests=lstm,lstm_recode
after_build:
- 7z a tesseract.zip %APPVEYOR_BUILD_FOLDER%\.sw\out\**\*.exe %APPVEYOR_BUILD_FOLDER%\.sw\out\**\*.dll
#- 7z a tesseract.zip %APPVEYOR_BUILD_FOLDER%\.sw\Windows_*_Shared_Release_MSVC_*\*.exe %APPVEYOR_BUILD_FOLDER%\.sw\Windows_*_Shared_Release_MSVC_*\*.dll
on_finish:
# gather tests
- ps: $wc = New-Object 'System.Net.WebClient'
- ps: $wc.UploadFile("https://ci.appveyor.com/api/testresults/junit/$($env:APPVEYOR_JOB_ID)", (Resolve-Path .\.sw\test\results.xml))
artifacts:
- path: tesseract.zip
name: tesseract-$(APPVEYOR_BUILD_VERSION)

View File

@ -35,8 +35,8 @@
#=============================================================================
get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH)
include("${_currentDir}/CheckCCompilerFlag.cmake")
include("${_currentDir}/CheckCXXCompilerFlag.cmake")
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
macro(AddCompilerFlag _flag)
string(REGEX REPLACE "[-.+/:= ]" "_" _flag_esc "${_flag}")

View File

@ -1,73 +0,0 @@
# - Check whether the C compiler supports a given flag.
# CHECK_C_COMPILER_FLAG(<flag> <var>)
# <flag> - the compiler flag
# <var> - variable to store the result
# This internally calls the check_c_source_compiles macro.
# See help for CheckCSourceCompiles for a listing of variables
# that can modify the build.
#=============================================================================
# Copyright 2006-2009 Kitware, Inc.
# Copyright 2006 Alexander Neundorf <neundorf@kde.org>
# Copyright 2011-2013 Matthias Kretz <kretz@kde.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * The names of Kitware, Inc., the Insight Consortium, or the names of
# any consortium members, or of any contributors, may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
INCLUDE(CheckCSourceCompiles)
# CHECK_C_COMPILER_FLAG(<flag> <var> [<source>])
# Compiles a small C program with <flag> applied and stores the outcome of
# the compile check in <var>. An optional third argument replaces the
# default test program.
MACRO (CHECK_C_COMPILER_FLAG _FLAG _RESULT)
# Remember the caller's CMAKE_REQUIRED_DEFINITIONS; restored at the end.
SET(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
# Hand the flag under test to the compile check.
SET(CMAKE_REQUIRED_DEFINITIONS "${_FLAG}")
# Use the caller-supplied test source if a third argument was passed,
# otherwise fall back to a minimal program.
if(${ARGC} GREATER 2)
SET(TEST_SOURCE "${ARGV2}")
else()
SET(TEST_SOURCE "int main() { return 0;}")
endif()
CHECK_C_SOURCE_COMPILES("${TEST_SOURCE}" ${_RESULT}
# Some compilers do not fail with a bad flag, so the check is also treated
# as failed when the compiler output matches one of these patterns.
FAIL_REGEX "error: bad value (.*) for .* switch" # GNU
FAIL_REGEX "argument unused during compilation" # clang
FAIL_REGEX "is valid for .* but not for C" # GNU
FAIL_REGEX "unrecognized .*option" # GNU
FAIL_REGEX "ignored for target" # GNU
FAIL_REGEX "ignoring unknown option" # MSVC
FAIL_REGEX "warning D9002" # MSVC
FAIL_REGEX "[Uu]nknown option" # HP
FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro
FAIL_REGEX "command option .* is not recognized" # XL
FAIL_REGEX "WARNING: unknown flag:" # Open64
FAIL_REGEX "command line error" # ICC
FAIL_REGEX "command line warning" # ICC
FAIL_REGEX "#10236:" # ICC: File not found
FAIL_REGEX " #10159: " # ICC
FAIL_REGEX " #10353: " # ICC: option '-mfma' ignored, suggest using '-march=core-avx2'
)
# Put the caller's definitions back so later checks are unaffected.
SET (CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
ENDMACRO (CHECK_C_COMPILER_FLAG)

View File

@ -1,73 +0,0 @@
# - Check whether the CXX compiler supports a given flag.
# CHECK_CXX_COMPILER_FLAG(<flag> <var>)
# <flag> - the compiler flag
# <var> - variable to store the result
# This internally calls the check_cxx_source_compiles macro. See help
# for CheckCXXSourceCompiles for a listing of variables that can
# modify the build.
#=============================================================================
# Copyright 2006-2009 Kitware, Inc.
# Copyright 2006 Alexander Neundorf <neundorf@kde.org>
# Copyright 2011-2013 Matthias Kretz <kretz@kde.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * The names of Kitware, Inc., the Insight Consortium, or the names of
# any consortium members, or of any contributors, may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
INCLUDE(CheckCXXSourceCompiles)
# CHECK_CXX_COMPILER_FLAG(<flag> <var> [<source>])
# Compiles a small C++ program with <flag> applied and stores the outcome of
# the compile check in <var>. An optional third argument replaces the
# default test program.
MACRO (CHECK_CXX_COMPILER_FLAG _FLAG _RESULT)
# Remember the caller's CMAKE_REQUIRED_DEFINITIONS; restored at the end.
SET(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
# Hand the flag under test to the compile check.
SET(CMAKE_REQUIRED_DEFINITIONS "${_FLAG}")
# Use the caller-supplied test source if a third argument was passed,
# otherwise fall back to a minimal program.
if(${ARGC} GREATER 2)
SET(TEST_SOURCE "${ARGV2}")
else()
SET(TEST_SOURCE "int main() { return 0;}")
endif()
CHECK_CXX_SOURCE_COMPILES("${TEST_SOURCE}" ${_RESULT}
# Some compilers do not fail with a bad flag, so the check is also treated
# as failed when the compiler output matches one of these patterns.
FAIL_REGEX "error: bad value (.*) for .* switch" # GNU
FAIL_REGEX "argument unused during compilation" # clang
FAIL_REGEX "is valid for .* but not for C\\\\+\\\\+" # GNU
FAIL_REGEX "unrecognized .*option" # GNU
FAIL_REGEX "ignored for target" # GNU
FAIL_REGEX "ignoring unknown option" # MSVC
FAIL_REGEX "warning D9002" # MSVC
FAIL_REGEX "[Uu]nknown option" # HP
FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro
FAIL_REGEX "command option .* is not recognized" # XL
FAIL_REGEX "WARNING: unknown flag:" # Open64
FAIL_REGEX "command line error" # ICC
FAIL_REGEX "command line warning" # ICC
FAIL_REGEX "#10236:" # ICC: File not found
FAIL_REGEX " #10159: " # ICC
FAIL_REGEX " #10353: " # ICC: option '-mfma' ignored, suggest using '-march=core-avx2'
)
# Put the caller's definitions back so later checks are unaffected.
SET (CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
ENDMACRO (CHECK_CXX_COMPILER_FLAG)

View File

@ -82,18 +82,12 @@ include(TestBigEndian)
set(include_files_list
dlfcn.h
inttypes.h
limits.h
malloc.h
memory.h
stdbool.h
stdint.h
stdlib.h
string.h
sys/ipc.h
sys/shm.h
sys/stat.h
sys/types.h
sys/wait.h
unistd.h
cairo/cairo-version.h
@ -107,10 +101,7 @@ check_includes(include_files_list)
set(types_list
"long long int"
off_t
mbstate_t
wchar_t
_Bool
)
check_types(types_list)
@ -129,14 +120,6 @@ if(TESSDATA_PREFIX)
")
endif()
test_big_endian(WORDS_BIGENDIAN)
file(APPEND ${AUTOCONFIG_SRC} "
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#cmakedefine WORDS_BIGENDIAN 1
")
########################################
################################################################################

View File

@ -1,14 +0,0 @@
set(Tesseract_VERSION @VERSION_PLAIN@)
set(PACKAGE_VERSION ${Tesseract_VERSION})
set(PACKAGE_VERSION_EXACT False)
set(PACKAGE_VERSION_COMPATIBLE False)
if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
set(PACKAGE_VERSION_EXACT True)
set(PACKAGE_VERSION_COMPATIBLE True)
endif()
if(PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
set(PACKAGE_VERSION_COMPATIBLE True)
endif()

View File

@ -7,39 +7,22 @@
# In your CMakeLists.txt, add these lines:
#
# find_package(Tesseract REQUIRED)
# include_directories(${Tesseract_INCLUDE_DIRS})
# target_link_libraries(MY_TARGET_NAME ${Tesseract_LIBRARIES})
# target_link_libraries(MY_TARGET_NAME Tesseract::libtesseract)
#
# This file will define the following variables:
# - Tesseract_LIBRARIES : The list of all imported targets for Tesseract.
# - Tesseract_INCLUDE_DIRS : The Tesseract include directories.
# - Tesseract_VERSION : The version of this Tesseract build: "@VERSION_PLAIN@"
# - Tesseract_VERSION_MAJOR : Major version part of Tesseract_VERSION: "@VERSION_MAJOR@"
# - Tesseract_VERSION_MINOR : Minor version part of Tesseract_VERSION: "@VERSION_MINOR@"
#
# ===================================================================================
include(CMakeFindDependencyMacro)
find_dependency(Leptonica)
include(${CMAKE_CURRENT_LIST_DIR}/TesseractTargets.cmake)
find_package(Leptonica REQUIRED)
# ======================================================
# Version variables:
# ======================================================
SET(Tesseract_VERSION @VERSION_PLAIN@)
SET(Tesseract_VERSION_MAJOR @VERSION_MAJOR@)
SET(Tesseract_VERSION_MINOR @VERSION_MINOR@)
# ======================================================
# Include directories to add to the user project:
# ======================================================
# Provide the include directories to the caller
set(Tesseract_INCLUDE_DIRS @INCLUDE_DIR@)
# ====================================================================
# Link libraries:
# ====================================================================
@PACKAGE_INIT@
set_and_check(Tesseract_INCLUDE_DIRS "@PACKAGE_INCLUDE_DIR@")
set(Tesseract_LIBRARIES libtesseract)
check_required_components(Tesseract)

View File

@ -28,8 +28,8 @@ AM_INIT_AUTOMAKE([foreign subdir-objects])
# Define date of package, etc. Could be useful in auto-generated
# documentation.
PACKAGE_YEAR=2018
PACKAGE_DATE="10/29"
PACKAGE_YEAR=2020
PACKAGE_DATE="12/31"
abs_top_srcdir=`AS_DIRNAME([$0])`
@ -72,7 +72,6 @@ AC_CONFIG_HEADERS([config_auto.h:config/config.h.in])
# default conditional
AM_CONDITIONAL([T_WIN], false)
AM_CONDITIONAL([OSX], false)
AM_CONDITIONAL([GRAPHICS_DISABLED], false)
AC_SUBST([AM_CPPFLAGS])
@ -124,29 +123,66 @@ AX_CHECK_COMPILE_FLAG([-Werror=unused-command-line-argument], [WERROR=-Werror=un
## Checks for supported compiler options.
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false], [$WERROR])
AM_CONDITIONAL([HAVE_AVX], ${avx})
if $avx; then
AC_DEFINE([HAVE_AVX], [1], [Enable AVX instructions])
fi
AM_CONDITIONAL([HAVE_AVX], false)
AM_CONDITIONAL([HAVE_AVX2], false)
AM_CONDITIONAL([HAVE_FMA], false)
AM_CONDITIONAL([HAVE_SSE4_1], false)
AM_CONDITIONAL([HAVE_NEON], false)
AX_CHECK_COMPILE_FLAG([-mavx2], [avx2=true], [avx2=false], [$WERROR])
AM_CONDITIONAL([HAVE_AVX2], $avx2)
if $avx2; then
AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
fi
case "${host_cpu}" in
AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
AM_CONDITIONAL([HAVE_FMA], $fma)
if $fma; then
AC_DEFINE([HAVE_FMA], [1], [Enable FMA instructions])
fi
*86*)
AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false], [$WERROR])
AM_CONDITIONAL([HAVE_SSE4_1], $sse41)
if $sse41; then
AC_DEFINE([HAVE_SSE4_1], [1], [Enable SSE 4.1 instructions])
fi
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false], [$WERROR])
AM_CONDITIONAL([HAVE_AVX], ${avx})
if $avx; then
AC_DEFINE([HAVE_AVX], [1], [Enable AVX instructions])
fi
AX_CHECK_COMPILE_FLAG([-mavx2], [avx2=true], [avx2=false], [$WERROR])
AM_CONDITIONAL([HAVE_AVX2], $avx2)
if $avx2; then
AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
fi
AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
AM_CONDITIONAL([HAVE_FMA], $fma)
if $fma; then
AC_DEFINE([HAVE_FMA], [1], [Enable FMA instructions])
fi
AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false], [$WERROR])
AM_CONDITIONAL([HAVE_SSE4_1], $sse41)
if $sse41; then
AC_DEFINE([HAVE_SSE4_1], [1], [Enable SSE 4.1 instructions])
fi
;;
aarch64)
# ARMv8 always has NEON and does not need special compiler flags.
AM_CONDITIONAL([HAVE_NEON], true)
AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
;;
arm*)
AX_CHECK_COMPILE_FLAG([-mfpu=neon], [neon=true], [neon=false], [$WERROR])
AM_CONDITIONAL([HAVE_NEON], $neon)
if $neon; then
AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
NEON_CXXFLAGS="-mfpu=neon"
AC_SUBST([NEON_CXXFLAGS])
fi
;;
*)
AC_MSG_WARN([No compiler options for $host_cpu])
esac
AX_CHECK_COMPILE_FLAG([-march=native], [arch_native=true], [arch_native=false], [$WERROR])
AM_CONDITIONAL([MARCH_NATIVE_OPT], $arch_native)
@ -207,11 +243,21 @@ if test "$enable_opencl" = "yes"; then
])
fi
# Check whether to build with support for TensorFlow.
# Configure arguments which allow disabling some optional libraries.
AC_ARG_WITH([archive],
AS_HELP_STRING([--with-archive],
[Build with libarchive which supports compressed model files @<:@default=check@:>@]),
[], [with_archive=check])
AC_ARG_WITH([curl],
AS_HELP_STRING([--with-curl],
[Build with libcurl which supports processing an image URL @<:@default=check@:>@]),
[], [with_curl=check])
AC_ARG_WITH([tensorflow],
AS_HELP_STRING([--with-tensorflow],
[support TensorFlow @<:@default=check@:>@]),
[], [with_tensorflow=check])
# Check whether to build with support for TensorFlow.
AM_CONDITIONAL([TENSORFLOW], false)
TENSORFLOW_LIBS=
AS_IF([test "x$with_tensorflow" != xno],
@ -280,7 +326,7 @@ case "${host_os}" in
fi
;;
esac
AM_CONDITIONAL([USE_OPENCL], [test "$enable_opencl" = "yes"])
AM_CONDITIONAL([OPENCL], [test "$enable_opencl" = "yes"])
AC_SUBST([OPENCL_CPPFLAGS])
AC_SUBST([OPENCL_LDFLAGS])
@ -338,23 +384,6 @@ else
AM_CPPFLAGS="$AM_CPPFLAGS -O2 -DNDEBUG"
fi
# Always look into a "gnu" directory.
curwd=`pwd`
if test -d $curwd/gnu/include ; then
CPPFLAGS="$CPPFLAGS -I$curwd/gnu/include"
fi
if test -d $curwd/gnu/lib ; then
LDFLAGS="$LDFLAGS -L$curwd/gnu/lib"
fi
# ----------------------------------------
# Additional checking of compiler characteristics
# ----------------------------------------
# Check Endianness. If Big Endian, this will define WORDS_BIGENDIAN
AC_C_BIGENDIAN
# ----------------------------------------
# Init libtool
# ----------------------------------------
@ -366,20 +395,18 @@ LT_INIT
# C++ related options
# ----------------------------------------
dnl **********************
dnl Turn on C++11 or newer
dnl Turn on C++17 or newer
dnl **********************
CPLUSPLUS=
AX_CHECK_COMPILE_FLAG([-std=c++11], [CPLUSPLUS=11], [], [$WERROR])
AX_CHECK_COMPILE_FLAG([-std=c++14], [CPLUSPLUS=14], [], [$WERROR])
AX_CHECK_COMPILE_FLAG([-std=c++17], [CPLUSPLUS=17], [], [$WERROR])
#AX_CHECK_COMPILE_FLAG([-std=c++20], [CPLUSPLUS=20], [], [$WERROR])
AX_CHECK_COMPILE_FLAG([-std=c++20], [CPLUSPLUS=20], [], [$WERROR])
if test -z "$CPLUSPLUS"; then
AC_MSG_ERROR([Your compiler does not have the necessary C++11 support! Cannot proceed.])
AC_MSG_ERROR([Your compiler does not have the necessary C++17 support! Cannot proceed.])
fi
# Set C++11, C++14 or C++17 support based on platform/compiler
# Set C++17 or newer support based on platform/compiler
case "${host_os}" in
cygwin*)
CXXFLAGS="$CXXFLAGS -std=gnu++$CPLUSPLUS"
@ -404,29 +431,43 @@ esac
AC_SEARCH_LIBS([pthread_create], [pthread])
# ----------------------------------------
# Checks for header files.
# ----------------------------------------
AC_HEADER_STDC
AC_HEADER_TIME
AC_HEADER_SYS_WAIT
AC_CHECK_HEADERS([sys/ipc.h sys/shm.h])
AC_CHECK_HEADERS([limits.h malloc.h])
# Enable use of system-defined bool type if available:
AC_HEADER_STDBOOL
# ----------------------------------------
# Check for programs needed to build documentation.
# ----------------------------------------
AC_CHECK_PROG([have_asciidoc], asciidoc, true, false)
AC_CHECK_PROG([have_xsltproc], xsltproc, true, false)
if $have_asciidoc && $have_xsltproc; then
AM_CONDITIONAL([ASCIIDOC], true)
else
AM_CONDITIONAL([ASCIIDOC], false)
fi
AM_CONDITIONAL([ASCIIDOC], false)
AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], false)
# Let the user switch off the documentation build with --disable-doc.
# The default value "check" is written into the DEFAULTS section of the
# generated script, so the code below can tell an explicit --enable-doc
# (missing tools are then a hard failure) from the unset default.
# Note: the help string must be followed by a comma, otherwise the empty
# action-if-given is glued onto it and all later arguments shift by one.
AC_ARG_ENABLE([doc],
AS_HELP_STRING([--disable-doc], [disable build of documentation]),
[],
[: m4_divert_text([DEFAULTS], [enable_doc=check])])
AS_IF([test "$enable_doc" != "no"], [
AC_CHECK_PROG([have_asciidoc], asciidoc, true, false)
AC_CHECK_PROG([have_xsltproc], xsltproc, true, false)
# macOS with Homebrew requires the environment variable
# XML_CATALOG_FILES for xsltproc.
if $have_asciidoc && $have_xsltproc; then
AM_CONDITIONAL([ASCIIDOC], true)
XML_CATALOG_FILES=
AC_CHECK_PROG([have_brew], brew, true, false)
if $have_brew; then
brew_prefix=$(brew --prefix)
catalog_file=$brew_prefix/etc/xml/catalog
if test -f $catalog_file; then
AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], true)
XML_CATALOG_FILES=file:$catalog_file
else
AC_MSG_WARN([Missing file $catalog_file.])
fi
fi
AC_SUBST([XML_CATALOG_FILES])
else
AS_IF([test "x$enable_doc" != xcheck], [
AC_MSG_FAILURE(
[--enable-doc was given, but test for asciidoc and xsltproc failed])
])
fi
])
# ----------------------------------------
# Checks for typedefs, structures, and compiler characteristics.
@ -434,18 +475,24 @@ fi
AC_CHECK_TYPES([wchar_t],,, [#include "wchar.h"])
AC_CHECK_TYPES([long long int])
AC_CHECK_TYPES([off_t],,, [#include "sys/types.h"])
AC_CHECK_TYPES([mbstate_t],,, [#include "wchar.h"])
# ----------------------------------------
# Test auxiliary packages
# ----------------------------------------
PKG_CHECK_MODULES([libcurl], [libcurl], [have_libcurl=true], [have_libcurl=false])
AM_CONDITIONAL([HAVE_LIBCURL], $have_libcurl)
if $have_libcurl; then
AC_DEFINE([HAVE_LIBCURL], [1], [Enable libcurl])
fi
AM_CONDITIONAL([HAVE_LIBCURL], false)
AS_IF([test "x$with_curl" != xno], [
PKG_CHECK_MODULES([libcurl], [libcurl], [have_libcurl=true], [have_libcurl=false])
AM_CONDITIONAL([HAVE_LIBCURL], $have_libcurl)
if $have_libcurl; then
AC_DEFINE([HAVE_LIBCURL], [1], [Enable libcurl])
else
AS_IF([test "x$with_curl" != xcheck], [
AC_MSG_FAILURE(
[--with-curl was given, but test for libcurl failed])
])
fi
])
PKG_CHECK_MODULES([LEPTONICA], [lept >= 1.74], [have_lept=true], [have_lept=false])
if $have_lept; then
@ -454,12 +501,20 @@ else
AC_MSG_ERROR([Leptonica 1.74 or higher is required. Try to install libleptonica-dev package.])
fi
PKG_CHECK_MODULES([libarchive], [libarchive], [have_libarchive=true], [have_libarchive=false])
AM_CONDITIONAL([HAVE_LIBARCHIVE], [$have_libarchive])
if $have_libarchive; then
AC_DEFINE([HAVE_LIBARCHIVE], [1], [Enable libarchive])
CPPFLAGS="$CPPFLAGS $libarchive_CFLAGS"
fi
AM_CONDITIONAL([HAVE_LIBARCHIVE], false)
AS_IF([test "x$with_archive" != xno], [
PKG_CHECK_MODULES([libarchive], [libarchive], [have_libarchive=true], [have_libarchive=false])
AM_CONDITIONAL([HAVE_LIBARCHIVE], [$have_libarchive])
if $have_libarchive; then
AC_DEFINE([HAVE_LIBARCHIVE], [1], [Enable libarchive])
CPPFLAGS="$CPPFLAGS $libarchive_CFLAGS"
else
AS_IF([test "x$with_archive" != xcheck], [
AC_MSG_FAILURE(
[--with-archive was given, but test for libarchive failed])
])
fi
])
AM_CONDITIONAL([ENABLE_TRAINING], true)
@ -503,15 +558,12 @@ AC_CONFIG_FILES([Makefile tesseract.pc])
AC_CONFIG_FILES([tessdata/Makefile])
AC_CONFIG_FILES([tessdata/configs/Makefile])
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])
AC_CONFIG_FILES([unittest/Makefile])
AC_CONFIG_FILES([java/Makefile])
AC_CONFIG_FILES([java/com/Makefile])
AC_CONFIG_FILES([java/com/google/Makefile])
AC_CONFIG_FILES([java/com/google/scrollview/Makefile])
AC_CONFIG_FILES([java/com/google/scrollview/events/Makefile])
AC_CONFIG_FILES([java/com/google/scrollview/ui/Makefile])
AC_CONFIG_FILES([doc/Makefile])
AM_COND_IF([ENABLE_TRAINING], [AC_CONFIG_FILES(src/training/Makefile)])
AC_OUTPUT
# Final message
@ -524,13 +576,15 @@ echo "$ sudo make install"
echo "$ sudo ldconfig"
echo ""
AM_COND_IF([ASCIIDOC],
[
echo "This will also build the documentation."
AM_COND_IF([ASCIIDOC], [
echo "This will also build the documentation."
], [
AS_IF([test "$enable_doc" = "no"], [
echo "Documentation will not be built because it was disabled."
], [
echo "Documentation will not be built because asciidoc or xsltproc is missing."
]
)
echo "Documentation will not be built because asciidoc or xsltproc is missing."
])
])
# echo "$ sudo make install LANGS=\"eng ara deu\""
# echo " Or:"

View File

@ -899,7 +899,7 @@ RECURSIVE = YES
# Note that relative paths are relative to the directory from which doxygen is
# run.
EXCLUDE = ../src/vs2010
EXCLUDE =
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded

View File

@ -1,53 +0,0 @@
# doc/Makefile.am
if ASCIIDOC
man_MANS = \
combine_lang_model.1 \
combine_tessdata.1 \
dawg2wordlist.1 \
lstmeval.1 \
lstmtraining.1 \
merge_unicharsets.1 \
set_unicharset_properties.1 \
tesseract.1 \
text2image.1 \
unicharambigs.5 \
unicharset_extractor.1 \
wordlist2dawg.1
if !DISABLED_LEGACY_ENGINE
man_MANS += \
ambiguous_words.1 \
classifier_tester.1 \
cntraining.1 \
mftraining.1 \
shapeclustering.1 \
unicharset.5
endif
man_xslt = http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl
EXTRA_DIST = $(man_MANS) Doxyfile
.PHONY: html
html: ${man_MANS:%=%.html}
pdf: ${man_MANS:%=%.pdf}
SUFFIXES = .asc .html .pdf
.asc:
-asciidoc -b docbook -d manpage -o - $< | \
xsltproc --nonet $(man_xslt) -
.asc.html:
asciidoc -b html5 -o $@ $<
.asc.pdf:
asciidoc -b docbook -d manpage -o $*.dbk $<
docbook2pdf $*.dbk
MAINTAINERCLEANFILES = $(man_MANS) Doxyfile
endif

View File

@ -66,6 +66,9 @@ OPTIONS
*-e* '.traineddata' 'FILE'...:
Extracts the specified components from the .traineddata file
*-l* '.traineddata' 'FILE'...:
List the network information.
*-o* '.traineddata' 'FILE'...:
Overwrites the specified components of the .traineddata file
with those provided on the command line.

View File

@ -41,7 +41,7 @@ OPTIONS
Index in continue_from Network at which to attach the new network defined by net_spec (type:int default:-1)
'--max_iterations '::
If set, exit after this many iterations (type:int default:0)
If set, exit after this many iterations. A negative value is interpreted as epochs, 0 means infinite iterations. (type:int default:0)
'--target_error_rate '::
Final error rate in percent. (type:double default:0.01)

View File

@ -191,9 +191,11 @@ following languages:
*chi_sim* (Chinese simplified),
*chi_tra* (Chinese traditional),
*chr* (Cherokee),
*cos* (Corsican),
*cym* (Welsh),
*dan* (Danish),
*deu* (German),
*div* (Dhivehi),
*dzo* (Dzongkha),
*ell* (Greek, Modern, 1453-),
*eng* (English),
@ -203,10 +205,14 @@ following languages:
*est* (Estonian),
*eus* (Basque),
*fas* (Persian),
*fao* (Faroese),
*fil* (Filipino),
*fin* (Finnish),
*fra* (French),
*frk* (Frankish),
*frm* (French, Middle, ca.1400-1600),
*fry* (West Frisian),
*gla* (Scottish Gaelic),
*gle* (Irish),
*glg* (Galician),
*grc* (Greek, Ancient, to 1453),
@ -216,6 +222,7 @@ following languages:
*hin* (Hindi),
*hrv* (Croatian),
*hun* (Hungarian),
*hye* (Armenian),
*iku* (Inuktitut),
*ind* (Indonesian),
*isl* (Icelandic),
@ -232,7 +239,6 @@ following languages:
*kmr* (Kurdish Kurmanji),
*kor* (Korean),
*kor_vert* (Korean vertical),
*kur* (Kurdish),
*lao* (Lao),
*lat* (Latin),
*lav* (Latvian),
@ -277,7 +283,6 @@ following languages:
*tat* (Tatar),
*tel* (Telugu),
*tgk* (Tajik),
*tgl* (Tagalog),
*tha* (Thai),
*tir* (Tigrinya),
*ton* (Tonga),

View File

@ -1,9 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<Type Name="STRING">
<DisplayString>{(char*)data_+sizeof(int)*2,s8}</DisplayString>
</Type>
<Type Name="GenericVector&lt;*&gt;">
<DisplayString>{{size={size_used_}}}</DisplayString>
<Expand>
@ -15,7 +11,7 @@
</ArrayItems>
</Expand>
</Type>
<Type Name="tesseract::IntParam">
<DisplayString>{value_}</DisplayString>
</Type>
@ -30,5 +26,5 @@
<Type Name="tesseract::DoubleParam">
<DisplayString>{value_}</DisplayString>
</Type>
</AutoVisualizer>

@ -1 +1 @@
Subproject commit a18ac392d883ca88d1849b90071cea5608fd9293
Subproject commit 703bd9caab50b139428cea1aaff9974ebee5742e

View File

@ -19,52 +19,36 @@
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#include <cstdio>
#include <functional> // for std::function
#include <tuple>
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <tesseract/version.h>
#include "apitypes.h"
#include "export.h"
#include "pageiterator.h"
#include "platform.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "serialis.h"
#include "thresholder.h"
#include "unichar.h"
template <typename T>
class GenericVector;
class PAGE_RES;
class PAGE_RES_IT;
class ParagraphModel;
struct BlamerBundle;
class BLOCK_LIST;
class DENORM;
class MATRIX;
class ROW;
class STRING;
class WERD;
#include <tesseract/version.h>
#include <cstdio>
#include <vector> // for std::vector
#include <tuple> // for std::tuple
struct Pix;
struct Box;
struct Pixa;
struct Boxa;
class ETEXT_DESC;
struct OSResults;
class TBOX;
class UNICHARSET;
class WERD_CHOICE_LIST;
struct INT_FEATURE_STRUCT;
using INT_FEATURE = INT_FEATURE_STRUCT*;
struct TBLOB;
namespace tesseract {
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
@ -74,19 +58,14 @@ class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
class Trie;
class Wordrec;
using DictFunc = int (Dict::*)(void*, const UNICHARSET&, UNICHAR_ID,
bool) const;
using ProbabilityInContextFunc = double (Dict::*)(const char*, const char*, int,
const char*, int);
using ParamsModelClassifyFunc = float (Dict::*)(const char*, void*);
using FillLatticeFunc = void (Wordrec::*)(const MATRIX&,
const WERD_CHOICE_LIST&,
const UNICHARSET&, BlamerBundle*);
using TruthCallback =
std::function<void(const UNICHARSET&, int, PageIterator*, Pix*)>;
// Function to read a std::vector<char> from a whole file.
// Returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const;
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *,
int);
/**
* Base class for all tesseract APIs.
@ -97,17 +76,17 @@ using TruthCallback =
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const&) = delete;
TessBaseAPI& operator=(TessBaseAPI const&) = delete;
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char* Version();
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
@ -116,13 +95,13 @@ class TESS_API TessBaseAPI {
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void** device);
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char* name);
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
@ -130,15 +109,15 @@ class TESS_API TessBaseAPI {
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char* GetInputName();
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix* pix);
Pix* GetInputImage();
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char* GetDatapath();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char* name);
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
@ -153,32 +132,32 @@ class TESS_API TessBaseAPI {
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char* name, const char* value);
bool SetDebugVariable(const char* name, const char* value);
bool SetVariable(const char *name, const char *value);
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char* name, int* value) const;
bool GetBoolVariable(const char* name, bool* value) const;
bool GetDoubleVariable(const char* name, double* value) const;
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char* GetStringVariable(const char* name) const;
const char *GetStringVariable(const char *name) const;
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE* fp) const;
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char* name, STRING* val);
bool GetVariableAsString(const char *name, std::string *val);
/**
* Instances are now mostly thread-safe and totally independent,
@ -217,25 +196,21 @@ class TESS_API TessBaseAPI {
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char* datapath, const char* language, OcrEngineMode mode,
char** configs, int configs_size,
const GenericVector<STRING>* vars_vec,
const GenericVector<STRING>* vars_values,
bool set_only_non_debug_params);
int Init(const char* datapath, const char* language, OcrEngineMode oem) {
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs,
int configs_size, const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values, bool set_only_non_debug_params);
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
}
int Init(const char* datapath, const char* language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
false);
int Init(const char *datapath, const char *language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char* data, int data_size, const char* language,
OcrEngineMode mode, char** configs, int configs_size,
const GenericVector<STRING>* vars_vec,
const GenericVector<STRING>* vars_values,
bool set_only_non_debug_params, FileReader reader);
int Init(const char *data, int data_size, const char *language, OcrEngineMode mode,
char **configs, int configs_size, const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values, bool set_only_non_debug_params,
FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
@ -245,19 +220,19 @@ class TESS_API TessBaseAPI {
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char* GetInitLanguagesAsString() const;
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of STRINGs.
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of STRINGs.
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only the lang model component of Tesseract. The only functions
@ -265,7 +240,7 @@ class TESS_API TessBaseAPI {
* WARNING: temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int InitLangMod(const char* datapath, const char* language);
int InitLangMod(const char *datapath, const char *language);
/**
* Init only for page layout analysis. Use only for calls to SetImage and
@ -279,9 +254,9 @@ class TESS_API TessBaseAPI {
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char* filename);
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char* filename);
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
@ -310,9 +285,8 @@ class TESS_API TessBaseAPI {
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char* TesseractRect(const unsigned char* imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
/**
* Call between pages or documents etc to free up memory and forget
@ -335,8 +309,8 @@ class TESS_API TessBaseAPI {
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel,
int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
@ -346,7 +320,7 @@ class TESS_API TessBaseAPI {
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix* pix);
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
@ -368,7 +342,7 @@ class TESS_API TessBaseAPI {
* Note that Tesseract takes ownership of the Thresholder and will
* delete it when it is replaced or the API is destructed.
*/
void SetThresholder(ImageThresholder* thresholder) {
void SetThresholder(ImageThresholder *thresholder) {
delete thresholder_;
thresholder_ = thresholder;
ClearResults();
@ -379,14 +353,14 @@ class TESS_API TessBaseAPI {
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix* GetThresholdedImage();
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa* GetRegions(Pixa** pixa);
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
@ -399,12 +373,11 @@ class TESS_API TessBaseAPI {
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa* GetTextlines(bool raw_image, int raw_padding, Pixa** pixa,
int** blockids, int** paraids);
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
Boxa* GetTextlines(Pixa** pixa, int** blockids) {
Helper method to extract from the thresholded image. (most common usage)
*/
// Forwards to the five-argument GetTextlines() with raw_image = false,
// raw_padding = 0 and paraids = nullptr; pixa and blockids are passed through
// unchanged.
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
return GetTextlines(false, 0, pixa, blockids, nullptr);
}
@ -416,14 +389,14 @@ class TESS_API TessBaseAPI {
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa* GetStrips(Pixa** pixa, int** blockids);
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa* GetWords(Pixa** pixa);
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
@ -433,7 +406,7 @@ class TESS_API TessBaseAPI {
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa* GetConnectedComponents(Pixa** cc);
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
@ -447,14 +420,12 @@ class TESS_API TessBaseAPI {
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa* GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa** pixa,
int** blockids, int** paraids);
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding,
Pixa **pixa, int **blockids, int **paraids);
// Helper function to get binary images with no padding (most common usage).
Boxa* GetComponentImages(const PageIteratorLevel level, const bool text_only,
Pixa** pixa, int** blockids) {
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
nullptr);
// Forwards to the seven-argument GetComponentImages() with raw_image = false,
// raw_padding = 0 and paraids = nullptr; level, text_only, pixa and blockids
// are passed through unchanged.
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa,
int **blockids) {
return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
}
/**
@ -480,8 +451,8 @@ class TESS_API TessBaseAPI {
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator* AnalyseLayout();
PageIterator* AnalyseLayout(bool merge_similar_words);
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
@ -489,18 +460,13 @@ class TESS_API TessBaseAPI {
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC* monitor);
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
#ifndef DISABLED_LEGACY_ENGINE
/** Variant on Recognize used for testing chopper. */
int RecognizeForChopTest(ETEXT_DESC* monitor);
#endif
/**
* Turns images into symbolic text.
*
@ -523,11 +489,11 @@ class TESS_API TessBaseAPI {
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
@ -538,9 +504,8 @@ class TESS_API TessBaseAPI {
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix* pix, int page_index, const char* filename,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer);
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
@ -550,7 +515,7 @@ class TESS_API TessBaseAPI {
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator* GetIterator();
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
@ -560,13 +525,13 @@ class TESS_API TessBaseAPI {
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator* GetMutableIterator();
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char* GetUTF8Text();
char *GetUTF8Text();
size_t GetNumberOfTables();
@ -584,6 +549,7 @@ class TESS_API TessBaseAPI {
std::vector<std::tuple<int,int,int,int> > GetTableCols(
unsigned i///<Table index needs to be less than GetNumberOfTables()
);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
@ -593,7 +559,7 @@ class TESS_API TessBaseAPI {
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
@ -601,26 +567,26 @@ class TESS_API TessBaseAPI {
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char* GetHOCRText(int page_number);
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char* GetAltoText(ETEXT_DESC* monitor, int page_number);
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char* GetAltoText(int page_number);
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char* GetTSVText(int page_number);
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
@ -628,7 +594,7 @@ class TESS_API TessBaseAPI {
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* GetLSTMBoxText(int page_number);
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
@ -637,7 +603,7 @@ class TESS_API TessBaseAPI {
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* GetBoxText(int page_number);
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
@ -645,14 +611,14 @@ class TESS_API TessBaseAPI {
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* GetWordStrBoxText(int page_number);
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char* GetUNLVText();
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
@ -663,15 +629,15 @@ class TESS_API TessBaseAPI {
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int* orient_deg, float* orient_conf,
const char** script_name, float* script_conf);
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name,
float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* GetOsdText(int page_number);
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
@ -681,7 +647,7 @@ class TESS_API TessBaseAPI {
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int* AllWordConfidences();
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
@ -694,8 +660,8 @@ class TESS_API TessBaseAPI {
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
@ -728,11 +694,11 @@ class TESS_API TessBaseAPI {
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char* word);
int IsValidWord(const char *word);
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char* utf8_character);
bool IsValidCharacter(const char *utf8_character);
bool GetTextDirection(int* out_offset, float* out_slope);
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
@ -746,73 +712,24 @@ class TESS_API TessBaseAPI {
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults*);
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int** block_orientation,
bool** vertical_writing);
#ifndef DISABLED_LEGACY_ENGINE
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
void SetFillLatticeFunc(FillLatticeFunc f);
/** Find lines from the image making the BLOCK_LIST. */
BLOCK_LIST* FindLinesCreateBlockList();
/**
* Delete a block list.
* This is to keep BLOCK_LIST pointer opaque
* and let go of including the other headers.
*/
static void DeleteBlockList(BLOCK_LIST* block_list);
/** Returns a ROW object created from the input row specification. */
static ROW* MakeTessOCRRow(float baseline, float xheight, float descender,
float ascender);
/** Returns a TBLOB corresponding to the entire input image. */
static TBLOB* MakeTBLOB(Pix* pix);
/**
* This method baseline normalizes a TBLOB in-place. The input row is used
* for normalization. The denorm is an optional parameter in which the
* normalization-antidote is returned.
*/
static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode);
/** This method returns the features associated with the input image. */
void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* feature_outline_index);
/**
* This method returns the row to which a box of specified dimensions would
* belong. If no good match is found, it returns nullptr.
*/
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right,
int bottom);
/**
* Method to run adaptive classifier on a blob.
* It returns at max num_max_matches results.
*/
void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids,
float* ratings, int* num_matches_returned);
#endif // ndef DISABLED_LEGACY_ENGINE
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char* GetUnichar(int unichar_id);
const char *GetUnichar(int unichar_id);
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg* GetDawg(int i) const;
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
Tesseract* tesseract() const {
/** Returns the tesseract_ member (the underlying data object) as stored; the object itself is not modified. */
Tesseract *tesseract() const {
return tesseract_;
}
@ -820,29 +737,25 @@ class TESS_API TessBaseAPI {
return last_oem_requested_;
}
void InitTruthCallback(TruthCallback cb) {
truth_cb_ = cb;
}
void set_min_orientation_margin(double margin);
/* @} */
protected:
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
TESS_LOCAL bool InternalSetImage();
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
TESS_LOCAL virtual bool Threshold(Pix** pix);
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
TESS_LOCAL int FindLines();
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
@ -852,7 +765,7 @@ class TESS_API TessBaseAPI {
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
TESS_LOCAL LTRResultIterator* GetLTRIterator();
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
@ -860,61 +773,30 @@ class TESS_API TessBaseAPI {
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
TESS_LOCAL int TextLength(int* blob_count);
int TextLength(int *blob_count);
//// paragraphs.cpp ////////////////////////////////////////////////////
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
void DetectParagraphs(bool after_text_recognition);
#ifndef DISABLED_LEGACY_ENGINE
/** @defgroup ocropusAddOns ocropus add-ons */
/* @{ */
/**
* Adapt to recognize the current image as the given character.
* The image must be preloaded and be just an image of a single character.
*/
TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length,
float baseline, float xheight,
float descender, float ascender);
/** Recognize text doing one pass only, using settings for a given pass. */
TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
PAGE_RES* pass1_result);
/**
* Extract the OCR results, costs (penalty points for uncertainty),
* and the bounding boxes of the characters.
*/
TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths,
float** costs, int** x0,
int** y0, int** x1, int** y1,
PAGE_RES* page_res);
TESS_LOCAL const PAGE_RES* GetPageRes() const {
/** Returns the page_res_ member (the page-level data) as a pointer to const. */
const PAGE_RES *GetPageRes() const {
return page_res_;
}
/* @} */
#endif // ndef DISABLED_LEGACY_ENGINE
protected:
Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection.
EquationDetect* equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder* thresholder_; ///< Image thresholding module.
GenericVector<ParagraphModel*>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code.
STRING* output_file_; ///< Name used by debug code.
STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
TruthCallback truth_cb_; ///< fxn for setting truth_* in WERD_RES
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
@ -929,21 +811,20 @@ class TESS_API TessBaseAPI {
int image_height_;
/* @} */
private:
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE* fp, STRING* buf, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer,
bool ProcessPagesFileList(FILE *fp, std::string *buf, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char* data, size_t size,
const char* filename, const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer, int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with HTML codes. */
STRING HOcrEscape(const char* text);
} // namespace tesseract.
std::string HOcrEscape(const char *text);
#endif // TESSERACT_API_BASEAPI_H_
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -18,22 +18,18 @@
#ifndef API_CAPI_H_
#define API_CAPI_H_
#if defined(TESSERACT_API_BASEAPI_H_) && !defined(TESS_CAPI_INCLUDE_BASEAPI)
# define TESS_CAPI_INCLUDE_BASEAPI
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#ifdef TESS_CAPI_INCLUDE_BASEAPI
# include "baseapi.h"
# include "ocrclass.h"
# include "pageiterator.h"
# include "renderer.h"
# include "resultiterator.h"
#else
# include <stdbool.h>
# include <stdio.h>
# include "platform.h"
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
@ -45,7 +41,7 @@ extern "C" {
# define FALSE 0
#endif
#ifdef TESS_CAPI_INCLUDE_BASEAPI
#ifdef __cplusplus
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
@ -54,19 +50,13 @@ typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::ImageThresholder TessImageThresholder;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::DictFunc TessDictFunc;
typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc;
// typedef tesseract::ParamsModelClassifyFunc TessParamsModelClassifyFunc;
typedef tesseract::FillLatticeFunc TessFillLatticeFunc;
typedef tesseract::Dawg TessDawg;
typedef tesseract::TruthCallback TessTruthCallback;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef PolyBlockType TessPolyBlockType;
typedef tesseract::PolyBlockType TessPolyBlockType;
typedef tesseract::ETEXT_DESC ETEXT_DESC;
#else
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
@ -147,9 +137,8 @@ typedef enum TessTextlineOrder {
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
typedef bool (*TessCancelFunc)(void* cancel_this, int words);
typedef bool (*TessProgressFunc)(ETEXT_DESC* ths, int left, int right, int top,
int bottom);
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top, int bottom);
struct Pix;
struct Boxa;
@ -157,409 +146,274 @@ struct Pixa;
/* General free functions */
TESS_API const char* TessVersion();
TESS_API void TessDeleteText(const char* text);
TESS_API void TessDeleteTextArray(char** arr);
TESS_API void TessDeleteIntArray(const int* arr);
TESS_API const char *TessVersion();
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
TESS_API TessResultRenderer* TessTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessHOcrRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessHOcrRendererCreate2(const char* outputbase,
BOOL font_info);
TESS_API TessResultRenderer* TessAltoRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessTsvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessPDFRendererCreate(const char* outputbase,
const char* datadir,
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase, BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase, const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer* TessUnlvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessBoxTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessLSTMBoxRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessWordStrBoxRendererCreate(
const char* outputbase);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(const char *outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer* renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer* renderer,
TessResultRenderer* next);
TESS_API TessResultRenderer* TessResultRendererNext(
TessResultRenderer* renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer* renderer,
const char* title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer* renderer,
TessBaseAPI* api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer* renderer);
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer, TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(TessResultRenderer *renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer, const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer, TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
TESS_API const char* TessResultRendererExtention(TessResultRenderer* renderer);
TESS_API const char* TessResultRendererTitle(TessResultRenderer* renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer* renderer);
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
TESS_API TessBaseAPI* TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI* handle);
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void** device);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI* handle, const char* name);
TESS_API const char* TessBaseAPIGetInputName(TessBaseAPI* handle);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix);
TESS_API struct Pix* TessBaseAPIGetInputImage(TessBaseAPI* handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI* handle);
TESS_API const char* TessBaseAPIGetDatapath(TessBaseAPI* handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name,
const char* value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name,
const char* value);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name, const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name, const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI* handle,
const char* name, int* value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle,
const char* name, BOOL* value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle,
const char* name, double* value);
TESS_API const char* TessBaseAPIGetStringVariable(const TessBaseAPI* handle,
const char* name);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle, const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle, const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle, const char *name,
double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle, const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle,
const char* filename);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle, const char *filename);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath, const char *language,
TessOcrEngineMode oem, char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath, const char *language,
TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath, const char *language);
TESS_API BOOL TessBaseAPIGetVariableAsString(TessBaseAPI* handle,
const char* name, STRING* val);
TESS_API int TessBaseAPIInit(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode mode,
char** configs, int configs_size,
const STRING* vars_vec, size_t vars_vec_size,
const STRING* vars_values, size_t vars_values_size,
BOOL set_only_init_params);
#endif // def TESS_CAPI_INCLUDE_BASEAPI
TESS_API int TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode oem,
char** configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath,
const char* language);
TESS_API int TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode mode,
char** configs, int configs_size, char** vars_vec,
char** vars_values, size_t vars_vec_size,
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath, const char *language,
TessOcrEngineMode mode, char **configs, int configs_size,
char **vars_vec, char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char* TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI* handle);
TESS_API char** TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI* handle);
TESS_API char** TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI* handle);
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI *handle);
TESS_API int TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath,
const char* language);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI* handle);
TESS_API int TessBaseAPIInitLangMod(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI* handle,
const char* filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle,
const char* filename);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle, const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle, const char *filename);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI* handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle, TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
TESS_API char* TessBaseAPIRect(TessBaseAPI* handle,
const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle, const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line, int left, int top,
int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
TESS_API void TessBaseAPISetImage(TessBaseAPI* handle,
const unsigned char* imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix);
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle, const unsigned char *imagedata, int width,
int height, int bytes_per_pixel, int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top,
int width, int height);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top, int width,
int height);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetThresholder(TessBaseAPI* handle,
TessImageThresholder* thresholder);
#endif
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle, struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle, struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle, BOOL raw_image, int raw_padding,
struct Pixa **pixa, int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle, struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle, struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle, struct Pixa **cc);
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level, BOOL text_only,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(TessBaseAPI *handle,
TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding,
struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API struct Pix* TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
TESS_API struct Boxa* TessBaseAPIGetRegions(TessBaseAPI* handle,
struct Pixa** pixa);
TESS_API struct Boxa* TessBaseAPIGetTextlines(TessBaseAPI* handle,
struct Pixa** pixa,
int** blockids);
TESS_API struct Boxa* TessBaseAPIGetTextlines1(TessBaseAPI* handle,
BOOL raw_image, int raw_padding,
struct Pixa** pixa,
int** blockids, int** paraids);
TESS_API struct Boxa* TessBaseAPIGetStrips(TessBaseAPI* handle,
struct Pixa** pixa, int** blockids);
TESS_API struct Boxa* TessBaseAPIGetWords(TessBaseAPI* handle,
struct Pixa** pixa);
TESS_API struct Boxa* TessBaseAPIGetConnectedComponents(TessBaseAPI* handle,
struct Pixa** cc);
TESS_API struct Boxa* TessBaseAPIGetComponentImages(TessBaseAPI* handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa** pixa,
int** blockids);
TESS_API struct Boxa* TessBaseAPIGetComponentImages1(
TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa** pixa, int** blockids,
int** paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI *handle);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI* handle);
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API TessPageIterator* TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
TESS_API int TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor);
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix, int page_index,
const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API int TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle,
ETEXT_DESC* monitor);
#endif
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle, TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix,
int page_index, const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API TessResultIterator* TessBaseAPIGetIterator(TessBaseAPI* handle);
TESS_API TessMutableIterator* TessBaseAPIGetMutableIterator(
TessBaseAPI* handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset, float *out_slope);
TESS_API char* TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
TESS_API char* TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API char* TessBaseAPIGetAltoText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetTsvText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetWordStrBoxText(TessBaseAPI* handle,
int page_number);
TESS_API char* TessBaseAPIGetUNLVText(TessBaseAPI* handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI* handle);
TESS_API int* TessBaseAPIAllWordConfidences(TessBaseAPI* handle);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle,
TessPageSegMode mode,
const char* wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPIClear(TessBaseAPI* handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI* handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset,
float* out_slope);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
TESS_API void TessBaseAPISetProbabilityInContextFunc(
TessBaseAPI* handle, TessProbabilityInContextFunc f);
// Call TessDeleteText(*best_script_name) to free memory allocated by this
// function
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
int* orient_deg,
float* orient_conf,
const char** script_name,
float* script_conf);
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle, int *orient_deg,
float *orient_conf, const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
#endif // def TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle, double margin);
TESS_API const char* TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle,
double margin);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API const TessDawg* TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI* handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI* handle);
TESS_API void TessBaseAPIInitTruthCallback(TessBaseAPI* handle,
TessTruthCallback cb);
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI* handle,
int** block_orientation,
bool** vertical_writing);
#endif
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle, int **block_orientation,
bool **vertical_writing);
/* Page iterator */
TESS_API void TessPageIteratorDelete(TessPageIterator* handle);
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator* TessPageIteratorCopy(const TessPageIterator* handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
TESS_API void TessPageIteratorBegin(TessPageIterator* handle);
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator* handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle,
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle,
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator* handle,
TessPageIteratorLevel level,
int* left, int* top, int* right,
int* bottom);
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level, int *left, int *top,
int *right, int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator* handle);
TESS_API TessPolyBlockType TessPageIteratorBlockType(const TessPageIterator *handle);
TESS_API struct Pix* TessPageIteratorGetBinaryImage(
const TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetBinaryImage(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API struct Pix* TessPageIteratorGetImage(const TessPageIterator* handle,
TessPageIteratorLevel level,
int padding,
struct Pix* original_image,
int* left, int* top);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level, int padding,
struct Pix *original_image, int *left, int *top);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator* handle,
TessPageIteratorLevel level, int* x1,
int* y1, int* x2, int* y2);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle, TessPageIteratorLevel level,
int *x1, int *y1, int *x2, int *y2);
TESS_API void TessPageIteratorOrientation(
TessPageIterator* handle, TessOrientation* orientation,
TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
float* deskew_angle);
TESS_API void TessPageIteratorOrientation(TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction,
TessTextlineOrder *textline_order, float *deskew_angle);
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator* handle, TessParagraphJustification* justification,
BOOL* is_list_item, BOOL* is_crown, int* first_line_indent);
TESS_API void TessPageIteratorParagraphInfo(TessPageIterator *handle,
TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown,
int *first_line_indent);
/* Result iterator */
TESS_API void TessResultIteratorDelete(TessResultIterator* handle);
TESS_API TessResultIterator* TessResultIteratorCopy(
const TessResultIterator* handle);
TESS_API TessPageIterator* TessResultIteratorGetPageIterator(
TessResultIterator* handle);
TESS_API const TessPageIterator* TessResultIteratorGetPageIteratorConst(
const TessResultIterator* handle);
TESS_API TessChoiceIterator* TessResultIteratorGetChoiceIterator(
const TessResultIterator* handle);
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(const TessResultIterator *handle);
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator* handle,
TessPageIteratorLevel level);
TESS_API char* TessResultIteratorGetUTF8Text(const TessResultIterator* handle,
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle, TessPageIteratorLevel level);
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator* handle,
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API const char* TessResultIteratorWordRecognitionLanguage(
const TessResultIterator* handle);
TESS_API const char* TessResultIteratorWordFontAttributes(
const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps,
int* pointsize, int* font_id);
TESS_API const char *TessResultIteratorWordRecognitionLanguage(const TessResultIterator *handle);
TESS_API const char *TessResultIteratorWordFontAttributes(const TessResultIterator *handle,
BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace,
BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle);
TESS_API BOOL TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator* handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator* handle);
TESS_API const char* TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator* handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator* handle);
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
TESS_API const char *TessChoiceIteratorGetUTF8Text(const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
TESS_API ETEXT_DESC* TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC* monitor);
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC* monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC* monitor, void* cancelThis);
TESS_API void* TessMonitorGetCancelThis(ETEXT_DESC* monitor);
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC* monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC* monitor);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline);
#ifndef DISABLED_LEGACY_ENGINE
# ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle,
TessFillLatticeFunc f);
TESS_API void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob,
INT_FEATURE_STRUCT* int_features,
int* num_features,
int* FeatureOutlineIndex);
TESS_API ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
int right, int bottom);
TESS_API void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob,
int num_max_matches,
int* unichar_ids, float* ratings,
int* num_matches_returned);
TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender,
float ascender);
TESS_API TBLOB* TessMakeTBLOB(Pix* pix);
TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
TESS_API BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
TESS_API void TessDeleteBlockList(BLOCK_LIST* block_list);
# endif // def TESS_CAPI_INCLUDE_BASEAPI
#endif // ndef DISABLED_LEGACY_ENGINE
TESS_API ETEXT_DESC *TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor, TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor, TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_
#endif // API_CAPI_H_

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////
// File: platform.h
// File: export.h
// Description: Place holder
//
// (C) Copyright 2006, Google Inc.
@ -15,45 +15,25 @@
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCUTIL_PLATFORM_H_
#define TESSERACT_CCUTIL_PLATFORM_H_
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
#define DLLSYM
#ifndef _WIN32
# ifdef __cplusplus
# include <climits>
# else /* C compiler*/
# include <limits.h>
# endif /* __cplusplus */
# ifndef PATH_MAX
# define MAX_PATH 4096
# else
# define MAX_PATH PATH_MAX
# endif
#endif
#if defined(_WIN32) || defined(__CYGWIN__)
# if defined(TESS_EXPORTS)
# define TESS_API __declspec(dllexport)
# elif defined(TESS_IMPORTS)
# define TESS_API __declspec(dllimport)
# else
# define TESS_API
# endif
# define TESS_LOCAL
#else
# if __GNUC__ >= 4
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
# define TESS_API __attribute__((visibility("default")))
# define TESS_LOCAL __attribute__((visibility("hidden")))
#ifndef TESS_API
# if defined(_WIN32) || defined(__CYGWIN__)
# if defined(TESS_EXPORTS)
# define TESS_API __declspec(dllexport)
# elif defined(TESS_IMPORTS)
# define TESS_API __declspec(dllimport)
# else
# define TESS_API
# define TESS_LOCAL
# endif
# else
# define TESS_API
# define TESS_LOCAL
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
# define TESS_API __attribute__((visibility("default")))
# else
# define TESS_API
# endif
# endif
#endif
#endif // TESSERACT_CCUTIL_PLATFORM_H_
#endif // TESSERACT_PLATFORM_H_

View File

@ -20,17 +20,17 @@
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "pageiterator.h" // for PageIterator
#include "platform.h" // for TESS_API
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
namespace tesseract {
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
@ -40,14 +40,14 @@ class Tesseract;
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.
class TESS_API LTRResultIterator : public PageIterator {
friend class ChoiceIterator;
public:
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
// via the BaseAPI. They represent the coordinates of some rectangle in an
@ -60,9 +60,8 @@ class TESS_API LTRResultIterator : public PageIterator {
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres,
int rect_left, int rect_top, int rect_width, int rect_height);
~LTRResultIterator() override;
@ -81,21 +80,20 @@ class TESS_API LTRResultIterator : public PageIterator {
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char* GetUTF8Text(PageIteratorLevel level) const;
char *GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
void SetLineSeparator(const char* new_line);
void SetLineSeparator(const char *new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
void SetParagraphSeparator(const char* new_para);
void SetParagraphSeparator(const char *new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;
// Returns the attributes of the current row.
void RowAttributes(float* row_height, float* descenders,
float* ascenders) const;
void RowAttributes(float *row_height, float *descenders, float *ascenders) const;
// ============= Functions that refer to words only ============.
@ -107,14 +105,13 @@ class TESS_API LTRResultIterator : public PageIterator {
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printer's points (1/72 inch).
const char* WordFontAttributes(bool* is_bold, bool* is_italic,
bool* is_underlined, bool* is_monospace,
bool* is_serif, bool* is_smallcaps,
int* pointsize, int* font_id) const;
const char *WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined,
bool *is_monospace, bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const;
// Return the name of the language used to recognize this word.
// On error, nullptr. Do not delete this pointer.
const char* WordRecognitionLanguage() const;
const char *WordRecognitionLanguage() const;
// Return the overall directionality of this word.
StrongScriptDirection WordDirection() const;
@ -133,34 +130,34 @@ class TESS_API LTRResultIterator : public PageIterator {
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void* GetParamsTrainingBundle() const;
const void *GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char* GetBlamerDebug() const;
const char *GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char* GetBlamerMisadaptionDebug() const;
const char *GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char* str) const;
bool EquivalentToTruth(const char *str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char* WordTruthUTF8Text() const;
char *WordTruthUTF8Text() const;
// Returns a null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char* WordNormedUTF8Text() const;
char *WordNormedUTF8Text() const;
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char* WordLattice(int* lattice_size) const;
const char *WordLattice(int *lattice_size) const;
// ============= Functions that refer to symbols only ============.
@ -177,18 +174,18 @@ class TESS_API LTRResultIterator : public PageIterator {
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
protected:
const char* line_separator_;
const char* paragraph_separator_;
protected:
const char *line_separator_;
const char *paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
class ChoiceIterator {
public:
class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that it is useless.
explicit ChoiceIterator(const LTRResultIterator& result_it);
explicit ChoiceIterator(const LTRResultIterator &result_it);
~ChoiceIterator();
// Moves to the next choice for the symbol and returns false if there
@ -201,7 +198,7 @@ class ChoiceIterator {
// choice.
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
// internal structure and should NOT be delete[]ed to free after use.
const char* GetUTF8Text() const;
const char *GetUTF8Text() const;
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
@ -215,19 +212,19 @@ class ChoiceIterator {
// selected symbol. A timestep is a vector containing pairs of symbols and
// floating point numbers. The number states the probability for the
// corresponding symbol.
std::vector<std::vector<std::pair<const char*, float>>>* Timesteps() const;
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
private:
private:
// Clears the remaining spaces out of the results and adapts the probabilities.
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES* word_res_;
WERD_RES *word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT* choice_it_;
std::vector<std::pair<const char*, float>>* LSTM_choices_ = nullptr;
std::vector<std::pair<const char*, float>>::iterator LSTM_choice_it_;
BLOB_CHOICE_IT *choice_it_;
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
const int* tstep_index_;
const int *tstep_index_;
// regulates the rating granularity
double rating_coefficient_;
// leading blanks
@ -236,6 +233,6 @@ class ChoiceIterator {
bool oemLSTM_;
};
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -29,6 +29,8 @@
#include <chrono>
#include <ctime>
namespace tesseract {
/**********************************************************************
* EANYCODE_CHAR
* Description of a single character. The character code is defined by
@ -53,7 +55,7 @@
* version.
**********************************************************************/
typedef struct { /*single character */
struct EANYCODE_CHAR { /*single character */
// It should be noted that the format for char_code for version 2.0 and beyond
// is UTF8 which means that ASCII characters will come out as one structure
// but other characters will be returned in two or more instances of this
@ -72,7 +74,7 @@ typedef struct { /*single character */
uint8_t point_size; /*of char, 72=1 inch, (10) */
int8_t blanks; /*no of spaces before this char (1) */
uint8_t formatting; /*char formatting (0) */
} EANYCODE_CHAR; /*single character */
};
/**********************************************************************
* ETEXT_DESC
@ -92,62 +94,58 @@ typedef struct { /*single character */
**********************************************************************/
class ETEXT_DESC;
using CANCEL_FUNC = bool (*)(void*, int);
using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC*, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
// Output header that doubles as a progress monitor: it carries progress
// counters, optional cancel/progress callbacks and an optional deadline
// (end_time) that deadline_exceeded() checks against the steady clock.
class ETEXT_DESC { // output header
public:
int16_t count{0}; /// chars in this buffer(0)
int16_t progress{0}; /// percent complete increasing (0-100)
class ETEXT_DESC { // output header
public:
int16_t count{0}; /// chars in this buffer(0)
int16_t progress{0}; /// percent complete increasing (0-100)
/** Progress monitor covers word recognition and it does not cover layout
* analysis.
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
int8_t more_to_come{0}; /// true if not last
volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
int8_t err_code{0}; /// for errcode use
CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
PROGRESS_FUNC progress_callback{
nullptr}; /// called whenever progress increases
PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
void* cancel_this{nullptr}; /// this or other data for cancel
int8_t more_to_come{0}; /// true if not last
volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
int8_t err_code{0}; /// for errcode use
CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
PROGRESS_FUNC progress_callback{nullptr}; /// called whenever progress increases
PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
void *cancel_this{nullptr}; /// this or other data for cancel
std::chrono::steady_clock::time_point end_time;
/// Time to stop. Expected to be set only
/// by call to set_deadline_msecs().
EANYCODE_CHAR text[1]{}; /// character data
EANYCODE_CHAR text[1]{}; /// character data
// Installs default_progress_func as progress_callback2 and resets end_time to
// a value-initialized time_point (zero time since epoch), which
// deadline_exceeded() interprets as "no deadline set".
ETEXT_DESC() : progress_callback2(&default_progress_func) {
end_time = std::chrono::time_point<std::chrono::steady_clock,
std::chrono::milliseconds>();
end_time = std::chrono::time_point<std::chrono::steady_clock, std::chrono::milliseconds>();
}
// Sets the end time to be deadline_msecs milliseconds from now.
void set_deadline_msecs(int32_t deadline_msecs) {
// Zero or negative values are ignored: end_time keeps its previous value.
if (deadline_msecs > 0) {
end_time = std::chrono::steady_clock::now() +
std::chrono::milliseconds(deadline_msecs);
end_time = std::chrono::steady_clock::now() + std::chrono::milliseconds(deadline_msecs);
}
}
// Returns false if we've not passed the end_time, or have not set a deadline.
bool deadline_exceeded() const {
// A zero time-since-epoch is the "no deadline" sentinel left by the
// constructor.
if (end_time.time_since_epoch() ==
std::chrono::steady_clock::duration::zero()) {
if (end_time.time_since_epoch() == std::chrono::steady_clock::duration::zero()) {
return false;
}
auto now = std::chrono::steady_clock::now();
return (now > end_time);
}
private:
// Default value of progress_callback2: forwards to the legacy
// progress_callback (when one is set), passing ths->progress as the first
// argument, and returns its result. Returns true when no legacy callback is
// installed.
static bool default_progress_func(ETEXT_DESC* ths, int left, int right,
int top, int bottom) {
private:
static bool default_progress_func(ETEXT_DESC *ths, int left, int right, int top, int bottom) {
if (ths->progress_callback != nullptr) {
return (*(ths->progress_callback))(ths->progress, left, right, top,
bottom);
return (*(ths->progress_callback))(ths->progress, left, right, top, bottom);
}
return true;
}
};
#endif // CCUTIL_OCRCLASS_H_
} // namespace tesseract
#endif // CCUTIL_OCRCLASS_H_

View File

@ -20,27 +20,25 @@
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "platform.h" // for TESS_API
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class STRING;
class TO_BLOCK_LIST;
class UNICHARSET;
template <typename T>
class GenericVector;
namespace tesseract {
class Tesseract;
}
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
struct OSBestResult {
OSBestResult()
: orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
int orientation_id;
int script_id;
float sconfidence;
@ -50,7 +48,8 @@ struct OSBestResult {
struct OSResults {
OSResults() : unicharset(nullptr) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j) scripts_na[i][j] = 0;
for (int j = 0; j < kMaxNumberOfScripts; ++j)
scripts_na[i][j] = 0;
orientations[i] = 0;
}
}
@ -63,7 +62,7 @@ struct OSResults {
// Return the index of the script with the highest score for this orientation.
TESS_API int get_best_script(int orientation_id) const;
// Accumulate scores with given OSResults instance and update the best script.
void accumulate(const OSResults& osr);
void accumulate(const OSResults &osr);
// Print statistics.
void print_scores(void) const;
@ -77,34 +76,33 @@ struct OSResults {
// Script confidence scores for each of 4 possible orientations.
float scripts_na[4][kMaxNumberOfScripts];
UNICHARSET* unicharset;
UNICHARSET *unicharset;
OSBestResult best_result;
};
class OrientationDetector {
public:
OrientationDetector(const GenericVector<int>* allowed_scripts,
OSResults* results);
bool detect_blob(BLOB_CHOICE_LIST* scores);
public:
OrientationDetector(const std::vector<int> *allowed_scripts, OSResults *results);
bool detect_blob(BLOB_CHOICE_LIST *scores);
int get_orientation();
private:
OSResults* osr_;
const GenericVector<int>* allowed_scripts_;
private:
OSResults *osr_;
const std::vector<int> *allowed_scripts_;
};
class ScriptDetector {
public:
ScriptDetector(const GenericVector<int>* allowed_scripts, OSResults* osr,
tesseract::Tesseract* tess);
void detect_blob(BLOB_CHOICE_LIST* scores);
public:
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess);
void detect_blob(BLOB_CHOICE_LIST *scores);
bool must_stop(int orientation);
private:
OSResults* osr_;
static const char* korean_script_;
static const char* japanese_script_;
static const char* fraktur_script_;
private:
OSResults *osr_;
static const char *korean_script_;
static const char *japanese_script_;
static const char *fraktur_script_;
int korean_id_;
int japanese_id_;
int katakana_id_;
@ -113,26 +111,25 @@ class ScriptDetector {
int hangul_id_;
int latin_id_;
int fraktur_id_;
tesseract::Tesseract* tess_;
const GenericVector<int>* allowed_scripts_;
tesseract::Tesseract *tess_;
const std::vector<int> *allowed_scripts_;
};
int orientation_and_script_detection(STRING& filename, OSResults*,
tesseract::Tesseract*);
int orientation_and_script_detection(const char *filename, OSResults *, tesseract::Tesseract *);
int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
tesseract::Tesseract* tess);
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess);
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
BLOBNBOX_CLIST* blob_list, OSResults* osr,
tesseract::Tesseract* tess);
int os_detect_blobs(const std::vector<int> *allowed_scripts, BLOBNBOX_CLIST *blob_list,
OSResults *osr, tesseract::Tesseract *tess);
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s,
OSResults*, tesseract::Tesseract* tess);
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *,
tesseract::Tesseract *tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
TESS_API int OrientationIdToValue(const int& id);
TESS_API int OrientationIdToValue(const int &id);
#endif // TESSERACT_CCMAIN_OSDETECT_H_
} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -3,7 +3,6 @@
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
// Created: Fri Feb 26 11:01:06 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -21,18 +20,19 @@
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "platform.h"
#include "export.h"
#include "publictypes.h"
struct Pix;
struct Pta;
namespace tesseract {
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
struct Pix;
struct Pta;
namespace tesseract {
class Tesseract;
@ -44,13 +44,13 @@ class Tesseract;
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
* See apitypes.h for the definition of PageIteratorLevel.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
@ -65,9 +65,8 @@ class TESS_API PageIterator {
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left,
int rect_top, int rect_width, int rect_height);
virtual ~PageIterator();
/**
@ -76,11 +75,11 @@ class TESS_API PageIterator {
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator& src);
const PageIterator& operator=(const PageIterator& src);
PageIterator(const PageIterator &src);
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT* other) const;
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
@ -154,8 +153,7 @@ class TESS_API PageIterator {
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
@ -163,7 +161,7 @@ class TESS_API PageIterator {
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator& other) const;
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
@ -187,8 +185,7 @@ class TESS_API PageIterator {
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
void SetBoundingBoxComponents(bool include_upper_dots, bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
@ -202,24 +199,23 @@ class TESS_API PageIterator {
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int* left, int* top, int* right,
int* bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding, int* left, int* top,
int* right, int* bottom) const;
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top, int *right,
int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int* left, int* top,
int* right, int* bottom) const;
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block. See apitypes.h for
* PolyBlockType.
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
@ -230,7 +226,7 @@ class TESS_API PageIterator {
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta* BlockPolygon() const;
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
@ -238,7 +234,7 @@ class TESS_API PageIterator {
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix* GetBinaryImage(PageIteratorLevel level) const;
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
@ -251,8 +247,7 @@ class TESS_API PageIterator {
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix* GetImage(PageIteratorLevel level, int padding, Pix* original_img,
int* left, int* top) const;
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
@ -260,8 +255,7 @@ class TESS_API PageIterator {
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int* x1, int* y1, int* x2,
int* y2) const;
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const;
/**
* Returns orientation for the block the iterator points to.
@ -271,10 +265,9 @@ class TESS_API PageIterator {
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation* orientation,
tesseract::WritingDirection* writing_direction,
tesseract::TextlineOrder* textline_order,
float* deskew_angle) const;
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order, float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
@ -304,37 +297,36 @@ class TESS_API PageIterator {
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification* justification,
bool* is_list_item, bool* is_crown,
int* first_line_indent) const;
void ParagraphInfo(tesseract::ParagraphJustification *justification, bool *is_list_item,
bool *is_crown, int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle* blamer_bundle);
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
TESS_LOCAL void BeginWord(int offset);
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES* page_res_;
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract* tesseract_;
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this ResultIterator.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT* it_;
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API
*/
WERD* word_;
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
@ -344,7 +336,7 @@ class TESS_API PageIterator {
* OCR results in the box_word.
* Owned by this ResultIterator.
*/
C_BLOB_IT* cblob_it_;
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
@ -357,6 +349,6 @@ class TESS_API PageIterator {
int rect_height_;
};
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -19,6 +19,8 @@
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
@ -26,7 +28,6 @@
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
// API-level code should include apitypes.h in preference to this file.
/** Number of printers' points in an inch. The unit of the pointsize return. */
constexpr int kPointsPerInch = 72;
@ -50,21 +51,21 @@ constexpr int kResolutionEstimationFactor = 10;
* Used extensively by ColPartition, and POLY_BLOCK.
*/
enum PolyBlockType {
PT_UNKNOWN, // Type is not yet known. Keep as the first element.
PT_FLOWING_TEXT, // Text that lives inside a column.
PT_HEADING_TEXT, // Text that spans more than one column.
PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
PT_EQUATION, // Partition belonging to an equation region.
PT_INLINE_EQUATION, // Partition has inline equation.
PT_TABLE, // Partition belonging to a table region.
PT_VERTICAL_TEXT, // Text-line runs vertically.
PT_CAPTION_TEXT, // Text that belongs to an image.
PT_FLOWING_IMAGE, // Image that lives inside a column.
PT_HEADING_IMAGE, // Image that spans more than one column.
PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
PT_HORZ_LINE, // Horizontal Line.
PT_VERT_LINE, // Vertical Line.
PT_NOISE, // Lies outside of any column.
PT_UNKNOWN, // Type is not yet known. Keep as the first element.
PT_FLOWING_TEXT, // Text that lives inside a column.
PT_HEADING_TEXT, // Text that spans more than one column.
PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
PT_EQUATION, // Partition belonging to an equation region.
PT_INLINE_EQUATION, // Partition has inline equation.
PT_TABLE, // Partition belonging to a table region.
PT_VERTICAL_TEXT, // Text-line runs vertically.
PT_CAPTION_TEXT, // Text that belongs to an image.
PT_FLOWING_IMAGE, // Image that lives inside a column.
PT_HEADING_IMAGE, // Image that spans more than one column.
PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
PT_HORZ_LINE, // Horizontal Line.
PT_VERT_LINE, // Vertical Line.
PT_NOISE, // Lies outside of any column.
PT_COUNT // Number of block types; not a type itself. Keep as the last element.
};
@ -74,14 +75,12 @@ inline bool PTIsLineType(PolyBlockType type) {
}
/** Returns true if PolyBlockType is of image type */
inline bool PTIsImageType(PolyBlockType type) {
// Only the three *_IMAGE block types qualify; PT_CAPTION_TEXT (text that
// belongs to an image) is not an image type.
return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
type == PT_PULLOUT_IMAGE;
return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE || type == PT_PULLOUT_IMAGE;
}
/** Returns true if PolyBlockType is of text type */
inline bool PTIsTextType(PolyBlockType type) {
// Besides the *_TEXT types, tables and inline equations count as text.
// PT_EQUATION (a whole equation region) is deliberately absent from the list.
return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
type == PT_PULLOUT_TEXT || type == PT_TABLE ||
type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT || type == PT_PULLOUT_TEXT ||
type == PT_TABLE || type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
type == PT_INLINE_EQUATION;
}
// Returns true if PolyBlockType is of pullout(inter-column) type
@ -89,7 +88,6 @@ inline bool PTIsPulloutType(PolyBlockType type) {
return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
namespace tesseract {
/**
* +------------------+ Orientation Example:
* | 1 Aaaa Aaaa Aaaa | ====================
@ -157,26 +155,25 @@ enum TextlineOrder {
* so that the inequality test macros below work.
*/
enum PageSegMode {
PSM_OSD_ONLY = 0, ///< Orientation and script detection only.
PSM_AUTO_OSD = 1, ///< Automatic page segmentation with orientation and
///< script detection. (OSD)
PSM_AUTO_ONLY = 2, ///< Automatic page segmentation, but no OSD, or OCR.
PSM_AUTO = 3, ///< Fully automatic page segmentation, but no OSD.
PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
///< vertically aligned text.
PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line.
PSM_SINGLE_WORD = 8, ///< Treat the image as a single word.
PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle.
PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
PSM_SPARSE_TEXT =
11, ///< Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
///< hacks that are Tesseract-specific.
PSM_OSD_ONLY = 0, ///< Orientation and script detection only.
PSM_AUTO_OSD = 1, ///< Automatic page segmentation with orientation and
///< script detection. (OSD)
PSM_AUTO_ONLY = 2, ///< Automatic page segmentation, but no OSD, or OCR.
PSM_AUTO = 3, ///< Fully automatic page segmentation, but no OSD.
PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
///< vertically aligned text.
PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line.
PSM_SINGLE_WORD = 8, ///< Treat the image as a single word.
PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle.
PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
PSM_SPARSE_TEXT = 11, ///< Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
///< hacks that are Tesseract-specific.
PSM_COUNT ///< Number of enum entries.
PSM_COUNT ///< Number of enum entries.
};
/**
@ -214,11 +211,11 @@ inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
* have 5x as many functions.
*/
enum PageIteratorLevel {
RIL_BLOCK, // Block of text/image/separator line.
RIL_PARA, // Paragraph within a block.
RIL_TEXTLINE, // Line within a paragraph.
RIL_WORD, // Word within a textline.
RIL_SYMBOL // Symbol/character within a word.
RIL_BLOCK, // Block of text/image/separator line.
RIL_PARA, // Paragraph within a block.
RIL_TEXTLINE, // Line within a paragraph.
RIL_WORD, // Word within a textline.
RIL_SYMBOL // Symbol/character within a word.
};
/**
@ -263,21 +260,21 @@ enum ParagraphJustification {
* mention the connection to OcrEngineMode in the comments.
*/
enum OcrEngineMode {
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
// to Tesseract when things get difficult.
// deprecated
OEM_DEFAULT, // Specify this mode when calling init_*(),
// to indicate that any of the above modes
// should be automatically inferred from the
// variables in the language-specific config,
// command-line configs, or if not specified
// in any of the above should be set to the
// default OEM_TESSERACT_ONLY.
OEM_COUNT // Number of OEMs
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
// to Tesseract when things get difficult.
// deprecated
OEM_DEFAULT, // Specify this mode when calling init_*(),
// to indicate that any of the above modes
// should be automatically inferred from the
// variables in the language-specific config,
// command-line configs, or if not specified
// in any of the above should be set to the
// default OEM_TESSERACT_ONLY.
OEM_COUNT // Number of OEMs
};
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -18,14 +18,13 @@
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <string> // for std::string
#include "genericvector.h"
#include "platform.h"
#include "strngs.h" // for STRING
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
@ -47,16 +46,16 @@ class TessBaseAPI;
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
public:
virtual ~TessResultRenderer();
// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer* next);
void insert(TessResultRenderer *next);
// Returns the next renderer or nullptr.
TessResultRenderer* next() {
TessResultRenderer *next() {
return next_;
}
@ -65,7 +64,7 @@ class TESS_API TessResultRenderer {
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char* title);
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
@ -75,7 +74,7 @@ class TESS_API TessResultRenderer {
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI* api);
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data
@ -83,10 +82,10 @@ class TESS_API TessResultRenderer {
*/
bool EndDocument();
const char* file_extension() const {
const char *file_extension() const {
return file_extension_;
}
const char* title() const {
const char *title() const {
return title_.c_str();
}
@ -108,7 +107,7 @@ class TESS_API TessResultRenderer {
return imagenum_;
}
protected:
protected:
/**
* Called by concrete classes.
*
@ -119,13 +118,13 @@ class TESS_API TessResultRenderer {
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char* outputbase, const char* extension);
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI* api) = 0;
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
@ -133,62 +132,62 @@ class TESS_API TessResultRenderer {
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char* s);
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char* s, int len);
void AppendData(const char *s, int len);
private:
const char* file_extension_; // standard extension for generated output
STRING title_; // title of document being rendered
int imagenum_; // index of last image added
private:
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
FILE* fout_; // output file pointer
TessResultRenderer* next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
FILE *fout_; // output file pointer
TessResultRenderer *next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
explicit TessTextRenderer(const char* outputbase);
public:
explicit TessTextRenderer(const char *outputbase);
protected:
bool AddImageHandler(TessBaseAPI* api) override;
protected:
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into an hocr text string
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
explicit TessHOcrRenderer(const char* outputbase, bool font_info);
explicit TessHOcrRenderer(const char* outputbase);
public:
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
explicit TessHOcrRenderer(const char *outputbase);
protected:
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI* api) override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into an alto text string
*/
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
explicit TessAltoRenderer(const char* outputbase);
public:
explicit TessAltoRenderer(const char *outputbase);
protected:
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI* api) override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
};
@ -196,99 +195,97 @@ class TESS_API TessAltoRenderer : public TessResultRenderer {
* Renders Tesseract output into a TSV string
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
explicit TessTsvRenderer(const char* outputbase, bool font_info);
explicit TessTsvRenderer(const char* outputbase);
public:
explicit TessTsvRenderer(const char *outputbase, bool font_info);
explicit TessTsvRenderer(const char *outputbase);
protected:
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI* api) override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
TessPDFRenderer(const char* outputbase, const char* datadir,
bool textonly = false);
TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly = false);
protected:
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI* api) override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
GenericVector<long int> offsets_; // offset of every PDF object in bytes
GenericVector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
long int obj_; // counter for PDF objects
std::vector<long int> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char* data);
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
char** pdf_object, long int* pdf_object_size,
int jpg_quality);
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum, char **pdf_object,
long int *pdf_object_size, int jpg_quality);
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
explicit TessUnlvRenderer(const char* outputbase);
public:
explicit TessUnlvRenderer(const char *outputbase);
protected:
bool AddImageHandler(TessBaseAPI* api) override;
protected:
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
*/
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
explicit TessLSTMBoxRenderer(const char* outputbase);
public:
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
bool AddImageHandler(TessBaseAPI* api) override;
protected:
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
explicit TessBoxTextRenderer(const char* outputbase);
public:
explicit TessBoxTextRenderer(const char *outputbase);
protected:
bool AddImageHandler(TessBaseAPI* api) override;
protected:
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string in WordStr format
*/
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
explicit TessWordStrBoxRenderer(const char* outputbase);
public:
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
bool AddImageHandler(TessBaseAPI* api) override;
protected:
bool AddImageHandler(TessBaseAPI *api) override;
};
#ifndef DISABLED_LEGACY_ENGINE
@ -297,15 +294,15 @@ class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
* Renders tesseract output into an osd text string
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
explicit TessOsdRenderer(const char* outputbase);
public:
explicit TessOsdRenderer(const char *outputbase);
protected:
bool AddImageHandler(TessBaseAPI* api) override;
protected:
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_
#endif // TESSERACT_API_RENDERER_H_

View File

@ -4,7 +4,6 @@
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
// Created: Fri May 27 13:58:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -22,28 +21,19 @@
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include <set> // for std::pair
#include <vector> // for std::vector
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include "ltrresultiterator.h" // for LTRResultIterator
#include "platform.h" // for TESS_API, TESS_LOCAL
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
template <typename T>
class GenericVector;
template <typename T>
class GenericVectorEqEq;
class STRING;
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
class Tesseract;
class TESS_API ResultIterator : public LTRResultIterator {
public:
static ResultIterator* StartOfParagraph(const LTRResultIterator& resit);
public:
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
@ -85,8 +75,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
@ -98,15 +87,15 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char* GetUTF8Text(PageIteratorLevel level) const;
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
GetRawLSTMTimesteps() const;
virtual std::vector<std::vector<std::pair<const char*, float>>>*
GetBestLSTMSymbolChoices() const;
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
virtual std::vector<std::vector<std::pair<const char *, float>>> *GetBestLSTMSymbolChoices()
const;
/**
* Return whether the current paragraph's dominant reading direction
@ -138,25 +127,24 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const GenericVector<StrongScriptDirection>& word_dirs,
GenericVectorEqEq<int>* reading_order);
static void CalculateTextlineOrder(bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is protected rather than public because it does something that is non-obvious:
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit);
explicit ResultIterator(const LTRResultIterator &resit);
private:
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_ltr_ instead.
@ -174,14 +162,12 @@ class TESS_API ResultIterator : public LTRResultIterator {
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator& resit,
GenericVectorEqEq<int>* indices) const;
void CalculateTextlineOrder(bool paragraph_is_ltr, const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator& resit,
GenericVector<StrongScriptDirection>* ssd,
GenericVectorEqEq<int>* indices) const;
void CalculateTextlineOrder(bool paragraph_is_ltr, const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
@ -193,7 +179,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(GenericVector<int>* blob_indices) const;
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
@ -214,10 +200,10 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(STRING* text) const;
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(STRING* text) const;
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
@ -226,7 +212,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(STRING* text);
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
@ -234,7 +220,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(STRING* text) const;
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
@ -257,6 +243,6 @@ class TESS_API ResultIterator : public LTRResultIterator {
bool preserve_interword_spaces_;
};
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,163 +0,0 @@
/**********************************************************************
* File: serialis.h (Formerly serialmac.h)
* Description: Inline routines and macros for serialisation functions
* Author: Phil Cheatle
*
* (C) Copyright 1990, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef SERIALIS_H
#define SERIALIS_H
#include <cstdint> // uint8_t
#include <cstdio>
#include <cstdlib>
#include <cstring>
template <typename T>
class GenericVector;
class STRING;
/***********************************************************************
QUOTE_IT MACRO DEFINITION
===========================
Replace <parm> with "<parm>". <parm> may be an arbitrary number of tokens
***********************************************************************/
#define QUOTE_IT(parm) #parm
namespace tesseract {
// Yields the element count of a built-in array. The count is deduced from
// the array reference's type at compile time; the array itself is never read.
template <typename Elem, size_t Len>
constexpr size_t countof(const Elem (&)[Len]) noexcept {
  return Len;
}
// Pointer to a function that reads a whole file into a GenericVector<char>.
// filename names the file to read; the bytes are stored through data.
// Returns false on failure.
using FileReader = bool (*)(const char* filename, GenericVector<char>* data);
// Pointer to a function that writes a GenericVector<char> out as a whole file.
// data holds the bytes to write; filename names the destination file.
// Returns false on failure.
using FileWriter = bool (*)(const GenericVector<char>& data,
const char* filename);
// Deserialize data from file.
// One overload per fixed-size element type; fp is the stdio stream to read
// from and data is the destination. n defaults to 1.
// NOTE(review): n looks like an element count (not a byte count) and the
// bool result presumably reports success -- confirm in the implementation.
bool DeSerialize(FILE* fp, char* data, size_t n = 1);
bool DeSerialize(FILE* fp, float* data, size_t n = 1);
bool DeSerialize(FILE* fp, int8_t* data, size_t n = 1);
bool DeSerialize(FILE* fp, int16_t* data, size_t n = 1);
bool DeSerialize(FILE* fp, int32_t* data, size_t n = 1);
bool DeSerialize(FILE* fp, uint8_t* data, size_t n = 1);
bool DeSerialize(FILE* fp, uint16_t* data, size_t n = 1);
bool DeSerialize(FILE* fp, uint32_t* data, size_t n = 1);
// Serialize data to file.
// Mirror images of the DeSerialize overloads above: the same element types,
// with data taken as a pointer-to-const source. n defaults to 1.
// NOTE(review): n looks like an element count (not a byte count) and the
// bool result presumably reports success -- confirm in the implementation.
bool Serialize(FILE* fp, const char* data, size_t n = 1);
bool Serialize(FILE* fp, const float* data, size_t n = 1);
bool Serialize(FILE* fp, const int8_t* data, size_t n = 1);
bool Serialize(FILE* fp, const int16_t* data, size_t n = 1);
bool Serialize(FILE* fp, const int32_t* data, size_t n = 1);
bool Serialize(FILE* fp, const uint8_t* data, size_t n = 1);
bool Serialize(FILE* fp, const uint16_t* data, size_t n = 1);
bool Serialize(FILE* fp, const uint32_t* data, size_t n = 1);
// Simple file class.
// Allows for portable file input from memory and from foreign file systems.
// The contents live in a GenericVector<char> buffer (data_) together with a
// running byte offset (offset_). A TFile is open either for reading or for
// writing (is_writing_); mixed read/write is not supported.
class TFile {
public:
TFile();
~TFile();
// All the Open methods load the whole file into memory for reading.
// Opens a file with a supplied reader, or nullptr to use the default.
// Note that mixed read/write is not supported.
bool Open(const STRING& filename, FileReader reader);
// From an existing memory buffer.
// data points at the bytes and size is their length.
bool Open(const char* data, int size);
// From an open file and an end offset.
// NOTE(review): end_offset presumably marks where reading of fp stops --
// confirm whether it is absolute or relative to the current position.
bool Open(FILE* fp, int64_t end_offset);
// Sets the value of the swap flag, so that FReadEndian does the right thing.
void set_swap(bool value) {
swap_ = value;
}
// Deserialize data.
// One overload per fixed-size element type; count defaults to 1.
// NOTE(review): count looks like an element count and the bool result
// presumably reports success -- confirm in the implementation.
bool DeSerialize(char* data, size_t count = 1);
bool DeSerialize(double* data, size_t count = 1);
bool DeSerialize(float* data, size_t count = 1);
bool DeSerialize(int8_t* data, size_t count = 1);
bool DeSerialize(int16_t* data, size_t count = 1);
bool DeSerialize(int32_t* data, size_t count = 1);
bool DeSerialize(int64_t* data, size_t count = 1);
bool DeSerialize(uint8_t* data, size_t count = 1);
bool DeSerialize(uint16_t* data, size_t count = 1);
bool DeSerialize(uint32_t* data, size_t count = 1);
bool DeSerialize(uint64_t* data, size_t count = 1);
// Serialize data.
// Mirror images of the DeSerialize overloads above, taking the source as a
// pointer-to-const; count defaults to 1.
bool Serialize(const char* data, size_t count = 1);
bool Serialize(const double* data, size_t count = 1);
bool Serialize(const float* data, size_t count = 1);
bool Serialize(const int8_t* data, size_t count = 1);
bool Serialize(const int16_t* data, size_t count = 1);
bool Serialize(const int32_t* data, size_t count = 1);
bool Serialize(const int64_t* data, size_t count = 1);
bool Serialize(const uint8_t* data, size_t count = 1);
bool Serialize(const uint16_t* data, size_t count = 1);
bool Serialize(const uint32_t* data, size_t count = 1);
bool Serialize(const uint64_t* data, size_t count = 1);
// Skip data.
// NOTE(review): count is presumably a number of bytes -- confirm.
bool Skip(size_t count);
// Reads a line like fgets. Returns nullptr on EOF, otherwise buffer.
// Reads at most buffer_size bytes, including '\0' terminator, even if
// the line is longer. Does nothing if buffer_size <= 0.
char* FGets(char* buffer, int buffer_size);
// Replicates fread, followed by a swap of the bytes if needed, returning the
// number of items read. If swap_ is true then the count items will each have
// size bytes reversed.
int FReadEndian(void* buffer, size_t size, int count);
// Replicates fread, returning the number of items read.
// Unlike FReadEndian, no byte swap is mentioned for this variant.
int FRead(void* buffer, size_t size, int count);
// Resets the TFile as if it has been Opened, but nothing read.
// Only allowed while reading!
void Rewind();
// Open for writing. Either supply a non-nullptr data with OpenWrite before
// calling FWrite, (no close required), or supply a nullptr data to OpenWrite
// and call CloseWrite to write to a file after the FWrites.
void OpenWrite(GenericVector<char>* data);
// Writes the accumulated data to filename using the supplied writer (see
// the OpenWrite comment for when this call is needed).
// NOTE(review): a nullptr writer presumably selects a default, as with
// Open -- confirm in the implementation.
bool CloseWrite(const STRING& filename, FileWriter writer);
// Replicates fwrite, returning the number of items written.
// To use fprintf, use snprintf and FWrite.
int FWrite(const void* buffer, size_t size, int count);
private:
// The buffered data from the file.
GenericVector<char>* data_;
// The number of bytes used so far.
int offset_;
// True if the data_ pointer is owned by *this.
bool data_is_owned_;
// True if the TFile is open for writing.
bool is_writing_;
// True if bytes need to be swapped in FReadEndian.
bool swap_;
};
} // namespace tesseract.
#endif

View File

@ -1,188 +0,0 @@
/**********************************************************************
* File: strngs.h (Formerly strings.h)
* Description: STRING class definition.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef STRNGS_H
#define STRNGS_H
#include <cassert> // for assert
#include <cstdint> // for uint32_t
#include <cstdio> // for FILE
#include <cstring> // for strncpy
#include "platform.h" // for TESS_API
namespace tesseract {
class TFile;
} // namespace tesseract.
// STRING_IS_PROTECTED means that string[index] = X is invalid
// because you have to go through strings interface to modify it.
// This allows the string to ensure internal integrity and maintain
// its own string length. Unfortunately this is not possible because
// STRINGS are used as direct-manipulation data buffers for things
// like length arrays and many places cast away the const on c_str()
// to mutate the string. Turning this off means that internally we
// cannot assume we know the strlen.
#define STRING_IS_PROTECTED 0
template <typename T>
class GenericVector;
// Character string class exported from the library (TESS_API).
// The object holds a single pointer (data_) to a heap block that starts with
// a STRING_HEADER (capacity_ / used_) and is followed directly by the
// character data; see the private section for the layout rationale.
class TESS_API STRING {
public:
STRING();
STRING(const STRING& string);
STRING(const char* string);
// Constructs from a buffer with an explicit length.
// NOTE(review): presumably data need not be null-terminated -- confirm.
STRING(const char* data, int length);
~STRING();
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
// Writes to the given file. Returns false in case of error.
bool Serialize(tesseract::TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// This overload has no swap parameter.
// NOTE(review): byte swapping is presumably governed by the TFile's own
// swap flag -- confirm in the implementation.
bool DeSerialize(tesseract::TFile* fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(tesseract::TFile* fp);
bool contains(char c) const;
int32_t length() const;
// Same value as length(), under the name used by standard containers.
int32_t size() const {
return length();
}
// Workaround to avoid g++ -Wsign-compare warnings.
// Asserts that the length is non-negative before converting it.
uint32_t unsigned_size() const {
const int32_t len = length();
assert(0 <= len);
return static_cast<uint32_t>(len);
}
const char* c_str() const;
// Returns a copy of length() + 1 chars in a buffer allocated with new[];
// the caller must release it with delete[] (not free()).
inline char* strdup() const {
int32_t len = length() + 1;
return strncpy(new char[len], GetCStr(), len);
}
#if STRING_IS_PROTECTED
const char& operator[](int32_t index) const;
// len is number of chars in s to insert starting at index in this string
void insert_range(int32_t index, const char* s, int len);
void erase_range(int32_t index, int len);
#else
// Unprotected build: hands out a mutable reference even though the method
// is const, so callers can write into the buffer directly.
char& operator[](int32_t index) const;
#endif
// Splits this string at the character c; the pieces are delivered through
// the splited vector.
// NOTE(review): whether splited is cleared first and how empty pieces are
// handled is not visible here -- confirm in the implementation.
void split(char c, GenericVector<STRING>* splited);
void truncate_at(int32_t index);
bool operator==(const STRING& string) const;
bool operator!=(const STRING& string) const;
bool operator!=(const char* string) const;
STRING& operator=(const char* string);
STRING& operator=(const STRING& string);
STRING operator+(const STRING& string) const;
STRING operator+(char ch) const;
STRING& operator+=(const char* string);
STRING& operator+=(const STRING& string);
STRING& operator+=(char ch);
// Assignment for strings which are not null-terminated.
void assign(const char* cstr, int len);
// Appends the given string and int (as a %d) to this.
// += cannot be used for ints as there is a char += operator that would
// be ambiguous, and ints usually need a string before or between them
// anyway.
void add_str_int(const char* str, int number);
// Appends the given string and double (as a %.8g) to this.
void add_str_double(const char* str, double number);
// ensure capacity but keep pointer encapsulated
inline void ensure(int32_t min_capacity) {
ensure_cstr(min_capacity);
}
private:
typedef struct STRING_HEADER {
// How much space was allocated in the string buffer for char data.
int capacity_;
// used_ is how much of the capacity is currently being used,
// including a '\0' terminator.
//
// If used_ is 0 then the string is null: c_str() is nullptr and not even
// the '\0' is stored
// else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
// else strlen is >= 0 (the string is not null) but needs to be computed.
// this condition is set when encapsulation is violated because
// an API returned a mutable string.
//
// capacity_ - used_ = excess capacity that the string can grow
// without reallocating
mutable int used_;
} STRING_HEADER;
// To preserve the behavior of the old serialization, we only have space
// for one pointer in this structure. So we are embedding a data structure
// at the start of the storage that will hold additional state variables,
// then storing the actual string contents immediately after.
STRING_HEADER* data_;
// returns the header part of the storage
inline STRING_HEADER* GetHeader() {
return data_;
}
inline const STRING_HEADER* GetHeader() const {
return data_;
}
// returns the string data part of storage
// (the bytes that start sizeof(STRING_HEADER) past data_)
inline char* GetCStr() {
return (reinterpret_cast<char*>(data_)) + sizeof(STRING_HEADER);
}
inline const char* GetCStr() const {
return (reinterpret_cast<const char*>(data_)) + sizeof(STRING_HEADER);
}
// Checks the used_ invariant described in STRING_HEADER. Only a real check
// when STRING_IS_PROTECTED is non-zero; otherwise it always returns true.
inline bool InvariantOk() const {
#if STRING_IS_PROTECTED
return (GetHeader()->used_ == 0)
? (c_str() == nullptr)
: (GetHeader()->used_ == (strlen(c_str()) + 1));
#else
return true;
#endif
}
// Ensure string has requested capacity as optimization
// to avoid unnecessary reallocations.
// The return value is a cstr buffer with at least requested capacity
char* ensure_cstr(int32_t min_capacity);
void FixHeader() const; // make used_ non-negative, even if const
char* AllocData(int used, int capacity);
void DiscardData();
};
#endif

View File

@ -19,7 +19,7 @@
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
#define TESSERACT_CCMAIN_THRESHOLDER_H_
#include "platform.h"
#include "export.h"
#include "publictypes.h"
struct Pix;
@ -33,7 +33,7 @@ namespace tesseract {
/// be useful for multiple calls to SetRectangle and ThresholdTo* if
/// desired.
class TESS_API ImageThresholder {
public:
public:
ImageThresholder();
virtual ~ImageThresholder();
@ -51,8 +51,8 @@ class TESS_API ImageThresholder {
/// Binary images of 1 bit per pixel may also be given but they must be
/// byte packed with the MSB of the first byte being the first pixel, and a
/// one pixel is WHITE. For binary images set bytes_per_pixel=0.
void SetImage(const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel,
int bytes_per_line);
/// Store the coordinates of the rectangle to process for later use.
/// Doesn't actually do any thresholding.
@ -62,8 +62,8 @@ class TESS_API ImageThresholder {
/// original image (not just within the rectangle).
/// Left and top are enough with top-down coordinates, but
/// the height of the rectangle and the image are needed for bottom-up.
virtual void GetImageSizes(int* left, int* top, int* width, int* height,
int* imagewidth, int* imageheight);
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
int *imageheight);
/// Return true if the source image is color.
bool IsColor() const {
@ -111,13 +111,13 @@ class TESS_API ImageThresholder {
/// SetImage for Pix clones its input, so the source pix may be pixDestroyed
/// immediately after, but may not go away until after the Thresholder has
/// finished with it.
void SetImage(const Pix* pix);
void SetImage(const Pix *pix);
/// Threshold the source image as efficiently as possible to the output Pix.
/// Creates a Pix and sets pix to point to the resulting pointer.
/// Caller must use pixDestroy to free the created Pix.
/// Returns false on error.
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix);
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
@ -126,22 +126,22 @@ class TESS_API ImageThresholder {
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
// Returns nullptr if the input is binary. PixDestroy after use.
virtual Pix* GetPixRectThresholds();
virtual Pix *GetPixRectThresholds();
/// Get a clone/copy of the source image rectangle.
/// The returned Pix must be pixDestroyed.
/// This function will be used in the future by the page layout analysis, and
/// the layout analysis that uses it will only be available with Leptonica,
/// so there is no raw equivalent.
Pix* GetPixRect();
Pix *GetPixRect();
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// Provided to the classifier to extract features from the greyscale image.
virtual Pix* GetPixRectGrey();
virtual Pix *GetPixRectGrey();
protected:
protected:
// ----------------------------------------------------------------------
// Utility functions that may be useful components for other thresholders.
@ -155,34 +155,34 @@ class TESS_API ImageThresholder {
}
// Otsu thresholds the rectangle, taking the rectangle from *this.
void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const;
void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const;
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
/// arrays and also the bytes per pixel in src_pix.
void ThresholdRectToPix(Pix* src_pix, int num_channels, const int* thresholds,
const int* hi_values, Pix** pix) const;
void ThresholdRectToPix(Pix *src_pix, int num_channels, const int *thresholds,
const int *hi_values, Pix **pix) const;
protected:
protected:
/// Clone or other copy of the source Pix.
/// The pix will always be PixDestroy()ed on destruction of the class.
Pix* pix_;
Pix *pix_;
int image_width_; ///< Width of source pix_.
int image_height_; ///< Height of source pix_.
int pix_channels_; ///< Number of 8-bit channels in pix_.
int pix_wpl_; ///< Words per line of pix_.
int image_width_; ///< Width of source pix_.
int image_height_; ///< Height of source pix_.
int pix_channels_; ///< Number of 8-bit channels in pix_.
int pix_wpl_; ///< Words per line of pix_.
// Limits of image rectangle to be processed.
int scale_; ///< Scale factor from original image.
int yres_; ///< y pixels/inch in source image.
int estimated_res_; ///< Resolution estimate from text size.
int scale_; ///< Scale factor from original image.
int yres_; ///< y pixels/inch in source image.
int estimated_res_; ///< Resolution estimate from text size.
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_THRESHOLDER_H_
#endif // TESSERACT_CCMAIN_THRESHOLDER_H_

View File

@ -19,13 +19,14 @@
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include <memory.h>
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
#include "platform.h"
namespace tesseract {
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
@ -41,23 +42,21 @@ static const int INVALID_UNICHAR_ID = -1;
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
enum StrongScriptDirection {
DIR_NEUTRAL = 0, // Text contains only neutral characters.
DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
DIR_MIX = 3, // Text contains a mixture of left-to-right
// and right-to-left characters.
DIR_NEUTRAL = 0, // Text contains only neutral characters.
DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
DIR_MIX = 3, // Text contains a mixture of left-to-right
// and right-to-left characters.
};
namespace tesseract {
using char32 = signed int;
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
class UNICHAR {
public:
class TESS_API UNICHAR {
public:
UNICHAR() {
memset(chars, 0, UNICHAR_LEN);
}
@ -65,7 +64,7 @@ class UNICHAR {
// Construct from a utf8 string. If len<0 then the string is null terminated.
// If the string is too long to fit in the UNICHAR then it takes only what
// will fit.
UNICHAR(const char* utf8_str, int len);
UNICHAR(const char *utf8_str, int len);
// Construct from a single UCS4 character.
explicit UNICHAR(int unicode);
@ -82,15 +81,15 @@ class UNICHAR {
}
// Get a UTF8 string, but NOT null-terminated.
const char* utf8() const {
const char *utf8() const {
return chars;
}
// Get a terminated UTF8 string: Must delete[] it after use.
char* utf8_str() const;
char *utf8_str() const;
// Get the number of bytes in the first character of the given utf8 string.
static int utf8_step(const char* utf8_str);
static int utf8_step(const char *utf8_str);
// A class to simplify iterating over and accessing elements of a UTF8
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
@ -106,15 +105,15 @@ class UNICHAR {
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
// tprintf("Char = %s\n", buf);
// }
class const_iterator {
class TESS_API const_iterator {
using CI = const_iterator;
public:
public:
// Step to the next UTF8 character.
// If the current position is at an illegal UTF8 character, then print an
// error message and step by one byte. If the current position is at a
// null character ('\0'), don't step past it.
const_iterator& operator++();
const_iterator &operator++();
// Return the UCS-4 value at the current position.
// If the current position is at an illegal UTF8 value, return a single
@ -126,7 +125,7 @@ class UNICHAR {
// If the current position is at an illegal UTF8 value, writes a single
// space character and returns 1.
// Note that this method does not null-terminate the buffer.
int get_utf8(char* buf) const;
int get_utf8(char *buf) const;
// Returns the number of bytes of the current codepoint. Returns 1 if the
// current position is at an illegal UTF8 value.
int utf8_len() const;
@ -134,45 +133,45 @@ class UNICHAR {
bool is_legal() const;
// Return the pointer into the string at the current position.
const char* utf8_data() const {
const char *utf8_data() const {
return it_;
}
// Iterator equality operators.
friend bool operator==(const CI& lhs, const CI& rhs) {
friend bool operator==(const CI &lhs, const CI &rhs) {
return lhs.it_ == rhs.it_;
}
friend bool operator!=(const CI& lhs, const CI& rhs) {
friend bool operator!=(const CI &lhs, const CI &rhs) {
return !(lhs == rhs);
}
private:
private:
friend class UNICHAR;
explicit const_iterator(const char* it) : it_(it) {}
explicit const_iterator(const char *it) : it_(it) {}
const char* it_; // Pointer into the string.
const char *it_; // Pointer into the string.
};
// Create a start/end iterator pointing to a string. Note that these methods
// are static and do NOT create a copy or take ownership of the underlying
// array.
static const_iterator begin(const char* utf8_str, int byte_length);
static const_iterator end(const char* utf8_str, int byte_length);
static const_iterator begin(const char *utf8_str, int byte_length);
static const_iterator end(const char *utf8_str, int byte_length);
// Converts a utf-8 string to a vector of unicodes.
// Returns an empty vector if the input contains invalid UTF-8.
static std::vector<char32> UTF8ToUTF32(const char* utf8_str);
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
// Converts a vector of unicodes to a utf8 string.
// Returns an empty string if the input contains an invalid unicode.
static std::string UTF32ToUTF8(const std::vector<char32>& str32);
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
private:
private:
// A UTF-8 representation of 1 or more Unicode characters.
// The last element (chars[UNICHAR_LEN - 1]) is a length if
// its value < UNICHAR_LEN, otherwise it is a genuine character.
char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -18,13 +18,11 @@
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// The @NAME@ tokens below are configure-time placeholders. They must stay
// contiguous (no whitespace between the name and the closing '@'), otherwise
// they are not substituted and the generated header contains stray '@' tokens.
// Keep the formatter away from them.
// clang-format off
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// clang-format on
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
(TESSERACT_MAJOR_VERSION << 16 | TESSERACT_MINOR_VERSION << 8 | TESSERACT_MICRO_VERSION)
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
#endif // TESSERACT_API_VERSION_H_
#endif // TESSERACT_API_VERSION_H_

View File

@ -36,11 +36,11 @@ SCROLLVIEW_CLASSES = \
com/google/scrollview/ScrollView.class
SCROLLVIEW_LIBS = \
piccolo2d-core-3.0.jar \
piccolo2d-extras-3.0.jar \
piccolo2d-core-3.0.1.jar \
piccolo2d-extras-3.0.1.jar \
jaxb-api-2.3.1.jar
CLASSPATH = piccolo2d-core-3.0.jar:piccolo2d-extras-3.0.jar:jaxb-api-2.3.1.jar
CLASSPATH = piccolo2d-core-3.0.1.jar:piccolo2d-extras-3.0.1.jar:jaxb-api-2.3.1.jar
ScrollView.jar : $(SCROLLVIEW_CLASSES)
$(JAR) cfm $@ $(srcdir)/Manifest.txt com/google/scrollview/*.class \
@ -51,9 +51,9 @@ $(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES) $(SCROLLVIEW_LIBS)
.PHONY: fetch-jars
fetch-jars $(SCROLLVIEW_LIBS):
curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0/piccolo2d-core-3.0.jar > piccolo2d-core-3.0.jar
curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0/piccolo2d-extras-3.0.jar > piccolo2d-extras-3.0.jar
curl -L http://search.maven.org/remotecontent?filepath=javax/xml/bind/jaxb-api/2.3.1/jaxb-api-2.3.1.jar > jaxb-api-2.3.1.jar
curl -s -S -L -O https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0.1/piccolo2d-core-3.0.1.jar
curl -s -S -L -O https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0.1/piccolo2d-extras-3.0.1.jar
curl -s -S -L -O https://search.maven.org/remotecontent?filepath=javax/xml/bind/jaxb-api/2.3.1/jaxb-api-2.3.1.jar
.PHONY: install-jars
install-jars : ScrollView.jar

View File

@ -1,2 +1,2 @@
Main-Class: com/google/scrollview/ScrollView
Class-Path: ScrollView.jar piccolo2d-core-3.0.jar piccolo2d-extras-3.0.jar jaxb-api-2.3.1.jar
Class-Path: ScrollView.jar piccolo2d-core-3.0.1.jar piccolo2d-extras-3.0.1.jar jaxb-api-2.3.1.jar

View File

@ -13,22 +13,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <sstream> // for std::stringstream
#include <tesseract/baseapi.h>
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
#include <tesseract/strngs.h> // for STRING
#include <memory>
#include <sstream> // for std::stringstream
namespace tesseract {
/// Add coordinates to specified TextBlock, TextLine or String bounding box.
/// Add word confidence if adding to a String bounding box.
///
static void AddBoxToAlto(const ResultIterator* it, PageIteratorLevel level,
std::stringstream& alto_str) {
static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
std::stringstream &alto_str) {
int left, top, right, bottom;
it->BoundingBox(level, &left, &top, &right, &bottom);
@ -90,9 +91,10 @@ bool TessAltoRenderer::BeginDocumentHandler() {
///
/// Append the ALTO XML for the layout of the image
///
bool TessAltoRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> text(api->GetAltoText(imagenum()));
if (text == nullptr) return false;
if (text == nullptr)
return false;
AppendString(text.get());
@ -108,14 +110,14 @@ bool TessAltoRenderer::EndDocumentHandler() {
return true;
}
TessAltoRenderer::TessAltoRenderer(const char* outputbase)
TessAltoRenderer::TessAltoRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "xml") {}
///
/// Make an XML-formatted string with ALTO markup from the internal
/// data structures.
///
char* TessBaseAPI::GetAltoText(int page_number) {
char *TessBaseAPI::GetAltoText(int page_number) {
return GetAltoText(nullptr, page_number);
}
@ -123,27 +125,26 @@ char* TessBaseAPI::GetAltoText(int page_number) {
/// Make an XML-formatted string with ALTO markup from the internal
/// data structures.
///
char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) {
char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
return nullptr;
int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;
if (input_file_ == nullptr) SetInputName(nullptr);
if (input_file_.empty()) {
SetInputName(nullptr);
}
#ifdef _WIN32
// convert input name from ANSI encoding to utf-8
int str16_len =
MultiByteToWideChar(CP_ACP, 0, input_file_->c_str(), -1, nullptr, 0);
wchar_t* uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->c_str(), -1,
uni16_str, str16_len);
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
0, nullptr, nullptr);
char* utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
nullptr, nullptr);
*input_file_ = utf8_str;
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
wchar_t *uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
int utf8_len =
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
char *utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
input_file_ = utf8_str;
delete[] uni16_str;
delete[] utf8_str;
#endif
@ -151,16 +152,14 @@ char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) {
std::stringstream alto_str;
// Use "C" locale (needed for int values larger than 999).
alto_str.imbue(std::locale::classic());
alto_str
<< "\t\t<Page WIDTH=\"" << rect_width_ << "\" HEIGHT=\""
<< rect_height_
<< "\" PHYSICAL_IMG_NR=\"" << page_number << "\""
<< " ID=\"page_" << page_number << "\">\n"
<< "\t\t\t<PrintSpace HPOS=\"0\" VPOS=\"0\""
<< " WIDTH=\"" << rect_width_ << "\""
<< " HEIGHT=\"" << rect_height_ << "\">\n";
alto_str << "\t\t<Page WIDTH=\"" << rect_width_ << "\" HEIGHT=\"" << rect_height_
<< "\" PHYSICAL_IMG_NR=\"" << page_number << "\""
<< " ID=\"page_" << page_number << "\">\n"
<< "\t\t\t<PrintSpace HPOS=\"0\" VPOS=\"0\""
<< " WIDTH=\"" << rect_width_ << "\""
<< " HEIGHT=\"" << rect_height_ << "\">\n";
ResultIterator* res_it = GetIterator();
ResultIterator *res_it = GetIterator();
while (!res_it->Empty(RIL_BLOCK)) {
if (res_it->Empty(RIL_WORD)) {
res_it->Next(RIL_WORD);
@ -193,13 +192,11 @@ char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) {
bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
int left, top, right, bottom;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
do {
const std::unique_ptr<const char[]> grapheme(
res_it->GetUTF8Text(RIL_SYMBOL));
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != 0) {
alto_str << HOcrEscape(grapheme.get()).c_str();
}
@ -218,8 +215,8 @@ char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) {
int vpos = top;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
int width = left - hpos;
alto_str << "<SP WIDTH=\"" << width << "\" VPOS=\"" << vpos
<< "\" HPOS=\"" << hpos << "\"/>\n";
alto_str << "<SP WIDTH=\"" << width << "\" VPOS=\"" << vpos << "\" HPOS=\"" << hpos
<< "\"/>\n";
}
if (last_word_in_tblock) {
@ -235,12 +232,12 @@ char* TessBaseAPI::GetAltoText(ETEXT_DESC* monitor, int page_number) {
alto_str << "\t\t\t</PrintSpace>\n"
<< "\t\t</Page>\n";
const std::string& text = alto_str.str();
const std::string &text = alto_str.str();
char* result = new char[text.length() + 1];
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
delete res_it;
return result;
}
} // namespace tesseract
} // namespace tesseract

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -17,28 +17,27 @@
*
**********************************************************************/
#include <locale> // for std::locale::classic
#include <memory> // for std::unique_ptr
#include <sstream> // for std::stringstream
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <locale> // for std::locale::classic
#include <memory> // for std::unique_ptr
#include <sstream> // for std::stringstream
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
#include <tesseract/renderer.h>
#include "tesseractclass.h" // for Tesseract
#include "tesseractclass.h" // for Tesseract
namespace tesseract {
/**
* Gets the block orientation at the current iterator position.
*/
static tesseract::Orientation GetBlockTextOrientation(const PageIterator* it) {
static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
tesseract::Orientation orientation;
tesseract::WritingDirection writing_direction;
tesseract::TextlineOrder textline_order;
float deskew_angle;
it->Orientation(&orientation, &writing_direction, &textline_order,
&deskew_angle);
it->Orientation(&orientation, &writing_direction, &textline_order, &deskew_angle);
return orientation;
}
@ -50,9 +49,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator* it) {
* method currently only inserts a 'textangle' property to indicate the rotation
* direction and does not add any baseline information to the hocr string.
*/
static void AddBaselineCoordsTohOCR(const PageIterator* it,
PageIteratorLevel level,
std::stringstream& hocr_str) {
static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel level,
std::stringstream &hocr_str) {
tesseract::Orientation orientation = GetBlockTextOrientation(it);
if (orientation != ORIENTATION_PAGE_UP) {
hocr_str << "; textangle " << 360 - orientation * 90;
@ -64,7 +62,8 @@ static void AddBaselineCoordsTohOCR(const PageIterator* it,
// Try to get the baseline coordinates at this level.
int x1, y1, x2, y2;
if (!it->Baseline(level, &x1, &y1, &x2, &y2)) return;
if (!it->Baseline(level, &x1, &y1, &x2, &y2))
return;
// Following the description of this field of the hOCR spec, we convert the
// baseline coordinates so that "the bottom left of the bounding box is the
// origin".
@ -82,27 +81,25 @@ static void AddBaselineCoordsTohOCR(const PageIterator* it,
double p1 = (y2 - y1) / static_cast<double>(x2 - x1);
double p0 = y1 - p1 * x1;
hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " "
<< round(p0 * 1000.0) / 1000.0;
hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " " << round(p0 * 1000.0) / 1000.0;
}
static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
std::stringstream& hocr_str) {
static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
std::stringstream &hocr_str) {
int left, top, right, bottom;
it->BoundingBox(level, &left, &top, &right, &bottom);
// This is the only place we use double quotes instead of single quotes,
// but it may be too late to change for consistency
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " "
<< bottom;
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " " << bottom;
// Add baseline coordinates & heights for textlines only.
if (level == RIL_TEXTLINE) {
AddBaselineCoordsTohOCR(it, level, hocr_str);
// add custom height measures
float row_height, descenders, ascenders; // row attributes
float row_height, descenders, ascenders; // row attributes
it->RowAttributes(&row_height, &descenders, &ascenders);
// TODO(rays): Do we want to limit these to a single decimal place?
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders
<< "; x_ascenders " << ascenders;
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders << "; x_ascenders "
<< ascenders;
}
hocr_str << "\">";
}
@ -116,7 +113,7 @@ static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
* STL removed from original patch submission and refactored by rays.
* Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetHOCRText(int page_number) {
char *TessBaseAPI::GetHOCRText(int page_number) {
return GetHOCRText(nullptr, page_number);
}
@ -129,34 +126,32 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
* STL removed from original patch submission and refactored by rays.
* Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
return nullptr;
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, ccnt = 1;
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
bool para_is_ltr = true; // Default direction is LTR
const char* paragraph_lang = nullptr;
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
bool para_is_ltr = true; // Default direction is LTR
const char *paragraph_lang = nullptr;
bool font_info = false;
bool hocr_boxes = false;
GetBoolVariable("hocr_font_info", &font_info);
GetBoolVariable("hocr_char_boxes", &hocr_boxes);
if (input_file_ == nullptr) SetInputName(nullptr);
if (input_file_.empty())
SetInputName(nullptr);
#ifdef _WIN32
// convert input name from ANSI encoding to utf-8
int str16_len =
MultiByteToWideChar(CP_ACP, 0, input_file_->c_str(), -1, nullptr, 0);
wchar_t* uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->c_str(), -1,
uni16_str, str16_len);
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
0, nullptr, nullptr);
char* utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
nullptr, nullptr);
*input_file_ = utf8_str;
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
wchar_t *uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
int utf8_len =
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
char *utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
input_file_ = utf8_str;
delete[] uni16_str;
delete[] utf8_str;
#endif
@ -170,14 +165,13 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
hocr_str << " id='"
<< "page_" << page_id << "'";
hocr_str << " title='image \"";
if (input_file_) {
hocr_str << HOcrEscape(input_file_->c_str()).c_str();
if (!input_file_.empty()) {
hocr_str << HOcrEscape(input_file_.c_str()).c_str();
} else {
hocr_str << "unknown";
}
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
<< rect_width_ << " " << rect_height_ << "; ppageno " << page_number
<< "'>\n";
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " " << rect_width_ << " "
<< rect_height_ << "; ppageno " << page_number << "'>\n";
std::unique_ptr<ResultIterator> res_it(GetIterator());
while (!res_it->Empty(RIL_BLOCK)) {
@ -188,7 +182,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
// Open any new block/paragraph/textline.
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
para_is_ltr = true; // reset to default direction
para_is_ltr = true; // reset to default direction
hocr_str << " <div class='ocr_carea'"
<< " id='"
<< "block_" << page_id << "_" << bcnt << "'";
@ -230,12 +224,9 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
// Now, process the word...
int32_t lstm_choice_mode = tesseract_->lstm_choice_mode;
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>* rawTimestepMap =
nullptr;
std::vector<std::vector<std::pair<const char*, float>>>* CTCMap =
nullptr;
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *rawTimestepMap = nullptr;
std::vector<std::vector<std::pair<const char *, float>>> *CTCMap = nullptr;
if (lstm_choice_mode) {
CTCMap = res_it->GetBestLSTMSymbolChoices();
rawTimestepMap = res_it->GetRawLSTMTimesteps();
}
@ -245,14 +236,12 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
int left, top, right, bottom;
bool bold, italic, underlined, monospace, serif, smallcaps;
int pointsize, font_id;
const char* font_name;
const char *font_name;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
font_name =
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
&serif, &smallcaps, &pointsize, &font_id);
hocr_str << " title='bbox " << left << " " << top << " " << right << " "
<< bottom << "; x_wconf "
<< static_cast<int>(res_it->Confidence(RIL_WORD));
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
&smallcaps, &pointsize, &font_id);
hocr_str << " title='bbox " << left << " " << top << " " << right << " " << bottom
<< "; x_wconf " << static_cast<int>(res_it->Confidence(RIL_WORD));
if (font_info) {
if (font_name) {
hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
@ -260,86 +249,82 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
hocr_str << "; x_fsize " << pointsize;
}
hocr_str << "'";
const char* lang = res_it->WordRecognitionLanguage();
const char *lang = res_it->WordRecognitionLanguage();
if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
hocr_str << " lang='" << lang << "'";
}
switch (res_it->WordDirection()) {
// Only emit direction if different from current paragraph direction
case DIR_LEFT_TO_RIGHT:
if (!para_is_ltr) hocr_str << " dir='ltr'";
if (!para_is_ltr)
hocr_str << " dir='ltr'";
break;
case DIR_RIGHT_TO_LEFT:
if (para_is_ltr) hocr_str << " dir='rtl'";
if (para_is_ltr)
hocr_str << " dir='rtl'";
break;
case DIR_MIX:
case DIR_NEUTRAL:
default: // Do nothing.
default: // Do nothing.
break;
}
hocr_str << ">";
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
if (bold) hocr_str << "<strong>";
if (italic) hocr_str << "<em>";
if (bold)
hocr_str << "<strong>";
if (italic)
hocr_str << "<em>";
do {
const std::unique_ptr<const char[]> grapheme(
res_it->GetUTF8Text(RIL_SYMBOL));
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != 0) {
if (hocr_boxes) {
res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
<< left << " " << top << " " << right << " " << bottom
<< "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes " << left << " " << top
<< " " << right << " " << bottom << "; x_conf " << res_it->Confidence(RIL_SYMBOL)
<< "'>";
}
hocr_str << HOcrEscape(grapheme.get()).c_str();
if (hocr_boxes) {
hocr_str << "</span>";
tesseract::ChoiceIterator ci(*res_it);
if (lstm_choice_mode == 1 && ci.Timesteps() != nullptr) {
std::vector<std::vector<std::pair<const char*, float>>>* symbol =
ci.Timesteps();
hocr_str << "\n <span class='ocr_symbol'"
std::vector<std::vector<std::pair<const char *, float>>> *symbol = ci.Timesteps();
hocr_str << "\n <span class='ocr_symbol'"
<< " id='"
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
for (auto timestep : *symbol) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt
<< "'>";
for (auto timestep : *symbol) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
for (auto conf : timestep) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto conf : timestep) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << int(conf.second * 100)
<< "'>" << HOcrEscape(conf.first).c_str()
<< "</span>";
++ccnt;
}
hocr_str << "</span>";
++tcnt;
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << int(conf.second * 100) << "'>"
<< HOcrEscape(conf.first).c_str() << "</span>";
++ccnt;
}
hocr_str << "\n </span>";
++scnt;
hocr_str << "</span>";
++tcnt;
}
hocr_str << "\n </span>";
++scnt;
} else if (lstm_choice_mode == 2) {
tesseract::ChoiceIterator ci(*res_it);
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
do {
const char* choice = ci.GetUTF8Text();
const char *choice = ci.GetUTF8Text();
float choiceconf = ci.Confidence();
if (choice != nullptr) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << choiceconf << "'>"
<< HOcrEscape(choice).c_str() << "</span>";
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << choiceconf << "'>" << HOcrEscape(choice).c_str()
<< "</span>";
ccnt++;
}
} while (ci.Next());
@ -350,8 +335,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
}
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (italic) hocr_str << "</em>";
if (bold) hocr_str << "</strong>";
if (italic)
hocr_str << "</em>";
if (bold)
hocr_str << "</strong>";
// If the lstm choice mode is required it is added here
if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
for (auto symbol : *rawTimestepMap) {
@ -361,13 +348,11 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
for (auto timestep : symbol) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
for (auto conf : timestep) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << int(conf.second * 100) << "'>"
<< HOcrEscape(conf.first).c_str() << "</span>";
++ccnt;
@ -383,9 +368,8 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (timestep.size() > 0) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto& j : timestep) {
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
for (auto &j : timestep) {
float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
if (conf < 0.0f)
conf = 0.0f;
@ -393,17 +377,16 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
conf = 100.0f;
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << conf << "'>"
<< HOcrEscape(j.first).c_str() << "</span>";
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << conf << "'>" << HOcrEscape(j.first).c_str()
<< "</span>";
ccnt++;
}
hocr_str << "</span>";
tcnt++;
}
}
}
}
// Close ocrx_word.
if (hocr_boxes || lstm_choice_mode > 0) {
hocr_str << "\n ";
@ -420,7 +403,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (last_word_in_para) {
hocr_str << "\n </p>\n";
pcnt++;
para_is_ltr = true; // back to default direction
para_is_ltr = true; // back to default direction
}
if (last_word_in_block) {
hocr_str << " </div>\n";
@ -429,8 +412,8 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
}
hocr_str << " </div>\n";
const std::string& text = hocr_str.str();
char* result = new char[text.length() + 1];
const std::string &text = hocr_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
}
@ -438,12 +421,12 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
/**********************************************************************
* HOcr Text Renderer interface implementation
**********************************************************************/
TessHOcrRenderer::TessHOcrRenderer(const char* outputbase)
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = false;
}
TessHOcrRenderer::TessHOcrRenderer(const char* outputbase, bool font_info)
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = font_info;
}
@ -460,11 +443,12 @@ bool TessHOcrRenderer::BeginDocumentHandler() {
"</title>\n"
" <meta http-equiv=\"Content-Type\" content=\"text/html;"
"charset=utf-8\"/>\n"
" <meta name='ocr-system' content='tesseract " PACKAGE_VERSION
" <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
"' />\n"
" <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
" ocr_line ocrx_word ocrp_wconf");
if (font_info_) AppendString(" ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
if (font_info_)
AppendString(" ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
AppendString(
"'/>\n"
" </head>\n"
@ -479,13 +463,14 @@ bool TessHOcrRenderer::EndDocumentHandler() {
return true;
}
bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessHOcrRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
if (hocr == nullptr) return false;
if (hocr == nullptr)
return false;
AppendString(hocr.get());
return true;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -16,9 +16,9 @@
*
**********************************************************************/
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "tesseractclass.h" // for Tesseract
#include "tesseractclass.h" // for Tesseract
namespace tesseract {
@ -27,23 +27,23 @@ namespace tesseract {
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
static void AddBoxToLSTM(int right, int bottom, int top, int image_height,
int page_num, STRING* text) {
text->add_str_int(" ", image_height - bottom);
text->add_str_int(" ", right + 5);
text->add_str_int(" ", image_height - top);
text->add_str_int(" ", page_num);
static void AddBoxToLSTM(int right, int bottom, int top, int image_height, int page_num,
std::string &text) {
text += " " + std::to_string(image_height - bottom);
text += " " + std::to_string(right + 5);
text += " " + std::to_string(image_height - top);
text += " " + std::to_string(page_num);
}
char* TessBaseAPI::GetLSTMBoxText(int page_number=0) {
char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
return nullptr;
STRING lstm_box_str("");
std::string lstm_box_str;
bool first_word = true;
int left = 0, top = 0, right = 0, bottom = 0;
LTRResultIterator* res_it = GetLTRIterator();
LTRResultIterator *res_it = GetLTRIterator();
while (!res_it->Empty(RIL_BLOCK)) {
if (res_it->Empty(RIL_SYMBOL)) {
res_it->Next(RIL_SYMBOL);
@ -52,38 +52,35 @@ char* TessBaseAPI::GetLSTMBoxText(int page_number=0) {
if (!first_word) {
if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
if (res_it->IsAtBeginningOf(RIL_WORD)) {
lstm_box_str.add_str_int(" ", left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number,
&lstm_box_str);
lstm_box_str += "\n"; // end of row for word
} // word
lstm_box_str += " " + std::to_string(left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
lstm_box_str += "\n"; // end of row for word
} // word
} else {
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
lstm_box_str.add_str_int("\t ", left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number,
&lstm_box_str);
lstm_box_str += "\n"; // end of row for line
} // line
lstm_box_str += "\t " + std::to_string(left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
lstm_box_str += "\n"; // end of row for line
} // line
}
} // not first word
} // not first word
first_word = false;
// Use bounding box for whole line for everything
res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
do {
lstm_box_str +=
std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
lstm_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
lstm_box_str.add_str_int(" ", left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
lstm_box_str += "\n"; // end of row for symbol
lstm_box_str += " " + std::to_string(left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
lstm_box_str += "\n"; // end of row for symbol
}
if (!first_word) { // if first_word is true => empty page
lstm_box_str.add_str_int("\t ", left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
lstm_box_str += "\n"; // end of PAGE
if (!first_word) { // if first_word is true => empty page
lstm_box_str += "\t " + std::to_string(left);
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
lstm_box_str += "\n"; // end of PAGE
}
char* ret = new char[lstm_box_str.length() + 1];
char *ret = new char[lstm_box_str.length() + 1];
strcpy(ret, lstm_box_str.c_str());
delete res_it;
return ret;
@ -92,16 +89,17 @@ char* TessBaseAPI::GetLSTMBoxText(int page_number=0) {
/**********************************************************************
* LSTMBox Renderer interface implementation
**********************************************************************/
TessLSTMBoxRenderer::TessLSTMBoxRenderer(const char* outputbase)
TessLSTMBoxRenderer::TessLSTMBoxRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "box") {}
bool TessLSTMBoxRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessLSTMBoxRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> lstmbox(api->GetLSTMBoxText(imagenum()));
if (lstmbox == nullptr) return false;
if (lstmbox == nullptr)
return false;
AppendString(lstmbox.get());
return true;
}
} // namespace tesseract.
} // namespace tesseract.

63
src/api/pdf_ttf.h Normal file
View File

@ -0,0 +1,63 @@
///////////////////////////////////////////////////////////////////////
// File: pdf_ttf.h
// Description: pdf.ttf (GlyphLessFont) replacement.
// Generated with: "bin2cpp pdf.ttf pdf_ttf cpp17"
// Author: Zdenko Podobny
//
// (C) Copyright 2020, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
// Include guard for the embedded font table.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; consider a guard such as PDF_TTF_H the next time
// this file is regenerated.
#ifndef pdf_ttf__H
#define pdf_ttf__H
#include <cstdint> // uint8_t
// Byte-for-byte image of pdf.ttf (the "GlyphLessFont" named in the file
// header), emitted by bin2cpp so the font can be compiled into the binary.
// Do not edit by hand: regenerate with the command given in the file header.
//
// Layout visible in the data itself:
//   bytes 0-3  : 0x00010000, the TrueType sfnt version
//   bytes 4-5  : 0x000a, i.e. 10 table-directory entries
//   directory  : OS/2, cmap, glyf, head, hhea, hmtx, loca, maxp, name, post
//                (each entry is a 4-byte ASCII tag followed by checksum,
//                offset and length)
//
// The array is `static`, so every translation unit that includes this header
// gets its own private copy; sizeof(pdf_ttf) yields the font size in bytes.
static const uint8_t pdf_ttf[] = {
0x0, 0x1, 0x0, 0x0, 0x0, 0xa, 0x0, 0x80, 0x0, 0x3, 0x0, 0x20, 0x4f, 0x53, 0x2f, 0x32,
0x56, 0xde, 0xc8, 0x94, 0x0, 0x0, 0x1, 0x28, 0x0, 0x0, 0x0, 0x60, 0x63, 0x6d, 0x61, 0x70,
0x0, 0xa, 0x0, 0x34, 0x0, 0x0, 0x1, 0x90, 0x0, 0x0, 0x0, 0x1e, 0x67, 0x6c, 0x79, 0x66,
0x15, 0x22, 0x41, 0x24, 0x0, 0x0, 0x1, 0xb8, 0x0, 0x0, 0x0, 0x18, 0x68, 0x65, 0x61, 0x64,
0xb, 0x78, 0xf1, 0x65, 0x0, 0x0, 0x0, 0xac, 0x0, 0x0, 0x0, 0x36, 0x68, 0x68, 0x65, 0x61,
0xc, 0x2, 0x4, 0x2, 0x0, 0x0, 0x0, 0xe4, 0x0, 0x0, 0x0, 0x24, 0x68, 0x6d, 0x74, 0x78,
0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x88, 0x0, 0x0, 0x0, 0x8, 0x6c, 0x6f, 0x63, 0x61,
0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x1, 0xb0, 0x0, 0x0, 0x0, 0x6, 0x6d, 0x61, 0x78, 0x70,
0x0, 0x4, 0x0, 0x5, 0x0, 0x0, 0x1, 0x8, 0x0, 0x0, 0x0, 0x20, 0x6e, 0x61, 0x6d, 0x65,
0xf2, 0xeb, 0x16, 0xda, 0x0, 0x0, 0x1, 0xd0, 0x0, 0x0, 0x0, 0x4b, 0x70, 0x6f, 0x73, 0x74,
0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x2, 0x1c, 0x0, 0x0, 0x0, 0x20, 0x0, 0x1, 0x0, 0x0,
0x0, 0x1, 0x0, 0x0, 0xb0, 0x94, 0x71, 0x10, 0x5f, 0xf, 0x3c, 0xf5, 0x4, 0x7, 0x8, 0x0,
0x0, 0x0, 0x0, 0x0, 0xcf, 0x9a, 0xfc, 0x6e, 0x0, 0x0, 0x0, 0x0, 0xd4, 0xc3, 0xa7, 0xf2,
0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0, 0x10, 0x0, 0x2, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x8, 0x0, 0xff, 0xff, 0x0, 0x0, 0x4, 0x0,
0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x0, 0x4,
0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x1, 0x90, 0x0, 0x5,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x47, 0x4f, 0x4f, 0x47, 0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0xff, 0xff,
0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x2, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x14, 0x0, 0x3, 0x0, 0x0,
0x0, 0x0, 0x0, 0x14, 0x0, 0x6, 0x0, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x4, 0x0,
0x8, 0x0, 0x0, 0x3, 0x0, 0x0, 0x31, 0x21, 0x11, 0x21, 0x4, 0x0, 0xfc, 0x0, 0x8, 0x0,
0x0, 0x0, 0x0, 0x3, 0x0, 0x2a, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x5, 0x0, 0x16,
0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0xb, 0x0, 0x16, 0x0, 0x3,
0x0, 0x1, 0x4, 0x9, 0x0, 0x5, 0x0, 0x16, 0x0, 0x0, 0x0, 0x56, 0x0, 0x65, 0x0, 0x72,
0x0, 0x73, 0x0, 0x69, 0x0, 0x6f, 0x0, 0x6e, 0x0, 0x20, 0x0, 0x31, 0x0, 0x2e, 0x0, 0x30,
0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x0, 0x0, 0x1, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
#endif

View File

@ -17,19 +17,23 @@
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
# include "config_auto.h"
#endif
#include <locale> // for std::locale::classic
#include <memory> // std::unique_ptr
#include <sstream> // for std::stringstream
#include "allheaders.h"
#include <tesseract/baseapi.h>
#include <cmath>
#include <tesseract/renderer.h>
#include <cstring>
#include "pdf_ttf.h"
#include "tprintf.h"
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
#include <cmath>
#include <cstring>
#include <fstream> // for std::ifstream
#include <locale> // for std::locale::classic
#include <memory> // std::unique_ptr
#include <sstream> // for std::stringstream
#include "helpers.h" // for Swap
/*
Design notes from Ken Sharp, with light editing.
@ -176,11 +180,9 @@ static const int kMaxBytesPerCodepoint = 20;
/**********************************************************************
* PDF Renderer interface implementation
**********************************************************************/
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly)
: TessResultRenderer(outputbase, "pdf"),
datadir_(datadir) {
obj_ = 0;
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly)
: TessResultRenderer(outputbase, "pdf"), datadir_(datadir) {
obj_ = 0;
textonly_ = textonly;
offsets_.push_back(0);
}
@ -218,13 +220,12 @@ static long dist2(int x1, int y1, int x2, int y2) {
// left-to-right no matter what the reading order is. We need the
// word baseline in reading order, so we do that conversion here. Returns
// the word's baseline origin and length.
static void GetWordBaseline(int writing_direction, int ppi, int height,
int word_x1, int word_y1, int word_x2, int word_y2,
int line_x1, int line_y1, int line_x2, int line_y2,
double *x0, double *y0, double *length) {
static void GetWordBaseline(int writing_direction, int ppi, int height, int word_x1, int word_y1,
int word_x2, int word_y2, int line_x1, int line_y1, int line_x2,
int line_y2, double *x0, double *y0, double *length) {
if (writing_direction == WRITING_DIRECTION_RIGHT_TO_LEFT) {
Swap(&word_x1, &word_x2);
Swap(&word_y1, &word_y2);
std::swap(word_x1, word_x2);
std::swap(word_y1, word_y2);
}
double word_length;
double x, y;
@ -236,13 +237,11 @@ static void GetWordBaseline(int writing_direction, int ppi, int height,
x = line_x1;
y = line_y1;
} else {
double t = ((px - line_x2) * (line_x2 - line_x1) +
(py - line_y2) * (line_y2 - line_y1)) / l2;
double t = ((px - line_x2) * (line_x2 - line_x1) + (py - line_y2) * (line_y2 - line_y1)) / l2;
x = line_x2 + t * (line_x2 - line_x1);
y = line_y2 + t * (line_y2 - line_y1);
}
word_length = sqrt(static_cast<double>(dist2(word_x1, word_y1,
word_x2, word_y2)));
word_length = sqrt(static_cast<double>(dist2(word_x1, word_y1, word_x2, word_y2)));
word_length = word_length * 72.0 / ppi;
x = x * 72 / ppi;
y = height - (y * 72.0 / ppi);
@ -260,16 +259,15 @@ static void GetWordBaseline(int writing_direction, int ppi, int height,
// RTL
// [ x' ] = [ a b ][ x ] = [-1 0 ] [ cos sin ][ x ]
// [ y' ] [ c d ][ y ] [ 0 1 ] [-sin cos ][ y ]
static void AffineMatrix(int writing_direction,
int line_x1, int line_y1, int line_x2, int line_y2,
static void AffineMatrix(int writing_direction, int line_x1, int line_y1, int line_x2, int line_y2,
double *a, double *b, double *c, double *d) {
double theta = atan2(static_cast<double>(line_y1 - line_y2),
static_cast<double>(line_x2 - line_x1));
double theta =
atan2(static_cast<double>(line_y1 - line_y2), static_cast<double>(line_x2 - line_x1));
*a = cos(theta);
*b = sin(theta);
*c = -sin(theta);
*d = cos(theta);
switch(writing_direction) {
switch (writing_direction) {
case WRITING_DIRECTION_RIGHT_TO_LEFT:
*a = -*a;
*b = -*b;
@ -289,8 +287,7 @@ static void AffineMatrix(int writing_direction,
// these viewers. I chose this threshold large enough to absorb noise,
// but small enough that lines probably won't cross each other if the
// whole page is tilted at almost exactly the clipping threshold.
static void ClipBaseline(int ppi, int x1, int y1, int x2, int y2,
int *line_x1, int *line_y1,
static void ClipBaseline(int ppi, int x1, int y1, int x2, int y2, int *line_x1, int *line_y1,
int *line_x2, int *line_y2) {
*line_x1 = x1;
*line_y1 = y1;
@ -313,21 +310,18 @@ static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
int a = code - 0x010000;
int high_surrogate = (0x03FF & (a >> 10)) + 0xD800;
int low_surrogate = (0x03FF & a) + 0xDC00;
snprintf(utf16, kMaxBytesPerCodepoint,
"%04X%04X", high_surrogate, low_surrogate);
snprintf(utf16, kMaxBytesPerCodepoint, "%04X%04X", high_surrogate, low_surrogate);
}
return true;
}
char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
double width, double height) {
char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double height) {
double ppi = api->GetSourceYResolution();
// These initial conditions are all arbitrary and will be overwritten
double old_x = 0.0, old_y = 0.0;
int old_fontsize = 0;
tesseract::WritingDirection old_writing_direction =
WRITING_DIRECTION_LEFT_TO_RIGHT;
tesseract::WritingDirection old_writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT;
bool new_block = true;
int fontsize = 0;
double a = 1;
@ -358,9 +352,9 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
ResultIterator *res_it = api->GetIterator();
while (!res_it->Empty(RIL_BLOCK)) {
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
pdf_str << "BT\n3 Tr"; // Begin text object, use invisible ink
old_fontsize = 0; // Every block will declare its fontsize
new_block = true; // Every block will declare its affine matrix
pdf_str << "BT\n3 Tr"; // Begin text object, use invisible ink
old_fontsize = 0; // Every block will declare its fontsize
new_block = true; // Every block will declare its affine matrix
}
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
@ -380,8 +374,7 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
tesseract::Orientation orientation;
tesseract::TextlineOrder textline_order;
float deskew_angle;
res_it->Orientation(&orientation, &writing_direction,
&textline_order, &deskew_angle);
res_it->Orientation(&orientation, &writing_direction, &textline_order, &deskew_angle);
if (writing_direction != WRITING_DIRECTION_TOP_TO_BOTTOM) {
switch (res_it->WordDirection()) {
case DIR_LEFT_TO_RIGHT:
@ -401,15 +394,12 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
{
int word_x1, word_y1, word_x2, word_y2;
res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2);
GetWordBaseline(writing_direction, ppi, height,
word_x1, word_y1, word_x2, word_y2,
line_x1, line_y1, line_x2, line_y2,
&x, &y, &word_length);
GetWordBaseline(writing_direction, ppi, height, word_x1, word_y1, word_x2, word_y2, line_x1,
line_y1, line_x2, line_y2, &x, &y, &word_length);
}
if (writing_direction != old_writing_direction || new_block) {
AffineMatrix(writing_direction,
line_x1, line_y1, line_x2, line_y2, &a, &b, &c, &d);
AffineMatrix(writing_direction, line_x1, line_y1, line_x2, line_y2, &a, &b, &c, &d);
pdf_str << " " << prec(a) // . This affine matrix
<< " " << prec(b) // . sets the coordinate
<< " " << prec(c) // . system for all
@ -421,9 +411,8 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
} else {
double dx = x - old_x;
double dy = y - old_y;
pdf_str << " " << prec(dx * a + dy * b)
<< " " << prec(dx * c + dy * d)
<< (" Td "); // Relative moveto
pdf_str << " " << prec(dx * a + dy * b) << " " << prec(dx * c + dy * d)
<< (" Td "); // Relative moveto
}
old_x = x;
old_y = y;
@ -436,8 +425,8 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
{
bool bold, italic, underlined, monospace, serif, smallcaps;
int font_id;
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
&serif, &smallcaps, &fontsize, &font_id);
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, &smallcaps,
&fontsize, &font_id);
const int kDefaultFontsize = 8;
if (fontsize <= 0)
fontsize = kDefaultFontsize;
@ -452,8 +441,7 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
std::string pdf_word;
int pdf_word_len = 0;
do {
const std::unique_ptr<const char[]> grapheme(
res_it->GetUTF8Text(RIL_SYMBOL));
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != '\0') {
std::vector<char32> unicodes = UNICHAR::UTF8ToUTF32(grapheme.get());
char utf16[kMaxBytesPerCodepoint];
@ -471,21 +459,20 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
pdf_word_len++;
}
if (word_length > 0 && pdf_word_len > 0) {
double h_stretch =
kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len));
pdf_str << h_stretch << " Tz" // horizontal stretch
<< " [ <" << pdf_word // UTF-16BE representation
<< "> ] TJ"; // show the text
double h_stretch = kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len));
pdf_str << h_stretch << " Tz" // horizontal stretch
<< " [ <" << pdf_word // UTF-16BE representation
<< "> ] TJ"; // show the text
}
if (last_word_in_line) {
pdf_str << " \n";
}
if (last_word_in_block) {
pdf_str << "ET\n"; // end the text object
pdf_str << "ET\n"; // end the text object
}
}
const std::string& text = pdf_str.str();
char* result = new char[text.length() + 1];
const std::string &text = pdf_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
delete res_it;
return result;
@ -495,11 +482,12 @@ bool TessPDFRenderer::BeginDocumentHandler() {
AppendPDFObject("%PDF-1.5\n%\xDE\xAD\xBE\xEB\n");
// CATALOG
AppendPDFObject("1 0 obj\n"
"<<\n"
" /Type /Catalog\n"
" /Pages 2 0 R\n"
">>\nendobj\n");
AppendPDFObject(
"1 0 obj\n"
"<<\n"
" /Type /Catalog\n"
" /Pages 2 0 R\n"
">>\nendobj\n");
// We are reserving object #2 for the /Pages
// object, which I am going to create and write
@ -507,56 +495,58 @@ bool TessPDFRenderer::BeginDocumentHandler() {
AppendPDFObject("");
// TYPE0 FONT
AppendPDFObject("3 0 obj\n"
"<<\n"
" /BaseFont /GlyphLessFont\n"
" /DescendantFonts [ 4 0 R ]\n" // CIDFontType2 font
" /Encoding /Identity-H\n"
" /Subtype /Type0\n"
" /ToUnicode 6 0 R\n" // ToUnicode
" /Type /Font\n"
">>\n"
"endobj\n");
AppendPDFObject(
"3 0 obj\n"
"<<\n"
" /BaseFont /GlyphLessFont\n"
" /DescendantFonts [ 4 0 R ]\n" // CIDFontType2 font
" /Encoding /Identity-H\n"
" /Subtype /Type0\n"
" /ToUnicode 6 0 R\n" // ToUnicode
" /Type /Font\n"
">>\n"
"endobj\n");
// CIDFONTTYPE2
std::stringstream stream;
// Use "C" locale (needed for int values larger than 999).
stream.imbue(std::locale::classic());
stream <<
"4 0 obj\n"
"<<\n"
" /BaseFont /GlyphLessFont\n"
" /CIDToGIDMap 5 0 R\n" // CIDToGIDMap
" /CIDSystemInfo\n"
" <<\n"
" /Ordering (Identity)\n"
" /Registry (Adobe)\n"
" /Supplement 0\n"
" >>\n"
" /FontDescriptor 7 0 R\n" // Font descriptor
" /Subtype /CIDFontType2\n"
" /Type /Font\n"
" /DW " << (1000 / kCharWidth) << "\n"
">>\n"
"endobj\n";
stream << "4 0 obj\n"
"<<\n"
" /BaseFont /GlyphLessFont\n"
" /CIDToGIDMap 5 0 R\n" // CIDToGIDMap
" /CIDSystemInfo\n"
" <<\n"
" /Ordering (Identity)\n"
" /Registry (Adobe)\n"
" /Supplement 0\n"
" >>\n"
" /FontDescriptor 7 0 R\n" // Font descriptor
" /Subtype /CIDFontType2\n"
" /Type /Font\n"
" /DW "
<< (1000 / kCharWidth)
<< "\n"
">>\n"
"endobj\n";
AppendPDFObject(stream.str().c_str());
// CIDTOGIDMAP
const int kCIDToGIDMapSize = 2 * (1 << 16);
const std::unique_ptr<unsigned char[]> cidtogidmap(
new unsigned char[kCIDToGIDMapSize]);
const std::unique_ptr<unsigned char[]> cidtogidmap(new unsigned char[kCIDToGIDMapSize]);
for (int i = 0; i < kCIDToGIDMapSize; i++) {
cidtogidmap[i] = (i % 2) ? 1 : 0;
}
size_t len;
unsigned char *comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len);
stream.str("");
stream <<
"5 0 obj\n"
"<<\n"
" /Length " << len << " /Filter /FlateDecode\n"
">>\n"
"stream\n";
stream << "5 0 obj\n"
"<<\n"
" /Length "
<< len
<< " /Filter /FlateDecode\n"
">>\n"
"stream\n";
AppendString(stream.str().c_str());
long objsize = stream.str().size();
AppendData(reinterpret_cast<char *>(comp), len);
@ -594,65 +584,67 @@ bool TessPDFRenderer::BeginDocumentHandler() {
// TOUNICODE
stream.str("");
stream <<
"6 0 obj\n"
"<< /Length " << (sizeof(stream2) - 1) << " >>\n"
"stream\n" << stream2 <<
"endstream\n"
"endobj\n";
stream << "6 0 obj\n"
"<< /Length "
<< (sizeof(stream2) - 1)
<< " >>\n"
"stream\n"
<< stream2
<< "endstream\n"
"endobj\n";
AppendPDFObject(stream.str().c_str());
// FONT DESCRIPTOR
stream.str("");
stream <<
"7 0 obj\n"
"<<\n"
" /Ascent 1000\n"
" /CapHeight 1000\n"
" /Descent -1\n" // Spec says must be negative
" /Flags 5\n" // FixedPitch + Symbolic
" /FontBBox [ 0 0 " << (1000 / kCharWidth) << " 1000 ]\n"
" /FontFile2 8 0 R\n"
" /FontName /GlyphLessFont\n"
" /ItalicAngle 0\n"
" /StemV 80\n"
" /Type /FontDescriptor\n"
">>\n"
"endobj\n";
stream << "7 0 obj\n"
"<<\n"
" /Ascent 1000\n"
" /CapHeight 1000\n"
" /Descent -1\n" // Spec says must be negative
" /Flags 5\n" // FixedPitch + Symbolic
" /FontBBox [ 0 0 "
<< (1000 / kCharWidth)
<< " 1000 ]\n"
" /FontFile2 8 0 R\n"
" /FontName /GlyphLessFont\n"
" /ItalicAngle 0\n"
" /StemV 80\n"
" /Type /FontDescriptor\n"
">>\n"
"endobj\n";
AppendPDFObject(stream.str().c_str());
stream.str("");
stream << datadir_.c_str() << "/pdf.ttf";
FILE *fp = fopen(stream.str().c_str(), "rb");
if (!fp) {
tprintf("Cannot open file \"%s\"!\n", stream.str().c_str());
return false;
const uint8_t *font;
std::ifstream input(stream.str().c_str(), std::ios::in | std::ios::binary);
std::vector<unsigned char> buffer(std::istreambuf_iterator<char>(input), {});
auto size = buffer.size();
if (size) {
font = buffer.data();
} else {
#if !defined(NDEBUG)
tprintf("Cannot open file \"%s\"!\nUsing internal glyphless font.\n", stream.str().c_str());
#endif
font = pdf_ttf;
size = sizeof(pdf_ttf);
}
fseek(fp, 0, SEEK_END);
auto size = std::ftell(fp);
if (size < 0) {
fclose(fp);
return false;
}
fseek(fp, 0, SEEK_SET);
const std::unique_ptr<char[]> buffer(new char[size]);
if (!tesseract::DeSerialize(fp, buffer.get(), size)) {
fclose(fp);
return false;
}
fclose(fp);
// FONTFILE2
stream.str("");
stream <<
"8 0 obj\n"
"<<\n"
" /Length " << size << "\n"
" /Length1 " << size << "\n"
">>\n"
"stream\n";
stream << "8 0 obj\n"
"<<\n"
" /Length "
<< size
<< "\n"
" /Length1 "
<< size
<< "\n"
">>\n"
"stream\n";
AppendString(stream.str().c_str());
objsize = stream.str().size();
AppendData(buffer.get(), size);
objsize = stream.str().size();
AppendData(reinterpret_cast<const char *>(font), size);
objsize += size;
AppendString(endstream_endobj);
objsize += strlen(endstream_endobj);
@ -660,11 +652,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
return true;
}
bool TessPDFRenderer::imageToPDFObj(Pix *pix,
const char* filename,
long int objnum,
char **pdf_object,
long int* pdf_object_size,
bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
const int jpg_quality) {
if (!pdf_object_size || !pdf_object)
return false;
@ -689,7 +678,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
const char *group4 = "";
const char *filter;
switch(cid->type) {
switch (cid->type) {
case L_FLATE_ENCODE:
filter = "/FlateDecode";
break;
@ -715,15 +704,15 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
// Use "C" locale (needed for int values larger than 999).
colorspace.imbue(std::locale::classic());
if (cid->ncolors > 0) {
colorspace
<< " /ColorSpace [ /Indexed /DeviceRGB " << (cid->ncolors - 1)
<< " " << cid->cmapdatahex << " ]\n";
colorspace << " /ColorSpace [ /Indexed /DeviceRGB " << (cid->ncolors - 1) << " "
<< cid->cmapdatahex << " ]\n";
} else {
switch (cid->spp) {
case 1:
if (cid->bps == 1 && pixGetInputFormat(pix) == IFF_PNG) {
colorspace.str(" /ColorSpace /DeviceGray\n"
" /Decode [1 0]\n");
colorspace.str(
" /ColorSpace /DeviceGray\n"
" /Decode [1 0]\n");
} else {
colorspace.str(" /ColorSpace /DeviceGray\n");
}
@ -743,29 +732,43 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
std::stringstream b1;
// Use "C" locale (needed for int values larger than 999).
b1.imbue(std::locale::classic());
b1 <<
objnum << " 0 obj\n"
"<<\n"
" /Length " << cid->nbytescomp << "\n"
" /Subtype /Image\n";
b1 << objnum
<< " 0 obj\n"
"<<\n"
" /Length "
<< cid->nbytescomp
<< "\n"
" /Subtype /Image\n";
std::stringstream b2;
// Use "C" locale (needed for int values larger than 999).
b2.imbue(std::locale::classic());
b2 <<
" /Width " << cid->w << "\n"
" /Height " << cid->h << "\n"
" /BitsPerComponent " << cid->bps << "\n"
" /Filter " << filter << "\n"
" /DecodeParms\n"
" <<\n"
" /Predictor " << predictor << "\n"
" /Colors " << cid->spp << "\n" << group4 <<
" /Columns " << cid->w << "\n"
" /BitsPerComponent " << cid->bps << "\n"
" >>\n"
">>\n"
"stream\n";
b2 << " /Width " << cid->w
<< "\n"
" /Height "
<< cid->h
<< "\n"
" /BitsPerComponent "
<< cid->bps
<< "\n"
" /Filter "
<< filter
<< "\n"
" /DecodeParms\n"
" <<\n"
" /Predictor "
<< predictor
<< "\n"
" /Colors "
<< cid->spp << "\n"
<< group4 << " /Columns " << cid->w
<< "\n"
" /BitsPerComponent "
<< cid->bps
<< "\n"
" >>\n"
">>\n"
"stream\n";
const char *b3 =
"endstream\n"
@ -776,8 +779,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
size_t b3_len = strlen(b3);
size_t colorspace_len = colorspace.str().size();
*pdf_object_size =
b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len;
*pdf_object_size = b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len;
*pdf_object = new char[*pdf_object_size];
char *p = *pdf_object;
@ -794,9 +796,9 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
return true;
}
bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
Pix *pix = api->GetInputImage();
const char* filename = api->GetInputName();
const char *filename = api->GetInputName();
int ppi = api->GetSourceYResolution();
if (!pix || ppi <= 0)
return false;
@ -815,21 +817,26 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
// Use "C" locale (needed for double values width and height).
stream.imbue(std::locale::classic());
stream.precision(2);
stream << std::fixed <<
obj_ << " 0 obj\n"
"<<\n"
" /Type /Page\n"
" /Parent 2 0 R\n" // Pages object
" /MediaBox [0 0 " << width << " " << height << "]\n"
" /Contents " << (obj_ + 1) << " 0 R\n" // Contents object
" /Resources\n"
" <<\n"
" " << xobject.str() << // Image object
" /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
" /Font << /f-0-0 3 0 R >>\n" // Type0 Font
" >>\n"
">>\n"
"endobj\n";
stream << std::fixed << obj_
<< " 0 obj\n"
"<<\n"
" /Type /Page\n"
" /Parent 2 0 R\n" // Pages object
" /MediaBox [0 0 "
<< width << " " << height
<< "]\n"
" /Contents "
<< (obj_ + 1)
<< " 0 R\n" // Contents object
" /Resources\n"
" <<\n"
" "
<< xobject.str() << // Image object
" /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
" /Font << /f-0-0 3 0 R >>\n" // Type0 Font
" >>\n"
">>\n"
"endobj\n";
pages_.push_back(obj_);
AppendPDFObject(stream.str().c_str());
@ -837,16 +844,18 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<char[]> pdftext(GetPDFTextObjects(api, width, height));
const size_t pdftext_len = strlen(pdftext.get());
size_t len;
unsigned char *comp_pdftext = zlibCompress(
reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
unsigned char *comp_pdftext =
zlibCompress(reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
long comp_pdftext_len = len;
stream.str("");
stream <<
obj_ << " 0 obj\n"
"<<\n"
" /Length " << comp_pdftext_len << " /Filter /FlateDecode\n"
">>\n"
"stream\n";
stream << obj_
<< " 0 obj\n"
"<<\n"
" /Length "
<< comp_pdftext_len
<< " /Filter /FlateDecode\n"
">>\n"
"stream\n";
AppendString(stream.str().c_str());
long objsize = stream.str().size();
AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
@ -863,8 +872,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
char *pdf_object = nullptr;
int jpg_quality;
api->GetIntVariable("jpg_quality", &jpg_quality);
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize,
jpg_quality)) {
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality)) {
return false;
}
AppendData(pdf_object, objsize);
@ -874,7 +882,6 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
return true;
}
bool TessPDFRenderer::EndDocumentHandler() {
// We reserved the /Pages object number early, so that the /Page
// objects could refer to their parent. We finally have enough
@ -884,16 +891,16 @@ bool TessPDFRenderer::EndDocumentHandler() {
// PAGES
const long int kPagesObjectNumber = 2;
offsets_[kPagesObjectNumber] = offsets_.back(); // manipulation #1
offsets_[kPagesObjectNumber] = offsets_.back(); // manipulation #1
std::stringstream stream;
// Use "C" locale (needed for int values larger than 999).
stream.imbue(std::locale::classic());
stream << kPagesObjectNumber << " 0 obj\n<<\n /Type /Pages\n /Kids [ ";
AppendString(stream.str().c_str());
size_t pages_objsize = stream.str().size();
for (size_t i = 0; i < pages_.unsigned_size(); i++) {
size_t pages_objsize = stream.str().size();
for (const auto &page : pages_) {
stream.str("");
stream << pages_[i] << " 0 R ";
stream << page << " 0 R ";
AppendString(stream.str().c_str());
pages_objsize += stream.str().size();
}
@ -901,10 +908,10 @@ bool TessPDFRenderer::EndDocumentHandler() {
stream << "]\n /Count " << pages_.size() << "\n>>\nendobj\n";
AppendString(stream.str().c_str());
pages_objsize += stream.str().size();
offsets_.back() += pages_objsize; // manipulation #2
offsets_.back() += pages_objsize; // manipulation #2
// INFO
STRING utf16_title = "FEFF"; // byte_order_marker
std::string utf16_title = "FEFF"; // byte_order_marker
std::vector<char32> unicodes = UNICHAR::UTF8ToUTF32(title());
char utf16[kMaxBytesPerCodepoint];
for (char32 code : unicodes) {
@ -913,16 +920,22 @@ bool TessPDFRenderer::EndDocumentHandler() {
}
}
char* datestr = l_getFormattedDate();
char *datestr = l_getFormattedDate();
stream.str("");
stream
<< obj_ << " 0 obj\n"
"<<\n"
" /Producer (Tesseract " << tesseract::TessBaseAPI::Version() << ")\n"
" /CreationDate (D:" << datestr << ")\n"
" /Title <" << utf16_title.c_str() << ">\n"
">>\n"
"endobj\n";
stream << obj_
<< " 0 obj\n"
"<<\n"
" /Producer (Tesseract "
<< tesseract::TessBaseAPI::Version()
<< ")\n"
" /CreationDate (D:"
<< datestr
<< ")\n"
" /Title <"
<< utf16_title.c_str()
<< ">\n"
">>\n"
"endobj\n";
lept_free(datestr);
AppendPDFObject(stream.str().c_str());
stream.str("");
@ -936,12 +949,15 @@ bool TessPDFRenderer::EndDocumentHandler() {
AppendString(stream.str().c_str());
}
stream.str("");
stream
<< "trailer\n<<\n /Size " << obj_ << "\n"
" /Root 1 0 R\n" // catalog
" /Info " << (obj_ - 1) << " 0 R\n" // info
">>\nstartxref\n" << offsets_.back() << "\n%%EOF\n";
stream << "trailer\n<<\n /Size " << obj_
<< "\n"
" /Root 1 0 R\n" // catalog
" /Info "
<< (obj_ - 1)
<< " 0 R\n" // info
">>\nstartxref\n"
<< offsets_.back() << "\n%%EOF\n";
AppendString(stream.str().c_str());
return true;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -16,29 +16,29 @@
///////////////////////////////////////////////////////////////////////
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
# include "config_auto.h"
#endif
#include <cstring>
#include <memory> // std::unique_ptr
#include <tesseract/baseapi.h>
#include <tesseract/genericvector.h>
#include <tesseract/renderer.h>
#include <cstring>
#include <memory> // std::unique_ptr
#include <string> // std::string
#include "serialis.h" // Serialize
namespace tesseract {
/**********************************************************************
* Base Renderer interface implementation
**********************************************************************/
TessResultRenderer::TessResultRenderer(const char *outputbase,
const char* extension)
: file_extension_(extension),
title_(""), imagenum_(-1),
fout_(stdout),
next_(nullptr),
happy_(true) {
TessResultRenderer::TessResultRenderer(const char *outputbase, const char *extension)
: file_extension_(extension)
, title_("")
, imagenum_(-1)
, fout_(stdout)
, next_(nullptr)
, happy_(true) {
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
std::string outfile = std::string(outputbase) + "." + extension;
fout_ = fopen(outfile.c_str(), "wb");
if (fout_ == nullptr) {
happy_ = false;
@ -56,10 +56,11 @@ TessResultRenderer::~TessResultRenderer() {
delete next_;
}
void TessResultRenderer::insert(TessResultRenderer* next) {
if (next == nullptr) return;
void TessResultRenderer::insert(TessResultRenderer *next) {
if (next == nullptr)
return;
TessResultRenderer* remainder = next_;
TessResultRenderer *remainder = next_;
next_ = next;
if (remainder) {
while (next->next_ != nullptr) {
@ -69,8 +70,9 @@ void TessResultRenderer::insert(TessResultRenderer* next) {
}
}
bool TessResultRenderer::BeginDocument(const char* title) {
if (!happy_) return false;
bool TessResultRenderer::BeginDocument(const char *title) {
if (!happy_)
return false;
title_ = title;
imagenum_ = -1;
bool ok = BeginDocumentHandler();
@ -80,8 +82,9 @@ bool TessResultRenderer::BeginDocument(const char* title) {
return ok;
}
bool TessResultRenderer::AddImage(TessBaseAPI* api) {
if (!happy_) return false;
bool TessResultRenderer::AddImage(TessBaseAPI *api) {
if (!happy_)
return false;
++imagenum_;
bool ok = AddImageHandler(api);
if (next_) {
@ -91,7 +94,8 @@ bool TessResultRenderer::AddImage(TessBaseAPI* api) {
}
bool TessResultRenderer::EndDocument() {
if (!happy_) return false;
if (!happy_)
return false;
bool ok = EndDocumentHandler();
if (next_) {
ok = next_->EndDocument() && ok;
@ -99,12 +103,13 @@ bool TessResultRenderer::EndDocument() {
return ok;
}
void TessResultRenderer::AppendString(const char* s) {
void TessResultRenderer::AppendString(const char *s) {
AppendData(s, strlen(s));
}
void TessResultRenderer::AppendData(const char* s, int len) {
if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
void TessResultRenderer::AppendData(const char *s, int len) {
if (!tesseract::Serialize(fout_, s, len))
happy_ = false;
fflush(fout_);
}
@ -116,15 +121,13 @@ bool TessResultRenderer::EndDocumentHandler() {
return happy_;
}
/**********************************************************************
* UTF8 Text Renderer interface implementation
**********************************************************************/
TessTextRenderer::TessTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "txt") {
}
: TessResultRenderer(outputbase, "txt") {}
bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessTextRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
if (utf8 == nullptr) {
return false;
@ -132,7 +135,7 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
AppendString(utf8.get());
const char* pageSeparator = api->GetStringVariable("page_separator");
const char *pageSeparator = api->GetStringVariable("page_separator");
if (pageSeparator != nullptr && *pageSeparator != '\0') {
AppendString(pageSeparator);
}
@ -143,12 +146,11 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* TSV Text Renderer interface implementation
**********************************************************************/
TessTsvRenderer::TessTsvRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "tsv") {
TessTsvRenderer::TessTsvRenderer(const char *outputbase) : TessResultRenderer(outputbase, "tsv") {
font_info_ = false;
}
TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info)
: TessResultRenderer(outputbase, "tsv") {
font_info_ = font_info;
}
@ -161,11 +163,14 @@ bool TessTsvRenderer::BeginDocumentHandler() {
return true;
}
bool TessTsvRenderer::EndDocumentHandler() { return true; }
bool TessTsvRenderer::EndDocumentHandler() {
return true;
}
bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessTsvRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
if (tsv == nullptr) return false;
if (tsv == nullptr)
return false;
AppendString(tsv.get());
@ -176,12 +181,12 @@ bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
* UNLV Text Renderer interface implementation
**********************************************************************/
TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "unlv") {
}
: TessResultRenderer(outputbase, "unlv") {}
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
if (unlv == nullptr) return false;
if (unlv == nullptr)
return false;
AppendString(unlv.get());
@ -192,12 +197,12 @@ bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
* BoxText Renderer interface implementation
**********************************************************************/
TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "box") {
}
: TessResultRenderer(outputbase, "box") {}
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
if (text == nullptr) return false;
if (text == nullptr)
return false;
AppendString(text.get());
@ -209,12 +214,12 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
/**********************************************************************
* Osd Text Renderer interface implementation
**********************************************************************/
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "osd") {}
TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(outputbase, "osd") {}
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
char* osd = api->GetOsdText(imagenum());
if (osd == nullptr) return false;
bool TessOsdRenderer::AddImageHandler(TessBaseAPI *api) {
char *osd = api->GetOsdText(imagenum());
if (osd == nullptr)
return false;
AppendString(osd);
delete[] osd;
@ -224,4 +229,4 @@ bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract
} // namespace tesseract

View File

@ -18,42 +18,44 @@
// Include automatically generated configuration file if running autoconf
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
# include "config_auto.h"
#endif
#include <cerrno> // for errno
#include <cerrno> // for errno
#if defined(__USE_GNU)
# include <cfenv> // for feenableexcept
#endif
#include <iostream>
#include "allheaders.h"
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include "dict.h"
#if defined(USE_OPENCL)
#include "openclwrapper.h" // for OpenclDevice
# include "openclwrapper.h" // for OpenclDevice
#endif
#include <tesseract/renderer.h>
#include "simddetect.h"
#include "tprintf.h" // for tprintf
#include "tprintf.h" // for tprintf
#ifdef _OPENMP
#include <omp.h>
# include <omp.h>
#endif
#if defined(HAVE_LIBARCHIVE)
#include <archive.h>
# include <archive.h>
#endif
#if defined(HAVE_LIBCURL)
#include <curl/curl.h>
# include <curl/curl.h>
#endif
#if defined(_WIN32)
#include <fcntl.h>
#include <io.h>
#if defined(HAVE_TIFFIO_H)
# include <fcntl.h>
# include <io.h>
# if defined(HAVE_TIFFIO_H)
#include <tiffio.h>
# include <tiffio.h>
static void Win32ErrorHandler(const char* module, const char* fmt,
va_list ap) {
static void Win32ErrorHandler(const char *module, const char *fmt, va_list ap) {
if (module != nullptr) {
fprintf(stderr, "%s: ", module);
}
@ -61,8 +63,7 @@ static void Win32ErrorHandler(const char* module, const char* fmt,
fprintf(stderr, ".\n");
}
static void Win32WarningHandler(const char* module, const char* fmt,
va_list ap) {
static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) {
if (module != nullptr) {
fprintf(stderr, "%s: ", module);
}
@ -71,27 +72,30 @@ static void Win32WarningHandler(const char* module, const char* fmt,
fprintf(stderr, ".\n");
}
#endif /* HAVE_TIFFIO_H */
# endif /* HAVE_TIFFIO_H */
class AutoWin32ConsoleOutputCP {
public:
public:
explicit AutoWin32ConsoleOutputCP(UINT codeCP) {
oldCP_ = GetConsoleOutputCP();
oldCP_ = GetConsoleOutputCP();
SetConsoleOutputCP(codeCP);
}
~AutoWin32ConsoleOutputCP() {
SetConsoleOutputCP(oldCP_);
~AutoWin32ConsoleOutputCP() {
SetConsoleOutputCP(oldCP_);
}
private:
private:
UINT oldCP_;
};
static AutoWin32ConsoleOutputCP autoWin32ConsoleOutputCP(CP_UTF8);
#endif // _WIN32
#endif // _WIN32
using namespace tesseract;
static void PrintVersionInfo() {
char* versionStrP;
char *versionStrP;
printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
@ -112,22 +116,18 @@ static void PrintVersionInfo() {
printf(" Found %u platform(s).\n", num_platforms);
for (unsigned n = 0; n < num_platforms; n++) {
char info[256];
if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
CL_SUCCESS) {
if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) == CL_SUCCESS) {
printf(" Platform %u name: %s.\n", n + 1, info);
}
if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
CL_SUCCESS) {
if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) == CL_SUCCESS) {
printf(" Version: %s.\n", info);
}
cl_device_id devices[2];
cl_uint num_devices;
if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
&num_devices) == CL_SUCCESS) {
if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices, &num_devices) == CL_SUCCESS) {
printf(" Found %u device(s).\n", num_devices);
for (unsigned i = 0; i < num_devices; ++i) {
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
CL_SUCCESS) {
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) == CL_SUCCESS) {
printf(" Device %u name: %s.\n", i + 1, info);
}
}
@ -135,12 +135,23 @@ static void PrintVersionInfo() {
}
}
#endif
if (tesseract::SIMDDetect::IsAVX512BWAvailable()) printf(" Found AVX512BW\n");
if (tesseract::SIMDDetect::IsAVX512FAvailable()) printf(" Found AVX512F\n");
if (tesseract::SIMDDetect::IsAVX2Available()) printf(" Found AVX2\n");
if (tesseract::SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n");
if (tesseract::SIMDDetect::IsFMAAvailable()) printf(" Found FMA\n");
if (tesseract::SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n");
#if defined(HAVE_NEON) || defined(__aarch64__)
if (tesseract::SIMDDetect::IsNEONAvailable())
printf(" Found NEON\n");
#else
if (tesseract::SIMDDetect::IsAVX512BWAvailable())
printf(" Found AVX512BW\n");
if (tesseract::SIMDDetect::IsAVX512FAvailable())
printf(" Found AVX512F\n");
if (tesseract::SIMDDetect::IsAVX2Available())
printf(" Found AVX2\n");
if (tesseract::SIMDDetect::IsAVXAvailable())
printf(" Found AVX\n");
if (tesseract::SIMDDetect::IsFMAAvailable())
printf(" Found FMA\n");
if (tesseract::SIMDDetect::IsSSEAvailable())
printf(" Found SSE\n");
#endif
#ifdef _OPENMP
printf(" Found OpenMP %d\n", _OPENMP);
#endif
@ -149,19 +160,20 @@ static void PrintVersionInfo() {
printf(" Found %s\n", archive_version_details());
# else
printf(" Found %s\n", archive_version_string());
# endif // ARCHIVE_VERSION_NUMBER
#endif // HAVE_LIBARCHIVE
# endif // ARCHIVE_VERSION_NUMBER
#endif // HAVE_LIBARCHIVE
#if defined(HAVE_LIBCURL)
printf(" Found %s\n", curl_version());
#endif
}
static void PrintHelpForPSM() {
const char* msg =
const char *msg =
"Page segmentation modes:\n"
" 0 Orientation and script detection (OSD) only.\n"
" 1 Automatic page segmentation with OSD.\n"
" 2 Automatic page segmentation, but no OSD, or OCR. (not implemented)\n"
" 2 Automatic page segmentation, but no OSD, or OCR. (not "
"implemented)\n"
" 3 Fully automatic page segmentation, but no OSD. (Default)\n"
" 4 Assume a single column of text of variable sizes.\n"
" 5 Assume a single uniform block of vertically aligned text.\n"
@ -177,8 +189,7 @@ static void PrintHelpForPSM() {
" bypassing hacks that are Tesseract-specific.\n";
#ifdef DISABLED_LEGACY_ENGINE
const char* disabled_osd_msg =
"\nNOTE: The OSD modes are currently disabled.\n";
const char *disabled_osd_msg = "\nNOTE: The OSD modes are currently disabled.\n";
printf("%s%s", msg, disabled_osd_msg);
#else
printf("%s", msg);
@ -187,7 +198,7 @@ static void PrintHelpForPSM() {
#ifndef DISABLED_LEGACY_ENGINE
static void PrintHelpForOEM() {
const char* msg =
const char *msg =
"OCR Engine modes:\n"
" 0 Legacy engine only.\n"
" 1 Neural nets LSTM engine only.\n"
@ -196,9 +207,9 @@ static void PrintHelpForOEM() {
printf("%s", msg);
}
#endif // ndef DISABLED_LEGACY_ENGINE
#endif // ndef DISABLED_LEGACY_ENGINE
static void PrintHelpExtra(const char* program) {
static void PrintHelpExtra(const char *program) {
printf(
"Usage:\n"
" %s --help | --help-extra | --help-psm | "
@ -208,7 +219,8 @@ static void PrintHelpExtra(const char* program) {
"--version\n"
" %s --list-langs [--tessdata-dir PATH]\n"
" %s --print-parameters [options...] [configfile...]\n"
" %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n"
" %s imagename|imagelist|stdin outputbase|stdout [options...] "
"[configfile...]\n"
"\n"
"OCR options:\n"
" --tessdata-dir PATH Specify the location of tessdata path.\n"
@ -224,8 +236,7 @@ static void PrintHelpExtra(const char* program) {
#endif
"NOTE: These options must occur before any configfile.\n"
"\n",
program, program, program, program
);
program, program, program, program);
PrintHelpForPSM();
#ifndef DISABLED_LEGACY_ENGINE
@ -244,11 +255,10 @@ static void PrintHelpExtra(const char* program) {
#endif
" -v, --version Show version information.\n"
" --list-langs List available languages for tesseract engine.\n"
" --print-parameters Print tesseract parameters.\n"
);
" --print-parameters Print tesseract parameters.\n");
}
static void PrintHelpMessage(const char* program) {
static void PrintHelpMessage(const char *program) {
printf(
"Usage:\n"
" %s --help | --help-extra | --version\n"
@ -263,22 +273,23 @@ static void PrintHelpMessage(const char* program) {
" --help Show this help message.\n"
" --help-extra Show extra help for advanced users.\n"
" --version Show version information.\n"
" --list-langs List available languages for tesseract engine.\n",
program, program, program
);
" --list-langs List available languages for tesseract "
"engine.\n",
program, program, program);
}
static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
char** argv) {
static bool SetVariablesFromCLArgs(tesseract::TessBaseAPI *api, int argc, char **argv) {
bool success = true;
char opt1[256], opt2[255];
for (int i = 0; i < argc; i++) {
if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
strncpy(opt1, argv[i + 1], 255);
opt1[255] = '\0';
char* p = strchr(opt1, '=');
char *p = strchr(opt1, '=');
if (!p) {
fprintf(stderr, "Missing = in configvar assignment\n");
exit(EXIT_FAILURE);
success = false;
break;
}
*p = 0;
strncpy(opt2, strchr(argv[i + 1], '=') + 1, sizeof(opt2) - 1);
@ -290,15 +301,15 @@ static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
}
}
}
return success;
}
static void PrintLangsList(tesseract::TessBaseAPI* api) {
GenericVector<STRING> languages;
static void PrintLangsList(tesseract::TessBaseAPI *api) {
std::vector<std::string> languages;
api->GetAvailableLanguagesAsVector(&languages);
printf("List of available languages (%d):\n", languages.size());
for (int index = 0; index < languages.size(); ++index) {
STRING& string = languages[index];
printf("%s\n", string.c_str());
printf("List of available languages (%zu):\n", languages.size());
for (const auto &language : languages) {
printf("%s\n", language.c_str());
}
api->End();
}
@ -322,27 +333,25 @@ static void PrintBanner() {
* It would be simpler if we could set the value before Init,
* but that doesn't work.
*/
static void FixPageSegMode(tesseract::TessBaseAPI* api,
tesseract::PageSegMode pagesegmode) {
static void FixPageSegMode(tesseract::TessBaseAPI *api, tesseract::PageSegMode pagesegmode) {
if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api->SetPageSegMode(pagesegmode);
}
static void checkArgValues(int arg, const char* mode, int count) {
static bool checkArgValues(int arg, const char *mode, int count) {
if (arg >= count || arg < 0) {
printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
exit(EXIT_SUCCESS);
return false;
}
return true;
}
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
static void ParseArgs(const int argc, char** argv, const char** lang,
const char** image, const char** outputbase,
const char** datapath, l_int32* dpi, bool* list_langs,
bool* print_parameters, GenericVector<STRING>* vars_vec,
GenericVector<STRING>* vars_values, l_int32* arg_i,
tesseract::PageSegMode* pagesegmode,
tesseract::OcrEngineMode* enginemode) {
static bool ParseArgs(int argc, char **argv, const char **lang, const char **image,
const char **outputbase, const char **datapath, l_int32 *dpi,
bool *list_langs, bool *print_parameters, std::vector<std::string> *vars_vec,
std::vector<std::string> *vars_values, l_int32 *arg_i,
tesseract::PageSegMode *pagesegmode, tesseract::OcrEngineMode *enginemode) {
bool noocr = false;
int i;
for (i = 1; i < argc && (*outputbase == nullptr || argv[i][0] == '-'); i++) {
@ -363,8 +372,7 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
PrintHelpForOEM();
noocr = true;
#endif
} else if ((strcmp(argv[i], "-v") == 0) ||
(strcmp(argv[i], "--version") == 0)) {
} else if ((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) {
PrintVersionInfo();
noocr = true;
} else if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
@ -388,13 +396,17 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
noocr = true;
*list_langs = true;
} else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
checkArgValues(atoi(argv[i+1]), "PSM", tesseract::PSM_COUNT);
if (!checkArgValues(atoi(argv[i + 1]), "PSM", tesseract::PSM_COUNT)) {
return false;
}
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
++i;
} else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
#ifndef DISABLED_LEGACY_ENGINE
int oem = atoi(argv[i + 1]);
checkArgValues(oem, "OEM", tesseract::OEM_COUNT);
if (!checkArgValues(oem, "OEM", tesseract::OEM_COUNT)) {
return false;
}
*enginemode = static_cast<tesseract::OcrEngineMode>(oem);
#endif
++i;
@ -409,7 +421,7 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
} else {
// Unexpected argument.
fprintf(stderr, "Error, unknown command line argument '%s'\n", argv[i]);
exit(EXIT_FAILURE);
return false;
}
}
@ -429,18 +441,19 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
if (*outputbase == nullptr && noocr == false) {
PrintHelpMessage(argv[0]);
exit(EXIT_FAILURE);
return false;
}
return true;
}
static void PreloadRenderers(
tesseract::TessBaseAPI* api,
tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
tesseract::PageSegMode pagesegmode, const char* outputbase) {
static void PreloadRenderers(tesseract::TessBaseAPI *api,
tesseract::PointerVector<tesseract::TessResultRenderer> *renderers,
tesseract::PageSegMode pagesegmode, const char *outputbase) {
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
#ifndef DISABLED_LEGACY_ENGINE
renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
#endif // ndef DISABLED_LEGACY_ENGINE
#endif // ndef DISABLED_LEGACY_ENGINE
} else {
bool error = false;
bool b;
@ -448,28 +461,24 @@ static void PreloadRenderers(
if (b) {
bool font_info;
api->GetBoolVariable("hocr_font_info", &font_info);
auto* renderer =
new tesseract::TessHOcrRenderer(outputbase, font_info);
auto *renderer = new tesseract::TessHOcrRenderer(outputbase, font_info);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create hOCR output file: %s\n",
strerror(errno));
tprintf("Error, could not create hOCR output file: %s\n", strerror(errno));
error = true;
}
}
api->GetBoolVariable("tessedit_create_alto", &b);
if (b) {
auto* renderer =
new tesseract::TessAltoRenderer(outputbase);
auto *renderer = new tesseract::TessAltoRenderer(outputbase);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create ALTO output file: %s\n",
strerror(errno));
tprintf("Error, could not create ALTO output file: %s\n", strerror(errno));
error = true;
}
}
@ -478,35 +487,30 @@ static void PreloadRenderers(
if (b) {
bool font_info;
api->GetBoolVariable("hocr_font_info", &font_info);
auto* renderer =
new tesseract::TessTsvRenderer(outputbase, font_info);
auto *renderer = new tesseract::TessTsvRenderer(outputbase, font_info);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create TSV output file: %s\n",
strerror(errno));
tprintf("Error, could not create TSV output file: %s\n", strerror(errno));
error = true;
}
}
api->GetBoolVariable("tessedit_create_pdf", &b);
if (b) {
#ifdef WIN32
if (_setmode(_fileno(stdout), _O_BINARY) == -1)
tprintf("ERROR: cin to binary: %s", strerror(errno));
#endif // WIN32
#ifdef WIN32
if (_setmode(_fileno(stdout), _O_BINARY) == -1)
tprintf("ERROR: cin to binary: %s", strerror(errno));
#endif // WIN32
bool textonly;
api->GetBoolVariable("textonly_pdf", &textonly);
auto* renderer =
new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(),
textonly);
auto *renderer = new tesseract::TessPDFRenderer(outputbase, api->GetDatapath(), textonly);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create PDF output file: %s\n",
strerror(errno));
tprintf("Error, could not create PDF output file: %s\n", strerror(errno));
error = true;
}
}
@ -514,56 +518,48 @@ static void PreloadRenderers(
api->GetBoolVariable("tessedit_write_unlv", &b);
if (b) {
api->SetVariable("unlv_tilde_crunching", "true");
auto* renderer =
new tesseract::TessUnlvRenderer(outputbase);
auto *renderer = new tesseract::TessUnlvRenderer(outputbase);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create UNLV output file: %s\n",
strerror(errno));
tprintf("Error, could not create UNLV output file: %s\n", strerror(errno));
error = true;
}
}
api->GetBoolVariable("tessedit_create_lstmbox", &b);
if (b) {
auto* renderer =
new tesseract::TessLSTMBoxRenderer(outputbase);
auto *renderer = new tesseract::TessLSTMBoxRenderer(outputbase);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create LSTM BOX output file: %s\n",
strerror(errno));
tprintf("Error, could not create LSTM BOX output file: %s\n", strerror(errno));
error = true;
}
}
api->GetBoolVariable("tessedit_create_boxfile", &b);
if (b) {
auto* renderer =
new tesseract::TessBoxTextRenderer(outputbase);
auto *renderer = new tesseract::TessBoxTextRenderer(outputbase);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create BOX output file: %s\n",
strerror(errno));
tprintf("Error, could not create BOX output file: %s\n", strerror(errno));
error = true;
}
}
api->GetBoolVariable("tessedit_create_wordstrbox", &b);
if (b) {
auto* renderer =
new tesseract::TessWordStrBoxRenderer(outputbase);
auto *renderer = new tesseract::TessWordStrBoxRenderer(outputbase);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create WordStr BOX output file: %s\n",
strerror(errno));
tprintf("Error, could not create WordStr BOX output file: %s\n", strerror(errno));
error = true;
}
}
@ -573,14 +569,12 @@ static void PreloadRenderers(
// Create text output if no other output was requested
// even if text output was not explicitly requested unless
// there was an error.
auto* renderer =
new tesseract::TessTextRenderer(outputbase);
auto *renderer = new tesseract::TessTextRenderer(outputbase);
if (renderer->happy()) {
renderers->push_back(renderer);
} else {
delete renderer;
tprintf("Error, could not create TXT output file: %s\n",
strerror(errno));
tprintf("Error, could not create TXT output file: %s\n", strerror(errno));
}
}
}
@ -595,17 +589,25 @@ static void PreloadRenderers(
}
}
/**********************************************************************
* main()
*
**********************************************************************/
int main(int argc, char** argv) {
const char* lang = nullptr;
const char* image = nullptr;
const char* outputbase = nullptr;
const char* datapath = nullptr;
int main(int argc, char **argv) {
#if defined(__USE_GNU)
// Raise SIGFPE.
# if defined(__clang__)
// clang creates code which causes some FP exceptions, so don't enable those.
feenableexcept(FE_DIVBYZERO);
# else
feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
# endif
#endif
const char *lang = nullptr;
const char *image = nullptr;
const char *outputbase = nullptr;
const char *datapath = nullptr;
bool list_langs = false;
bool print_parameters = false;
l_int32 dpi = 0;
@ -616,13 +618,10 @@ int main(int argc, char** argv) {
#else
tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
#endif
/* main() calls functions like ParseArgs which call exit().
* This results in memory leaks if vars_vec and vars_values are
* declared as auto variables (destructor is not called then). */
static GenericVector<STRING> vars_vec;
static GenericVector<STRING> vars_values;
std::vector<std::string> vars_vec;
std::vector<std::string> vars_values;
#if !defined(DEBUG)
#if defined(NDEBUG)
// Disable debugging and informational messages from Leptonica.
setMsgSeverity(L_SEVERITY_ERROR);
#endif
@ -633,9 +632,10 @@ int main(int argc, char** argv) {
TIFFSetWarningHandler(Win32WarningHandler);
#endif // HAVE_TIFFIO_H && _WIN32
ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi,
&list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i,
&pagesegmode, &enginemode);
if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
&print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode, &enginemode)) {
return EXIT_FAILURE;
}
if (lang == nullptr) {
// Set default language if none was given.
@ -650,15 +650,16 @@ int main(int argc, char** argv) {
// first TessBaseAPI must be destructed, DawgCache must be the last object.
tesseract::Dict::GlobalDawgCache();
// Avoid memory leak caused by auto variable when return is called.
static tesseract::TessBaseAPI api;
tesseract::TessBaseAPI api;
api.SetOutputName(outputbase);
const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
argc - arg_i, &vars_vec, &vars_values, false);
const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
&vars_vec, &vars_values, false);
SetVariablesFromCLArgs(&api, argc, argv);
if (!SetVariablesFromCLArgs(&api, argc, argv)) {
return EXIT_FAILURE;
}
// SIMD settings might be overridden by config variable.
tesseract::SIMDDetect::Update();
@ -674,7 +675,7 @@ int main(int argc, char** argv) {
}
if (print_parameters) {
FILE* fout = stdout;
FILE *fout = stdout;
fprintf(stdout, "Tesseract parameters:\n");
api.PrintVariables(fout);
api.End();
@ -692,7 +693,7 @@ int main(int argc, char** argv) {
if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
int ret_val = EXIT_SUCCESS;
Pix* pixs = pixRead(image);
Pix *pixs = pixRead(image);
if (!pixs) {
fprintf(stderr, "Leptonica can't process input file: %s\n", image);
return 2;
@ -705,7 +706,7 @@ int main(int argc, char** argv) {
tesseract::TextlineOrder order;
float deskew_angle;
const tesseract::PageIterator* it = api.AnalyseLayout();
const tesseract::PageIterator *it = api.AnalyseLayout();
if (it) {
// TODO: Implement output of page segmentation, see documentation
// ("Automatic page segmentation, but no OSD, or OCR").
@ -728,35 +729,36 @@ int main(int argc, char** argv) {
// ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train.
// In this mode no other OCR result files are written.
bool b = false;
bool in_training_mode =
(api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
bool in_training_mode = (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
#ifdef DISABLED_LEGACY_ENGINE
auto cur_psm = api.GetPageSegMode();
auto osd_warning = std::string("");
if (cur_psm == tesseract::PSM_OSD_ONLY) {
const char* disabled_osd_msg =
"\nERROR: The page segmentation mode 0 (OSD Only) is currently disabled.\n\n";
fprintf(stderr, "%s", disabled_osd_msg);
const char *disabled_osd_msg =
"\nERROR: The page segmentation mode 0 (OSD Only) is currently "
"disabled.\n\n";
fprintf(stderr, "%s", disabled_osd_msg);
return EXIT_FAILURE;
} else if (cur_psm == tesseract::PSM_AUTO_OSD) {
api.SetPageSegMode(tesseract::PSM_AUTO);
osd_warning +=
"\nWarning: The page segmentation mode 1 (Auto+OSD) is currently disabled. "
"Using PSM 3 (Auto) instead.\n\n";
api.SetPageSegMode(tesseract::PSM_AUTO);
osd_warning +=
"\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
"disabled. "
"Using PSM 3 (Auto) instead.\n\n";
} else if (cur_psm == tesseract::PSM_SPARSE_TEXT_OSD) {
api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT);
osd_warning +=
"\nWarning: The page segmentation mode 12 (Sparse text + OSD) is currently disabled. "
"Using PSM 11 (Sparse text) instead.\n\n";
api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT);
osd_warning +=
"\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
"currently disabled. "
"Using PSM 11 (Sparse text) instead.\n\n";
}
#endif // def DISABLED_LEGACY_ENGINE
#endif // def DISABLED_LEGACY_ENGINE
// Avoid memory leak caused by auto variable when exit() is called.
static tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
if (in_training_mode) {
renderers.push_back(nullptr);
@ -765,16 +767,16 @@ int main(int argc, char** argv) {
}
bool banner = false;
if (outputbase != nullptr && strcmp(outputbase, "-") &&
strcmp(outputbase, "stdout")) {
if (outputbase != nullptr && strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
banner = true;
}
if (!renderers.empty()) {
if (banner) PrintBanner();
if (banner)
PrintBanner();
#ifdef DISABLED_LEGACY_ENGINE
if (!osd_warning.empty()) {
fprintf(stderr, "%s",osd_warning.c_str());
fprintf(stderr, "%s", osd_warning.c_str());
}
#endif
bool succeed = api.ProcessPages(image, nullptr, 0, renderers[0]);

View File

@ -16,9 +16,9 @@
*
**********************************************************************/
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "tesseractclass.h" // for Tesseract
#include "tesseractclass.h" // for Tesseract
namespace tesseract {
@ -28,16 +28,16 @@ namespace tesseract {
* file. Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetWordStrBoxText(int page_number=0) {
char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
return nullptr;
STRING wordstr_box_str("");
std::string wordstr_box_str;
int left = 0, top = 0, right = 0, bottom = 0;
bool first_line = true;
LTRResultIterator* res_it = GetLTRIterator();
LTRResultIterator *res_it = GetLTRIterator();
while (!res_it->Empty(RIL_BLOCK)) {
if (res_it->Empty(RIL_WORD)) {
res_it->Next(RIL_WORD);
@ -46,41 +46,40 @@ char* TessBaseAPI::GetWordStrBoxText(int page_number=0) {
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
if (!first_line) {
wordstr_box_str.add_str_int("\n\t ", right + 1);
wordstr_box_str.add_str_int(" ", image_height_ - bottom);
wordstr_box_str.add_str_int(" ", right + 5);
wordstr_box_str.add_str_int(" ", image_height_ - top);
wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
wordstr_box_str += "\n\t " + std::to_string(right + 1);
wordstr_box_str += " " + std::to_string(image_height_ - bottom);
wordstr_box_str += " " + std::to_string(right + 5);
wordstr_box_str += " " + std::to_string(image_height_ - top);
wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
wordstr_box_str += "\n";
} else {
first_line = false;
}
// Use bounding box for whole line for WordStr
res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
wordstr_box_str.add_str_int("WordStr ", left);
wordstr_box_str.add_str_int(" ", image_height_ - bottom);
wordstr_box_str.add_str_int(" ", right);
wordstr_box_str.add_str_int(" ", image_height_ - top);
wordstr_box_str.add_str_int(" ", page_number); // word
// Use bounding box for whole line for WordStr
res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
wordstr_box_str += "WordStr " + std::to_string(left);
wordstr_box_str += " " + std::to_string(image_height_ - bottom);
wordstr_box_str += " " + std::to_string(right);
wordstr_box_str += " " + std::to_string(image_height_ - top);
wordstr_box_str += " " + std::to_string(page_number); // word
wordstr_box_str += " #";
}
do {
wordstr_box_str +=
std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
wordstr_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
wordstr_box_str += " ";
res_it->Next(RIL_WORD);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
}
if (left != 0 && top != 0 && right != 0 && bottom != 0) {
wordstr_box_str.add_str_int("\n\t ", right + 1);
wordstr_box_str.add_str_int(" ", image_height_ - bottom);
wordstr_box_str.add_str_int(" ", right + 5);
wordstr_box_str.add_str_int(" ", image_height_ - top);
wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
wordstr_box_str += "\n\t " + std::to_string(right + 1);
wordstr_box_str += " " + std::to_string(image_height_ - bottom);
wordstr_box_str += " " + std::to_string(right + 5);
wordstr_box_str += " " + std::to_string(image_height_ - top);
wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
wordstr_box_str += "\n";
}
char* ret = new char[wordstr_box_str.length() + 1];
char *ret = new char[wordstr_box_str.length() + 1];
strcpy(ret, wordstr_box_str.c_str());
delete res_it;
return ret;
@ -89,17 +88,17 @@ char* TessBaseAPI::GetWordStrBoxText(int page_number=0) {
/**********************************************************************
* WordStrBox Renderer interface implementation
**********************************************************************/
TessWordStrBoxRenderer::TessWordStrBoxRenderer(const char* outputbase)
TessWordStrBoxRenderer::TessWordStrBoxRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "box") {}
bool TessWordStrBoxRenderer::AddImageHandler(TessBaseAPI* api) {
const std::unique_ptr<const char[]> wordstrbox(
api->GetWordStrBoxText(imagenum()));
if (wordstrbox == nullptr) return false;
bool TessWordStrBoxRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> wordstrbox(api->GetWordStrBoxText(imagenum()));
if (wordstrbox == nullptr)
return false;
AppendString(wordstrbox.get());
return true;
}
} // namespace tesseract.
} // namespace tesseract.

View File

@ -19,10 +19,11 @@
namespace tesseract {
// Computes and returns the dot product of the two n-vectors u and v.
double DotProductNative(const double* u, const double* v, int n) {
double DotProductNative(const double *u, const double *v, int n) {
double total = 0.0;
for (int k = 0; k < n; ++k) total += u[k] * v[k];
for (int k = 0; k < n; ++k)
total += u[k] * v[k];
return total;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -20,17 +20,17 @@
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
double DotProductNative(const double* u, const double* v, int n);
double DotProductNative(const double *u, const double *v, int n);
// Uses Intel AVX intrinsics to access the SIMD instruction set.
double DotProductAVX(const double* u, const double* v, int n);
double DotProductAVX(const double *u, const double *v, int n);
// Use Intel FMA.
double DotProductFMA(const double* u, const double* v, int n);
double DotProductFMA(const double *u, const double *v, int n);
// Uses Intel SSE intrinsics to access the SIMD instruction set.
double DotProductSSE(const double* u, const double* v, int n);
double DotProductSSE(const double *u, const double *v, int n);
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCT_H_
#endif // TESSERACT_ARCH_DOTPRODUCT_H_

View File

@ -16,18 +16,20 @@
///////////////////////////////////////////////////////////////////////
#if !defined(__AVX__)
#error Implementation only for AVX capable architectures
#endif
# if defined(__i686__) || defined(__x86_64__)
# error Implementation only for AVX capable architectures
# endif
#else
#include <immintrin.h>
#include <cstdint>
#include "dotproduct.h"
# include <immintrin.h>
# include <cstdint>
# include "dotproduct.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel AVX intrinsics to access the SIMD instruction set.
double DotProductAVX(const double* u, const double* v, int n) {
double DotProductAVX(const double *u, const double *v, int n) {
const unsigned quot = n / 8;
const unsigned rem = n % 8;
__m256d t0 = _mm256_setzero_pd();
@ -56,4 +58,6 @@ double DotProductAVX(const double* u, const double* v, int n) {
return result;
}
} // namespace tesseract.
} // namespace tesseract.
#endif

View File

@ -16,18 +16,20 @@
///////////////////////////////////////////////////////////////////////
#if !defined(__FMA__)
#error Implementation only for FMA capable architectures
#endif
# if defined(__i686__) || defined(__x86_64__)
# error Implementation only for FMA capable architectures
# endif
#else
#include <immintrin.h>
#include <cstdint>
#include "dotproduct.h"
# include <immintrin.h>
# include <cstdint>
# include "dotproduct.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel FMA intrinsics to access the SIMD instruction set.
double DotProductFMA(const double* u, const double* v, int n) {
double DotProductFMA(const double *u, const double *v, int n) {
const unsigned quot = n / 8;
const unsigned rem = n % 8;
__m256d t0 = _mm256_setzero_pd();
@ -54,4 +56,6 @@ double DotProductFMA(const double* u, const double* v, int n) {
return result;
}
} // namespace tesseract.
} // namespace tesseract.
#endif

View File

@ -16,19 +16,21 @@
///////////////////////////////////////////////////////////////////////
#if !defined(__SSE4_1__)
#error Implementation only for SSE 4.1 capable architectures
#endif
# if defined(__i686__) || defined(__x86_64__)
# error Implementation only for SSE 4.1 capable architectures
# endif
#else
#include <emmintrin.h>
#include <smmintrin.h>
#include <cstdint>
#include "dotproduct.h"
# include <emmintrin.h>
# include <smmintrin.h>
# include <cstdint>
# include "dotproduct.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
double DotProductSSE(const double* u, const double* v, int n) {
double DotProductSSE(const double *u, const double *v, int n) {
int max_offset = n - 2;
int offset = 0;
// Accumulate a set of 2 sums in sum, by loading pairs of 2 values from u and
@ -37,8 +39,7 @@ double DotProductSSE(const double* u, const double* v, int n) {
if (offset <= max_offset) {
offset = 2;
// Aligned load is reputedly faster but requires 16 byte aligned input.
if ((reinterpret_cast<uintptr_t>(u) & 15) == 0 &&
(reinterpret_cast<uintptr_t>(v) & 15) == 0) {
if ((reinterpret_cast<uintptr_t>(u) & 15) == 0 && (reinterpret_cast<uintptr_t>(v) & 15) == 0) {
// Use aligned load.
sum = _mm_load_pd(u);
__m128d floats2 = _mm_load_pd(v);
@ -78,4 +79,6 @@ double DotProductSSE(const double* u, const double* v, int n) {
return result;
}
} // namespace tesseract.
} // namespace tesseract.
#endif

View File

@ -2,7 +2,6 @@
// File: intsimdmatrix.cpp
// Description: Base class for 8-bit int SIMD matrix multipliers.
// Author: Ray Smith
// Created: Tue Aug 15 08:01:32 PST 2017
//
// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -17,33 +16,30 @@
///////////////////////////////////////////////////////////////////////
#include "intsimdmatrix.h"
#include <tesseract/genericvector.h> // for GenericVector
#include "matrix.h" // for GENERIC_2D_ARRAY
#include "simddetect.h" // for SIMDDetect
#include "matrix.h" // for GENERIC_2D_ARRAY
#include "simddetect.h" // for SIMDDetect
namespace tesseract {
const IntSimdMatrix* IntSimdMatrix::intSimdMatrix = nullptr;
const IntSimdMatrix *IntSimdMatrix::intSimdMatrix = nullptr;
// Computes a reshaped copy of the weight matrix w.
void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t>& w,
std::vector<int8_t>& shaped_w) const {
void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t> &shaped_w,
int32_t &rounded_num_out) const {
const int num_out = w.dim1();
const int num_in = w.dim2() - 1;
// The rounded-up sizes of the reshaped weight matrix, excluding biases.
int rounded_num_in = Roundup(num_in, num_inputs_per_group_);
int rounded_num_out = RoundOutputs(num_out);
rounded_num_out = RoundOutputs(num_out);
// Add the bias and compute the required size.
shaped_w.resize((rounded_num_in + 1) * rounded_num_out, 0);
int shaped_index = 0;
int output = 0;
// Each number of registers needs a different format! Iterates over the
// different numbers of registers (each a power of 2).
for (int num_registers = max_output_registers_; num_registers >= 1;
num_registers /= 2) {
for (int num_registers = max_output_registers_; num_registers >= 1; num_registers /= 2) {
// The number of outputs that we will generate with this many registers.
int num_outputs_per_register_set =
num_registers * num_outputs_per_register_;
int num_outputs_per_register_set = num_registers * num_outputs_per_register_;
// Use the max number of registers until we have to go fewer.
while (output + num_outputs_per_register_set <= rounded_num_out) {
// Accumulating outputs in registers saves iterating over the inputs, so
@ -64,7 +60,8 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t>& w,
// Append the bias weights for the register set.
for (int j = 0; j < num_outputs_per_register_set; ++j) {
int8_t weight = 0;
if (output + j < num_out) weight = w(output + j, num_in);
if (output + j < num_out)
weight = w(output + j, num_in);
shaped_w[shaped_index++] = weight;
}
output += num_outputs_per_register_set;
@ -76,19 +73,19 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t>& w,
// u is of size W.dim2() - 1 and the output v is of size W.dim1().
// u is imagined to have an extra element at the end with value 1, to
// implement the bias, but it doesn't actually have it.
void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t>& w,
const GenericVector<double>& scales,
const int8_t* u, double* v) {
void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w,
const std::vector<double> &scales, const int8_t *u, double *v) {
int num_out = w.dim1();
int num_in = w.dim2() - 1;
// Base implementation.
for (int i = 0; i < num_out; ++i) {
const int8_t* wi = w[i];
const int8_t *wi = w[i];
int total = 0;
for (int j = 0; j < num_in; ++j) total += wi[j] * u[j];
for (int j = 0; j < num_in; ++j)
total += wi[j] * u[j];
// Add in the bias and correct for integer values.
v[i] = (static_cast<double>(total) / INT8_MAX + wi[num_in]) * scales[i];
v[i] = (total + wi[num_in] * INT8_MAX) * scales[i];
}
}
} // namespace tesseract
} // namespace tesseract

View File

@ -2,7 +2,6 @@
// File: intsimdmatrix.h
// Description: Base class for 8-bit int SIMD matrix multipliers.
// Author: Ray Smith
// Created: Tue Aug 15 07:37:20 PST 2017
//
// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -19,15 +18,15 @@
#ifndef TESSERACT_ARCH_INTSIMDMATRIX_H_
#define TESSERACT_ARCH_INTSIMDMATRIX_H_
#include <tesseract/export.h>
#include <cstdint>
#include <vector>
namespace tesseract {
template <class T>
class GENERIC_2D_ARRAY;
template <typename T>
class GenericVector;
namespace tesseract {
// Base class for a SIMD function to multiply a matrix by a vector, with sources
// of 8-bit signed integer, and result in a double, after appropriate scaling.
@ -60,10 +59,10 @@ namespace tesseract {
// NOTE that, although the subclasses execute on different SIMD hardware, no
// virtual methods are needed, as the constructor sets up everything that
// is required to allow the base class implementation to do all the work.
struct IntSimdMatrix {
struct TESS_API IntSimdMatrix {
// Computes a reshaped copy of the weight matrix w.
void Init(const GENERIC_2D_ARRAY<int8_t>& w,
std::vector<int8_t>& shaped_w) const;
void Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t> &shaped_w,
int32_t &rounded_num_out) const;
// Rounds the size up to a multiple of the input register size (in int8_t).
int RoundInputs(int size) const {
@ -79,9 +78,8 @@ struct IntSimdMatrix {
// u is imagined to have an extra element at the end with value 1, to
// implement the bias, but it doesn't actually have it.
// Computes the base C++ implementation.
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t>& w,
const GenericVector<double>& scales,
const int8_t* u, double* v);
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, const std::vector<double> &scales,
const int8_t *u, double *v);
// Rounds the input up to a multiple of the given factor.
static int Roundup(int input, int factor) {
@ -97,9 +95,8 @@ struct IntSimdMatrix {
// RoundInputs above.
// The input will be over-read to the extent of the padding. There are no
// alignment requirements.
using MatrixDotVectorFunction = void (*)(int, int, const int8_t*,
const double*, const int8_t*,
double*);
using MatrixDotVectorFunction = void (*)(int, int, const int8_t *, const double *, const int8_t *,
double *);
MatrixDotVectorFunction matrixDotVectorFunction;
// Number of 32 bit outputs held in each register.
@ -113,11 +110,14 @@ struct IntSimdMatrix {
// Number of groups of inputs to be broadcast.
// num_input_groups_ = num_inputs_per_register_ / num_inputs_per_group_
static const IntSimdMatrix* intSimdMatrix;
static const IntSimdMatrix *intSimdMatrix;
// Only available with NEON.
static const IntSimdMatrix intSimdMatrixNEON;
// Only available with AVX2 / SSE.
static const IntSimdMatrix intSimdMatrixAVX2;
static const IntSimdMatrix intSimdMatrixSSE;
};
} // namespace tesseract
} // namespace tesseract
#endif // TESSERACT_ARCH_INTSIMDMATRIX_H_
#endif // TESSERACT_ARCH_INTSIMDMATRIX_H_

View File

@ -2,7 +2,6 @@
// File: intsimdmatrixavx2.cpp
// Description: matrix-vector product for 8-bit data on avx2.
// Author: Ray Smith
// Created: Fri Aug 04 13:26:20 PST 2017
//
// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -17,15 +16,17 @@
///////////////////////////////////////////////////////////////////////
#if !defined(__AVX2__)
#error Implementation only for AVX2 capable architectures
#endif
# if defined(__i686__) || defined(__x86_64__)
# error Implementation only for AVX2 capable architectures
# endif
#else
#include "intsimdmatrix.h"
# include "intsimdmatrix.h"
#include <immintrin.h>
#include <cstdint>
#include <algorithm>
#include <vector>
# include <immintrin.h>
# include <algorithm>
# include <cstdint>
# include <vector>
namespace tesseract {
@ -57,11 +58,10 @@ constexpr int kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup;
// weights and reps are scratch registers.
// This function must be inlined with references in order for the compiler to
// correctly use the registers declared in the caller.
static inline void MultiplyGroup(const __m256i& rep_input, const __m256i& ones,
const int8_t*& wi, __m256i& weights,
__m256i& reps, __m256i& result) {
static inline void MultiplyGroup(const __m256i &rep_input, const __m256i &ones, const int8_t *&wi,
__m256i &weights, __m256i &reps, __m256i &result) {
// Load a 4x8 block of weights.
weights = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(wi));
weights = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(wi));
wi += kNumInputsPerRegister;
// Normalize the signs on rep_input, weights, so weights is always +ve.
reps = _mm256_sign_epi8(rep_input, weights);
@ -78,24 +78,65 @@ static inline void MultiplyGroup(const __m256i& rep_input, const __m256i& ones,
result = _mm256_add_epi32(result, weights);
}
// Extracts and converts 8x32-bit results from result, adding the bias from wi
// and scaling by scales, before storing in *v. Note that wi, scales and v are
// expected to contain 8 consecutive elements or num_out if less.
static inline void ExtractResults(__m256i& result, __m256i& shift_id,
const int8_t*& wi, const double*& scales,
int num_out, double*& v) {
for (int out = 0; out < num_out; ++out) {
#ifndef _MSC_VER
auto res = _mm256_extract_epi32(result, 0);
#else
// Workaround MSVC's ICE
// _mm256_extract_epi32(X, Y) == ((int32_t*)&X)[Y]
auto res = ((int32_t*)&result)[0];
#endif
*v++ = (static_cast<double>(res) / INT8_MAX + *wi++) * *scales++;
// Rotate the results in int32_t units, so the next result is ready.
result = _mm256_permutevar8x32_epi32(result, shift_id);
}
// Loads 64 bits (8 x int8_t) starting at wi_ into the bottom half of a
// 128-bit register. The top 64 bits of the returned register are zero.
//
// wi_ does not need to be aligned: _mm_loadl_epi64 performs an unaligned-safe
// 64-bit load. Reading the bytes through an int64_t pointer instead would be
// undefined behaviour for a misaligned address and would violate the strict
// aliasing rules, so the intrinsic load is used here.
static inline __m128i load64_to_128(const int8_t *wi_) {
  return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(wi_));
}
// Finishes one register of 8 x 32-bit integer sums and writes 8 doubles to v.
// For each lane i (0..7): v[i] = double(result[i] + 127 * wi[i]) * scales[i],
// where wi[0..7] are the signed 8-bit bias weights.
// scales and v are accessed with unaligned loads/stores and must each provide
// 8 doubles; wi must provide 8 readable int8_t values.
static inline void ExtractResults8(__m256i result, const int8_t *wi, const double *scales,
                                   double *v) {
  // Sign-extend the 8 bias bytes to 8 x int32, then multiply each by 127.
  const __m256i bias32 = _mm256_cvtepi8_epi32(load64_to_128(wi));
  const __m256i bias_x127 = _mm256_mullo_epi32(bias32, _mm256_set1_epi32(127));
  // Read all scales before anything is written to v.
  const __m256d scale_lo = _mm256_loadu_pd(scales);
  const __m256d scale_hi = _mm256_loadu_pd(scales + 4);
  // sums = result + bias * 127, still as 8 x int32.
  const __m256i sums = _mm256_add_epi32(result, bias_x127);
  // Each 128-bit half holds 4 sums; widen each half to 4 doubles and scale.
  const __m256d out_lo =
      _mm256_mul_pd(_mm256_cvtepi32_pd(_mm256_castsi256_si128(sums)), scale_lo);
  const __m256d out_hi =
      _mm256_mul_pd(_mm256_cvtepi32_pd(_mm256_extracti128_si256(sums, 1)), scale_hi);
  _mm256_storeu_pd(v, out_lo);
  _mm256_storeu_pd(v + 4, out_hi);
}
static inline void ExtractResults16(__m256i result0, __m256i result1, const int8_t *&wi,
const double *&scales, double *&v) {
__m128i w8 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(wi));
// 8x8bit vals in bottom of 128bit reg
const __m256i bias_scale = _mm256_set_epi32(127, 127, 127, 127, 127, 127, 127, 127);
__m256i w256 = _mm256_cvtepi8_epi32(w8); // 8x32bit vals in 256bit reg
__m256d scale0123 = _mm256_loadu_pd(scales);
__m256d scale4567 = _mm256_loadu_pd(scales + 4);
w256 = _mm256_mullo_epi32(w256, bias_scale); // 8x32 <bias * 127>
result0 = _mm256_add_epi32(result0, w256); // result += bias * 127
__m256d res0123 = _mm256_cvtepi32_pd(_mm256_castsi256_si128(result0));
result0 = _mm256_permute4x64_epi64(result0, 2 + (3 << 2));
__m256d res4567 = _mm256_cvtepi32_pd(_mm256_castsi256_si128(result0));
res0123 = _mm256_mul_pd(res0123, scale0123);
res4567 = _mm256_mul_pd(res4567, scale4567);
_mm256_storeu_pd(v, res0123);
_mm256_storeu_pd(v + 4, res4567);
w8 = _mm_shuffle_epi32(w8, 2 + (3 << 2));
w256 = _mm256_cvtepi8_epi32(w8); // 8x32bit vals in 256bit reg
scale0123 = _mm256_loadu_pd(scales + 8);
scale4567 = _mm256_loadu_pd(scales + 12);
w256 = _mm256_mullo_epi32(w256, bias_scale); // 8x32 <bias * 127>
result1 = _mm256_add_epi32(result1, w256); // result += bias * 127
res0123 = _mm256_cvtepi32_pd(_mm256_castsi256_si128(result1));
result1 = _mm256_permute4x64_epi64(result1, 2 + (3 << 2));
res4567 = _mm256_cvtepi32_pd(_mm256_castsi256_si128(result1));
res0123 = _mm256_mul_pd(res0123, scale0123);
res4567 = _mm256_mul_pd(res4567, scale4567);
_mm256_storeu_pd(v + 8, res0123);
_mm256_storeu_pd(v + 12, res4567);
wi += 16;
scales += 16;
v += 16;
}
// Computes part of matrix.vector v = Wu. Computes N=64 results.
@ -105,13 +146,11 @@ static inline void ExtractResults(__m256i& result, __m256i& shift_id,
// bias weights, before continuing with any more weights.
// u must be padded out with zeros to
// kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements.
static void PartialMatrixDotVector64(const int8_t* wi, const double* scales,
const int8_t* u, int num_in, int num_out,
double* v) {
static void PartialMatrixDotVector64(const int8_t *wi, const double *scales, const int8_t *u,
int num_in, double *v) {
// Register containing 16-bit ones for horizontal add with 16->32 bit
// conversion.
__m256i ones =
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i ones = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
// Initialize all the results to 0.
__m256i result0 = _mm256_setzero_si256();
@ -124,15 +163,12 @@ static void PartialMatrixDotVector64(const int8_t* wi, const double* scales,
__m256i result7 = _mm256_setzero_si256();
// Iterate over the input (u), one registerful at a time.
for (int j = 0; j < num_in;) {
__m256i inputs =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
__m256i inputs = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(u + j));
// Inputs are processed in groups of kNumInputsPerGroup, replicated
// kNumInputGroups times.
for (int ig = 0; ig < kNumInputGroups && j < num_in;
++ig, j += kNumInputsPerGroup) {
for (int ig = 0; ig < kNumInputGroups && j < num_in; ++ig, j += kNumInputsPerGroup) {
// Replicate the low 32 bits (4 inputs) 8 times.
__m256i rep_input =
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
__m256i rep_input = _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
__m256i weights, reps;
@ -147,27 +183,19 @@ static void PartialMatrixDotVector64(const int8_t* wi, const double* scales,
MultiplyGroup(rep_input, ones, wi, weights, reps, result7);
}
}
ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result3, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result4, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result5, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result6, shift_id, wi, scales, kNumOutputsPerRegister, v);
num_out -= kNumOutputsPerRegister * 7;
ExtractResults(result7, shift_id, wi, scales,
std::min(kNumOutputsPerRegister, num_out), v);
ExtractResults16(result0, result1, wi, scales, v);
ExtractResults16(result2, result3, wi, scales, v);
ExtractResults16(result4, result5, wi, scales, v);
ExtractResults16(result6, result7, wi, scales, v);
}
// Computes part of matrix.vector v = Wu. Computes N=32 results.
// For details see PartialMatrixDotVector64 with N=32.
static void PartialMatrixDotVector32(const int8_t* wi, const double* scales,
const int8_t* u, int num_in, int num_out,
double* v) {
static void PartialMatrixDotVector32(const int8_t *wi, const double *scales, const int8_t *u,
int num_in, double *v) {
// Register containing 16-bit ones for horizontal add with 16->32 bit
// conversion.
__m256i ones =
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i ones = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
// Initialize all the results to 0.
__m256i result0 = _mm256_setzero_si256();
@ -176,15 +204,12 @@ static void PartialMatrixDotVector32(const int8_t* wi, const double* scales,
__m256i result3 = _mm256_setzero_si256();
// Iterate over the input (u), one registerful at a time.
for (int j = 0; j < num_in;) {
__m256i inputs =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
__m256i inputs = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(u + j));
// Inputs are processed in groups of kNumInputsPerGroup, replicated
// kNumInputGroups times.
for (int ig = 0; ig < kNumInputGroups && j < num_in;
++ig, j += kNumInputsPerGroup) {
for (int ig = 0; ig < kNumInputGroups && j < num_in; ++ig, j += kNumInputsPerGroup) {
// Replicate the low 32 bits (4 inputs) 8 times.
__m256i rep_input =
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
__m256i rep_input = _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
__m256i weights, reps;
@ -195,38 +220,29 @@ static void PartialMatrixDotVector32(const int8_t* wi, const double* scales,
MultiplyGroup(rep_input, ones, wi, weights, reps, result3);
}
}
ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v);
ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v);
num_out -= kNumOutputsPerRegister * 3;
ExtractResults(result3, shift_id, wi, scales,
std::min(kNumOutputsPerRegister, num_out), v);
ExtractResults16(result0, result1, wi, scales, v);
ExtractResults16(result2, result3, wi, scales, v);
}
// Computes part of matrix.vector v = Wu. Computes N=16 results.
// For details see PartialMatrixDotVector64 with N=16.
static void PartialMatrixDotVector16(const int8_t* wi, const double* scales,
const int8_t* u, int num_in, int num_out,
double* v) {
static void PartialMatrixDotVector16(const int8_t *wi, const double *scales, const int8_t *u,
int num_in, double *v) {
// Register containing 16-bit ones for horizontal add with 16->32 bit
// conversion.
__m256i ones =
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i ones = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
// Initialize all the results to 0.
__m256i result0 = _mm256_setzero_si256();
__m256i result1 = _mm256_setzero_si256();
// Iterate over the input (u), one registerful at a time.
for (int j = 0; j < num_in;) {
__m256i inputs =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
__m256i inputs = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(u + j));
// Inputs are processed in groups of kNumInputsPerGroup, replicated
// kNumInputGroups times.
for (int ig = 0; ig < kNumInputGroups && j < num_in;
++ig, j += kNumInputsPerGroup) {
for (int ig = 0; ig < kNumInputGroups && j < num_in; ++ig, j += kNumInputsPerGroup) {
// Replicate the low 32 bits (4 inputs) 8 times.
__m256i rep_input =
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
__m256i rep_input = _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
__m256i weights, reps;
@ -235,35 +251,27 @@ static void PartialMatrixDotVector16(const int8_t* wi, const double* scales,
MultiplyGroup(rep_input, ones, wi, weights, reps, result1);
}
}
ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v);
num_out -= kNumOutputsPerRegister;
ExtractResults(result1, shift_id, wi, scales,
std::min(kNumOutputsPerRegister, num_out), v);
ExtractResults16(result0, result1, wi, scales, v);
}
// Computes part of matrix.vector v = Wu. Computes N=8 results.
// For details see PartialMatrixDotVector64 with N=8.
static void PartialMatrixDotVector8(const int8_t* wi, const double* scales,
const int8_t* u, int num_in, int num_out,
double* v) {
static inline void PartialMatrixDotVector8(const int8_t *wi, const double *scales, const int8_t *u,
int num_in, double *v) {
// Register containing 16-bit ones for horizontal add with 16->32 bit
// conversion.
__m256i ones =
_mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i ones = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
__m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1);
// Initialize all the results to 0.
__m256i result0 = _mm256_setzero_si256();
// Iterate over the input (u), one registerful at a time.
for (int j = 0; j < num_in;) {
__m256i inputs =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(u + j));
__m256i inputs = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(u + j));
// Inputs are processed in groups of kNumInputsPerGroup, replicated
// kNumInputGroups times.
for (int ig = 0; ig < kNumInputGroups && j < num_in;
++ig, j += kNumInputsPerGroup) {
for (int ig = 0; ig < kNumInputGroups && j < num_in; ++ig, j += kNumInputsPerGroup) {
// Replicate the low 32 bits (4 inputs) 8 times.
__m256i rep_input =
_mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
__m256i rep_input = _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs));
// Rotate the inputs in groups of 4, so the next 4 inputs are ready.
inputs = _mm256_permutevar8x32_epi32(inputs, shift_id);
__m256i weights, reps;
@ -271,19 +279,17 @@ static void PartialMatrixDotVector8(const int8_t* wi, const double* scales,
MultiplyGroup(rep_input, ones, wi, weights, reps, result0);
}
}
ExtractResults(result0, shift_id, wi, scales, num_out, v);
ExtractResults8(result0, wi, scales, v);
}
static void matrixDotVector(int dim1, int dim2, const int8_t* wi,
const double* scales, const int8_t* u, double* v) {
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
const int8_t *u, double *v) {
const int num_out = dim1;
const int num_in = dim2 - 1;
// Each call to a partial_func_ produces group_size outputs, except the
// last one, which can produce less.
const int rounded_num_in =
IntSimdMatrix::Roundup(num_in, kNumInputsPerGroup);
const int rounded_num_out =
IntSimdMatrix::Roundup(num_out, kNumOutputsPerRegister);
const int rounded_num_in = IntSimdMatrix::Roundup(num_in, kNumInputsPerGroup);
const int rounded_num_out = IntSimdMatrix::Roundup(num_out, kNumOutputsPerRegister);
int group_size = kNumOutputsPerRegister * kMaxOutputRegisters;
int output = 0;
@ -292,7 +298,7 @@ static void matrixDotVector(int dim1, int dim2, const int8_t* wi,
// Run with this group size, until it would produce too much output, then
// switch to a smaller size.
for (; output + group_size <= rounded_num_out; output += group_size) {
PartialMatrixDotVector64(wi, scales, u, rounded_num_in, num_out - output, v);
PartialMatrixDotVector64(wi, scales, u, rounded_num_in, v);
wi += w_step;
scales += group_size;
v += group_size;
@ -300,43 +306,42 @@ static void matrixDotVector(int dim1, int dim2, const int8_t* wi,
group_size /= 2;
w_step /= 2;
for (; output + group_size <= rounded_num_out; output += group_size) {
PartialMatrixDotVector32(wi, scales, u, rounded_num_in, num_out - output, v);
if (output + group_size <= rounded_num_out) {
PartialMatrixDotVector32(wi, scales, u, rounded_num_in, v);
wi += w_step;
scales += group_size;
v += group_size;
output += group_size;
}
group_size /= 2;
w_step /= 2;
for (; output + group_size <= rounded_num_out; output += group_size) {
PartialMatrixDotVector16(wi, scales, u, rounded_num_in, num_out - output, v);
if (output + group_size <= rounded_num_out) {
PartialMatrixDotVector16(wi, scales, u, rounded_num_in, v);
wi += w_step;
scales += group_size;
v += group_size;
output += group_size;
}
group_size /= 2;
w_step /= 2;
for (; output + group_size <= rounded_num_out; output += group_size) {
PartialMatrixDotVector8(wi, scales, u, rounded_num_in, num_out - output, v);
wi += w_step;
scales += group_size;
v += group_size;
}
if (output + group_size <= rounded_num_out)
PartialMatrixDotVector8(wi, scales, u, rounded_num_in, v);
}
const IntSimdMatrix IntSimdMatrix::intSimdMatrixAVX2 = {
// Function.
matrixDotVector,
// Number of 32 bit outputs held in each register.
kNumOutputsPerRegister,
// Maximum number of registers that we will use to hold outputs.
kMaxOutputRegisters,
// Number of 8 bit inputs in the inputs register.
kNumInputsPerRegister,
// Number of inputs in each weight group.
kNumInputsPerGroup
};
// Function.
matrixDotVector,
// Number of 32 bit outputs held in each register.
kNumOutputsPerRegister,
// Maximum number of registers that we will use to hold outputs.
kMaxOutputRegisters,
// Number of 8 bit inputs in the inputs register.
kNumInputsPerRegister,
// Number of inputs in each weight group.
kNumInputsPerGroup};
} // namespace tesseract.
} // namespace tesseract.
#endif

View File

@ -0,0 +1,203 @@
///////////////////////////////////////////////////////////////////////
// File: intsimdmatrixneon.cpp
// Description: matrix-vector product for 8-bit data on neon.
// Author: Robin Watts (from the AVX2 original by Ray Smith)
//
// (C) Copyright 2017, Google Inc.
// (C) Copyright 2020, Artifex Software Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#if defined(__ARM_NEON)
# include "intsimdmatrix.h"
# include <algorithm>
# include <cstdint>
# include <vector>
# include "arm_neon.h"
namespace tesseract {
// Geometry of the NEON kernel. These constants describe how many outputs and
// inputs PartialMatrixDotVector8 below handles per step.
//
// Number of outputs held in each register. (Actually, we use a
// pair of 4x32 registers, so 8 x 32 bit ints).
// In PartialMatrixDotVector8 this pair is result0123 / result4567.
constexpr int kNumOutputsPerRegister = 8;
// Maximum number of registers that we will use.
// NOTE(review): with a value of 1 only an 8-output kernel should be needed;
// confirm against the dispatch code later in this file.
constexpr int kMaxOutputRegisters = 1;
// Number of inputs in the inputs register.
// The kernel loads the inputs with vld1_s8, i.e. 8 x int8_t per iteration.
constexpr int kNumInputsPerRegister = 8;
// Number of inputs in each weight group.
constexpr int kNumInputsPerGroup = 8;
// Computes part of the matrix.vector product v = Wu: one group of up to
// N = 8 results, each scaled by the corresponding member of scales.
//   wi      int8 weights. They *must* be arranged so that consecutive reads
//           provide num_in/kNumInputsPerGroup groups of (N rows of
//           kNumInputsPerGroup weights, one row per output), followed by N
//           consecutive bias weights, before any further weights.
//   scales  one scale factor per output; read once per result written.
//   u       int8 inputs, read 8 at a time. Must be padded out with zeros to
//           kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements.
//   num_in  number of inputs to process; the loop advances in steps of 8.
//   v       receives the scaled results.
//   num_out number of results to write. The first result is always written;
//           results 2..8 are written only if num_out is large enough.
// The amount of wi and scales consumed is fixed and is not reported back to
// the caller.
static inline void PartialMatrixDotVector8(const int8_t *__restrict wi,
const double *__restrict scales,
const int8_t *__restrict u, int num_in,
double *__restrict v, int num_out) {
// Initialize all the results to 0. Outputs 0..3 and 4..7 are accumulated in
// two separate 4 x 32 bit registers.
int32x4_t result0123 = {0, 0, 0, 0};
int32x4_t result4567 = {0, 0, 0, 0};
// Factor applied to every bias weight before it is added to the integer sums.
int8x8_t bias_scale = {127, 127, 127, 127, 127, 127, 127, 127};
// Iterate over the input (u), one registerful at a time.
for (int j = 0; j < num_in; j += 8) {
int8x8_t vu = vld1_s8(u); // vu = u0 u1 u2 u3 u4 u5 u6 u7
int8x16_t vw01 = vld1q_s8(wi); // vw01 = w00 w01 w02 w03 w04 w05 w06 w07
// w10 w11 w12 w13 w14 w15 w16 w17
int8x16_t vw23 = vld1q_s8(wi + 8 * 2); // vw23 = w20 w21 w22 w23 w24 w25 w26 w27
// w30 w31 w32 w33 w34 w35 w36 w37
int8x16_t vw45 = vld1q_s8(wi + 8 * 4); // vw45 = w40 w41 w42 w43 w44 w45 w46 w47
// w50 w51 w52 w53 w54 w55 w56 w57
int8x16_t vw67 = vld1q_s8(wi + 8 * 6); // vw67 = w60 w61 w62 w63 w64 w65 w66 w67
// w70 w71 w72 w73 w74 w75 w76 w77
// Widening multiply: each row of 8 weights times the 8 inputs, giving 8
// 16 bit products per row.
int16x8_t vrow0q = vmull_s8(vget_low_s8(vw01), vu); // vrow0q = w00.u0 w01.u1 w02.u2
// w03.u3 w04.u4 w05.u5 w06.u6 w07.u7
int16x8_t vrow1q = vmull_s8(vget_high_s8(vw01),
vu); // vrow1q = w10.u0 w11.u1 w12.u2 w13.u3
// w14.u4 w15.u5 w16.u6 w17.u7
int16x8_t vrow2q = vmull_s8(vget_low_s8(vw23), vu); // vrow2q = w20.u0 w21.u1 w22.u2
// w23.u3 w24.u4 w25.u5 w26.u6 w27.u7
int16x8_t vrow3q = vmull_s8(vget_high_s8(vw23),
vu); // vrow3q = w30.u0 w31.u1 w32.u2 w33.u3
// w34.u4 w35.u5 w36.u6 w37.u7
int16x8_t vrow4q = vmull_s8(vget_low_s8(vw45), vu); // vrow4q = w40.u0 w41.u1 w42.u2
// w43.u3 w44.u4 w45.u5 w46.u6 w47.u7
int16x8_t vrow5q = vmull_s8(vget_high_s8(vw45),
vu); // vrow5q = w50.u0 w51.u1 w52.u2 w53.u3
// w54.u4 w55.u5 w56.u6 w57.u7
int16x8_t vrow6q = vmull_s8(vget_low_s8(vw67), vu); // vrow6q = w60.u0 w61.u1 w62.u2
// w63.u3 w64.u4 w65.u5 w66.u6 w67.u7
int16x8_t vrow7q = vmull_s8(vget_high_s8(vw67),
vu); // vrow7q = w70.u0 w71.u1 w72.u2 w73.u3
// w74.u4 w75.u5 w76.u6 w77.u7
// Pairwise widening add: adjacent 16 bit products are summed into 4 32 bit
// partial sums per row.
int32x4_t vrow0q2 = vpaddlq_s16(vrow0q); // vrow0q2 = w00.u0+w01.u1 w02.u2+w03.u3
// w04.u4+w05.u5 w06.u6+w07.u7
int32x4_t vrow1q2 = vpaddlq_s16(vrow1q); // vrow1q2 = w10.u0+w11.u1 w12.u2+w13.u3
// w14.u4+w15.u5 w16.u6+w17.u7
int32x4_t vrow2q2 = vpaddlq_s16(vrow2q); // vrow2q2 = w20.u0+w21.u1 w22.u2+w23.u3
// w24.u4+w25.u5 w26.u6+w27.u7
int32x4_t vrow3q2 = vpaddlq_s16(vrow3q); // vrow3q2 = w30.u0+w31.u1 w32.u2+w33.u3
// w34.u4+w35.u5 w36.u6+w37.u7
int32x4_t vrow4q2 = vpaddlq_s16(vrow4q); // vrow4q2 = w40.u0+w41.u1 w42.u2+w43.u3
// w44.u4+w45.u5 w46.u6+w47.u7
int32x4_t vrow5q2 = vpaddlq_s16(vrow5q); // vrow5q2 = w50.u0+w51.u1 w52.u2+w53.u3
// w54.u4+w55.u5 w56.u6+w57.u7
int32x4_t vrow6q2 = vpaddlq_s16(vrow6q); // vrow6q2 = w60.u0+w61.u1 w62.u2+w63.u3
// w64.u4+w65.u5 w66.u6+w67.u7
int32x4_t vrow7q2 = vpaddlq_s16(vrow7q); // vrow7q2 = w70.u0+w71.u1 w72.u2+w73.u3
// w74.u4+w75.u5 w76.u6+w77.u7
// First reduction step: two rows are folded into one register, two partial
// sums per row.
vrow0q2 = vcombine_s32(vpadd_s32(vget_low_s32(vrow0q2), vget_high_s32(vrow0q2)),
vpadd_s32(vget_low_s32(vrow1q2), vget_high_s32(vrow1q2)));
// vrow0q2 = w00.u0+...+w03.u3 w04.u4+...+w07.u7 w10.u0+...+w13.u3
// w14.u4+...+w17.u7
vrow2q2 = vcombine_s32(vpadd_s32(vget_low_s32(vrow2q2), vget_high_s32(vrow2q2)),
vpadd_s32(vget_low_s32(vrow3q2), vget_high_s32(vrow3q2)));
// vrow2q2 = w20.u0+...+w23.u3 w24.u4+...+w27.u7 w30.u0+...+w33.u3
// w34.u4+...+w37.u7
vrow4q2 = vcombine_s32(vpadd_s32(vget_low_s32(vrow4q2), vget_high_s32(vrow4q2)),
vpadd_s32(vget_low_s32(vrow5q2), vget_high_s32(vrow5q2)));
// vrow4q2 = w40.u0+...+w43.u3 w44.u4+...+w47.u7 w50.u0+...+w53.u3
// w54.u4+...+w57.u7
vrow6q2 = vcombine_s32(vpadd_s32(vget_low_s32(vrow6q2), vget_high_s32(vrow6q2)),
vpadd_s32(vget_low_s32(vrow7q2), vget_high_s32(vrow7q2)));
// vrow6q2 = w60.u0+...+w63.u3 w64.u4+...+w67.u7 w70.u0+...+w73.u3
// w74.u4+...+w77.u7
// Second reduction step: each lane now holds the complete dot product of one
// row with the 8 inputs.
vrow0q2 = vcombine_s32(vpadd_s32(vget_low_s32(vrow0q2), vget_high_s32(vrow0q2)),
vpadd_s32(vget_low_s32(vrow2q2), vget_high_s32(vrow2q2)));
// vrow0q2 = w00.u0+...+w07.u7 w10.u0+...+w17.u7 w20.u0+...+w27.u7
// w30.u0+...+w37.u7
vrow4q2 = vcombine_s32(vpadd_s32(vget_low_s32(vrow4q2), vget_high_s32(vrow4q2)),
vpadd_s32(vget_low_s32(vrow6q2), vget_high_s32(vrow6q2)));
// vrow4q2 = w40.u0+...+w47.u7 w50.u0+...+w57.u7 w60.u0+...+w67.u7
// w70.u0+...+w77.u7
// Accumulate the row sums of this group of 8 inputs.
result0123 = vaddq_s32(result0123, vrow0q2);
result4567 = vaddq_s32(result4567, vrow4q2);
// Advance to the next 8 inputs and the next 8 x 8 block of weights.
u += 8;
wi += 64;
}
{
// wi now points at the 8 bias weights that follow the weight groups.
int8x8_t bias = vld1_s8(wi); // bias = b0 b1 b2 b3 b4 b5 b6 b7
int16x8_t scaled_bias = vmull_s8(bias, bias_scale);
result0123 = vaddw_s16(result0123, vget_low_s16(scaled_bias));
result4567 = vaddw_s16(result4567, vget_high_s16(scaled_bias));
// Write the results lane by lane, each multiplied by its scale factor.
// Only the first num_out results are stored (the first one unconditionally).
*v++ = vget_lane_s32(vget_low_s32(result0123), 0) * *scales++;
if (num_out > 1)
*v++ = vget_lane_s32(vget_low_s32(result0123), 1) * *scales++;
if (num_out > 2)
*v++ = vget_lane_s32(vget_high_s32(result0123), 0) * *scales++;
if (num_out > 3)
*v++ = vget_lane_s32(vget_high_s32(result0123), 1) * *scales++;
if (num_out > 4)
*v++ = vget_lane_s32(vget_low_s32(result4567), 0) * *scales++;
if (num_out > 5)
*v++ = vget_lane_s32(vget_low_s32(result4567), 1) * *scales++;
if (num_out > 6)
*v++ = vget_lane_s32(vget_high_s32(result4567), 0) * *scales++;
if (num_out > 7)
*v = vget_lane_s32(vget_high_s32(result4567), 1) * *scales;
}
}
// Computes the full matrix.vector product v = Wu with the NEON kernel.
// dim1 is the number of outputs. dim2 - 1 is the number of inputs; it is
// rounded up to a multiple of kNumInputsPerGroup before being handed to the
// kernel. wi, scales and v are advanced past each completed group of outputs.
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
                            const int8_t *u, double *v) {
  const int total_outputs = dim1;
  const int padded_inputs = IntSimdMatrix::Roundup(dim2 - 1, kNumInputsPerGroup);
  // Every kernel call yields this many outputs, except possibly the last one.
  const int outputs_per_call = kNumOutputsPerRegister * kMaxOutputRegisters;
  // Weights consumed per call: padded_inputs weights plus one bias per output.
  const int weights_per_call = (padded_inputs + 1) * outputs_per_call;

  int done = 0;
  // Full groups of outputs.
  while (done + outputs_per_call <= total_outputs) {
    PartialMatrixDotVector8(wi, scales, u, padded_inputs, v, kNumOutputsPerRegister);
    wi += weights_per_call;
    scales += outputs_per_call;
    v += outputs_per_call;
    done += outputs_per_call;
  }
  // Remaining outputs (fewer than a full group), if any.
  if (done < total_outputs) {
    PartialMatrixDotVector8(wi, scales, u, padded_inputs, v,
                            total_outputs & (kNumOutputsPerRegister - 1));
  }
}
// Descriptor of the NEON implementation: the matrix.vector function defined
// in this file followed by the constants that describe the kernel's shape.
const IntSimdMatrix IntSimdMatrix::intSimdMatrixNEON = {
// Function that computes the matrix.vector product.
matrixDotVector,
// Number of 32 bit outputs held in each register.
kNumOutputsPerRegister,
// Maximum number of registers that we will use to hold outputs.
kMaxOutputRegisters,
// Number of 8 bit inputs in the inputs register.
kNumInputsPerRegister,
// Number of inputs in each weight group.
kNumInputsPerGroup};
} // namespace tesseract.
#endif /* __ARM_NEON */

View File

@ -16,20 +16,22 @@
///////////////////////////////////////////////////////////////////////
#if !defined(__SSE4_1__)
#error Implementation only for SSE 4.1 capable architectures
#endif
# if defined(__i686__) || defined(__x86_64__)
# error Implementation only for SSE 4.1 capable architectures
# endif
#else
#include "intsimdmatrix.h"
# include "intsimdmatrix.h"
#include <cstdint>
#include <emmintrin.h>
#include <smmintrin.h>
# include <emmintrin.h>
# include <smmintrin.h>
# include <cstdint>
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
static int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n) {
static int32_t IntDotProductSSE(const int8_t *u, const int8_t *v, int n) {
int max_offset = n - 8;
int offset = 0;
// Accumulate a set of 4 32-bit sums in sum, by loading 8 pairs of 8-bit
@ -37,8 +39,8 @@ static int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n) {
int32_t result = 0;
if (offset <= max_offset) {
offset = 8;
__m128i packed1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(u));
__m128i packed2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(v));
__m128i packed1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(u));
__m128i packed2 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(v));
__m128i sum = _mm_cvtepi8_epi16(packed1);
packed2 = _mm_cvtepi8_epi16(packed2);
// The magic _mm_madd_epi16 is perfect here. It multiplies 8 pairs of 16 bit
@ -46,8 +48,8 @@ static int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n) {
// to make 4 32 bit results that still fit in a 128 bit register.
sum = _mm_madd_epi16(sum, packed2);
while (offset <= max_offset) {
packed1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(u + offset));
packed2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(v + offset));
packed1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(u + offset));
packed2 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(v + offset));
offset += 8;
packed1 = _mm_cvtepi8_epi16(packed1);
packed2 = _mm_cvtepi8_epi16(packed2);
@ -67,16 +69,15 @@ static int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n) {
}
// Computes part of matrix.vector v = Wu. Computes 1 result.
static void PartialMatrixDotVector1(const int8_t* wi, const double* scales,
const int8_t* u, int num_in,
double* v) {
static void PartialMatrixDotVector1(const int8_t *wi, const double *scales, const int8_t *u,
int num_in, double *v) {
double total = IntDotProductSSE(u, wi, num_in);
// Add in the bias and correct for integer values.
*v = (total / INT8_MAX + wi[num_in]) * *scales;
*v = (total + wi[num_in] * INT8_MAX) * *scales;
}
static void matrixDotVector(int dim1, int dim2, const int8_t* wi,
const double* scales, const int8_t* u, double* v) {
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
const int8_t *u, double *v) {
const int num_out = dim1;
const int num_in = dim2 - 1;
int output = 0;
@ -90,15 +91,16 @@ static void matrixDotVector(int dim1, int dim2, const int8_t* wi,
}
const IntSimdMatrix IntSimdMatrix::intSimdMatrixSSE = {
matrixDotVector,
// Number of 32 bit outputs held in each register.
1,
// Maximum number of registers that we will use to hold outputs.
1,
// Number of 8 bit inputs in the inputs register.
1,
// Number of inputs in each weight group.
1
};
matrixDotVector,
// Number of 32 bit outputs held in each register.
1,
// Maximum number of registers that we will use to hold outputs.
1,
// Number of 8 bit inputs in the inputs register.
1,
// Number of inputs in each weight group.
1};
} // namespace tesseract.
} // namespace tesseract.
#endif

View File

@ -15,24 +15,36 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "config_auto.h" // for HAVE_AVX, ...
#include <numeric> // for std::inner_product
#include "simddetect.h"
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // for HAVE_AVX, ...
#endif
#include <numeric> // for std::inner_product
#include "dotproduct.h"
#include "intsimdmatrix.h" // for IntSimdMatrix
#include "params.h" // for STRING_VAR
#include "tprintf.h" // for tprintf
#include "intsimdmatrix.h" // for IntSimdMatrix
#include "params.h" // for STRING_VAR
#include "simddetect.h"
#include "tprintf.h" // for tprintf
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA) || defined(HAVE_SSE4_1)
# define HAS_CPUID
# define HAS_CPUID
#endif
#if defined(HAS_CPUID)
#if defined(__GNUC__)
# include <cpuid.h>
#elif defined(_WIN32)
# include <intrin.h>
# if defined(__GNUC__)
# include <cpuid.h>
# elif defined(_WIN32)
# include <intrin.h>
# endif
#endif
#if defined(HAVE_NEON) && !defined(__aarch64__)
# ifdef ANDROID
# include <cpu-features.h>
# else
/* Assume linux */
# include <asm/hwcap.h>
# include <sys/auxv.h>
# endif
#endif
namespace tesseract {
@ -49,11 +61,17 @@ namespace tesseract {
// in AVX registers.
DotProductFunction DotProduct;
static STRING_VAR(dotproduct, "auto",
"Function used for calculation of dot product");
static STRING_VAR(dotproduct, "auto", "Function used for calculation of dot product");
SIMDDetect SIMDDetect::detector;
#if defined(__aarch64__)
// ARMv8 always has NEON.
bool SIMDDetect::neon_available_ = true;
#elif defined(HAVE_NEON)
// If true, then Neon has been detected.
bool SIMDDetect::neon_available_;
#else
// If true, then AVX has been detected.
bool SIMDDetect::avx_available_;
bool SIMDDetect::avx2_available_;
@ -63,20 +81,22 @@ bool SIMDDetect::avx512BW_available_;
bool SIMDDetect::fma_available_;
// If true, then SSE4.1 has been detected.
bool SIMDDetect::sse_available_;
#endif
// Computes and returns the dot product of the two n-vectors u and v.
static double DotProductGeneric(const double* u, const double* v, int n) {
static double DotProductGeneric(const double *u, const double *v, int n) {
double total = 0.0;
for (int k = 0; k < n; ++k) total += u[k] * v[k];
for (int k = 0; k < n; ++k)
total += u[k] * v[k];
return total;
}
// Compute dot product using std::inner_product.
static double DotProductStdInnerProduct(const double* u, const double* v, int n) {
static double DotProductStdInnerProduct(const double *u, const double *v, int n) {
return std::inner_product(u, u + n, v, 0.0);
}
static void SetDotProduct(DotProductFunction f, const IntSimdMatrix* m = nullptr) {
static void SetDotProduct(DotProductFunction f, const IntSimdMatrix *m = nullptr) {
DotProduct = f;
IntSimdMatrix::intSimdMatrix = m;
}
@ -91,29 +111,39 @@ SIMDDetect::SIMDDetect() {
SetDotProduct(DotProductGeneric);
#if defined(HAS_CPUID)
#if defined(__GNUC__)
# if defined(__GNUC__)
unsigned int eax, ebx, ecx, edx;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
// Note that these tests all use hex because the older compilers don't have
// the newer flags.
#if defined(HAVE_SSE4_1)
# if defined(HAVE_SSE4_1)
sse_available_ = (ecx & 0x00080000) != 0;
#endif
#if defined(HAVE_FMA)
fma_available_ = (ecx & 0x00001000) != 0;
#endif
#if defined(HAVE_AVX)
avx_available_ = (ecx & 0x10000000) != 0;
if (avx_available_) {
// There is supposed to be a __get_cpuid_count function, but this is all
// there is in my cpuid.h. It is a macro for an asm statement and cannot
// be used inside an if.
__cpuid_count(7, 0, eax, ebx, ecx, edx);
avx2_available_ = (ebx & 0x00000020) != 0;
avx512F_available_ = (ebx & 0x00010000) != 0;
avx512BW_available_ = (ebx & 0x40000000) != 0;
# endif
# if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA)
auto xgetbv = []() {
uint32_t xcr0;
__asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx");
return xcr0;
};
if ((ecx & 0x08000000) && ((xgetbv() & 6) == 6)) {
// OSXSAVE bit is set, XMM state and YMM state are fine.
# if defined(HAVE_FMA)
fma_available_ = (ecx & 0x00001000) != 0;
# endif
# if defined(HAVE_AVX)
avx_available_ = (ecx & 0x10000000) != 0;
if (avx_available_) {
// There is supposed to be a __get_cpuid_count function, but this is all
// there is in my cpuid.h. It is a macro for an asm statement and cannot
// be used inside an if.
__cpuid_count(7, 0, eax, ebx, ecx, edx);
avx2_available_ = (ebx & 0x00000020) != 0;
avx512F_available_ = (ebx & 0x00010000) != 0;
avx512BW_available_ = (ebx & 0x40000000) != 0;
}
# endif
}
#endif
# endif
}
# elif defined(_WIN32)
int cpuInfo[4];
@ -122,32 +152,45 @@ SIMDDetect::SIMDDetect() {
max_function_id = cpuInfo[0];
if (max_function_id >= 1) {
__cpuid(cpuInfo, 1);
#if defined(HAVE_SSE4_1)
# if defined(HAVE_SSE4_1)
sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
#endif
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA)
# endif
# if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA)
if ((cpuInfo[2] & 0x08000000) && ((_xgetbv(0) & 6) == 6)) {
// OSXSAVE bit is set, XMM state and YMM state are fine.
#if defined(HAVE_FMA)
# if defined(HAVE_FMA)
fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
#endif
#if defined(HAVE_AVX)
# endif
# if defined(HAVE_AVX)
avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
#endif
#if defined(HAVE_AVX2)
# endif
# if defined(HAVE_AVX2)
if (max_function_id >= 7) {
__cpuid(cpuInfo, 7);
avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
avx512F_available_ = (cpuInfo[1] & 0x00010000) != 0;
avx512BW_available_ = (cpuInfo[1] & 0x40000000) != 0;
}
#endif
# endif
}
#endif
# endif
}
#else
#error "I don't know how to test for SIMD with this compiler"
# else
# error "I don't know how to test for SIMD with this compiler"
# endif
#endif
#if defined(HAVE_NEON) && !defined(__aarch64__)
# ifdef ANDROID
{
AndroidCpuFamily family = android_getCpuFamily();
if (family == ANDROID_CPU_FAMILY_ARM)
neon_available_ = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON);
}
# else
/* Assume linux */
neon_available_ = getauxval(AT_HWCAP) & HWCAP_NEON;
# endif
#endif
// Select code for calculation of dot product based on autodetection.
@ -167,6 +210,11 @@ SIMDDetect::SIMDDetect() {
} else if (sse_available_) {
// SSE detected.
SetDotProduct(DotProductSSE, &IntSimdMatrix::intSimdMatrixSSE);
#endif
#if defined(HAVE_NEON) || defined(__aarch64__)
} else if (neon_available_) {
// NEON detected.
SetDotProduct(DotProduct, &IntSimdMatrix::intSimdMatrixNEON);
#endif
}
}
@ -174,7 +222,7 @@ SIMDDetect::SIMDDetect() {
void SIMDDetect::Update() {
// Select code for calculation of dot product based on the
// value of the config variable if that value is not empty.
const char* dotproduct_method = "generic";
const char *dotproduct_method = "generic";
if (!strcmp(dotproduct.c_str(), "auto")) {
// Automatic detection. Nothing to be done.
} else if (!strcmp(dotproduct.c_str(), "generic")) {
@ -217,17 +265,18 @@ void SIMDDetect::Update() {
// Unsupported value of config variable.
tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
dotproduct.c_str());
tprintf("Support values for dotproduct: auto generic native"
tprintf(
"Support values for dotproduct: auto generic native"
#if defined(HAVE_AVX)
" avx"
" avx"
#endif
#if defined(HAVE_SSE4_1)
" sse"
" sse"
#endif
" std::inner_product.\n");
" std::inner_product.\n");
}
dotproduct.set_value(dotproduct_method);
}
} // namespace tesseract
} // namespace tesseract

View File

@ -17,19 +17,19 @@
#ifndef TESSERACT_ARCH_SIMDDETECT_H_
#define TESSERACT_ARCH_SIMDDETECT_H_
#include <tesseract/platform.h>
#include <tesseract/export.h>
namespace tesseract {
// Function pointer for best calculation of dot product.
using DotProductFunction = double (*)(const double*, const double*, int);
using DotProductFunction = double (*)(const double *, const double *, int);
extern DotProductFunction DotProduct;
// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
class SIMDDetect {
public:
public:
// Returns true if AVX is available on this system.
static inline bool IsAVXAvailable() {
return detector.avx_available_;
@ -54,15 +54,19 @@ class SIMDDetect {
static inline bool IsSSEAvailable() {
return detector.sse_available_;
}
// Returns true if NEON is available on this system.
static inline bool IsNEONAvailable() {
return detector.neon_available_;
}
// Update settings after config variable was set.
static TESS_API void Update();
private:
private:
// Constructor, must set all static member variables.
SIMDDetect();
private:
private:
// Singleton.
static SIMDDetect detector;
// If true, then AVX has been detected.
@ -74,8 +78,10 @@ class SIMDDetect {
static TESS_API bool fma_available_;
// If true, then SSE4.1 has been detected.
static TESS_API bool sse_available_;
// If true, then NEON has been detected.
static TESS_API bool neon_available_;
};
} // namespace tesseract
} // namespace tesseract
#endif // TESSERACT_ARCH_SIMDDETECT_H_
#endif // TESSERACT_ARCH_SIMDDETECT_H_

View File

@ -19,32 +19,30 @@
#include <cctype>
#include <cstring>
#include "tessvars.h"
#include "reject.h"
#include "control.h"
#include "reject.h"
#include "stopper.h"
#include "tesseractclass.h"
#include "tessvars.h"
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
# include "config_auto.h"
#endif
namespace tesseract {
bool Tesseract::word_adaptable( //should we adapt?
WERD_RES* word,
uint16_t mode) {
bool Tesseract::word_adaptable( // should we adapt?
WERD_RES *word, uint16_t mode) {
if (tessedit_adaption_debug) {
tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
word->best_choice->unichar_string().c_str(),
word->best_choice->rating(), word->best_choice->certainty());
word->best_choice->unichar_string().c_str(), word->best_choice->rating(),
word->best_choice->certainty());
}
bool status = false;
BITS16 flags(mode);
enum MODES
{
enum MODES {
ADAPTABLE_WERD,
ACCEPTABLE_WERD,
CHECK_DAWGS,
@ -54,54 +52,57 @@ bool Tesseract::word_adaptable( //should we adapt?
};
/*
0: NO adaption
*/
0: NO adaption
*/
if (mode == 0) {
if (tessedit_adaption_debug) tprintf("adaption disabled\n");
if (tessedit_adaption_debug)
tprintf("adaption disabled\n");
return false;
}
if (flags.bit (ADAPTABLE_WERD)) {
status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
if (flags[ADAPTABLE_WERD]) {
status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
if (tessedit_adaption_debug && !status) {
tprintf("tess_would_adapt bit is false\n");
}
}
if (flags.bit (ACCEPTABLE_WERD)) {
if (flags[ACCEPTABLE_WERD]) {
status |= word->tess_accepted;
if (tessedit_adaption_debug && !status) {
tprintf("tess_accepted bit is false\n");
}
}
if (!status) { // If not set then
return false; // ignore other checks
if (!status) { // If not set then
return false; // ignore other checks
}
if (flags.bit (CHECK_DAWGS) &&
(word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
(word->best_choice->permuter () != FREQ_DAWG_PERM) &&
(word->best_choice->permuter () != USER_DAWG_PERM) &&
(word->best_choice->permuter () != NUMBER_PERM)) {
if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
if (flags[CHECK_DAWGS] && (word->best_choice->permuter() != SYSTEM_DAWG_PERM) &&
(word->best_choice->permuter() != FREQ_DAWG_PERM) &&
(word->best_choice->permuter() != USER_DAWG_PERM) &&
(word->best_choice->permuter() != NUMBER_PERM)) {
if (tessedit_adaption_debug)
tprintf("word not in dawgs\n");
return false;
}
if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, false)) {
if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) {
if (tessedit_adaption_debug)
tprintf("word has ell conflict\n");
return false;
}
if (flags.bit (CHECK_SPACES) &&
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
if (tessedit_adaption_debug) tprintf("word contains spaces\n");
if (flags[CHECK_SPACES] &&
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
if (tessedit_adaption_debug)
tprintf("word contains spaces\n");
return false;
}
if (flags.bit (CHECK_AMBIG_WERD) &&
word->best_choice->dangerous_ambig_found()) {
if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) {
if (tessedit_adaption_debug)
tprintf("word is ambiguous\n");
return false;
}
@ -111,4 +112,4 @@ bool Tesseract::word_adaptable( //should we adapt?
return status;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -16,22 +16,26 @@
*
**********************************************************************/
#include <cctype>
#include <cerrno>
#include <cstring>
#include "allheaders.h"
#include "boxread.h"
#include "pageres.h"
#ifndef DISABLED_LEGACY_ENGINE
# include <allheaders.h>
# include <cctype>
# include <cerrno>
# include <cstring>
# include "boxread.h"
#endif // ndef DISABLED_LEGACY_ENGINE
#include <tesseract/unichar.h>
#include "unicharset.h"
#include "genericvector.h"
#include "pageres.h"
#include "tesseractclass.h"
#include <tesseract/genericvector.h>
#include "unicharset.h"
#ifndef DISABLED_LEGACY_ENGINE
/** Max number of blobs to classify together in FindSegmentation. */
const int kMaxGroupSize = 4;
/// Max fraction of median allowed as deviation in xheight before switching
/// to median.
const double kMaxXHeightDeviationFraction = 0.125;
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* The box file is assumed to contain box definitions, one per line, of the
@ -73,20 +77,18 @@ namespace tesseract {
#ifndef DISABLED_LEGACY_ENGINE
static void clear_any_old_text(BLOCK_LIST *block_list) {
BLOCK_IT block_it(block_list);
for (block_it.mark_cycle_pt();
!block_it.cycled_list(); block_it.forward()) {
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
ROW_IT row_it(block_it.data()->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
WERD_IT word_it(row_it.data()->word_list());
for (word_it.mark_cycle_pt();
!word_it.cycled_list(); word_it.forward()) {
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word_it.data()->set_text("");
}
}
}
}
// Applies the box file based on the image name fname, and resegments
// Applies the box file based on the image name filename, and resegments
// the words in the block_list (page), with:
// blob-mode: one blob per line in the box file, words as input.
// word/line-mode: one blob per space-delimited unit after the #, and one word
@ -106,14 +108,12 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
// Instead, the correct_text member of WERD_RES is set, and this may be later
// converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
// is not required before calling ApplyBoxTraining.
PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
bool find_segmentation,
PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
BLOCK_LIST *block_list) {
GenericVector<TBOX> boxes;
GenericVector<STRING> texts, full_texts;
if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts,
nullptr)) {
return nullptr; // Can't do it.
std::vector<TBOX> boxes;
std::vector<std::string> texts, full_texts;
if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts, nullptr)) {
return nullptr; // Can't do it.
}
const int box_count = boxes.size();
@ -121,27 +121,22 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
// In word mode, we use the boxes to make a word for each box, but
// in blob mode we use the existing words and maximally chop them first.
PAGE_RES* page_res = find_segmentation ?
nullptr : SetupApplyBoxes(boxes, block_list);
PAGE_RES *page_res = find_segmentation ? nullptr : SetupApplyBoxes(boxes, block_list);
clear_any_old_text(block_list);
for (int i = 0; i < box_count; i++) {
bool foundit = false;
if (page_res != nullptr) {
foundit = ResegmentCharBox(page_res,
(i == 0) ? nullptr : &boxes[i - 1],
boxes[i],
(i == box_count - 1) ? nullptr : &boxes[i + 1],
full_texts[i].c_str());
foundit =
ResegmentCharBox(page_res, (i == 0) ? nullptr : &boxes[i - 1], boxes[i],
(i == box_count - 1) ? nullptr : &boxes[i + 1], full_texts[i].c_str());
} else {
foundit = ResegmentWordBox(block_list, boxes[i],
(i == box_count - 1) ? nullptr : &boxes[i + 1],
texts[i].c_str());
(i == box_count - 1) ? nullptr : &boxes[i + 1], texts[i].c_str());
}
if (!foundit) {
box_failures++;
ReportFailedBox(i, boxes[i], texts[i].c_str(),
"FAILURE! Couldn't find a matching blob");
ReportFailedBox(i, boxes[i], texts[i].c_str(), "FAILURE! Couldn't find a matching blob");
}
}
@ -160,14 +155,12 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
TidyUp(page_res);
return page_res;
}
#endif // ndef DISABLED_LEGACY_ENGINE
// Helper computes median xheight in the image.
static double MedianXHeight(BLOCK_LIST *block_list) {
BLOCK_IT block_it(block_list);
STATS xheights(0, block_it.data()->pdblk.bounding_box().height());
for (block_it.mark_cycle_pt();
!block_it.cycled_list(); block_it.forward()) {
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
ROW_IT row_it(block_it.data()->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
xheights.add(IntCastRounded(row_it.data()->x_height()), 1);
@ -184,15 +177,14 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
// Strip all fuzzy space markers to simplify the PAGE_RES.
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK* block = b_it.data();
BLOCK *block = b_it.data();
ROW_IT r_it(block->row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) {
ROW* row = r_it.data();
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW *row = r_it.data();
const double diff = fabs(row->x_height() - median_xheight);
if (diff > max_deviation) {
if (applybox_debug) {
tprintf("row xheight=%g, but median xheight = %g\n",
row->x_height(), median_xheight);
tprintf("row xheight=%g, but median xheight = %g\n", row->x_height(), median_xheight);
}
row->set_x_height(static_cast<float>(median_xheight));
}
@ -200,23 +192,20 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
}
}
#ifndef DISABLED_LEGACY_ENGINE
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
/// All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
BLOCK_LIST *block_list) {
PAGE_RES *Tesseract::SetupApplyBoxes(const std::vector<TBOX> &boxes, BLOCK_LIST *block_list) {
PreenXHeights(block_list);
// Strip all fuzzy space markers to simplify the PAGE_RES.
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK* block = b_it.data();
BLOCK *block = b_it.data();
ROW_IT r_it(block->row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) {
ROW* row = r_it.data();
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW *row = r_it.data();
WERD_IT w_it(row->word_list());
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD* word = w_it.data();
WERD *word = w_it.data();
if (word->cblob_list()->empty()) {
delete w_it.extract();
} else {
@ -226,12 +215,11 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
}
}
}
auto* page_res = new PAGE_RES(false, block_list, nullptr);
auto *page_res = new PAGE_RES(false, block_list, nullptr);
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
WERD_RES *word_res;
while ((word_res = pr_it.word()) != nullptr) {
MaximallyChopWord(boxes, pr_it.block()->block,
pr_it.row()->row, word_res);
MaximallyChopWord(boxes, pr_it.block()->block, pr_it.row()->row, word_res);
pr_it.forward();
}
return page_res;
@ -240,15 +228,11 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
/// Tests the chopper by exhaustively running chop_one_blob.
/// The word_res will contain filled chopped_word, seam_array, denorm,
/// box_word and best_state for the maximally chopped word.
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
BLOCK* block, ROW* row,
WERD_RES* word_res) {
if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, nullptr,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block)) {
void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block, ROW *row,
WERD_RES *word_res) {
if (!word_res->SetupForRecognition(unicharset, this, BestPix(), tessedit_ocr_engine_mode, nullptr,
classify_bln_numeric_mode, textord_use_cjk_fp_model,
poly_allow_detailed_fx, row, block)) {
word_res->CloneChoppedToRebuild();
return;
}
@ -256,7 +240,7 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
tprintf("Maximally chopping word at:");
word_res->word->bounding_box().print();
}
GenericVector<BLOB_CHOICE*> blob_choices;
GenericVector<BLOB_CHOICE *> blob_choices;
ASSERT_HOST(!word_res->chopped_word->blobs.empty());
auto rating = static_cast<float>(INT8_MAX);
for (int i = 0; i < word_res->chopped_word->NumBlobs(); ++i) {
@ -268,28 +252,25 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
// produced, however much chopping is required. The chops are thus only
// limited by the ability of the chopper to find suitable chop points,
// and not by the value of the certainties.
auto* choice =
new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE);
auto *choice = new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE);
blob_choices.push_back(choice);
rating -= 0.125f;
}
const double e = exp(1.0); // The base of natural logs.
const double e = exp(1.0); // The base of natural logs.
int blob_number;
int right_chop_index = 0;
if (!assume_fixed_pitch_char_segment) {
// We only chop if the language is not fixed pitch like CJK.
SEAM* seam = nullptr;
while ((seam = chop_one_blob(boxes, blob_choices, word_res,
&blob_number)) != nullptr) {
SEAM *seam = nullptr;
while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != nullptr) {
word_res->InsertSeam(blob_number, seam);
BLOB_CHOICE* left_choice = blob_choices[blob_number];
BLOB_CHOICE *left_choice = blob_choices[blob_number];
rating = left_choice->rating() / e;
left_choice->set_rating(rating);
left_choice->set_certainty(-rating);
// combine confidence w/ serial #
auto* right_choice = new BLOB_CHOICE(++right_chop_index,
rating - 0.125f, -rating, -1,
0.0f, 0.0f, 0.0f, BCC_FAKE);
auto *right_choice = new BLOB_CHOICE(++right_chop_index, rating - 0.125f, -rating, -1, 0.0f,
0.0f, 0.0f, BCC_FAKE);
blob_choices.insert(right_choice, blob_number + 1);
}
}
@ -308,7 +289,7 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
/// Given a box with area A, and a blob with area B, with overlap area C,
/// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
/// miss metric gets the blob.
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
static double BoxMissMetric(const TBOX &box1, const TBOX &box2) {
const int overlap_area = box1.intersection(box2).area();
const int a = box1.area();
const int b = box2.area();
@ -326,16 +307,14 @@ static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
///
/// This means that occasionally, blobs may be incorrectly segmented if the
/// chopper fails to find a suitable chop point.
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box,
const TBOX& box, const TBOX* next_box,
const char* correct_text) {
bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box,
const TBOX *next_box, const char *correct_text) {
if (applybox_debug > 1) {
tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text);
}
PAGE_RES_IT page_res_it(page_res);
WERD_RES* word_res;
for (word_res = page_res_it.word(); word_res != nullptr;
word_res = page_res_it.forward()) {
WERD_RES *word_res;
for (word_res = page_res_it.word(); word_res != nullptr; word_res = page_res_it.forward()) {
if (!word_res->box_word->bounding_box().major_overlap(box))
continue;
if (applybox_debug > 1) {
@ -351,18 +330,18 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box,
if (!blob_box.major_overlap(box))
break;
if (word_res->correct_text[i + blob_count].length() > 0)
break; // Blob is claimed already.
break; // Blob is claimed already.
if (next_box != nullptr) {
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box);
if (applybox_debug > 2) {
tprintf("Checking blob:");
blob_box.print();
tprintf("Current miss metric = %g, next = %g\n",
current_box_miss_metric, next_box_miss_metric);
tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric,
next_box_miss_metric);
}
if (current_box_miss_metric > next_box_miss_metric)
break; // Blob is a better match for next box.
break; // Blob is a better match for next box.
}
char_box += blob_box;
}
@ -371,7 +350,7 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box,
tprintf("Index [%d, %d) seem good.\n", i, i + blob_count);
}
if (!char_box.almost_equal(box, 3) &&
((next_box != nullptr && box.x_gap(*next_box) < -3)||
((next_box != nullptr && box.x_gap(*next_box) < -3) ||
(prev_box != nullptr && prev_box->x_gap(box) < -3))) {
return false;
}
@ -419,7 +398,7 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box,
if (applybox_debug > 0) {
tprintf("FAIL!\n");
}
return false; // Failure.
return false; // Failure.
}
/// Consume all source blobs that strongly overlap the given box,
@ -428,38 +407,36 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX* prev_box,
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an overlapping blob for a box.
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
const TBOX& box, const TBOX* next_box,
const char* correct_text) {
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const TBOX *next_box,
const char *correct_text) {
if (applybox_debug > 1) {
tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text);
}
WERD* new_word = nullptr;
WERD *new_word = nullptr;
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK* block = b_it.data();
BLOCK *block = b_it.data();
if (!box.major_overlap(block->pdblk.bounding_box()))
continue;
ROW_IT r_it(block->row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW* row = r_it.data();
ROW *row = r_it.data();
if (!box.major_overlap(row->bounding_box()))
continue;
WERD_IT w_it(row->word_list());
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD* word = w_it.data();
WERD *word = w_it.data();
if (applybox_debug > 2) {
tprintf("Checking word:");
word->bounding_box().print();
}
if (word->text() != nullptr && word->text()[0] != '\0')
continue; // Ignore words that are already done.
continue; // Ignore words that are already done.
if (!box.major_overlap(word->bounding_box()))
continue;
C_BLOB_IT blob_it(word->cblob_list());
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
blob_it.forward()) {
C_BLOB* blob = blob_it.data();
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB *blob = blob_it.data();
TBOX blob_box = blob->bounding_box();
if (!blob_box.major_overlap(box))
continue;
@ -469,11 +446,11 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
if (applybox_debug > 2) {
tprintf("Checking blob:");
blob_box.print();
tprintf("Current miss metric = %g, next = %g\n",
current_box_miss_metric, next_box_miss_metric);
tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric,
next_box_miss_metric);
}
if (current_box_miss_metric > next_box_miss_metric)
continue; // Blob is a better match for next box.
continue; // Blob is a better match for next box.
}
if (applybox_debug > 2) {
tprintf("Blob match: blob:");
@ -497,44 +474,40 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
}
}
}
if (new_word == nullptr && applybox_debug > 0) tprintf("FAIL!\n");
if (new_word == nullptr && applybox_debug > 0)
tprintf("FAIL!\n");
return new_word != nullptr;
}
/// Resegments the words by running the classifier in an attempt to find the
/// correct segmentation that produces the required string.
void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
void Tesseract::ReSegmentByClassification(PAGE_RES *page_res) {
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
WERD_RES *word_res;
for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) {
const WERD* word = word_res->word;
const WERD *word = word_res->word;
if (word->text() == nullptr || word->text()[0] == '\0')
continue; // Ignore words that have no text.
continue; // Ignore words that have no text.
// Convert the correct text to a vector of UNICHAR_ID
GenericVector<UNICHAR_ID> target_text;
if (!ConvertStringToUnichars(word->text(), &target_text)) {
tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n",
word->text());
tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n", word->text());
pr_it.DeleteCurrentWord();
continue;
}
if (!FindSegmentation(target_text, word_res)) {
tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n",
word->text());
tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n", word->text());
pr_it.DeleteCurrentWord();
continue;
}
}
}
#endif // ndef DISABLED_LEGACY_ENGINE
/// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
/// @return false if an invalid UNICHAR_ID is encountered.
bool Tesseract::ConvertStringToUnichars(const char* utf8,
GenericVector<UNICHAR_ID>* class_ids) {
bool Tesseract::ConvertStringToUnichars(const char *utf8, GenericVector<UNICHAR_ID> *class_ids) {
for (int step = 0; *utf8 != '\0'; utf8 += step) {
const char* next_space = strchr(utf8, ' ');
const char *next_space = strchr(utf8, ' ');
if (next_space == nullptr)
next_space = utf8 + strlen(utf8);
step = next_space - utf8;
@ -549,26 +522,21 @@ bool Tesseract::ConvertStringToUnichars(const char* utf8,
return true;
}
#ifndef DISABLED_LEGACY_ENGINE
/// Resegments the word to achieve the target_text from the classifier.
/// Returns false if the re-segmentation fails.
/// Uses brute-force combination of up to #kMaxGroupSize adjacent blobs, and
/// applies a full search on the classifier results to find the best classified
/// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
/// substitutions ARE used.
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
WERD_RES* word_res) {
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID> &target_text, WERD_RES *word_res) {
// Classify all required combinations of blobs and save results in choices.
const int word_length = word_res->box_word->length();
auto* choices =
new GenericVector<BLOB_CHOICE_LIST*>[word_length];
auto *choices = new GenericVector<BLOB_CHOICE_LIST *>[word_length];
for (int i = 0; i < word_length; ++i) {
for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) {
BLOB_CHOICE_LIST* match_result = classify_piece(
word_res->seam_array, i, i + j - 1, "Applybox",
word_res->chopped_word, word_res->blamer_bundle);
BLOB_CHOICE_LIST *match_result =
classify_piece(word_res->seam_array, i, i + j - 1, "Applybox", word_res->chopped_word,
word_res->blamer_bundle);
if (applybox_debug > 2) {
tprintf("%d+%d:", i, j);
print_ratings_list("Segment:", match_result, unicharset);
@ -582,17 +550,17 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
word_res->best_state.clear();
GenericVector<int> search_segmentation;
float best_rating = 0.0f;
SearchForText(choices, 0, word_length, target_text, 0, 0.0f,
&search_segmentation, &best_rating, &word_res->best_state);
SearchForText(choices, 0, word_length, target_text, 0, 0.0f, &search_segmentation, &best_rating,
&word_res->best_state);
for (int i = 0; i < word_length; ++i)
choices[i].delete_data_pointers();
delete [] choices;
delete[] choices;
if (word_res->best_state.empty()) {
// Build the original segmentation and if it is the same length as the
// truth, assume it will do.
int blob_count = 1;
for (int s = 0; s < word_res->seam_array.size(); ++s) {
SEAM* seam = word_res->seam_array[s];
SEAM *seam = word_res->seam_array[s];
if (!seam->HasAnySplits()) {
word_res->best_state.push_back(blob_count);
blob_count = 1;
@ -602,14 +570,13 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
}
word_res->best_state.push_back(blob_count);
if (word_res->best_state.size() != target_text.size()) {
word_res->best_state.clear(); // No good. Original segmentation bad size.
word_res->best_state.clear(); // No good. Original segmentation bad size.
return false;
}
}
word_res->correct_text.clear();
for (int i = 0; i < target_text.size(); ++i) {
word_res->correct_text.push_back(
STRING(unicharset.id_to_unichar(target_text[i])));
word_res->correct_text.push_back(unicharset.id_to_unichar(target_text[i]));
}
return true;
}
@ -628,22 +595,18 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
/// @param segmentation
/// @param best_rating
/// @param best_segmentation
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
int choices_pos, int choices_length,
const GenericVector<UNICHAR_ID>& target_text,
int text_index,
float rating, GenericVector<int>* segmentation,
float* best_rating,
GenericVector<int>* best_segmentation) {
const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs();
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST *> *choices, int choices_pos,
int choices_length, const GenericVector<UNICHAR_ID> &target_text,
int text_index, float rating, GenericVector<int> *segmentation,
float *best_rating, GenericVector<int> *best_segmentation) {
const UnicharAmbigsVector &table = getDict().getUnicharAmbigs().dang_ambigs();
for (int length = 1; length <= choices[choices_pos].size(); ++length) {
// Rating of matching choice or worst choice if no match.
float choice_rating = 0.0f;
// Find the corresponding best BLOB_CHOICE.
BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]);
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
choice_it.forward()) {
const BLOB_CHOICE* choice = choice_it.data();
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {
const BLOB_CHOICE *choice = choice_it.data();
choice_rating = choice->rating();
UNICHAR_ID class_id = choice->unichar_id();
if (class_id == target_text[text_index]) {
@ -652,8 +615,7 @@ void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
// Search ambigs table.
if (class_id < table.size() && table[class_id] != nullptr) {
AmbigSpec_IT spec_it(table[class_id]);
for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();
spec_it.forward()) {
for (spec_it.mark_cycle_pt(); !spec_it.cycled_list(); spec_it.forward()) {
const AmbigSpec *ambig_spec = spec_it.data();
// We'll only do 1-1.
if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID &&
@ -661,14 +623,13 @@ void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
break;
}
if (!spec_it.cycled_list())
break; // Found an ambig.
break; // Found an ambig.
}
}
if (choice_it.cycled_list())
continue; // No match.
continue; // No match.
segmentation->push_back(length);
if (choices_pos + length == choices_length &&
text_index + 1 == target_text.size()) {
if (choices_pos + length == choices_length && text_index + 1 == target_text.size()) {
// This is a complete match. If the rating is good record a new best.
if (applybox_debug > 2) {
tprintf("Complete match, rating = %g, best=%g, seglength=%d, best=%d\n",
@ -679,19 +640,15 @@ void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
*best_segmentation = *segmentation;
*best_rating = rating + choice_rating;
}
} else if (choices_pos + length < choices_length &&
text_index + 1 < target_text.size()) {
} else if (choices_pos + length < choices_length && text_index + 1 < target_text.size()) {
if (applybox_debug > 3) {
tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n",
target_text[text_index],
tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n", target_text[text_index],
unicharset.id_to_unichar(target_text[text_index]),
choice_it.data()->unichar_id() == target_text[text_index]
? "Match" : "Ambig",
choice_it.data()->unichar_id() == target_text[text_index] ? "Match" : "Ambig",
choices_pos, length);
}
SearchForText(choices, choices_pos + length, choices_length, target_text,
text_index + 1, rating + choice_rating, segmentation,
best_rating, best_segmentation);
SearchForText(choices, choices_pos + length, choices_length, target_text, text_index + 1,
rating + choice_rating, segmentation, best_rating, best_segmentation);
if (applybox_debug > 3) {
tprintf("End recursion for %d=%s\n", target_text[text_index],
unicharset.id_to_unichar(target_text[text_index]));
@ -705,17 +662,17 @@ void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
/// - Deletes all unused or emptied words, counting the unused ones.
/// - Resets W_BOL and W_EOL flags correctly.
/// - Builds the rebuild_word and rebuilds the box_word and the best_choice.
void Tesseract::TidyUp(PAGE_RES* page_res) {
void Tesseract::TidyUp(PAGE_RES *page_res) {
int ok_blob_count = 0;
int bad_blob_count = 0;
int ok_word_count = 0;
int unlabelled_words = 0;
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
WERD_RES *word_res;
for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) {
int ok_in_word = 0;
int blob_count = word_res->correct_text.size();
auto* word_choice = new WERD_CHOICE(word_res->uch_set, blob_count);
auto *word_choice = new WERD_CHOICE(word_res->uch_set, blob_count);
word_choice->set_permuter(TOP_CHOICE_PERM);
for (int c = 0; c < blob_count; ++c) {
if (word_res->correct_text[c].length() > 0) {
@ -725,8 +682,8 @@ void Tesseract::TidyUp(PAGE_RES* page_res) {
// unichar_ids do not matter. Which is fortunate, since TidyUp()
// can be called while training Tesseract, at the stage where
// unicharset is not meaningful yet.
word_choice->append_unichar_id_space_allocated(
INVALID_UNICHAR_ID, word_res->best_state[c], 1.0f, -1.0f);
word_choice->append_unichar_id_space_allocated(INVALID_UNICHAR_ID, word_res->best_state[c],
1.0f, -1.0f);
}
if (ok_in_word > 0) {
ok_blob_count += ok_in_word;
@ -754,40 +711,45 @@ void Tesseract::TidyUp(PAGE_RES* page_res) {
if (applybox_debug > 0) {
tprintf(" Found %d good blobs.\n", ok_blob_count);
if (bad_blob_count > 0) {
tprintf(" Leaving %d unlabelled blobs in %d words.\n",
bad_blob_count, ok_word_count);
tprintf(" Leaving %d unlabelled blobs in %d words.\n", bad_blob_count, ok_word_count);
}
if (unlabelled_words > 0)
tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words);
}
}
#endif // ndef DISABLED_LEGACY_ENGINE
/** Logs a bad box by line in the box file and box coords.*/
void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box,
const char *box_ch, const char *err_msg) {
tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n",
boxfile_lineno + 1, box_ch,
void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch,
const char *err_msg) {
tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n", boxfile_lineno + 1, box_ch,
box.left(), box.bottom(), box.right(), box.top(), err_msg);
}
/** Creates a fake best_choice entry in each WERD_RES with the correct text.*/
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
/// Calls #LearnWord to extract features for labelled blobs within each word.
/// Features are stored in an internal buffer.
void Tesseract::ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res) {
PAGE_RES_IT pr_it(page_res);
for (WERD_RES *word_res = pr_it.word(); word_res != nullptr;
word_res = pr_it.forward()) {
auto* choice = new WERD_CHOICE(word_res->uch_set,
word_res->correct_text.size());
int word_count = 0;
for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
LearnWord(fontname.c_str(), word_res);
++word_count;
}
tprintf("Generated training data for %d words\n", word_count);
}
#endif // ndef DISABLED_LEGACY_ENGINE
/** Creates a fake best_choice entry in each WERD_RES with the correct text.*/
void Tesseract::CorrectClassifyWords(PAGE_RES *page_res) {
PAGE_RES_IT pr_it(page_res);
for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
auto *choice = new WERD_CHOICE(word_res->uch_set, word_res->correct_text.size());
for (int i = 0; i < word_res->correct_text.size(); ++i) {
// The part before the first space is the real ground truth, and the
// rest is the bounding box location and page number.
GenericVector<STRING> tokens;
word_res->correct_text[i].split(' ', &tokens);
std::vector<std::string> tokens = split(word_res->correct_text[i], ' ');
UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].c_str());
choice->append_unichar_id_space_allocated(char_id,
word_res->best_state[i],
0.0f, 0.0f);
choice->append_unichar_id_space_allocated(char_id, word_res->best_state[i], 0.0f, 0.0f);
}
word_res->ClearWordChoices();
word_res->LogNewRawChoice(choice);
@ -795,22 +757,4 @@ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
}
}
#ifndef DISABLED_LEGACY_ENGINE
/// Calls #LearnWord to extract features for labelled blobs within each word.
/// Features are stored in an internal buffer.
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res);
int word_count = 0;
for (WERD_RES *word_res = pr_it.word(); word_res != nullptr;
word_res = pr_it.forward()) {
LearnWord(fontname.c_str(), word_res);
++word_count;
}
tprintf("Generated training data for %d words\n", word_count);
}
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract
} // namespace tesseract

File diff suppressed because it is too large Load Diff

View File

@ -25,14 +25,13 @@
#ifndef CONTROL_H
#define CONTROL_H
enum ACCEPTABLE_WERD_TYPE
{
AC_UNACCEPTABLE, ///< Unacceptable word
AC_LOWER_CASE, ///< ALL lower case
AC_UPPER_CASE, ///< ALL upper case
AC_INITIAL_CAP, ///< ALL but initial lc
AC_LC_ABBREV, ///< a.b.c.
AC_UC_ABBREV ///< A.B.C.
enum ACCEPTABLE_WERD_TYPE {
AC_UNACCEPTABLE, ///< Unacceptable word
AC_LOWER_CASE, ///< ALL lower case
AC_UPPER_CASE, ///< ALL upper case
AC_INITIAL_CAP, ///< ALL but initial lc
AC_LC_ABBREV, ///< a.b.c.
AC_UC_ABBREV ///< A.B.C.
};
#endif

View File

@ -16,27 +16,27 @@
*
**********************************************************************/
#include <cctype>
#include "docqual.h"
#include <cctype>
#include "reject.h"
#include "tessvars.h"
#include "tesseractclass.h"
#include "tessvars.h"
namespace tesseract{
namespace tesseract {
static void countMatchingBlobs(int16_t& match_count, int /*index*/) {
static void countMatchingBlobs(int16_t &match_count, int /*index*/) {
++match_count;
}
static void countAcceptedBlobs(WERD_RES* word, int16_t& match_count,
int16_t& accepted_match_count, int index) {
static void countAcceptedBlobs(WERD_RES *word, int16_t &match_count, int16_t &accepted_match_count,
int index) {
if (word->reject_map[index].accepted()) {
++accepted_match_count;
}
++match_count;
}
static void acceptIfGoodQuality(WERD_RES* word, int index) {
static void acceptIfGoodQuality(WERD_RES *word, int index) {
if (word->reject_map[index].accept_if_good_quality()) {
word->reject_map[index].setrej_quality_accept();
}
@ -48,14 +48,13 @@ static void acceptIfGoodQuality(WERD_RES* word, int index) {
* ASSUME blobs in both initial word and box_word are in ascending order of
* left hand blob edge.
*************************************************************************/
int16_t Tesseract::word_blob_quality(WERD_RES* word) {
int16_t Tesseract::word_blob_quality(WERD_RES *word) {
int16_t match_count = 0;
if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
!word->rebuild_word->blobs.empty()) {
using namespace std::placeholders; // for _1
word->bln_boxes->ProcessMatchedBlobs(
*word->rebuild_word,
std::bind(countMatchingBlobs, match_count, _1));
using namespace std::placeholders; // for _1
word->bln_boxes->ProcessMatchedBlobs(*word->rebuild_word,
std::bind(countMatchingBlobs, match_count, _1));
}
return match_count;
}
@ -66,9 +65,8 @@ int16_t Tesseract::word_outline_errs(WERD_RES *word) {
if (word->rebuild_word != nullptr) {
for (int b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
TBLOB* blob = word->rebuild_word->blobs[b];
err_count += count_outline_errs(word->best_choice->unichar_string()[i],
blob->NumOutlines());
TBLOB *blob = word->rebuild_word->blobs[b];
err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines());
i++;
}
}
@ -80,17 +78,16 @@ int16_t Tesseract::word_outline_errs(WERD_RES *word) {
* Combination of blob quality and outline quality - how many good chars are
* there? - I.e chars which pass the blob AND outline tests.
*************************************************************************/
void Tesseract::word_char_quality(WERD_RES* word, int16_t* match_count,
int16_t* accepted_match_count) {
void Tesseract::word_char_quality(WERD_RES *word, int16_t *match_count,
int16_t *accepted_match_count) {
*match_count = 0;
*accepted_match_count = 0;
if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
!word->rebuild_word->blobs.empty()) {
using namespace std::placeholders; // for _1
using namespace std::placeholders; // for _1
word->bln_boxes->ProcessMatchedBlobs(
*word->rebuild_word,
std::bind(countAcceptedBlobs,
word, *match_count, *accepted_match_count, _1));
std::bind(countAcceptedBlobs, word, *match_count, *accepted_match_count, _1));
}
}
@ -98,29 +95,28 @@ void Tesseract::word_char_quality(WERD_RES* word, int16_t* match_count,
* unrej_good_chs()
* Unreject POTENTIAL rejects if the blob passes the blob and outline checks
*************************************************************************/
void Tesseract::unrej_good_chs(WERD_RES* word) {
void Tesseract::unrej_good_chs(WERD_RES *word) {
if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
word->rebuild_word->blobs.empty()) {
using namespace std::placeholders; // for _1
word->bln_boxes->ProcessMatchedBlobs(
*word->rebuild_word, std::bind(acceptIfGoodQuality, word, _1));
using namespace std::placeholders; // for _1
word->bln_boxes->ProcessMatchedBlobs(*word->rebuild_word,
std::bind(acceptIfGoodQuality, word, _1));
}
}
int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) {
int expected_outline_count;
if (STRING (outlines_odd).contains (c))
return 0; // Don't use this char
else if (STRING (outlines_2).contains (c))
if (outlines_odd.contains(c))
return 0; // Don't use this char
else if (outlines_2.contains(c))
expected_outline_count = 2;
else
expected_outline_count = 1;
return abs (outline_count - expected_outline_count);
return abs(outline_count - expected_outline_count);
}
void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
bool good_quality_doc) {
void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, bool good_quality_doc) {
if ((tessedit_good_quality_unrej && good_quality_doc))
unrej_good_quality_words(page_res_it);
doc_and_block_rejection(page_res_it, good_quality_doc);
@ -141,71 +137,65 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
* - CAN'T do it in a single pass without a bit of fiddling
* - keep it simple but inefficient
*************************************************************************/
void Tesseract::unrej_good_quality_words( //unreject potential
PAGE_RES_IT &page_res_it) {
void Tesseract::unrej_good_quality_words( // unreject potential
PAGE_RES_IT &page_res_it) {
WERD_RES *word;
ROW_RES *current_row;
BLOCK_RES *current_block;
int i;
page_res_it.restart_page ();
while (page_res_it.word () != nullptr) {
check_debug_pt (page_res_it.word (), 100);
page_res_it.restart_page();
while (page_res_it.word() != nullptr) {
check_debug_pt(page_res_it.word(), 100);
if (bland_unrej) {
word = page_res_it.word ();
for (i = 0; i < word->reject_map.length (); i++) {
if (word->reject_map[i].accept_if_good_quality ())
word->reject_map[i].setrej_quality_accept ();
word = page_res_it.word();
for (i = 0; i < word->reject_map.length(); i++) {
if (word->reject_map[i].accept_if_good_quality())
word->reject_map[i].setrej_quality_accept();
}
page_res_it.forward ();
}
else if ((page_res_it.row ()->char_count > 0) &&
((page_res_it.row ()->rej_count /
static_cast<float>(page_res_it.row ()->char_count)) <=
quality_rowrej_pc)) {
word = page_res_it.word ();
page_res_it.forward();
} else if ((page_res_it.row()->char_count > 0) &&
((page_res_it.row()->rej_count /
static_cast<float>(page_res_it.row()->char_count)) <= quality_rowrej_pc)) {
word = page_res_it.word();
if (word->reject_map.quality_recoverable_rejects() &&
(tessedit_unrej_any_wd ||
acceptable_word_string(*word->uch_set,
word->best_choice->unichar_string().c_str(),
word->best_choice->unichar_lengths().c_str())
!= AC_UNACCEPTABLE)) {
acceptable_word_string(*word->uch_set, word->best_choice->unichar_string().c_str(),
word->best_choice->unichar_lengths().c_str()) !=
AC_UNACCEPTABLE)) {
unrej_good_chs(word);
}
page_res_it.forward ();
}
else {
page_res_it.forward();
} else {
// Skip to end of dodgy row.
current_row = page_res_it.row ();
while ((page_res_it.word () != nullptr) &&
(page_res_it.row () == current_row))
page_res_it.forward ();
current_row = page_res_it.row();
while ((page_res_it.word() != nullptr) && (page_res_it.row() == current_row))
page_res_it.forward();
}
check_debug_pt (page_res_it.word (), 110);
check_debug_pt(page_res_it.word(), 110);
}
page_res_it.restart_page ();
page_res_it.restart_page();
page_res_it.page_res->char_count = 0;
page_res_it.page_res->rej_count = 0;
current_block = nullptr;
current_row = nullptr;
while (page_res_it.word () != nullptr) {
if (current_block != page_res_it.block ()) {
current_block = page_res_it.block ();
while (page_res_it.word() != nullptr) {
if (current_block != page_res_it.block()) {
current_block = page_res_it.block();
current_block->char_count = 0;
current_block->rej_count = 0;
}
if (current_row != page_res_it.row ()) {
current_row = page_res_it.row ();
if (current_row != page_res_it.row()) {
current_row = page_res_it.row();
current_row->char_count = 0;
current_row->rej_count = 0;
current_row->whole_word_rej_count = 0;
}
page_res_it.rej_stat_word ();
page_res_it.forward ();
page_res_it.rej_stat_word();
page_res_it.forward();
}
}
/*************************************************************************
* doc_and_block_rejection()
*
@ -213,9 +203,8 @@ void Tesseract::unrej_good_quality_words( //unreject potential
* If any block has too many rejects - reject all words in the block
*************************************************************************/
void Tesseract::doc_and_block_rejection( //reject big chunks
PAGE_RES_IT &page_res_it,
bool good_quality_doc) {
void Tesseract::doc_and_block_rejection( // reject big chunks
PAGE_RES_IT &page_res_it, bool good_quality_doc) {
int16_t block_no = 0;
int16_t row_no = 0;
BLOCK_RES *current_block;
@ -226,49 +215,43 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
int16_t char_quality = 0;
int16_t accepted_char_quality;
if (page_res_it.page_res->rej_count * 100.0 /
page_res_it.page_res->char_count > tessedit_reject_doc_percent) {
if (page_res_it.page_res->rej_count * 100.0 / page_res_it.page_res->char_count >
tessedit_reject_doc_percent) {
reject_whole_page(page_res_it);
if (tessedit_debug_doc_rejection) {
tprintf("REJECT ALL #chars: %d #Rejects: %d; \n",
page_res_it.page_res->char_count,
tprintf("REJECT ALL #chars: %d #Rejects: %d; \n", page_res_it.page_res->char_count,
page_res_it.page_res->rej_count);
}
} else {
if (tessedit_debug_doc_rejection) {
tprintf("NO PAGE REJECTION #chars: %d # Rejects: %d; \n",
page_res_it.page_res->char_count,
tprintf("NO PAGE REJECTION #chars: %d # Rejects: %d; \n", page_res_it.page_res->char_count,
page_res_it.page_res->rej_count);
}
/* Walk blocks testing for block rejection */
page_res_it.restart_page();
WERD_RES* word;
WERD_RES *word;
while ((word = page_res_it.word()) != nullptr) {
current_block = page_res_it.block();
block_no = current_block->block->pdblk.index();
if (current_block->char_count > 0 &&
(current_block->rej_count * 100.0 / current_block->char_count) >
tessedit_reject_block_percent) {
tessedit_reject_block_percent) {
if (tessedit_debug_block_rejection) {
tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n",
block_no, current_block->char_count,
current_block->rej_count);
tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", block_no,
current_block->char_count, current_block->rej_count);
}
prev_word_rejected = false;
while ((word = page_res_it.word()) != nullptr &&
(page_res_it.block() == current_block)) {
while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) {
if (tessedit_preserve_blk_rej_perfect_wds) {
rej_word = word->reject_map.reject_count() > 0 ||
word->reject_map.length () < tessedit_preserve_min_wd_len;
word->reject_map.length() < tessedit_preserve_min_wd_len;
if (rej_word && tessedit_dont_blkrej_good_wds &&
word->reject_map.length() >= tessedit_preserve_min_wd_len &&
acceptable_word_string(
*word->uch_set,
word->best_choice->unichar_string().c_str(),
word->best_choice->unichar_lengths().c_str()) !=
AC_UNACCEPTABLE) {
acceptable_word_string(*word->uch_set, word->best_choice->unichar_string().c_str(),
word->best_choice->unichar_lengths().c_str()) !=
AC_UNACCEPTABLE) {
word_char_quality(word, &char_quality, &accepted_char_quality);
rej_word = char_quality != word->reject_map.length();
}
@ -277,14 +260,12 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
}
if (rej_word) {
/*
Reject spacing if both current and prev words are rejected.
NOTE - this is NOT restricted to FUZZY spaces. - When tried this
generated more space errors.
*/
if (tessedit_use_reject_spaces &&
prev_word_rejected &&
page_res_it.prev_row() == page_res_it.row() &&
word->word->space() == 1)
Reject spacing if both current and prev words are rejected.
NOTE - this is NOT restricted to FUZZY spaces. - When tried this
generated more space errors.
*/
if (tessedit_use_reject_spaces && prev_word_rejected &&
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1)
word->reject_spaces = true;
word->reject_map.rej_word_block_rej();
}
@ -293,53 +274,46 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
}
} else {
if (tessedit_debug_block_rejection) {
tprintf("NOT REJECTING BLOCK %d #chars: %d # Rejects: %d; \n",
block_no, page_res_it.block()->char_count,
page_res_it.block()->rej_count);
tprintf("NOT REJECTING BLOCK %d #chars: %d # Rejects: %d; \n", block_no,
page_res_it.block()->char_count, page_res_it.block()->rej_count);
}
/* Walk rows in block testing for row rejection */
row_no = 0;
while (page_res_it.word() != nullptr &&
page_res_it.block() == current_block) {
while (page_res_it.word() != nullptr && page_res_it.block() == current_block) {
current_row = page_res_it.row();
row_no++;
/* Reject whole row if:
fraction of chars on row which are rejected exceed a limit AND
fraction rejects which occur in WHOLE WERD rejects is LESS THAN a
limit
*/
fraction of chars on row which are rejected exceed a limit AND
fraction rejects which occur in WHOLE WERD rejects is LESS THAN a
limit
*/
if (current_row->char_count > 0 &&
(current_row->rej_count * 100.0 / current_row->char_count) >
tessedit_reject_row_percent &&
(current_row->whole_word_rej_count * 100.0 /
current_row->rej_count) <
tessedit_whole_wd_rej_row_percent) {
tessedit_reject_row_percent &&
(current_row->whole_word_rej_count * 100.0 / current_row->rej_count) <
tessedit_whole_wd_rej_row_percent) {
if (tessedit_debug_block_rejection) {
tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n",
row_no, current_row->char_count,
current_row->rej_count);
tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", row_no,
current_row->char_count, current_row->rej_count);
}
prev_word_rejected = false;
while ((word = page_res_it.word()) != nullptr &&
page_res_it.row () == current_row) {
while ((word = page_res_it.word()) != nullptr && page_res_it.row() == current_row) {
/* Preserve words on good docs unless they are mostly rejected*/
if (!tessedit_row_rej_good_docs && good_quality_doc) {
rej_word = word->reject_map.reject_count() /
static_cast<float>(word->reject_map.length()) >
tessedit_good_doc_still_rowrej_wd;
static_cast<float>(word->reject_map.length()) >
tessedit_good_doc_still_rowrej_wd;
} else if (tessedit_preserve_row_rej_perfect_wds) {
/* Preserve perfect words anyway */
rej_word = word->reject_map.reject_count() > 0 ||
word->reject_map.length () < tessedit_preserve_min_wd_len;
word->reject_map.length() < tessedit_preserve_min_wd_len;
if (rej_word && tessedit_dont_rowrej_good_wds &&
word->reject_map.length() >= tessedit_preserve_min_wd_len &&
acceptable_word_string(*word->uch_set,
word->best_choice->unichar_string().c_str(),
word->best_choice->unichar_lengths().c_str()) !=
AC_UNACCEPTABLE) {
word_char_quality(word, &char_quality,
&accepted_char_quality);
acceptable_word_string(
*word->uch_set, word->best_choice->unichar_string().c_str(),
word->best_choice->unichar_lengths().c_str()) != AC_UNACCEPTABLE) {
word_char_quality(word, &char_quality, &accepted_char_quality);
rej_word = char_quality != word->reject_map.length();
}
} else {
@ -347,14 +321,12 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
}
if (rej_word) {
/*
Reject spacing if both current and prev words are rejected.
NOTE - this is NOT restricted to FUZZY spaces. - When tried
this generated more space errors.
*/
if (tessedit_use_reject_spaces &&
prev_word_rejected &&
page_res_it.prev_row() == page_res_it.row() &&
word->word->space () == 1)
Reject spacing if both current and prev words are rejected.
NOTE - this is NOT restricted to FUZZY spaces. - When tried
this generated more space errors.
*/
if (tessedit_use_reject_spaces && prev_word_rejected &&
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1)
word->reject_spaces = true;
word->reject_map.rej_word_row_rej();
}
@ -363,11 +335,10 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
}
} else {
if (tessedit_debug_block_rejection) {
tprintf("NOT REJECTING ROW %d #chars: %d # Rejects: %d; \n",
row_no, current_row->char_count, current_row->rej_count);
tprintf("NOT REJECTING ROW %d #chars: %d # Rejects: %d; \n", row_no,
current_row->char_count, current_row->rej_count);
}
while (page_res_it.word() != nullptr &&
page_res_it.row() == current_row)
while (page_res_it.word() != nullptr && page_res_it.row() == current_row)
page_res_it.forward();
}
}
@ -376,8 +347,6 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
}
}
} // namespace tesseract
/*************************************************************************
* reject_whole_page()
* Don't believe any of it - set the reject map to 00..00 in all words
@ -385,16 +354,15 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
*************************************************************************/
void reject_whole_page(PAGE_RES_IT &page_res_it) {
page_res_it.restart_page ();
while (page_res_it.word () != nullptr) {
page_res_it.word ()->reject_map.rej_word_doc_rej ();
page_res_it.forward ();
page_res_it.restart_page();
while (page_res_it.word() != nullptr) {
page_res_it.word()->reject_map.rej_word_doc_rej();
page_res_it.forward();
}
//whole page is rejected
// whole page is rejected
page_res_it.page_res->rejected = true;
}
namespace tesseract {
void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
WERD_RES *word;
GARBAGE_LEVEL garbage_level;
@ -405,7 +373,7 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
page_res_it.restart_page();
while (page_res_it.word() != nullptr) {
POLY_BLOCK* pb = page_res_it.block()->block->pdblk.poly_block();
POLY_BLOCK *pb = page_res_it.block()->block->pdblk.poly_block();
if (pb != nullptr && !pb->IsText()) {
page_res_it.forward();
continue;
@ -418,109 +386,93 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
if (crunch_early_merge_tess_fails)
word->merge_tess_fails();
if (word->reject_map.accept_count () != 0) {
if (word->reject_map.accept_count() != 0) {
found_terrible_word = false;
//Forget earlier potential crunches
// Forget earlier potential crunches
prev_potential_marked = false;
}
else {
} else {
ok_dict_word = safe_dict_word(word);
garbage_level = garbage_word(word, ok_dict_word);
if ((garbage_level != G_NEVER_CRUNCH) &&
(terrible_word_crunch (word, garbage_level))) {
if ((garbage_level != G_NEVER_CRUNCH) && (terrible_word_crunch(word, garbage_level))) {
if (crunch_debug > 0) {
tprintf ("T CRUNCHING: \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("T CRUNCHING: \"%s\"\n", word->best_choice->unichar_string().c_str());
}
word->unlv_crunch_mode = CR_KEEP_SPACE;
if (prev_potential_marked) {
while (copy_it.word () != word) {
while (copy_it.word() != word) {
if (crunch_debug > 0) {
tprintf ("P1 CRUNCHING: \"%s\"\n",
copy_it.word()->best_choice->unichar_string().c_str());
tprintf("P1 CRUNCHING: \"%s\"\n",
copy_it.word()->best_choice->unichar_string().c_str());
}
copy_it.word ()->unlv_crunch_mode = CR_KEEP_SPACE;
copy_it.forward ();
copy_it.word()->unlv_crunch_mode = CR_KEEP_SPACE;
copy_it.forward();
}
prev_potential_marked = false;
}
found_terrible_word = true;
}
else if ((garbage_level != G_NEVER_CRUNCH) &&
(potential_word_crunch (word,
garbage_level, ok_dict_word))) {
} else if ((garbage_level != G_NEVER_CRUNCH) &&
(potential_word_crunch(word, garbage_level, ok_dict_word))) {
if (found_terrible_word) {
if (crunch_debug > 0) {
tprintf ("P2 CRUNCHING: \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("P2 CRUNCHING: \"%s\"\n", word->best_choice->unichar_string().c_str());
}
word->unlv_crunch_mode = CR_KEEP_SPACE;
}
else if (!prev_potential_marked) {
} else if (!prev_potential_marked) {
copy_it = page_res_it;
prev_potential_marked = true;
if (crunch_debug > 1) {
tprintf ("P3 CRUNCHING: \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("P3 CRUNCHING: \"%s\"\n", word->best_choice->unichar_string().c_str());
}
}
}
else {
} else {
found_terrible_word = false;
//Forget earlier potential crunches
// Forget earlier potential crunches
prev_potential_marked = false;
if (crunch_debug > 2) {
tprintf ("NO CRUNCH: \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("NO CRUNCH: \"%s\"\n", word->best_choice->unichar_string().c_str());
}
}
}
page_res_it.forward ();
page_res_it.forward();
}
}
bool Tesseract::terrible_word_crunch(WERD_RES* word,
GARBAGE_LEVEL garbage_level) {
bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) {
float rating_per_ch;
int adjusted_len;
int crunch_mode = 0;
if ((word->best_choice->unichar_string().length() == 0) ||
(strspn(word->best_choice->unichar_string().c_str(), " ") ==
word->best_choice->unichar_string().unsigned_size()))
word->best_choice->unichar_string().size()))
crunch_mode = 1;
else {
adjusted_len = word->reject_map.length ();
adjusted_len = word->reject_map.length();
if (adjusted_len > crunch_rating_max)
adjusted_len = crunch_rating_max;
rating_per_ch = word->best_choice->rating () / adjusted_len;
rating_per_ch = word->best_choice->rating() / adjusted_len;
if (rating_per_ch > crunch_terrible_rating)
crunch_mode = 2;
else if (crunch_terrible_garbage && (garbage_level == G_TERRIBLE))
crunch_mode = 3;
else if ((word->best_choice->certainty () < crunch_poor_garbage_cert) &&
(garbage_level != G_OK))
else if ((word->best_choice->certainty() < crunch_poor_garbage_cert) && (garbage_level != G_OK))
crunch_mode = 4;
else if ((rating_per_ch > crunch_poor_garbage_rate) &&
(garbage_level != G_OK))
else if ((rating_per_ch > crunch_poor_garbage_rate) && (garbage_level != G_OK))
crunch_mode = 5;
}
if (crunch_mode > 0) {
if (crunch_debug > 2) {
tprintf ("Terrible_word_crunch (%d) on \"%s\"\n",
crunch_mode, word->best_choice->unichar_string().c_str());
tprintf("Terrible_word_crunch (%d) on \"%s\"\n", crunch_mode,
word->best_choice->unichar_string().c_str());
}
return true;
}
else
} else
return false;
}
bool Tesseract::potential_word_crunch(WERD_RES* word,
GARBAGE_LEVEL garbage_level,
bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level,
bool ok_dict_word) {
float rating_per_ch;
int adjusted_len;
@ -529,11 +481,9 @@ bool Tesseract::potential_word_crunch(WERD_RES* word,
bool word_crunchable;
int poor_indicator_count = 0;
word_crunchable = !crunch_leave_accept_strings ||
word->reject_map.length() < 3 ||
(acceptable_word_string(*word->uch_set,
str, lengths) == AC_UNACCEPTABLE &&
!ok_dict_word);
word_crunchable =
!crunch_leave_accept_strings || word->reject_map.length() < 3 ||
(acceptable_word_string(*word->uch_set, str, lengths) == AC_UNACCEPTABLE && !ok_dict_word);
adjusted_len = word->reject_map.length();
if (adjusted_len > 10)
@ -542,25 +492,21 @@ bool Tesseract::potential_word_crunch(WERD_RES* word,
if (rating_per_ch > crunch_pot_poor_rate) {
if (crunch_debug > 2) {
tprintf("Potential poor rating on \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("Potential poor rating on \"%s\"\n", word->best_choice->unichar_string().c_str());
}
poor_indicator_count++;
}
if (word_crunchable &&
word->best_choice->certainty() < crunch_pot_poor_cert) {
if (word_crunchable && word->best_choice->certainty() < crunch_pot_poor_cert) {
if (crunch_debug > 2) {
tprintf("Potential poor cert on \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("Potential poor cert on \"%s\"\n", word->best_choice->unichar_string().c_str());
}
poor_indicator_count++;
}
if (garbage_level != G_OK) {
if (crunch_debug > 2) {
tprintf("Potential garbage on \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("Potential garbage on \"%s\"\n", word->best_choice->unichar_string().c_str());
}
poor_indicator_count++;
}
@ -581,62 +527,55 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
while (page_res_it.word() != nullptr) {
word = page_res_it.word();
delete_mode = word_deletable (word, debug_delete_mode);
delete_mode = word_deletable(word, debug_delete_mode);
if (delete_mode != CR_NONE) {
if (word->word->flag (W_BOL) || deleting_from_bol) {
if (word->word->flag(W_BOL) || deleting_from_bol) {
if (crunch_debug > 0) {
tprintf ("BOL CRUNCH DELETING(%d): \"%s\"\n",
debug_delete_mode,
word->best_choice->unichar_string().c_str());
tprintf("BOL CRUNCH DELETING(%d): \"%s\"\n", debug_delete_mode,
word->best_choice->unichar_string().c_str());
}
word->unlv_crunch_mode = delete_mode;
deleting_from_bol = true;
} else if (word->word->flag(W_EOL)) {
if (marked_delete_point) {
while (copy_it.word() != word) {
x_delete_mode = word_deletable (copy_it.word (),
x_debug_delete_mode);
x_delete_mode = word_deletable(copy_it.word(), x_debug_delete_mode);
if (crunch_debug > 0) {
tprintf ("EOL CRUNCH DELETING(%d): \"%s\"\n",
x_debug_delete_mode,
copy_it.word()->best_choice->unichar_string().c_str());
tprintf("EOL CRUNCH DELETING(%d): \"%s\"\n", x_debug_delete_mode,
copy_it.word()->best_choice->unichar_string().c_str());
}
copy_it.word ()->unlv_crunch_mode = x_delete_mode;
copy_it.forward ();
copy_it.word()->unlv_crunch_mode = x_delete_mode;
copy_it.forward();
}
}
if (crunch_debug > 0) {
tprintf ("EOL CRUNCH DELETING(%d): \"%s\"\n",
debug_delete_mode,
word->best_choice->unichar_string().c_str());
tprintf("EOL CRUNCH DELETING(%d): \"%s\"\n", debug_delete_mode,
word->best_choice->unichar_string().c_str());
}
word->unlv_crunch_mode = delete_mode;
deleting_from_bol = false;
marked_delete_point = false;
}
else {
} else {
if (!marked_delete_point) {
copy_it = page_res_it;
marked_delete_point = true;
}
}
}
else {
} else {
deleting_from_bol = false;
//Forget earlier potential crunches
// Forget earlier potential crunches
marked_delete_point = false;
}
/*
The following step has been left till now as the tess fails are used to
determine if the word is deletable.
*/
The following step has been left till now as the tess fails are used to
determine if the word is deletable.
*/
if (!crunch_early_merge_tess_fails)
word->merge_tess_fails();
page_res_it.forward ();
page_res_it.forward();
}
}
void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) {
int i;
UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-");
@ -646,20 +585,19 @@ void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) {
for (i = 0; i < word_res->reject_map.length(); ++i) {
if (word_res->best_choice->unichar_id(i) == unichar_tilde) {
word_res->best_choice->set_unichar_id(unichar_dash, i);
if (word_res->reject_map[i].accepted ())
word_res->reject_map[i].setrej_unlv_rej ();
if (word_res->reject_map[i].accepted())
word_res->reject_map[i].setrej_unlv_rej();
}
if (word_res->best_choice->unichar_id(i) == unichar_pow) {
word_res->best_choice->set_unichar_id(unichar_space, i);
if (word_res->reject_map[i].accepted ())
word_res->reject_map[i].setrej_unlv_rej ();
if (word_res->reject_map[i].accepted())
word_res->reject_map[i].setrej_unlv_rej();
}
}
}
GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
enum STATES
{
enum STATES {
JUNK,
FIRST_UPPER,
FIRST_LOWER,
@ -690,7 +628,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
for (; *str != '\0'; str += *(lengths++)) {
len++;
if (word->uch_set->get_isupper (str, *lengths)) {
if (word->uch_set->get_isupper(str, *lengths)) {
total_alpha_count++;
switch (state) {
case SUBSEQUENT_UPPER:
@ -704,8 +642,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
if (longest_alpha_repetition_count < alpha_repetition_count) {
longest_alpha_repetition_count = alpha_repetition_count;
}
}
else {
} else {
last_char = word->uch_set->unichar_to_id(str, *lengths);
alpha_repetition_count = 1;
}
@ -720,8 +657,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
upper_string_count = 1;
break;
}
}
else if (word->uch_set->get_islower (str, *lengths)) {
} else if (word->uch_set->get_islower(str, *lengths)) {
total_alpha_count++;
switch (state) {
case SUBSEQUENT_LOWER:
@ -735,8 +671,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
if (longest_alpha_repetition_count < alpha_repetition_count) {
longest_alpha_repetition_count = alpha_repetition_count;
}
}
else {
} else {
last_char = word->uch_set->unichar_to_id(str, *lengths);
alpha_repetition_count = 1;
}
@ -751,8 +686,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
lower_string_count = 1;
break;
}
}
else if (word->uch_set->get_isdigit (str, *lengths)) {
} else if (word->uch_set->get_isdigit(str, *lengths)) {
total_digit_count++;
switch (state) {
case FIRST_NUM:
@ -767,8 +701,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
state = FIRST_NUM;
break;
}
}
else {
} else {
if (*lengths == 1 && *str == ' ')
tess_rejs++;
else
@ -802,63 +735,51 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
total_alpha_count += total_digit_count - isolated_digits;
}
if (crunch_leave_ok_strings && len >= 4 &&
2 * (total_alpha_count - isolated_alphas) > len &&
if (crunch_leave_ok_strings && len >= 4 && 2 * (total_alpha_count - isolated_alphas) > len &&
longest_alpha_repetition_count < crunch_long_repetitions) {
if ((crunch_accept_ok &&
acceptable_word_string(*word->uch_set, str, lengths) !=
AC_UNACCEPTABLE) ||
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE) ||
longest_lower_run_len > crunch_leave_lc_strings ||
longest_upper_run_len > crunch_leave_uc_strings)
return G_NEVER_CRUNCH;
}
if (word->reject_map.length() > 1 &&
strpbrk(str, " ") == nullptr &&
if (word->reject_map.length() > 1 && strpbrk(str, " ") == nullptr &&
(word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
word->best_choice->permuter() == USER_DAWG_PERM ||
word->best_choice->permuter() == NUMBER_PERM ||
acceptable_word_string(*word->uch_set, str, lengths) !=
AC_UNACCEPTABLE || ok_dict_word))
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE || ok_dict_word))
return G_OK;
ok_chars = len - bad_char_count - isolated_digits -
isolated_alphas - tess_rejs;
ok_chars = len - bad_char_count - isolated_digits - isolated_alphas - tess_rejs;
if (crunch_debug > 3) {
tprintf("garbage_word: \"%s\"\n",
word->best_choice->unichar_string().c_str());
tprintf("LEN: %d bad: %d iso_N: %d iso_A: %d rej: %d\n",
len,
bad_char_count, isolated_digits, isolated_alphas, tess_rejs);
tprintf("garbage_word: \"%s\"\n", word->best_choice->unichar_string().c_str());
tprintf("LEN: %d bad: %d iso_N: %d iso_A: %d rej: %d\n", len, bad_char_count,
isolated_digits, isolated_alphas, tess_rejs);
}
if (bad_char_count == 0 &&
tess_rejs == 0 &&
if (bad_char_count == 0 && tess_rejs == 0 &&
(len > isolated_digits + isolated_alphas || len <= 2))
return G_OK;
if (tess_rejs > ok_chars ||
(tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len))
if (tess_rejs > ok_chars || (tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len))
return G_TERRIBLE;
if (len > 4) {
dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits +
isolated_alphas;
dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits + isolated_alphas;
if (dodgy_chars > 5 || (dodgy_chars / static_cast<float>(len)) > 0.5)
return G_DODGY;
else
return G_OK;
} else {
dodgy_chars = 2 * tess_rejs + bad_char_count;
if ((len == 4 && dodgy_chars > 2) ||
(len == 3 && dodgy_chars > 2) || dodgy_chars >= len)
if ((len == 4 && dodgy_chars > 2) || (len == 3 && dodgy_chars > 2) || dodgy_chars >= len)
return G_DODGY;
else
return G_OK;
}
}
/*************************************************************************
* word_deletable()
* DELETE WERDS AT ENDS OF ROWS IF
@ -876,9 +797,9 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
*************************************************************************/
CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) {
int word_len = word->reject_map.length ();
int word_len = word->reject_map.length();
float rating_per_ch;
TBOX box; //BB of word
TBOX box; // BB of word
if (word->unlv_crunch_mode == CR_NONE) {
delete_mode = 0;
@ -893,7 +814,7 @@ CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) {
if (word->rebuild_word != nullptr) {
// Cube leaves rebuild_word nullptr.
box = word->rebuild_word->bounding_box();
if (box.height () < crunch_del_min_ht * kBlnXHeight) {
if (box.height() < crunch_del_min_ht * kBlnXHeight) {
delete_mode = 4;
return CR_DELETE;
}
@ -904,40 +825,39 @@ CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) {
}
}
if ((failure_count (word) * 1.5) > word_len) {
if ((failure_count(word) * 1.5) > word_len) {
delete_mode = 2;
return CR_LOOSE_SPACE;
}
if (word->best_choice->certainty () < crunch_del_cert) {
if (word->best_choice->certainty() < crunch_del_cert) {
delete_mode = 7;
return CR_LOOSE_SPACE;
}
rating_per_ch = word->best_choice->rating () / word_len;
rating_per_ch = word->best_choice->rating() / word_len;
if (rating_per_ch > crunch_del_rating) {
delete_mode = 8;
return CR_LOOSE_SPACE;
}
if (box.top () < kBlnBaselineOffset - crunch_del_low_word * kBlnXHeight) {
if (box.top() < kBlnBaselineOffset - crunch_del_low_word * kBlnXHeight) {
delete_mode = 9;
return CR_LOOSE_SPACE;
}
if (box.bottom () >
kBlnBaselineOffset + crunch_del_high_word * kBlnXHeight) {
if (box.bottom() > kBlnBaselineOffset + crunch_del_high_word * kBlnXHeight) {
delete_mode = 10;
return CR_LOOSE_SPACE;
}
if (box.height () > crunch_del_max_ht * kBlnXHeight) {
if (box.height() > crunch_del_max_ht * kBlnXHeight) {
delete_mode = 11;
return CR_LOOSE_SPACE;
}
if (box.width () < crunch_del_min_width * kBlnXHeight) {
if (box.width() < crunch_del_min_width * kBlnXHeight) {
delete_mode = 3;
return CR_LOOSE_SPACE;
}
@ -957,17 +877,16 @@ int16_t Tesseract::failure_count(WERD_RES *word) {
return tess_rejs;
}
bool Tesseract::noise_outlines(TWERD* word) {
TBOX box; // BB of outline
bool Tesseract::noise_outlines(TWERD *word) {
TBOX box; // BB of outline
int16_t outline_count = 0;
int16_t small_outline_count = 0;
int16_t max_dimension;
float small_limit = kBlnXHeight * crunch_small_outlines_size;
for (int b = 0; b < word->NumBlobs(); ++b) {
TBLOB* blob = word->blobs[b];
for (TESSLINE* ol = blob->outlines; ol != nullptr; ol = ol->next) {
TBLOB *blob = word->blobs[b];
for (TESSLINE *ol = blob->outlines; ol != nullptr; ol = ol->next) {
outline_count++;
box = ol->bounding_box();
if (box.height() > box.width())
@ -981,4 +900,4 @@ bool Tesseract::noise_outlines(TWERD* word) {
return small_outline_count >= outline_count;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -19,21 +19,19 @@
#ifndef DOCQUAL_H
#define DOCQUAL_H
#include <cstdint> // for int16_t
#include <cstdint> // for int16_t
namespace tesseract {
class PAGE_RES_IT;
class ROW;
class WERD_RES;
enum GARBAGE_LEVEL
{
G_NEVER_CRUNCH,
G_OK,
G_DODGY,
G_TERRIBLE
};
enum GARBAGE_LEVEL { G_NEVER_CRUNCH, G_OK, G_DODGY, G_TERRIBLE };
int16_t word_blob_quality(WERD_RES* word);
int16_t word_blob_quality(WERD_RES *word);
void reject_whole_page(PAGE_RES_IT &page_res_it);
} // namespace tesseract
#endif

File diff suppressed because it is too large Load Diff

View File

@ -19,11 +19,11 @@
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H_
#define TESSERACT_CCMAIN_EQUATIONDETECT_H_
#include "blobbox.h" // for BLOBNBOX (ptr only), BlobSpecialText...
#include "equationdetectbase.h" // for EquationDetectBase
#include <tesseract/genericvector.h> // for GenericVector
#include "tesseractclass.h" // for Tesseract
#include <tesseract/unichar.h> // for UNICHAR_ID
#include <tesseract/unichar.h> // for UNICHAR_ID
#include "blobbox.h" // for BLOBNBOX (ptr only), BlobSpecialText...
#include "equationdetectbase.h" // for EquationDetectBase
#include "genericvector.h" // for GenericVector
#include "tesseractclass.h" // for Tesseract
class TBOX;
class UNICHARSET;
@ -35,54 +35,46 @@ class ColPartition;
class ColPartitionGrid;
class ColPartitionSet;
class EquationDetect : public EquationDetectBase {
public:
EquationDetect(const char* equ_datapath,
const char* equ_language);
class TESS_API EquationDetect : public EquationDetectBase {
public:
EquationDetect(const char *equ_datapath, const char *equ_language);
~EquationDetect() override;
enum IndentType {
NO_INDENT,
LEFT_INDENT,
RIGHT_INDENT,
BOTH_INDENT,
INDENT_TYPE_COUNT
};
enum IndentType { NO_INDENT, LEFT_INDENT, RIGHT_INDENT, BOTH_INDENT, INDENT_TYPE_COUNT };
// Reset the lang_tesseract_ pointer. This function should be called before we
// do any detector work.
void SetLangTesseract(Tesseract* lang_tesseract);
void SetLangTesseract(Tesseract *lang_tesseract);
// Iterate over the blobs inside to_block, and set the blobs that we want to
// process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function
// returns 0 upon success.
int LabelSpecialText(TO_BLOCK* to_block) override;
int LabelSpecialText(TO_BLOCK *to_block) override;
// Find possible equation partitions from part_grid. Should be called
// after the special_text_type of blobs are set.
// It returns 0 upon success.
int FindEquationParts(ColPartitionGrid* part_grid,
ColPartitionSet** best_columns) override;
int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns) override;
// Reset the resolution of the processing image. TEST only function.
void SetResolution(const int resolution);
protected:
protected:
// Identify the special text type for one blob, and update its field. When
// height_th is set (> 0), we will label the blob as BSTT_NONE if its height
// is less than height_th.
void IdentifySpecialText(BLOBNBOX *blob, const int height_th);
// Estimate the type for one unichar.
BlobSpecialTextType EstimateTypeForUnichar(
const UNICHARSET& unicharset, const UNICHAR_ID id) const;
BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET &unicharset,
const UNICHAR_ID id) const;
// Compute special text type for each blob in part_grid_.
void IdentifySpecialText();
// Identify blobs that we want to skip during special blob type
// classification.
void IdentifyBlobsToSkip(ColPartition* part);
void IdentifyBlobsToSkip(ColPartition *part);
// The ColPartitions in part_grid_ may be over-segmented, particularly in the
// block equation regions. So we like to identify these partitions and merge
@ -94,62 +86,56 @@ class EquationDetect : public EquationDetectBase {
// parts_overlap. Note: this function may update the part_grid_, so if the
// caller is also running ColPartitionGridSearch, use the RepositionIterator
// to continue.
void SearchByOverlap(ColPartition* seed,
GenericVector<ColPartition*>* parts_overlap);
void SearchByOverlap(ColPartition *seed, GenericVector<ColPartition *> *parts_overlap);
// Insert part back into part_grid_, after it absorbs some other parts.
void InsertPartAfterAbsorb(ColPartition* part);
void InsertPartAfterAbsorb(ColPartition *part);
// Identify the ColPartitions in part_grid_, label them as PT_EQUATION, and
// save them into cp_seeds_.
void IdentifySeedParts();
// Check the blobs count for a seed region candidate.
bool CheckSeedBlobsCount(ColPartition* part);
bool CheckSeedBlobsCount(ColPartition *part);
// Compute the foreground pixel density for a tbox area.
float ComputeForegroundDensity(const TBOX& tbox);
float ComputeForegroundDensity(const TBOX &tbox);
// Check if part from seed2 label: with low math density and left indented. We
// are using two checks:
// 1. If its left is aligned with any coordinates in indented_texts_left,
// which we assume have been sorted.
// 2. If its foreground density is over foreground_density_th.
bool CheckForSeed2(
const GenericVector<int>& indented_texts_left,
const float foreground_density_th,
ColPartition* part);
bool CheckForSeed2(const GenericVector<int> &indented_texts_left,
const float foreground_density_th, ColPartition *part);
// Count the number of values in sorted_vec that are close to val, used to
// check if a partition is aligned with text partitions.
int CountAlignment(
const GenericVector<int>& sorted_vec, const int val) const;
int CountAlignment(const GenericVector<int> &sorted_vec, const int val) const;
// Check for a seed candidate using the foreground pixel density. And we
// return true if the density is below a certain threshold, because characters
// in equation regions usually are apart with more white spaces.
bool CheckSeedFgDensity(const float density_th, ColPartition* part);
bool CheckSeedFgDensity(const float density_th, ColPartition *part);
// A light version of SplitCPHor: instead of really doing the part split, we
// simply compute the union bounding box of each split part.
void SplitCPHorLite(ColPartition* part, GenericVector<TBOX>* splitted_boxes);
void SplitCPHorLite(ColPartition *part, GenericVector<TBOX> *splitted_boxes);
// Split the part (horizontally), and save the split result into
// parts_splitted. Note that it is the caller's responsibility to release the
// memory owned by parts_splitted. On the other hand, the part is unchanged
// during this process and still owns the blobs, so do NOT call DeleteBoxes
// when freeing the colpartitions in parts_splitted.
void SplitCPHor(ColPartition* part,
GenericVector<ColPartition*>* parts_splitted);
void SplitCPHor(ColPartition *part, GenericVector<ColPartition *> *parts_splitted);
// Check the density for a seed candidate (part) using its math density and
// italic density, returns true if the check passed.
bool CheckSeedDensity(const float math_density_high,
const float math_density_low,
const ColPartition* part) const;
bool CheckSeedDensity(const float math_density_high, const float math_density_low,
const ColPartition *part) const;
// Check if part is indented.
IndentType IsIndented(ColPartition* part);
IndentType IsIndented(ColPartition *part);
// Identify inline partitions from cp_seeds_, and re-label them.
void IdentifyInlineParts();
@ -165,38 +151,32 @@ class EquationDetect : public EquationDetectBase {
int EstimateTextPartLineSpacing();
// Identify inline partitions from cp_seeds_ using vertical search.
void IdentifyInlinePartsVertical(const bool top_to_bottom,
const int textPartsLineSpacing);
void IdentifyInlinePartsVertical(const bool top_to_bottom, const int textPartsLineSpacing);
// Check if part is an inline equation zone. This should be called after we
// identified the seed regions.
bool IsInline(const bool search_bottom,
const int textPartsLineSpacing,
ColPartition* part);
bool IsInline(const bool search_bottom, const int textPartsLineSpacing, ColPartition *part);
// For a given seed partition, we search the part_grid_ and see if there is
// any partition that can be merged with it. It returns true if the seed has been
// expanded.
bool ExpandSeed(ColPartition* seed);
bool ExpandSeed(ColPartition *seed);
// Starting from the seed position, we search the part_grid_
// horizontally/vertically, find all partitions that can be
// merged with seed, remove them from part_grid_, and put them into
// parts_to_merge.
void ExpandSeedHorizontal(const bool search_left,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
void ExpandSeedVertical(const bool search_bottom,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
void ExpandSeedHorizontal(const bool search_left, ColPartition *seed,
GenericVector<ColPartition *> *parts_to_merge);
void ExpandSeedVertical(const bool search_bottom, ColPartition *seed,
GenericVector<ColPartition *> *parts_to_merge);
// Check if a part_box is the small neighbor of seed_box.
bool IsNearSmallNeighbor(const TBOX& seed_box,
const TBOX& part_box) const;
bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const;
// Perform the density check for part, which we assume is nearing a seed
// partition. It returns true if the check passed.
bool CheckSeedNeighborDensity(const ColPartition* part) const;
bool CheckSeedNeighborDensity(const ColPartition *part) const;
// After identifying the math blocks, we do one more scan over all text
// partitions, and check if any of them is the satellite of:
@ -210,56 +190,54 @@ class EquationDetect : public EquationDetectBase {
// Check if part is the satellite of one/two math blocks. If it is, we return
// true, and save the blocks into math_blocks.
bool IsMathBlockSatellite(
ColPartition* part, GenericVector<ColPartition*>* math_blocks);
bool IsMathBlockSatellite(ColPartition *part, GenericVector<ColPartition *> *math_blocks);
// Search for the nearest neighbor of part in one vertical direction, as
// selected by search_bottom. It returns the neighbor found that has a major
// x overlap with part, or nullptr when none is found.
ColPartition* SearchNNVertical(const bool search_bottom,
const ColPartition* part);
ColPartition *SearchNNVertical(const bool search_bottom, const ColPartition *part);
// Check if the neighbor with vertical distance of y_gap is a near and math
// block partition.
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const;
// Generate the tiff file name for output/debug file.
void GetOutputTiffName(const char* name, STRING* image_name) const;
void GetOutputTiffName(const char *name, std::string &image_name) const;
// Debugger function that renders ColPartitions on the input image, where:
// parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION
// will be painted in green, and other parts will be painted in blue.
void PaintColParts(const STRING& outfile) const;
void PaintColParts(const std::string &outfile) const;
// Debugger function that renders the blobs in part_grid_ over the input
// image.
void PaintSpecialTexts(const STRING& outfile) const;
void PaintSpecialTexts(const std::string &outfile) const;
// Debugger function that prints the math blob density values for a
// ColPartition object.
void PrintSpecialBlobsDensity(const ColPartition* part) const;
void PrintSpecialBlobsDensity(const ColPartition *part) const;
// The tesseract engine initialized from equation training data.
Tesseract equ_tesseract_;
// The tesseract engine used for OCR. This pointer is passed in by the caller,
// so do NOT destroy it in this class.
Tesseract* lang_tesseract_;
Tesseract *lang_tesseract_;
// The ColPartitionGrid that we are processing. This pointer is passed in from
// the caller, so do NOT destroy it in the class.
ColPartitionGrid* part_grid_ = nullptr;
ColPartitionGrid *part_grid_ = nullptr;
// A simple array of pointers to the best assigned column division at
// each grid y coordinate. This pointer is passed in from the caller, so do
// NOT destroy it in the class.
ColPartitionSet** best_columns_ = nullptr;
ColPartitionSet **best_columns_ = nullptr;
// The super bounding box of all cps in the part_grid_.
TBOX* cps_super_bbox_;
TBOX *cps_super_bbox_;
// The seed ColPartition for equation region.
GenericVector<ColPartition*> cp_seeds_;
GenericVector<ColPartition *> cp_seeds_;
// The resolution (dpi) of the processing image.
int resolution_;
@ -268,6 +246,6 @@ class EquationDetect : public EquationDetectBase {
int page_count_;
};
} // namespace tesseract
} // namespace tesseract
#endif // TESSERACT_CCMAIN_EQUATIONDETECT_H_
#endif // TESSERACT_CCMAIN_EQUATIONDETECT_H_

View File

@ -19,31 +19,33 @@
**********************************************************************/
#include "fixspace.h"
#include <cstdint> // for INT16_MAX, int16_t, int32_t
#include "blobs.h" // for TWERD, TBLOB, TESSLINE
#include "boxword.h" // for BoxWord
#include "errcode.h" // for ASSERT_HOST
#include "normalis.h" // for kBlnXHeight, kBlnBaselineOffset
#include <tesseract/ocrclass.h> // for ETEXT_DESC
#include "pageres.h" // for WERD_RES_IT, WERD_RES, WERD_RES_LIST
#include "params.h" // for IntParam, StringParam, BoolParam, Doub...
#include "ratngs.h" // for WERD_CHOICE, FREQ_DAWG_PERM, NUMBER_PERM
#include "rect.h" // for TBOX
#include "stepblob.h" // for C_BLOB_IT, C_BLOB_LIST, C_BLOB
#include <tesseract/strngs.h> // for STRING
#include "tesseractclass.h" // for Tesseract, TesseractStats, WordData
#include "tessvars.h" // for debug_fp
#include "tprintf.h" // for tprintf
#include <tesseract/unichar.h> // for UNICHAR_ID
#include "unicharset.h" // for UNICHARSET
#include "werd.h" // for WERD, W_EOL, W_FUZZY_NON, W_FUZZY_SP
#include "blobs.h" // for TWERD, TBLOB, TESSLINE
#include "boxword.h" // for BoxWord
#include "errcode.h" // for ASSERT_HOST
#include "normalis.h" // for kBlnXHeight, kBlnBaselineOffset
#include "pageres.h" // for WERD_RES_IT, WERD_RES, WERD_RES_LIST
#include "params.h" // for IntParam, StringParam, BoolParam, Doub...
#include "ratngs.h" // for WERD_CHOICE, FREQ_DAWG_PERM, NUMBER_PERM
#include "rect.h" // for TBOX
#include "stepblob.h" // for C_BLOB_IT, C_BLOB_LIST, C_BLOB
#include "tesseractclass.h" // for Tesseract, TesseractStats, WordData
#include "tessvars.h" // for debug_fp
#include "tprintf.h" // for tprintf
#include "unicharset.h" // for UNICHARSET
#include "werd.h" // for WERD, W_EOL, W_FUZZY_NON, W_FUZZY_SP
#include <tesseract/ocrclass.h> // for ETEXT_DESC
#include <tesseract/unichar.h> // for UNICHAR_ID
#include <cstdint> // for INT16_MAX, int16_t, int32_t
namespace tesseract {
class BLOCK;
class ROW;
#define PERFECT_WERDS 999
namespace tesseract {
#define PERFECT_WERDS 999
/**********************************************************************
* c_blob_comparator()
@ -52,14 +54,14 @@ namespace tesseract {
* order of left edge.
**********************************************************************/
static int c_blob_comparator( // sort blobs
const void *blob1p, // ptr to ptr to blob1
const void *blob2p // ptr to ptr to blob2
) {
const C_BLOB *blob1 = *reinterpret_cast<const C_BLOB* const*>(blob1p);
const C_BLOB *blob2 = *reinterpret_cast<const C_BLOB* const*>(blob2p);
static int c_blob_comparator( // sort blobs
const void *blob1p, // ptr to ptr to blob1
const void *blob2p // ptr to ptr to blob2
) {
const C_BLOB *blob1 = *reinterpret_cast<const C_BLOB *const *>(blob1p);
const C_BLOB *blob2 = *reinterpret_cast<const C_BLOB *const *>(blob2p);
return blob1->bounding_box ().left () - blob2->bounding_box ().left ();
return blob1->bounding_box().left() - blob2->bounding_box().left();
}
/**
@ -72,9 +74,7 @@ static int c_blob_comparator( // sort blobs
* @param word_count count of words in doc
* @param[out] page_res
*/
void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
int32_t word_count,
PAGE_RES *page_res) {
void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_RES *page_res) {
BLOCK_RES_IT block_res_it;
ROW_RES_IT row_res_it;
WERD_RES_IT word_res_it_from;
@ -82,16 +82,14 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
WERD_RES *word_res;
WERD_RES_LIST fuzzy_space_words;
int16_t new_length;
bool prevent_null_wd_fixsp; // DON'T process blobless wds
int32_t word_index; // current word
bool prevent_null_wd_fixsp; // DON'T process blobless wds
int32_t word_index; // current word
block_res_it.set_to_list(&page_res->block_res_list);
word_index = 0;
for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
block_res_it.forward()) {
for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); block_res_it.forward()) {
row_res_it.set_to_list(&block_res_it.data()->row_res_list);
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
row_res_it.forward()) {
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); row_res_it.forward()) {
word_res_it_from.set_to_list(&row_res_it.data()->word_res_list);
while (!word_res_it_from.at_last()) {
word_res = word_res_it_from.data();
@ -99,8 +97,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
!(word_res->combination ||
word_res_it_from.data_relative(1)->word->flag(W_FUZZY_NON) ||
word_res_it_from.data_relative(1)->word->flag(W_FUZZY_SP))) {
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row,
block_res_it.data()->block);
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, block_res_it.data()->block);
word_res = word_res_it_from.forward();
word_index++;
if (monitor != nullptr) {
@ -109,14 +106,13 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
if (monitor->deadline_exceeded() ||
(monitor->cancel != nullptr &&
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
return;
return;
}
}
if (!word_res_it_from.at_last()) {
word_res_it_to = word_res_it_from;
prevent_null_wd_fixsp =
word_res->word->cblob_list()->empty();
prevent_null_wd_fixsp = word_res->word->cblob_list()->empty();
if (check_debug_pt(word_res, 60))
debug_fix_space_level.set_value(10);
word_res_it_to.forward();
@ -127,9 +123,9 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
if (monitor->deadline_exceeded() ||
(monitor->cancel != nullptr &&
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
return;
return;
}
while (!word_res_it_to.at_last () &&
while (!word_res_it_to.at_last() &&
(word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) ||
word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) {
if (check_debug_pt(word_res, 60))
@ -145,39 +141,32 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
if (prevent_null_wd_fixsp) {
word_res_it_from = word_res_it_to;
} else {
fuzzy_space_words.assign_to_sublist(&word_res_it_from,
&word_res_it_to);
fix_fuzzy_space_list(fuzzy_space_words,
row_res_it.data()->row,
fuzzy_space_words.assign_to_sublist(&word_res_it_from, &word_res_it_to);
fix_fuzzy_space_list(fuzzy_space_words, row_res_it.data()->row,
block_res_it.data()->block);
new_length = fuzzy_space_words.length();
word_res_it_from.add_list_before(&fuzzy_space_words);
for (;
!word_res_it_from.at_last() && new_length > 0;
new_length--) {
for (; !word_res_it_from.at_last() && new_length > 0; new_length--) {
word_res_it_from.forward();
}
}
if (test_pt)
debug_fix_space_level.set_value(0);
}
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row,
block_res_it.data()->block);
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, block_res_it.data()->block);
// Last word in row
}
}
}
}
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
ROW *row,
BLOCK* block) {
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) {
int16_t best_score;
WERD_RES_LIST current_perm;
int16_t current_score;
bool improved = false;
best_score = eval_word_spacing(best_perm); // default score
best_score = eval_word_spacing(best_perm); // default score
dump_words(best_perm, best_score, 1, improved);
if (best_score != PERFECT_WERDS)
@ -199,8 +188,6 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
dump_words(best_perm, best_score, 3, improved);
}
} // namespace tesseract
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
WERD_RES_IT src_it(&src_list);
WERD_RES_IT new_it(&new_list);
@ -218,10 +205,7 @@ void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
}
}
namespace tesseract {
void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
BLOCK* block) {
void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK *block) {
WERD_RES_IT word_it(&words);
WERD_RES *word;
// Since we are not using PAGE_RES to iterate over words, we need to update
@ -253,10 +237,10 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
* The solution is to NOT COUNT the score of any word which has a digit at one
* end and a "1Il" as the character the other side of the space.
*
* Conversely, any character next to a "1" within a word is counted as a positive
* score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of
* the "1" joined). "56163" would score 7 - all chars in a numeric word + 2
* sides of a "1" joined.
* Conversely, any character next to a "1" within a word is counted as a
* positive score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1
* side of the "1" joined). "56163" would score 7 - all chars in a numeric word
* + 2 sides of a "1" joined.
*
* The joined 1 rule is applied to any word REGARDLESS of contextual
* confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually
@ -268,24 +252,19 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
int16_t total_score = 0;
int16_t word_count = 0;
int16_t done_word_count = 0;
int16_t word_len;
int16_t i;
int16_t offset;
WERD_RES *word; // current word
int16_t prev_word_score = 0;
bool prev_word_done = false;
bool prev_char_1 = false; // prev ch a "1/I/l"?
bool prev_char_digit = false; // prev ch 2..9 or 0
bool current_char_1 = false;
bool current_word_ok_so_far;
STRING punct_chars = "!\"`',.:;";
bool prev_char_1 = false; // prev ch a "1/I/l"?
bool prev_char_digit = false; // prev ch 2..9 or 0
const char *punct_chars = "!\"`',.:;";
bool prev_char_punct = false;
bool current_char_punct = false;
bool word_done = false;
do {
word = word_res_it.data();
word_done = fixspace_thinks_word_done(word);
// current word
WERD_RES *word = word_res_it.data();
bool word_done = fixspace_thinks_word_done(word);
word_count++;
if (word->tess_failed) {
total_score += prev_word_score;
@ -297,19 +276,18 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
prev_word_done = false;
} else {
/*
Can we add the prev word score and potentially count this word?
Yes IF it didn't end in a 1 when the first char of this word is a digit
AND it didn't end in a digit when the first char of this word is a 1
*/
word_len = word->reject_map.length();
current_word_ok_so_far = false;
Can we add the prev word score and potentially count this word?
Yes IF it didn't end in a 1 when the first char of this word is a digit
AND it didn't end in a digit when the first char of this word is a 1
*/
auto word_len = word->reject_map.length();
bool current_word_ok_so_far = false;
if (!((prev_char_1 && digit_or_numeric_punct(word, 0)) ||
(prev_char_digit && (
(word_done &&
word->best_choice->unichar_lengths().c_str()[0] == 1 &&
word->best_choice->unichar_string()[0] == '1') ||
(!word_done && STRING(conflict_set_I_l_1).contains(
word->best_choice->unichar_string()[0])))))) {
(prev_char_digit &&
((word_done && word->best_choice->unichar_lengths().c_str()[0] == 1 &&
word->best_choice->unichar_string()[0] == '1') ||
(!word_done &&
conflict_set_I_l_1.contains(word->best_choice->unichar_string()[0])))))) {
total_score += prev_word_score;
if (prev_word_done)
done_word_count++;
@ -325,33 +303,33 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
}
/* Add 1 to total score for every joined 1 regardless of context and
rejtn */
rejtn */
for (i = 0, prev_char_1 = false; i < word_len; i++) {
current_char_1 = word->best_choice->unichar_string()[i] == '1';
bool current_char_1 = word->best_choice->unichar_string()[i] == '1';
if (prev_char_1 || (current_char_1 && (i > 0)))
total_score++;
prev_char_1 = current_char_1;
}
/* Add 1 to total score for every joined punctuation regardless of context
and rejtn */
and rejtn */
if (tessedit_prefer_joined_punct) {
for (i = 0, offset = 0, prev_char_punct = false; i < word_len;
offset += word->best_choice->unichar_lengths()[i++]) {
current_char_punct =
punct_chars.contains(word->best_choice->unichar_string()[offset]);
bool current_char_punct =
strchr(punct_chars, word->best_choice->unichar_string()[offset]) != nullptr;
if (prev_char_punct || (current_char_punct && i > 0))
total_score++;
prev_char_punct = current_char_punct;
}
}
prev_char_digit = digit_or_numeric_punct(word, word_len - 1);
for (i = 0, offset = 0; i < word_len - 1;
offset += word->best_choice->unichar_lengths()[i++]);
for (i = 0, offset = 0; i < word_len - 1; offset += word->best_choice->unichar_lengths()[i++])
;
prev_char_1 =
((word_done && (word->best_choice->unichar_string()[offset] == '1'))
|| (!word_done && STRING(conflict_set_I_l_1).contains(
word->best_choice->unichar_string()[offset])));
((word_done && (word->best_choice->unichar_string()[offset] == '1')) ||
(!word_done &&
conflict_set_I_l_1.contains(word->best_choice->unichar_string()[offset])));
}
/* Find next word */
do {
@ -371,20 +349,15 @@ bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) {
int i;
int offset;
for (i = 0, offset = 0; i < char_position;
offset += word->best_choice->unichar_lengths()[i++]);
for (i = 0, offset = 0; i < char_position; offset += word->best_choice->unichar_lengths()[i++])
;
return (
word->uch_set->get_isdigit(
word->best_choice->unichar_string().c_str() + offset,
word->best_choice->unichar_lengths()[i]) ||
word->uch_set->get_isdigit(word->best_choice->unichar_string().c_str() + offset,
word->best_choice->unichar_lengths()[i]) ||
(word->best_choice->permuter() == NUMBER_PERM &&
STRING(numeric_punctuation).contains(
word->best_choice->unichar_string().c_str()[offset])));
numeric_punctuation.contains(word->best_choice->unichar_string().c_str()[offset])));
}
} // namespace tesseract
/**
* @name transform_to_next_perm()
* Examines the current word list to find the smallest word gap size. Then walks
@ -421,11 +394,10 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
}
}
if (min_gap < INT16_MAX) {
prev_right = -INT16_MAX; // back to start
prev_right = -INT16_MAX; // back to start
word_it.set_to_list(&words);
// Note: we can't use cycle_pt due to inserted combos at start of list.
for (; (prev_right == -INT16_MAX) || !word_it.at_first();
word_it.forward()) {
for (; (prev_right == -INT16_MAX) || !word_it.at_first(); word_it.forward()) {
word = word_it.data();
if (!word->part_of_combo) {
box = word->word->bounding_box();
@ -461,30 +433,26 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
combo->done = false;
combo->ClearResults();
} else {
prev_word_it = word_it; // catch up
prev_word_it = word_it; // catch up
}
}
prev_right = box.right();
}
}
} else {
words.clear(); // signal termination
words.clear(); // signal termination
}
}
namespace tesseract {
void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score,
int16_t mode, bool improved) {
void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score, int16_t mode, bool improved) {
WERD_RES_IT word_res_it(&perm);
if (debug_fix_space_level > 0) {
if (mode == 1) {
stats_.dump_words_str = "";
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
stats_.dump_words_str +=
word_res_it.data()->best_choice->unichar_string();
stats_.dump_words_str += word_res_it.data()->best_choice->unichar_string();
stats_.dump_words_str += ' ';
}
}
@ -503,22 +471,18 @@ void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score,
break;
}
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
tprintf("%s/%1d ",
word_res_it.data()->best_choice->unichar_string().c_str(),
tprintf("%s/%1d ", word_res_it.data()->best_choice->unichar_string().c_str(),
static_cast<int>(word_res_it.data()->best_choice->permuter()));
}
}
tprintf("\"\n");
} else if (improved) {
tprintf("FIX SPACING \"%s\" => \"", stats_.dump_words_str.c_str());
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
tprintf("%s/%1d ",
word_res_it.data()->best_choice->unichar_string().c_str(),
tprintf("%s/%1d ", word_res_it.data()->best_choice->unichar_string().c_str(),
static_cast<int>(word_res_it.data()->best_choice->permuter()));
}
}
@ -532,13 +496,12 @@ bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
return true;
/*
Use all the standard pass 2 conditions for mode 5 in set_done() in
reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T
CARE WHETHER WE HAVE of/at on/an etc.
*/
Use all the standard pass 2 conditions for mode 5 in set_done() in
reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T
CARE WHETHER WE HAVE of/at on/an etc.
*/
if (fixsp_done_mode > 0 &&
(word->tess_accepted ||
(fixsp_done_mode == 2 && word->reject_map.reject_count() == 0) ||
(word->tess_accepted || (fixsp_done_mode == 2 && word->reject_map.reject_count() == 0) ||
fixsp_done_mode == 3) &&
(strchr(word->best_choice->unichar_string().c_str(), ' ') == nullptr) &&
((word->best_choice->permuter() == SYSTEM_DAWG_PERM) ||
@ -551,7 +514,6 @@ bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
}
}
/**
* @name fix_sp_fp_word()
* Test the current word to see if it can be split by deleting noise blobs. If
@ -559,8 +521,7 @@ bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
* Return with the iterator pointing to the same place if the word is unchanged,
* or the last of the replacement words.
*/
void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
BLOCK* block) {
void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block) {
WERD_RES *word_res;
WERD_RES_LIST sub_word_list;
WERD_RES_IT sub_word_list_it(&sub_word_list);
@ -569,9 +530,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
float junk;
word_res = word_res_it.data();
if (word_res->word->flag(W_REP_CHAR) ||
word_res->combination ||
word_res->part_of_combo ||
if (word_res->word->flag(W_REP_CHAR) || word_res->combination || word_res->part_of_combo ||
!word_res->word->flag(W_DONT_CHOP))
return;
@ -580,8 +539,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
return;
if (debug_fix_space_level > 1) {
tprintf("FP fixspace working on \"%s\"\n",
word_res->best_choice->unichar_string().c_str());
tprintf("FP fixspace working on \"%s\"\n", word_res->best_choice->unichar_string().c_str());
}
word_res->word->rej_cblob_list()->sort(c_blob_comparator);
sub_word_list_it.add_after_stay_put(word_res_it.extract());
@ -593,8 +551,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
}
}
void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row,
BLOCK* block) {
void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) {
int16_t best_score;
WERD_RES_IT best_perm_it(&best_perm);
WERD_RES_LIST current_perm;
@ -603,16 +560,16 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row,
int16_t current_score;
bool improved = false;
best_score = fp_eval_word_spacing(best_perm); // default score
best_score = fp_eval_word_spacing(best_perm); // default score
dump_words(best_perm, best_score, 1, improved);
old_word_res = best_perm_it.data();
// Even deep_copy doesn't copy the underlying WERD unless its combination
// flag is true!
old_word_res->combination = true; // Kludge to force deep copy
old_word_res->combination = true; // Kludge to force deep copy
current_perm_it.add_to_end(WERD_RES::deep_copy(old_word_res));
old_word_res->combination = false; // Undo kludge
old_word_res->combination = false; // Undo kludge
break_noisiest_blob_word(current_perm);
@ -633,7 +590,6 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row,
dump_words(best_perm, best_score, 3, improved);
}
/**
* break_noisiest_blob_word()
* Find the word with the blob which looks like the worst noise.
@ -643,9 +599,9 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
WERD_RES_IT word_it(&words);
WERD_RES_IT worst_word_it;
float worst_noise_score = 9999;
int worst_blob_index = -1; // Noisiest blob of noisiest wd
int blob_index; // of wds noisiest blob
float noise_score; // of wds noisiest blob
int worst_blob_index = -1; // Noisiest blob of noisiest wd
int blob_index; // of wds noisiest blob
float noise_score; // of wds noisiest blob
WERD_RES *word_res;
C_BLOB_IT blob_it;
C_BLOB_IT rej_cblob_it;
@ -665,7 +621,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
}
}
if (worst_blob_index < 0) {
words.clear(); // signal termination
words.clear(); // signal termination
return;
}
@ -681,59 +637,57 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
new_blob_it.add_after_then_move(blob_it.extract());
}
start_of_noise_blob = blob_it.data()->bounding_box().left();
delete blob_it.extract(); // throw out noise blob
delete blob_it.extract(); // throw out noise blob
new_word = new WERD(&new_blob_list, word_res->word);
new_word->set_flag(W_EOL, false);
word_res->word->set_flag(W_BOL, false);
word_res->word->set_blanks(1); // After break
word_res->word->set_blanks(1); // After break
new_rej_cblob_it.set_to_list(new_word->rej_cblob_list());
rej_cblob_it.set_to_list(word_res->word->rej_cblob_list());
for (;
(!rej_cblob_it.empty() &&
(rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
for (; (!rej_cblob_it.empty() &&
(rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
rej_cblob_it.forward()) {
new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
}
auto* new_word_res = new WERD_RES(new_word);
auto *new_word_res = new WERD_RES(new_word);
new_word_res->combination = true;
worst_word_it.add_before_then_move(new_word_res);
word_res->ClearResults();
}
int16_t Tesseract::worst_noise_blob(WERD_RES *word_res,
float *worst_noise_score) {
int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) {
float noise_score[512];
int i;
int min_noise_blob; // 1st contender
int max_noise_blob; // last contender
int min_noise_blob; // 1st contender
int max_noise_blob; // last contender
int non_noise_count;
int worst_noise_blob; // Worst blob
int worst_noise_blob; // Worst blob
float small_limit = kBlnXHeight * fixsp_small_outlines_size;
float non_noise_limit = kBlnXHeight * 0.8;
if (word_res->rebuild_word == nullptr)
return -1; // Can't handle cube words.
return -1; // Can't handle cube words.
// Normalised.
int blob_count = word_res->box_word->length();
ASSERT_HOST(blob_count <= 512);
if (blob_count < 5)
return -1; // too short to split
return -1; // too short to split
/* Get the noise scores for all blobs */
/* Get the noise scores for all blobs */
#ifndef SECURE_NAMES
#ifndef SECURE_NAMES
if (debug_fix_space_level > 5)
tprintf("FP fixspace Noise metrics for \"%s\": ",
word_res->best_choice->unichar_string().c_str());
#endif
#endif
for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) {
TBLOB* blob = word_res->rebuild_word->blobs[i];
TBLOB *blob = word_res->rebuild_word->blobs[i];
if (word_res->reject_map[i].accepted())
noise_score[i] = non_noise_limit;
else
@ -759,8 +713,7 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res,
min_noise_blob = i;
non_noise_count = 0;
for (i = blob_count - 1; i >= 0 && non_noise_count < fixsp_non_noise_limit;
i--) {
for (i = blob_count - 1; i >= 0 && non_noise_count < fixsp_non_noise_limit; i--) {
if (noise_score[i] >= non_noise_limit) {
non_noise_count++;
}
@ -785,12 +738,12 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res,
}
float Tesseract::blob_noise_score(TBLOB *blob) {
TBOX box; // BB of outline
TBOX box; // BB of outline
int16_t outline_count = 0;
int16_t max_dimension;
int16_t largest_outline_dimension = 0;
for (TESSLINE* ol = blob->outlines; ol != nullptr; ol= ol->next) {
for (TESSLINE *ol = blob->outlines; ol != nullptr; ol = ol->next) {
outline_count++;
box = ol->bounding_box();
if (box.height() > box.width()) {
@ -809,15 +762,13 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
}
box = blob->bounding_box();
if (box.bottom() > kBlnBaselineOffset * 4 ||
box.top() < kBlnBaselineOffset / 2) {
if (box.bottom() > kBlnBaselineOffset * 4 || box.top() < kBlnBaselineOffset / 2) {
// Lax blob is if high or low
largest_outline_dimension /= 2;
}
return largest_outline_dimension;
}
} // namespace tesseract
void fixspace_dbg(WERD_RES *word) {
TBOX box = word->word->bounding_box();
@ -826,10 +777,8 @@ void fixspace_dbg(WERD_RES *word) {
box.print();
tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
word->word->cblob_list()->length(),
word->rebuild_word->NumBlobs(),
word->box_word->length());
tprintf("Blob count: %d (word); %d/%d (rebuild word)\n", word->word->cblob_list()->length(),
word->rebuild_word->NumBlobs(), word->box_word->length());
word->reject_map.print(debug_fp);
tprintf("\n");
if (show_map_detail) {
@ -844,7 +793,6 @@ void fixspace_dbg(WERD_RES *word) {
tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
}
/**
* fp_eval_word_spacing()
* Evaluation function for fixed pitch word lists.
@ -853,7 +801,6 @@ void fixspace_dbg(WERD_RES *word) {
* acceptable words or in dict words and are not rejected.
* Penalise any potential noise chars
*/
namespace tesseract {
int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
WERD_RES_IT word_it(&word_res_list);
WERD_RES *word;
@ -864,20 +811,16 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word = word_it.data();
if (word->rebuild_word == nullptr)
continue; // Can't handle cube words.
if (word->done ||
word->tess_accepted ||
word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
continue; // Can't handle cube words.
if (word->done || word->tess_accepted || word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
word->best_choice->permuter() == USER_DAWG_PERM ||
safe_dict_word(word) > 0) {
word->best_choice->permuter() == USER_DAWG_PERM || safe_dict_word(word) > 0) {
int num_blobs = word->rebuild_word->NumBlobs();
UNICHAR_ID space = word->uch_set->unichar_to_id(" ");
for (i = 0; i < word->best_choice->length() && i < num_blobs; ++i) {
TBLOB* blob = word->rebuild_word->blobs[i];
if (word->best_choice->unichar_id(i) == space ||
blob_noise_score(blob) < small_limit) {
score -= 1; // penalise possibly erroneous non-space
TBLOB *blob = word->rebuild_word->blobs[i];
if (word->best_choice->unichar_id(i) == space || blob_noise_score(blob) < small_limit) {
score -= 1; // penalise possibly erroneous non-space
} else if (word->reject_map[i].accepted()) {
score++;
}
@ -889,4 +832,4 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
return score;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -22,6 +22,8 @@
#ifndef FIXSPACE_H
#define FIXSPACE_H
namespace tesseract {
class WERD_RES;
class WERD_RES_LIST;
@ -29,4 +31,6 @@ void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
void transform_to_next_perm(WERD_RES_LIST &words);
void fixspace_dbg(WERD_RES *word);
} // namespace tesseract
#endif

View File

@ -17,13 +17,14 @@
*
**********************************************************************/
#include <algorithm>
#include <cstring>
#include <cctype>
#include "params.h"
#include "float2int.h"
#include "params.h"
#include "tesseractclass.h"
#include <algorithm>
#include <cctype>
#include <cstring>
namespace tesseract {
// Fixxht overview.
@ -71,25 +72,23 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
int bad_blobs = 0;
int num_blobs = word_res->rebuild_word->NumBlobs();
for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
TBLOB *blob = word_res->rebuild_word->blobs[blob_id];
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top();
if (top >= INT_FEAT_RANGE)
top = INT_FEAT_RANGE - 1;
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
&min_top, &max_top);
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top);
if (max_top - min_top > kMaxCharTopRange)
continue;
bool bad = top < min_top - x_ht_acceptance_tolerance ||
top > max_top + x_ht_acceptance_tolerance;
bool bad =
top < min_top - x_ht_acceptance_tolerance || top > max_top + x_ht_acceptance_tolerance;
if (bad)
++bad_blobs;
if (debug_x_ht_level >= 1) {
tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n",
unicharset.id_to_unichar(class_id),
bad ? "Misfit" : "OK", top, min_top, max_top,
unicharset.id_to_unichar(class_id), bad ? "Misfit" : "OK", top, min_top, max_top,
static_cast<int>(x_ht_acceptance_tolerance));
}
}
@ -99,8 +98,7 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
// Returns a new x-height maximally compatible with the result in word_res.
// See comment above for overall algorithm.
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
float* baseline_shift) {
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift) {
STATS top_stats(0, UINT8_MAX);
STATS shift_stats(-UINT8_MAX, UINT8_MAX);
int bottom_shift = 0;
@ -109,43 +107,36 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
top_stats.clear();
shift_stats.clear();
for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
TBLOB *blob = word_res->rebuild_word->blobs[blob_id];
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
if (unicharset.get_isalpha(class_id) ||
unicharset.get_isdigit(class_id)) {
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top() + bottom_shift;
// Clip the top to the limit of normalized feature space.
if (top >= INT_FEAT_RANGE)
top = INT_FEAT_RANGE - 1;
int bottom = blob->bounding_box().bottom() + bottom_shift;
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
&min_top, &max_top);
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top);
// Chars with a wild top range would mess up the result so ignore them.
if (max_top - min_top > kMaxCharTopRange)
continue;
int misfit_dist = std::max((min_top - x_ht_acceptance_tolerance) - top,
top - (max_top + x_ht_acceptance_tolerance));
top - (max_top + x_ht_acceptance_tolerance));
int height = top - kBlnBaselineOffset;
if (debug_x_ht_level >= 2) {
tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ",
unicharset.id_to_unichar(class_id),
height, min_bottom, max_bottom, min_top, max_top,
bottom, top);
unicharset.id_to_unichar(class_id), height, min_bottom, max_bottom, min_top,
max_top, bottom, top);
}
// Use only chars that fit in the expected bottom range, and where
// the range of tops is sensibly near the xheight.
if (min_bottom <= bottom + x_ht_acceptance_tolerance &&
bottom - x_ht_acceptance_tolerance <= max_bottom &&
min_top > kBlnBaselineOffset &&
max_top - kBlnBaselineOffset >= kBlnXHeight &&
misfit_dist > 0) {
bottom - x_ht_acceptance_tolerance <= max_bottom && min_top > kBlnBaselineOffset &&
max_top - kBlnBaselineOffset >= kBlnXHeight && misfit_dist > 0) {
// Compute the x-height position using proportionality between the
// actual height and expected height.
int min_xht = DivRounded(height * kBlnXHeight,
max_top - kBlnBaselineOffset);
int max_xht = DivRounded(height * kBlnXHeight,
min_top - kBlnBaselineOffset);
int min_xht = DivRounded(height * kBlnXHeight, max_top - kBlnBaselineOffset);
int max_xht = DivRounded(height * kBlnXHeight, min_top - kBlnBaselineOffset);
if (debug_x_ht_level >= 2) {
tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht);
}
@ -188,8 +179,7 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
tprintf("Applying bottom shift=%d\n", bottom_shift);
}
}
} while (bottom_shift != 0 &&
top_stats.get_total() < shift_stats.get_total());
} while (bottom_shift != 0 && top_stats.get_total() < shift_stats.get_total());
// Baseline shift is opposite sign to the bottom shift.
*baseline_shift = -bottom_shift / word_res->denorm.y_scale();
if (debug_x_ht_level >= 2) {
@ -202,8 +192,8 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
float new_xht = top_stats.median();
if (debug_x_ht_level >= 2) {
tprintf("Median xht=%f\n", new_xht);
tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n",
new_xht, new_xht / word_res->denorm.y_scale());
tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n", new_xht,
new_xht / word_res->denorm.y_scale());
}
// The xheight must change by at least x_ht_min_change to be used.
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
@ -212,4 +202,4 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
return bottom_shift != 0 ? word_res->x_height : 0.0f;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -17,14 +17,12 @@
#include "tesseractclass.h"
#include "allheaders.h"
#include <allheaders.h>
#include "boxread.h"
#include "imagedata.h"
#ifndef ANDROID_BUILD
#include "imagedata.h" // for ImageData
#include "lstmrecognizer.h"
#include "recodebeam.h"
#endif
#include "pageres.h"
#include "recodebeam.h"
#include "tprintf.h"
#include <algorithm>
@ -40,10 +38,9 @@ const float kWorstDictCertainty = -25.0f;
// Breaks the page into lines, according to the boxes, and writes them to a
// serialized DocumentData based on output_basename.
// Return true if successful, false if an error occurred.
bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
const STRING& output_basename,
bool Tesseract::TrainLineRecognizer(const char *input_imagename, const std::string &output_basename,
BLOCK_LIST *block_list) {
STRING lstmf_name = output_basename + ".lstmf";
std::string lstmf_name = output_basename + ".lstmf";
DocumentData images(lstmf_name);
if (applybox_page > 0) {
// Load existing document for the previous pages.
@ -52,18 +49,17 @@ bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
return false;
}
}
GenericVector<TBOX> boxes;
GenericVector<STRING> texts;
std::vector<TBOX> boxes;
std::vector<std::string> texts;
// Get the boxes for this page, if there are any.
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
nullptr) ||
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr, nullptr) ||
boxes.empty()) {
tprintf("Failed to read boxes from %s\n", input_imagename.c_str());
tprintf("Failed to read boxes from %s\n", input_imagename);
return false;
}
TrainFromBoxes(boxes, texts, block_list, &images);
if (images.PagesSize() == 0) {
tprintf("Failed to read pages from %s\n", input_imagename.c_str());
tprintf("Failed to read pages from %s\n", input_imagename);
return false;
}
images.Shuffle();
@ -77,33 +73,31 @@ bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
// Generates training data for training a line recognizer, eg LSTM.
// Breaks the boxes into lines, normalizes them, converts to ImageData and
// appends them to the given training_data.
void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
BLOCK_LIST *block_list,
DocumentData* training_data) {
void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
BLOCK_LIST *block_list, DocumentData *training_data) {
int box_count = boxes.size();
// Process all the text lines in this page, as defined by the boxes.
int end_box = 0;
// Don't let \t, which marks newlines in the box file, get into the line
// content, as that makes the line unusable in training.
while (end_box < texts.size() && texts[end_box] == "\t") ++end_box;
while (end_box < texts.size() && texts[end_box] == "\t")
++end_box;
for (int start_box = end_box; start_box < box_count; start_box = end_box) {
// Find the textline of boxes starting at start and their bounding box.
TBOX line_box = boxes[start_box];
STRING line_str = texts[start_box];
for (end_box = start_box + 1; end_box < box_count && texts[end_box] != "\t";
++end_box) {
std::string line_str = texts[start_box];
for (end_box = start_box + 1; end_box < box_count && texts[end_box] != "\t"; ++end_box) {
line_box += boxes[end_box];
line_str += texts[end_box];
}
// Find the most overlapping block.
BLOCK* best_block = nullptr;
BLOCK *best_block = nullptr;
int best_overlap = 0;
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK* block = b_it.data();
BLOCK *block = b_it.data();
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
continue; // Not a text block.
continue; // Not a text block.
TBOX block_box = block->pdblk.bounding_box();
block_box.rotate(block->re_rotation());
if (block_box.major_overlap(line_box)) {
@ -114,39 +108,37 @@ void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
}
}
}
ImageData* imagedata = nullptr;
ImageData *imagedata = nullptr;
if (best_block == nullptr) {
tprintf("No block overlapping textline: %s\n", line_str.c_str());
} else {
imagedata = GetLineData(line_box, boxes, texts, start_box, end_box,
*best_block);
imagedata = GetLineData(line_box, boxes, texts, start_box, end_box, *best_block);
}
if (imagedata != nullptr)
training_data->AddPageToDocument(imagedata);
// Don't let \t, which marks newlines in the box file, get into the line
// content, as that makes the line unusable in training.
while (end_box < texts.size() && texts[end_box] == "\t") ++end_box;
while (end_box < texts.size() && texts[end_box] == "\t")
++end_box;
}
}
// Returns an Imagedata containing the image of the given box,
// and ground truth boxes/truth text if available in the input.
// The image is not normalized in any way.
ImageData* Tesseract::GetLineData(const TBOX& line_box,
const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
int start_box, int end_box,
const BLOCK& block) {
ImageData *Tesseract::GetLineData(const TBOX &line_box, const std::vector<TBOX> &boxes,
const std::vector<std::string> &texts, int start_box, int end_box,
const BLOCK &block) {
TBOX revised_box;
ImageData* image_data = GetRectImage(line_box, block, kImagePadding,
&revised_box);
if (image_data == nullptr) return nullptr;
ImageData *image_data = GetRectImage(line_box, block, kImagePadding, &revised_box);
if (image_data == nullptr)
return nullptr;
image_data->set_page_number(applybox_page);
// Copy the boxes and shift them so they are relative to the image.
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
ICOORD shift = -revised_box.botleft();
GenericVector<TBOX> line_boxes;
GenericVector<STRING> line_texts;
std::vector<TBOX> line_boxes;
std::vector<std::string> line_texts;
for (int b = start_box; b < end_box; ++b) {
TBOX box = boxes[b];
box.rotate(block_rotation);
@ -154,8 +146,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
line_boxes.push_back(box);
line_texts.push_back(texts[b]);
}
GenericVector<int> page_numbers;
page_numbers.init_to_size(line_boxes.size(), applybox_page);
std::vector<int> page_numbers;
page_numbers.resize(line_boxes.size(), applybox_page);
image_data->AddBoxes(line_boxes, line_texts, page_numbers);
return image_data;
}
@ -166,8 +158,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
// is set in the returned ImageData if the text was originally vertical, which
// can be used to invoke a different CJK recognition engine. The revised_box
// is also returned to enable calculation of output bounding boxes.
ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
int padding, TBOX* revised_box) const {
ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padding,
TBOX *revised_box) const {
TBOX wbox = box;
wbox.pad(padding, padding);
*revised_box = wbox;
@ -186,27 +178,29 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
revised_box->rotate(block.re_rotation());
// Now revised_box always refers to the image.
// BestPix is never colormapped, but may be of any depth.
Pix* pix = BestPix();
Pix *pix = BestPix();
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
TBOX image_box(0, 0, width, height);
// Clip to image bounds;
*revised_box &= image_box;
if (revised_box->null_box()) return nullptr;
Box* clip_box = boxCreate(revised_box->left(), height - revised_box->top(),
revised_box->width(), revised_box->height());
Pix* box_pix = pixClipRectangle(pix, clip_box, nullptr);
if (box_pix == nullptr) return nullptr;
if (revised_box->null_box())
return nullptr;
Box *clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(),
revised_box->height());
Pix *box_pix = pixClipRectangle(pix, clip_box, nullptr);
boxDestroy(&clip_box);
if (box_pix == nullptr)
return nullptr;
if (num_rotations > 0) {
Pix* rot_pix = pixRotateOrth(box_pix, num_rotations);
Pix *rot_pix = pixRotateOrth(box_pix, num_rotations);
pixDestroy(&box_pix);
box_pix = rot_pix;
}
// Convert sub-8-bit images to 8 bit.
int depth = pixGetDepth(box_pix);
if (depth < 8) {
Pix* grey;
Pix *grey;
grey = pixConvertTo8(box_pix, false);
pixDestroy(&box_pix);
box_pix = grey;
@ -222,15 +216,13 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
return new ImageData(vertical_text, box_pix);
}
#ifndef ANDROID_BUILD
// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
PointerVector<WERD_RES>* words) {
void Tesseract::LSTMRecognizeWord(const BLOCK &block, ROW *row, WERD_RES *word,
PointerVector<WERD_RES> *words) {
TBOX word_box = word->word->bounding_box();
// Get the word image - no frills.
if (tessedit_pageseg_mode == PSM_SINGLE_WORD ||
tessedit_pageseg_mode == PSM_RAW_LINE) {
if (tessedit_pageseg_mode == PSM_SINGLE_WORD || tessedit_pageseg_mode == PSM_RAW_LINE) {
// In single word mode, use the whole image without any other row/word
// interpretation.
word_box = TBOX(0, 0, ImageWidth(), ImageHeight());
@ -241,14 +233,14 @@ void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
if (baseline + row->x_height() + row->ascenders() > word_box.top())
word_box.set_top(baseline + row->x_height() + row->ascenders());
}
ImageData* im_data = GetRectImage(word_box, block, kImagePadding, &word_box);
if (im_data == nullptr) return;
ImageData *im_data = GetRectImage(word_box, block, kImagePadding, &word_box);
if (im_data == nullptr)
return;
bool do_invert = tessedit_do_invert;
lstm_recognizer_->RecognizeLine(*im_data, do_invert, classify_debug_level > 0,
kWorstDictCertainty / kCertaintyScale,
word_box, words, lstm_choice_mode,
lstm_choice_iterations);
kWorstDictCertainty / kCertaintyScale, word_box, words,
lstm_choice_mode, lstm_choice_iterations);
delete im_data;
SearchWords(words);
}
@ -256,24 +248,24 @@ void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
// Apply segmentation search to the given set of words, within the constraints
// of the existing ratings matrix. If there is already a best_choice on a word
// leaves it untouched and just sets the done/accepted etc flags.
void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
void Tesseract::SearchWords(PointerVector<WERD_RES> *words) {
// Run the segmentation search on the network outputs and make a BoxWord
// for each of the output words.
// If we drop a word as junk, then there is always a space in front of the
// next.
const Dict* stopper_dict = lstm_recognizer_->GetDict();
if (stopper_dict == nullptr) stopper_dict = &getDict();
const Dict *stopper_dict = lstm_recognizer_->GetDict();
if (stopper_dict == nullptr)
stopper_dict = &getDict();
bool any_nonspace_delimited = false;
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word = (*words)[w];
if (word->best_choice != nullptr &&
word->best_choice->ContainsAnyNonSpaceDelimited()) {
WERD_RES *word = (*words)[w];
if (word->best_choice != nullptr && word->best_choice->ContainsAnyNonSpaceDelimited()) {
any_nonspace_delimited = true;
break;
}
}
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word = (*words)[w];
WERD_RES *word = (*words)[w];
if (word->best_choice == nullptr) {
// It is a dud.
word->SetupFake(lstm_recognizer_->GetUnicharset());
@ -289,14 +281,12 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
word->tess_would_adapt = false;
word->done = true;
word->tesseract = this;
float word_certainty = std::min(word->space_certainty,
word->best_choice->certainty());
float word_certainty = std::min(word->space_certainty, word->best_choice->certainty());
word_certainty *= kCertaintyScale;
if (getDict().stopper_debug_level >= 1) {
tprintf("Best choice certainty=%g, space=%g, scaled=%g, final=%g\n",
word->best_choice->certainty(), word->space_certainty,
std::min(word->space_certainty, word->best_choice->certainty()) *
kCertaintyScale,
std::min(word->space_certainty, word->best_choice->certainty()) * kCertaintyScale,
word_certainty);
word->best_choice->print();
}
@ -306,6 +296,5 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
}
}
}
#endif // ANDROID_BUILD
} // namespace tesseract.
} // namespace tesseract.

View File

@ -19,21 +19,20 @@
#include <tesseract/ltrresultiterator.h>
#include "allheaders.h"
#include "pageres.h"
#include <tesseract/strngs.h>
#include "tesseractclass.h"
#include <allheaders.h>
namespace tesseract {
LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres, int rect_left,
int rect_top, int rect_width,
LTRResultIterator::LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height)
: PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top,
rect_width, rect_height),
line_separator_("\n"),
paragraph_separator_("\n") {}
: PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, rect_width,
rect_height)
, line_separator_("\n")
, paragraph_separator_("\n") {}
// Destructor.
// It is defined here, so the compiler can create a single vtable
@ -42,23 +41,23 @@ LTRResultIterator::~LTRResultIterator() = default;
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == nullptr)
return nullptr; // Already at the end!
STRING text;
return nullptr; // Already at the end!
std::string text;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE* best_choice = res_it.word()->best_choice;
WERD_CHOICE *best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != nullptr);
if (level == RIL_SYMBOL) {
text = res_it.word()->BestUTF8(blob_index_, false);
} else if (level == RIL_WORD) {
text = best_choice->unichar_string();
} else {
bool eol = false; // end of line?
bool eop = false; // end of paragraph?
do { // for each paragraph in a block
do { // for each text line in a paragraph
do { // for each word in a text line
bool eol = false; // end of line?
bool eop = false; // end of paragraph?
do { // for each paragraph in a block
do { // for each text line in a paragraph
do { // for each word in a text line
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != nullptr);
text += best_choice->unichar_string();
@ -66,7 +65,7 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
res_it.forward();
eol = res_it.row() != res_it.prev_row();
} while (!eol);
text.truncate_at(text.length() - 1);
text.resize(text.length() - 1);
text += line_separator_;
eop = res_it.block() != res_it.prev_block() ||
res_it.row()->row->para() != res_it.prev_row()->row->para();
@ -76,18 +75,18 @@ char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
int length = text.length() + 1;
char* result = new char[length];
char *result = new char[length];
strncpy(result, text.c_str(), length);
return result;
}
// Set the string inserted at the end of each text line. "\n" by default.
void LTRResultIterator::SetLineSeparator(const char* new_line) {
void LTRResultIterator::SetLineSeparator(const char *new_line) {
line_separator_ = new_line;
}
// Set the string inserted at the end of each paragraph. "\n" by default.
void LTRResultIterator::SetParagraphSeparator(const char* new_para) {
void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
paragraph_separator_ = new_para;
}
@ -95,11 +94,11 @@ void LTRResultIterator::SetParagraphSeparator(const char* new_para) {
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float LTRResultIterator::Confidence(PageIteratorLevel level) const {
if (it_->word() == nullptr)
return 0.0f; // Already at the end!
return 0.0f; // Already at the end!
float mean_certainty = 0.0f;
int certainty_count = 0;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE* best_choice = res_it.word()->best_choice;
WERD_CHOICE *best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != nullptr);
switch (level) {
case RIL_BLOCK:
@ -145,10 +144,10 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const {
return 0.0f;
}
void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
float* ascenders) const {
*row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
it_->row()->row->descenders();
void LTRResultIterator::RowAttributes(float *row_height, float *descenders,
float *ascenders) const {
*row_height =
it_->row()->row->x_height() + it_->row()->row->ascenders() - it_->row()->row->descenders();
*descenders = it_->row()->row->descenders();
*ascenders = it_->row()->row->ascenders();
}
@ -161,37 +160,35 @@ void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char* LTRResultIterator::WordFontAttributes(
bool* is_bold, bool* is_italic, bool* is_underlined, bool* is_monospace,
bool* is_serif, bool* is_smallcaps, int* pointsize, int* font_id) const {
const char* result = nullptr;
const char *LTRResultIterator::WordFontAttributes(bool *is_bold, bool *is_italic,
bool *is_underlined, bool *is_monospace,
bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const {
const char *result = nullptr;
if (it_->word() == nullptr) {
// Already at the end!
*pointsize = 0;
} else {
float row_height = it_->row()->row->x_height() +
it_->row()->row->ascenders() -
it_->row()->row->descenders();
float row_height =
it_->row()->row->x_height() + it_->row()->row->ascenders() - it_->row()->row->descenders();
// Convert from pixels to printers points.
*pointsize =
scaled_yres_ > 0
? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
: 0;
scaled_yres_ > 0 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) : 0;
#ifndef DISABLED_LEGACY_ENGINE
const FontInfo* font_info = it_->word()->fontinfo;
#ifndef DISABLED_LEGACY_ENGINE
const FontInfo *font_info = it_->word()->fontinfo;
if (font_info) {
// Font information available.
*font_id = font_info->universal_id;
*is_bold = font_info->is_bold();
*is_italic = font_info->is_italic();
*is_underlined = false; // TODO(rays) fix this!
*is_underlined = false; // TODO(rays) fix this!
*is_monospace = font_info->is_fixed_pitch();
*is_serif = font_info->is_serif();
result = font_info->name;
}
#endif // ndef DISABLED_LEGACY_ENGINE
#endif // ndef DISABLED_LEGACY_ENGINE
*is_smallcaps = it_->word()->small_caps;
}
@ -210,7 +207,7 @@ const char* LTRResultIterator::WordFontAttributes(
}
// Returns the name of the language used to recognize this word.
const char* LTRResultIterator::WordRecognitionLanguage() const {
const char *LTRResultIterator::WordRecognitionLanguage() const {
if (it_->word() == nullptr || it_->word()->tesseract == nullptr)
return nullptr;
return it_->word()->tesseract->lang.c_str();
@ -234,10 +231,9 @@ StrongScriptDirection LTRResultIterator::WordDirection() const {
// Returns true if the current word was found in a dictionary.
bool LTRResultIterator::WordIsFromDictionary() const {
if (it_->word() == nullptr)
return false; // Already at the end!
return false; // Already at the end!
int permuter = it_->word()->best_choice->permuter();
return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
permuter == USER_DAWG_PERM;
return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM;
}
// Returns the number of blanks before the current word.
@ -250,7 +246,7 @@ int LTRResultIterator::BlanksBeforeWord() const {
// Returns true if the current word is numeric.
bool LTRResultIterator::WordIsNumeric() const {
if (it_->word() == nullptr)
return false; // Already at the end!
return false; // Already at the end!
int permuter = it_->word()->best_choice->permuter();
return permuter == NUMBER_PERM;
}
@ -264,39 +260,38 @@ bool LTRResultIterator::HasBlamerInfo() const {
#ifndef DISABLED_LEGACY_ENGINE
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void* LTRResultIterator::GetParamsTrainingBundle() const {
const void *LTRResultIterator::GetParamsTrainingBundle() const {
return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr)
? &(it_->word()->blamer_bundle->params_training_bundle())
: nullptr;
}
#endif // ndef DISABLED_LEGACY_ENGINE
#endif // ndef DISABLED_LEGACY_ENGINE
// Returns the pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char* LTRResultIterator::GetBlamerDebug() const {
const char *LTRResultIterator::GetBlamerDebug() const {
return it_->word()->blamer_bundle->debug().c_str();
}
// Returns the pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char* LTRResultIterator::GetBlamerMisadaptionDebug() const {
const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
return it_->word()->blamer_bundle->misadaption_debug().c_str();
}
// Returns true if a truth string was recorded for the current word.
bool LTRResultIterator::HasTruthString() const {
if (it_->word() == nullptr)
return false; // Already at the end!
if (it_->word()->blamer_bundle == nullptr ||
it_->word()->blamer_bundle->NoTruth()) {
return false; // no truth information for this word
return false; // Already at the end!
if (it_->word()->blamer_bundle == nullptr || it_->word()->blamer_bundle->NoTruth()) {
return false; // no truth information for this word
}
return true;
}
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool LTRResultIterator::EquivalentToTruth(const char* str) const {
bool LTRResultIterator::EquivalentToTruth(const char *str) const {
if (!HasTruthString())
return false;
ASSERT_HOST(it_->word()->uch_set != nullptr);
@ -306,39 +301,39 @@ bool LTRResultIterator::EquivalentToTruth(const char* str) const {
// Returns the null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char* LTRResultIterator::WordTruthUTF8Text() const {
char *LTRResultIterator::WordTruthUTF8Text() const {
if (!HasTruthString())
return nullptr;
STRING truth_text = it_->word()->blamer_bundle->TruthString();
std::string truth_text = it_->word()->blamer_bundle->TruthString();
int length = truth_text.length() + 1;
char* result = new char[length];
char *result = new char[length];
strncpy(result, truth_text.c_str(), length);
return result;
}
// Returns the null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char* LTRResultIterator::WordNormedUTF8Text() const {
char *LTRResultIterator::WordNormedUTF8Text() const {
if (it_->word() == nullptr)
return nullptr; // Already at the end!
STRING ocr_text;
WERD_CHOICE* best_choice = it_->word()->best_choice;
const UNICHARSET* unicharset = it_->word()->uch_set;
return nullptr; // Already at the end!
std::string ocr_text;
WERD_CHOICE *best_choice = it_->word()->best_choice;
const UNICHARSET *unicharset = it_->word()->uch_set;
ASSERT_HOST(best_choice != nullptr);
for (int i = 0; i < best_choice->length(); ++i) {
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
}
int length = ocr_text.length() + 1;
char* result = new char[length];
char *result = new char[length];
strncpy(result, ocr_text.c_str(), length);
return result;
}
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char* LTRResultIterator::WordLattice(int* lattice_size) const {
const char *LTRResultIterator::WordLattice(int *lattice_size) const {
if (it_->word() == nullptr)
return nullptr; // Already at the end!
return nullptr; // Already at the end!
if (it_->word()->blamer_bundle == nullptr)
return nullptr;
*lattice_size = it_->word()->blamer_bundle->lattice_size();
@ -350,8 +345,7 @@ const char* LTRResultIterator::WordLattice(int* lattice_size) const {
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsSuperscript() const {
if (cblob_it_ == nullptr && it_->word() != nullptr)
return it_->word()->best_choice->BlobPosition(blob_index_) ==
SP_SUPERSCRIPT;
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
return false;
}
@ -373,7 +367,7 @@ bool LTRResultIterator::SymbolIsDropcap() const {
return false;
}
ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
ChoiceIterator::ChoiceIterator(const LTRResultIterator &result_it) {
ASSERT_HOST(result_it.it_->word() != nullptr);
word_res_ = result_it.it_->word();
oemLSTM_ = word_res_->tesseract->AnyLSTMLang();
@ -383,7 +377,7 @@ ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
bool lstm_choice_mode = word_res_->tesseract->lstm_choice_mode;
rating_coefficient_ = word_res_->tesseract->lstm_rating_coefficient;
blanks_before_word_ = result_it.BlanksBeforeWord();
BLOB_CHOICE_LIST* choices = nullptr;
BLOB_CHOICE_LIST *choices = nullptr;
tstep_index_ = &result_it.blob_index_;
if (oemLSTM_ && !word_res_->CTC_symbol_choices.empty()) {
if (!word_res_->CTC_symbol_choices[0].empty() &&
@ -417,8 +411,7 @@ ChoiceIterator::~ChoiceIterator() {
// are none left.
bool ChoiceIterator::Next() {
if (oemLSTM_ && LSTM_choices_ != nullptr && !LSTM_choices_->empty()) {
if (LSTM_choice_it_ != LSTM_choices_->end() &&
next(LSTM_choice_it_) == LSTM_choices_->end()) {
if (LSTM_choice_it_ != LSTM_choices_->end() && next(LSTM_choice_it_) == LSTM_choices_->end()) {
return false;
} else {
++LSTM_choice_it_;
@ -434,9 +427,9 @@ bool ChoiceIterator::Next() {
// Returns the null terminated UTF-8 encoded text string for the current
// choice. Do NOT use delete [] to free after use.
const char* ChoiceIterator::GetUTF8Text() const {
const char *ChoiceIterator::GetUTF8Text() const {
if (oemLSTM_ && LSTM_choices_ != nullptr && !LSTM_choices_->empty()) {
std::pair<const char*, float> choice = *LSTM_choice_it_;
std::pair<const char *, float> choice = *LSTM_choice_it_;
return choice.first;
} else {
if (choice_it_ == nullptr)
@ -455,7 +448,7 @@ const char* ChoiceIterator::GetUTF8Text() const {
float ChoiceIterator::Confidence() const {
float confidence;
if (oemLSTM_ && LSTM_choices_ != nullptr && !LSTM_choices_->empty()) {
std::pair<const char*, float> choice = *LSTM_choice_it_;
std::pair<const char *, float> choice = *LSTM_choice_it_;
confidence = 100 - rating_coefficient_ * choice.second;
} else {
if (choice_it_ == nullptr)
@ -466,8 +459,7 @@ float ChoiceIterator::Confidence() const {
}
// Returns the set of timesteps which belong to the current symbol
std::vector<std::vector<std::pair<const char*, float>>>*
ChoiceIterator::Timesteps() const {
std::vector<std::vector<std::pair<const char *, float>>> *ChoiceIterator::Timesteps() const {
int offset = *tstep_index_ + blanks_before_word_;
if (offset >= word_res_->segmented_timesteps.size() || !oemLSTM_) {
return nullptr;
@ -478,7 +470,7 @@ ChoiceIterator::Timesteps() const {
void ChoiceIterator::filterSpaces() {
if (LSTM_choices_->empty())
return;
std::vector<std::pair<const char*, float>>::iterator it;
std::vector<std::pair<const char *, float>>::iterator it;
for (it = LSTM_choices_->begin(); it != LSTM_choices_->end();) {
if (!strcmp(it->first, " ")) {
it = LSTM_choices_->erase(it);
@ -487,4 +479,4 @@ void ChoiceIterator::filterSpaces() {
}
}
}
} // namespace tesseract.
} // namespace tesseract.

View File

@ -21,4 +21,4 @@ namespace tesseract {
// instead of weak vtables in every compilation unit.
MutableIterator::~MutableIterator() = default;
} // namespace tesseract.
} // namespace tesseract.

View File

@ -3,7 +3,6 @@
// Description: Iterator for tesseract results providing access to
// both high-level API and Tesseract internal data structures.
// Author: David Eger
// Created: Thu Feb 24 19:01:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -36,29 +35,28 @@ class Tesseract;
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See tesseract/apitypes.h for the definition of PageIteratorLevel.
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// ResultIterator adds text-specific methods for access to OCR output.
// MutableIterator adds access to internal data structures.
class MutableIterator : public ResultIterator {
public:
class TESS_API MutableIterator : public ResultIterator {
public:
// See argument descriptions in ResultIterator()
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height)
: ResultIterator(
LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
rect_top, rect_width, rect_height)) {}
MutableIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres,
int rect_left, int rect_top, int rect_width, int rect_height)
: ResultIterator(LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
rect_top, rect_width, rect_height)) {}
~MutableIterator() override;
// See PageIterator and ResultIterator for most calls.
// Return access to Tesseract internals.
const PAGE_RES_IT *PageResIt() const { return it_; }
const PAGE_RES_IT *PageResIt() const {
return it_;
}
};
} // namespace tesseract.
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_

View File

@ -17,10 +17,6 @@
//
///////////////////////////////////////////////////////////////////////
#include <algorithm>
#include <cmath> // for std::fabs
#include <memory>
#include <tesseract/osdetect.h>
#include "blobbox.h"
@ -32,11 +28,16 @@
#include "oldlist.h"
#include "qrsequence.h"
#include "ratngs.h"
#include <tesseract/strngs.h>
#include "tabvector.h"
#include "tesseractclass.h"
#include "textord.h"
#include <algorithm>
#include <cmath> // for std::fabs
#include <memory>
namespace tesseract {
const float kSizeRatioToReject = 2.0;
const int kMinAcceptableBlobHeight = 10;
@ -48,16 +49,16 @@ const float kHanRatioInJapanese = 0.3;
const float kNonAmbiguousMargin = 1.0;
// General scripts
static const char* han_script = "Han";
static const char* latin_script = "Latin";
static const char* katakana_script = "Katakana";
static const char* hiragana_script = "Hiragana";
static const char* hangul_script = "Hangul";
static const char *han_script = "Han";
static const char *latin_script = "Latin";
static const char *katakana_script = "Katakana";
static const char *hiragana_script = "Hiragana";
static const char *hangul_script = "Hangul";
// Pseudo-scripts Name
const char* ScriptDetector::korean_script_ = "Korean";
const char* ScriptDetector::japanese_script_ = "Japanese";
const char* ScriptDetector::fraktur_script_ = "Fraktur";
const char *ScriptDetector::korean_script_ = "Korean";
const char *ScriptDetector::japanese_script_ = "Japanese";
const char *ScriptDetector::fraktur_script_ = "Fraktur";
void OSResults::update_best_orientation() {
float first = orientations[0];
@ -105,8 +106,8 @@ void OSResults::update_best_script(int orientation) {
second = scripts_na[orientation][i];
}
}
best_result.sconfidence = (second == 0.0f) ? 2.0f :
(first / second - 1.0) / (kScriptAcceptRatio - 1.0);
best_result.sconfidence =
(second == 0.0f) ? 2.0f : (first / second - 1.0) / (kScriptAcceptRatio - 1.0);
}
int OSResults::get_best_script(int orientation_id) const {
@ -114,8 +115,7 @@ int OSResults::get_best_script(int orientation_id) const {
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
const char *script = unicharset->get_script_from_script_id(j);
if (strcmp(script, "Common") && strcmp(script, "NULL")) {
if (max_id == -1 ||
scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id])
if (max_id == -1 || scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id])
max_id = j;
}
}
@ -135,13 +135,13 @@ void OSResults::print_scores(int orientation_id) const {
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
if (scripts_na[orientation_id][j]) {
tprintf("%12s\t: %f\n", unicharset->get_script_from_script_id(j),
scripts_na[orientation_id][j]);
scripts_na[orientation_id][j]);
}
}
}
// Accumulate scores with given OSResults instance and update the best script.
void OSResults::accumulate(const OSResults& osr) {
void OSResults::accumulate(const OSResults &osr) {
for (int i = 0; i < 4; ++i) {
orientations[i] += osr.orientations[i];
for (int j = 0; j < kMaxNumberOfScripts; ++j)
@ -154,8 +154,7 @@ void OSResults::accumulate(const OSResults& osr) {
// Detect and erase horizontal/vertical lines and picture regions from the
// image, so that non-text blobs are removed from consideration.
static void remove_nontext_regions(tesseract::Tesseract *tess,
BLOCK_LIST *blocks,
static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
TO_BLOCK_LIST *to_blocks) {
Pix *pix = tess->pix_binary();
ASSERT_HOST(pix != nullptr);
@ -166,37 +165,31 @@ static void remove_nontext_regions(tesseract::Tesseract *tess,
int resolution;
if (kMinCredibleResolution > pixGetXRes(pix)) {
resolution = kMinCredibleResolution;
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
pixGetXRes(pix), resolution);
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", pixGetXRes(pix), resolution);
} else {
resolution = pixGetXRes(pix);
}
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
&vertical_x, &vertical_y,
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y,
nullptr, &v_lines, &h_lines);
Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
Pix *im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
if (im_pix != nullptr) {
pixSubtract(pix, pix, im_pix);
pixDestroy(&im_pix);
}
tess->mutable_textord()->find_components(tess->pix_binary(),
blocks, to_blocks);
tess->mutable_textord()->find_components(tess->pix_binary(), blocks, to_blocks);
}
// Find connected components in the page and process a subset until finished or
// a stopping criterion is met.
// Returns the number of blobs used in making the estimate. 0 implies failure.
int orientation_and_script_detection(STRING& filename,
OSResults* osr,
tesseract::Tesseract* tess) {
STRING name = filename; //truncated name
const char *lastdot; //of name
TBOX page_box;
int orientation_and_script_detection(const char *filename, OSResults *osr,
tesseract::Tesseract *tess) {
std::string name = filename; // truncated name
lastdot = strrchr(name.c_str(), '.');
const char *lastdot = strrchr(name.c_str(), '.');
if (lastdot != nullptr)
name[lastdot-name.c_str()] = '\0';
name[lastdot - name.c_str()] = '\0';
ASSERT_HOST(tess->pix_binary() != nullptr);
int width = pixGetWidth(tess->pix_binary());
@ -212,16 +205,11 @@ int orientation_and_script_detection(STRING& filename,
if (port_blocks.empty()) {
// page segmentation did not succeed, so we need to find_components first.
tess->mutable_textord()->find_components(tess->pix_binary(),
&blocks, &port_blocks);
tess->mutable_textord()->find_components(tess->pix_binary(), &blocks, &port_blocks);
} else {
page_box.set_left(0);
page_box.set_bottom(0);
page_box.set_right(width);
page_box.set_top(height);
TBOX page_box(0, 0, width, height);
// Filter_blobs sets up the TO_BLOCKs the same as find_components does.
tess->mutable_textord()->filter_blobs(page_box.topright(),
&port_blocks, true);
tess->mutable_textord()->filter_blobs(page_box.topright(), &port_blocks, true);
}
return os_detect(&port_blocks, osr, tess);
@ -230,8 +218,7 @@ int orientation_and_script_detection(STRING& filename,
// Filter and sample the blobs.
// Returns a non-zero number of blobs if the page was successfully processed, or
// zero if the page had too few characters to be reliable
int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
tesseract::Tesseract* tess) {
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess) {
int blobs_total = 0;
TO_BLOCK_IT block_it;
block_it.set_to_list(port_blocks);
@ -239,30 +226,31 @@ int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
BLOBNBOX_CLIST filtered_list;
BLOBNBOX_C_IT filtered_it(&filtered_list);
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
block_it.forward ()) {
TO_BLOCK* to_block = block_it.data();
if (to_block->block->pdblk.poly_block() &&
!to_block->block->pdblk.poly_block()->IsText()) continue;
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
TO_BLOCK *to_block = block_it.data();
if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText())
continue;
BLOBNBOX_IT bbox_it;
bbox_it.set_to_list(&to_block->blobs);
for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list ();
bbox_it.forward ()) {
BLOBNBOX* bbox = bbox_it.data();
C_BLOB* blob = bbox->cblob();
TBOX box = blob->bounding_box();
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
BLOBNBOX *bbox = bbox_it.data();
C_BLOB *blob = bbox->cblob();
TBOX box = blob->bounding_box();
++blobs_total;
// Catch illegal value of box width and avoid division by zero.
if (box.width() == 0) continue;
if (box.width() == 0)
continue;
// TODO: Can height and width be negative? If not, remove fabs.
float y_x = std::fabs((box.height() * 1.0f) / box.width());
float x_y = 1.0f / y_x;
// Select a >= 1.0 ratio
float ratio = x_y > y_x ? x_y : y_x;
// Blob is ambiguous
if (ratio > kSizeRatioToReject) continue;
if (box.height() < kMinAcceptableBlobHeight) continue;
if (ratio > kSizeRatioToReject)
continue;
if (box.height() < kMinAcceptableBlobHeight)
continue;
filtered_it.add_to_end(bbox);
}
}
@ -275,9 +263,8 @@ int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
// If allowed_scripts is non-null and non-empty, it is a list of scripts that
// constrains both orientation and script detection to consider only scripts
// from the list.
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
BLOBNBOX_CLIST* blob_list, OSResults* osr,
tesseract::Tesseract* tess) {
int os_detect_blobs(const std::vector<int> *allowed_scripts, BLOBNBOX_CLIST *blob_list,
OSResults *osr, tesseract::Tesseract *tess) {
OSResults osr_;
int minCharactersToTry = tess->min_characters_to_try;
int maxCharactersToTry = 5 * minCharactersToTry;
@ -300,22 +287,20 @@ int os_detect_blobs(const GenericVector<int>* allowed_scripts,
return 0;
}
auto** blobs = new BLOBNBOX*[filtered_it.length()];
auto **blobs = new BLOBNBOX *[filtered_it.length()];
int number_of_blobs = 0;
for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list ();
filtered_it.forward ()) {
for (filtered_it.mark_cycle_pt(); !filtered_it.cycled_list(); filtered_it.forward()) {
blobs[number_of_blobs++] = filtered_it.data();
}
QRSequenceGenerator sequence(number_of_blobs);
int num_blobs_evaluated = 0;
for (int i = 0; i < real_max; ++i) {
if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess)
&& i > minCharactersToTry) {
if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) && i > minCharactersToTry) {
break;
}
++num_blobs_evaluated;
}
delete [] blobs;
delete[] blobs;
// Make sure the best_result is up-to-date
int orientation = o.get_orientation();
@ -326,13 +311,12 @@ int os_detect_blobs(const GenericVector<int>* allowed_scripts,
// Processes a single blob to estimate script and orientation.
// Return true if estimate of orientation and script satisfies stopping
// criteria.
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
ScriptDetector* s, OSResults* osr,
tesseract::Tesseract* tess) {
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *osr,
tesseract::Tesseract *tess) {
tess->tess_cn_matching.set_value(true); // turn it on
tess->tess_bn_matching.set_value(false);
C_BLOB* blob = bbox->cblob();
TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
C_BLOB *blob = bbox->cblob();
TBLOB *tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
TBOX box = tblob->bounding_box();
FCOORD current_rotation(1.0f, 0.0f);
FCOORD rotation90(0.0f, 1.0f);
@ -354,10 +338,8 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
x_origin = i == 1 ? box.left() : box.right();
}
std::unique_ptr<TBLOB> rotated_blob(new TBLOB(*tblob));
rotated_blob->Normalize(nullptr, &current_rotation, nullptr,
x_origin, y_origin, scaling, scaling,
0.0f, static_cast<float>(kBlnBaselineOffset),
false, nullptr);
rotated_blob->Normalize(nullptr, &current_rotation, nullptr, x_origin, y_origin, scaling,
scaling, 0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
tess->AdaptiveClassifier(rotated_blob.get(), ratings + i);
current_rotation.rotate(rotation90);
}
@ -370,27 +352,25 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
return stop;
}
OrientationDetector::OrientationDetector(
const GenericVector<int>* allowed_scripts, OSResults* osr) {
OrientationDetector::OrientationDetector(const std::vector<int> *allowed_scripts, OSResults *osr) {
osr_ = osr;
allowed_scripts_ = allowed_scripts;
}
// Score the given blob and return true if it is now sure of the orientation
// after adding this blob.
bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
float blob_o_score[4] = {0.0f, 0.0f, 0.0f, 0.0f};
float total_blob_o_score = 0.0f;
for (int i = 0; i < 4; ++i) {
BLOB_CHOICE_IT choice_it(scores + i);
if (!choice_it.empty()) {
BLOB_CHOICE* choice = nullptr;
BLOB_CHOICE *choice = nullptr;
if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) {
// Find the top choice in an allowed script.
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() &&
choice == nullptr; choice_it.forward()) {
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() && choice == nullptr;
choice_it.forward()) {
int choice_script = choice_it.data()->script_id();
int s = 0;
for (s = 0; s < allowed_scripts_->size(); ++s) {
@ -411,7 +391,8 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
}
}
}
if (total_blob_o_score == 0.0) return false;
if (total_blob_o_score == 0.0)
return false;
// Fill in any blanks with the worst score of the others. This is better than
// picking an arbitrary probability for it and way better than -inf.
float worst_score = 0.0f;
@ -427,7 +408,7 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
// Lower worst if there is only one.
worst_score /= 2.0f;
}
for (float& f : blob_o_score) {
for (float &f : blob_o_score) {
if (f == 0.0f) {
f = worst_score;
total_blob_o_score += worst_score;
@ -449,9 +430,8 @@ int OrientationDetector::get_orientation() {
return osr_->best_result.orientation_id;
}
ScriptDetector::ScriptDetector(const GenericVector<int>* allowed_scripts,
OSResults* osr, tesseract::Tesseract* tess) {
ScriptDetector::ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess) {
osr_ = osr;
tess_ = tess;
allowed_scripts_ = allowed_scripts;
@ -465,12 +445,11 @@ ScriptDetector::ScriptDetector(const GenericVector<int>* allowed_scripts,
fraktur_id_ = tess_->unicharset.add_script(fraktur_script_);
}
// Score the given blob, adding its script evidence to osr_ for each of the
// four orientations. Unlike OrientationDetector::detect_blob, this returns nothing.
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
for (int i = 0; i < 4; ++i) {
bool done[kMaxNumberOfScripts] = { false };
bool done[kMaxNumberOfScripts] = {false};
BLOB_CHOICE_IT choice_it;
choice_it.set_to_list(scores + i);
@ -479,23 +458,25 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
int script_count = 0;
int prev_id = -1;
int prev_fontinfo_id = -1;
const char* prev_unichar = "";
const char* unichar = "";
const char *prev_unichar = "";
const char *unichar = "";
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
choice_it.forward()) {
BLOB_CHOICE* choice = choice_it.data();
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {
BLOB_CHOICE *choice = choice_it.data();
int id = choice->script_id();
if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) {
// Check that the choice is in an allowed script.
int s = 0;
for (s = 0; s < allowed_scripts_->size(); ++s) {
if ((*allowed_scripts_)[s] == id) break;
if ((*allowed_scripts_)[s] == id)
break;
}
if (s == allowed_scripts_->size()) continue; // Not found in list.
if (s == allowed_scripts_->size())
continue; // Not found in list.
}
// Script already processed before.
if (done[id]) continue;
if (done[id])
continue;
done[id] = true;
unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
@ -527,9 +508,8 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
// Workaround for Fraktur
if (prev_id == latin_id_) {
if (prev_fontinfo_id >= 0) {
const tesseract::FontInfo &fi =
tess_->get_fontinfo_table().get(prev_fontinfo_id);
//printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
const tesseract::FontInfo &fi = tess_->get_fontinfo_table().get(prev_fontinfo_id);
// printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
// fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),
// fi.is_serif(), fi.is_fraktur(),
// prev_unichar);
@ -552,7 +532,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
}
}
} // iterate over each orientation
} // iterate over each orientation
}
bool ScriptDetector::must_stop(int orientation) {
@ -563,7 +543,7 @@ bool ScriptDetector::must_stop(int orientation) {
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
int OrientationIdToValue(const int& id) {
int OrientationIdToValue(const int &id) {
switch (id) {
case 0:
return 0;
@ -577,3 +557,5 @@ int OrientationIdToValue(const int& id) {
return -1;
}
}
} // namespace tesseract

View File

@ -16,73 +16,72 @@
*
**********************************************************************/
#include <cctype>
#include <cerrno>
#include <cstring>
#include "control.h"
#include <tesseract/helpers.h>
#include "output.h"
#include "control.h"
#include "tesseractclass.h"
#include "tessvars.h"
#ifndef DISABLED_LEGACY_ENGINE
#include "docqual.h"
#include "reject.h"
# include "docqual.h"
# include "reject.h"
#endif
#define CTRL_NEWLINE '\012' //newline
#define CTRL_HARDLINE '\015' //cr
#include "helpers.h"
#include <cctype>
#include <cerrno>
#include <cstring>
#define CTRL_NEWLINE '\012' // newline
#define CTRL_HARDLINE '\015' // cr
namespace tesseract {
void Tesseract::output_pass( //Tess output pass //send to api
PAGE_RES_IT &page_res_it,
const TBOX *target_word_box) {
void Tesseract::output_pass( // Tess output pass //send to api
PAGE_RES_IT &page_res_it, const TBOX *target_word_box) {
BLOCK_RES *block_of_last_word;
bool force_eol; //During output
BLOCK *nextblock; //block of next word
WERD *nextword; //next word
bool force_eol; // During output
BLOCK *nextblock; // block of next word
WERD *nextword; // next word
page_res_it.restart_page ();
page_res_it.restart_page();
block_of_last_word = nullptr;
while (page_res_it.word () != nullptr) {
check_debug_pt (page_res_it.word (), 120);
while (page_res_it.word() != nullptr) {
check_debug_pt(page_res_it.word(), 120);
if (target_word_box) {
TBOX current_word_box = page_res_it.word()->word->bounding_box();
FCOORD center_pt(
(current_word_box.right() + current_word_box.left()) / 2,
(current_word_box.bottom() + current_word_box.top()) / 2);
FCOORD center_pt((current_word_box.right() + current_word_box.left()) / 2,
(current_word_box.bottom() + current_word_box.top()) / 2);
if (!target_word_box->contains(center_pt)) {
page_res_it.forward();
continue;
}
}
if (tessedit_write_block_separators &&
block_of_last_word != page_res_it.block ()) {
block_of_last_word = page_res_it.block ();
if (tessedit_write_block_separators && block_of_last_word != page_res_it.block()) {
block_of_last_word = page_res_it.block();
}
force_eol = (tessedit_write_block_separators &&
(page_res_it.block () != page_res_it.next_block ())) ||
(page_res_it.next_word () == nullptr);
force_eol =
(tessedit_write_block_separators && (page_res_it.block() != page_res_it.next_block())) ||
(page_res_it.next_word() == nullptr);
if (page_res_it.next_word () != nullptr)
nextword = page_res_it.next_word ()->word;
if (page_res_it.next_word() != nullptr)
nextword = page_res_it.next_word()->word;
else
nextword = nullptr;
if (page_res_it.next_block () != nullptr)
nextblock = page_res_it.next_block ()->block;
if (page_res_it.next_block() != nullptr)
nextblock = page_res_it.next_block()->block;
else
nextblock = nullptr;
//regardless of tilde crunching
// regardless of tilde crunching
write_results(page_res_it,
determine_newline_type(page_res_it.word()->word,
page_res_it.block()->block,
nextword, nextblock), force_eol);
determine_newline_type(page_res_it.word()->word, page_res_it.block()->block,
nextword, nextblock),
force_eol);
page_res_it.forward();
}
}
/*************************************************************************
* write_results()
*
@ -95,8 +94,8 @@ void Tesseract::output_pass( //Tess output pass //send to api
* inset list - a list of bounding boxes of reject insets - indexed by the
* reject strings in the epchoice text.
*************************************************************************/
void Tesseract::write_results(PAGE_RES_IT& page_res_it,
char newline_type, // type of newline
void Tesseract::write_results(PAGE_RES_IT &page_res_it,
char newline_type, // type of newline
bool force_eol) { // override tilde crunch?
WERD_RES *word = page_res_it.word();
const UNICHARSET &uchset = *word->uch_set;
@ -104,19 +103,14 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it,
bool need_reject = false;
UNICHAR_ID space = uchset.unichar_to_id(" ");
if ((word->unlv_crunch_mode != CR_NONE ||
word->best_choice->length() == 0) &&
if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->length() == 0) &&
!tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
if ((word->unlv_crunch_mode != CR_DELETE) &&
(!stats_.tilde_crunch_written ||
((word->unlv_crunch_mode == CR_KEEP_SPACE) &&
(word->word->space () > 0) &&
!word->word->flag (W_FUZZY_NON) &&
!word->word->flag (W_FUZZY_SP)))) {
if (!word->word->flag (W_BOL) &&
(word->word->space () > 0) &&
!word->word->flag (W_FUZZY_NON) &&
!word->word->flag (W_FUZZY_SP)) {
((word->unlv_crunch_mode == CR_KEEP_SPACE) && (word->word->space() > 0) &&
!word->word->flag(W_FUZZY_NON) && !word->word->flag(W_FUZZY_SP)))) {
if (!word->word->flag(W_BOL) && (word->word->space() > 0) && !word->word->flag(W_FUZZY_NON) &&
!word->word->flag(W_FUZZY_SP)) {
stats_.last_char_was_tilde = false;
}
need_reject = true;
@ -130,7 +124,7 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it,
stats_.write_results_empty_block = false;
}
if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) {
if ((word->word->flag(W_EOL) && !stats_.last_char_was_newline) || force_eol) {
stats_.tilde_crunch_written = false;
stats_.last_char_was_newline = true;
stats_.last_char_was_tilde = false;
@ -148,40 +142,35 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it,
stats_.last_char_was_newline = true;
else
stats_.last_char_was_newline = false;
stats_.write_results_empty_block = force_eol; // about to write a real word
stats_.write_results_empty_block = force_eol; // about to write a real word
if (unlv_tilde_crunching &&
stats_.last_char_was_tilde &&
(word->word->space() == 0) &&
if (unlv_tilde_crunching && stats_.last_char_was_tilde && (word->word->space() == 0) &&
!(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&
(word->best_choice->unichar_id(0) == space)) {
/* Prevent adjacent tilde across words - we know that adjacent tildes within
words have been removed */
words have been removed */
word->MergeAdjacentBlobs(0);
}
if (newline_type ||
(word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes))
if (newline_type || (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes))
stats_.last_char_was_tilde = false;
else {
if (word->reject_map.length () > 0) {
if (word->reject_map.length() > 0) {
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
stats_.last_char_was_tilde = true;
else
stats_.last_char_was_tilde = false;
}
else if (word->word->space () > 0)
} else if (word->word->space() > 0)
stats_.last_char_was_tilde = false;
/* else it is unchanged as there are no output chars */
}
ASSERT_HOST (word->best_choice->length() == word->reject_map.length());
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
set_unlv_suspects(word);
check_debug_pt (word, 120);
check_debug_pt(word, 120);
if (tessedit_rejection_debug) {
tprintf ("Dict word: \"%s\": %d\n",
word->best_choice->debug_string().c_str(),
dict_word(*(word->best_choice)));
tprintf("Dict word: \"%s\": %d\n", word->best_choice->debug_string().c_str(),
dict_word(*(word->best_choice)));
}
if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
if (tessedit_zero_rejection) {
@ -194,14 +183,12 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it,
if (tessedit_minimal_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
if ((word->best_choice->unichar_id(i) != space) &&
word->reject_map[i].rejected())
if ((word->best_choice->unichar_id(i) != space) && word->reject_map[i].rejected())
word->reject_map[i].setrej_minimal_rej_accept();
}
}
}
}
} // namespace tesseract
/**********************************************************************
* determine_newline_type
@ -210,31 +197,31 @@ void Tesseract::write_results(PAGE_RES_IT& page_res_it,
* Return false if not at end of line.
**********************************************************************/
char determine_newline_type( //test line ends
WERD *word, //word to do
BLOCK *block, //current block
WERD *next_word, //next word
BLOCK *next_block //block of next word
) {
int16_t end_gap; //to right edge
int16_t width; //of next word
TBOX word_box; //bounding
TBOX next_box; //next word
TBOX block_box; //block bounding
char determine_newline_type( // test line ends
WERD *word, // word to do
BLOCK *block, // current block
WERD *next_word, // next word
BLOCK *next_block // block of next word
) {
int16_t end_gap; // to right edge
int16_t width; // of next word
TBOX word_box; // bounding
TBOX next_box; // next word
TBOX block_box; // block bounding
if (!word->flag (W_EOL))
return false; //not end of line
if (!word->flag(W_EOL))
return false; // not end of line
if (next_word == nullptr || next_block == nullptr || block != next_block)
return CTRL_NEWLINE;
if (next_word->space () > 0)
return CTRL_HARDLINE; //it is tabbed
word_box = word->bounding_box ();
next_box = next_word->bounding_box ();
block_box = block->pdblk.bounding_box ();
//gap to eol
end_gap = block_box.right () - word_box.right ();
end_gap -= static_cast<int32_t>(block->space ());
width = next_box.right () - next_box.left ();
if (next_word->space() > 0)
return CTRL_HARDLINE; // it is tabbed
word_box = word->bounding_box();
next_box = next_word->bounding_box();
block_box = block->pdblk.bounding_box();
// gap to eol
end_gap = block_box.right() - word_box.right();
end_gap -= static_cast<int32_t>(block->space());
width = next_box.right() - next_box.left();
// tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n",
// block_box.right(),word_box.right(),end_gap,
// next_box.right(),next_box.left(),width,
@ -247,11 +234,10 @@ char determine_newline_type( //test line ends
* Return the first accepted character from the repetition string. This is the
* character which is repeated - as determined earlier by fix_rep_char()
*************************************************************************/
namespace tesseract {
UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
int i;
for (i = 0; ((i < word->reject_map.length()) &&
(word->reject_map[i].rejected())); ++i);
for (i = 0; ((i < word->reject_map.length()) && (word->reject_map[i].rejected())); ++i)
;
if (i < word->reject_map.length()) {
return word->best_choice->unichar_id(i);
@ -286,16 +272,14 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
}
if (suspect_level >= 3)
return; //Use defaults
return; // Use defaults
/* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/
if (safe_dict_word(word_res) &&
(count_alphas(word) > suspect_short_words)) {
if (safe_dict_word(word_res) && (count_alphas(word) > suspect_short_words)) {
/* Unreject alphas in dictionary words */
for (i = 0; i < len; ++i) {
if (word_res->reject_map[i].rejected() &&
uchset.get_isalpha(word.unichar_id(i)))
if (word_res->reject_map[i].rejected() && uchset.get_isalpha(word.unichar_id(i)))
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
@ -303,13 +287,12 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
rating_per_ch = word.rating() / word_res->reject_map.length();
if (rating_per_ch >= suspect_rating_per_ch)
return; // Don't touch bad ratings
return; // Don't touch bad ratings
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/
for (i = 0; i < len; ++i) {
if (word_res->reject_map[i].rejected() &&
(!uchset.eq(word.unichar_id(i), " ")))
if (word_res->reject_map[i].rejected() && (!uchset.eq(word.unichar_id(i), " ")))
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
@ -328,34 +311,28 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
if (suspect_level == 2)
return;
if (!suspect_constrain_1Il ||
(word_res->reject_map.length() <= suspect_short_words)) {
if (!suspect_constrain_1Il || (word_res->reject_map.length() <= suspect_short_words)) {
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected()) {
if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
word_res->reject_map[i].flag(R_POSTNN_1IL)))
word_res->reject_map[i].flag(R_POSTNN_1IL)))
word_res->reject_map[i].setrej_minimal_rej_accept();
if (!suspect_constrain_1Il &&
word_res->reject_map[i].flag(R_MM_REJECT))
if (!suspect_constrain_1Il && word_res->reject_map[i].flag(R_MM_REJECT))
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
}
if (acceptable_word_string(*word_res->uch_set,
word.unichar_string().c_str(),
word.unichar_lengths().c_str()) !=
AC_UNACCEPTABLE ||
acceptable_number_string(word.unichar_string().c_str(),
word.unichar_lengths().c_str())) {
if (acceptable_word_string(*word_res->uch_set, word.unichar_string().c_str(),
word.unichar_lengths().c_str()) != AC_UNACCEPTABLE ||
acceptable_number_string(word.unichar_string().c_str(), word.unichar_lengths().c_str())) {
if (word_res->reject_map.length() > suspect_short_words) {
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected() &&
(!word_res->reject_map[i].perm_rejected() ||
word_res->reject_map[i].flag (R_1IL_CONFLICT) ||
word_res->reject_map[i].flag (R_POSTNN_1IL) ||
word_res->reject_map[i].flag (R_MM_REJECT))) {
if (word_res->reject_map[i].rejected() && (!word_res->reject_map[i].perm_rejected() ||
word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
word_res->reject_map[i].flag(R_POSTNN_1IL) ||
word_res->reject_map[i].flag(R_MM_REJECT))) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
@ -372,7 +349,6 @@ int16_t Tesseract::count_alphas(const WERD_CHOICE &word) {
return count;
}
int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
int count = 0;
for (int i = 0; i < word.length(); ++i) {
@ -383,29 +359,24 @@ int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
return count;
}
bool Tesseract::acceptable_number_string(const char* s,
const char* lengths) {
bool Tesseract::acceptable_number_string(const char *s, const char *lengths) {
bool prev_digit = false;
if (*lengths == 1 && *s == '(')
s++;
if (*lengths == 1 &&
((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-')))
if (*lengths == 1 && ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-')))
s++;
for (; *s != '\0'; s += *(lengths++)) {
if (unicharset.get_isdigit(s, *lengths))
prev_digit = true;
else if (prev_digit &&
(*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
else if (prev_digit && (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
prev_digit = false;
else if (prev_digit && *lengths == 1 &&
(*(s + *lengths) == '\0') && ((*s == '%') || (*s == ')')))
else if (prev_digit && *lengths == 1 && (*(s + *lengths) == '\0') &&
((*s == '%') || (*s == ')')))
return true;
else if (prev_digit &&
*lengths == 1 && (*s == '%') &&
else if (prev_digit && *lengths == 1 && (*s == '%') &&
(*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
(*(s + *lengths + *(lengths + 1)) == '\0'))
return true;
@ -414,4 +385,4 @@ bool Tesseract::acceptable_number_string(const char* s,
}
return true;
}
} // namespace tesseract
} // namespace tesseract

View File

@ -20,14 +20,18 @@
#ifndef OUTPUT_H
#define OUTPUT_H
namespace tesseract {
class BLOCK;
class WERD;
/**
 * Test line ends.
 * Declaration only: takes the word being examined and its block, together
 * with the following word and that word's block, and returns a char.
 * NOTE(review): the meaning of the returned char is defined by the
 * implementation, which is not visible here -- confirm before relying on it.
 */
char determine_newline_type(WERD *word, ///< word to do
BLOCK *block, ///< current block
WERD *next_word, ///< next word
BLOCK *next_block ///< block of next word
);
char determine_newline_type(WERD *word, ///< word to do
BLOCK *block, ///< current block
WERD *next_word, ///< next word
BLOCK *next_block ///< block of next word
);
} // namespace tesseract
#endif

View File

@ -17,9 +17,9 @@
//
///////////////////////////////////////////////////////////////////////
#include <allheaders.h>
#include <tesseract/pageiterator.h>
#include "allheaders.h"
#include <tesseract/helpers.h>
#include "helpers.h"
#include "pageres.h"
#include "tesseractclass.h"
@ -27,23 +27,22 @@
namespace tesseract {
/**
 * Constructs an iterator over page_res.
 * page_res, tesseract, scale, scaled_yres and the rect_* arguments are stored
 * unchanged in the matching members. The per-word state (word_,
 * word_length_, blob_index_, cblob_it_) starts out empty and both
 * include_*_dots_ flags start out false. A PAGE_RES_IT is then allocated for
 * page_res and the iterator is positioned by calling Begin().
 */
PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height)
: page_res_(page_res),
tesseract_(tesseract),
word_(nullptr),
word_length_(0),
blob_index_(0),
cblob_it_(nullptr),
include_upper_dots_(false),
include_lower_dots_(false),
scale_(scale),
scaled_yres_(scaled_yres),
rect_left_(rect_left),
rect_top_(rect_top),
rect_width_(rect_width),
rect_height_(rect_height) {
PageIterator::PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres,
int rect_left, int rect_top, int rect_width, int rect_height)
: page_res_(page_res)
, tesseract_(tesseract)
, word_(nullptr)
, word_length_(0)
, blob_index_(0)
, cblob_it_(nullptr)
, include_upper_dots_(false)
, include_lower_dots_(false)
, scale_(scale)
, scaled_yres_(scaled_yres)
, rect_left_(rect_left)
, rect_top_(rect_top)
, rect_width_(rect_width)
, rect_height_(rect_height) {
// it_ is assigned in the body rather than in the initializer list.
it_ = new PAGE_RES_IT(page_res);
// Explicitly qualified, so this resolves to PageIterator::Begin even if
// Begin is virtual and overridden in a derived class.
PageIterator::Begin();
}
@ -58,26 +57,26 @@ PageIterator::~PageIterator() {
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level.
*/
PageIterator::PageIterator(const PageIterator& src)
: page_res_(src.page_res_),
tesseract_(src.tesseract_),
word_(nullptr),
word_length_(src.word_length_),
blob_index_(src.blob_index_),
cblob_it_(nullptr),
include_upper_dots_(src.include_upper_dots_),
include_lower_dots_(src.include_lower_dots_),
scale_(src.scale_),
scaled_yres_(src.scaled_yres_),
rect_left_(src.rect_left_),
rect_top_(src.rect_top_),
rect_width_(src.rect_width_),
rect_height_(src.rect_height_) {
PageIterator::PageIterator(const PageIterator &src)
: page_res_(src.page_res_)
, tesseract_(src.tesseract_)
, word_(nullptr)
, word_length_(src.word_length_)
, blob_index_(src.blob_index_)
, cblob_it_(nullptr)
, include_upper_dots_(src.include_upper_dots_)
, include_lower_dots_(src.include_lower_dots_)
, scale_(src.scale_)
, scaled_yres_(src.scaled_yres_)
, rect_left_(src.rect_left_)
, rect_top_(src.rect_top_)
, rect_width_(src.rect_width_)
, rect_height_(src.rect_height_) {
// Copy-construct a separately allocated PAGE_RES_IT from the source's one,
// so the two PageIterators do not share the same it_ object.
it_ = new PAGE_RES_IT(*src.it_);
// word_ and cblob_it_ were initialised to nullptr above instead of being
// copied from src; BeginWord is called with the source's blob index to set
// up this object's own per-word state.
BeginWord(src.blob_index_);
}
const PageIterator& PageIterator::operator=(const PageIterator& src) {
const PageIterator &PageIterator::operator=(const PageIterator &src) {
page_res_ = src.page_res_;
tesseract_ = src.tesseract_;
include_upper_dots_ = src.include_upper_dots_;
@ -94,9 +93,9 @@ const PageIterator& PageIterator::operator=(const PageIterator& src) {
return *this;
}
// Returns true when it_ and other are both null, or when both are non-null
// and the two PAGE_RES_IT objects compare equal with operator==.
// Returns false when exactly one of them is null.
bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const {
bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT *other) const {
return (it_ == nullptr && it_ == other) ||
((other != nullptr) && (it_ != nullptr) && (*it_ == *other));
((other != nullptr) && (it_ != nullptr) && (*it_ == *other));
}
// ============= Moving around within the page ============.
@ -108,7 +107,8 @@ void PageIterator::Begin() {
}
void PageIterator::RestartParagraph() {
if (it_->block() == nullptr) return; // At end of the document.
if (it_->block() == nullptr)
return; // At end of the document.
PAGE_RES_IT para(page_res_);
PAGE_RES_IT next_para(para);
next_para.forward_paragraph();
@ -145,7 +145,8 @@ void PageIterator::RestartRow() {
* the appropriate language has been loaded into Tesseract.
*/
bool PageIterator::Next(PageIteratorLevel level) {
if (it_->block() == nullptr) return false; // Already at the end!
if (it_->block() == nullptr)
return false; // Already at the end!
if (it_->word() == nullptr)
level = RIL_BLOCK;
@ -157,8 +158,8 @@ bool PageIterator::Next(PageIteratorLevel level) {
it_->forward_paragraph();
break;
case RIL_TEXTLINE:
for (it_->forward_with_empties(); it_->row() == it_->prev_row();
it_->forward_with_empties());
for (it_->forward_with_empties(); it_->row() == it_->prev_row(); it_->forward_with_empties())
;
break;
case RIL_WORD:
it_->forward_with_empties();
@ -183,15 +184,16 @@ bool PageIterator::Next(PageIteratorLevel level) {
* moved to the start of a RIL_PARA.
*/
bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
if (it_->block() == nullptr) return false; // Already at the end!
if (it_->word() == nullptr) return true; // In an image block.
if (it_->block() == nullptr)
return false; // Already at the end!
if (it_->word() == nullptr)
return true; // In an image block.
switch (level) {
case RIL_BLOCK:
return blob_index_ == 0 && it_->block() != it_->prev_block();
case RIL_PARA:
return blob_index_ == 0 &&
(it_->block() != it_->prev_block() ||
it_->row()->row->para() != it_->prev_row()->row->para());
return blob_index_ == 0 && (it_->block() != it_->prev_block() ||
it_->row()->row->para() != it_->prev_row()->row->para());
case RIL_TEXTLINE:
return blob_index_ == 0 && it_->row() != it_->prev_row();
case RIL_WORD:
@ -206,9 +208,9 @@ bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*/
bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const {
if (Empty(element)) return true; // Already at the end!
bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const {
if (Empty(element))
return true; // Already at the end!
// The result is true if we step forward by element and find we are
// at the end of the page or at beginning of *all* levels in:
// [level, element).
@ -217,7 +219,8 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
// word on a line, so we also have to be at the first symbol in a word.
PageIterator next(*this);
next.Next(element);
if (next.Empty(element)) return true; // Reached the end of the page.
if (next.Empty(element))
return true; // Reached the end of the page.
while (element > level) {
element = static_cast<PageIteratorLevel>(element - 1);
if (!next.IsAtBeginningOf(element))
@ -262,28 +265,24 @@ int PageIterator::Cmp(const PageIterator &other) const {
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
*/
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
int* left, int* top,
int* right, int* bottom) const {
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const {
if (Empty(level))
return false;
TBOX box;
PARA *para = nullptr;
switch (level) {
case RIL_BLOCK:
box = it_->block()->block->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
box = it_->block()->block->restricted_bounding_box(include_upper_dots_, include_lower_dots_);
break;
case RIL_PARA:
para = it_->row()->row->para();
// Fall through.
case RIL_TEXTLINE:
box = it_->row()->row->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
box = it_->row()->row->restricted_bounding_box(include_upper_dots_, include_lower_dots_);
break;
case RIL_WORD:
box = it_->word()->word->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
box = it_->word()->word->restricted_bounding_box(include_upper_dots_, include_lower_dots_);
break;
case RIL_SYMBOL:
if (cblob_it_ == nullptr)
@ -295,10 +294,8 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
PageIterator other = *this;
other.Begin();
do {
if (other.it_->block() &&
other.it_->block()->block == it_->block()->block &&
other.it_->row() && other.it_->row()->row &&
other.it_->row()->row->para() == para) {
if (other.it_->block() && other.it_->block()->block == it_->block()->block &&
other.it_->row() && other.it_->row()->row && other.it_->row()->row->para() == para) {
box = box.bounding_union(other.it_->row()->row->bounding_box());
}
} while (other.Next(RIL_TEXTLINE));
@ -322,65 +319,64 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
*/
bool PageIterator::BoundingBox(PageIteratorLevel level,
int* left, int* top,
int* right, int* bottom) const {
bool PageIterator::BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const {
// Convenience overload: forwards to the padded overload with a padding of 0
// and returns its result unchanged.
return BoundingBox(level, 0, left, top, right, bottom);
}
/**
 * Padded overload. Obtains the box for level from BoundingBoxInternal, then
 * maps each edge into the coordinate system of the original image by
 * dividing by scale_, adding the rect_left_ / rect_top_ offset and growing
 * the box by padding on every side.
 * Returns false, without performing the conversion, when BoundingBoxInternal
 * returns false; returns true otherwise.
 */
bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
int* left, int* top,
int* right, int* bottom) const {
bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, int *left, int *top,
int *right, int *bottom) const {
if (!BoundingBoxInternal(level, left, top, right, bottom))
return false;
// Convert to the coordinate system of the original image.
// The right/bottom edges add scale_ - 1 before dividing, and use the freshly
// computed *left / *top as their lower clip bound, so the order of the four
// assignments matters.
// NOTE(review): ClipToRange presumably clamps its first argument into the
// [second, third] range -- its definition is not visible here, confirm.
*left = ClipToRange(*left / scale_ + rect_left_ - padding,
rect_left_, rect_left_ + rect_width_);
*top = ClipToRange(*top / scale_ + rect_top_ - padding,
rect_top_, rect_top_ + rect_height_);
*right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
*left, rect_left_ + rect_width_);
*bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
*top, rect_top_ + rect_height_);
*left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_, rect_left_ + rect_width_);
*top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_, rect_top_ + rect_height_);
*right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding, *left,
rect_left_ + rect_width_);
*bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding, *top,
rect_top_ + rect_height_);
return true;
}
/**
 * Returns true if there is no object at the given level at the current
 * position. That is the case when the iterator has no current block, when it
 * has no current word and level is anything other than RIL_BLOCK, or when
 * level is RIL_SYMBOL and blob_index_ has reached word_length_.
 * Returns false otherwise.
 */
bool PageIterator::Empty(PageIteratorLevel level) const {
if (it_->block() == nullptr) return true; // Already at the end!
if (it_->word() == nullptr && level != RIL_BLOCK) return true; // image block
if (it_->block() == nullptr)
return true; // Already at the end!
if (it_->word() == nullptr && level != RIL_BLOCK)
return true; // image block
if (level == RIL_SYMBOL && blob_index_ >= word_length_)
return true; // Zero length word, or already at the end of it.
return true; // Zero length word, or already at the end of it.
return false;
}
/** Returns the type of the current block. See tesseract/apitypes.h for PolyBlockType. */
/** Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType. */
PolyBlockType PageIterator::BlockType() const {
// Both the iterator's block entry and the BLOCK it points at must exist
// before the poly_block can be inspected.
if (it_->block() == nullptr || it_->block()->block == nullptr)
return PT_UNKNOWN; // Already at the end!
return PT_UNKNOWN; // Already at the end!
if (it_->block()->block->pdblk.poly_block() == nullptr)
return PT_FLOWING_TEXT; // No layout analysis used - assume text.
return PT_FLOWING_TEXT; // No layout analysis used - assume text.
// Otherwise return whatever the block's POLY_BLOCK reports through isA().
return it_->block()->block->pdblk.poly_block()->isA();
}
/** Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. */
Pta* PageIterator::BlockPolygon() const {
Pta *PageIterator::BlockPolygon() const {
if (it_->block() == nullptr || it_->block()->block == nullptr)
return nullptr; // Already at the end!
return nullptr; // Already at the end!
if (it_->block()->block->pdblk.poly_block() == nullptr)
return nullptr; // No layout analysis used - no polygon.
return nullptr; // No layout analysis used - no polygon.
// Copy polygon, so we can unrotate it to image coordinates.
POLY_BLOCK* internal_poly = it_->block()->block->pdblk.poly_block();
POLY_BLOCK *internal_poly = it_->block()->block->pdblk.poly_block();
ICOORDELT_LIST vertices;
vertices.deep_copy(internal_poly->points(), ICOORDELT::deep_copy);
POLY_BLOCK poly(&vertices, internal_poly->isA());
poly.rotate(it_->block()->block->re_rotation());
ICOORDELT_IT it(poly.points());
Pta* pta = ptaCreate(it.length());
Pta *pta = ptaCreate(it.length());
int num_pts = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
ICOORD* pt = it.data();
ICOORD *pt = it.data();
// Convert to top-down coords within the input image.
int x = static_cast<float>(pt->x()) / scale_ + rect_left_;
int y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
@ -413,20 +409,19 @@ Pta* PageIterator::BlockPolygon() const {
* should be good, even with xor, since the images come from the connected
* components.
*/
Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
int left, top, right, bottom;
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
return nullptr;
if (level == RIL_SYMBOL && cblob_it_ != nullptr &&
cblob_it_->data()->area() != 0)
if (level == RIL_SYMBOL && cblob_it_ != nullptr && cblob_it_->data()->area() != 0)
return cblob_it_->data()->render();
Box* box = boxCreate(left, top, right - left, bottom - top);
Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
Box *box = boxCreate(left, top, right - left, bottom - top);
Pix *pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix* mask = it_->block()->block->render_mask(&mask_box);
Pix *mask = it_->block()->block->render_mask(&mask_box);
int mask_x = left - mask_box.left();
int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
// AND the mask and pix, putting the result in pix.
@ -449,9 +444,8 @@ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
Pix* original_img,
int* left, int* top) const {
Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left,
int *top) const {
int right, bottom;
if (!BoundingBox(level, left, top, &right, &bottom))
return nullptr;
@ -463,24 +457,23 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
*top = std::max(*top - padding, 0);
right = std::min(right + padding, rect_width_);
bottom = std::min(bottom + padding, rect_height_);
Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
Pix* grey_pix = pixClipRectangle(original_img, box, nullptr);
Box *box = boxCreate(*left, *top, right - *left, bottom - *top);
Pix *grey_pix = pixClipRectangle(original_img, box, nullptr);
boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix* mask = it_->block()->block->render_mask(&mask_box);
Pix *mask = it_->block()->block->render_mask(&mask_box);
// Copy the mask registered correctly into an image the size of grey_pix.
int mask_x = *left - mask_box.left();
int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
int width = pixGetWidth(grey_pix);
int height = pixGetHeight(grey_pix);
Pix* resized_mask = pixCreate(width, height, 1);
pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height,
PIX_SRC, mask, std::max(0, mask_x), std::max(0, mask_y));
Pix *resized_mask = pixCreate(width, height, 1);
pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height, PIX_SRC,
mask, std::max(0, mask_x), std::max(0, mask_y));
pixDestroy(&mask);
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
2 * padding + 1);
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 2 * padding + 1);
pixInvert(resized_mask, resized_mask);
pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
pixDestroy(&resized_mask);
@ -493,14 +486,13 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
*/
bool PageIterator::Baseline(PageIteratorLevel level,
int* x1, int* y1, int* x2, int* y2) const {
if (it_->word() == nullptr) return false; // Already at the end!
ROW* row = it_->row()->row;
WERD* word = it_->word()->word;
TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
? word->bounding_box()
: row->bounding_box();
bool PageIterator::Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const {
if (it_->word() == nullptr)
return false; // Already at the end!
ROW *row = it_->row()->row;
WERD *word = it_->word()->word;
TBOX box =
(level == RIL_WORD || level == RIL_SYMBOL) ? word->bounding_box() : row->bounding_box();
int left = box.left();
ICOORD startpt(left, static_cast<int16_t>(row->base_line(left) + 0.5));
int right = box.right();
@ -519,7 +511,7 @@ void PageIterator::Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const {
BLOCK* block = it_->block()->block;
BLOCK *block = it_->block()->block;
// Orientation
FCOORD up_in_image(0.0, 1.0);
@ -541,30 +533,23 @@ void PageIterator::Orientation(tesseract::Orientation *orientation,
// Writing direction
bool is_vertical_text = (block->classify_rotation().x() == 0.0);
bool right_to_left = block->right_to_left();
*writing_direction =
is_vertical_text
? WRITING_DIRECTION_TOP_TO_BOTTOM
: (right_to_left
? WRITING_DIRECTION_RIGHT_TO_LEFT
: WRITING_DIRECTION_LEFT_TO_RIGHT);
*writing_direction = is_vertical_text ? WRITING_DIRECTION_TOP_TO_BOTTOM
: (right_to_left ? WRITING_DIRECTION_RIGHT_TO_LEFT
: WRITING_DIRECTION_LEFT_TO_RIGHT);
// Textline Order
const bool is_mongolian = false; // TODO(eger): fix me
*textline_order = is_vertical_text
? (is_mongolian
? TEXTLINE_ORDER_LEFT_TO_RIGHT
: TEXTLINE_ORDER_RIGHT_TO_LEFT)
: TEXTLINE_ORDER_TOP_TO_BOTTOM;
const bool is_mongolian = false; // TODO(eger): fix me
*textline_order = is_vertical_text ? (is_mongolian ? TEXTLINE_ORDER_LEFT_TO_RIGHT
: TEXTLINE_ORDER_RIGHT_TO_LEFT)
: TEXTLINE_ORDER_TOP_TO_BOTTOM;
// Deskew angle
FCOORD skew = block->skew(); // true horizontal for textlines
FCOORD skew = block->skew(); // true horizontal for textlines
*deskew_angle = -skew.angle();
}
void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
bool *is_list_item,
bool *is_crown,
int *first_line_indent) const {
void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, bool *is_list_item,
bool *is_crown, int *first_line_indent) const {
*just = tesseract::JUSTIFICATION_UNKNOWN;
if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
!it_->row()->row->para()->model)
@ -573,8 +558,7 @@ void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
PARA *para = it_->row()->row->para();
*is_list_item = para->is_list_item;
*is_crown = para->is_very_first_or_continuation;
*first_line_indent = para->model->first_indent() -
para->model->body_indent();
*first_line_indent = para->model->first_indent() - para->model->body_indent();
*just = para->model->justification();
}
@ -583,7 +567,7 @@ void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
* moves the iterator to the given offset.
*/
void PageIterator::BeginWord(int offset) {
WERD_RES* word_res = it_->word();
WERD_RES *word_res = it_->word();
if (word_res == nullptr) {
// This is a non-text block, so there is no word.
word_length_ = 0;
@ -597,9 +581,8 @@ void PageIterator::BeginWord(int offset) {
word_length_ = word_res->best_choice->length();
if (word_res->box_word != nullptr) {
if (word_res->box_word->length() != word_length_) {
tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
word_length_, word_res->best_choice->unichar_string().c_str(),
word_res->box_word->length());
tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ", word_length_,
word_res->best_choice->unichar_string().c_str(), word_res->box_word->length());
word_res->box_word->bounding_box().print();
}
ASSERT_HOST(word_res->box_word->length() == word_length_);
@ -613,7 +596,8 @@ void PageIterator::BeginWord(int offset) {
word_ = word_res->word;
ASSERT_HOST(word_->cblob_list() != nullptr);
word_length_ = word_->cblob_list()->length();
if (cblob_it_ == nullptr) cblob_it_ = new C_BLOB_IT;
if (cblob_it_ == nullptr)
cblob_it_ = new C_BLOB_IT;
cblob_it_->set_to_list(word_->cblob_list());
}
for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
@ -631,4 +615,4 @@ bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) {
}
}
} // namespace tesseract.
} // namespace tesseract.

View File

@ -17,28 +17,30 @@
**********************************************************************/
#ifdef _WIN32
#ifndef unlink
#include <io.h>
#endif
# ifndef unlink
# include <io.h>
# endif
#else
#include <unistd.h>
#endif // _WIN32
# include <unistd.h>
#endif // _WIN32
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
# include "config_auto.h"
#endif
#include "allheaders.h"
#include <allheaders.h>
#include "blobbox.h"
#include "blread.h"
#include "colfind.h"
#include "debugpixa.h"
#include "equationdetect.h"
#ifndef DISABLED_LEGACY_ENGINE
# include "equationdetect.h"
#endif
#include <tesseract/osdetect.h>
#include "imagefind.h"
#include "linefind.h"
#include "makerow.h"
#include <tesseract/osdetect.h>
#include "tabvector.h"
#include "tesseractclass.h"
#include "tessvars.h"
@ -56,19 +58,19 @@ const int kMaxCircleErosions = 8;
// The returned pix must be pixDestroyed after use. nullptr may be returned
// if the image doesn't meet the trivial conditions that it uses to determine
// success.
static Pix* RemoveEnclosingCircle(Pix* pixs) {
Pix* pixsi = pixInvert(nullptr, pixs);
Pix* pixc = pixCreateTemplate(pixs);
static Pix *RemoveEnclosingCircle(Pix *pixs) {
Pix *pixsi = pixInvert(nullptr, pixs);
Pix *pixc = pixCreateTemplate(pixs);
pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
pixSeedfillBinary(pixc, pixc, pixsi, 4);
pixInvert(pixc, pixc);
pixDestroy(&pixsi);
Pix* pixt = pixAnd(nullptr, pixs, pixc);
Pix *pixt = pixAnd(nullptr, pixs, pixc);
l_int32 max_count;
pixCountConnComp(pixt, 8, &max_count);
// The count has to go up before we start looking for the minimum.
l_int32 min_count = INT32_MAX;
Pix* pixout = nullptr;
Pix *pixout = nullptr;
for (int i = 1; i < kMaxCircleErosions; i++) {
pixDestroy(&pixt);
pixErodeBrick(pixc, pixc, 3, 3);
@ -78,12 +80,12 @@ static Pix* RemoveEnclosingCircle(Pix* pixs) {
if (i == 1 || count > max_count) {
max_count = count;
min_count = count;
} else if (i > 1 && count < min_count) {
} else if (count < min_count) {
min_count = count;
pixDestroy(&pixout);
pixout = pixCopy(nullptr, pixt); // Save the best.
pixout = pixCopy(nullptr, pixt); // Save the best.
} else if (count >= min_count) {
break; // We have passed by the best.
break; // We have passed by the best.
}
}
pixDestroy(&pixt);
@ -96,19 +98,17 @@ static Pix* RemoveEnclosingCircle(Pix* pixs) {
* pix_binary_ is used as the source image and should not be nullptr.
* On return the blocks list owns all the constructed page layout.
*/
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr) {
int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess,
OSResults *osr) {
ASSERT_HOST(pix_binary_ != nullptr);
int width = pixGetWidth(pix_binary_);
int height = pixGetHeight(pix_binary_);
// Get page segmentation mode.
auto pageseg_mode = static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation.
if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
input_file != nullptr && input_file->length() > 0) {
STRING name = *input_file;
const char* lastdot = strrchr(name.c_str(), '.');
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file[0] != '\0') {
std::string name = input_file;
const char *lastdot = strrchr(name.c_str(), '.');
if (lastdot != nullptr)
name[lastdot - name.c_str()] = '\0';
read_unlv_file(name, width, height, blocks);
@ -117,7 +117,7 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
// No UNLV file present. Work according to the PageSegMode.
// First make a single block covering the whole image.
BLOCK_IT block_it(blocks);
auto* block = new BLOCK("", true, 0, 0, 0, 0, width, height);
auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
block->set_right_to_left(right_to_left());
block_it.add_to_end(block);
} else {
@ -135,9 +135,9 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
TO_BLOCK_LIST to_blocks;
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
PSM_SPARSE(pageseg_mode)) {
auto_page_seg_ret_val = AutoPageSeg(
pageseg_mode, blocks, &to_blocks,
enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
auto_page_seg_ret_val =
AutoPageSeg(pageseg_mode, blocks, &to_blocks,
enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
if (pageseg_mode == PSM_OSD_ONLY)
return auto_page_seg_ret_val;
// To create blobs from the image region bounds uncomment this line:
@ -146,7 +146,7 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
if (pageseg_mode == PSM_CIRCLE_WORD) {
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
Pix *pixcleaned = RemoveEnclosingCircle(pix_binary_);
if (pixcleaned != nullptr) {
pixDestroy(&pix_binary_);
pix_binary_ = pixcleaned;
@ -161,15 +161,13 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
if (blocks->empty()) {
if (textord_debug_tabfind)
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
return 0; // AutoPageSeg found an empty page.
}
bool splitting =
pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
bool splitting = pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
bool cjk_mode = textord_use_cjk_fp_model;
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
pix_thresholds_, pix_grey_, splitting || cjk_mode,
&diacritic_blobs, blocks, &to_blocks);
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_,
pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks);
return auto_page_seg_ret_val;
}
@ -197,42 +195,42 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
* another Tesseract that was initialized especially for osd, and the results
* will be output into osr (orientation and script result).
*/
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
TO_BLOCK_LIST* to_blocks,
BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
OSResults* osr) {
Pix* photomask_pix = nullptr;
Pix* musicmask_pix = nullptr;
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) {
Pix *photomask_pix = nullptr;
Pix *musicmask_pix = nullptr;
// The blocks made by the ColumnFinder. Moved to blocks before return.
BLOCK_LIST found_blocks;
TO_BLOCK_LIST temp_blocks;
ColumnFinder* finder = SetupPageSegAndDetectOrientation(
ColumnFinder *finder = SetupPageSegAndDetectOrientation(
pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
pageseg_apply_music_mask ? &musicmask_pix : nullptr);
int result = 0;
if (finder != nullptr) {
TO_BLOCK_IT to_block_it(&temp_blocks);
TO_BLOCK* to_block = to_block_it.data();
TO_BLOCK *to_block = to_block_it.data();
if (musicmask_pix != nullptr) {
// TODO(rays) pass the musicmask_pix into FindBlocks and mark music
// blocks separately. For now combine with photomask_pix.
pixOr(photomask_pix, photomask_pix, musicmask_pix);
}
#ifndef DISABLED_LEGACY_ENGINE
if (equ_detect_) {
finder->SetEquationDetect(equ_detect_);
}
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
to_block, photomask_pix, pix_thresholds_,
pix_grey_, &pixa_debug_, &found_blocks,
diacritic_blobs, to_blocks);
#endif // ndef DISABLED_LEGACY_ENGINE
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block,
photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_,
&found_blocks, diacritic_blobs, to_blocks);
if (result >= 0)
finder->GetDeskewVectors(&deskew_, &reskew_);
delete finder;
}
pixDestroy(&photomask_pix);
pixDestroy(&musicmask_pix);
if (result < 0) return result;
if (result < 0)
return result;
blocks->clear();
BLOCK_IT block_it(blocks);
@ -243,12 +241,11 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
// Helper adds all the scripts from sid_set converted to ids from osd_set to
// allowed_ids.
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
const UNICHARSET& osd_set,
GenericVector<int>* allowed_ids) {
static void AddAllScriptsConverted(const UNICHARSET &sid_set, const UNICHARSET &osd_set,
std::vector<int> *allowed_ids) {
for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
if (i != sid_set.null_sid()) {
const char* script = sid_set.get_script_from_script_id(i);
const char *script = sid_set.get_script_from_script_id(i);
allowed_ids->push_back(osd_set.get_script_id_from_name(script));
}
}
@ -258,19 +255,20 @@ static void AddAllScriptsConverted(const UNICHARSET& sid_set,
* Sets up auto page segmentation, determines the orientation, and corrects it.
* Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
* facilitate testing.
* photo_mask_pix is a pointer to a nullptr pointer that will be filled on return
* with the leptonica photo mask, which must be pixDestroyed by the caller.
* to_blocks is an empty list that will be filled with (usually a single)
* block that is used during layout analysis. This ugly API is required
* photo_mask_pix is a pointer to a nullptr pointer that will be filled on
* return with the leptonica photo mask, which must be pixDestroyed by the
* caller. to_blocks is an empty list that will be filled with (usually a
* single) block that is used during layout analysis. This ugly API is required
* because of the possibility of a unlv zone file.
* TODO(rays) clean this up.
* See AutoPageSeg for other arguments.
* The returned ColumnFinder must be deleted after use.
*/
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
Pix** music_mask_pix) {
ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode,
BLOCK_LIST *blocks, Tesseract *osd_tess,
OSResults *osr, TO_BLOCK_LIST *to_blocks,
Pix **photo_mask_pix,
Pix **music_mask_pix) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
@ -282,19 +280,25 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
pixa_debug_.AddPix(pix_binary_, "PageSegInput");
}
// Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_,
textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix,
&v_lines, &h_lines);
LineFinder::FindAndRemoveLines(source_resolution_, textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix, &v_lines, &h_lines);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoLines");
}
// Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoImages");
Pix *pix_no_image_ = nullptr;
if (*photo_mask_pix != nullptr) {
pix_no_image_ = pixSubtract(nullptr, pix_binary_, *photo_mask_pix);
} else {
pix_no_image_ = pixClone(pix_binary_);
}
pixa_debug_.AddPix(pix_no_image_, "NoImages");
pixDestroy(&pix_no_image_);
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
if (!PSM_COL_FIND_ENABLED(pageseg_mode))
v_lines.clear();
// The rest of the algorithm uses the usual connected components.
textord_.find_components(pix_binary_, blocks, to_blocks);
@ -303,9 +307,9 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
// There must be exactly one input block.
// TODO(rays) handle new textline finding with a UNLV zone file.
ASSERT_HOST(to_blocks->singleton());
TO_BLOCK* to_block = to_block_it.data();
TO_BLOCK *to_block = to_block_it.data();
TBOX blkbox = to_block->block->pdblk.bounding_box();
ColumnFinder* finder = nullptr;
ColumnFinder *finder = nullptr;
int estimated_resolution = source_resolution_;
if (source_resolution_ == kMinCredibleResolution) {
// Try to estimate resolution from typical body text size.
@ -317,11 +321,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
}
if (to_block->line_size >= 2) {
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
blkbox.botleft(), blkbox.topright(),
estimated_resolution, textord_use_cjk_fp_model,
textord_tabfind_aligned_gap_fraction, &v_lines,
&h_lines, vertical_x, vertical_y);
finder = new ColumnFinder(static_cast<int>(to_block->line_size), blkbox.botleft(),
blkbox.topright(), estimated_resolution, textord_use_cjk_fp_model,
textord_tabfind_aligned_gap_fraction, &v_lines, &h_lines, vertical_x,
vertical_y);
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
@ -337,23 +340,20 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
// We want the text lines horizontal, (vertical text indicates vertical
// textlines) which may conflict (eg vertically written CJK).
int osd_orientation = 0;
bool vertical_text = textord_tabfind_force_vertical_text ||
pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
if (!vertical_text && textord_tabfind_vertical_text &&
PSM_ORIENTATION_ENABLED(pageseg_mode)) {
vertical_text =
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
to_block, &osd_blobs);
bool vertical_text =
textord_tabfind_force_vertical_text || pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
if (!vertical_text && textord_tabfind_vertical_text && PSM_ORIENTATION_ENABLED(pageseg_mode)) {
vertical_text = finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio, to_block,
&osd_blobs);
}
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) {
GenericVector<int> osd_scripts;
std::vector<int> osd_scripts;
if (osd_tess != this) {
// We are running osd as part of layout analysis, so constrain the
// scripts to those allowed by *this.
AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
for (int s = 0; s < sub_langs_.size(); ++s) {
AddAllScriptsConverted(sub_langs_[s]->unicharset,
osd_tess->unicharset, &osd_scripts);
AddAllScriptsConverted(sub_langs_[s]->unicharset, osd_tess->unicharset, &osd_scripts);
}
}
os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
@ -365,20 +365,17 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
double osd_score = osr->orientations[osd_orientation];
double osd_margin = min_orientation_margin * 2;
for (int i = 0; i < 4; ++i) {
if (i != osd_orientation &&
osd_score - osr->orientations[i] < osd_margin) {
if (i != osd_orientation && osd_score - osr->orientations[i] < osd_margin) {
osd_margin = osd_score - osr->orientations[i];
}
}
int best_script_id = osr->best_result.script_id;
const char* best_script_str =
osd_tess->unicharset.get_script_from_script_id(best_script_id);
const char *best_script_str = osd_tess->unicharset.get_script_from_script_id(best_script_id);
bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
best_script_id == osd_tess->unicharset.hiragana_sid() ||
best_script_id == osd_tess->unicharset.katakana_sid() ||
strcmp("Japanese", best_script_str) == 0 ||
strcmp("Korean", best_script_str) == 0 ||
strcmp("Hangul", best_script_str) == 0;
best_script_id == osd_tess->unicharset.hiragana_sid() ||
best_script_id == osd_tess->unicharset.katakana_sid() ||
strcmp("Japanese", best_script_str) == 0 ||
strcmp("Korean", best_script_str) == 0 || strcmp("Hangul", best_script_str) == 0;
if (cjk) {
finder->set_cjk_script(true);
}
@ -386,8 +383,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
// The margin is weak.
if (!cjk && !vertical_text && osd_orientation == 2) {
// upside down latin text is improbable with such a weak margin.
tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
"Don't rotate.\n", osd_margin);
tprintf(
"OSD: Weak margin (%.2f), horiz textlines, not CJK: "
"Don't rotate.\n",
osd_margin);
osd_orientation = 0;
} else {
tprintf(
@ -400,10 +399,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
osd_blobs.shallow_clear();
finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
#endif // ndef DISABLED_LEGACY_ENGINE
#endif // ndef DISABLED_LEGACY_ENGINE
}
return finder;
}
} // namespace tesseract.
} // namespace tesseract.

View File

@ -28,16 +28,14 @@ namespace tesseract {
* to each word that overlaps the selection_box.
*/
void Tesseract::process_selected_words(
PAGE_RES* page_res, // blocks to check
TBOX& selection_box,
bool (tesseract::Tesseract::* word_processor)(PAGE_RES_IT* pr_it)) {
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != nullptr;
page_res_it.forward()) {
WERD* word = page_res_it.word()->word;
PAGE_RES *page_res, // blocks to check
TBOX &selection_box, bool (tesseract::Tesseract::*word_processor)(PAGE_RES_IT *pr_it)) {
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != nullptr; page_res_it.forward()) {
WERD *word = page_res_it.word()->word;
if (word->bounding_box().overlap(selection_box)) {
if (!(this->*word_processor)(&page_res_it))
return;
}
}
}
} // namespace tesseract
} // namespace tesseract

View File

@ -18,32 +18,31 @@
#include "tesseractclass.h"
#ifdef _OPENMP
#include <omp.h>
#endif // _OPENMP
# include <omp.h>
#endif // _OPENMP
namespace tesseract {
struct BlobData {
BlobData() = default;
BlobData(int index, Tesseract* tess, const WERD_RES& word)
: blob(word.chopped_word->blobs[index]),
tesseract(tess),
choices(&(*word.ratings)(index, index)) {}
BlobData(int index, Tesseract *tess, const WERD_RES &word)
: blob(word.chopped_word->blobs[index])
, tesseract(tess)
, choices(&(*word.ratings)(index, index)) {}
TBLOB* blob = nullptr;
Tesseract* tesseract = nullptr;
BLOB_CHOICE_LIST** choices = nullptr;
TBLOB *blob = nullptr;
Tesseract *tesseract = nullptr;
BLOB_CHOICE_LIST **choices = nullptr;
};
void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
void Tesseract::PrerecAllWordsPar(const std::vector<WordData> &words) {
// Prepare all the blobs.
GenericVector<BlobData> blobs;
for (int w = 0; w < words.size(); ++w) {
if (words[w].word->ratings != nullptr &&
words[w].word->ratings->get(0, 0) == nullptr) {
std::vector<BlobData> blobs;
for (size_t w = 0; w < words.size(); ++w) {
if (words[w].word->ratings != nullptr && words[w].word->ratings->get(0, 0) == nullptr) {
for (int s = 0; s < words[w].lang_words.size(); ++s) {
Tesseract* sub = s < sub_langs_.size() ? sub_langs_[s] : this;
const WERD_RES& word = *words[w].lang_words[s];
Tesseract *sub = s < sub_langs_.size() ? sub_langs_[s] : this;
const WERD_RES &word = *words[w].lang_words[s];
for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) {
blobs.push_back(BlobData(b, sub, word));
}
@ -53,19 +52,19 @@ void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
// Pre-classify all the blobs.
if (tessedit_parallelize > 1) {
#ifdef _OPENMP
#pragma omp parallel for num_threads(10)
#endif // _OPENMP
for (int b = 0; b < blobs.size(); ++b) {
# pragma omp parallel for num_threads(10)
#endif // _OPENMP
for (size_t b = 0; b < blobs.size(); ++b) {
*blobs[b].choices =
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, nullptr);
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", ScrollView::WHITE, nullptr);
}
} else {
// TODO(AMD) parallelize this.
for (int b = 0; b < blobs.size(); ++b) {
for (size_t b = 0; b < blobs.size(); ++b) {
*blobs[b].choices =
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, nullptr);
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", ScrollView::WHITE, nullptr);
}
}
}
} // namespace tesseract.
} // namespace tesseract.

File diff suppressed because it is too large Load Diff

View File

@ -20,44 +20,44 @@
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
#include "rect.h" // for TBOX
#include <tesseract/strngs.h> // for STRING
class PARA_LIST;
class ParagraphModel;
struct PARA;
template <typename T> class GenericVector;
#include <list>
#include <string>
#include "rect.h" // for TBOX
namespace tesseract {
class MutableIterator;
class ParagraphModel;
class PARA_LIST;
struct PARA;
template <typename T>
class GenericVector;
// This structure captures all information needed about a text line for the
// purposes of paragraph detection. It is meant to be exceedingly light-weight
// so that we can easily test paragraph detection independent of the rest of
// Tesseract.
class RowInfo {
public:
public:
// Constant data derived from Tesseract output.
STRING text; // the full UTF-8 text of the line.
bool ltr; // whether the majority of the text is left-to-right
// TODO(eger) make this more fine-grained.
std::string text; // the full UTF-8 text of the line.
bool ltr; // whether the majority of the text is left-to-right
// TODO(eger) make this more fine-grained.
bool has_leaders; // does the line contain leader dots (.....)?
bool has_drop_cap; // does the line have a drop cap?
int pix_ldistance; // distance to the left pblock boundary in pixels
int pix_rdistance; // distance to the right pblock boundary in pixels
float pix_xheight; // guessed xheight for the line
bool has_leaders; // does the line contain leader dots (.....)?
bool has_drop_cap; // does the line have a drop cap?
int pix_ldistance; // distance to the left pblock boundary in pixels
int pix_rdistance; // distance to the right pblock boundary in pixels
float pix_xheight; // guessed xheight for the line
int average_interword_space; // average space between words in pixels.
int num_words;
TBOX lword_box; // in normalized (horiz text rows) space
TBOX rword_box; // in normalized (horiz text rows) space
TBOX lword_box; // in normalized (horiz text rows) space
TBOX rword_box; // in normalized (horiz text rows) space
STRING lword_text; // the UTF-8 text of the leftmost werd
STRING rword_text; // the UTF-8 text of the rightmost werd
std::string lword_text; // the UTF-8 text of the leftmost werd
std::string rword_text; // the UTF-8 text of the rightmost werd
// The text of a paragraph typically starts with the start of an idea and
// ends with the end of an idea. Here we define paragraph as something that
@ -88,22 +88,20 @@ class RowInfo {
// paragraphs - this is the actual list of PARA objects.
// models - the list of paragraph models referenced by the PARA objects.
// caller is responsible for deleting the models.
void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs,
GenericVector<ParagraphModel *> *models);
TESS_API
void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners, PARA_LIST *paragraphs,
std::vector<ParagraphModel *> *models);
// Given a MutableIterator to the start of a block, run DetectParagraphs on
// that block and commit the results to the underlying ROW and BLOCK structs,
// saving the ParagraphModels in models. Caller owns the models.
// We use unicharset during the function to answer questions such as "is the
// first letter of this word upper case?"
void DetectParagraphs(int debug_level,
bool after_text_recognition,
const MutableIterator *block_start,
GenericVector<ParagraphModel *> *models);
TESS_API
void DetectParagraphs(int debug_level, bool after_text_recognition,
const MutableIterator *block_start, std::vector<ParagraphModel *> *models);
} // namespace
} // namespace tesseract
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_

Some files were not shown because too many files have changed in this diff Show More