Merge branch 'main' into improve-build-for-android-integrate-with-cpu_features-library

2024-11-23 18:49:08 +08:00 · 2022-06-23 16:43:37 +02:00 · 2022-06-23 16:43:37 +02:00 · a96b2abb1a
commit a96b2abb1a
parent 12cd761ff6 18fb5aa977
150 changed files with 2440 additions and 2350 deletions
--- a/.github/workflows/autotools-macos.yml
+++ b/.github/workflows/autotools-macos.yml
@ -5,6 +5,7 @@ on:
  #push:
  schedule:
    - cron: 0 20 * * *
+  workflow_dispatch:
 jobs:

  brew:
@ -42,7 +43,7 @@ jobs:

    - name: Configure Tesseract
      run: |
-           ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2' "PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
+           ./configure '--disable-shared' '--disable-openmp' '--disable-doc' '--with-pic' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

    - name: Make and Install Tesseract
      run: |
@ -130,6 +131,9 @@ jobs:
    - name: Install Macports
      run: |
        curl -LO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install
+        # --remove-brew does not remove the Homebrew entries in bin,
+        # so remove them now.
+        rm -v $(brew --prefix)/bin/*

    - name: Install Dependencies
      run: |
--- a/.github/workflows/autotools-openmp.yml
+++ b/.github/workflows/autotools-openmp.yml
@ -0,0 +1,83 @@
+name: autotools-openmp
+# autotools on Ubuntu - run benchmark test with '--enable-openmp', no training tools
+on:
+  #push:
+  #schedule:
+  #  - cron: 0 20 * * *
+  workflow_dispatch:
+jobs:
+
+  linux:
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - { name: 18.04-openmp, os: ubuntu-18.04 }
+          - { name: 20.04-openmp, os: ubuntu-20.04 }
+
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        submodules: recursive
+
+    - name: Download fonts, tessdata and langdata required for tests
+      run: |
+           git clone https://github.com/egorpugin/tessdata tessdata_unittest
+           cp tessdata_unittest/fonts/* test/testing/
+           mv tessdata_unittest/* ../
+
+    - name: Install dependencies
+      run: |
+           sudo apt-get update
+           sudo apt-get install autoconf libleptonica-dev -y
+           sudo apt-get install libpango1.0-dev -y
+           sudo apt-get install cabextract libarchive-dev -y
+           sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
+
+    - name: Setup Tesseract
+      run: |
+           mkdir -p m4
+           ./autogen.sh
+
+    - name: Configure Tesseract
+      run: |
+           ./configure '--disable-shared' '--enable-openmp' '--disable-doc' 'CXX=g++' 'CXXFLAGS=-g -O2'
+           grep -i OpenMP config.log
+
+    - name: Make and Install Tesseract
+      run: |
+           make
+           sudo make install
+
+    - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
+      run: |
+           wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
+           printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list
+
+    - name: Run Tesseract using image from issue 263 with tessdata_fast
+      run: |
+           lscpu
+           free
+           g++ --version
+           tesseract -v
+           time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
+           echo "tessdata_fast"
+
+    - name: Run Tesseract using image from issue 263 with tessdata_fast and OpenMP Thread Limit
+      run: |
+           for lmt in {1..3}; do
+                time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_fast"
+           done
+
+    - name: Run Tesseract using image from issue 263 with tessdata_best and OpenMP Thread Limit
+      run: |
+           for lmt in {1..3}; do
+                time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata_best"
+           done
+
+    - name: Run Tesseract using image from issue 263 with tessdata and OpenMP Thread Limit
+      run: |
+           for lmt in {1..3}; do
+                time OMP_THREAD_LIMIT=$lmt tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1 && echo "OMP_THREAD_LIMIT=" $lmt "tessdata"
+           done
--- a/.github/workflows/cmake-win64.yml
+++ b/.github/workflows/cmake-win64.yml
@ -5,93 +5,81 @@ on:
  #push:
  schedule:
    - cron: 0 23 * * *
+  workflow_dispatch:
+
+env:
+  ILOC: d:/a/local

 jobs:
  build:
-    name: ${{ matrix.config.name }}
-    runs-on: ${{ matrix.config.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-        - {
-            name: "Windows Latest MSVC - cmake",
-            os: windows-latest,
-            cc: "cl",
-            cxx: "cl",
-            environment_script: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat"
-          }
-
+    name: cmake-win64
+    runs-on: windows-latest
    steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
+      - uses: ilammy/setup-nasm@v1
+      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+        uses: actions/checkout@v3
        with:
          submodules: recursive
-
-      - name: Build and Install leptonica dependencies
+      - name: Get the version
+        id: get_version
+        run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//}
+      - name: Setup Instalation Location
+        run: |
+             mkdir ${{env.ILOC}}
+      - name: Build and Install zlib-ng
        shell: cmd
        run: |
-             mkdir d:/a/local
-             set PKG_CONFIG_PATH=d:/a/local/lib/pkgconfig
-             echo "PKG_CONFIG_PATH=d:/a/local/lib/pkgconfig" >> $GITHUB_ENV
-
-      - name: Build and Install zlib
-        shell: cmd
-        run: |
-             curl -sSL -o zlib1211.zip https://zlib.net/zlib1211.zip
-             unzip.exe zlib1211.zip
-             cd zlib-1.2.11
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             git clone --depth 1 https://github.com/zlib-ng/zlib-ng.git
+             cd zlib-ng
+             cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
+             cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF -DINSTALL_UTILS=OFF
             cmake --build build --config Release --target install
+             cd ..

      - name: Build and Install libpng
        shell: cmd
        run: |
             curl -sSL -o lpng1637.zip https://download.sourceforge.net/libpng/lpng1637.zip
-             unzip.exe lpng1637.zip
+             unzip.exe  -qq lpng1637.zip
             cd lpng1637
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
-             cmake --build build --config Release --target install
-
-      - name: Build and Install webp
-        shell: cmd
-        run: |
-             git clone --depth 1 https://github.com/webmproject/libwebp.git && cd libwebp
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF
             cmake --build build --config Release --target install
+             cd ..

      - name: Build and Install giflib
        shell: cmd
        run: |
             curl -sSL -o giflib-master.zip https://codeload.github.com/xbmc/giflib/zip/master
-             unzip giflib-master.zip
+             unzip -qq giflib-master.zip
             cd giflib-master
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}}
             cmake --build build --config Release --target install
+             cd ..

      - name: Build and Install libjpeg
        shell: cmd
        run: |
             git clone --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git
             cd libjpeg-turbo
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWITH_TURBOJPEG=OFF -DENABLE_SHARED=OFF
             cmake --build build --config Release --target install
+             cd ..

-      - name: Build and Install openjpeg
+      - name: Build and Install webp
        shell: cmd
        run: |
-             git clone --depth 1 https://github.com/uclouvain/openjpeg.git
-             cd openjpeg
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             git clone --depth 1 https://github.com/webmproject/libwebp.git
+             cd libwebp
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DWEBP_BUILD_ANIM_UTILS=OFF -DWEBP_BUILD_CWEBP=OFF -DWEBP_BUILD_DWEBP=OFF -DWEBP_BUILD_GIF2WEBP=OFF -DWEBP_BUILD_IMG2WEBP=OFF -DWEBP_BUILD_VWEBP=OFF -DWEBP_BUILD_WEBPMUX=OFF -DWEBP_BUILD_WEBPINFO=OFF -DWEBP_BUILD_EXTRAS=OFF
             cmake --build build --config Release --target install
             cd ..

      - name: Build and Install jbigkit
        shell: cmd
        run: |
-             git clone --depth 1 https://github.com/zdenop/jbigkit
-             cd jbigkit-2.1
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             git clone --depth 1 https://github.com/zdenop/jbigkit.git
+             cd jbigkit
+             cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_PROGRAMS=OFF -DBUILD_TOOLS=OFF -DCMAKE_WARN_DEPRECATED=OFF
             cmake --build build --config Release --target install
             cd ..

@ -99,17 +87,26 @@ jobs:
        shell: cmd
        run: |
             git clone --depth 1 https://github.com/facebook/zstd.git
-             cd zstd
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             cd zstd/build/cmake
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DZSTD_BUILD_PROGRAMS=OFF -DBUILD_TESTING=OFF
             cmake --build build --config Release --target install
             cd ..

      - name: Build and Install libtiff
        shell: cmd
        run: |
-             git clone --depth 1 https://gitlab.com/libtiff/libtiff
+             git clone --depth 1 https://gitlab.com/libtiff/libtiff.git
             cd libtiff
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF
+             cmake --build build --config Release --target install
+             cd ..
+
+      - name: Build and Install openjpeg
+        shell: cmd
+        run: |
+             git clone --depth 1 https://github.com/uclouvain/openjpeg.git
+             cd openjpeg
+             cmake -Bbuild -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DBUILD_CODEC=OFF -DBUILD_TESTING=OFF -DBUILD_DOC=OFF -DCMAKE_WARN_DEPRECATED=OFF
             cmake --build build --config Release --target install
             cd ..

@ -117,26 +114,45 @@ jobs:
        shell: cmd
        run: |
             echo "Building leptonica..."
-             git clone --depth 1 https://github.com/DanBloomberg/leptonica.git && cd leptonica
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DBUILD_PROG=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             git clone --depth 1 https://github.com/DanBloomberg/leptonica.git
+             cd leptonica
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_PROG=OFF -DBUILD_SHARED_LIBS=ON
             cmake --build build --config Release --target install

+      - name: Build and Install libarchive
+        shell: cmd
+        run: |
+             git clone --depth 1 https://github.com/libarchive/libarchive.git
+             cd libarchive
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DENABLE_TEST=OFF
+             cmake --build build --config Release --target install
+
+      - name: Remove not needed tools Before building tesseract
+        shell: cmd
+        run: >
+             rm -Rf ${{env.ILOC}}/bin/*.exe
+
      - name: Build and Install tesseract
        shell: cmd
        run: |
-             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DBUILD_TRAINING_TOOLS=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DOPENMP_BUILD=OFF -DCMAKE_PREFIX_PATH=d:/a/local -DCMAKE_INSTALL_PREFIX=d:/a/local
+             REM cmake -E env CXXFLAGS="/Qpar /fp:fast"
+             cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DENABLE_LTO=ON -DBUILD_TRAINING_TOOLS=OFF -DFAST_FLOAT=ON -DGRAPHICS_DISABLED=ON -DOPENMP_BUILD=ON
             cmake --build build --config Release --target install

      - name: Display Tesseract Version and Test Command Line Usage
        shell: cmd
        run: |
-          git clone --depth 1 https://github.com/tesseract-ocr/tessconfigs
-          mkdir d:/a/local/share
-          move tessconfigs d:/a/local/share
-          curl -L https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output d:/a/local/share/tessconfigs/eng.traineddata
-          curl -L https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output d:/a/local/share/tessconfigs/osd.traineddata
-          set TESSDATA_PREFIX=d:/a/local/share/tessconfigs
-          set PATH=d:/a/local/bin;%PATH%
+          curl -L https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata
+          curl -L https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata
+          set TESSDATA_PREFIX=${{env.ILOC}}/share/tessdata
+          set PATH=${{env.ILOC}}/bin;%PATH%
          tesseract -v
          tesseract --list-langs
          tesseract test/testing/phototest.tif -
+          
+      - name: Upload Build Results
+        uses: actions/upload-artifact@v2
+        with:
+          name: tesseract-${{ steps.get_version.outputs.VERSION }}-VS2019_win64
+          path: ${{env.ILOC}}
+          retention-days: 5
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@ -50,23 +50,26 @@ jobs:
    steps:
      - name: Install compilers on Linux
        run: |
+             sudo apt-get update
             sudo apt-get install ${{ matrix.config.cxx }} -y
        if: runner.os == 'Linux'

-# sudo apt-get install libarchive-dev libcurl4-openssl-dev libcurl4 curl -y
      - name: Install dependencies on Linux
        run: |
           sudo apt-get install autoconf libleptonica-dev -y
+           sudo apt-get install libarchive-dev libcurl4-openssl-dev -y
           sudo apt-get install libpango1.0-dev -y
           sudo apt-get install cabextract -y
           sudo apt-get install ninja-build -y
+           cmake --version
        if: runner.os == 'Linux'

      - name: Install dependencies on macOS
        run: |
           brew install autoconf automake
           brew install leptonica
-           brew install cairo pango icu4c
+           brew install libarchive
+           brew install pango
           brew install cabextract
           brew install ninja
           ninja --version
@ -147,15 +150,15 @@ jobs:
        run: |
             export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
             cd test
-             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++11
+             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++11
             ./basicapitest
        if: runner.os == 'Linux'

      - name: Build and run basicapitest (macOS)
        run: |
-             export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
+             export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH"
             cd test
-             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" `pkg-config --cflags --libs tesseract lept ` -lcurl -pthread -std=c++11
+             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++11
             ./basicapitest
        if: runner.os == 'macOS'

--- a/.github/workflows/sw.yml
+++ b/.github/workflows/sw.yml
@ -13,7 +13,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: [windows-latest, windows-2022, ubuntu-20.04, macOS-latest]
+        os: [windows-2022, windows-2019, ubuntu-22.04, ubuntu-20.04, macOS-latest]

    steps:
    - uses: actions/checkout@v2
@ -22,50 +22,50 @@ jobs:
    - uses: egorpugin/sw-action@master

    - name: build
-      if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
+      if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
      run: ./sw -static -shared -platform x86,x64 -config d,r build

    - name: build
-      if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
+      if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
      run: ./sw -static -shared -config d,r build -Dwith-tests=1

    - name: download test data
      run: git clone https://github.com/egorpugin/tessdata tessdata_unittest

    - name: copy fonts
-      if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
+      if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
      run: cp tessdata_unittest/fonts/* test/testing/

    - name: copy fonts
-      if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
+      if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
      run: Copy-Item -Path "tessdata_unittest\fonts\*" -Destination "test\testing" -Recurse
      shell: pwsh

    - name: test
-      if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
+      if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
      run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
      continue-on-error: true

    - name: test-nightly
-      if: matrix.os != 'windows-latest' && matrix.os != 'windows-2022' && github.event.schedule=='0 0 * * *'
+      if: matrix.os != 'windows-2022' && matrix.os != 'windows-2019' && github.event.schedule=='0 0 * * *'
      run: ./sw -static -shared -config "d,r" test -Dwith-tests=1
      continue-on-error: true

    # windows tests hang here for some reason, investigate
    #- name: test
-      #if: matrix.os == 'windows-latest' || matrix.os == 'windows-2022'
+      #if: matrix.os == 'windows-2022' || matrix.os == 'windows-2019'
      #run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
      #continue-on-error: true

    - name: Upload Unit Test Results
-      if: always() && matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
+      if: always() && matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
      uses: actions/upload-artifact@v2
      with:
        name: Test Results (${{ matrix.os }})
        path: .sw/test/results.xml

    - name: Publish Test Report
-      if: always() && matrix.os != 'windows-latest' && matrix.os != 'windows-2022'
+      if: always() && matrix.os != 'windows-2022' && matrix.os != 'windows-2019'
      uses: mikepenz/action-junit-report@v1
      with:
        check_name: test (${{ matrix.os }})
--- a/.github/workflows/unittest-disablelegacy.yml
+++ b/.github/workflows/unittest-disablelegacy.yml
@ -24,10 +24,11 @@ jobs:

    - name: Install dependencies
      run: |
+           sudo apt-get update
           sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
           sudo apt-get install cabextract -y
           #sudo apt-get install libc++-7-dev libc++abi-7-dev -y
-           
+
    - name: Setup
      run: |
           mkdir -p m4
--- a/.github/workflows/unittest-macos.yml
+++ b/.github/workflows/unittest-macos.yml
@ -36,8 +36,7 @@ jobs:
      run: |
           ./configure '--disable-shared' '--with-pic' \
                'CXX=${{ matrix.config.cxx }}' \
-                'CXXFLAGS=-g -O2 -fsanitize=address,undefined' \
-                "PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/opt/libffi/lib/pkgconfig"
+                'CXXFLAGS=-g -O2 -fsanitize=address,undefined'

    - name: Make and Install Tesseract
      run: |
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@ -24,6 +24,7 @@ jobs:

    - name: Install dependencies (Linux)
      run: |
+           sudo apt-get update
           sudo apt-get install autoconf libleptonica-dev libpango1.0-dev -y
           sudo apt-get install cabextract -y

--- a/.gitignore
+++ b/.gitignore
@ -63,6 +63,7 @@ config_auto.h
 # ignore compilation files
 build/*
 /bin
+/cmake-*
 .deps
 .dirstamp
 /.libs
--- a/.travis.yml
+++ b/.travis.yml
@ -1,40 +0,0 @@
-# Travis CI configuration for Tesseract
-
-sudo: false
-notifications:
-  email: false
-language: cpp
-
-os: linux
-dist: focal
-arch:
-  - amd64
-  - arm64
-  - ppc64le
-  - s390x
-compiler:
-  - gcc
-  - clang
-env:
-cache:
-  directories:
-
-before_install:
-  - sudo apt-get install libleptonica-dev libpango1.0-dev libtiff5-dev -y
-
-install:
-
-script:
-  - mkdir build
-  - cd build
-  - cmake .. -DSW_BUILD=OFF
-  - make
-  - sudo make install
-
-#after_script: # let those commands trigger build errors
-  - tesseract -v
-  - text2image -v
-  - lstmtraining -v
-  - ls /home/travis/build/tesseract-ocr/tesseract/test/testing/*.tif
-  - wget https://github.com/egorpugin/tessdata/raw/master/tessdata/eng.traineddata
-  - tesseract /home/travis/build/tesseract-ocr/tesseract/test/testing/phototest.tif - -l eng --tessdata-dir ./
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
--- a/19
+++ b/19
@ -1,3 +1,22 @@
+2022-03-01 - V5.1.0
+  * Handle image and line regions in output formats ALTO, hOCR and text.
+  * New parameter curl_timeout for curl_easy_setopt.
+  * Build fixes and improvements.
+  * Catch nullptr in PageIterator::Orientation to improve robustness.
+  * Remove unused code.
+
+2022-01-06 - V5.0.1
+  * Add SPDX-License-Identifier to public include files.
+  * Support redirections when running OCR on a URL.
+  * Lots of fixes and improvements for cmake builds.
+    Distributions should use the autoconf build.
+  * Fix broken msys2 build with gcc 11.
+  * Fix parameter certainty_scale (was duplicated).
+  * Fix some compiler warnings and clean code.
+  * Correctly detect amd64 and i386 on FreeBSD.
+  * Add libarchive and libcurl in continuous integration actions.
+  * Update submodule googletest to release v1.11.0.
+
 2021-11-22 - V5.0.0
  * Faster training and recognition by default (float instead of
    double calculations)
--- a/Makefile.am
+++ b/Makefile.am
@ -107,7 +107,7 @@ libtesseract_la_LDFLAGS += $(libarchive_LIBS)
 libtesseract_la_LDFLAGS += $(libcurl_LIBS)
 libtesseract_la_LDFLAGS += $(TENSORFLOW_LIBS)
 if T_WIN
-libtesseract_la_LDFLAGS += -no-undefined -Wl,--as-needed -lws2_32
+libtesseract_la_LDFLAGS += -no-undefined -lws2_32
 else
 libtesseract_la_LDFLAGS += $(NOUNDEFINED)
 endif
@ -160,6 +160,14 @@ libtesseract_la_LIBADD += libtesseract_avx2.la
 noinst_LTLIBRARIES += libtesseract_avx2.la
 endif

+if HAVE_AVX512F
+libtesseract_avx512_la_CXXFLAGS = -mavx512f
+libtesseract_avx512_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
+libtesseract_avx512_la_SOURCES = src/arch/dotproductavx512.cpp
+libtesseract_la_LIBADD += libtesseract_avx512.la
+noinst_LTLIBRARIES += libtesseract_avx512.la
+endif
+
 if HAVE_FMA
 libtesseract_fma_la_CXXFLAGS = -mfma
 libtesseract_fma_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
@ -379,7 +387,6 @@ libtesseract_ccutil_la_SOURCES += src/ccutil/clst.cpp
 libtesseract_ccutil_la_SOURCES += src/ccutil/elst2.cpp
 libtesseract_ccutil_la_SOURCES += src/ccutil/elst.cpp
 libtesseract_ccutil_la_SOURCES += src/ccutil/errcode.cpp
-libtesseract_ccutil_la_SOURCES += src/ccutil/mainblk.cpp
 libtesseract_ccutil_la_SOURCES += src/ccutil/serialis.cpp
 libtesseract_ccutil_la_SOURCES += src/ccutil/scanutils.cpp
 libtesseract_ccutil_la_SOURCES += src/ccutil/tessdatamanager.cpp
--- a/README.md
+++ b/README.md
@ -1,8 +1,7 @@
 # Tesseract OCR

-[![Build Status](https://travis-ci.org/tesseract-ocr/tesseract.svg?branch=master)](https://travis-ci.org/tesseract-ocr/tesseract)
 [![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
-![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)<br>
+[![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)<br>
 [![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr)
 [![Code Quality: Cpp](https://img.shields.io/lgtm/grade/cpp/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/context:cpp)
 [![Total Alerts](https://img.shields.io/lgtm/alerts/g/tesseract-ocr/tesseract.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tesseract-ocr/tesseract/alerts)
@ -34,7 +33,7 @@ on line recognition, but also still supports the legacy Tesseract OCR engine of
 Tesseract 3 which works by recognizing character patterns. Compatibility with
 Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0).
 It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example
-those from the tessdata repository.
+those from the [tessdata](https://github.com/tesseract-ocr/tessdata) repository.

 The lead developer is Ray Smith. The maintainer is Zdenko Podobny.
 For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS)
@ -42,7 +41,9 @@ and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/gr

 Tesseract has **unicode (UTF-8) support**, and can **recognize more than 100 languages** "out of the box".

-Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV. The main branch also has experimental support for ALTO (XML) output.
+Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF.
+
+Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO (the latter since version 4.1.0).

 You should note that in many cases, in order to get better OCR results,
 you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract.
@ -60,7 +61,11 @@ at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some
 more changes made in 1996 to port to Windows, and some C++izing in 1998.
 In 2005 Tesseract was open sourced by HP. From 2006 until November 2018 it was developed by Google.

-The latest stable version is **[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0)**, released on November 30, 2021.
+Major version 5 is the current stable version and started with release
+[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0) on November 30, 2021.
+Newer minor versions and bugfix versions are available from
+[GitHub](https://github.com/tesseract-ocr/tesseract/releases/).
+
 Latest source code is available from [main branch on GitHub](https://github.com/tesseract-ocr/tesseract/tree/main).
 Open issues can be found in [issue tracker](https://github.com/tesseract-ocr/tesseract/issues),
 and [planning documentation](https://tesseract-ocr.github.io/tessdoc/Planning.html).
--- a/2
+++ b/2
@ -1 +1 @@
-5.0.0
+5.1.0
--- a/cmake/Configure.cmake
+++ b/cmake/Configure.cmake
@ -96,13 +96,20 @@ set(include_files_list
    pango-1.0/pango/pango-features.h
    unicode/uchar.h
 )
-check_includes(include_files_list)
+# check_includes(include_files_list)

 set(types_list
    "long long int"
    wchar_t
 )
-check_types(types_list)
+# check_types(types_list)
+
+list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
+list(APPEND CMAKE_REQUIRED_LIBRARIES -lm)
+set(functions_list
+    feenableexcept
+)
+check_functions(functions_list)

 file(APPEND ${AUTOCONFIG_SRC} "
 /* Version number */
@ -113,6 +120,7 @@ file(APPEND ${AUTOCONFIG_SRC} "
 #cmakedefine HAVE_TIFFIO_H ${HAVE_TIFFIO_H}
 #cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE}
 #cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL}
+#cmakedefine USE_OPENCL ${USE_OPENCL}
 ")

 if(TESSDATA_PREFIX)
--- a/configure.ac
+++ b/configure.ac
@ -7,7 +7,7 @@
 # ----------------------------------------
 AC_PREREQ([2.69])
 AC_INIT([tesseract],
-        [m4_esyscmd_s([test -d .git && git describe --abbrev=4 || cat VERSION])],
+        [m4_esyscmd_s([git describe --abbrev=4 2>/dev/null || cat VERSION])],
        [https://github.com/tesseract-ocr/tesseract/issues],,
        [https://github.com/tesseract-ocr/tesseract/])

@ -28,8 +28,8 @@ AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc])

 # Define date of package, etc. Could be useful in auto-generated
 # documentation.
-PACKAGE_YEAR=2021
-PACKAGE_DATE="11/30"
+PACKAGE_YEAR=2022
+PACKAGE_DATE="03/01"

 abs_top_srcdir=`AS_DIRNAME([$0])`

@ -91,7 +91,7 @@ case "${host_os}" in
    mingw*)
        AM_CONDITIONAL([T_WIN], true)
        AM_CONDITIONAL([ADD_RT], false)
-        AC_SUBST([AM_LDFLAGS], ['-Wl,-no-undefined -Wl,--as-needed'])
+        AC_SUBST([AM_LDFLAGS], ['-no-undefined'])
        ;;
    cygwin*)
        AM_CONDITIONAL([ADD_RT], false)
@ -129,6 +129,7 @@ AX_CHECK_COMPILE_FLAG([-Werror=unused-command-line-argument], [WERROR=-Werror=un

 AM_CONDITIONAL([HAVE_AVX], false)
 AM_CONDITIONAL([HAVE_AVX2], false)
+AM_CONDITIONAL([HAVE_AVX512F], false)
 AM_CONDITIONAL([HAVE_FMA], false)
 AM_CONDITIONAL([HAVE_SSE4_1], false)
 AM_CONDITIONAL([HAVE_NEON], false)
@ -149,6 +150,12 @@ case "${host_cpu}" in
      AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
    fi

+    AX_CHECK_COMPILE_FLAG([-mavx512f], [avx512f=true], [avx512f=false], [$WERROR])
+    AM_CONDITIONAL([HAVE_AVX512F], $avx512f)
+    if $avx512f; then
+      AC_DEFINE([HAVE_AVX512F], [1], [Enable AVX512F instructions])
+    fi
+
    AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
    AM_CONDITIONAL([HAVE_FMA], $fma)
    if $fma; then
@ -163,7 +170,7 @@ case "${host_cpu}" in

    ;;

-  aarch64)
+  aarch64|arm64)

    # ARMv8 always has NEON and does not need special compiler flags.
    AM_CONDITIONAL([HAVE_NEON], true)
@ -178,6 +185,7 @@ case "${host_cpu}" in
      AC_DEFINE([HAVE_NEON], [1], [Enable NEON instructions])
      NEON_CXXFLAGS="-mfpu=neon"
      AC_SUBST([NEON_CXXFLAGS])
+      check_for_neon=1
    fi

    ;;
@ -188,6 +196,19 @@ case "${host_cpu}" in

 esac

+# Check whether feenableexcept is supported. Some C libraries (e.g. uclibc) don't provide it.
+AC_CHECK_FUNCS([feenableexcept])
+
+# additional checks for NEON targets
+if test x$check_for_neon = x1; then
+  AC_MSG_NOTICE([checking how to detect NEON availability])
+  AC_CHECK_FUNCS([getauxval elf_aux_info android_getCpuFamily])
+
+  if test $ac_cv_func_getauxval = no && test $ac_cv_func_elf_aux_info = no && test $ac_cv_func_android_getCpuFamily = no; then
+      AC_MSG_WARN([NEON is available, but we don't know how to check for it.  Will not be able to use NEON.])
+  fi
+fi
+
 AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR])
 AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd)

@ -443,6 +464,15 @@ esac

 AC_SEARCH_LIBS([pthread_create], [pthread])

+# Set PKG_CONFIG_PATH for MacOS with Homebrew unless it is already set.
+AC_CHECK_PROG([have_brew], brew, true, false)
+if $have_brew; then
+  brew_prefix=$(brew --prefix)
+  if test -z "$PKG_CONFIG_PATH"; then
+    PKG_CONFIG_PATH=$brew_prefix/opt/icu4c/lib/pkgconfig:$brew_prefix/opt/libarchive/lib/pkgconfig
+    export PKG_CONFIG_PATH
+  fi
+fi

 # ----------------------------------------
 # Check for programs needed to build documentation.
@ -462,9 +492,7 @@ AS_IF([test "$enable_doc" != "no"], [
  if $have_asciidoc && $have_xsltproc; then
    AM_CONDITIONAL([ASCIIDOC], true)
    XML_CATALOG_FILES=
-    AC_CHECK_PROG([have_brew], brew, true, false)
    if $have_brew; then
-      brew_prefix=$(brew --prefix)
      catalog_file=$brew_prefix/etc/xml/catalog
      if test -f $catalog_file; then
        AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], true)
--- a/include/tesseract/baseapi.h
+++ b/include/tesseract/baseapi.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        baseapi.h
 // Description: Simple API for calling tesseract.
 // Author:      Ray Smith
@ -13,8 +13,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_API_BASEAPI_H_
 #define TESSERACT_API_BASEAPI_H_
--- a/include/tesseract/capi.h
+++ b/include/tesseract/capi.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        capi.h
 // Description: C-API TessBaseAPI
 //
@ -12,8 +12,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef API_CAPI_H_
 #define API_CAPI_H_
@ -233,6 +231,12 @@ TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
                              char **vars_values, size_t vars_vec_size,
                              BOOL set_only_non_debug_params);

+TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
+                              const char *language, TessOcrEngineMode mode,
+                              char **configs, int configs_size, char **vars_vec,
+                              char **vars_values, size_t vars_vec_size,
+                              BOOL set_only_non_debug_params);
+
 TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
    const TessBaseAPI *handle);
 TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
--- a/include/tesseract/export.h
+++ b/include/tesseract/export.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        export.h
 // Description: Place holder
 //
@ -12,8 +12,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_PLATFORM_H_
 #define TESSERACT_PLATFORM_H_
--- a/include/tesseract/ltrresultiterator.h
+++ b/include/tesseract/ltrresultiterator.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        ltrresultiterator.h
 // Description: Iterator for tesseract results in strict left-to-right
 //              order that avoids using tesseract internal data structures.
@ -14,8 +14,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
 #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
@ -183,7 +181,7 @@ class TESS_API ChoiceIterator {
 public:
  // Construction is from a LTRResultIterator that points to the symbol of
  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that is is useless.
+  // choices for this symbol and after that it is useless.
  explicit ChoiceIterator(const LTRResultIterator &result_it);
  ~ChoiceIterator();

--- a/include/tesseract/ocrclass.h
+++ b/include/tesseract/ocrclass.h
@ -1,3 +1,4 @@
+// SPDX-License-Identifier: Apache-2.0
 /**********************************************************************
 * File:        ocrclass.h
 * Description: Class definitions and constants for the OCR API.
--- a/include/tesseract/osdetect.h
+++ b/include/tesseract/osdetect.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        osdetect.h
 // Description: Orientation and script detection.
 // Author:      Samuel Charron
@ -14,8 +14,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_CCMAIN_OSDETECT_H_
 #define TESSERACT_CCMAIN_OSDETECT_H_
--- a/include/tesseract/pageiterator.h
+++ b/include/tesseract/pageiterator.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        pageiterator.h
 // Description: Iterator for tesseract page structure that avoids using
 //              tesseract internal data structures.
@ -14,8 +14,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
 #define TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/include/tesseract/publictypes.h
+++ b/include/tesseract/publictypes.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        publictypes.h
 // Description: Types used in both the API and internally
 // Author:      Ray Smith
@ -13,8 +13,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
 #define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/include/tesseract/renderer.h
+++ b/include/tesseract/renderer.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        renderer.h
 // Description: Rendering interface to inject into TessBaseAPI
 //
@ -12,8 +12,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_API_RENDERER_H_
 #define TESSERACT_API_RENDERER_H_
@ -23,6 +21,7 @@
 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
 // complexity of includes here. Use forward declarations wherever possible
 // and hide includes of complex types in baseapi.cpp.
+#include <cstdint>
 #include <string> // for std::string
 #include <vector> // for std::vector

@ -232,7 +231,7 @@ private:
  // used to make everything that isn't easily handled in a
  // streaming fashion.
  long int obj_;                  // counter for PDF objects
-  std::vector<long int> offsets_; // offset of every PDF object in bytes
+  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
  std::vector<long int> pages_;   // object number for every /Page object
  std::string datadir_;           // where to find the custom font
  bool textonly_;                 // skip images if set
--- a/include/tesseract/resultiterator.h
+++ b/include/tesseract/resultiterator.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        resultiterator.h
 // Description: Iterator for tesseract results that is capable of
 //              iterating in proper reading order over Bi Directional
@ -15,8 +15,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
 #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/include/tesseract/unichar.h
+++ b/include/tesseract/unichar.h
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        unichar.h
 // Description: Unicode character/ligature class.
 // Author:      Ray Smith
@ -13,8 +13,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_CCUTIL_UNICHAR_H_
 #define TESSERACT_CCUTIL_UNICHAR_H_
@ -99,10 +97,10 @@ public:
  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
  //        it != UNICHAR::end(str, len);
  //        ++it) {
-  //     tprintf("UCS-4 symbol code = %d\n", *it);
+  //     printf("UCS-4 symbol code = %d\n", *it);
  //     char buf[5];
  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     tprintf("Char = %s\n", buf);
+  //     printf("Char = %s\n", buf);
  //   }
  class TESS_API const_iterator {
    using CI = const_iterator;
--- a/include/tesseract/version.h.in
+++ b/include/tesseract/version.h.in
@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////
+// SPDX-License-Identifier: Apache-2.0
 // File:        version.h
 // Description: Version information
 //
@ -12,8 +12,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_API_VERSION_H_
 #define TESSERACT_API_VERSION_H_
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@ -13,9 +13,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include "errcode.h" // for ASSERT_HOST
 #ifdef _WIN32
-#  include "host.h" // windows.h for MultiByteToWideChar, ...
+#  include "host.h"  // windows.h for MultiByteToWideChar, ...
 #endif
+#include "tprintf.h" // for tprintf

 #include <tesseract/baseapi.h>
 #include <tesseract/renderer.h>
@ -174,6 +176,36 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
      continue;
    }

+    int left, top, right, bottom;
+    auto block_type = res_it->BlockType();
+
+    switch (block_type) {
+      case PT_FLOWING_IMAGE:
+      case PT_HEADING_IMAGE:
+      case PT_PULLOUT_IMAGE: {
+        // Handle all kinds of images.
+        // TODO: optionally add TYPE, for example TYPE="photo".
+        alto_str << "\t\t\t\t<Illustration ID=\"cblock_" << bcnt++ << "\"";
+        AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
+        alto_str << "</Illustration>\n";
+        res_it->Next(RIL_BLOCK);
+        continue;
+      }
+      case PT_HORZ_LINE:
+      case PT_VERT_LINE:
+        // Handle horizontal and vertical lines.
+        alto_str << "\t\t\t\t<GraphicalElement ID=\"cblock_" << bcnt++ << "\"";
+        AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
+        alto_str << "</GraphicalElement >\n";
+        res_it->Next(RIL_BLOCK);
+        continue;
+      case PT_NOISE:
+        tprintf("TODO: Please report image which triggers the noise case.\n");
+        ASSERT_HOST(false);
+      default:
+        break;
+    }
+
    if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
      alto_str << "\t\t\t\t<ComposedBlock ID=\"cblock_" << bcnt << "\"";
      AddBoxToAlto(res_it, RIL_BLOCK, alto_str);
@ -200,7 +232,6 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
    bool last_word_in_tblock = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
    bool last_word_in_cblock = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);

-    int left, top, right, bottom;
    res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);

    do {
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@ -99,6 +99,9 @@ namespace tesseract {

 static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin");
 static STRING_VAR(document_title, "", "Title of output document (used for hOCR and PDF output)");
+#ifdef HAVE_LIBCURL
+static INT_VAR(curl_timeout, 0, "Timeout for curl in seconds");
+#endif

 /** Minimum sensible image size to be worth running tesseract. */
 const int kMinRectSize = 10;
@ -1150,6 +1153,17 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
      if (curlcode != CURLE_OK) {
        return error("curl_easy_setopt");
      }
+      int timeout = curl_timeout; // seconds; handed to libcurl as a long below
+      if (timeout > 0) { // only touch libcurl's timeout settings when one was requested
+        curlcode = curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
+        if (curlcode != CURLE_OK) {
+          return error("curl_easy_setopt");
+        }
+        curlcode = curl_easy_setopt(curl, CURLOPT_TIMEOUT, static_cast<long>(timeout));
+        if (curlcode != CURLE_OK) {
+          return error("curl_easy_setopt");
+        }
+      }
      curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
      if (curlcode != CURLE_OK) {
        return error("curl_easy_setopt");
@ -1357,6 +1371,22 @@ char *TessBaseAPI::GetUTF8Text() {
    if (it->Empty(RIL_PARA)) {
      continue;
    }
+    auto block_type = it->BlockType();
+    switch (block_type) {
+      case PT_FLOWING_IMAGE:
+      case PT_HEADING_IMAGE:
+      case PT_PULLOUT_IMAGE:
+      case PT_HORZ_LINE:
+      case PT_VERT_LINE:
+        // Ignore images and lines for text output.
+        continue;
+      case PT_NOISE:
+        tprintf("TODO: Please report image which triggers the noise case.\n");
+        ASSERT_HOST(false);
+      default:
+        break;
+    }
+
    const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
    text += para_text.get();
  } while (it->Next(RIL_PARA));
--- a/src/api/capi.cpp
+++ b/src/api/capi.cpp
@ -228,6 +228,22 @@ int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath, const char *lang
  return handle->Init(datapath, language);
 }

+int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size, const char *language,
+                     TessOcrEngineMode mode, char **configs, int configs_size, char **vars_vec,
+                     char **vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params) {
+  std::vector<std::string> varNames;  // copies of vars_vec[0 .. vars_vec_size)
+  std::vector<std::string> varValues; // copies of vars_values[0 .. vars_vec_size)
+  if (vars_vec != nullptr && vars_values != nullptr) { // otherwise both vectors stay empty
+    for (size_t i = 0; i < vars_vec_size; i++) {
+      varNames.emplace_back(vars_vec[i]);
+      varValues.emplace_back(vars_values[i]);
+    }
+  }
+
+  return handle->Init(data, data_size, language, mode, configs, configs_size, &varNames, &varValues,
+                      set_only_non_debug_params != 0, nullptr); // BOOL is narrowed to bool here
+}
+
 const char *TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI *handle) {
  return handle->GetInitLanguagesAsString();
 }
--- a/src/api/hocrrenderer.cpp
+++ b/src/api/hocrrenderer.cpp
@ -189,6 +189,36 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {

  std::unique_ptr<ResultIterator> res_it(GetIterator());
  while (!res_it->Empty(RIL_BLOCK)) {
+    int left, top, right, bottom;
+    auto block_type = res_it->BlockType();
+    switch (block_type) {
+      case PT_FLOWING_IMAGE:
+      case PT_HEADING_IMAGE:
+      case PT_PULLOUT_IMAGE: {
+        // Handle all kinds of images.
+        res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
+        hocr_str << "   <div class='ocr_photo' id='block_" << page_id << '_'
+                 << bcnt++ << "' title=\"bbox " << left << " " << top << " "
+                 << right << " " << bottom << "\"></div>\n";
+        res_it->Next(RIL_BLOCK);
+        continue;
+      }
+      case PT_HORZ_LINE:
+      case PT_VERT_LINE:
+        // Handle horizontal and vertical lines.
+        res_it.get()->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
+        hocr_str << "   <div class='ocr_separator' id='block_" << page_id << '_'
+                 << bcnt++ << "' title=\"bbox " << left << " " << top << " "
+                 << right << " " << bottom << "\"></div>\n";
+        res_it->Next(RIL_BLOCK);
+        continue;
+      case PT_NOISE:
+        tprintf("TODO: Please report image which triggers the noise case.\n");
+        ASSERT_HOST(false);
+      default:
+        break;
+    }
+
    if (res_it->Empty(RIL_WORD)) {
      res_it->Next(RIL_WORD);
      continue;
@ -218,7 +248,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
    }
    if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
      hocr_str << "\n     <span class='";
-      switch (res_it->BlockType()) {
+      switch (block_type) {
        case PT_HEADING_TEXT:
          hocr_str << "ocr_header";
          break;
@ -228,6 +258,11 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
        case PT_CAPTION_TEXT:
          hocr_str << "ocr_caption";
          break;
+        case PT_FLOWING_IMAGE:
+        case PT_HEADING_IMAGE:
+        case PT_PULLOUT_IMAGE:
+          ASSERT_HOST(false);
+          break;
        default:
          hocr_str << "ocr_line";
      }
@ -248,12 +283,10 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
    hocr_str << "\n      <span class='ocrx_word'"
             << " id='"
             << "word_" << page_id << "_" << wcnt << "'";
-    int left, top, right, bottom;
    bool bold, italic, underlined, monospace, serif, smallcaps;
    int pointsize, font_id;
-    const char *font_name;
    res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
-    font_name =
+    const char *font_name =
        res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
                                   &serif, &smallcaps, &pointsize, &font_id);
    hocr_str << " title='bbox " << left << " " << top << " " << right << " "
--- a/src/api/renderer.cpp
+++ b/src/api/renderer.cpp
@ -109,6 +109,9 @@ bool TessResultRenderer::EndDocument() {
 }

 void TessResultRenderer::AppendString(const char *s) {
+  if (s == nullptr) { // strlen() below must not be handed a null pointer
+    return;           // nothing to append
+  }
   AppendData(s, strlen(s));
 }

--- a/src/arch/dotproduct.h
+++ b/src/arch/dotproduct.h
@ -27,6 +27,9 @@ TFloat DotProductNative(const TFloat *u, const TFloat *v, int n);
 // Uses Intel AVX intrinsics to access the SIMD instruction set.
 TFloat DotProductAVX(const TFloat *u, const TFloat *v, int n);

+// Uses Intel AVX512F intrinsics to access the SIMD instruction set.
+TFloat DotProductAVX512F(const TFloat *u, const TFloat *v, int n);
+
 // Use Intel FMA.
 TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);

--- a/src/arch/dotproductavx512.cpp
+++ b/src/arch/dotproductavx512.cpp
@ -0,0 +1,70 @@
+///////////////////////////////////////////////////////////////////////
+// File:        dotproductavx512.cpp
+// Description: Architecture-specific dot-product function.
+// Author:      Stefan Weil
+//
+// (C) Copyright 2022
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#if !defined(__AVX512F__)
+#  if defined(__i686__) || defined(__x86_64__)
+#    error Implementation only for AVX512F capable architectures
+#  endif
+#else
+
+#  include <immintrin.h>
+#  include <cstdint>
+#  include "dotproduct.h"
+
+namespace tesseract {
+
+// Computes and returns the dot product of the n-vectors u and v.
+// Uses Intel AVX512F intrinsics to access the SIMD instruction set.
+#  if defined(FAST_FLOAT)
+float DotProductAVX512F(const float *u, const float *v, int n) {
+  const unsigned quot = n / 16; // number of full chunks of 16 floats (one __m512 each)
+  const unsigned rem = n % 16;  // leftover elements, handled by the scalar loop below
+  __m512 t0 = _mm512_setzero_ps(); // 16 partial sums accumulated in parallel
+  for (unsigned k = 0; k < quot; k++) {
+    __m512 f0 = _mm512_loadu_ps(u); // unaligned loads: u and v need no special alignment
+    __m512 f1 = _mm512_loadu_ps(v);
+    t0 = _mm512_fmadd_ps(f0, f1, t0); // t0 += f0 * f1
+    u += 16;
+    v += 16;
+  }
+  float result = _mm512_reduce_add_ps(t0); // horizontal sum of the 16 partial sums
+  for (unsigned k = 0; k < rem; k++) {
+    result += *u++ * *v++;
+  }
+  return result;
+}
+#  else // !FAST_FLOAT: double precision variant
+double DotProductAVX512F(const double *u, const double *v, int n) {
+  const unsigned quot = n / 8; // number of full chunks of 8 doubles (one __m512d each)
+  const unsigned rem = n % 8;  // leftover elements, handled by the scalar loop below
+  __m512d t0 = _mm512_setzero_pd(); // 8 partial sums accumulated in parallel
+  for (unsigned k = 0; k < quot; k++) {
+    t0 = _mm512_fmadd_pd(_mm512_loadu_pd(u), _mm512_loadu_pd(v), t0); // t0 += u[0..8) * v[0..8)
+    u += 8;
+    v += 8;
+  }
+  double result = _mm512_reduce_add_pd(t0); // horizontal sum of the 8 partial sums
+  for (unsigned k = 0; k < rem; k++) {
+    result += *u++ * *v++;
+  }
+  return result;
+}
+#  endif // FAST_FLOAT
+
+} // namespace tesseract.
+
+#endif // __AVX512F__
--- a/src/arch/intsimdmatrixavx2.cpp
+++ b/src/arch/intsimdmatrixavx2.cpp
@ -27,6 +27,14 @@
 #  include <cstdint>
 #  include <vector>

+#  if defined(_MSC_VER) && _MSC_VER >= 1925 && _MSC_VER <= 1929 && \
+      defined(_WIN32) && !defined(_WIN64)
+// Optimize for size (/Os) instead of using the default optimization for some
+// versions of the 32 bit Visual Studio compiler which generate buggy code.
+#    pragma optimize("", off)
+#    pragma optimize("s", on)
+#  endif
+
 namespace tesseract {

 // Number of outputs held in each register. 8 x 32 bit ints.
--- a/src/arch/simddetect.cpp
+++ b/src/arch/simddetect.cpp
@ -53,12 +53,14 @@
 #endif

 #if defined(HAVE_NEON) && !defined(__aarch64__)
-#  ifdef ANDROID
+#  if defined(HAVE_ANDROID_GETCPUFAMILY)
 #    include <cpu-features.h>
-#  else
-/* Assume linux */
+#  elif defined(HAVE_GETAUXVAL)
 #    include <asm/hwcap.h>
 #    include <sys/auxv.h>
+#  elif defined(HAVE_ELF_AUX_INFO)
+#    include <sys/auxv.h>
+#    include <sys/elf.h>
 #  endif
 #endif

@ -210,21 +212,29 @@ SIMDDetect::SIMDDetect() {
 #endif

 #if defined(HAVE_NEON) && !defined(__aarch64__)
-#  ifdef ANDROID
+#  if defined(HAVE_ANDROID_GETCPUFAMILY)
  {
    AndroidCpuFamily family = android_getCpuFamily();
    if (family == ANDROID_CPU_FAMILY_ARM)
      neon_available_ = (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON);
  }
-#  else
-  /* Assume linux */
+#  elif defined(HAVE_GETAUXVAL)
  neon_available_ = getauxval(AT_HWCAP) & HWCAP_NEON;
+#  elif defined(HAVE_ELF_AUX_INFO)
+  unsigned long hwcap = 0;
+  elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
+  neon_available_ = hwcap & HWCAP_NEON;
 #  endif
 #endif

  // Select code for calculation of dot product based on autodetection.
  if (false) {
    // This is a dummy to support conditional compilation.
+#if defined(HAVE_AVX512F)
+  } else if (avx512F_available_) {
+    // AVX512F detected.
+    SetDotProduct(DotProductAVX512F, &IntSimdMatrix::intSimdMatrixAVX2);
+#endif
 #if defined(HAVE_AVX2)
  } else if (avx2_available_) {
    // AVX2 detected.
--- a/src/ccmain/applybox.cpp
+++ b/src/ccmain/applybox.cpp
@ -159,7 +159,7 @@ PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
 // Helper computes median xheight in the image.
 static double MedianXHeight(BLOCK_LIST *block_list) {
  BLOCK_IT block_it(block_list);
-  STATS xheights(0, block_it.data()->pdblk.bounding_box().height());
+  STATS xheights(0, block_it.data()->pdblk.bounding_box().height() - 1);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    ROW_IT row_it(block_it.data()->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
--- a/src/ccmain/control.cpp
+++ b/src/ccmain/control.cpp
@ -2015,7 +2015,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
 void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
  PAGE_RES_IT page_res_it(page_res);
  WERD_RES *word;                       // current word
-  STATS doc_fonts(0, font_table_size_); // font counters
+  STATS doc_fonts(0, font_table_size_ - 1); // font counters

  // Gather font id statistics.
  for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
--- a/src/ccmain/fixxht.cpp
+++ b/src/ccmain/fixxht.cpp
@ -103,8 +103,8 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
 // Returns a new x-height maximally compatible with the result in word_res.
 // See comment above for overall algorithm.
 float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_shift) {
-  STATS top_stats(0, UINT8_MAX);
-  STATS shift_stats(-UINT8_MAX, UINT8_MAX);
+  STATS top_stats(0, UINT8_MAX - 1);
+  STATS shift_stats(-UINT8_MAX, UINT8_MAX - 1);
  int bottom_shift = 0;
  int num_blobs = word_res->rebuild_word->NumBlobs();
  do {
--- a/src/ccmain/pageiterator.cpp
+++ b/src/ccmain/pageiterator.cpp
@ -225,7 +225,7 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
    return true; // Already at the end!
  }
  // The result is true if we step forward by element and find we are
-  // at the the end of the page or at beginning of *all* levels in:
+  // at the end of the page or at beginning of *all* levels in:
  // [level, element).
  // When there is more than one level difference between element and level,
  // we could for instance move forward one symbol and still be at the first
@ -566,7 +566,15 @@ void PageIterator::Orientation(tesseract::Orientation *orientation,
                               tesseract::WritingDirection *writing_direction,
                               tesseract::TextlineOrder *textline_order,
                               float *deskew_angle) const {
-  BLOCK *block = it_->block()->block;
+  auto *block_res = it_->block();
+  if (block_res == nullptr) { // Nothing can be done, so return default values.
+    *orientation = ORIENTATION_PAGE_UP;
+    *writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT;
+    *textline_order = TEXTLINE_ORDER_TOP_TO_BOTTOM;
+    *deskew_angle = 0.0f; // fourth output parameter; do not leave it unset for the caller
+    return;
+  }
+  auto *block = block_res->block;

  // Orientation
  FCOORD up_in_image(0.0, 1.0);
--- a/src/ccmain/pagesegmain.cpp
+++ b/src/ccmain/pagesegmain.cpp
@ -108,10 +108,8 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file[0] != '\0') {
    std::string name = input_file;
-    const char *lastdot = strrchr(name.c_str(), '.');
-    if (lastdot != nullptr) {
-      name[lastdot - name.c_str()] = '\0';
-    }
+    std::size_t lastdot = name.find_last_of(".");
+    name = name.substr(0, lastdot);
    read_unlv_file(name, width, height, blocks);
  }
  if (blocks->empty()) {
--- a/src/ccmain/paragraphs.cpp
+++ b/src/ccmain/paragraphs.cpp
@ -1623,8 +1623,8 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
    UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
    UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
  }
-  STATS lefts(lmin, lmax + 1);
-  STATS rights(rmin, rmax + 1);
+  STATS lefts(lmin, lmax);
+  STATS rights(rmin, rmax);
  for (int i = start; i < end; i++) {
    RowScratchRegisters &sr = (*rows)[i];
    if (sr.ri_->num_words == 0) {
@ -1655,7 +1655,7 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
      (rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
  int word_width =
      (rows[row_start].ri_->lword_box.width() + rows[row_end - 1].ri_->lword_box.width()) / 2;
-  STATS spacing_widths(0, 5 + word_width);
+  STATS spacing_widths(0, 4 + word_width);
  for (int i = row_start; i < row_end; i++) {
    if (rows[i].ri_->num_words > 1) {
      spacing_widths.add(rows[i].ri_->average_interword_space, 1);
--- a/src/ccmain/resultiterator.cpp
+++ b/src/ccmain/resultiterator.cpp
@ -616,7 +616,7 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel
    return true; // Already at the end!
  }
  // The result is true if we step forward by element and find we are
-  // at the the end of the page or at beginning of *all* levels in:
+  // at the end of the page or at beginning of *all* levels in:
  // [level, element).
  // When there is more than one level difference between element and level,
  // we could for instance move forward one symbol and still be at the first
@ -731,10 +731,12 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) {
    std::vector<int> textline_order;
    std::vector<StrongScriptDirection> dirs;
    CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &dirs, &textline_order);
-    tprintf("Strong Script dirs     [%p/P=%s]: ", it_->row(),
+    tprintf("Strong Script dirs     [%p/P=%s]: ",
+            static_cast<void *>(it_->row()),
            current_paragraph_is_ltr_ ? "ltr" : "rtl");
    PrintScriptDirs(dirs);
-    tprintf("Logical textline order [%p/P=%s]: ", it_->row(),
+    tprintf("Logical textline order [%p/P=%s]: ",
+            static_cast<void *>(it_->row()),
            current_paragraph_is_ltr_ ? "ltr" : "rtl");
    for (int i : textline_order) {
      tprintf("%d ", i);
--- a/src/ccmain/tessedit.cpp
+++ b/src/ccmain/tessedit.cpp
@ -23,8 +23,6 @@
 #  include "config_auto.h"
 #endif

-#include <regex> // for std::regex_match
-
 #include "control.h"
 #include "matchdefs.h"
 #include "pageres.h"
@ -248,12 +246,11 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
  std::string remains(lang_str);
  // Look whether the model file uses a prefix which must be applied to
  // included model files as well.
-  std::regex e("(.*)/[^/]*");
-  std::cmatch cm;
  std::string prefix;
-  if (std::regex_match(lang.c_str(), cm, e, std::regex_constants::match_default)) {
+  size_t found = lang.find_last_of('/');
+  if (found != std::string::npos) {
    // A prefix was found.
-    prefix = cm[1].str() + "/";
+    prefix = lang.substr(0, found + 1);
  }
  while (!remains.empty()) {
    // Find the start of the lang code and which vector to add to.
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@ -86,27 +86,27 @@ Tesseract::Tesseract()
    , double_MEMBER(thresholding_window_size, 0.33,
                    "Window size for measuring local statistics (to be "
                    "multiplied by image DPI). "
-                    "This parameter is used by the Sauvola thresolding method",
+                    "This parameter is used by the Sauvola thresholding method",
                    this->params())
    , double_MEMBER(thresholding_kfactor, 0.34,
                    "Factor for reducing threshold due to variance. "
-                    "This parameter is used by the Sauvola thresolding method."
+                    "This parameter is used by the Sauvola thresholding method."
                    " Normal range: 0.2-0.5",
                    this->params())
    , double_MEMBER(thresholding_tile_size, 0.33,
                    "Desired tile size (to be multiplied by image DPI). "
-                    "This parameter is used by the LeptonicaOtsu thresolding "
+                    "This parameter is used by the LeptonicaOtsu thresholding "
                    "method",
                    this->params())
    , double_MEMBER(thresholding_smooth_kernel_size, 0.0,
                    "Size of convolution kernel applied to threshold array "
                    "(to be multiplied by image DPI). Use 0 for no smoothing. "
-                    "This parameter is used by the LeptonicaOtsu thresolding "
+                    "This parameter is used by the LeptonicaOtsu thresholding "
                    "method",
                    this->params())
    , double_MEMBER(thresholding_score_fraction, 0.1,
                    "Fraction of the max Otsu score. "
-                    "This parameter is used by the LeptonicaOtsu thresolding "
+                    "This parameter is used by the LeptonicaOtsu thresholding "
                    "method. "
                    "For standard Otsu use 0.0, otherwise 0.1 is recommended",
                    this->params())
--- a/src/ccmain/thresholder.cpp
+++ b/src/ccmain/thresholder.cpp
@ -16,6 +16,11 @@
 //
 ///////////////////////////////////////////////////////////////////////

+// Include automatically generated configuration file
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h"
+#endif
+
 #include "otsuthr.h"
 #include "thresholder.h"
 #include "tprintf.h" // for tprintf
@ -27,7 +32,8 @@
 #include <allheaders.h>
 #include <tesseract/baseapi.h> // for api->GetIntVariable()

-#include <cstdint> // for uint32_t
+#include <algorithm> // for std::max, std::min
+#include <cstdint>   // for uint32_t
 #include <cstring>
 #include <tuple>

@ -164,16 +170,7 @@ void ImageThresholder::SetImage(const Image pix) {
  // Convert the image as necessary so it is one of binary, plain RGB, or
  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
  // not just a clone of the input.
-  if (pixGetColormap(src)) {
-    Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
-    depth = pixGetDepth(tmp);
-    if (depth > 1 && depth < 8) {
-      pix_ = pixConvertTo8(tmp, false);
-      tmp.destroy();
-    } else {
-      pix_ = tmp;
-    }
-  } else if (depth > 1 && depth < 8) {
+  if (depth > 1 && depth < 8) {
    pix_ = pixConvertTo8(src, false);
  } else {
    pix_ = src.copy();
--- a/src/ccstruct/blobbox.cpp
+++ b/src/ccstruct/blobbox.cpp
@ -810,7 +810,7 @@ void TO_ROW::compute_vertical_projection() { // project whole row
    row_box += blob_it.data()->bounding_box();
  }

-  projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
+  projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN - 1);
  projection_left = row_box.left() - PROJECTION_MARGIN;
  projection_right = row_box.right() + PROJECTION_MARGIN;
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
--- a/src/ccstruct/blobbox.h
+++ b/src/ccstruct/blobbox.h
@ -725,8 +725,8 @@ public:
    ASSERT_HOST(block->pdblk.poly_block() != nullptr);
    block->rotate(rotation);
    // Update the median size statistic from the blobs list.
-    STATS widths(0, block->pdblk.bounding_box().width());
-    STATS heights(0, block->pdblk.bounding_box().height());
+    STATS widths(0, block->pdblk.bounding_box().width() - 1);
+    STATS heights(0, block->pdblk.bounding_box().height() - 1);
    BLOBNBOX_IT blob_it(&blobs);
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      widths.add(blob_it.data()->bounding_box().width(), 1);
@ -769,7 +769,7 @@ public:
 #ifndef GRAPHICS_DISABLED
  // Draw the noise blobs from all lists in red.
  void plot_noise_blobs(ScrollView *to_win);
-  // Draw the blobs on on the various lists in the block in different colors.
+  // Draw the blobs on the various lists in the block in different colors.
  void plot_graded_blobs(ScrollView *to_win);
 #endif

--- a/src/ccstruct/ccstruct.cpp
+++ b/src/ccstruct/ccstruct.cpp
@ -28,9 +28,4 @@ const double CCStruct::kAscenderFraction = 0.25;
 const double CCStruct::kXHeightCapRatio =
    CCStruct::kXHeightFraction / (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);

-// Destructor.
-// It is defined here, so the compiler can create a single vtable
-// instead of weak vtables in every compilation unit.
-CCStruct::~CCStruct() = default;
-
 } // namespace tesseract
--- a/src/ccstruct/ccstruct.h
+++ b/src/ccstruct/ccstruct.h
@ -22,11 +22,9 @@
 #include "ccutil.h" // for CCUtil

 namespace tesseract {
-class TESS_API CCStruct : public CCUtil {
-public:
-  CCStruct() = default;
-  ~CCStruct() override;

+class CCStruct : public CCUtil {
+public:
  // Globally accessible constants.
  // APPROXIMATIONS of the fractions of the character cell taken by
  // the descenders, ascenders, and x-height.
@ -36,6 +34,7 @@ public:
  // Derived value giving the x-height as a fraction of cap-height.
  static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
 };
+
 } // namespace tesseract

 #endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
--- a/src/ccstruct/coutln.cpp
+++ b/src/ccstruct/coutln.cpp
@ -871,7 +871,7 @@ void C_OUTLINE::ComputeBinaryOffsets() {
    increment_step(s, 1, &head_pos, dir_counts, pos_totals);
  }
  for (int s = 0; s < stepcount; pos += step(s++)) {
-    // At step s, s in in the middle of [s-2, s+2].
+    // At step s, s is in the middle of [s-2, s+2].
    increment_step(s + 2, 1, &head_pos, dir_counts, pos_totals);
    int dir_index = chain_code(s);
    ICOORD step_vec = step(s);
--- a/src/ccstruct/linlsq.cpp
+++ b/src/ccstruct/linlsq.cpp
@ -81,7 +81,7 @@ void LLSQ::add(const LLSQ &other) {

 void LLSQ::remove(double x, double y) { // delete an element
  if (total_weight <= 0.0) {            // illegal
-    EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
+    EMPTY_LLSQ.error("LLSQ::remove", ABORT);
  }
  total_weight--; // count elements
  sigx -= x;      // update accumulators
--- a/src/ccstruct/normalis.cpp
+++ b/src/ccstruct/normalis.cpp
@ -180,7 +180,7 @@ static void ComputeRunlengthImage(const TBOX &box,
      ++y;
    }
  }
-  // Now set the image pixels the the MIN of the x and y runlengths.
+  // Now set the image pixels to the MIN of the x and y runlengths.
  for (int iy = 0; iy < height; ++iy) {
    int x = 0;
    for (auto x_coord : x_coords[iy]) {
--- a/src/ccstruct/pdblock.cpp
+++ b/src/ccstruct/pdblock.cpp
@ -347,7 +347,7 @@ void BLOCK_RECT_IT::forward() { // next rectangle
 /**********************************************************************
 * BLOCK_LINE_IT::get_line
 *
- * Get the the start and width of a line in the block.
+ * Get the start and width of a line in the block.
 **********************************************************************/

 TDimension BLOCK_LINE_IT::get_line( // get a line
--- a/src/ccstruct/ratngs.h
+++ b/src/ccstruct/ratngs.h
@ -59,8 +59,8 @@ public:
    unichar_id_ = UNICHAR_SPACE;
    fontinfo_id_ = -1;
    fontinfo_id2_ = -1;
-    rating_ = 10.0;
-    certainty_ = -1.0;
+    rating_ = 10.0f;
+    certainty_ = -1.0f;
    script_id_ = -1;
    min_xheight_ = 0.0f;
    max_xheight_ = 0.0f;
@ -170,13 +170,17 @@ public:
  bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const;

  void print(const UNICHARSET *unicharset) const {
-    tprintf("r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, max_xheight_,
+    tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
+            static_cast<double>(rating_),
+            static_cast<double>(certainty_),
+            static_cast<double>(min_xheight_),
+            static_cast<double>(max_xheight_),
            unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str());
  }
  void print_full() const {
    print(nullptr);
    tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
-            fontinfo_id2_, yshift_, classifier_);
+            fontinfo_id2_, static_cast<double>(yshift_), classifier_);
  }
  // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
  static int SortByRating(const void *p1, const void *p2) {
--- a/src/ccstruct/statistc.cpp
+++ b/src/ccstruct/statistc.cpp
@ -40,14 +40,14 @@ namespace tesseract {
 *
 * Construct a new stats element by allocating and zeroing the memory.
 **********************************************************************/
-STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
-  if (max_bucket_value_plus_1 <= min_bucket_value) {
+STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value) {
+  if (max_bucket_value < min_bucket_value) {
    min_bucket_value = 0;
-    max_bucket_value_plus_1 = 1;
+    max_bucket_value = 1;
  }
  rangemin_ = min_bucket_value; // setup
-  rangemax_ = max_bucket_value_plus_1;
-  buckets_ = new int32_t[rangemax_ - rangemin_];
+  rangemax_ = max_bucket_value;
+  buckets_ = new int32_t[1 + rangemax_ - rangemin_];
  clear();
 }

@ -56,16 +56,16 @@ STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
 *
 * Alter the range on an existing stats element.
 **********************************************************************/
-bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
-  if (max_bucket_value_plus_1 <= min_bucket_value) {
+bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value) {
+  if (max_bucket_value < min_bucket_value) {
    return false;
  }
-  if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
+  if (buckets_ == nullptr || rangemax_ - rangemin_ != max_bucket_value - min_bucket_value) {
    delete[] buckets_;
-    buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value];
+    buckets_ = new int32_t[1 + max_bucket_value - min_bucket_value];
  }
  rangemin_ = min_bucket_value; // setup
-  rangemax_ = max_bucket_value_plus_1;
+  rangemax_ = max_bucket_value;
  clear(); // zero it
  return true;
 }
@ -78,7 +78,7 @@ bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
 void STATS::clear() { // clear out buckets
  total_count_ = 0;
  if (buckets_ != nullptr) {
-    memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
+    memset(buckets_, 0, (1 + rangemax_ - rangemin_) * sizeof(buckets_[0]));
  }
 }

@ -97,12 +97,11 @@ STATS::~STATS() {
 * Add a set of samples to (or delete from) a pile.
 **********************************************************************/
 void STATS::add(int32_t value, int32_t count) {
-  if (buckets_ == nullptr) {
-    return;
+  if (buckets_ != nullptr) {
+    value = ClipToRange(value, rangemin_, rangemax_);
+    buckets_[value - rangemin_] += count;
+    total_count_ += count; // keep count of total
  }
-  value = ClipToRange(value, rangemin_, rangemax_ - 1);
-  buckets_[value - rangemin_] += count;
-  total_count_ += count; // keep count of total
 }

 /**********************************************************************
@ -116,7 +115,7 @@ int32_t STATS::mode() const { // get mode of samples
  }
  int32_t max = buckets_[0]; // max cell count
  int32_t maxindex = 0;      // index of max
-  for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
+  for (int index = rangemax_ - rangemin_; index > 0; --index) {
    if (buckets_[index] > max) {
      max = buckets_[index]; // find biggest
      maxindex = index;
@ -135,7 +134,7 @@ double STATS::mean() const { // get mean of samples
    return static_cast<double>(rangemin_);
  }
  int64_t sum = 0;
-  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
+  for (int index = rangemax_ - rangemin_; index >= 0; --index) {
    sum += static_cast<int64_t>(index) * buckets_[index];
  }
  return static_cast<double>(sum) / total_count_ + rangemin_;
@ -152,7 +151,7 @@ double STATS::sd() const { // standard deviation
  }
  int64_t sum = 0;
  double sqsum = 0.0;
-  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
+  for (int index = rangemax_ - rangemin_; index >= 0; --index) {
    sum += static_cast<int64_t>(index) * buckets_[index];
    sqsum += static_cast<double>(index) * index * buckets_[index];
  }
@ -186,7 +185,7 @@ double STATS::ile(double frac) const {
 #endif
  int sum = 0;
  int index = 0;
-  for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
+  for (index = 0; index <= rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
    ;
  }
  if (index > 0) {
@ -207,7 +206,7 @@ int32_t STATS::min_bucket() const { // Find min
    return rangemin_;
  }
  int32_t min = 0;
-  for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
+  for (min = 0; (min <= rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
    ;
  }
  return rangemin_ + min;
@ -224,7 +223,7 @@ int32_t STATS::max_bucket() const { // Find max
    return rangemin_;
  }
  int32_t max;
-  for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) {
+  for (max = rangemax_ - rangemin_; max > 0 && buckets_[max] == 0; max--) {
    ;
  }
  return rangemin_ + max;
@ -270,7 +269,7 @@ bool STATS::local_min(int32_t x) const {
  if (buckets_ == nullptr) {
    return false;
  }
-  x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
+  x = ClipToRange(x, rangemin_, rangemax_) - rangemin_;
  if (buckets_[x] == 0) {
    return true;
  }
@ -281,10 +280,10 @@ bool STATS::local_min(int32_t x) const {
  if (index >= 0 && buckets_[index] < buckets_[x]) {
    return false;
  }
-  for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
+  for (index = x + 1; index <= rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
    ;
  }
-  if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
+  if (index <= rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
    return false;
  } else {
    return true;
@ -304,7 +303,7 @@ void STATS::smooth(int32_t factor) {
    return;
  }
  STATS result(rangemin_, rangemax_);
-  int entrycount = rangemax_ - rangemin_;
+  int entrycount = 1 + rangemax_ - rangemin_;
  for (int entry = 0; entry < entrycount; entry++) {
    // centre weight
    int count = buckets_[entry] * factor;
@ -368,7 +367,7 @@ int32_t STATS::cluster(float lower, // thresholds
        clusters[0].add(entry, count);
      }
    }
-    for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
+    for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
                                 pile_count(entry) <= pile_count(entry - 1);
         entry++) {
      count = pile_count(entry) - clusters[0].pile_count(entry);
@ -386,7 +385,7 @@ int32_t STATS::cluster(float lower, // thresholds
  do {
    new_cluster = false;
    new_mode = 0;
-    for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
+    for (entry = 0; entry <= rangemax_ - rangemin_; entry++) {
      count = buckets_[entry] - clusters[0].buckets_[entry];
      // remaining pile
      if (count > 0) { // any to handle
@ -433,7 +432,7 @@ int32_t STATS::cluster(float lower, // thresholds
          clusters[0].add(entry, count);
        }
      }
-      for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax_ &&
+      for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry <= rangemax_ &&
                                   pile_count(entry) <= pile_count(entry - 1);
           entry++) {
        count = pile_count(entry) - clusters[0].pile_count(entry);
@ -482,7 +481,7 @@ int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes)
  if (max_modes <= 0) {
    return 0;
  }
-  int src_count = rangemax_ - rangemin_;
+  int src_count = 1 + rangemax_ - rangemin_;
  // Used copies the counts in buckets_ as they get used.
  STATS used(rangemin_, rangemax_);
  modes.clear();
@ -605,7 +604,7 @@ void STATS::plot(ScrollView *window, // to draw in
  }
  window->Pen(colour);

-  for (int index = 0; index < rangemax_ - rangemin_; index++) {
+  for (int index = 0; index <= rangemax_ - rangemin_; index++) {
    window->Rectangle(xorigin + xscale * index, yorigin, xorigin + xscale * (index + 1),
                      yorigin + yscale * buckets_[index]);
  }
@ -630,7 +629,7 @@ void STATS::plotline(ScrollView *window, // to draw in
  }
  window->Pen(colour);
  window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
-  for (int index = 0; index < rangemax_ - rangemin_; index++) {
+  for (int index = 0; index <= rangemax_ - rangemin_; index++) {
    window->DrawTo(xorigin + xscale * index, yorigin + yscale * buckets_[index]);
  }
 }
--- a/src/ccstruct/statistc.h
+++ b/src/ccstruct/statistc.h
@ -30,23 +30,20 @@ namespace tesseract {
 class TESS_API STATS {
 public:
  // The histogram buckets are in the range
-  // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
  // [min_bucket_value, max_bucket_value].
  // Any data under min_bucket value is silently mapped to min_bucket_value,
  // and likewise, any data over max_bucket_value is silently mapped to
  // max_bucket_value.
  // In the internal array, min_bucket_value maps to 0 and
-  // max_bucket_value_plus_1 - min_bucket_value to the array size.
-  // TODO(rays) This is ugly. Convert the second argument to
-  // max_bucket_value and all the code that uses it.
-  STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
+  // 1 + max_bucket_value - min_bucket_value to the array size.
+  STATS(int32_t min_bucket_value, int32_t max_bucket_value);
  STATS() = default; // empty for arrays

  ~STATS();

  // (Re)Sets the range and clears the counts.
  // See the constructor for info on max and min values.
-  bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
+  bool set_range(int32_t min_bucket_value, int32_t max_bucket_value);

  void clear(); // empty buckets

@ -73,11 +70,14 @@ public:
  double median() const; // get median of samples
  // Returns the count of the given value.
  int32_t pile_count(int32_t value) const {
+    if (buckets_ == nullptr) {
+      return 0;
+    }
    if (value <= rangemin_) {
      return buckets_[0];
    }
-    if (value >= rangemax_ - 1) {
-      return buckets_[rangemax_ - rangemin_ - 1];
+    if (value >= rangemax_) {
+      return buckets_[rangemax_ - rangemin_];
    }
    return buckets_[value - rangemin_];
  }
@ -139,7 +139,6 @@ public:

 private:
  int32_t rangemin_ = 0; // min of range
-  // rangemax_ is not well named as it is really one past the max.
  int32_t rangemax_ = 0;       // max of range
  int32_t total_count_ = 0;    // no of samples
  int32_t *buckets_ = nullptr; // array of cells
--- a/src/ccutil/ambigs.cpp
+++ b/src/ccutil/ambigs.cpp
@ -142,7 +142,6 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
            for (j = 0;
                 j < adaption_ambigs_entry->size() && (*adaption_ambigs_entry)[j] > id_to_insert;
                 ++j) {
-              ;
            }
            if (j < adaption_ambigs_entry->size()) {
              if ((*adaption_ambigs_entry)[j] != id_to_insert) {
--- a/src/ccutil/ambigs.h
+++ b/src/ccutil/ambigs.h
@ -116,7 +116,7 @@ public:

  // Comparator function for sorting AmbigSpec_LISTs. The lists will
  // be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors
-  // in a a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1].
+  // in a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1].
  static int compare_ambig_specs(const void *spec1, const void *spec2) {
    const AmbigSpec *s1 = *static_cast<const AmbigSpec *const *>(spec1);
    const AmbigSpec *s2 = *static_cast<const AmbigSpec *const *>(spec2);
--- a/src/ccutil/ccutil.cpp
+++ b/src/ccutil/ccutil.cpp
@ -10,13 +10,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#if defined(_WIN32)
+#  include <io.h> // for _access
+#endif
+
 #include "ccutil.h"

+#include <cstdlib>
+#include <cstring> // for std::strrchr
+
 namespace tesseract {
+
 CCUtil::CCUtil()
    : params_()
-    , INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", &params_)
-    , BOOL_MEMBER(use_ambigs_for_adaption, false,
+      , INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", &params_)
+      , BOOL_MEMBER(use_ambigs_for_adaption, false,
                  "Use ambigs for deciding"
                  " whether to adapt to a character",
                  &params_) {}
@ -26,4 +34,61 @@ CCUtil::CCUtil()
 // instead of weak vtables in every compilation unit.
 CCUtil::~CCUtil() = default;

+/**
+ * @brief CCUtil::main_setup - set location of tessdata and name of image
+ *
+ * @param argv0 - path to the directory with language files and config files.
+ * An actual value of argv0 is used if not empty, otherwise the environment
+ * variable TESSDATA_PREFIX is used if set, next the compiled in
+ * -DTESSDATA_PREFIX is tried. If none applies, the current directory is used.
+ * @param basename - name of image
+ */
+void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
+  imagebasename = basename; /**< name of image */
+
+  char *tessdata_prefix = getenv("TESSDATA_PREFIX");
+
+  if (!argv0.empty()) {
+    /* Use tessdata prefix from the command line. */
+    datadir = argv0;
+  } else if (tessdata_prefix) {
+    /* Use tessdata prefix from the environment. */
+    datadir = tessdata_prefix;
+#if defined(_WIN32)
+  } else if (datadir.empty() || _access(datadir.c_str(), 0) != 0) {
+    /* Look for tessdata in directory of executable. */
+    char path[_MAX_PATH];
+    DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
+    if (length > 0 && length < sizeof(path)) {
+      char *separator = std::strrchr(path, '\\');
+      if (separator != nullptr) {
+        *separator = '\0';
+        std::string subdir = path;
+        subdir += "/tessdata";
+        if (_access(subdir.c_str(), 0) == 0) {
+          datadir = subdir;
+        }
+      }
+    }
+#endif /* _WIN32 */
+  }
+
+  // datadir may still be empty:
+  if (datadir.empty()) {
+#if defined(TESSDATA_PREFIX)
+    // Use tessdata prefix which was compiled in.
+    datadir = TESSDATA_PREFIX "/tessdata";
+#else
+    datadir = "./";
+#endif /* TESSDATA_PREFIX */
+  }
+
+  // check for missing directory separator
+  const char *lastchar = datadir.c_str();
+  lastchar += datadir.length() - 1;
+  if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) {
+    datadir += "/";
+  }
+}
+
 } // namespace tesseract
--- a/src/ccutil/clst.cpp
+++ b/src/ccutil/clst.cpp
@ -89,7 +89,7 @@ void CLIST::assign_to_sublist( // to this list
  constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");

  if (!empty()) {
-    LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT, nullptr);
+    LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT);
  }

  last = start_it->extract_sublist(end_it);
@ -246,9 +246,9 @@ void *CLIST_ITERATOR::data_relative( // get data + or - ...

 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr);
+    NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT);
  if (list->empty())
-    EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT, nullptr);
+    EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT);
  if (offset < -1)
    BAD_PARAMETER.error("CLIST_ITERATOR::data_relative", ABORT, "offset < -l");
 #endif
@ -308,7 +308,7 @@ link */
  /* Error if either current element is deleted */

  if (!current || !other_it->current) {
-    DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT, nullptr);
+    DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT);
  }

  /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
@ -389,12 +389,12 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
  constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points");

  if (list != other_it->list)
-    BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
+    BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT);
  if (list->empty())
-    EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT, nullptr);
+    EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT);

  if (!current || !other_it->current)
-    DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
+    DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT);
 #endif

  ex_current_was_last = other_it->ex_current_was_last = false;
@ -404,7 +404,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
  temp_it.mark_cycle_pt();
  do {                         // walk sublist
    if (temp_it.cycled_list()) { // can't find end pt
-      BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
+      BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT);
    }

    if (temp_it.at_last()) {
--- a/src/ccutil/clst.h
+++ b/src/ccutil/clst.h
@ -190,7 +190,7 @@ public:
  void *data() { // get current data
 #ifndef NDEBUG
    if (!list) {
-      NO_LIST.error("CLIST_ITERATOR::data", ABORT, nullptr);
+      NO_LIST.error("CLIST_ITERATOR::data", ABORT);
    }
 #endif
    return current->data;
@ -523,7 +523,7 @@ inline void *CLIST_ITERATOR::extract() {
 #ifndef NDEBUG
  if (!current) { // list empty or
                  // element extracted
-    NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT, nullptr);
+    NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT);
  }
 #endif

@ -576,7 +576,7 @@ inline void *CLIST_ITERATOR::move_to_first() {
 inline void CLIST_ITERATOR::mark_cycle_pt() {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
+    NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT);
  }
 #endif

@ -666,7 +666,7 @@ inline void CLIST_ITERATOR::add_to_end( // element to add
    void *new_data) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT, nullptr);
+    NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT);
  }
  if (!new_data) {
    BAD_PARAMETER.error("CLIST_ITERATOR::add_to_end", ABORT, "new_data is nullptr");
@ -702,15 +702,12 @@ public:
  }
 };

-#define CLISTIZEH(CLASSNAME)                                          \
-  class CLASSNAME##_CLIST : public X_CLIST<CLASSNAME> {               \
-  public:                                                             \
-    using X_CLIST<CLASSNAME>::X_CLIST;                                \
-  };                                                                  \
-  class CLASSNAME##_C_IT : public X_ITER<CLIST_ITERATOR, CLASSNAME> { \
-  public:                                                             \
-    using X_ITER<CLIST_ITERATOR, CLASSNAME>::X_ITER;                  \
-    CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : X_ITER(list) {}       \
+#define CLISTIZEH(CLASSNAME)                                    \
+  class CLASSNAME##_CLIST : public X_CLIST<CLASSNAME> {         \
+    using X_CLIST<CLASSNAME>::X_CLIST;                          \
+  };                                                            \
+  struct CLASSNAME##_C_IT : X_ITER<CLIST_ITERATOR, CLASSNAME> { \
+    using X_ITER<CLIST_ITERATOR, CLASSNAME>::X_ITER;            \
  };

 } // namespace tesseract
--- a/src/ccutil/elst.cpp
+++ b/src/ccutil/elst.cpp
@ -70,7 +70,7 @@ void ELIST::assign_to_sublist( // to this list
  constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");

  if (!empty()) {
-    LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT, nullptr);
+    LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT);
  }

  last = start_it->extract_sublist(end_it);
@ -169,7 +169,7 @@ ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void *
 ELIST_LINK *ELIST_ITERATOR::forward() {
 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST_ITERATOR::forward", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::forward", ABORT);
 #endif
  if (list->empty()) {
    return nullptr;
@ -189,13 +189,17 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
  }
 #ifndef NDEBUG
  if (!current)
-    NULL_DATA.error("ELIST_ITERATOR::forward", ABORT, nullptr);
+    NULL_DATA.error("ELIST_ITERATOR::forward", ABORT);
 #endif
  next = current->next;

 #ifndef NDEBUG
-  if (!next)
-    NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT, "This is: %p  Current is: %p", this, current);
+  if (!next) {
+    NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT,
+                    "This is: %p  Current is: %p",
+                    static_cast<void *>(this),
+                    static_cast<void *>(current));
+  }
 #endif
  return current;
 }
@ -214,9 +218,9 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...

 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT);
  if (list->empty())
-    EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
+    EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT);
  if (offset < -1)
    BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l");
 #endif
@ -231,7 +235,7 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...

 #ifndef NDEBUG
  if (!ptr)
-    NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT, nullptr);
+    NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT);
 #endif

  return ptr;
@ -248,7 +252,7 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
 ELIST_LINK *ELIST_ITERATOR::move_to_last() {
 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT);
 #endif

  while (current != list->last) {
@ -276,7 +280,7 @@ void ELIST_ITERATOR::exchange(  // positions of 2 links

 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST_ITERATOR::exchange", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::exchange", ABORT);
  if (!other_it)
    BAD_PARAMETER.error("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr");
  if (!(other_it->list))
@ -293,7 +297,7 @@ link */
  /* Error if either current element is deleted */

  if (!current || !other_it->current) {
-    DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT, nullptr);
+    DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT);
  }

  /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
@ -379,14 +383,14 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
  if (!other_it)
    BAD_PARAMETER.error("ELIST_ITERATOR::extract_sublist", ABORT, "other_it nullptr");
  if (!list)
-    NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT);
  if (list != other_it->list)
-    BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
+    BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT);
  if (list->empty())
-    EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT, nullptr);
+    EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT);

  if (!current || !other_it->current)
-    DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
+    DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT);
 #endif

  ex_current_was_last = other_it->ex_current_was_last = false;
@ -396,7 +400,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
  temp_it.mark_cycle_pt();
  do {                         // walk sublist
    if (temp_it.cycled_list()) { // can't find end pt
-      BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
+      BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT);
    }

    if (temp_it.at_last()) {
--- a/src/ccutil/elst.h
+++ b/src/ccutil/elst.h
@ -231,10 +231,10 @@ public:
  ELIST_LINK *data() { // get current data
 #ifndef NDEBUG
    if (!list) {
-      NO_LIST.error("ELIST_ITERATOR::data", ABORT, nullptr);
+      NO_LIST.error("ELIST_ITERATOR::data", ABORT);
    }
    if (!current) {
-      NULL_DATA.error("ELIST_ITERATOR::data", ABORT, nullptr);
+      NULL_DATA.error("ELIST_ITERATOR::data", ABORT);
    }
 #endif
    return current;
@ -256,7 +256,7 @@ public:
  bool empty() const { // is list empty?
 #ifndef NDEBUG
    if (!list) {
-      NO_LIST.error("ELIST_ITERATOR::empty", ABORT, nullptr);
+      NO_LIST.error("ELIST_ITERATOR::empty", ABORT);
    }
 #endif
    return list->empty();
@ -334,13 +334,13 @@ inline void ELIST_ITERATOR::add_after_then_move( // element to add
    ELIST_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT, nullptr);
+    STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT);
  }
 #endif

@ -381,13 +381,13 @@ inline void ELIST_ITERATOR::add_after_stay_put( // element to add
    ELIST_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
+    STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT);
  }
 #endif

@ -430,13 +430,13 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add
    ELIST_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT, nullptr);
+    STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT);
  }
 #endif

@ -473,13 +473,13 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add
    ELIST_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
+    STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT);
  }
 #endif

@ -517,7 +517,7 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add
 inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT);
  }
  if (!list_to_add) {
    BAD_PARAMETER.error("ELIST_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
@ -564,7 +564,7 @@ inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) {
 inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT);
  }
  if (!list_to_add) {
    BAD_PARAMETER.error("ELIST_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
@ -612,11 +612,11 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() {

 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::extract", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::extract", ABORT);
  }
  if (!current) { // list empty or
                  // element extracted
-    NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT, nullptr);
+    NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT);
  }
 #endif

@ -649,7 +649,7 @@ inline ELIST_LINK *ELIST_ITERATOR::extract() {
 inline ELIST_LINK *ELIST_ITERATOR::move_to_first() {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT);
  }
 #endif

@ -673,7 +673,7 @@ inline ELIST_LINK *ELIST_ITERATOR::move_to_first() {
 inline void ELIST_ITERATOR::mark_cycle_pt() {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT);
  }
 #endif

@ -695,7 +695,7 @@ inline void ELIST_ITERATOR::mark_cycle_pt() {
 inline bool ELIST_ITERATOR::at_first() const {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::at_first", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::at_first", ABORT);
  }
 #endif

@ -715,7 +715,7 @@ inline bool ELIST_ITERATOR::at_first() const {
 inline bool ELIST_ITERATOR::at_last() const {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::at_last", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::at_last", ABORT);
  }
 #endif

@ -735,7 +735,7 @@ inline bool ELIST_ITERATOR::at_last() const {
 inline bool ELIST_ITERATOR::cycled_list() const {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT);
  }
 #endif

@ -754,7 +754,7 @@ inline void ELIST_ITERATOR::sort( // sort elements
        const void *, const void *)) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::sort", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::sort", ABORT);
  }
 #endif

@ -776,13 +776,13 @@ inline void ELIST_ITERATOR::add_to_end( // element to add
    ELIST_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr);
+    NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST_ITERATOR::add_to_end", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT, nullptr);
+    STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT);
  }
 #endif

@ -802,13 +802,10 @@ inline void ELIST_ITERATOR::add_to_end( // element to add

 #define ELISTIZEH(CLASSNAME)                                                 \
  class CLASSNAME##_LIST : public X_LIST<ELIST, ELIST_ITERATOR, CLASSNAME> { \
-  public:                                                                    \
    using X_LIST<ELIST, ELIST_ITERATOR, CLASSNAME>::X_LIST;                  \
  };                                                                         \
  class CLASSNAME##_IT : public X_ITER<ELIST_ITERATOR, CLASSNAME> {          \
-  public:                                                                    \
    using X_ITER<ELIST_ITERATOR, CLASSNAME>::X_ITER;                         \
-    CLASSNAME##_IT(CLASSNAME##_LIST *list) : X_ITER(list) {}                 \
  };

 } // namespace tesseract
--- a/src/ccutil/elst2.cpp
+++ b/src/ccutil/elst2.cpp
@ -71,7 +71,7 @@ void ELIST2::assign_to_sublist( // to this list
  constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");

  if (!empty()) {
-    LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT, nullptr);
+    LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT);
  }

  last = start_it->extract_sublist(end_it);
@ -162,7 +162,7 @@ void ELIST2::add_sorted(int comparator(const void *, const void *), ELIST2_LINK
 ELIST2_LINK *ELIST2_ITERATOR::forward() {
 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST2_ITERATOR::forward", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::forward", ABORT);
 #endif
  if (list->empty()) {
    return nullptr;
@ -183,15 +183,18 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() {

 #ifndef NDEBUG
  if (!current)
-    NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT, nullptr);
+    NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT);
 #endif

  next = current->next;

 #ifndef NDEBUG
-  if (!next)
-    NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT, "This is: %p  Current is: %p", this,
-                    current);
+  if (!next) {
+    NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT,
+                    "This is: %p  Current is: %p",
+                    static_cast<void *>(this),
+                    static_cast<void *>(current));
+  }
 #endif

  return current;
@ -207,7 +210,7 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() {
 ELIST2_LINK *ELIST2_ITERATOR::backward() {
 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST2_ITERATOR::backward", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::backward", ABORT);
 #endif
  if (list->empty()) {
    return nullptr;
@ -228,10 +231,13 @@ ELIST2_LINK *ELIST2_ITERATOR::backward() {

 #ifndef NDEBUG
  if (!current)
-    NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT, nullptr);
-  if (!prev)
-    NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT, "This is: %p  Current is: %p", this,
-                    current);
+    NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT);
+  if (!prev) {
+    NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT,
+                    "This is: %p  Current is: %p",
+                    static_cast<void *>(this),
+                    static_cast<void *>(current));
+  }
 #endif

  prev = current->prev;
@ -251,9 +257,9 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( // get data + or - ..

 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT);
  if (list->empty())
-    EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
+    EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT);
 #endif

  if (offset < 0) {
@ -268,7 +274,7 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( // get data + or - ..

 #ifndef NDEBUG
  if (!ptr)
-    NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT, nullptr);
+    NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT);
 #endif

  return ptr;
@ -292,7 +298,7 @@ void ELIST2_ITERATOR::exchange(  // positions of 2 links

 #ifndef NDEBUG
  if (!list)
-    NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT);
  if (!other_it)
    BAD_PARAMETER.error("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr");
  if (!(other_it->list))
@ -309,7 +315,7 @@ link */
  /* Error if either current element is deleted */

  if (!current || !other_it->current) {
-    DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT, nullptr);
+    DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT);
  }

  /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
@ -407,14 +413,14 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( // from this current
  if (!other_it)
    BAD_PARAMETER.error("ELIST2_ITERATOR::extract_sublist", ABORT, "other_it nullptr");
  if (!list)
-    NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT);
  if (list != other_it->list)
-    BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
+    BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT);
  if (list->empty())
-    EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT, nullptr);
+    EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT);

  if (!current || !other_it->current)
-    DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
+    DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT);
 #endif

  ex_current_was_last = other_it->ex_current_was_last = false;
@ -424,7 +430,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( // from this current
  temp_it.mark_cycle_pt();
  do {                         // walk sublist
    if (temp_it.cycled_list()) { // can't find end pt
-      BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT, nullptr);
+      BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT);
    }

    if (temp_it.at_last()) {
--- a/src/ccutil/elst2.h
+++ b/src/ccutil/elst2.h
@ -191,10 +191,10 @@ public:
  ELIST2_LINK *data() { // get current data
 #ifndef NDEBUG
    if (!current) {
-      NULL_DATA.error("ELIST2_ITERATOR::data", ABORT, nullptr);
+      NULL_DATA.error("ELIST2_ITERATOR::data", ABORT);
    }
    if (!list) {
-      NO_LIST.error("ELIST2_ITERATOR::data", ABORT, nullptr);
+      NO_LIST.error("ELIST2_ITERATOR::data", ABORT);
    }
 #endif
    return current;
@ -219,7 +219,7 @@ public:
  bool empty() const { // is list empty?
 #ifndef NDEBUG
    if (!list) {
-      NO_LIST.error("ELIST2_ITERATOR::empty", ABORT, nullptr);
+      NO_LIST.error("ELIST2_ITERATOR::empty", ABORT);
    }
 #endif
    return list->empty();
@ -301,13 +301,13 @@ inline void ELIST2_ITERATOR::add_after_then_move( // element to add
    ELIST2_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT, nullptr);
+    STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT);
  }
 #endif

@ -352,13 +352,13 @@ inline void ELIST2_ITERATOR::add_after_stay_put( // element to add
    ELIST2_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, nullptr);
+    STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT);
  }
 #endif

@ -405,13 +405,13 @@ inline void ELIST2_ITERATOR::add_before_then_move( // element to add
    ELIST2_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT, nullptr);
+    STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT);
  }
 #endif

@ -453,13 +453,13 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add
    ELIST2_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, nullptr);
+    STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT);
  }
 #endif

@ -502,7 +502,7 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add
 inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT);
  }
  if (!list_to_add) {
    BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
@ -553,7 +553,7 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) {
 inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT);
  }
  if (!list_to_add) {
    BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
@ -605,11 +605,11 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() {

 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::extract", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::extract", ABORT);
  }
  if (!current) { // list empty or
                  // element extracted
-    NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT, nullptr);
+    NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT);
  }
 #endif

@ -646,7 +646,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() {
 inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT);
  }
 #endif

@ -666,7 +666,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() {
 inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT);
  }
 #endif

@ -690,7 +690,7 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() {
 inline void ELIST2_ITERATOR::mark_cycle_pt() {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT);
  }
 #endif

@ -712,7 +712,7 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() {
 inline bool ELIST2_ITERATOR::at_first() const {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT);
  }
 #endif

@ -732,7 +732,7 @@ inline bool ELIST2_ITERATOR::at_first() const {
 inline bool ELIST2_ITERATOR::at_last() const {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT);
  }
 #endif

@ -752,7 +752,7 @@ inline bool ELIST2_ITERATOR::at_last() const {
 inline bool ELIST2_ITERATOR::cycled_list() const {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT);
  }
 #endif

@ -771,7 +771,7 @@ inline void ELIST2_ITERATOR::sort( // sort elements
        const void *, const void *)) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::sort", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::sort", ABORT);
  }
 #endif

@ -793,13 +793,13 @@ inline void ELIST2_ITERATOR::add_to_end( // element to add
    ELIST2_LINK *new_element) {
 #ifndef NDEBUG
  if (!list) {
-    NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr);
+    NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT);
  }
  if (!new_element) {
    BAD_PARAMETER.error("ELIST2_ITERATOR::add_to_end", ABORT, "new_element is nullptr");
  }
  if (new_element->next) {
-    STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT, nullptr);
+    STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT);
  }
 #endif

@ -821,13 +821,10 @@ inline void ELIST2_ITERATOR::add_to_end( // element to add

 #define ELIST2IZEH(CLASSNAME)                                                  \
  class CLASSNAME##_LIST : public X_LIST<ELIST2, ELIST2_ITERATOR, CLASSNAME> { \
-  public:                                                                      \
    using X_LIST<ELIST2, ELIST2_ITERATOR, CLASSNAME>::X_LIST;                  \
  };                                                                           \
-  class CLASSNAME##_IT : public X_ITER<ELIST2_ITERATOR, CLASSNAME> {           \
-  public:                                                                      \
+  struct CLASSNAME##_IT : X_ITER<ELIST2_ITERATOR, CLASSNAME> {                 \
    using X_ITER<ELIST2_ITERATOR, CLASSNAME>::X_ITER;                          \
-    CLASSNAME##_IT(CLASSNAME##_LIST *list) : X_ITER(list) {}                   \
    CLASSNAME *backward() {                                                    \
      return reinterpret_cast<CLASSNAME *>(ELIST2_ITERATOR::backward());       \
    }                                                                          \
--- a/src/ccutil/errcode.cpp
+++ b/src/ccutil/errcode.cpp
@ -91,8 +91,12 @@ void ERRCODE::error(         // handle error
 #endif
      abort();
    default:
-      BADERRACTION.error("error", ABORT, nullptr);
+      BADERRACTION.error("error", ABORT);
  }
 }

+void ERRCODE::error(const char *caller, TessErrorLogCode action) const { // no-format overload
+  error(caller, action, nullptr); // delegate to the variadic overload, passing a null format
+}
+
 } // namespace tesseract
--- a/src/ccutil/errcode.h
+++ b/src/ccutil/errcode.h
@ -31,12 +31,6 @@ enum TessErrorLogCode {
  ABORT = 2     /*abort after error */
 };

-/* Explicit Error Abort codes */
-#define NO_ABORT_CODE 0
-#define LIST_ABORT 1
-#define MEMORY_ABORT 2
-#define FILE_ABORT 3
-
 #if !defined(__GNUC__) && !defined(__attribute__)
 # define __attribute__(attr) // compiler without support for __attribute__
 #endif
@ -49,6 +43,7 @@ public:
      TessErrorLogCode action, // action to take
      const char *format, ...  // fprintf format
  ) const __attribute__((format(printf, 4, 5)));
+  void error(const char *caller, TessErrorLogCode action) const;
  constexpr ERRCODE(const char *string) : message(string) {} // initialize with string
 };

--- a/src/ccutil/fileerr.h
+++ b/src/ccutil/fileerr.h
@ -24,14 +24,6 @@
 namespace tesseract {

 constexpr ERRCODE CANTOPENFILE("Can't open file");
-constexpr ERRCODE CANTCREATEFILE("Can't create file");
-constexpr ERRCODE CANTMAKEPIPE("Can't create pipe");
-constexpr ERRCODE CANTCONNECTPIPE("Can't reconnect pipes to stdin/stdout");
-constexpr ERRCODE READFAILED("Read of file failed");
-constexpr ERRCODE WRITEFAILED("Write of file failed");
-constexpr ERRCODE SELECTFAILED("Select failed");
-
-constexpr ERRCODE EXECFAILED("Could not exec new process");

 } // namespace tesseract

--- a/src/ccutil/genericvector.h
+++ b/src/ccutil/genericvector.h
@ -41,10 +41,6 @@ public:
  GenericVector() {
    init(kDefaultVectorSize);
  }
-  GenericVector(int size, const T &init_val) {
-    init(size);
-    init_to_size(size, init_val);
-  }

  // Copy
  GenericVector(const GenericVector &other) {
@ -107,14 +103,6 @@ public:
  int push_back(T object);
  void operator+=(const T &t);

-  // Push an element in the end of the array if the same
-  // element is not already contained in the array.
-  int push_back_new(const T &object);
-
-  // Push an element in the front of the array
-  // Note: This function is O(n)
-  int push_front(const T &object);
-
  // Set the value at the given index
  void set(const T &t, int index);

@ -178,27 +166,13 @@ public:
  // bool T::Serialize(FILE* fp) const that returns false in case of error.
  // Returns false in case of error.
  bool SerializeClasses(FILE *fp) const;
-  bool SerializeClasses(TFile *fp) const;
  // Reads a vector of classes from the given file. Assumes the existence of
  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
  // this function. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
-  bool DeSerializeClasses(bool swap, FILE *fp);
  bool DeSerializeClasses(TFile *fp);

-  // Allocates a new array of double the current_size, copies over the
-  // information from data to the new location, deletes data and returns
-  // the pointed to the new larger array.
-  // This function uses memcpy to copy the data, instead of invoking
-  // operator=() for each element like double_the_size() does.
-  static T *double_the_size_memcpy(int current_size, T *data) {
-    T *data_new = new T[current_size * 2];
-    memcpy(data_new, data, sizeof(T) * current_size);
-    delete[] data;
-    return data_new;
-  }
-
  // Reverses the elements of the vector.
  void reverse() {
    for (int i = 0; i < size_used_ / 2; ++i) {
@ -221,26 +195,6 @@ public:
    qsort(data_, size_used_, sizeof(*data_), comparator);
  }

-  // Searches the array (assuming sorted in ascending order, using sort()) for
-  // an element equal to target and returns the index of the best candidate.
-  // The return value is conceptually the largest index i such that
-  // data_[i] <= target or 0 if target < the whole vector.
-  // NOTE that this function uses operator> so really the return value is
-  // the largest index i such that data_[i] > target is false.
-  int binary_search(const T &target) const {
-    int bottom = 0;
-    int top = size_used_;
-    while (top - bottom > 1) {
-      int middle = (bottom + top) / 2;
-      if (data_[middle] > target) {
-        top = middle;
-      } else {
-        bottom = middle;
-      }
-    }
-    return bottom;
-  }
-
  // Swaps the elements with the given indices.
  void swap(int index1, int index2) {
    if (index1 != index2) {
@ -307,11 +261,6 @@ inline bool SaveDataToFile(const GenericVector<char> &data, const char *filename
  return result;
 }

-template <typename T>
-bool cmp_eq(T const &t1, T const &t2) {
-  return t1 == t2;
-}
-
 // Used by sort()
 // return < 0 if t1 < t2
 // return 0 if t1 == t2
@ -632,29 +581,6 @@ int GenericVector<T>::push_back(T object) {
  return index;
 }

-template <typename T>
-int GenericVector<T>::push_back_new(const T &object) {
-  int index = get_index(object);
-  if (index >= 0) {
-    return index;
-  }
-  return push_back(object);
-}
-
-// Add an element in the array (front)
-template <typename T>
-int GenericVector<T>::push_front(const T &object) {
-  if (size_used_ == size_reserved_) {
-    double_the_size();
-  }
-  for (int i = size_used_; i > 0; --i) {
-    data_[i] = data_[i - 1];
-  }
-  data_[0] = object;
-  ++size_used_;
-  return 0;
-}
-
 template <typename T>
 void GenericVector<T>::operator+=(const T &t) {
  push_back(t);
@ -831,18 +757,6 @@ bool GenericVector<T>::SerializeClasses(FILE *fp) const {
  }
  return true;
 }
-template <typename T>
-bool GenericVector<T>::SerializeClasses(TFile *fp) const {
-  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
-    return false;
-  }
-  for (int i = 0; i < size_used_; ++i) {
-    if (!data_[i].Serialize(fp)) {
-      return false;
-    }
-  }
-  return true;
-}

 // Reads a vector of classes from the given file. Assumes the existence of
 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
@ -850,24 +764,6 @@ bool GenericVector<T>::SerializeClasses(TFile *fp) const {
 // this function. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
 template <typename T>
-bool GenericVector<T>::DeSerializeClasses(bool swap, FILE *fp) {
-  int32_t reserved;
-  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
-    return false;
-  }
-  if (swap) {
-    Reverse32(&reserved);
-  }
-  T empty;
-  init_to_size(reserved, empty);
-  for (int i = 0; i < reserved; ++i) {
-    if (!data_[i].DeSerialize(swap, fp)) {
-      return false;
-    }
-  }
-  return true;
-}
-template <typename T>
 bool GenericVector<T>::DeSerializeClasses(TFile *fp) {
  int32_t reserved;
  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
--- a/src/ccutil/helpers.h
+++ b/src/ccutil/helpers.h
@ -93,13 +93,6 @@ inline void chomp_string(char *str) {
  }
 }

-// Advance the current pointer of the file if it points to a newline character.
-inline void SkipNewline(FILE *file) {
-  if (fgetc(file) != '\n') {
-    fseek(file, -1, SEEK_CUR);
-  }
-}
-
 // return the smallest multiple of block_size greater than or equal to n.
 inline int RoundUp(int n, int block_size) {
  return block_size * ((n + block_size - 1) / block_size);
@ -197,21 +190,11 @@ inline void ReverseN(void *ptr, int num_bytes) {
  }
 }

-// Reverse the order of bytes in a 16 bit quantity for big/little-endian switch.
-inline void Reverse16(void *ptr) {
-  ReverseN(ptr, 2);
-}
-
 // Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
 inline void Reverse32(void *ptr) {
  ReverseN(ptr, 4);
 }

-// Reverse the order of bytes in a 64 bit quantity for big/little-endian switch.
-inline void Reverse64(void *ptr) {
-  ReverseN(ptr, 8);
-}
-
 // Reads a vector of simple types from the given file. Assumes that bitwise
 // read/write will work with ReverseN according to sizeof(T).
 // Returns false in case of error.
--- a/src/ccutil/lsterr.h
+++ b/src/ccutil/lsterr.h
@ -23,12 +23,9 @@

 namespace tesseract {

-constexpr ERRCODE SERIALISE_LINKS("Attempted to (de)serialise a link element");
-
 #ifndef NDEBUG

 constexpr ERRCODE NO_LIST("Iterator not set to a list");
-constexpr ERRCODE NULL_OBJECT("List found this = nullptr!");
 constexpr ERRCODE NULL_DATA("List would have returned a nullptr data pointer");
 constexpr ERRCODE NULL_CURRENT("List current position is nullptr");
 constexpr ERRCODE NULL_NEXT("Next element on the list is nullptr");
--- a/src/ccutil/mainblk.cpp
+++ b/src/ccutil/mainblk.cpp
@ -1,82 +0,0 @@
-/**********************************************************************
- * File:        mainblk.cpp  (Formerly main.c)
- * Description: Function to call from main() to setup.
- * Author:      Ray Smith
- *
- * (C) Copyright 1991, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#include <cstdlib>
-#include <cstring> // for std::strrchr
-#if defined(_WIN32)
-#  include <io.h> // for _access
-#endif
-
-#include "ccutil.h"
-#include "fileerr.h"
-
-namespace tesseract {
-/**
- * @brief CCUtil::main_setup - set location of tessdata and name of image
- *
- * @param argv0 - paths to the directory with language files and config files.
- * An actual value of argv0 is used if not nullptr, otherwise TESSDATA_PREFIX is
- * used if not nullptr, next try to use compiled in -DTESSDATA_PREFIX. If
- * previous is not successful - use current directory.
- * @param basename - name of image
- */
-void CCUtil::main_setup(const std::string &argv0, const std::string &basename) {
-  imagebasename = basename; /**< name of image */
-
-  char *tessdata_prefix = getenv("TESSDATA_PREFIX");
-
-  if (!argv0.empty()) {
-    /* Use tessdata prefix from the command line. */
-    datadir = argv0;
-  } else if (tessdata_prefix) {
-    /* Use tessdata prefix from the environment. */
-    datadir = tessdata_prefix;
-#if defined(_WIN32)
-  } else if (datadir.empty() || _access(datadir.c_str(), 0) != 0) {
-    /* Look for tessdata in directory of executable. */
-    char path[_MAX_PATH];
-    DWORD length = GetModuleFileName(nullptr, path, sizeof(path));
-    if (length > 0 && length < sizeof(path)) {
-      char *separator = std::strrchr(path, '\\');
-      if (separator != nullptr) {
-        *separator = '\0';
-        datadir = path;
-        datadir += "/tessdata";
-      }
-    }
-#endif /* _WIN32 */
-#if defined(TESSDATA_PREFIX)
-  } else {
-    // Use tessdata prefix which was compiled in.
-    datadir = TESSDATA_PREFIX "/tessdata";
-#endif
-  }
-
-  // datadir may still be empty:
-  if (datadir.empty()) {
-    datadir = "./";
-  }
-
-  // check for missing directory separator
-  const char *lastchar = datadir.c_str();
-  lastchar += datadir.length() - 1;
-  if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) {
-    datadir += "/";
-  }
-}
-} // namespace tesseract
--- a/src/ccutil/object_cache.h
+++ b/src/ccutil/object_cache.h
@ -43,7 +43,8 @@ public:
        tprintf(
            "ObjectCache(%p)::~ObjectCache(): WARNING! LEAK! object %p "
            "still has count %d (id %s)\n",
-            this, it.object, it.count, it.id.c_str());
+            static_cast<void *>(this), static_cast<void *>(it.object),
+            it.count, it.id.c_str());
      } else {
        delete it.object;
        it.object = nullptr;
--- a/src/ccutil/serialis.h
+++ b/src/ccutil/serialis.h
@ -29,14 +29,6 @@

 namespace tesseract {

-/***********************************************************************
-  QUOTE_IT   MACRO DEFINITION
-  ===========================
-Replace <parm> with "<parm>".  <parm> may be an arbitrary number of tokens
-***********************************************************************/
-
-#define QUOTE_IT(parm) #parm
-
 // Return number of elements of an array.
 template <typename T, size_t N>
 constexpr size_t countof(T const (&)[N]) noexcept {
--- a/src/ccutil/tessdatamanager.cpp
+++ b/src/ccutil/tessdatamanager.cpp
@ -48,7 +48,7 @@ TessdataManager::TessdataManager(FileReader reader)
  SetVersionString(TESSERACT_VERSION_STR);
 }

-// Lazily loads from the the given filename. Won't actually read the file
+// Lazily loads from the given filename. Won't actually read the file
 // until it needs it.
 void TessdataManager::LoadFileLater(const char *data_file_name) {
  Clear();
--- a/src/ccutil/tessdatamanager.h
+++ b/src/ccutil/tessdatamanager.h
@ -138,7 +138,7 @@ public:
    return is_loaded_;
  }

-  // Lazily loads from the the given filename. Won't actually read the file
+  // Lazily loads from the given filename. Won't actually read the file
  // until it needs it.
  void LoadFileLater(const char *data_file_name);
  /**
--- a/src/ccutil/tprintf.h
+++ b/src/ccutil/tprintf.h
@ -38,4 +38,6 @@ extern TESS_API void tprintf( // Trace printf

 } // namespace tesseract

+#undef __attribute__
+
 #endif // define TESSERACT_CCUTIL_TPRINTF_H
--- a/src/ccutil/unicharset.h
+++ b/src/ccutil/unicharset.h
@ -614,7 +614,6 @@ public:
    if (INVALID_UNICHAR_ID == unichar_id) {
      *width = 0.0f;
      *width_sd = 0.0f;
-      ;
      return;
    }
    ASSERT_HOST(contains_unichar_id(unichar_id));
--- a/src/classify/adaptmatch.cpp
+++ b/src/classify/adaptmatch.cpp
@ -276,7 +276,7 @@ void Classify::LearnWord(const char *fontname, WERD_RES *word) {
      tprintf("\n\nAdapting to word = %s\n", word->best_choice->debug_string().c_str());
    }
    thresholds = new float[word_len];
-    word->ComputeAdaptionThresholds(certainty_scale, matcher_perfect_threshold,
+    word->ComputeAdaptionThresholds(getDict().certainty_scale, matcher_perfect_threshold,
                                    matcher_good_threshold, matcher_rating_margin, thresholds);
  }
  int start_blob = 0;
--- a/src/classify/classify.cpp
+++ b/src/classify/classify.cpp
@ -101,7 +101,6 @@ Classify::Classify()
                    "its expected textline position",
                    this->params())
    , double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params())
-    , double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", this->params())
    , double_MEMBER(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used",
                    this->params())
    , double_MEMBER(classify_adapted_pruning_factor, 2.5,
--- a/src/classify/classify.h
+++ b/src/classify/classify.h
@ -394,7 +394,6 @@ public:
  double_VAR_H(matcher_clustering_max_angle_delta);
  double_VAR_H(classify_misfit_junk_penalty);
  double_VAR_H(rating_scale);
-  double_VAR_H(certainty_scale);
  double_VAR_H(tessedit_class_miss_scale);
  double_VAR_H(classify_adapted_pruning_factor);
  double_VAR_H(classify_adapted_pruning_threshold);
--- a/src/classify/cluster.cpp
+++ b/src/classify/cluster.cpp
@ -28,7 +28,6 @@

 #include <cfloat> // for FLT_MAX
 #include <cmath>  // for M_PI
-#include <array>  // for std::array
 #include <vector> // for std::vector

 namespace tesseract {
--- a/src/classify/intproto.h
+++ b/src/classify/intproto.h
@ -66,7 +66,7 @@ class FCOORD;

 /* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the
 * 3 axes of the quantized feature space.
- * The position of the the bits recorded for each class in the
+ * The position of the bits recorded for each class in the
 * 4th dimension is determined by using CPrunerWordIndexFor(c),
 * where c is the corresponding class id. */
 struct CLASS_PRUNER_STRUCT {
--- a/src/classify/shapetable.h
+++ b/src/classify/shapetable.h
@ -44,7 +44,7 @@ struct UnicharRating {
    tprintf(
        "Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%u,"
        " %zu fonts\n",
-        unichar_id, rating, adapted, config, feature_misses, fonts.size());
+        unichar_id, static_cast<double>(rating), adapted, config, feature_misses, fonts.size());
  }

  // Helper function to get the index of the first result with the required
--- a/src/dict/trie.h
+++ b/src/dict/trie.h
@ -333,7 +333,7 @@ protected:

  // Finds the edge with the given direction, word_end and unichar_id
  // in the node indicated by node_ref. Fills in the pointer to the
-  // EDGE_RECORD and the index of the edge with the the values
+  // EDGE_RECORD and the index of the edge with the values
  // corresponding to the edge found. Returns true if an edge was found.
  bool edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, bool word_end,
                    UNICHAR_ID unichar_id, EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const;
--- a/src/lstm/functions.cpp
+++ b/src/lstm/functions.cpp
@ -1,4 +1,4 @@
-// Generated code with lookup tables
+// Generated code with lookup tables (see generate_lut.py)
 #include "functions.h"
 namespace tesseract {
 const TFloat TanhTable[] = {
--- a/src/lstm/functions.h
+++ b/src/lstm/functions.h
@ -42,13 +42,13 @@ extern const TFloat LogisticTable[];

 // Non-linearity (sigmoid) functions with cache tables and clipping.
 inline TFloat Tanh(TFloat x) {
-  if (x < 0.0) {
+  if (x < 0) {
    return -Tanh(-x);
  }
  x *= kScaleFactor;
  auto index = static_cast<unsigned>(x);
  if (index >= (kTableSize - 1)) {
-    return 1.0;
+    return 1;
  }
  TFloat tanh_i0 = TanhTable[index];
  TFloat tanh_i1 = TanhTable[index + 1];
@ -57,13 +57,13 @@ inline TFloat Tanh(TFloat x) {
 }

 inline TFloat Logistic(TFloat x) {
-  if (x < 0.0) {
-    return 1.0 - Logistic(-x);
+  if (x < 0) {
+    return 1 - Logistic(-x);
  }
  x *= kScaleFactor;
  auto index = static_cast<unsigned>(x);
  if (index >= (kTableSize - 1)) {
-    return 1.0;
+    return 1;
  }
  TFloat l0 = LogisticTable[index];
  TFloat l1 = LogisticTable[index + 1];
@ -79,36 +79,36 @@ struct FFunc {
 };
 struct FPrime {
  inline TFloat operator()(TFloat y) const {
-    return y * (1.0 - y);
+    return y * (1 - y);
  }
 };
 struct ClipFFunc {
  inline TFloat operator()(TFloat x) const {
-    if (x <= 0.0) {
-      return 0.0;
+    if (x <= 0) {
+      return 0;
    }
-    if (x >= 1.0) {
-      return 1.0;
+    if (x >= 1) {
+      return 1;
    }
    return x;
  }
 };
 struct ClipFPrime {
  inline TFloat operator()(TFloat y) const {
-    return 0.0 < y && y < 1.0 ? 1.0 : 0.0;
+    return 0 < y && y < 1 ? 1 : 0;
  }
 };
 struct Relu {
  inline TFloat operator()(TFloat x) const {
-    if (x <= 0.0) {
-      return 0.0;
+    if (x <= 0) {
+      return 0;
    }
    return x;
  }
 };
 struct ReluPrime {
  inline TFloat operator()(TFloat y) const {
-    return 0.0 < y ? 1.0 : 0.0;
+    return 0 < y ? 1 : 0;
  }
 };
 struct GFunc {
@ -118,23 +118,23 @@ struct GFunc {
 };
 struct GPrime {
  inline TFloat operator()(TFloat y) const {
-    return 1.0 - y * y;
+    return 1 - y * y;
  }
 };
 struct ClipGFunc {
  inline TFloat operator()(TFloat x) const {
-    if (x <= -1.0) {
-      return -1.0;
+    if (x <= -1) {
+      return -1;
    }
-    if (x >= 1.0) {
-      return 1.0;
+    if (x >= 1) {
+      return 1;
    }
    return x;
  }
 };
 struct ClipGPrime {
  inline TFloat operator()(TFloat y) const {
-    return -1.0 < y && y < 1.0 ? 1.0 : 0.0;
+    return -1 < y && y < 1 ? 1 : 0;
  }
 };
 struct HFunc {
@ -183,7 +183,7 @@ inline void SoftmaxInPlace(int n, T *inout) {
    return;
  }
  // A limit on the negative range input to exp to guarantee non-zero output.
-  const T kMaxSoftmaxActivation = 86.0f;
+  const T kMaxSoftmaxActivation = 86;

  T max_output = inout[0];
  for (int i = 1; i < n; i++) {
@ -192,14 +192,14 @@ inline void SoftmaxInPlace(int n, T *inout) {
      max_output = output;
    }
  }
-  T prob_total = 0.0;
+  T prob_total = 0;
  for (int i = 0; i < n; i++) {
    T prob = inout[i] - max_output;
-    prob = exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
+    prob = std::exp(ClipToRange(prob, -kMaxSoftmaxActivation, static_cast<T>(0)));
    prob_total += prob;
    inout[i] = prob;
  }
-  if (prob_total > 0.0) {
+  if (prob_total > 0) {
    for (int i = 0; i < n; i++) {
      inout[i] /= prob_total;
    }
@ -207,7 +207,7 @@ inline void SoftmaxInPlace(int n, T *inout) {
 }

 // Copies n values of the given src vector to dest.
-inline void CopyVector(int n, const TFloat *src, TFloat *dest) {
+inline void CopyVector(unsigned n, const TFloat *src, TFloat *dest) {
  memcpy(dest, src, n * sizeof(dest[0]));
 }

@ -242,7 +242,7 @@ inline void SumVectors(int n, const TFloat *v1, const TFloat *v2, const TFloat *

 // Sets the given n-vector vec to 0.
 template <typename T>
-inline void ZeroVector(int n, T *vec) {
+inline void ZeroVector(unsigned n, T *vec) {
  memset(vec, 0, n * sizeof(*vec));
 }

--- a/src/lstm/generate_lut.py
+++ b/src/lstm/generate_lut.py
@ -4,22 +4,24 @@

 import math

+# kTableSize and kScaleFactor must match the values in functions.h.
+
 # Size of static tables.
 kTableSize = 4096
 # Scale factor for float arg to int index.
 kScaleFactor = 256.0

-print("// Generated code with lookup tables")
+print("// Generated code with lookup tables (see generate_lut.py)")
 print('#include "functions.h"')
 print("namespace tesseract {")

-print("const double TanhTable[] = {")
+print("const TFloat TanhTable[] = {")
 for i in range(kTableSize):
-    print("  %a," % math.tanh(i / kScaleFactor))
+    print("    %a," % math.tanh(i / kScaleFactor))
 print("};")

-print("const double LogisticTable[] = {")
+print("const TFloat LogisticTable[] = {")
 for i in range(kTableSize):
-    print("  %a," % (1 / (1 + math.exp(-i / kScaleFactor))))
+    print("    %a," % (1 / (1 + math.exp(-i / kScaleFactor))))
 print("};")
-print("}  // namespace tesseract.")
+print("} // namespace tesseract.")
--- a/src/lstm/lstm.cpp
+++ b/src/lstm/lstm.cpp
@ -28,7 +28,7 @@
 #include <cstdlib>
 #include <sstream> // for std::ostringstream

-#if !defined(__GNUC__) && defined(_MSC_VER)
+#if defined(_MSC_VER) && !defined(__clang__)
 #  include <intrin.h> // _BitScanReverse
 #endif

--- a/src/lstm/lstmrecognizer.cpp
+++ b/src/lstm/lstmrecognizer.cpp
@ -294,7 +294,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
 void LSTMRecognizer::OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output,
                                 float *sd) {
  const int kOutputScale = INT8_MAX;
-  STATS stats(0, kOutputScale + 1);
+  STATS stats(0, kOutputScale);
  for (int t = 0; t < outputs.Width(); ++t) {
    int best_label = outputs.BestLabel(t, nullptr);
    if (best_label != null_char_) {
--- a/src/lstm/networkio.cpp
+++ b/src/lstm/networkio.cpp
@ -127,7 +127,7 @@ void NetworkIO::ZeroInvalidElements() {
 static void ComputeBlackWhite(Image pix, float *black, float *white) {
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
-  STATS mins(0, 256), maxes(0, 256);
+  STATS mins(0, 255), maxes(0, 255);
  if (width >= 3) {
    int y = height / 2;
    l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y;
@ -412,15 +412,6 @@ void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_feature
  }
 }

-// Zeroes a single time step.
-void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
-  if (int_mode_) {
-    ZeroVector(num_features, i_[t] + offset);
-  } else {
-    ZeroVector(num_features, f_[t] + offset);
-  }
-}
-
 // Sets the given range to random values.
 void NetworkIO::Randomize(int t, int offset, int num_features, TRand *randomizer) {
  if (int_mode_) {
--- a/src/lstm/networkio.h
+++ b/src/lstm/networkio.h
@ -2,7 +2,6 @@
 // File:        networkio.h
 // Description: Network input/output data, allowing float/int implementations.
 // Author:      Ray Smith
-// Created:     Tue Jun 17 08:43:11 PST 2014
 //
 // (C) Copyright 2014, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -146,9 +145,12 @@ public:
                           int src_t, int src_offset);
  // Zeroes a single time step.
  void ZeroTimeStep(int t) {
-    ZeroTimeStepGeneral(t, 0, NumFeatures());
+    if (int_mode_) {
+      memset(i_[t], 0, sizeof(*i_[t]) * NumFeatures());
+    } else {
+      memset(f_[t], 0, sizeof(*f_[t]) * NumFeatures());
+    }
  }
-  void ZeroTimeStepGeneral(int t, int offset, int num_features);
  // Sets the given range to random values.
  void Randomize(int t, int offset, int num_features, TRand *randomizer);

--- a/src/lstm/parallel.h
+++ b/src/lstm/parallel.h
@ -28,7 +28,6 @@ public:
  // ni_ and no_ will be set by AddToStack.
  TESS_API
  Parallel(const char *name, NetworkType type);
-  ~Parallel() override = default;

  // Returns the shape output from the network given an input shape (which may
  // be partially unknown ie zero).
--- a/src/lstm/plumbing.h
+++ b/src/lstm/plumbing.h
@ -25,7 +25,7 @@
 namespace tesseract {

 // Holds a collection of other networks and forwards calls to each of them.
-class Plumbing : public Network {
+class TESS_API Plumbing : public Network {
 public:
  // ni_ and no_ will be set by AddToStack.
  explicit Plumbing(const std::string &name);
@ -103,10 +103,8 @@ public:
    return stack_;
  }
  // Returns a set of strings representing the layer-ids of all layers below.
-  TESS_API
  void EnumerateLayers(const std::string *prefix, std::vector<std::string> &layers) const;
  // Returns a pointer to the network layer corresponding to the given id.
-  TESS_API
  Network *GetLayer(const char *id) const;
  // Returns the learning rate for a specific layer of the stack.
  float LayerLearningRate(const char *id) {
@ -129,7 +127,6 @@ public:
  }

  // Returns a pointer to the learning rate for the given layer id.
-  TESS_API
  float *LayerLearningRatePtr(const char *id);

  // Writes to the given file. Returns false in case of error.
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@ -24,12 +24,6 @@
 #include "unicharcompress.h"

 #include <algorithm> // for std::reverse
-#include <deque>
-#include <map>
-#include <set>
-#include <tuple>
-#include <unordered_set>
-#include <vector>

 namespace tesseract {

--- a/src/lstm/recodebeam.h
+++ b/src/lstm/recodebeam.h
@ -29,11 +29,8 @@
 #include "ratngs.h"
 #include "unicharcompress.h"

-#include <deque>
-#include <set>
-#include <tuple>
-#include <unordered_set>
-#include <vector>
+#include <unordered_set> // for std::unordered_set
+#include <vector>        // for std::vector

 namespace tesseract {

--- a/src/lstm/weightmatrix.cpp
+++ b/src/lstm/weightmatrix.cpp
@ -525,7 +525,7 @@ static void HistogramWeight(TFloat weight, STATS *histogram) {
 }

 void WeightMatrix::Debug2D(const char *msg) {
-  STATS histogram(0, kHistogramBuckets);
+  STATS histogram(0, kHistogramBuckets - 1);
  if (int_mode_) {
    for (int i = 0; i < wi_.dim1(); ++i) {
      for (int j = 0; j < wi_.dim2(); ++j) {
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .0.0
 .1.0