diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..16fb91b0
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Question
+    url: https://groups.google.com/g/tesseract-ocr
+    about: Please ask questions in our forum
diff --git a/.github/ISSUE_TEMPLATE/issue-bug.yml b/.github/ISSUE_TEMPLATE/issue-bug.yml
index 92bdf14c..61a088aa 100644
--- a/.github/ISSUE_TEMPLATE/issue-bug.yml
+++ b/.github/ISSUE_TEMPLATE/issue-bug.yml
@@ -16,11 +16,22 @@ body:
         * Please provide the input image.
         * Also provide output files (txt and/or tsv, hocr, pdf). You can make a zip archive that will contain these files, so GitHub will let you upload them.
         * Don't attach a screenshot of the command line and output. Instead, copy the text and paste it in your bug report.
+
+        Windows versions 7, 8, 8.1 are not supported.
   - type: textarea
     attributes:
-      label: Basic Information
+      label: Current Behavior
+  - type: textarea
+    attributes:
+      label: Expected Behavior
+  - type: textarea
+    attributes:
+      label: Suggested Fix
+  - type: textarea
+    attributes:
+      label: tesseract -v
       description: Version info, compiled libraries, SIMD, OpenMP
-      placeholder: "Paste the output of the command: tesseract -v"
+      placeholder: "Please paste the output of the command: tesseract -v"
   - type: dropdown
     id: os-linux
     attributes:
@@ -30,12 +41,13 @@ body:
       options:
         - Windows 11
         - Windows 10
+        - macOS 14 Sonoma
         - macOS 13 Ventura
         - macOS 12 Monterey
         - macOS 11 Big Sur
         - Ubuntu 22.04 Jammy
         - Ubuntu 20.04 Focal
-        - Debian Testing Bookworm
+        - Debian 12 Bookworm
         - Debian 11 Bullseye
         - RHEL 9
         - RHEL 8
@@ -52,23 +64,14 @@ body:
     attributes:
       label: Compiler
       placeholder: "Enter compiler name and version (Examples: MSVC 2019 16.11, Clang 13.0.1, GCC 11.2, Xcode 14.1)"
-  - type: textarea
-    attributes:
-      label: Virtualization / Containers
-      placeholder: "Enter the name and version of the VM / container which you use (Examples: Oracle VM VirtualBox 7.0.4,VMware Workstation 17.0, Hyper-V, Docker 20.10.22)"
   - type: textarea
     attributes:
       label: CPU
       placeholder: "Enter your CPU vendor name and model (Examples: Intel Core i7-11700K, AMD Ryzen 7 5800X, Apple Silicon M1)"
   - type: textarea
     attributes:
-      label: Current Behavior
-  - type: textarea
-    attributes:
-      label: Expected Behavior
-  - type: textarea
-    attributes:
-      label: Suggested Fix
+      label: Virtualization / Containers
+      placeholder: "Enter the name and version of the VM / container which you use (Examples: Oracle VM VirtualBox 7.0.4,VMware Workstation 17.0, Hyper-V, Docker 20.10.22)"
   - type: textarea
     attributes:
       label: Other Information
diff --git a/.github/workflows/autotools.yml b/.github/workflows/autotools.yml
index 3639c701..418287e6 100644
--- a/.github/workflows/autotools.yml
+++ b/.github/workflows/autotools.yml
@@ -13,15 +13,14 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - { name: ubuntu-20.04-clang-7-autotools, os: ubuntu-20.04, cxx: clang++-7 }
-          - { name: ubuntu-20.04-clang-8-autotools, os: ubuntu-20.04, cxx: clang++-8 } #installed
-          - { name: ubuntu-20.04-clang-9-autotools, os: ubuntu-20.04, cxx: clang++-9 } #installed
-          - { name: ubuntu-20.04-clang-10-autotools, os: ubuntu-20.04, cxx: clang++-10 } #installed
+          - { name: ubuntu-22.04-clang-15-autotools, os: ubuntu-22.04, cxx: clang++-15 }
+          - { name: ubuntu-22.04-clang-14-autotools, os: ubuntu-22.04, cxx: clang++-14 } #installed
 
-          - { name: ubuntu-20.04-gcc-7-autotools, os: ubuntu-20.04, cxx: g++-7 } #installed
-          - { name: ubuntu-20.04-gcc-8-autotools, os: ubuntu-20.04, cxx: g++-8 } #installed
-          - { name: ubuntu-20.04-gcc-9-autotools, os: ubuntu-20.04, cxx: g++-9 } #installed
+          - { name: ubuntu-22.04-gcc-12-autotools, os: ubuntu-22.04, cxx: g++-12 } #installed
+          - { name: ubuntu-22.04-gcc-11-autotools, os: ubuntu-22.04, cxx: g++-11 } #installed
           - { name: ubuntu-20.04-gcc-10-autotools, os: ubuntu-20.04, cxx: g++-10 } #installed
+          - { name: ubuntu-20.04-gcc-9-autotools, os: ubuntu-20.04, cxx: g++-9 } #installed
+          - { name: ubuntu-20.04-gcc-8-autotools, os: ubuntu-20.04, cxx: g++-8 }
 
     steps:
     - uses: actions/checkout@v3
diff --git a/.github/workflows/cmake-win64.yml b/.github/workflows/cmake-win64.yml
index bbcd5f1e..d144f759 100644
--- a/.github/workflows/cmake-win64.yml
+++ b/.github/workflows/cmake-win64.yml
@@ -26,7 +26,7 @@ jobs:
         run: |
              $git_info=$(git describe --tags HEAD)
              echo "version=${git_info}" >> $env:GITHUB_OUTPUT
-      - name: Setup Instalation Location
+      - name: Setup Installation Location
         run: |
              mkdir ${{env.ILOC}}
       - name: Uninstall Perl
@@ -45,8 +45,8 @@ jobs:
       - name: Build and Install libpng
         shell: cmd
         run: |
-             curl -sSL -o lpng1639.zip https://download.sourceforge.net/libpng/lpng1639.zip
-             unzip.exe  -qq lpng1639.zip
-             cd lpng1639
+             curl -sSL -o lpng1640.zip https://download.sourceforge.net/libpng/lpng1640.zip
+             unzip.exe  -qq lpng1640.zip
+             cd lpng1640
              cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -DPNG_TESTS=OFF -DPNG_SHARED=OFF
              cmake --build build --config Release --target install
@@ -73,7 +73,7 @@ jobs:
       - name: Build and Install libtiff
         shell: cmd
         run: |
-             git clone -c advice.detachedHead=false -b "v4.0.10" --depth 1 https://gitlab.com/libtiff/libtiff.git
+             git clone -c advice.detachedHead=false -b "v4.6.0" --depth 1 https://gitlab.com/libtiff/libtiff.git
              cd libtiff
              cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${{env.ILOC}} -DCMAKE_INSTALL_PREFIX=${{env.ILOC}} -Dtiff-tools=OFF -Dtiff-tests=OFF -Dtiff-contrib=OFF -Dtiff-docs=OFF
              cmake --build build --config Release --target install
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 288fb924..5e6750e0 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -15,20 +15,19 @@ jobs:
       matrix:
         config:
 
-          - { name: macos-11-clang-12-cmake, os: macos-11, cxx: clang++ } # default
-          # - { name: macos-11-clang-11-cmake, os: macos-11, cxx: '$(brew --prefix llvm)/bin/clang++' }  #installed
-          - { name: macos-11-gcc-9-cmake, os: macos-11, cxx: g++-9 } #installed
-          - { name: macos-11-gcc-10-cmake, os: macos-11, cxx: g++-10 } #installed
-          - { name: macos-11-gcc-11-cmake, os: macos-11, cxx: g++-11 } #installed
+          - { name: macos-12-clang-14-cmake, os: macos-12, cxx: clang++ } # default
+          - { name: macos-11-clang-13-cmake, os: macos-11, cxx: clang++ } # default
 
-          - { name: ubuntu-20.04-clang-7-cmake, os: ubuntu-20.04, cxx: clang++-7 }
-          - { name: ubuntu-20.04-clang-8-cmake, os: ubuntu-20.04, cxx: clang++-8 } #installed
-          - { name: ubuntu-20.04-clang-9-cmake, os: ubuntu-20.04, cxx: clang++-9 } #installed
-          - { name: ubuntu-20.04-clang-10-cmake, os: ubuntu-20.04, cxx: clang++-10 } #installed
+          - { name: macos-11-gcc-12-cmake, os: macos-11, cxx: g++-12 } #installed
 
-          - { name: ubuntu-20.04-gcc-8-cmake, os: ubuntu-20.04, cxx: g++-8 } #installed
-          - { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } #installed
+          - { name: ubuntu-22.04-clang-15-cmake, os: ubuntu-22.04, cxx: clang++-15 } #installed
+          - { name: ubuntu-22.04-clang-14-cmake, os: ubuntu-22.04, cxx: clang++-14 } #installed
+
+          - { name: ubuntu-22.04-gcc-12-cmake, os: ubuntu-22.04, cxx: g++-12 } #installed
+          - { name: ubuntu-22.04-gcc-11-cmake, os: ubuntu-22.04, cxx: g++-11 } #installed
           - { name: ubuntu-20.04-gcc-10-cmake, os: ubuntu-20.04, cxx: g++-10 } #installed
+          - { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } #installed
+          - { name: ubuntu-20.04-gcc-8-cmake, os: ubuntu-20.04, cxx: g++-8 }
 
     steps:
       - name: Install compilers on Linux
diff --git a/.github/workflows/sw.yml b/.github/workflows/sw.yml
index 1d735d14..276e19a4 100644
--- a/.github/workflows/sw.yml
+++ b/.github/workflows/sw.yml
@@ -22,13 +22,21 @@ on:
 jobs:
   build:
     runs-on: ${{ matrix.os }}
+    container: ${{ matrix.container }}
     strategy:
       fail-fast: false
       matrix:
-        os: [windows-2022, ubuntu-22.04, ubuntu-20.04, macos-12]
+        os: [windows-2022, macos-13]
+        include:
+          - os: ubuntu-22.04
+            container: fedora:latest
 
     steps:
-    - uses: actions/checkout@v3
+    - name: packages
+      if: matrix.os == 'ubuntu-22.04'
+      run: sudo dnf -y install cmake gcc lld which flex bison clang clang-tools-extra git
+
+    - uses: actions/checkout@v4
       with:
         submodules: recursive
     - uses: egorpugin/sw-action@master
@@ -59,7 +67,7 @@ jobs:
       shell: pwsh
 
     - name: test
-      if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022')
+      if: github.event_name != 'pull_request' && (matrix.os != 'windows-2022' && matrix.os != 'macos-13')
       run: ./sw -static -shared -config "d,r" test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
       continue-on-error: true
     - name: test
@@ -68,13 +76,13 @@ jobs:
       continue-on-error: true
 
     - name: test-nightly
-      if: matrix.os != 'windows-2022' && matrix.os != 'macos-12' && github.event.schedule=='0 0 * * *'
+      if: matrix.os != 'windows-2022' && matrix.os != 'macos-13' && github.event.schedule=='0 0 * * *'
       run: ./sw -static -shared -config "d,r" test -Dwith-tests=1
       continue-on-error: true
 
-    # windows and macos-12 tests hang here for some reason, investigate
+    # windows and macos-13 tests hang here for some reason, investigate
     #- name: test
-      #if: matrix.os == 'windows-2022' || matrix.os == 'macos-12'
+      #if: matrix.os == 'windows-2022' || matrix.os == 'macos-13'
       #run: ./sw test -Dwith-tests=1 "-Dskip-tests=lstm,lstm_recode"
       #continue-on-error: true
 
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 179a33e6..e879ecda 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -5,6 +5,7 @@ on:
   #push:
   schedule:
     - cron: 0 0 * * *
+  workflow_dispatch:
 
 jobs:
   sanitizers:
@@ -14,13 +15,20 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - { name: ubuntu-20.04-gcc-unittest, os: ubuntu-20.04, cxx: g++ }
-          - { name: ubuntu-22.04-clang-unittest, os: ubuntu-22.04, cxx: clang++ }
+          - { name: ubuntu-20.04-gcc-unittest, os: ubuntu-20.04, cxx: g++, cxxflags: '-g -O2 -fsanitize=address,undefined' }
+          - { name: ubuntu-22.04-clang-unittest, os: ubuntu-22.04, cxx: clang++, cxxflags: '-g -O2 -fsanitize=address,undefined -stdlib=libc++' }
     steps:
     - uses: actions/checkout@v3
       with:
         submodules: recursive
 
+    - name: Remove Homebrew, Android and .NET to provide more disk space
+      run: |
+           # https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150
+           sudo rm -rf /home/linuxbrew # will release Homebrew
+           sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
+           sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
+
     - name: Install dependencies (Linux)
       run: |
            sudo apt-get update
@@ -35,7 +43,7 @@ jobs:
     - name: Configure (Linux)
       run: |
            ./configure '--disable-shared' 'CXX=${{ matrix.config.cxx }}' \
-               'CXXFLAGS=-g -O2 -fsanitize=address,undefined'
+               'CXXFLAGS=${{ matrix.config.cxxflags }}'
 
     - name: Make and Install Tesseract
       run: |
diff --git a/.github/workflows/vcpkg-4.1.1.yml b/.github/workflows/vcpkg-4.1.1.yml
deleted file mode 100644
index fe71d4db..00000000
--- a/.github/workflows/vcpkg-4.1.1.yml
+++ /dev/null
@@ -1,109 +0,0 @@
-name: vcpkg-4.1.1
-# build tesseract 4.1 using vcpkg and cmake on ubuntu and windows.
-# build and run basicapitest on windows.
-# macos fails on leptonica build - https://github.com/microsoft/vcpkg/issues/16116
-on:
-  #push:
-  schedule:
-    - cron: 0 22 1 * *
-
-jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest, windows-latest]
-
-    steps:
-      - name: Checkout Tesseract Source (for test images)
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Install vcpkg (Linux)
-        run: |
-             git clone https://github.com/microsoft/vcpkg
-             vcpkg/bootstrap-vcpkg.sh
-             vcpkg/vcpkg integrate install
-        if: runner.os == 'Linux'
-
-      - name: Build Tesseract 4.1.1 (Linux)
-        run: |
-             vcpkg/vcpkg install tesseract:x64-linux
-        if: runner.os == 'Linux'
-
-      - name: Visual Studio Setup (Windows)
-        shell: cmd
-        run: |
-             call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-        if: runner.os == 'Windows'
-
-      - name: Install vcpkg (Windows)
-        run: |
-             git clone https://github.com/microsoft/vcpkg
-             vcpkg/bootstrap-vcpkg.bat
-             vcpkg/vcpkg integrate install
-        if: runner.os == 'Windows'
-
-      - name: Build and Install Tesseract and dependencies using vcpkg (Windows)
-        run: |
-            vcpkg/vcpkg install tesseract:x64-windows
-        if: runner.os == 'Windows'
-
-      - name: Download tessdata used for tests
-        run: |
-             git clone https://github.com/egorpugin/tessdata tessdata_unittest
-             mv tessdata_unittest/* ../
-        if: runner.os == 'Windows'
-
-      - name: Create CMakeLists.txt file for basicapitest
-        shell: bash
-        run: |
-             cd test
-             cat << "EOF" > CMakeLists.txt
-             cmake_minimum_required(VERSION 3.19)
-             project( basicapitest )
-             find_package( Tesseract REQUIRED )
-             find_package( Leptonica REQUIRED )
-             include_directories(${Tesseract_INCLUDE_DIRS})
-             include_directories(${Leptonica_INCLUDE_DIRS})
-             add_executable( basicapitest testing/basicapitest.cpp )
-             target_link_libraries(basicapitest ${Leptonica_LIBRARIES})
-             target_link_libraries(basicapitest ${Tesseract_LIBRARIES})
-             target_link_libraries(basicapitest libtesseract)
-             EOF
-             cat CMakeLists.txt
-        if: runner.os == 'Windows'
-
-      - name: Configure basicapitest
-        run: |
-             cd test
-             cmake . "-DCMAKE_TOOLCHAIN_FILE=${env:GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake"
-        if: runner.os == 'Windows'
-
-      - name: Build basicapitest
-        run: |
-             cd test
-             cmake --build .  --config Release
-        if: runner.os == 'Windows'
-
-      - name: Run basicapitest (Windows)
-        run: |
-             cd test
-             D:\a\tesseract\tesseract\test\Release\basicapitest.exe
-        if: runner.os == 'Windows'
-
-      - name: Build Tesseract 4.1.1 (macOS) Leptonica build fails
-        run: |
-             git clone https://github.com/microsoft/vcpkg
-             vcpkg/bootstrap-vcpkg.sh
-             vcpkg/vcpkg integrate install
-             vcpkg install leptonica:x64-osx
-             vcpkg install tesseract:x64-osx
-        if: runner.os == 'macOS'
-
-      - name: Display Leptonica error log (macOS) Fails
-        run: |
-             cat /usr/local/share/vcpkg/buildtrees/leptonica/install-x64-osx-dbg-out.log
-        if: ${{ runner.os == 'macOS' && always() }}
diff --git a/.mailmap b/.mailmap
index 57dbd363..9a4bbd18 100644
--- a/.mailmap
+++ b/.mailmap
@@ -2,6 +2,9 @@ Amit Dovev <amitdev2222@gmail.com>
 
 Egor Pugin <egor.pugin@gmail.com>
 
+Jeff Breidenbach <breidenbach@gmail.com>
+Jeff Breidenbach <breidenbach@gmail.com> <jbreiden@google.com>
+
 Jim O'Regan <joregan@gmail.com>
 Jim O'Regan <joregan@gmail.com> <joregan@gmail.com@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
 Jim O'Regan <joregan@gmail.com> <joregan@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
diff --git a/doc/tesseract.bib b/CITATIONS.bib
similarity index 84%
rename from doc/tesseract.bib
rename to CITATIONS.bib
index f13f0ef2..8f9cc79a 100644
--- a/doc/tesseract.bib
+++ b/CITATIONS.bib
@@ -20,11 +20,11 @@
   publisher = {ACM},
   series = {ACM International Conference Proceeding Series},
   title = {Adapting the Tesseract Open Source OCR Engine for Multilingual OCR.},
-  url = {http://www.google.de/research/pubs/archive/35248.pdf},
+  url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/35248.pdf},
   year = 2009,
   isbn = {978-1-60558-698-4},
-  date = {2009-07-25}
-  doi = {http://doi.acm.org/10/1145/1577802.1577804}
+  date = {2009-07-25},
+  doi = {10.1145/1577802.1577804},
   location = {Barcelona, Spain},
 }
 
@@ -33,7 +33,7 @@
   title = {Combined Orientation and Script Detection using the Tesseract OCR Engine},
   booktitle = {MOCR '09: Proceedings of the International Workshop on Multilingual OCR},
   editor = {Venu Govindaraju and Premkumar Natarajan and Santanu Chaudhury and Daniel P. Lopresti},
-  url = {http://www.google.de/research/pubs/archive/35506.pdf}
+  url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/35506.pdf},
   year = {2009},
   isbn = {978-1-60558-698-4},
   pages = {1--7},
@@ -47,7 +47,7 @@
   author = {Ray Smith},
   title = {Hybrid Page Layout Analysis via Tab-Stop Detection},
   booktitle = {ICDAR '09: Proceedings of the 2009 10th International Conference on Document Analysis and Recognition},
-  url = {http://www.google.de/research/pubs/archive/35094.pdf}
+  url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/35094.pdf},
   year = {2009},
   isbn = {978-0-7695-3725-2},
   pages = {241--245},
@@ -60,10 +60,11 @@
   author = {Ray Smith},
   title = {An Overview of the Tesseract OCR Engine},
   booktitle = {ICDAR '07: Proceedings of the Ninth International Conference on Document Analysis and Recognition},
-  url = {http://www.google.de/research/pubs/archive/33418.pdf}
+  url = {https://storage.googleapis.com/pub-tools-public-publication-data/pdf/33418.pdf},
   year = {2007},
   isbn = {0-7695-2822-8},
   pages = {629--633},
   publisher = {IEEE Computer Society},
   address = {Washington, DC, USA},
 }
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ae2f6f2c..68da6c53 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -89,9 +89,14 @@ option(DISABLED_LEGACY_ENGINE "Disable the legacy OCR engine" OFF)
 option(ENABLE_LTO "Enable link-time optimization" OFF)
 option(FAST_FLOAT "Enable float for LSTM" ON)
 option(ENABLE_OPENCL "Enable unsupported experimental OpenCL support" OFF)
+option(ENABLE_NATIVE
+       "Enable optimization for host CPU (could break HW compatibility)" OFF)
+# see
+# https://stackoverflow.com/questions/52653025/why-is-march-native-used-so-rarely
 option(BUILD_TRAINING_TOOLS "Build training tools" ON)
 option(BUILD_TESTS "Build tests" OFF)
 option(USE_SYSTEM_ICU "Use system ICU" OFF)
+option(DISABLE_TIFF "Disable build with libtiff (if available)" OFF)
 option(DISABLE_ARCHIVE "Disable build with libarchive (if available)" OFF)
 option(DISABLE_CURL "Disable build with libcurl (if available)" OFF)
 option(INSTALL_CONFIGS "Install tesseract configs" ON)
@@ -123,6 +128,9 @@ endif()
 include(CheckCXXCompilerFlag)
 
 set(CMAKE_CXX_STANDARD 17)
+if("cxx_std_20" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
+  set(CMAKE_CXX_STANDARD 20)
+endif()
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   # cygwin gnu c++ needs to use -std=gnu++17 instead of -std=c++17
@@ -143,15 +151,18 @@ else()
   message(STATUS "IPO / LTO not supported: <${error}>")
 endif()
 
-check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
-if(COMPILER_SUPPORTS_MARCH_NATIVE)
-  set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -march=native")
-  if(NOT CLANG AND MSVC)
-    # clang-cl does not know this argument
-    set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -mtune=native")
-  endif()
-  set(MARCH_NATIVE_OPT ON)
-endif()
+set(MARCH_NATIVE_OPT OFF)
+if(ENABLE_NATIVE)
+  check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
+  if(COMPILER_SUPPORTS_MARCH_NATIVE)
+    set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -march=native")
+    if(NOT CLANG AND MSVC)
+      # clang-cl does not know this argument
+      set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -mtune=native")
+    endif()
+    set(MARCH_NATIVE_OPT ON)
+  endif(COMPILER_SUPPORTS_MARCH_NATIVE)
+endif(ENABLE_NATIVE)
 
 message(STATUS "CMAKE_SYSTEM_PROCESSOR=<${CMAKE_SYSTEM_PROCESSOR}>")
 
@@ -178,6 +189,10 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686")
     set(HAVE_SSE4_1 ON)
     set(SSE4_1_COMPILE_FLAGS "-D__SSE4_1__")
     add_definitions("-DHAVE_SSE4_1")
+
+    set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -openmp:experimental")
+    add_definitions("-DOPENMP_SIMD")
+
     # clang with MSVC compatibility
     if(CLANG)
       set(CMAKE_CXX_FLAGS
@@ -189,7 +204,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686")
         set(SSE4_1_COMPILE_FLAGS "-msse4.1 ${SSE4_1_COMPILE_FLAGS}")
       endif(HAVE_SSE4_1)
     endif(CLANG)
-  else()  # if not MSVC
+  else() # if not MSVC
     check_cxx_compiler_flag("-mavx" HAVE_AVX)
     if(HAVE_AVX)
       set(AVX_COMPILE_FLAGS "-mavx")
@@ -219,6 +234,12 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686")
       set(SSE4_1_COMPILE_FLAGS "-msse4.1")
       add_definitions("-DHAVE_SSE4_1")
     endif()
+
+    check_cxx_compiler_flag("-fopenmp-simd" OPENMP_SIMD)
+    if(OPENMP_SIMD)
+      set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -fopenmp-simd")
+      add_definitions("-DOPENMP_SIMD")
+    endif(OPENMP_SIMD)
   endif(MSVC)
 
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64.*|AARCH64.*")
@@ -228,8 +249,6 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64.*|AARCH64.*")
   set(HAVE_AVX512F FALSE)
   set(HAVE_FMA FALSE)
   set(HAVE_SSE4_1 FALSE)
-
-  add_definitions("-DHAVE_NEON")
   set(HAVE_NEON TRUE)
 
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*")
@@ -243,8 +262,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*")
   check_cxx_compiler_flag("-mfpu=neon" HAVE_NEON)
   if(HAVE_NEON)
     set(NEON_COMPILE_FLAGS "-mfpu=neon")
-    add_definitions("-DHAVE_NEON")
-  endif()
+  endif(HAVE_NEON)
 
 else()
 
@@ -257,7 +275,12 @@ else()
 
 endif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686")
 
-# Compiler specific environments
+if(HAVE_NEON)
+  message(STATUS "LTO build is not supported on arm/RBPi.")
+  set(ENABLE_LTO FALSE)  # enabling LTO causes a fatal error on arm/RBPi
+endif()
+
+# Compiler specific environment
 if(CMAKE_COMPILER_IS_GNUCXX OR MINGW)
   set(CMAKE_CXX_FLAGS_DEBUG
       "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -pedantic -Og")
@@ -274,9 +297,9 @@ elseif(MSVC)
   # loss of data wd4275 non dll-interface class wd4305 ...truncation from
   # 'double' to 'float'
   set(CMAKE_CXX_FLAGS_RELEASE
-      "${CMAKE_CXX_FLAGS_RELEASE} /wd4244 /wd4305 /wd4267 /wd4251 /wd4275 /wd4005")
-  set(CMAKE_CXX_FLAGS_RELEASE
-      "${CMAKE_CXX_FLAGS_RELEASE} /wd4068")
+      "${CMAKE_CXX_FLAGS_RELEASE} /wd4244 /wd4305 /wd4267 /wd4251 /wd4275 /wd4005"
+  )
+  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /wd4068")
   # Don't use /Wall because it generates too many warnings.
   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /W0 /bigobj")
   # MT flag
@@ -304,7 +327,7 @@ if(OPENMP_BUILD)
   if(NOT OpenMP_FOUND
      AND CLANG
      AND WIN32)
-    # workaroung because find_package(OpenMP) does not work for clang-cl
+    # workaround because find_package(OpenMP) does not work for clang-cl
     # https://gitlab.kitware.com/cmake/cmake/issues/19404
     check_include_file_cxx(omp.h HAVE_OMP_H_INCLUDE)
     find_library(OpenMP_LIBRARY NAMES omp libomp.lib)
@@ -319,20 +342,17 @@ if(OPENMP_BUILD)
     add_definitions(-D_OPENMP=201107)
   endif()
   if(MSVC)
-    # Note: -openmp:llvm is available for X64 from MSVC 16.9
-    # from MSVC 16.10 Preview 2 there is support also for x86 and arm64
+    # Note: -openmp:llvm is available for X64 from MSVC 16.9; from MSVC 16.10
+    # Preview 2 it is also supported for x86 and arm64.
     # https://devblogs.microsoft.com/cppblog/openmp-updates-and-fixes-for-cpp-in-visual-studio-2019-16-10/
-    if ("${OpenMP_CXX_FLAGS}" STREQUAL "-openmp")
+    if("${OpenMP_CXX_FLAGS}" STREQUAL "-openmp")
       set(OpenMP_CXX_FLAGS "-openmp:llvm")
     endif()
-    # 'simd': requires '-openmp:experimental'
-    set_source_files_properties(src/arch/dotproduct.cpp
-                              PROPERTIES COMPILE_FLAGS "-openmp:experimental")
   endif()
   if(OpenMP_FOUND)
     message(">> OpenMP_FOUND ${OpenMP_FOUND} version: ${OpenMP_CXX_VERSION}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-    if (NOT TARGET OpenMP::OpenMP_CXX)
+    if(NOT TARGET OpenMP::OpenMP_CXX)
       add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
     endif()
   endif()
@@ -355,6 +375,7 @@ add_definitions("-DCMAKE_BUILD")
 # packages
 #
 # ##############################################################################
+include(CheckFunctions)
 
 if(SW_BUILD)
   find_package(SW REQUIRED)
@@ -381,11 +402,24 @@ else()
   endif()
   if(NOT Leptonica_FOUND)
     message(FATAL_ERROR "Cannot find required library Leptonica. Quitting!")
+  else()
+    message(STATUS "Found leptonica version: ${Leptonica_VERSION}")
   endif(NOT Leptonica_FOUND)
   include_directories(${Leptonica_INCLUDE_DIRS})
 
+  check_leptonica_tiff_support()
+  if ((NOT LEPT_TIFF_RESULT EQUAL 0) AND LEPT_TIFF_COMPILE_SUCCESS)
+    message(NOTICE "Leptonica was built without TIFF support! Disabling TIFF support...")
+    set(DISABLE_TIFF ON)
+  elseif(NOT ${CMAKE_VERSION} VERSION_LESS "3.25")
+    message(STATUS "Leptonica was built with TIFF support.")
+  endif()
+
   # Check for optional libraries.
-  if(WIN32)
+  if(DISABLE_TIFF)
+    set(HAVE_TIFFIO_H OFF)
+    message(STATUS "TIFF support disabled.")
+  else(DISABLE_TIFF)
     find_package(TIFF) # for tesseract
     if(NOT TIFF_FOUND AND PKG_CONFIG_EXECUTABLE)
       # try PKG_CONFIG to find libtiff if cmake failed
@@ -395,9 +429,10 @@ else()
       set(HAVE_TIFFIO_H ON)
       include_directories(${TIFF_INCLUDE_DIRS})
     endif(TIFF_FOUND)
-  endif(WIN32)
+  endif(DISABLE_TIFF)
   if(DISABLE_ARCHIVE)
     set(HAVE_LIBARCHIVE OFF)
+    message(STATUS "LibArchive support disabled.")
   else(DISABLE_ARCHIVE)
     find_package(LibArchive)
     if(NOT LibArchive_FOUND AND PKG_CONFIG_EXECUTABLE)
@@ -411,6 +446,7 @@ else()
   endif(DISABLE_ARCHIVE)
   if(DISABLE_CURL)
     set(HAVE_LIBCURL OFF)
+    message(STATUS "CURL support disabled.")
   else(DISABLE_CURL)
     find_package(CURL)
     if(NOT CURL_FOUND AND PKG_CONFIG_EXECUTABLE)
@@ -424,9 +460,9 @@ else()
   endif(DISABLE_CURL)
 endif()
 
-IF(ENABLE_OPENCL)
+if(ENABLE_OPENCL)
   find_package(OpenCL)
-  if (OpenCL_FOUND)
+  if(OpenCL_FOUND)
     include_directories(${OpenCL_INCLUDE_DIRS})
     message(STATUS "OpenCL_INCLUDE_DIRS: ${OpenCL_INCLUDE_DIRS}")
     message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}")
@@ -442,8 +478,10 @@ endif(ENABLE_OPENCL)
 #
 # ##############################################################################
 
-if(NOT MSVC)
-  set(MARCH_NATIVE_FLAGS "${MARCH_NATIVE_FLAGS} -O3 -ffast-math")
+if(MSVC)
+  set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} /fp:fast")
+else()
+  set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -O3 -ffast-math")
 endif()
 
 if(NOT DEFINED CMAKE_INSTALL_LIBDIR)
@@ -518,6 +556,8 @@ message(STATUS "Build with libarchive support [HAVE_LIBARCHIVE]: "
                "${HAVE_LIBARCHIVE}")
 message(STATUS "Build with libcurl support [HAVE_LIBCURL]: ${HAVE_LIBCURL}")
 message(STATUS "Enable float for LSTM [FAST_FLOAT]: ${FAST_FLOAT}")
+message(STATUS "Enable optimization for host CPU (could break HW compatibility)"
+               " [ENABLE_NATIVE]: ${ENABLE_NATIVE}")
 message(STATUS "Disable disable graphics (ScrollView) [GRAPHICS_DISABLED]: "
                "${GRAPHICS_DISABLED}")
 message(STATUS "Disable the legacy OCR engine [DISABLED_LEGACY_ENGINE]: "
@@ -526,10 +566,13 @@ message(STATUS "Build training tools [BUILD_TRAINING_TOOLS]: "
                "${BUILD_TRAINING_TOOLS}")
 message(STATUS "Build tests [BUILD_TESTS]: ${BUILD_TESTS}")
 if(ENABLE_OPENCL)
-  message(STATUS "Enable unsupported experimental OpenCL [ENABLE_OPENCL]: ${USE_OPENCL}")
+  message(
+    STATUS
+      "Enable unsupported experimental OpenCL [ENABLE_OPENCL]: ${USE_OPENCL}")
 endif(ENABLE_OPENCL)
 message(STATUS "Use system ICU Library [USE_SYSTEM_ICU]: ${USE_SYSTEM_ICU}")
-message(STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
+message(
+  STATUS "Install tesseract configs [INSTALL_CONFIGS]: ${INSTALL_CONFIGS}")
 message(STATUS "--------------------------------------------------------")
 message(STATUS)
 
@@ -582,7 +625,7 @@ if(DISABLED_LEGACY_ENGINE)
         PARENT_SCOPE)
   endfunction()
 
-set(TESSERACT_SRC_LEGACY
+  set(TESSERACT_SRC_LEGACY
       src/ccmain/adaptions.cpp
       src/ccmain/docqual.cpp
       src/ccmain/equationdetect.cpp
@@ -652,10 +695,10 @@ endif(DISABLED_LEGACY_ENGINE)
 list(APPEND arch_files src/arch/dotproduct.cpp src/arch/simddetect.cpp
      src/arch/intsimdmatrix.cpp)
 
-if(MARCH_NATIVE_FLAGS)
+if(DOTPRODUCT_FLAGS)
   set_source_files_properties(src/arch/dotproduct.cpp
-                              PROPERTIES COMPILE_FLAGS ${MARCH_NATIVE_FLAGS})
-endif(MARCH_NATIVE_FLAGS)
+                              PROPERTIES COMPILE_FLAGS ${DOTPRODUCT_FLAGS})
+endif(DOTPRODUCT_FLAGS)
 if(HAVE_AVX)
   list(APPEND arch_files_opt src/arch/dotproductavx.cpp)
   set_source_files_properties(src/arch/dotproductavx.cpp
@@ -723,40 +766,37 @@ set(TESSERACT_SRC
     src/api/wordstrboxrenderer.cpp)
 
 set(TESSERACT_CONFIGS
-  tessdata/configs/alto
-  tessdata/configs/ambigs.train
-  tessdata/configs/api_config
-  tessdata/configs/bazaar
-  tessdata/configs/bigram
-  tessdata/configs/box.train
-  tessdata/configs/box.train.stderr
-  tessdata/configs/digits
-  tessdata/configs/get.images
-  tessdata/configs/hocr
-  tessdata/configs/inter
-  tessdata/configs/kannada
-  tessdata/configs/linebox
-  tessdata/configs/logfile
-  tessdata/configs/lstm.train
-  tessdata/configs/lstmbox
-  tessdata/configs/lstmdebug
-  tessdata/configs/makebox
-  tessdata/configs/pdf
-  tessdata/configs/quiet
-  tessdata/configs/rebox
-  tessdata/configs/strokewidth
-  tessdata/configs/tsv
-  tessdata/configs/txt
-  tessdata/configs/unlv
-  tessdata/configs/wordstrbox)
+    tessdata/configs/alto
+    tessdata/configs/ambigs.train
+    tessdata/configs/api_config
+    tessdata/configs/bazaar
+    tessdata/configs/bigram
+    tessdata/configs/box.train
+    tessdata/configs/box.train.stderr
+    tessdata/configs/digits
+    tessdata/configs/get.images
+    tessdata/configs/hocr
+    tessdata/configs/inter
+    tessdata/configs/kannada
+    tessdata/configs/linebox
+    tessdata/configs/logfile
+    tessdata/configs/lstm.train
+    tessdata/configs/lstmbox
+    tessdata/configs/lstmdebug
+    tessdata/configs/makebox
+    tessdata/configs/pdf
+    tessdata/configs/quiet
+    tessdata/configs/rebox
+    tessdata/configs/strokewidth
+    tessdata/configs/tsv
+    tessdata/configs/txt
+    tessdata/configs/unlv
+    tessdata/configs/wordstrbox)
 
 set(TESSERACT_TESSCONFIGS
-  tessdata/tessconfigs/batch
-  tessdata/tessconfigs/batch.nochop
-  tessdata/tessconfigs/matdemo
-  tessdata/tessconfigs/msdemo
-  tessdata/tessconfigs/nobatch
-  tessdata/tessconfigs/segdemo)
+    tessdata/tessconfigs/batch tessdata/tessconfigs/batch.nochop
+    tessdata/tessconfigs/matdemo tessdata/tessconfigs/msdemo
+    tessdata/tessconfigs/nobatch tessdata/tessconfigs/segdemo)
 
 set(LIBTESSFILES ${TESSERACT_SRC} ${arch_files} ${arch_files_opt}
                  ${TESSERACT_HDR})
@@ -765,8 +805,7 @@ source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${LIBTESSFILES})
 
 add_library(libtesseract ${LIBTESSFILES})
 target_include_directories(
-  libtesseract
-  BEFORE
+  libtesseract BEFORE
   PRIVATE src
   PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
          $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/arch>
@@ -807,13 +846,19 @@ if(CURL_FOUND)
   endif()
 endif(CURL_FOUND)
 
-set_target_properties(libtesseract
-  PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
-set_target_properties(libtesseract
-  PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
+set_target_properties(
+  libtesseract PROPERTIES VERSION
+                          ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
+set_target_properties(
+  libtesseract PROPERTIES SOVERSION
+                          ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
 
-set_target_properties(libtesseract
-  PROPERTIES OUTPUT_NAME tesseract$<$<BOOL:${WIN32}>:${VERSION_MAJOR}${VERSION_MINOR}$<$<CONFIG:DEBUG>:d>>)
+set_target_properties(
+  libtesseract
+  PROPERTIES
+    OUTPUT_NAME
+    tesseract$<$<BOOL:${WIN32}>:${VERSION_MAJOR}${VERSION_MINOR}$<$<CONFIG:DEBUG>:d>>
+)
 
 if(SW_BUILD)
   target_link_libraries(libtesseract PUBLIC org.sw.demo.danbloomberg.leptonica
@@ -841,11 +886,13 @@ if(WIN32
   target_link_libraries(libtesseract PRIVATE ${OpenMP_LIBRARY})
 endif()
 
-if (ANDROID)
-    add_definitions(-DANDROID)
-    find_package(CpuFeaturesNdkCompat REQUIRED)
-    target_include_directories(libtesseract PRIVATE "${CpuFeaturesNdkCompat_DIR}/../../../include/ndk_compat")
-    target_link_libraries     (libtesseract PRIVATE CpuFeatures::ndk_compat)
+if(ANDROID)
+  add_definitions(-DANDROID)
+  find_package(CpuFeaturesNdkCompat REQUIRED)
+  target_include_directories(
+    libtesseract
+    PRIVATE "${CpuFeaturesNdkCompat_DIR}/../../../include/ndk_compat")
+  target_link_libraries(libtesseract PRIVATE CpuFeatures::ndk_compat)
 endif()
 
 # ##############################################################################
@@ -864,8 +911,9 @@ endif()
 
 # ##############################################################################
 
-if(BUILD_TESTS AND EXISTS
-     ${CMAKE_CURRENT_SOURCE_DIR}/unittest/third_party/googletest/CMakeLists.txt
+if(BUILD_TESTS
+   AND EXISTS
+       ${CMAKE_CURRENT_SOURCE_DIR}/unittest/third_party/googletest/CMakeLists.txt
 )
   add_subdirectory(unittest/third_party/googletest)
 endif()
@@ -881,7 +929,10 @@ get_target_property(tesseract_OUTPUT_NAME libtesseract OUTPUT_NAME)
 configure_file(tesseract.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc.in
                @ONLY)
 # to resolve generator expression in OUTPUT_NAME
-file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$<CONFIG>.pc INPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc.in)
+file(
+  GENERATE
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$<CONFIG>.pc
+  INPUT ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc.in)
 
 configure_package_config_file(
   cmake/templates/TesseractConfig.cmake.in
@@ -893,8 +944,10 @@ write_basic_package_version_file(
   VERSION ${PACKAGE_VERSION}
   COMPATIBILITY SameMajorVersion)
 
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$<CONFIG>.pc
-        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig RENAME tesseract.pc)
+install(
+  FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract_$<CONFIG>.pc
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
+  RENAME tesseract.pc)
 install(TARGETS tesseract DESTINATION bin)
 install(
   TARGETS libtesseract
@@ -926,10 +979,10 @@ install(
   DESTINATION include/tesseract)
 
 if(INSTALL_CONFIGS)
-install(FILES ${TESSERACT_CONFIGS}
-        DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/configs)
-install(FILES ${TESSERACT_TESSCONFIGS}
-        DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs)
+  install(FILES ${TESSERACT_CONFIGS}
+          DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/configs)
+  install(FILES ${TESSERACT_TESSCONFIGS}
+          DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs)
 endif()
 
 # ##############################################################################
diff --git a/ChangeLog b/ChangeLog
index f700a224..85b4955d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2023-10-05 - V5.3.3
+* Small code fixes and improvements to fix Coverity Scan issues.
+* Disable -mfpu=neon for aarch64.
+* Fix build without git clone in cloned directory (required for FreeBSD).
+* Other build fixes for autotools, cmake and sw.
+* Fix regression in layout detection which was introduced in release 5.0.0.
+* Fix regression which prevented loading of submodels, introduced in release 5.0.0-rc2.
+* Other small improvements for code and documentation.
+
+2023-07-11 - V5.3.2
+* Updates for snap package building.
+* Support for Sgaw and W Pwo Karen languages in the Myanmar validator (#4065).
+* Improve format of logging from lstmtraining.
+* Use fewer digits in filenames of checkpoints written by lstmtraining.
+* Replace deprecated sprintf.
+* Remove unused code in function fix_rep_char.
+* Avoid 32 bit overflow in multiplication (fixes 3 CodeQL CI alerts).
+* Avoid conversions from std::string to char* to std::string.
+* Abort with error message if OSD is requested with LSTM-only model.
+* cmake: allow disabling tiff (-DDISABLE_TIFF=ON).
+* cmake: provide info about disabled LibArchive and CURL.
+* cmake: check if leptonica was built with tiff support.
+* Remove old broken GitHub action vcpkg-4.1.1 (fixes issue #4078).
+* Create config.yml.
+* Fix typos.
+
+2023-04-01 - V5.3.1
+ * Bug fixes for some special scenarios:
+   * Fix issue #4010.
+   * textord: Catch empty rows in block iterator (fixes #4039).
+   * Fix FP division by zero (issue #3995).
+ * Improve documentation and log messages.
+ * Build fixes and improvements (mainly for cmake).
+
 2022-12-22 - V5.3.0
  * Minor updates for documentation and cmake builds.
 
@@ -239,7 +273,7 @@
   * Many other fixes, including the way in which the chopper finds chops and messes with the outline while it does so.
 
 2010-11-29 - V3.01
-  * Removed old/dead serialise/deserialze methods on *LISTIZED classes.
+  * Removed old/dead serialise/deserialize methods on *LISTIZED classes.
   * Total rewrite of DENORM to better encapsulate operation and make
     for potential to extract features from images.
   * Thread-safety! Moved all critical global and static variables to members of the appropriate class. Tesseract is now thread-safe (multiple instances can be used in parallel in multiple threads.) with the minor exception that some control parameters are still global and affect all threads.
diff --git a/Makefile.am b/Makefile.am
index f9c48dea..b0e0baee 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -734,10 +734,15 @@ bin_PROGRAMS = tesseract
 tesseract_SOURCES = src/tesseract.cpp
 tesseract_CPPFLAGS =
 tesseract_CPPFLAGS += -I$(top_srcdir)/src/arch
+tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccmain
 tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccstruct
 tesseract_CPPFLAGS += -I$(top_srcdir)/src/ccutil
+tesseract_CPPFLAGS += -I$(top_srcdir)/src/classify
+tesseract_CPPFLAGS += -I$(top_srcdir)/src/cutil
 tesseract_CPPFLAGS += -I$(top_srcdir)/src/dict
+tesseract_CPPFLAGS += -I$(top_srcdir)/src/textord
 tesseract_CPPFLAGS += -I$(top_srcdir)/src/viewer
+tesseract_CPPFLAGS += -I$(top_srcdir)/src/wordrec
 if OPENCL
 tesseract_CPPFLAGS += -I$(top_srcdir)/src/opencl
 endif
diff --git a/README.md b/README.md
index f8f006cd..c67894ac 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Tesseract OCR
 
 [![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
-[![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)\
+[![Build status](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)\
 [![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr)
 [![CodeQL](https://github.com/tesseract-ocr/tesseract/workflows/CodeQL/badge.svg)](https://github.com/tesseract-ocr/tesseract/security/code-scanning)
 [![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=2&q=proj:tesseract-ocr)
@@ -32,11 +32,11 @@ It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.h
 Stefan Weil is the current lead developer. Ray Smith was the lead developer until 2018. The maintainer is Zdenko Podobny. For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS)
 and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
 
-Tesseract has **unicode (UTF-8) support**, and can **recognize more than 100 languages** "out of the box".
+Tesseract has **Unicode (UTF-8) support**, and can **recognize [more than 100 languages](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html)** "out of the box".
 
 Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF.
 
-Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO (the last one - since version 4.1.0).
+Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO.
 
 You should note that in many cases, in order to get better OCR results, you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract.
 
diff --git a/VERSION b/VERSION
index 03f488b0..74664af7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-5.3.0
+5.3.3
diff --git a/autogen.sh b/autogen.sh
index 362bcc6f..979b4567 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -46,6 +46,14 @@ if [ "$1" = "clean" ]; then
     find . -iname "Makefile.in" -type f -exec rm '{}' +
 fi
 
+bail_out()
+{
+    echo
+    echo "  Something went wrong, bailing out!"
+    echo
+    exit 1
+}
+
 # Prevent any errors that might result from failing to properly invoke
 # `libtoolize` or `glibtoolize,` whichever is present on your system,
 # from occurring by testing for its existence and capturing the absolute path to
@@ -59,14 +67,6 @@ else
   bail_out
 fi
 
-bail_out()
-{
-    echo
-    echo "  Something went wrong, bailing out!"
-    echo
-    exit 1
-}
-
 # --- Step 1: Generate aclocal.m4 from:
 #             . acinclude.m4
 #             . config/*.m4 (these files are referenced in acinclude.m4)
diff --git a/cmake/CheckFunctions.cmake b/cmake/CheckFunctions.cmake
new file mode 100644
index 00000000..4618eaeb
--- /dev/null
+++ b/cmake/CheckFunctions.cmake
@@ -0,0 +1,53 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+# applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
+# ##############################################################################
+#
+# macros and functions
+#
+# ##############################################################################
+
+# ##############################################################################
+# FUNCTION check_leptonica_tiff_support
+# ##############################################################################
+function(check_leptonica_tiff_support)
+  # Check whether Leptonica was built with TIFF support. The exit code of the
+  # test program (result of pixWriteMemTiff) is stored in LEPT_TIFF_RESULT.
+  string(CONCAT TIFF_TEST
+  "#include \"leptonica/allheaders.h\"\n"
+  "int main() {\n"
+  "  l_uint8 *data = NULL;\n"
+  "  size_t size = 0;\n"
+  "  PIX* pix = pixCreate(3, 3, 4);\n"
+  "  l_int32 ret_val = pixWriteMemTiff(&data, &size, pix, IFF_TIFF_G3);\n"
+  "  pixDestroy(&pix);\n"
+  "  lept_free(data);\n"
+  "  return ret_val;}\n")
+  if(CMAKE_VERSION VERSION_LESS "3.25")
+    message(STATUS "Testing TIFF support in Leptonica is available with CMake >= 3.25 (you have ${CMAKE_VERSION})")
+  else()
+    set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
+    try_run(
+      LEPT_TIFF_RESULT
+      LEPT_TIFF_COMPILE_SUCCESS
+      SOURCE_FROM_CONTENT tiff_test.cpp "${TIFF_TEST}"
+      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${Leptonica_INCLUDE_DIRS}"
+      LINK_LIBRARIES ${Leptonica_LIBRARIES}
+      COMPILE_OUTPUT_VARIABLE
+      COMPILE_OUTPUT)
+    if(NOT LEPT_TIFF_COMPILE_SUCCESS)
+      message(STATUS "COMPILE_OUTPUT: ${COMPILE_OUTPUT}")
+      message(STATUS "Leptonica_INCLUDE_DIRS: ${Leptonica_INCLUDE_DIRS}")
+      message(STATUS "Leptonica_LIBRARIES: ${Leptonica_LIBRARIES}")
+      message(STATUS "LEPT_TIFF_RESULT: ${LEPT_TIFF_RESULT}")
+      message(STATUS "LEPT_TIFF_COMPILE_SUCCESS: ${LEPT_TIFF_COMPILE_SUCCESS}")
+      message(WARNING "Failed to compile test")
+    endif()
+  endif()
+endfunction(check_leptonica_tiff_support)
+
+# ##############################################################################
diff --git a/cmake/Configure.cmake b/cmake/Configure.cmake
index c7abb973..1e0ee2a3 100644
--- a/cmake/Configure.cmake
+++ b/cmake/Configure.cmake
@@ -118,6 +118,7 @@ file(APPEND ${AUTOCONFIG_SRC} "
 #cmakedefine FAST_FLOAT ${FAST_FLOAT}
 #cmakedefine DISABLED_LEGACY_ENGINE ${DISABLED_LEGACY_ENGINE}
 #cmakedefine HAVE_TIFFIO_H ${HAVE_TIFFIO_H}
+#cmakedefine HAVE_NEON ${HAVE_NEON}
 #cmakedefine HAVE_LIBARCHIVE ${HAVE_LIBARCHIVE}
 #cmakedefine HAVE_LIBCURL ${HAVE_LIBCURL}
 #cmakedefine USE_OPENCL ${USE_OPENCL}
diff --git a/configure.ac b/configure.ac
index db88f0b3..0b385372 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7,7 +7,7 @@
 # ----------------------------------------
 AC_PREREQ([2.69])
 AC_INIT([tesseract],
-        [m4_esyscmd_s([git describe --abbrev=4 2>/dev/null || cat VERSION])],
+        [m4_esyscmd_s([test -d .git && git describe --abbrev=4 2>/dev/null || cat VERSION])],
         [https://github.com/tesseract-ocr/tesseract/issues],,
         [https://github.com/tesseract-ocr/tesseract/])
 
@@ -28,8 +28,8 @@ AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc])
 
 # Define date of package, etc. Could be useful in auto-generated
 # documentation.
-PACKAGE_YEAR=2022
-PACKAGE_DATE="12/22"
+PACKAGE_YEAR=2023
+PACKAGE_DATE="10/05"
 
 abs_top_srcdir=`AS_DIRNAME([$0])`
 
diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h
index c5576a1a..927191ca 100644
--- a/include/tesseract/baseapi.h
+++ b/include/tesseract/baseapi.h
@@ -815,7 +815,7 @@ private:
                                  int tessedit_page_number);
 }; // class TessBaseAPI.
 
-/** Escape a char string - remove &<>"' with HTML codes. */
+/** Escape a char string - replace &<>"' with HTML codes. */
 std::string HOcrEscape(const char *text);
 
 } // namespace tesseract
diff --git a/include/tesseract/ocrclass.h b/include/tesseract/ocrclass.h
index a55e6528..d11e2d96 100644
--- a/include/tesseract/ocrclass.h
+++ b/include/tesseract/ocrclass.h
@@ -61,7 +61,7 @@ struct EANYCODE_CHAR { /*single character */
   // is UTF8 which means that ASCII characters will come out as one structure
   // but other characters will be returned in two or more instances of this
   // structure with a single byte of the  UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languagues with
+  // the same bounding box. Programs which want to handle languages with
   // different characters sets will need to handle extended characters
   // appropriately, but *all* code needs to be prepared to receive UTF8 coded
   // characters for characters such as bullet and fancy quotes.
diff --git a/java/com/google/scrollview/ScrollView.java b/java/com/google/scrollview/ScrollView.java
index e98af3dd..fcd06add 100644
--- a/java/com/google/scrollview/ScrollView.java
+++ b/java/com/google/scrollview/ScrollView.java
@@ -56,7 +56,7 @@ public class ScrollView {
   /** Prints all received messages to the console if true. */
   static boolean debugViewNetworkTraffic = false;
 
-  /** Add a new message to the outgoing queue */
+  /** Add a new message to the outgoing queue. */
   public static void addMessage(SVEvent e) {
     if (debugViewNetworkTraffic) {
       System.out.println("(S->c) " + e.toString());
@@ -191,7 +191,7 @@ public class ScrollView {
           }
         }
         // If str is not null here, then we have a string with a comma in it.
-        // Append , and the next argument at the next iteration, but check
+        // Append a comma and the next argument at the next iteration, but check
         // that str is null after the loop terminates in case it was an
         // unterminated string.
       } else if (floatPattern.matcher(argStr).matches()) {
@@ -390,8 +390,7 @@ public class ScrollView {
               "UTF8"));
     } catch (IOException e) {
       // Something went wrong and we were unable to set up a connection. This is
-      // pretty
-      // much a fatal error.
+      // pretty much a fatal error.
       // Note: The server does not get restarted automatically if this happens.
       e.printStackTrace();
       System.exit(1);
diff --git a/java/com/google/scrollview/events/SVEvent.java b/java/com/google/scrollview/events/SVEvent.java
index 18309c2f..9b57d9e0 100644
--- a/java/com/google/scrollview/events/SVEvent.java
+++ b/java/com/google/scrollview/events/SVEvent.java
@@ -53,7 +53,7 @@ public class SVEvent {
   }
 
   /**
-   * An event which issues a command (like clicking on a item in the menubar).
+   * An event which issues a command (like clicking on an item in the menubar).
    *
    * @param eventtype The type of the event as specified in SVEventType
    *        (usually SVET_MENU or SVET_POPUP)
diff --git a/java/com/google/scrollview/events/SVEventHandler.java b/java/com/google/scrollview/events/SVEventHandler.java
index 26a92bdb..53c7e68f 100644
--- a/java/com/google/scrollview/events/SVEventHandler.java
+++ b/java/com/google/scrollview/events/SVEventHandler.java
@@ -36,7 +36,7 @@ import javax.swing.Timer;
  * The ScrollViewEventHandler takes care of any events which might happen on the
  * canvas and converts them to an according SVEvent, which is (using the
  * processEvent method) then added to a message queue. All events from the
- * message queue get sent gradually
+ * message queue get sent gradually.
  *
  * @author wanke@google.com
  */
@@ -60,7 +60,7 @@ public class SVEventHandler extends PBasicInputEventHandler implements
   private int lastXMove = 0;
   private int lastYMove = 0;
 
-  /** For Drawing a rubber-band rectangle for selection */
+  /** For Drawing a rubber-band rectangle for selection. */
   private int startX = 0;
   private int startY = 0;
   private float rubberBandTransparency = 0.5f;
@@ -274,7 +274,7 @@ public class SVEventHandler extends PBasicInputEventHandler implements
     }
   }
 
-  /** These are all events we do not care about and throw away */
+  /** These are all events we do not care about and throw away. */
   public void keyReleased(KeyEvent e) {
   }
 
diff --git a/java/com/google/scrollview/ui/SVImageHandler.java b/java/com/google/scrollview/ui/SVImageHandler.java
index ed6b7c04..981d75a5 100644
--- a/java/com/google/scrollview/ui/SVImageHandler.java
+++ b/java/com/google/scrollview/ui/SVImageHandler.java
@@ -27,7 +27,7 @@ import javax.xml.bind.DatatypeConverter;
  * @author wanke@google.com
  */
 public class SVImageHandler {
-  /* All methods are static, so we forbid to construct SVImageHandler objects */
+  /* All methods are static, so we forbid constructing SVImageHandler objects. */
   private SVImageHandler() {
   }
 
diff --git a/java/com/google/scrollview/ui/SVPopupMenu.java b/java/com/google/scrollview/ui/SVPopupMenu.java
index 14c8b3ac..6584447e 100644
--- a/java/com/google/scrollview/ui/SVPopupMenu.java
+++ b/java/com/google/scrollview/ui/SVPopupMenu.java
@@ -56,7 +56,7 @@ public class SVPopupMenu implements ActionListener {
    *
    * @param parent The menu we add our new entry to (should have been defined
    *        before). If the parent is "", we will add the entry to the root
-   *        (top-level)
+   *        (top-level).
    * @param name The caption of the new entry.
    * @param id The Id of the new entry. If it is -1, the entry will be treated
    *        as a menu.
@@ -64,14 +64,14 @@ public class SVPopupMenu implements ActionListener {
   public void add(String parent, String name, int id) {
     // A duplicate entry - we just throw it away, since its already in.
     if (items.get(name) != null) { return; }
-    // A new submenu at the top-level
+    // A new submenu at the top-level.
     if (parent.equals("")) {
       JMenu jli = new JMenu(name);
       SVAbstractMenuItem mli = new SVSubMenuItem(name, jli);
       items.put(name, mli);
       root.add(jli);
     }
-    // A new sub-submenu
+    // A new sub-submenu.
     else if (id == -1) {
       SVAbstractMenuItem jmi = items.get(parent);
       JMenu jli = new JMenu(name);
@@ -101,7 +101,7 @@ public class SVPopupMenu implements ActionListener {
    *
    * @param parent The menu we add our new entry to (should have been defined
    *        before). If the parent is "", we will add the entry to the root
-   *        (top-level)
+   *        (top-level).
    * @param name The caption of the new entry.
    * @param id The Id of the new entry. If it is -1, the entry will be treated
    *        as a menu.
diff --git a/java/com/google/scrollview/ui/SVWindow.java b/java/com/google/scrollview/ui/SVWindow.java
index 3b5e7cd8..0da062a8 100644
--- a/java/com/google/scrollview/ui/SVWindow.java
+++ b/java/com/google/scrollview/ui/SVWindow.java
@@ -82,7 +82,7 @@ public class SVWindow extends JFrame {
   // This really needs to be a fixed width stroke as the basic stroke is
   // anti-aliased and gets too faint, but the piccolo fixed width stroke
   // is too buggy and generates missing initial moveto in path definition
-  // errors with a IllegalPathStateException that cannot be caught because
+  // errors with an IllegalPathStateException that cannot be caught because
   // it is in the automatic repaint function. If we can fix the exceptions
   // in piccolo, then we can use the following instead of BasicStroke:
   //   import edu.umd.cs.piccolox.util.PFixedWidthStroke;
diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml
index 64585c6b..b6770555 100644
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@@ -13,13 +13,13 @@ description: |
 
 grade: stable # must be 'stable' to release into candidate/stable channels
 confinement: strict
+base: core22
 
 apps:
   tesseract:
-    command: >
-      env
-      TESSDATA_PREFIX=$SNAP_USER_COMMON
-      tesseract
+    command: usr/local/bin/tesseract
+    environment:
+      TESSDATA_PREFIX: $SNAP_USER_COMMON
     plugs:
       - home
       - removable-media
@@ -30,9 +30,9 @@ parts:
     plugin: autotools
     build-packages:
       - pkg-config
-      - libpng12-dev
-      - libjpeg8-dev
-      - libtiff5-dev
+      - libpng-dev
+      - libjpeg-dev
+      - libtiff-dev
       - zlib1g-dev
       - libicu-dev
       - libpango1.0-dev
@@ -41,7 +41,7 @@ parts:
       - libgomp1
     after: [leptonica]
   leptonica:
-    source: https://github.com/DanBloomberg/leptonica/archive/1.74.2.tar.gz
+    source: https://github.com/DanBloomberg/leptonica/archive/1.83.1.tar.gz
     plugin: autotools
     stage-packages:
       - libjbig0
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index ec3de401..f573f5f4 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -101,9 +101,10 @@ static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin");
 static STRING_VAR(document_title, "", "Title of output document (used for hOCR and PDF output)");
 #ifdef HAVE_LIBCURL
 static INT_VAR(curl_timeout, 0, "Timeout for curl in seconds");
+static STRING_VAR(curl_cookiefile, "", "File with cookie data for curl");
 #endif
 
-/** Minimum sensible image size to be worth running tesseract. */
+/** Minimum sensible image size to be worth running Tesseract. */
 const int kMinRectSize = 10;
 /** Character returned when Tesseract couldn't recognize as anything. */
 const char kTesseractReject = '~';
@@ -412,7 +413,7 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
     if (data_size != 0) {
       mgr.LoadMemBuffer(language, data, data_size);
     }
-    if (tesseract_->init_tesseract(datapath.c_str(), output_file_.c_str(), language, oem, configs,
+    if (tesseract_->init_tesseract(datapath, output_file_, language, oem, configs,
                                    configs_size, vars_vec, vars_values, set_only_non_debug_params,
                                    &mgr) != 0) {
       return -1;
@@ -613,7 +614,7 @@ void TessBaseAPI::SetImage(Pix *pix) {
 
 /**
  * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
- * Each SetRectangle clears the recogntion results so multiple rectangles
+ * Each SetRectangle clears the recognition results so multiple rectangles
  * can be recognized with the same image.
  */
 void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
@@ -1162,6 +1163,10 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
       if (curlcode != CURLE_OK) {
         return error("curl_easy_setopt");
       }
+      curlcode = curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
+      if (curlcode != CURLE_OK) {
+        return error("curl_easy_setopt");
+      }
       // Follow HTTP, HTTPS, FTP and FTPS redirects.
       curlcode = curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
       if (curlcode != CURLE_OK) {
@@ -1183,6 +1188,13 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
           return error("curl_easy_setopt");
         }
       }
+      std::string cookiefile = curl_cookiefile;
+      if (!cookiefile.empty()) {
+        curlcode = curl_easy_setopt(curl, CURLOPT_COOKIEFILE, cookiefile.c_str());
+        if (curlcode != CURLE_OK) {
+          return error("curl_easy_setopt");
+        }
+      }
       curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
       if (curlcode != CURLE_OK) {
         return error("curl_easy_setopt");
@@ -2223,7 +2235,7 @@ int TessBaseAPI::FindLines() {
             " but data path is undefined\n");
         delete osd_tesseract_;
         osd_tesseract_ = nullptr;
-      } else if (osd_tesseract_->init_tesseract(datapath_.c_str(), "", "osd", OEM_TESSERACT_ONLY,
+      } else if (osd_tesseract_->init_tesseract(datapath_, "", "osd", OEM_TESSERACT_ONLY,
                                                 nullptr, 0, nullptr, nullptr, false, &mgr) == 0) {
         osd_tess = osd_tesseract_;
         osd_tesseract_->set_source_resolution(thresholder_->GetSourceYResolution());
@@ -2421,7 +2433,7 @@ int TessBaseAPI::NumDawgs() const {
   return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs();
 }
 
-/** Escape a char string - remove <>&"' with HTML codes. */
+/** Escape a char string - replace <>&"' with HTML codes. */
 std::string HOcrEscape(const char *text) {
   std::string ret;
   const char *ptr;
diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp
index 6de25d39..d6da06df 100644
--- a/src/ccmain/control.cpp
+++ b/src/ccmain/control.cpp
@@ -1684,18 +1684,6 @@ void Tesseract::fix_rep_char(PAGE_RES_IT *page_res_it) {
   }
   word_res->done = true;
 
-  // Measure the mean space.
-  int gap_count = 0;
-  WERD *werd = word_res->word;
-  C_BLOB_IT blob_it(werd->cblob_list());
-  C_BLOB *prev_blob = blob_it.data();
-  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
-    C_BLOB *blob = blob_it.data();
-    int gap = blob->bounding_box().left();
-    gap -= prev_blob->bounding_box().right();
-    ++gap_count;
-    prev_blob = blob;
-  }
   // Just correct existing classification.
   CorrectRepcharChoices(best_choice, word_res);
   word_res->reject_map.initialise(word.length());
diff --git a/src/ccmain/osdetect.cpp b/src/ccmain/osdetect.cpp
index daee2b40..dcc1aa52 100644
--- a/src/ccmain/osdetect.cpp
+++ b/src/ccmain/osdetect.cpp
@@ -460,7 +460,7 @@ ScriptDetector::ScriptDetector(const std::vector<int> *allowed_scripts, OSResult
 // adding this blob.
 void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
   for (int i = 0; i < 4; ++i) {
-    bool done[kMaxNumberOfScripts] = {false};
+    std::vector<bool> done(kMaxNumberOfScripts);
 
     BLOB_CHOICE_IT choice_it;
     choice_it.set_to_list(scores + i);
@@ -488,7 +488,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
         }
       }
       // Script already processed before.
-      if (done[id]) {
+      if (done.at(id)) {
         continue;
       }
       done[id] = true;
diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp
index 0af44607..c613badb 100644
--- a/src/ccmain/pagesegmain.cpp
+++ b/src/ccmain/pagesegmain.cpp
@@ -332,11 +332,11 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
 
     finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
 
-#ifndef DISABLED_LEGACY_ENGINE
-
+  #ifndef DISABLED_LEGACY_ENGINE
     if (equ_detect_) {
       equ_detect_->LabelSpecialText(to_block);
     }
+  #endif
 
     BLOBNBOX_CLIST osd_blobs;
     // osd_orientation is the number of 90 degree rotations to make the
@@ -350,6 +350,8 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
       vertical_text = finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio, to_block,
                                                       &osd_blobs);
     }
+
+  #ifndef DISABLED_LEGACY_ENGINE
     if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) {
       std::vector<int> osd_scripts;
       if (osd_tess != this) {
@@ -400,10 +402,10 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
         }
       }
     }
+  #endif // ndef DISABLED_LEGACY_ENGINE
+
     osd_blobs.shallow_clear();
     finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
-
-#endif // ndef DISABLED_LEGACY_ENGINE
   }
 
   return finder;
diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp
index fec08810..6076f379 100644
--- a/src/ccmain/paragraphs.cpp
+++ b/src/ccmain/paragraphs.cpp
@@ -514,8 +514,12 @@ void RowScratchRegisters::AppendDebugHeaderFields(std::vector<std::string> &head
 
 void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
                                           std::vector<std::string> &dbg) const {
-  char s[30];
-  snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", lmargin_, lindent_, rindent_, rmargin_);
+  char s[60];
+  // The largest (positive and negative) numbers are reported for lindent & rindent.
+  // While the column header has widths 5,4,4,5, it is therefore opportune to slightly
+  // offset the widths in the format string here to allow ample space for lindent & rindent
+  // while keeping the final table output nicely readable: 4,5,5,4.
+  snprintf(s, sizeof(s), "[%4d,%5d;%5d,%4d]", lmargin_, lindent_, rindent_, rmargin_);
   dbg.emplace_back(s);
   std::string model_string;
   model_string += static_cast<char>(GetLineType());
diff --git a/src/ccmain/paramsd.cpp b/src/ccmain/paramsd.cpp
index 60de457b..85e596d1 100644
--- a/src/ccmain/paramsd.cpp
+++ b/src/ccmain/paramsd.cpp
@@ -32,7 +32,7 @@
 #  include "svmnode.h"        // for SVMenuNode
 #  include "tesseractclass.h" // for Tesseract
 
-#  include <cstdio>  // for fclose, fopen, fprintf, sprintf, FILE
+#  include <cstdio>  // for fclose, fopen, fprintf, FILE
 #  include <cstdlib> // for atoi
 #  include <cstring> // for strcmp, strcspn, strlen, strncpy
 #  include <locale>  // for std::locale::classic
@@ -319,16 +319,12 @@ ParamsEditor::ParamsEditor(tesseract::Tesseract *tess, ScrollView *sv) {
 // Write all (changed_) parameters to a config file.
 void ParamsEditor::WriteParams(char *filename, bool changes_only) {
   FILE *fp; // input file
-  char msg_str[255];
   // if file exists
   if ((fp = fopen(filename, "rb")) != nullptr) {
     fclose(fp);
-    sprintf(msg_str,
-            "Overwrite file "
-            "%s"
-            "? (Y/N)",
-            filename);
-    int a = sv_window_->ShowYesNoDialog(msg_str);
+    std::stringstream msg;
+    msg << "Overwrite file " << filename << "? (Y/N)";
+    int a = sv_window_->ShowYesNoDialog(msg.str().c_str());
     if (a == 'n') {
       return;
     } // don't write
diff --git a/src/ccmain/pgedit.cpp b/src/ccmain/pgedit.cpp
index 9e4902bb..dd239851 100644
--- a/src/ccmain/pgedit.cpp
+++ b/src/ccmain/pgedit.cpp
@@ -36,6 +36,9 @@
 
 #include <cctype>
 #include <cmath>
+#include <iomanip> // for std::setprecision
+#include <locale>  // for std::locale::classic
+#include <sstream> // for std::stringstream
 
 #ifndef GRAPHICS_DISABLED
 namespace tesseract {
@@ -140,32 +143,30 @@ static void show_point(PAGE_RES *page_res, float x, float y) {
   FCOORD pt(x, y);
   PAGE_RES_IT pr_it(page_res);
 
-  const int kBufsize = 512;
-  char msg[kBufsize];
-  char *msg_ptr = msg;
-
-  msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y);
+  std::stringstream msg;
+  msg.imbue(std::locale::classic());
+  msg << std::fixed << std::setprecision(3) << "Pt:(" << x << ", " << y << ") ";
 
   for (WERD_RES *word = pr_it.word(); word != nullptr; word = pr_it.forward()) {
     if (pr_it.row() != pr_it.prev_row() && pr_it.row()->row->bounding_box().contains(pt)) {
-      msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ", pr_it.row()->row->base_line(x));
+      msg << "BL(x)=" << pr_it.row()->row->base_line(x) << ' ';
     }
     if (word->word->bounding_box().contains(pt)) {
       TBOX box = word->word->bounding_box();
-      msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ", box.left(), box.bottom(), box.right(),
-                         box.top());
+      msg << "Wd(" << box.left() << ", " << box.bottom() << ")/("
+          << box.right() << ", " << box.top() << ") ";
       C_BLOB_IT cblob_it(word->word->cblob_list());
       for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
         C_BLOB *cblob = cblob_it.data();
         box = cblob->bounding_box();
         if (box.contains(pt)) {
-          msg_ptr += sprintf(msg_ptr, "CBlb(%d, %d)/(%d, %d) ", box.left(), box.bottom(),
-                             box.right(), box.top());
+          msg << "CBlb(" << box.left() << ", " << box.bottom() << ")/("
+              << box.right() << ", " << box.top() << ") ";
         }
       }
     }
   }
-  image_win->AddMessage(msg);
+  image_win->AddMessage(msg.str().c_str());
 }
 
 /**
@@ -622,7 +623,7 @@ void Tesseract::process_image_event( // action in image win
           break;
 
         default:
-          sprintf(msg, "Mode %d not yet implemented", mode);
+          snprintf(msg, sizeof(msg), "Mode %d not yet implemented", mode);
           image_win->AddMessage(msg);
           break;
       }
diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp
index 9ee348c7..c2b3d708 100644
--- a/src/ccmain/resultiterator.cpp
+++ b/src/ccmain/resultiterator.cpp
@@ -149,7 +149,7 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
   for (int i = 0; i < word_length_; i++) {
     letter_types.push_back(it_->word()->SymbolDirection(i));
   }
-  // Convert a single separtor sandwiched between two EN's into an EN.
+  // Convert a single separator sandwiched between two ENs into an EN.
   for (int i = 0; i + 2 < word_length_; i++) {
     if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
         (letter_types[i + 1] == U_EURO_NUM_SEP || letter_types[i + 1] == U_COMMON_NUM_SEP)) {
diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp
index c97ce83c..03997bee 100644
--- a/src/ccmain/tessedit.cpp
+++ b/src/ccmain/tessedit.cpp
@@ -306,9 +306,10 @@ int Tesseract::init_tesseract(const std::string &arg0, const std::string &textba
   // Add any languages that this language requires
   bool loaded_primary = false;
   // Load the rest into sub_langs_.
-  // A range based for loop does not work here because langs_to_load
+  // WARNING: A range-based for loop does not work here because langs_to_load
   // might be changed in the loop when a new submodel is found.
-  for (auto &lang_to_load : langs_to_load) {
+  for (size_t lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
+    auto &lang_to_load = langs_to_load[lang_index];
     if (!IsStrInList(lang_to_load, langs_not_to_load)) {
       const char *lang_str = lang_to_load.c_str();
       Tesseract *tess_to_init;
diff --git a/src/ccstruct/dppoint.cpp b/src/ccstruct/dppoint.cpp
index 68f8f946..30e174d0 100644
--- a/src/ccstruct/dppoint.cpp
+++ b/src/ccstruct/dppoint.cpp
@@ -76,7 +76,7 @@ int64_t DPPoint::CostWithVariance(const DPPoint *prev) {
   int delta = this - prev;
   int32_t n = prev->n_ + 1;
   int32_t sig_x = prev->sig_x_ + delta;
-  int64_t sig_xsq = prev->sig_xsq_ + delta * delta;
+  int64_t sig_xsq = prev->sig_xsq_ + static_cast<int64_t>(delta) * delta;
   int64_t cost = (sig_xsq - sig_x * sig_x / n) / n;
   cost += prev->total_cost_;
   UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq);
diff --git a/src/ccstruct/normalis.cpp b/src/ccstruct/normalis.cpp
index eabb2bf7..8379f6e8 100644
--- a/src/ccstruct/normalis.cpp
+++ b/src/ccstruct/normalis.cpp
@@ -42,6 +42,8 @@ DENORM::DENORM() {
 
 DENORM::DENORM(const DENORM &src) {
   rotation_ = nullptr;
+  x_map_ = nullptr;
+  y_map_ = nullptr;
   *this = src;
 }
 
diff --git a/src/ccstruct/stepblob.cpp b/src/ccstruct/stepblob.cpp
index 4c61b6c6..3311f0c6 100644
--- a/src/ccstruct/stepblob.cpp
+++ b/src/ccstruct/stepblob.cpp
@@ -314,7 +314,7 @@ int32_t C_BLOB::outer_area() { // area
  * C_BLOB::count_transitions
  *
  * Return the total x and y maxes and mins in the blob.
- * Chlid outlines are not counted.
+ * Child outlines are not counted.
  **********************************************************************/
 
 int32_t C_BLOB::count_transitions( // area
diff --git a/src/ccstruct/stepblob.h b/src/ccstruct/stepblob.h
index 08aa8a4a..31d26710 100644
--- a/src/ccstruct/stepblob.h
+++ b/src/ccstruct/stepblob.h
@@ -78,7 +78,7 @@ public:
   int32_t count_transitions( // count maxima
       int32_t threshold);    // size threshold
 
-  void move(const ICOORD vec);         // repostion blob by vector
+  void move(const ICOORD vec);         // reposition blob by vector
   void rotate(const FCOORD &rotation); // Rotate by given vector.
 
   // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
diff --git a/src/ccutil/errcode.cpp b/src/ccutil/errcode.cpp
index dddc1231..e6b05c21 100644
--- a/src/ccutil/errcode.cpp
+++ b/src/ccutil/errcode.cpp
@@ -22,6 +22,8 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <iostream> // for std::cerr
+#include <sstream>  // for std::stringstream
 
 namespace tesseract {
 
@@ -41,37 +43,26 @@ void ERRCODE::error(         // handle error
     const char *format, ...  // special message
     ) const {
   va_list args; // variable args
-  char msg[MAX_MSG];
-  char *msgptr = msg;
+  std::stringstream msg;
 
   if (caller != nullptr) {
     // name of caller
-    msgptr += sprintf(msgptr, "%s:", caller);
+    msg << caller << ':';
   }
   // actual message
-  msgptr += sprintf(msgptr, "Error:%s", message);
+  msg << "Error:" << message;
   if (format != nullptr) {
-    msgptr += sprintf(msgptr, ":");
+    char str[MAX_MSG];
     va_start(args, format); // variable list
-#ifdef _WIN32
-                            // print remainder
-    msgptr += _vsnprintf(msgptr, MAX_MSG - 2 - (msgptr - msg), format, args);
-    msg[MAX_MSG - 2] = '\0'; // ensure termination
-    strcat(msg, "\n");
-#else
-                            // print remainder
-    msgptr += vsprintf(msgptr, format, args);
-    // no specific
-    msgptr += sprintf(msgptr, "\n");
-#endif
+    // print remainder
+    std::vsnprintf(str, sizeof(str), format, args);
+    // ensure termination
+    str[sizeof(str) - 1] = '\0';
     va_end(args);
-  } else {
-    // no specific
-    msgptr += sprintf(msgptr, "\n");
+    msg << ':' << str;
   }
 
-  // %s is needed here so msg is printed correctly!
-  fprintf(stderr, "%s", msg);
+  std::cerr << msg.str() << '\n';
 
   switch (action) {
     case DBG:
diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h
index c54db69b..4a5bbe12 100644
--- a/src/ccutil/genericvector.h
+++ b/src/ccutil/genericvector.h
@@ -123,7 +123,7 @@ public:
 
   // Add a callback to be called to delete the elements when the array took
   // their ownership.
-  void set_clear_callback(std::function<void(T)> cb) {
+  void set_clear_callback(const std::function<void(T)> &cb) {
     clear_cb_ = cb;
   }
 
@@ -148,8 +148,8 @@ public:
   // fread (and swapping)/fwrite.
   // Returns false on error or if the callback returns false.
   // DEPRECATED. Use [De]Serialize[Classes] instead.
-  bool write(FILE *f, std::function<bool(FILE *, const T &)> cb) const;
-  bool read(TFile *f, std::function<bool(TFile *, T *)> cb);
+  bool write(FILE *f, const std::function<bool(FILE *, const T &)> &cb) const;
+  bool read(TFile *f, const std::function<bool(TFile *, T *)> &cb);
   // Writes a vector of simple types to the given file. Assumes that bitwise
   // read/write of T will work. Returns false in case of error.
   // TODO(rays) Change all callers to use TFile and remove deprecated methods.
@@ -577,7 +577,7 @@ int GenericVector<T>::push_back(T object) {
     double_the_size();
   }
   index = size_used_++;
-  data_[index] = object;
+  data_[index] = std::move(object);
   return index;
 }
 
@@ -627,7 +627,7 @@ void GenericVector<T>::delete_data_pointers() {
 }
 
 template <typename T>
-bool GenericVector<T>::write(FILE *f, std::function<bool(FILE *, const T &)> cb) const {
+bool GenericVector<T>::write(FILE *f, const std::function<bool(FILE *, const T &)> &cb) const {
   if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
     return false;
   }
@@ -649,7 +649,7 @@ bool GenericVector<T>::write(FILE *f, std::function<bool(FILE *, const T &)> cb)
 }
 
 template <typename T>
-bool GenericVector<T>::read(TFile *f, std::function<bool(TFile *, T *)> cb) {
+bool GenericVector<T>::read(TFile *f, const std::function<bool(TFile *, T *)> &cb) {
   int32_t reserved;
   if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
     return false;
diff --git a/src/ccutil/tessdatamanager.cpp b/src/ccutil/tessdatamanager.cpp
index 7b6a7267..8ab26506 100644
--- a/src/ccutil/tessdatamanager.cpp
+++ b/src/ccutil/tessdatamanager.cpp
@@ -211,11 +211,11 @@ void TessdataManager::Clear() {
 
 // Prints a directory of contents.
 void TessdataManager::Directory() const {
-  tprintf("Version:%s\n", VersionString().c_str());
+  printf("Version:%s\n", VersionString().c_str());
   auto offset = TESSDATA_NUM_ENTRIES * sizeof(int64_t);
   for (unsigned i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
     if (!entries_[i].empty()) {
-      tprintf("%u:%s:size=%zu, offset=%zu\n", i, kTessdataFileSuffixes[i], entries_[i].size(),
+      printf("%u:%s:size=%zu, offset=%zu\n", i, kTessdataFileSuffixes[i], entries_[i].size(),
               offset);
       offset += entries_[i].size();
     }
diff --git a/src/ccutil/unicharset.cpp b/src/ccutil/unicharset.cpp
index 7f06f7cd..7ac232a7 100644
--- a/src/ccutil/unicharset.cpp
+++ b/src/ccutil/unicharset.cpp
@@ -314,10 +314,10 @@ std::string UNICHARSET::debug_utf8_str(const char *str) {
     step = UNICHAR::utf8_step(str + i);
     if (step == 0) {
       step = 1;
-      sprintf(hex, "%x", str[i]);
+      snprintf(hex, sizeof(hex), "%x", str[i]);
     } else {
       UNICHAR ch(str + i, step);
-      sprintf(hex, "%x", ch.first_uni());
+      snprintf(hex, sizeof(hex), "%x", ch.first_uni());
     }
     result += hex;
     result += " ";
@@ -1000,7 +1000,7 @@ bool UNICHARSET::major_right_to_left() const {
 // Set a whitelist and/or blacklist of characters to recognize.
 // An empty or nullptr whitelist enables everything (minus any blacklist).
 // An empty or nullptr blacklist disables nothing.
-// An empty or nullptr blacklist has no effect.
+// An empty or nullptr unblacklist has no effect.
 void UNICHARSET::set_black_and_whitelist(const char *blacklist,
                                          const char *whitelist,
                                          const char *unblacklist) {
diff --git a/src/ccutil/unicity_table.h b/src/ccutil/unicity_table.h
index 41089950..54f740a3 100644
--- a/src/ccutil/unicity_table.h
+++ b/src/ccutil/unicity_table.h
@@ -87,7 +87,7 @@ public:
 
   /// Add a callback to be called to delete the elements when the table took
   /// their ownership.
-  void set_clear_callback(std::function<void(T)> cb) {
+  void set_clear_callback(const std::function<void(T)> &cb) {
     table_.set_clear_callback(cb);
   }
 
@@ -109,10 +109,10 @@ public:
   /// The Callback given must be permanent since they will be called more than
   /// once. The given callback will be deleted at the end.
   /// Returns false on read/write error.
-  bool write(FILE *f, std::function<bool(FILE *, const T &)> cb) const {
+  bool write(FILE *f, const std::function<bool(FILE *, const T &)> &cb) const {
     return table_.write(f, cb);
   }
-  bool read(tesseract::TFile *f, std::function<bool(tesseract::TFile *, T *)> cb) {
+  bool read(tesseract::TFile *f, const std::function<bool(tesseract::TFile *, T *)> &cb) {
     return table_.read(f, cb);
   }
 
diff --git a/src/dict/dict.cpp b/src/dict/dict.cpp
index dbb7e0b6..8874a55e 100644
--- a/src/dict/dict.cpp
+++ b/src/dict/dict.cpp
@@ -114,7 +114,7 @@ Dict::Dict(CCUtil *ccutil)
                     " for each dict char above small word size.",
                     getCCUtil()->params())
     , double_MEMBER(stopper_allowable_character_badness, 3.0,
-                    "Max certaintly variation allowed in a word (in sigma)", getCCUtil()->params())
+                    "Max certainty variation allowed in a word (in sigma)", getCCUtil()->params())
     , INT_MEMBER(stopper_debug_level, 0, "Stopper debug level", getCCUtil()->params())
     , BOOL_MEMBER(stopper_no_acceptable_choices, false,
                   "Make AcceptableChoice() always return false. Useful"
@@ -171,7 +171,7 @@ Dict::~Dict() {
 
 DawgCache *Dict::GlobalDawgCache() {
   // This global cache (a singleton) will outlive every Tesseract instance
-  // (even those that someone else might declare as global statics).
+  // (even those that someone else might declare as global static variables).
   static DawgCache cache;
   return &cache;
 }
diff --git a/src/lstm/maxpool.cpp b/src/lstm/maxpool.cpp
index c097f59d..ccd5f9b9 100644
--- a/src/lstm/maxpool.cpp
+++ b/src/lstm/maxpool.cpp
@@ -19,7 +19,7 @@
 
 namespace tesseract {
 
-Maxpool::Maxpool(const char *name, int ni, int x_scale, int y_scale)
+Maxpool::Maxpool(const std::string &name, int ni, int x_scale, int y_scale)
     : Reconfig(name, ni, x_scale, y_scale) {
   type_ = NT_MAXPOOL;
   no_ = ni;
diff --git a/src/lstm/maxpool.h b/src/lstm/maxpool.h
index eee3f08f..bf0110fd 100644
--- a/src/lstm/maxpool.h
+++ b/src/lstm/maxpool.h
@@ -2,7 +2,6 @@
 // File:        maxpool.h
 // Description: Standard Max-Pooling layer.
 // Author:      Ray Smith
-// Created:     Tue Mar 18 16:28:18 PST 2014
 //
 // (C) Copyright 2014, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -29,7 +28,7 @@ namespace tesseract {
 class Maxpool : public Reconfig {
 public:
   TESS_API
-  Maxpool(const char *name, int ni, int x_scale, int y_scale);
+  Maxpool(const std::string &name, int ni, int x_scale, int y_scale);
   ~Maxpool() override = default;
 
   // Accessors.
diff --git a/src/lstm/network.cpp b/src/lstm/network.cpp
index 23a6c354..6e13e496 100644
--- a/src/lstm/network.cpp
+++ b/src/lstm/network.cpp
@@ -252,19 +252,19 @@ Network *Network::CreateFromFile(TFile *fp) {
 
   switch (type) {
     case NT_CONVOLVE:
-      network = new Convolve(name.c_str(), ni, 0, 0);
+      network = new Convolve(name, ni, 0, 0);
       break;
     case NT_INPUT:
-      network = new Input(name.c_str(), ni, no);
+      network = new Input(name, ni, no);
       break;
     case NT_LSTM:
     case NT_LSTM_SOFTMAX:
     case NT_LSTM_SOFTMAX_ENCODED:
     case NT_LSTM_SUMMARY:
-      network = new LSTM(name.c_str(), ni, no, no, false, type);
+      network = new LSTM(name, ni, no, no, false, type);
       break;
     case NT_MAXPOOL:
-      network = new Maxpool(name.c_str(), ni, 0, 0);
+      network = new Maxpool(name, ni, 0, 0);
       break;
     // All variants of Parallel.
     case NT_PARALLEL:
@@ -272,23 +272,23 @@ Network *Network::CreateFromFile(TFile *fp) {
     case NT_PAR_RL_LSTM:
     case NT_PAR_UD_LSTM:
     case NT_PAR_2D_LSTM:
-      network = new Parallel(name.c_str(), type);
+      network = new Parallel(name, type);
       break;
     case NT_RECONFIG:
-      network = new Reconfig(name.c_str(), ni, 0, 0);
+      network = new Reconfig(name, ni, 0, 0);
       break;
     // All variants of reversed.
     case NT_XREVERSED:
     case NT_YREVERSED:
     case NT_XYTRANSPOSE:
-      network = new Reversed(name.c_str(), type);
+      network = new Reversed(name, type);
       break;
     case NT_SERIES:
-      network = new Series(name.c_str());
+      network = new Series(name);
       break;
     case NT_TENSORFLOW:
 #ifdef INCLUDE_TENSORFLOW
-      network = new TFNetwork(name.c_str());
+      network = new TFNetwork(name);
 #else
       tprintf("TensorFlow not compiled in! -DINCLUDE_TENSORFLOW\n");
 #endif
@@ -302,7 +302,7 @@ Network *Network::CreateFromFile(TFile *fp) {
     case NT_LOGISTIC:
     case NT_POSCLIP:
     case NT_SYMCLIP:
-      network = new FullyConnected(name.c_str(), ni, no, type);
+      network = new FullyConnected(name, ni, no, type);
       break;
     default:
       break;
diff --git a/src/lstm/parallel.cpp b/src/lstm/parallel.cpp
index 2713314c..83ac0eb1 100644
--- a/src/lstm/parallel.cpp
+++ b/src/lstm/parallel.cpp
@@ -31,7 +31,7 @@
 namespace tesseract {
 
 // ni_ and no_ will be set by AddToStack.
-Parallel::Parallel(const char *name, NetworkType type) : Plumbing(name) {
+Parallel::Parallel(const std::string &name, NetworkType type) : Plumbing(name) {
   type_ = type;
 }
 
diff --git a/src/lstm/parallel.h b/src/lstm/parallel.h
index 0706493c..0d7ce094 100644
--- a/src/lstm/parallel.h
+++ b/src/lstm/parallel.h
@@ -27,7 +27,7 @@ class Parallel : public Plumbing {
 public:
   // ni_ and no_ will be set by AddToStack.
   TESS_API
-  Parallel(const char *name, NetworkType type);
+  Parallel(const std::string &name, NetworkType type);
 
   // Returns the shape output from the network given an input shape (which may
   // be partially unknown ie zero).
diff --git a/src/lstm/reconfig.cpp b/src/lstm/reconfig.cpp
index 4b416206..2f49d63e 100644
--- a/src/lstm/reconfig.cpp
+++ b/src/lstm/reconfig.cpp
@@ -20,7 +20,7 @@
 
 namespace tesseract {
 
-Reconfig::Reconfig(const char *name, int ni, int x_scale, int y_scale)
+Reconfig::Reconfig(const std::string &name, int ni, int x_scale, int y_scale)
     : Network(NT_RECONFIG, name, ni, ni * x_scale * y_scale)
     , x_scale_(x_scale)
     , y_scale_(y_scale) {}
diff --git a/src/lstm/reconfig.h b/src/lstm/reconfig.h
index e2c6a10a..2c35f091 100644
--- a/src/lstm/reconfig.h
+++ b/src/lstm/reconfig.h
@@ -31,7 +31,7 @@ namespace tesseract {
 class Reconfig : public Network {
 public:
   TESS_API
-  Reconfig(const char *name, int ni, int x_scale, int y_scale);
+  Reconfig(const std::string &name, int ni, int x_scale, int y_scale);
   ~Reconfig() override = default;
 
   // Returns the shape output from the network given an input shape (which may
diff --git a/src/lstm/series.cpp b/src/lstm/series.cpp
index 3a1ed098..99069861 100644
--- a/src/lstm/series.cpp
+++ b/src/lstm/series.cpp
@@ -25,7 +25,7 @@
 namespace tesseract {
 
 // ni_ and no_ will be set by AddToStack.
-Series::Series(const char *name) : Plumbing(name) {
+Series::Series(const std::string &name) : Plumbing(name) {
   type_ = NT_SERIES;
 }
 
diff --git a/src/lstm/series.h b/src/lstm/series.h
index 6d9965f4..fc63f284 100644
--- a/src/lstm/series.h
+++ b/src/lstm/series.h
@@ -27,7 +27,7 @@ class Series : public Plumbing {
 public:
   // ni_ and no_ will be set by AddToStack.
   TESS_API
-  explicit Series(const char *name);
+  explicit Series(const std::string &name);
   ~Series() override = default;
 
   // Returns the shape output from the network given an input shape (which may
diff --git a/src/lstm/tfnetwork.cpp b/src/lstm/tfnetwork.cpp
index 29d05ef4..d7b1441e 100644
--- a/src/lstm/tfnetwork.cpp
+++ b/src/lstm/tfnetwork.cpp
@@ -29,7 +29,7 @@ using tensorflow::TensorShape;
 
 namespace tesseract {
 
-TFNetwork::TFNetwork(const char *name) : Network(NT_TENSORFLOW, name, 0, 0) {}
+TFNetwork::TFNetwork(const std::string &name) : Network(NT_TENSORFLOW, name, 0, 0) {}
 
 int TFNetwork::InitFromProtoStr(const std::string &proto_str) {
   if (!model_proto_.ParseFromString(proto_str))
diff --git a/src/lstm/tfnetwork.h b/src/lstm/tfnetwork.h
index 1ee6aeb8..7fbd6042 100644
--- a/src/lstm/tfnetwork.h
+++ b/src/lstm/tfnetwork.h
@@ -34,7 +34,7 @@ namespace tesseract {
 
 class TFNetwork : public Network {
 public:
-  explicit TFNetwork(const char *name);
+  explicit TFNetwork(const std::string &name);
   virtual ~TFNetwork() = default;
 
   // Returns the required shape input to the network.
diff --git a/src/opencl/openclwrapper.cpp b/src/opencl/openclwrapper.cpp
index 79817f2d..8377d402 100644
--- a/src/opencl/openclwrapper.cpp
+++ b/src/opencl/openclwrapper.cpp
@@ -174,8 +174,8 @@ static ds_status initDSProfile(ds_profile **p, const char *version) {
   clGetPlatformIDs(0, nullptr, &numPlatforms);
 
   if (numPlatforms > 0) {
-    platforms.reserve(numPlatforms);
-    clGetPlatformIDs(numPlatforms, &platforms[0], nullptr);
+    platforms.resize(numPlatforms);
+    clGetPlatformIDs(numPlatforms, platforms.data(), nullptr);
   }
 
   numDevices = 0;
@@ -186,12 +186,11 @@ static ds_status initDSProfile(ds_profile **p, const char *version) {
   }
 
   if (numDevices > 0) {
-    devices.reserve(numDevices);
+    devices.resize(numDevices);
   }
 
   profile->numDevices = numDevices + 1; // +1 to numDevices to include the native CPU
-  profile->devices.reserve(profile->numDevices);
-  memset(&profile->devices[0], 0, profile->numDevices * sizeof(ds_device));
+  profile->devices.resize(profile->numDevices);
 
   next = 0;
   for (i = 0; i < numPlatforms; i++) {
@@ -812,7 +811,8 @@ int OpenclDevice::BinaryGenerated(const char *clFileName, FILE **fhandle) {
   cl_int clStatus;
   int status = 0;
   FILE *fd = nullptr;
-  char fileName[256] = {0}, cl_name[128] = {0};
+  char fileName[256];
+  char cl_name[128];
   char deviceName[1024];
   clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName,
                              nullptr);
@@ -820,7 +820,7 @@ int OpenclDevice::BinaryGenerated(const char *clFileName, FILE **fhandle) {
   const char *str = strstr(clFileName, ".cl");
   memcpy(cl_name, clFileName, str - clFileName);
   cl_name[str - clFileName] = '\0';
-  sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
+  snprintf(fileName, sizeof(fileName), "%s-%s.bin", cl_name, deviceName);
   legalizeFileName(fileName);
   fd = fopen(fileName, "rb");
   status = (fd != nullptr) ? 1 : 0;
@@ -894,9 +894,9 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, const char *clF
 
   /* dump out each binary into its own separate file. */
   for (i = 0; i < numDevices; i++) {
-    char fileName[256] = {0}, cl_name[128] = {0};
-
     if (binarySizes[i] != 0) {
+      char fileName[256];
+      char cl_name[128];
       char deviceName[1024];
       clStatus =
           clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr);
@@ -905,7 +905,7 @@ int OpenclDevice::GeneratBinFromKernelSource(cl_program program, const char *clF
       const char *str = strstr(clFileName, ".cl");
       memcpy(cl_name, clFileName, str - clFileName);
       cl_name[str - clFileName] = '\0';
-      sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
+      snprintf(fileName, sizeof(fileName), "%s-%s.bin", cl_name, deviceName);
       legalizeFileName(fileName);
       if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
         tprintf("[OD] write binary[%s] failed\n", fileName);
diff --git a/src/tesseract.cpp b/src/tesseract.cpp
index e0697aa7..48081556 100644
--- a/src/tesseract.cpp
+++ b/src/tesseract.cpp
@@ -39,6 +39,7 @@
 #endif
 #include <tesseract/renderer.h>
 #include "simddetect.h"
+#include "tesseractclass.h" // for AnyTessLang
 #include "tprintf.h" // for tprintf
 
 #ifdef _OPENMP
@@ -787,6 +788,12 @@ int main(int argc, char **argv) {
                           (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
                           (api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
 
+  if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY) {
+    if (!api.tesseract()->AnyTessLang()) {
+      fprintf(stderr, "Error, OSD requires a model for the legacy engine\n");
+      return EXIT_FAILURE;
+    }
+  }
 #ifdef DISABLED_LEGACY_ENGINE
   auto cur_psm = api.GetPageSegMode();
   auto osd_warning = std::string("");
diff --git a/src/textord/colpartitiongrid.cpp b/src/textord/colpartitiongrid.cpp
index f2a842bb..40462d6c 100644
--- a/src/textord/colpartitiongrid.cpp
+++ b/src/textord/colpartitiongrid.cpp
@@ -1609,10 +1609,10 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(
     }
     // See if we have a decision yet.
     auto image_count = counts[NPT_IMAGE];
-    auto htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
-                       (image_count + counts[NPT_WEAK_VTEXT]);
-    auto vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
-                       (image_count + counts[NPT_WEAK_HTEXT]);
+    int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
+                      (image_count + counts[NPT_WEAK_VTEXT]);
+    int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
+                      (image_count + counts[NPT_WEAK_HTEXT]);
     if (image_count > 0 && image_bias - htext_score >= kSmoothDecisionMargin &&
         image_bias - vtext_score >= kSmoothDecisionMargin) {
       *best_distance = dists[NPT_IMAGE][0];
diff --git a/src/textord/edgblob.cpp b/src/textord/edgblob.cpp
index ee3e1560..781b8e9f 100644
--- a/src/textord/edgblob.cpp
+++ b/src/textord/edgblob.cpp
@@ -174,7 +174,7 @@ int32_t OL_BUCKETS::outline_complexity(C_OUTLINE *outline, // parent outline
         if (child_count + grandchild_count > max_count) { // too complex
           if (edges_debug) {
             tprintf(
-                "Disgard outline on child_count=%d + grandchild_count=%d "
+                "Discard outline on child_count=%d + grandchild_count=%d "
                 "> max_count=%d\n",
                 child_count, grandchild_count, max_count);
           }
diff --git a/src/textord/imagefind.cpp b/src/textord/imagefind.cpp
index 4fdcced9..acf868f1 100644
--- a/src/textord/imagefind.cpp
+++ b/src/textord/imagefind.cpp
@@ -250,9 +250,14 @@ static void ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **b
 // If not nullptr, it must be PixDestroyed by the caller.
 // If textord_tabfind_show_images, debug images are appended to pixa_debug.
 Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) {
+  auto width = pixGetWidth(pix);
+  auto height = pixGetHeight(pix);
   // Not worth looking at small images.
-  if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) {
-    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+  // Leptonica will print an error message and return nullptr if we call
+  // pixGenerateHalftoneMask(pixr, nullptr, ...) with width or height < 100
+  // for the reduced image, so we want to bypass that, too.
+  if (width / 2 < kMinImageFindSize || height / 2 < kMinImageFindSize) {
+    return pixCreate(width, height, 1);
   }
 
   // Reduce by factor 2.
@@ -262,15 +267,6 @@ Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) {
   }
 
   // Get the halftone mask directly from Leptonica.
-  //
-  // Leptonica will print an error message and return nullptr if we call
-  // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
-  // want to bypass that.
-  if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) {
-    pixr.destroy();
-    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
-  }
-  // Get the halftone mask.
   l_int32 ht_found = 0;
   Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr;
   Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
@@ -287,7 +283,7 @@ Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) {
     pixht2.destroy();
   }
   if (pixht2 == nullptr) {
-    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+    return pixCreate(width, height, 1);
   }
 
   // Expand back up again.
@@ -334,7 +330,7 @@ Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) {
     pixa_debug->AddPix(pixht, "FinalMask");
   }
   // Make the result image the same size as the input.
-  Image result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+  Image result = pixCreate(width, height, 1);
   result |= pixht;
   pixht.destroy();
   return result;
diff --git a/src/textord/strokewidth.cpp b/src/textord/strokewidth.cpp
index d8f52f60..e579eb21 100644
--- a/src/textord/strokewidth.cpp
+++ b/src/textord/strokewidth.cpp
@@ -1581,7 +1581,7 @@ bool StrokeWidth::DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob) {
   // Setup a rectangle search to find its nearest base-character neighbour.
   // We keep 2 different best candidates:
   // best_x_overlap is a category of base characters that have an overlap in x
-  // (like a acute) in which we look for the least y-gap, computed using the
+  // (like an acute) in which we look for the least y-gap, computed using the
   // projection to favor base characters in the same textline.
   // best_y_overlap is a category of base characters that have no x overlap,
   // (nominally a y-overlap is preferrecd but not essential) in which we
diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp
index d67f1824..0b5fa502 100644
--- a/src/textord/tablefind.cpp
+++ b/src/textord/tablefind.cpp
@@ -432,7 +432,7 @@ void TableFinder::InsertImagePartition(ColPartition *part) {
 // text lines on the page. The assumption is that a table
 // will have several lines with similar overlapping whitespace
 // whereas text will not have this type of property.
-// Note: The code Assumes that blobs are sorted by the left side x!
+// Note: The code assumes that blobs are sorted by the left side x!
 // This will not work (as well) if the blobs are sorted by center/right.
 void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition *part) {
   ASSERT_HOST(part != nullptr);
diff --git a/src/textord/tablerecog.h b/src/textord/tablerecog.h
index f7f49fbd..fb22b386 100644
--- a/src/textord/tablerecog.h
+++ b/src/textord/tablerecog.h
@@ -279,7 +279,7 @@ public:
   // nullptr is returned.
   //
   // Keep in mind, this may "overgrow" or "undergrow" the size of guess.
-  // Ideally, there is a either a one-to-one correspondence between
+  // Ideally, there is either a one-to-one correspondence between
   // the guess and table or no table at all. This is not the best of
   // assumptions right now, but was made to try to keep things simple in
   // the first pass.
diff --git a/src/textord/topitch.cpp b/src/textord/topitch.cpp
index 0d01e93f..15fe23b8 100644
--- a/src/textord/topitch.cpp
+++ b/src/textord/topitch.cpp
@@ -398,9 +398,8 @@ bool try_doc_fixed(             // determine pitch
   int16_t mid_cuts; // no of cheap cuts
   float pitch_sd;   // sync rating
 
-  if (block_it.empty()
-      //      || block_it.data()==block_it.data_relative(1)
-      || !textord_blockndoc_fixed) {
+  if (!textord_blockndoc_fixed ||
+      block_it.empty() || block_it.data()->get_rows()->empty()) {
     return false;
   }
   shift_factor = gradient / (gradient * gradient + 1);
diff --git a/src/textord/underlin.cpp b/src/textord/underlin.cpp
index 112d5fdd..431cd498 100644
--- a/src/textord/underlin.cpp
+++ b/src/textord/underlin.cpp
@@ -195,7 +195,7 @@ void find_underlined_blobs(    // get chop points
 /**********************************************************************
  * vertical_cunderline_projection
  *
- * Compute the vertical projection of a outline from its outlines
+ * Compute the vertical projection of an outline from its outlines
  * and add to the given STATS.
  **********************************************************************/
 
diff --git a/src/training/combine_tessdata.cpp b/src/training/combine_tessdata.cpp
index 6cbe3abc..6c452f76 100644
--- a/src/training/combine_tessdata.cpp
+++ b/src/training/combine_tessdata.cpp
@@ -265,6 +265,9 @@ int main(int argc, char **argv) {
         "Usage for listing directory of components:\n"
         "  %s -d traineddata_file\n\n",
         argv[0]);
+    printf(
+        "NOTE: The above two flags may be combined as -dl or -ld to get both outputs\n\n"
+        );
     printf(
         "Usage for compacting LSTM component to int:\n"
         "  %s -c traineddata_file\n",
diff --git a/src/training/lstmtraining.cpp b/src/training/lstmtraining.cpp
index a1068bdb..d1cae301 100644
--- a/src/training/lstmtraining.cpp
+++ b/src/training/lstmtraining.cpp
@@ -16,6 +16,7 @@
 ///////////////////////////////////////////////////////////////////////
 
 #include <cerrno>
+#include <locale> // for std::locale::classic
 #if defined(__USE_GNU)
 #  include <cfenv> // for feenableexcept
 #endif
@@ -222,9 +223,10 @@ int main(int argc, char **argv) {
          iteration = trainer.training_iteration()) {
       trainer.TrainOnLine(&trainer, false);
     }
-    std::string log_str;
+    std::stringstream log_str;
+    log_str.imbue(std::locale::classic());
     trainer.MaintainCheckpoints(tester_callback, log_str);
-    tprintf("%s\n", log_str.c_str());
+    tprintf("%s\n", log_str.str().c_str());
   } while (trainer.best_error_rate() > FLAGS_target_error_rate &&
            (trainer.training_iteration() < max_iterations));
   tprintf("Finished! Selected model with minimal training error rate (BCER) = %g\n",
diff --git a/src/training/pango/boxchar.cpp b/src/training/pango/boxchar.cpp
index ec16d743..d31c5a8b 100644
--- a/src/training/pango/boxchar.cpp
+++ b/src/training/pango/boxchar.cpp
@@ -278,8 +278,8 @@ bool BoxChar::MostlyVertical(const std::vector<BoxChar *> &boxes) {
       int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
       int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
       if (abs(dx) > abs(dy) * kMinNewlineRatio || abs(dy) > abs(dx) * kMinNewlineRatio) {
-        total_dx += dx * dx;
-        total_dy += dy * dy;
+        total_dx += static_cast<int64_t>(dx) * dx;
+        total_dy += static_cast<int64_t>(dy) * dy;
       }
     }
   }
diff --git a/src/training/unicharset/lstmtester.cpp b/src/training/unicharset/lstmtester.cpp
index bd0f222a..052460cf 100644
--- a/src/training/unicharset/lstmtester.cpp
+++ b/src/training/unicharset/lstmtester.cpp
@@ -16,6 +16,7 @@
 ///////////////////////////////////////////////////////////////////////
 
 #include "lstmtester.h"
+#include <iomanip>  // for std::setprecision
 #include <thread>   // for std::thread
 #include "fileio.h" // for LoadFileLinesToStrings
 
@@ -115,14 +116,15 @@ std::string LSTMTester::RunEvalSync(int iteration, const double *training_errors
   }
   char_error *= 100.0 / total_pages_;
   word_error *= 100.0 / total_pages_;
-  std::string result;
+  std::stringstream result;
+  result.imbue(std::locale::classic());
+  result << std::fixed << std::setprecision(3);
   if (iteration != 0 || training_stage != 0) {
-    result += "At iteration " + std::to_string(iteration);
-    result += ", stage " + std::to_string(training_stage) + ", ";
+    result << "At iteration " << iteration
+           << ", stage " << training_stage << ", ";
   }
-  result += "BCER eval=" + std::to_string(char_error);
-  result += ", BWER eval=" + std::to_string(word_error);
-  return result;
+  result << "BCER eval=" << char_error << ", BWER eval=" << word_error;
+  return result.str();
 }
 
 // Helper thread function for RunEvalAsync.
diff --git a/src/training/unicharset/lstmtrainer.cpp b/src/training/unicharset/lstmtrainer.cpp
index 0ebad4de..6e7b780b 100644
--- a/src/training/unicharset/lstmtrainer.cpp
+++ b/src/training/unicharset/lstmtrainer.cpp
@@ -23,6 +23,8 @@
 #endif
 
 #include <cmath>
+#include <iomanip>             // for std::setprecision
+#include <locale>              // for std::locale::classic
 #include <string>
 #include "lstmtrainer.h"
 
@@ -305,7 +307,7 @@ bool LSTMTrainer::LoadAllTrainingData(const std::vector<std::string> &filenames,
 // Writes checkpoints at appropriate times and builds and returns a log message
 // to indicate progress. Returns false if nothing interesting happened.
 bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
-                                      std::string &log_msg) {
+                                      std::stringstream &log_msg) {
   PrepareLogMsg(log_msg);
   double error_rate = CharError();
   int iteration = learning_iteration();
@@ -330,35 +332,34 @@ bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
   std::vector<char> rec_model_data;
   if (error_rate < best_error_rate_) {
     SaveRecognitionDump(&rec_model_data);
-    log_msg += " New best BCER = " + std::to_string(error_rate);
-    log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
+    log_msg << " New best BCER = " << error_rate;
+    log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
     // If sub_trainer_ is not nullptr, either *this beat it to a new best, or it
     // just overwrote *this. In either case, we have finished with it.
     sub_trainer_.reset();
     stall_iteration_ = learning_iteration() + kMinStallIterations;
     if (TransitionTrainingStage(kStageTransitionThreshold)) {
-      log_msg +=
-          " Transitioned to stage " + std::to_string(CurrentTrainingStage());
+      log_msg << " Transitioned to stage " << CurrentTrainingStage();
     }
     SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
     if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
       std::string best_model_name = DumpFilename();
       if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) {
-        log_msg += " failed to write best model:";
+        log_msg << " failed to write best model:";
       } else {
-        log_msg += " wrote best model:";
+        log_msg << " wrote best model:";
         error_rate_of_last_saved_best_ = best_error_rate_;
       }
-      log_msg += best_model_name;
+      log_msg << best_model_name;
     }
   } else if (error_rate > worst_error_rate_) {
     SaveRecognitionDump(&rec_model_data);
-    log_msg += " New worst BCER = " + std::to_string(error_rate);
-    log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
+    log_msg << " New worst BCER = " << error_rate;
+    log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
     if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate &&
         best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
       // Error rate has ballooned. Go back to the best model.
-      log_msg += "\nDivergence! ";
+      log_msg << "\nDivergence! ";
       // Copy best_trainer_ before reading it, as it will get overwritten.
       std::vector<char> revert_data(best_trainer_);
       if (ReadTrainingDump(revert_data, *this)) {
@@ -382,34 +383,33 @@ bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
     std::vector<char> checkpoint;
     if (!SaveTrainingDump(FULL, *this, &checkpoint) ||
         !SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
-      log_msg += " failed to write checkpoint.";
+      log_msg << " failed to write checkpoint.";
     } else {
-      log_msg += " wrote checkpoint.";
+      log_msg << " wrote checkpoint.";
     }
   }
-  log_msg += "\n";
   return result;
 }
 
 // Builds a string containing a progress message with current error rates.
-void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const {
+void LSTMTrainer::PrepareLogMsg(std::stringstream &log_msg) const {
   LogIterations("At", log_msg);
-  log_msg += ", Mean rms=" + std::to_string(error_rates_[ET_RMS]);
-  log_msg += "%, delta=" + std::to_string(error_rates_[ET_DELTA]);
-  log_msg += "%, BCER train=" + std::to_string(error_rates_[ET_CHAR_ERROR]);
-  log_msg += "%, BWER train=" + std::to_string(error_rates_[ET_WORD_RECERR]);
-  log_msg += "%, skip ratio=" + std::to_string(error_rates_[ET_SKIP_RATIO]);
-  log_msg += "%, ";
+  log_msg << std::fixed << std::setprecision(3)
+          << ", mean rms=" << error_rates_[ET_RMS]
+          << "%, delta=" << error_rates_[ET_DELTA]
+          << "%, BCER train=" << error_rates_[ET_CHAR_ERROR]
+          << "%, BWER train=" << error_rates_[ET_WORD_RECERR]
+          << "%, skip ratio=" << error_rates_[ET_SKIP_RATIO] << "%,";
 }
 
 // Appends <intro_str> iteration learning_iteration()/training_iteration()/
 // sample_iteration() to the log_msg.
 void LSTMTrainer::LogIterations(const char *intro_str,
-                                std::string &log_msg) const {
-  log_msg += intro_str;
-  log_msg += " iteration " + std::to_string(learning_iteration());
-  log_msg += "/" + std::to_string(training_iteration());
-  log_msg += "/" + std::to_string(sample_iteration());
+                                std::stringstream &log_msg) const {
+  log_msg << intro_str
+          << " iteration " << learning_iteration()
+          << "/" << training_iteration()
+          << "/" << sample_iteration();
 }
 
 // Returns true and increments the training_stage_ if the error rate has just
@@ -602,14 +602,14 @@ bool LSTMTrainer::DeSerialize(const TessdataManager *mgr, TFile *fp) {
 // De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
 // learning rates (by scaling reduction, or layer specific, according to
 // NF_LAYER_SPECIFIC_LR).
-void LSTMTrainer::StartSubtrainer(std::string &log_msg) {
+void LSTMTrainer::StartSubtrainer(std::stringstream &log_msg) {
   sub_trainer_ = std::make_unique<LSTMTrainer>();
   if (!ReadTrainingDump(best_trainer_, *sub_trainer_)) {
-    log_msg += " Failed to revert to previous best for trial!";
+    log_msg << " Failed to revert to previous best for trial!";
     sub_trainer_.reset();
   } else {
-    log_msg += " Trial sub_trainer_ from iteration " +
-               std::to_string(sub_trainer_->training_iteration());
+    log_msg << " Trial sub_trainer_ from iteration "
+            << sub_trainer_->training_iteration();
     // Reduce learning rate so it doesn't diverge this time.
     sub_trainer_->ReduceLearningRates(this, log_msg);
     // If it fails again, we will wait twice as long before reverting again.
@@ -630,14 +630,13 @@ void LSTMTrainer::StartSubtrainer(std::string &log_msg) {
 // trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
 // returned. STR_NONE is returned if the subtrainer wasn't good enough to
 // receive any training iterations.
-SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
+SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::stringstream &log_msg) {
   double training_error = CharError();
   double sub_error = sub_trainer_->CharError();
   double sub_margin = (training_error - sub_error) / sub_error;
   if (sub_margin >= kSubTrainerMarginFraction) {
-    log_msg += " sub_trainer=" + std::to_string(sub_error);
-    log_msg += " margin=" + std::to_string(100.0 * sub_margin);
-    log_msg += "\n";
+    log_msg << " sub_trainer=" << sub_error
+            << " margin=" << 100.0 * sub_margin << "\n";
     // Catch up to current iteration.
     int end_iteration = training_iteration();
     while (sub_trainer_->training_iteration() < end_iteration &&
@@ -647,11 +646,12 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
       while (sub_trainer_->training_iteration() < target_iteration) {
         sub_trainer_->TrainOnLine(this, false);
       }
-      std::string batch_log = "Sub:";
+      std::stringstream batch_log("Sub:", std::ios::in | std::ios::out | std::ios::ate); // ate: append after "Sub:" instead of overwriting it
+      batch_log.imbue(std::locale::classic());
       sub_trainer_->PrepareLogMsg(batch_log);
-      batch_log += "\n";
-      tprintf("UpdateSubtrainer:%s", batch_log.c_str());
-      log_msg += batch_log;
+      batch_log << "\n";
+      tprintf("UpdateSubtrainer:%s", batch_log.str().c_str());
+      log_msg << batch_log.str();
       sub_error = sub_trainer_->CharError();
       sub_margin = (training_error - sub_error) / sub_error;
     }
@@ -661,9 +661,8 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
       std::vector<char> updated_trainer;
       SaveTrainingDump(LIGHT, *sub_trainer_, &updated_trainer);
       ReadTrainingDump(updated_trainer, *this);
-      log_msg += " Sub trainer wins at iteration " +
-                 std::to_string(training_iteration());
-      log_msg += "\n";
+      log_msg << " Sub trainer wins at iteration "
+              << training_iteration() << "\n";
       return STR_REPLACED;
     }
     return STR_UPDATED;
@@ -674,17 +673,16 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
 // Reduces network learning rates, either for everything, or for layers
 // independently, according to NF_LAYER_SPECIFIC_LR.
 void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer,
-                                      std::string &log_msg) {
+                                      std::stringstream &log_msg) {
   if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
     int num_reduced = ReduceLayerLearningRates(
         kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
-    log_msg +=
-        "\nReduced learning rate on layers: " + std::to_string(num_reduced);
+    log_msg << "\nReduced learning rate on layers: " << num_reduced;
   } else {
     ScaleLearningRate(kLearningRateDecay);
-    log_msg += "\nReduced learning rate to :" + std::to_string(learning_rate_);
+    log_msg << "\nReduced learning rate to :" << learning_rate_;
   }
-  log_msg += "\n";
+  log_msg << "\n";
 }
 
 // Considers reducing the learning rate independently for each layer down by
@@ -1053,13 +1051,14 @@ void LSTMTrainer::SaveRecognitionDump(std::vector<char> *data) const {
 // Returns a suitable filename for a training dump, based on the model_base_,
 // best_error_rate_, best_iteration_ and training_iteration_.
 std::string LSTMTrainer::DumpFilename() const {
-  std::string filename;
-  filename += model_base_.c_str();
-  filename += "_" + std::to_string(best_error_rate_);
-  filename += "_" + std::to_string(best_iteration_);
-  filename += "_" + std::to_string(training_iteration_);
-  filename += ".checkpoint";
-  return filename;
+  std::stringstream filename;
+  filename.imbue(std::locale::classic());
+  filename << model_base_ << std::fixed << std::setprecision(3)
+           << "_" << best_error_rate_
+           << "_" << best_iteration_
+           << "_" << training_iteration_
+           << ".checkpoint";
+  return filename.str();
 }
 
 // Fills the whole error buffer of the given type with the given value.
diff --git a/src/training/unicharset/lstmtrainer.h b/src/training/unicharset/lstmtrainer.h
index 026e4b11..6481a59c 100644
--- a/src/training/unicharset/lstmtrainer.h
+++ b/src/training/unicharset/lstmtrainer.h
@@ -25,6 +25,7 @@
 #include "rect.h"
 
 #include <functional> // for std::function
+#include <sstream>    // for std::stringstream
 
 namespace tesseract {
 
@@ -192,7 +193,7 @@ public:
 
   // Keeps track of best and locally worst error rate, using internally computed
   // values. See MaintainCheckpointsSpecific for more detail.
-  bool MaintainCheckpoints(const TestCallback &tester, std::string &log_msg);
+  bool MaintainCheckpoints(const TestCallback &tester, std::stringstream &log_msg);
   // Keeps track of best and locally worst error_rate (whatever it is) and
   // launches tests using rec_model, when a new min or max is reached.
   // Writes checkpoints using train_model at appropriate times and builds and
@@ -201,12 +202,12 @@ public:
   bool MaintainCheckpointsSpecific(int iteration,
                                    const std::vector<char> *train_model,
                                    const std::vector<char> *rec_model,
-                                   TestCallback tester, std::string &log_msg);
-  // Builds a string containing a progress message with current error rates.
-  void PrepareLogMsg(std::string &log_msg) const;
+                                   TestCallback tester, std::stringstream &log_msg);
+  // Builds a progress message with current error rates.
+  void PrepareLogMsg(std::stringstream &log_msg) const;
   // Appends <intro_str> iteration learning_iteration()/training_iteration()/
   // sample_iteration() to the log_msg.
-  void LogIterations(const char *intro_str, std::string &log_msg) const;
+  void LogIterations(const char *intro_str, std::stringstream &log_msg) const;
 
   // TODO(rays) Add curriculum learning.
   // Returns true and increments the training_stage_ if the error rate has just
@@ -226,7 +227,7 @@ public:
   // De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
   // learning rates (by scaling reduction, or layer specific, according to
   // NF_LAYER_SPECIFIC_LR).
-  void StartSubtrainer(std::string &log_msg);
+  void StartSubtrainer(std::stringstream &log_msg);
   // While the sub_trainer_ is behind the current training iteration and its
   // training error is at least kSubTrainerMarginFraction better than the
   // current training error, trains the sub_trainer_, and returns STR_UPDATED if
@@ -235,10 +236,10 @@ public:
   // trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
   // returned. STR_NONE is returned if the subtrainer wasn't good enough to
   // receive any training iterations.
-  SubTrainerResult UpdateSubtrainer(std::string &log_msg);
+  SubTrainerResult UpdateSubtrainer(std::stringstream &log_msg);
   // Reduces network learning rates, either for everything, or for layers
   // independently, according to NF_LAYER_SPECIFIC_LR.
-  void ReduceLearningRates(LSTMTrainer *samples_trainer, std::string &log_msg);
+  void ReduceLearningRates(LSTMTrainer *samples_trainer, std::stringstream &log_msg);
   // Considers reducing the learning rate independently for each layer down by
   // factor(<1), or leaving it the same, by double-training the given number of
   // samples and minimizing the amount of changing of sign of weight updates.
diff --git a/src/training/unicharset/validate_myanmar.cpp b/src/training/unicharset/validate_myanmar.cpp
index 8e97bcc1..abe082d5 100644
--- a/src/training/unicharset/validate_myanmar.cpp
+++ b/src/training/unicharset/validate_myanmar.cpp
@@ -140,13 +140,21 @@ bool ValidateMyanmar::ConsumeOptionalSignsIfPresent() {
   }
   // Tone mark extensions.
   ch = codes_[codes_used_].second;
-  if (ch == 0x1038 || ch == kMyanmarAsat || ch == 0x1063 || ch == 0x1064 ||
+  if (ch == 0x102c || ch == 0x1038 || ch == kMyanmarAsat || (0x1062 <= ch && ch <= 0x1064) ||
       (0x1069 <= ch && ch <= 0x106d) || (0x1087 <= ch && ch <= 0x108d) || ch == 0x108f ||
       ch == 0x109a || ch == 0x109b || (0xaa7b <= ch && ch <= 0xaa7d)) {
     if (UseMultiCode(1)) {
       return true;
     }
   }
+  // Sgaw tones 0x1062, 0x1063 must be followed by asat (0x103a).
+  // W Pwo tones 0x1069, 0x106a, and 0x106b may be followed by dot below (0x1037) or visarga (0x1038).
+  ch = codes_[codes_used_].second;
+  if (ch == kMyanmarAsat || ch == 0x1037 || ch == 0x1038) {
+    if (UseMultiCode(1)) {
+      return true;
+    }
+  }
   return false;
 }
 
diff --git a/src/viewer/svutil.cpp b/src/viewer/svutil.cpp
index 79f5beb9..7fe68259 100644
--- a/src/viewer/svutil.cpp
+++ b/src/viewer/svutil.cpp
@@ -244,15 +244,10 @@ static const char *ScrollViewProg() {
 
 // The arguments to the program to invoke to start ScrollView
 static std::string ScrollViewCommand(const std::string &scrollview_path) {
-  // The following ugly ifdef is to enable the output of the java runtime
-  // to be sent down a black hole on non-windows to ignore all the
-  // exceptions in piccolo. Ideally piccolo would be debugged to make
-  // this unnecessary.
-  // Also the path has to be separated by ; on windows and : otherwise.
+  // Quote our paths on Windows to deal with spaces
 #  ifdef _WIN32
   const char cmd_template[] =
       "-Djava.library.path=\"%s\" -jar \"%s/ScrollView.jar\"";
-
 #  else
   const char cmd_template[] =
       "-c \"trap 'kill %%1' 0 1 2 ; java "
@@ -279,6 +274,7 @@ SVNetwork::SVNetwork(const char *hostname, int port) {
   buffer_ptr_ = nullptr;
 
   struct addrinfo *addr_info = nullptr;
+  struct addrinfo hints = {0, PF_INET, SOCK_STREAM};
   auto port_string = std::to_string(port);
 #  ifdef _WIN32
   // Initialize Winsock
@@ -289,7 +285,7 @@ SVNetwork::SVNetwork(const char *hostname, int port) {
   }
 #  endif // _WIN32
 
-  if (getaddrinfo(hostname, port_string.c_str(), nullptr, &addr_info) != 0) {
+  if (getaddrinfo(hostname, port_string.c_str(), &hints, &addr_info) != 0) {
     std::cerr << "Error resolving name for ScrollView host "
               << std::string(hostname) << ":" << port << std::endl;
 #  ifdef _WIN32
diff --git a/src/wordrec/findseam.cpp b/src/wordrec/findseam.cpp
index 74a0578c..fdc347a1 100644
--- a/src/wordrec/findseam.cpp
+++ b/src/wordrec/findseam.cpp
@@ -103,7 +103,6 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *s
 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority,
                                SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile) {
   SEAM *seam;
-  char str[80];
   float my_priority;
   /* Add seam of split */
   my_priority = priority;
@@ -133,7 +132,8 @@ void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORI
         seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth,
                            chop_center_knob, chop_width_change_knob);
     if (chop_debug) {
-      sprintf(str, "Full my_priority %0.0f,  ", my_priority);
+      char str[80];
+      snprintf(str, sizeof(str), "Full my_priority %0.0f,  ", my_priority);
       seam->Print(str);
     }
 
diff --git a/src/wordrec/language_model.h b/src/wordrec/language_model.h
index 846f6ab0..010cf5b4 100644
--- a/src/wordrec/language_model.h
+++ b/src/wordrec/language_model.h
@@ -77,7 +77,7 @@ public:
                    float rating_cert_scale);
 
   // Updates language model state of the given BLOB_CHOICE_LIST (from
-  // the ratings matrix) a its parent. Updates pain_points if new
+  // the ratings matrix) and its parent. Updates pain_points if new
   // problematic points are found in the segmentation graph.
   //
   // At most language_model_viterbi_list_size are kept in each
diff --git a/test b/test
index 3ea10996..27618999 160000
--- a/test
+++ b/test
@@ -1 +1 @@
-Subproject commit 3ea1099664211958cb5c66c2bc69fb6652254a37
+Subproject commit 2761899921c08014cf9dbf3b63592237fb9e6ecb
diff --git a/unittest/lstm_test.h b/unittest/lstm_test.h
index 0b0ff6f5..d1de2eb7 100644
--- a/unittest/lstm_test.h
+++ b/unittest/lstm_test.h
@@ -103,7 +103,7 @@ protected:
     int iteration_limit = iteration + max_iterations;
     double best_error = 100.0;
     do {
-      std::string log_str;
+      std::stringstream log_str;
       int target_iteration = iteration + kBatchIterations;
       // Train a few.
       double mean_error = 0.0;