Move sources into src dir. Update build scripts.

2025-06-06 01:04:57 +08:00 · 2018-04-25 11:02:54 +03:00 · 2018-04-25 11:02:54 +03:00 · e95ff1159e
commit e95ff1159e
parent e8fceb58ab
518 changed files with 33887 additions and 33884 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -128,14 +128,14 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
 set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")

 configure_file(
-    ${CMAKE_SOURCE_DIR}/api/tess_version.h.in
-    ${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY)
+    ${CMAKE_SOURCE_DIR}/src/api/tess_version.h.in
+    ${CMAKE_BINARY_DIR}/src/api/tess_version.h @ONLY)
 configure_file(
-    ${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in
-    ${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY)
+    ${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/tesseract.rc.in
+    ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/tesseract.rc @ONLY)
 configure_file(
-    ${CMAKE_SOURCE_DIR}/vs2010/tesseract/libtesseract.rc.in
-    ${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc @ONLY)
+    ${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/libtesseract.rc.in
+    ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc @ONLY)
 configure_file(
    ${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in
    ${CMAKE_BINARY_DIR}/TesseractConfig-version.cmake @ONLY)
@ -160,101 +160,101 @@ include_directories(${Leptonica_INCLUDE_DIRS})

 include_directories(${CMAKE_BINARY_DIR})

-include_directories(api)
+include_directories(src/api)
 include_directories(${CMAKE_BINARY_DIR}/api)
-include_directories(arch)
-include_directories(ccmain)
-include_directories(ccstruct)
-include_directories(ccutil)
-include_directories(classify)
-include_directories(cutil)
-include_directories(dict)
-include_directories(lstm)
-include_directories(opencl)
-include_directories(textord)
-include_directories(vs2010/port)
-include_directories(viewer)
-include_directories(wordrec)
+include_directories(src/arch)
+include_directories(src/ccmain)
+include_directories(src/ccstruct)
+include_directories(src/ccutil)
+include_directories(src/classify)
+include_directories(src/cutil)
+include_directories(src/dict)
+include_directories(src/lstm)
+include_directories(src/opencl)
+include_directories(src/textord)
+include_directories(src/vs2010/port)
+include_directories(src/viewer)
+include_directories(src/wordrec)

 ########################################
 # LIBRARY tesseract
 ########################################

 file(GLOB tesseract_src
-    arch/*.cpp
-    ccmain/*.cpp
-    ccstruct/*.cpp
-    ccutil/*.cpp
-    classify/*.cpp
-    cutil/*.cpp
-    dict/*.cpp
-    lstm/*.cpp
-    opencl/*.cpp
-    textord/*.cpp
-    viewer/*.cpp
-    wordrec/*.cpp
+    src/arch/*.cpp
+    src/ccmain/*.cpp
+    src/ccstruct/*.cpp
+    src/ccutil/*.cpp
+    src/classify/*.cpp
+    src/cutil/*.cpp
+    src/dict/*.cpp
+    src/lstm/*.cpp
+    src/opencl/*.cpp
+    src/textord/*.cpp
+    src/viewer/*.cpp
+    src/wordrec/*.cpp
 )
 file(GLOB tesseract_hdr
-    api/*.h
-    arch/*.h
-    ccmain/*.h
-    ccstruct/*.h
-    ccutil/*.h
-    classify/*.h
-    cutil/*.h
-    dict/*.h
-    lstm/*.h
-    opencl/*.h
-    textord/*.h
-    viewer/*.h
-    wordrec/*.h
+    src/api/*.h
+    src/arch/*.h
+    src/ccmain/*.h
+    src/ccstruct/*.h
+    src/ccutil/*.h
+    src/classify/*.h
+    src/cutil/*.h
+    src/dict/*.h
+    src/lstm/*.h
+    src/opencl/*.h
+    src/textord/*.h
+    src/viewer/*.h
+    src/wordrec/*.h
 )
 if (WIN32)
-    file(GLOB tesseract_win32_src "vs2010/port/*.cpp")
-    file(GLOB tesseract_win32_hdr "vs2010/port/*.h")
+    file(GLOB tesseract_win32_src "src/vs2010/port/*.cpp")
+    file(GLOB tesseract_win32_hdr "src/vs2010/port/*.h")
    set(tesseract_src ${tesseract_src} ${tesseract_win32_src})
    set(tesseract_hdr ${tesseract_hdr} ${tesseract_win32_hdr})
 endif()

 set(tesseract_src ${tesseract_src}
-    api/baseapi.cpp
-    api/capi.cpp
-    api/renderer.cpp
-    api/pdfrenderer.cpp
+    src/api/baseapi.cpp
+    src/api/capi.cpp
+    src/api/renderer.cpp
+    src/api/pdfrenderer.cpp
 )

 if (WIN32)
    if (MSVC)
-        include_directories(vs2010/tesseract)
+        include_directories(src/vs2010/tesseract)
        set(tesseract_hdr
            ${tesseract_hdr}
-            ${CMAKE_CURRENT_SOURCE_DIR}/vs2010/tesseract/resource.h)
-        set(tesseract_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc)
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/resource.h)
+        set(tesseract_rsc ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc)
        set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
            PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
        set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
            PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
        set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
            PROPERTIES COMPILE_FLAGS "/arch:AVX")
        set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
            PROPERTIES COMPILE_FLAGS "/arch:AVX2")
    endif()
 else()
    set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
            PROPERTIES COMPILE_FLAGS "-msse4.1")
    set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
            PROPERTIES COMPILE_FLAGS "-msse4.1")
    set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
            PROPERTIES COMPILE_FLAGS "-mavx")
    set_source_files_properties(
-            ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
            PROPERTIES COMPILE_FLAGS "-mavx2")
 endif()

@ -291,7 +291,7 @@ endif()
 # EXECUTABLE tesseractmain
 ########################################

-set(tesseractmain_src api/tesseractmain.cpp)
+set(tesseractmain_src src/api/tesseractmain.cpp)
 if (MSVC)
    set(tesseractmain_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc)
 endif()
@ -326,74 +326,74 @@ install(FILES

 install(FILES
    # from api/makefile.am
-    api/apitypes.h
-    api/baseapi.h
-    api/capi.h
-    api/renderer.h
+    src/api/apitypes.h
+    src/api/baseapi.h
+    src/api/capi.h
+    src/api/renderer.h
    ${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h

    #from arch/makefile.am
-    arch/dotproductavx.h
-    arch/dotproductsse.h
-    arch/intsimdmatrix.h
-    arch/intsimdmatrixavx2.h
-    arch/intsimdmatrixsse.h
-    arch/simddetect.h
+    src/arch/dotproductavx.h
+    src/arch/dotproductsse.h
+    src/arch/intsimdmatrix.h
+    src/arch/intsimdmatrixavx2.h
+    src/arch/intsimdmatrixsse.h
+    src/arch/simddetect.h

    #from ccmain/makefile.am
-    ccmain/thresholder.h
-    ccmain/ltrresultiterator.h
-    ccmain/pageiterator.h
-    ccmain/resultiterator.h
-    ccmain/osdetect.h
+    src/ccmain/thresholder.h
+    src/ccmain/ltrresultiterator.h
+    src/ccmain/pageiterator.h
+    src/ccmain/resultiterator.h
+    src/ccmain/osdetect.h

    #from ccstruct/makefile.am
-    ccstruct/publictypes.h
+    src/ccstruct/publictypes.h

    #from ccutil/makefile.am
-    ccutil/basedir.h
-    ccutil/errcode.h
-    ccutil/fileerr.h
-    ccutil/genericvector.h
-    ccutil/helpers.h
-    ccutil/host.h
-    ccutil/memry.h
-    ccutil/ndminx.h
-    ccutil/params.h
-    ccutil/ocrclass.h
-    ccutil/platform.h
-    ccutil/serialis.h
-    ccutil/strngs.h
-    ccutil/tesscallback.h
-    ccutil/unichar.h
-    ccutil/unicharcompress.h
-    ccutil/unicharmap.h
-    ccutil/unicharset.h
+    src/ccutil/basedir.h
+    src/ccutil/errcode.h
+    src/ccutil/fileerr.h
+    src/ccutil/genericvector.h
+    src/ccutil/helpers.h
+    src/ccutil/host.h
+    src/ccutil/memry.h
+    src/ccutil/ndminx.h
+    src/ccutil/params.h
+    src/ccutil/ocrclass.h
+    src/ccutil/platform.h
+    src/ccutil/serialis.h
+    src/ccutil/strngs.h
+    src/ccutil/tesscallback.h
+    src/ccutil/unichar.h
+    src/ccutil/unicharcompress.h
+    src/ccutil/unicharmap.h
+    src/ccutil/unicharset.h

    #from lstm/makefile.am
-    lstm/convolve.h
-    lstm/ctc.h
-    lstm/fullyconnected.h
-    lstm/functions.h
-    lstm/input.h
-    lstm/lstm.h
-    lstm/lstmrecognizer.h
-    lstm/lstmtrainer.h
-    lstm/maxpool.h
-    lstm/networkbuilder.h
-    lstm/network.h
-    lstm/networkio.h
-    lstm/networkscratch.h
-    lstm/parallel.h
-    lstm/plumbing.h
-    lstm/recodebeam.h
-    lstm/reconfig.h
-    lstm/reversed.h
-    lstm/series.h
-    lstm/static_shape.h
-    lstm/stridemap.h
-    lstm/tfnetwork.h
-    lstm/weightmatrix.h
+    src/lstm/convolve.h
+    src/lstm/ctc.h
+    src/lstm/fullyconnected.h
+    src/lstm/functions.h
+    src/lstm/input.h
+    src/lstm/lstm.h
+    src/lstm/lstmrecognizer.h
+    src/lstm/lstmtrainer.h
+    src/lstm/maxpool.h
+    src/lstm/networkbuilder.h
+    src/lstm/network.h
+    src/lstm/networkio.h
+    src/lstm/networkscratch.h
+    src/lstm/parallel.h
+    src/lstm/plumbing.h
+    src/lstm/recodebeam.h
+    src/lstm/reconfig.h
+    src/lstm/reversed.h
+    src/lstm/series.h
+    src/lstm/static_shape.h
+    src/lstm/stridemap.h
+    src/lstm/tfnetwork.h
+    src/lstm/weightmatrix.h

    #${CMAKE_BINARY_DIR}/src/endianness.h
    DESTINATION include/tesseract)
--- a/configure.ac
+++ b/configure.ac
@ -16,7 +16,7 @@ AC_LANG_COMPILER_REQUIRE
 CXXFLAGS=${CXXFLAGS:-""}
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([config])
-AC_CONFIG_SRCDIR([api/tesseractmain.cpp])
+AC_CONFIG_SRCDIR([src/api/tesseractmain.cpp])
 AC_PREFIX_DEFAULT([/usr/local])

 # Automake configuration. Do not require README file (we use README.md).
@ -476,20 +476,20 @@ fi

 # Output files
 AC_CONFIG_FILES([Makefile tesseract.pc])
-AC_CONFIG_FILES([api/Makefile])
-AC_CONFIG_FILES([api/tess_version.h])
-AC_CONFIG_FILES([arch/Makefile])
-AC_CONFIG_FILES([ccmain/Makefile])
-AC_CONFIG_FILES([opencl/Makefile])
-AC_CONFIG_FILES([ccstruct/Makefile])
-AC_CONFIG_FILES([ccutil/Makefile])
-AC_CONFIG_FILES([classify/Makefile])
-AC_CONFIG_FILES([cutil/Makefile])
-AC_CONFIG_FILES([dict/Makefile])
-AC_CONFIG_FILES([lstm/Makefile])
-AC_CONFIG_FILES([textord/Makefile])
-AC_CONFIG_FILES([viewer/Makefile])
-AC_CONFIG_FILES([wordrec/Makefile])
+AC_CONFIG_FILES([src/api/Makefile])
+AC_CONFIG_FILES([src/api/tess_version.h])
+AC_CONFIG_FILES([src/arch/Makefile])
+AC_CONFIG_FILES([src/ccmain/Makefile])
+AC_CONFIG_FILES([src/opencl/Makefile])
+AC_CONFIG_FILES([src/ccstruct/Makefile])
+AC_CONFIG_FILES([src/ccutil/Makefile])
+AC_CONFIG_FILES([src/classify/Makefile])
+AC_CONFIG_FILES([src/cutil/Makefile])
+AC_CONFIG_FILES([src/dict/Makefile])
+AC_CONFIG_FILES([src/lstm/Makefile])
+AC_CONFIG_FILES([src/textord/Makefile])
+AC_CONFIG_FILES([src/viewer/Makefile])
+AC_CONFIG_FILES([src/wordrec/Makefile])
 AC_CONFIG_FILES([tessdata/Makefile])
 AC_CONFIG_FILES([tessdata/configs/Makefile])
 AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])
--- a/cppan.yml
+++ b/cppan.yml
@ -35,59 +35,59 @@ projects:
        type: lib
        export_all_symbols: true
        files:
-          - api/.*\.cpp
-          - arch/.*\.cpp
-          - ccmain/.*\.cpp
-          - ccstruct/.*\.cpp
-          - ccutil/.*\.cpp
-          - classify/.*\.cpp
-          - cutil/.*\.cpp
-          - dict/.*\.cpp
-          - lstm/.*\.cpp
-          - opencl/.*\.cpp
-          - textord/.*\.cpp
-          - viewer/.*\.cpp
-          - wordrec/.*\.cpp
+          - src/api/.*\.cpp
+          - src/arch/.*\.cpp
+          - src/ccmain/.*\.cpp
+          - src/ccstruct/.*\.cpp
+          - src/ccutil/.*\.cpp
+          - src/classify/.*\.cpp
+          - src/cutil/.*\.cpp
+          - src/dict/.*\.cpp
+          - src/lstm/.*\.cpp
+          - src/opencl/.*\.cpp
+          - src/textord/.*\.cpp
+          - src/viewer/.*\.cpp
+          - src/wordrec/.*\.cpp

-          - api/.*\.h
-          - arch/.*\.h
-          - ccmain/.*\.h
-          - ccstruct/.*\.h
-          - ccutil/.*\.h
-          - classify/.*\.h
-          - cutil/.*\.h
-          - dict/.*\.h
-          - lstm/.*\.h
-          - opencl/.*\.h
-          - textord/.*\.h
-          - viewer/.*\.h
-          - wordrec/.*\.h
+          - src/api/.*\.h
+          - src/arch/.*\.h
+          - src/ccmain/.*\.h
+          - src/ccstruct/.*\.h
+          - src/ccutil/.*\.h
+          - src/classify/.*\.h
+          - src/cutil/.*\.h
+          - src/dict/.*\.h
+          - src/lstm/.*\.h
+          - src/opencl/.*\.h
+          - src/textord/.*\.h
+          - src/viewer/.*\.h
+          - src/wordrec/.*\.h

-          - vs2010/port/.*
+          - src/vs2010/port/.*

        exclude_from_build:
-          - api/tesseractmain.cpp
-          - viewer/svpaint.cpp
+          - src/api/tesseractmain.cpp
+          - src/viewer/svpaint.cpp

        include_directories:
          public:
          #private:
-            - arch
-            - classify
-            - cutil
-            - ccutil
-            - dict
-            - lstm
-            - opencl
-            - textord
-            - vs2010/port
-            - viewer
-            - wordrec
+            - src/arch
+            - src/classify
+            - src/cutil
+            - src/ccutil
+            - src/dict
+            - src/lstm
+            - src/opencl
+            - src/textord
+            - src/vs2010/port
+            - src/viewer
+            - src/wordrec
          #public:
-            - api
-            - ccmain
-            - ccstruct
-            - ccutil
+            - src/api
+            - src/ccmain
+            - src/ccstruct
+            - src/ccutil

        check_function_exists:
          - getline
@ -125,23 +125,26 @@ projects:
            file_write_once(${BDIR}/config_auto.h "")

        post_sources: |
+            configure_file(
+                ${SDIR}/src/api/tess_version.h.in
+                ${BDIR}/tess_version.h @ONLY)
            if (WIN32)
                if (MSVC)
                    set_source_files_properties(
-                        ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
+                        ${SDIR}/src/arch/dotproductsse.cpp
                        PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
                    set_source_files_properties(
-                        ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
+                        ${SDIR}/src/arch/intsimdmatrixsse.cpp
                        PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
                    set_source_files_properties(
-                        ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
+                        ${SDIR}/src/arch/dotproductavx.cpp
                        PROPERTIES COMPILE_FLAGS "/arch:AVX")
                    set_source_files_properties(
-                        ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
+                        ${SDIR}/src/arch/intsimdmatrixavx2.cpp
                        PROPERTIES COMPILE_FLAGS "/arch:AVX2")
                endif()
            else()
-                remove_src_dir(vs2010/port/*)
+                remove_src_dir(src/vs2010/port/*)
            endif()

        options:
@ -162,7 +165,7 @@ projects:
            pvt.cppan.demo.danbloomberg.leptonica: 1

    tesseract:
-        files: api/tesseractmain.cpp
+        files: src/api/tesseractmain.cpp
        dependencies:
            - libtesseract

--- a/src/api/Makefile.am
+++ b/src/api/Makefile.am
--- a/src/api/apitypes.h
+++ b/src/api/apitypes.h
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
--- a/src/api/baseapi.h
+++ b/src/api/baseapi.h
--- a/src/api/capi.cpp
+++ b/src/api/capi.cpp
--- a/src/api/capi.h
+++ b/src/api/capi.h
--- a/src/api/pdfrenderer.cpp
+++ b/src/api/pdfrenderer.cpp
--- a/src/api/renderer.cpp
+++ b/src/api/renderer.cpp
--- a/src/api/renderer.h
+++ b/src/api/renderer.h
--- a/src/api/tess_version.h.in
+++ b/src/api/tess_version.h.in
--- a/src/api/tesseractmain.cpp
+++ b/src/api/tesseractmain.cpp
--- a/src/arch/Makefile.am
+++ b/src/arch/Makefile.am
--- a/src/arch/dotproductavx.cpp
+++ b/src/arch/dotproductavx.cpp
--- a/src/arch/dotproductavx.h
+++ b/src/arch/dotproductavx.h
@ -1,30 +1,30 @@
-///////////////////////////////////////////////////////////////////////
-// File:        dotproductavx.h
-// Description: Architecture-specific dot-product function.
-// Author:      Ray Smith
-// Created:     Wed Jul 22 10:51:05 PDT 2015
-//
-// (C) Copyright 2015, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-///////////////////////////////////////////////////////////////////////
-
-#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
-#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
-
-namespace tesseract {
-
-// Computes and returns the dot product of the n-vectors u and v.
-// Uses Intel AVX intrinsics to access the SIMD instruction set.
-double DotProductAVX(const double* u, const double* v, int n);
-
-}  // namespace tesseract.
-
-#endif  // TESSERACT_ARCH_DOTPRODUCTAVX_H_
+///////////////////////////////////////////////////////////////////////
+// File:        dotproductavx.h
+// Description: Architecture-specific dot-product function.
+// Author:      Ray Smith
+// Created:     Wed Jul 22 10:51:05 PDT 2015
+//
+// (C) Copyright 2015, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
+#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
+
+namespace tesseract {
+
+// Computes and returns the dot product of the n-vectors u and v.
+// Uses Intel AVX intrinsics to access the SIMD instruction set.
+double DotProductAVX(const double* u, const double* v, int n);
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_ARCH_DOTPRODUCTAVX_H_
--- a/src/arch/dotproductsse.cpp
+++ b/src/arch/dotproductsse.cpp
--- a/src/arch/dotproductsse.h
+++ b/src/arch/dotproductsse.h
@ -1,35 +1,35 @@
-///////////////////////////////////////////////////////////////////////
-// File:        dotproductsse.h
-// Description: Architecture-specific dot-product function.
-// Author:      Ray Smith
-// Created:     Wed Jul 22 10:57:05 PDT 2015
-//
-// (C) Copyright 2015, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-///////////////////////////////////////////////////////////////////////
-
-#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
-#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
-
-#include "host.h"
-
-namespace tesseract {
-
-// Computes and returns the dot product of the n-vectors u and v.
-// Uses Intel SSE intrinsics to access the SIMD instruction set.
-double DotProductSSE(const double* u, const double* v, int n);
-// Computes and returns the dot product of the n-vectors u and v.
-// Uses Intel SSE intrinsics to access the SIMD instruction set.
-int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n);
-
-}  // namespace tesseract.
-
-#endif  // TESSERACT_ARCH_DOTPRODUCTSSE_H_
+///////////////////////////////////////////////////////////////////////
+// File:        dotproductsse.h
+// Description: Architecture-specific dot-product function.
+// Author:      Ray Smith
+// Created:     Wed Jul 22 10:57:05 PDT 2015
+//
+// (C) Copyright 2015, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
+#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
+
+#include "host.h"
+
+namespace tesseract {
+
+// Computes and returns the dot product of the n-vectors u and v.
+// Uses Intel SSE intrinsics to access the SIMD instruction set.
+double DotProductSSE(const double* u, const double* v, int n);
+// Computes and returns the dot product of the n-vectors u and v.
+// Uses Intel SSE intrinsics to access the SIMD instruction set.
+int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n);
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_ARCH_DOTPRODUCTSSE_H_
--- a/src/arch/intsimdmatrix.cpp
+++ b/src/arch/intsimdmatrix.cpp
--- a/src/arch/intsimdmatrix.h
+++ b/src/arch/intsimdmatrix.h
--- a/src/arch/intsimdmatrixavx2.cpp
+++ b/src/arch/intsimdmatrixavx2.cpp
--- a/src/arch/intsimdmatrixavx2.h
+++ b/src/arch/intsimdmatrixavx2.h
--- a/src/arch/intsimdmatrixsse.cpp
+++ b/src/arch/intsimdmatrixsse.cpp
--- a/src/arch/intsimdmatrixsse.h
+++ b/src/arch/intsimdmatrixsse.h
--- a/src/arch/simddetect.cpp
+++ b/src/arch/simddetect.cpp
@ -1,82 +1,82 @@
-///////////////////////////////////////////////////////////////////////
-// File:        simddetect.cpp
-// Description: Architecture detector.
-// Author:      Stefan Weil (based on code from Ray Smith)
-//
-// (C) Copyright 2014, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-///////////////////////////////////////////////////////////////////////
-
-#include "simddetect.h"
-#include "tprintf.h"
-
-#undef X86_BUILD
-#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
-#if !defined(ANDROID_BUILD)
-#define X86_BUILD 1
-#endif  // !ANDROID_BUILD
-#endif  // x86 target
-
-#if defined(X86_BUILD)
-#if defined(__GNUC__)
-#include <cpuid.h>
-#elif defined(_WIN32)
-#include <intrin.h>
-#endif
-#endif
-
-SIMDDetect SIMDDetect::detector;
-
-// If true, then AVX has been detected.
-bool SIMDDetect::avx_available_;
-bool SIMDDetect::avx2_available_;
-bool SIMDDetect::avx512F_available_;
-bool SIMDDetect::avx512BW_available_;
-// If true, then SSe4.1 has been detected.
-bool SIMDDetect::sse_available_;
-
-// Constructor.
-// Tests the architecture in a system-dependent way to detect AVX, SSE and
-// any other available SIMD equipment.
-// __GNUC__ is also defined by compilers that include GNU extensions such as
-// clang.
-SIMDDetect::SIMDDetect() {
-#if defined(X86_BUILD)
-#if defined(__GNUC__)
-  unsigned int eax, ebx, ecx, edx;
-  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
-    // Note that these tests all use hex because the older compilers don't have
-    // the newer flags.
-    sse_available_ = (ecx & 0x00080000) != 0;
-    avx_available_ = (ecx & 0x10000000) != 0;
-    if (avx_available_) {
-      // There is supposed to be a __get_cpuid_count function, but this is all
-      // there is in my cpuid.h. It is a macro for an asm statement and cannot
-      // be used inside an if.
-      __cpuid_count(7, 0, eax, ebx, ecx, edx);
-      avx2_available_ = (ebx & 0x00000020) != 0;
-      avx512F_available_ = (ebx & 0x00010000) != 0;
-      avx512BW_available_ = (ebx & 0x40000000) != 0;
-    }
-  }
-#elif defined(_WIN32)
-  int cpuInfo[4];
-  __cpuid(cpuInfo, 0);
-  if (cpuInfo[0] >= 1) {
-    __cpuid(cpuInfo, 1);
-    sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
-    avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
-  }
-#else
-#error "I don't know how to test for SIMD with this compiler"
-#endif
-#endif  // X86_BUILD
-}
+///////////////////////////////////////////////////////////////////////
+// File:        simddetect.cpp
+// Description: Architecture detector.
+// Author:      Stefan Weil (based on code from Ray Smith)
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#include "simddetect.h"
+#include "tprintf.h"
+
+#undef X86_BUILD
+#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
+#if !defined(ANDROID_BUILD)
+#define X86_BUILD 1
+#endif  // !ANDROID_BUILD
+#endif  // x86 target
+
+#if defined(X86_BUILD)
+#if defined(__GNUC__)
+#include <cpuid.h>
+#elif defined(_WIN32)
+#include <intrin.h>
+#endif
+#endif
+
+SIMDDetect SIMDDetect::detector;
+
+// If true, then AVX has been detected.
+bool SIMDDetect::avx_available_;
+bool SIMDDetect::avx2_available_;
+bool SIMDDetect::avx512F_available_;
+bool SIMDDetect::avx512BW_available_;
+// If true, then SSe4.1 has been detected.
+bool SIMDDetect::sse_available_;
+
+// Constructor.
+// Tests the architecture in a system-dependent way to detect AVX, SSE and
+// any other available SIMD equipment.
+// __GNUC__ is also defined by compilers that include GNU extensions such as
+// clang.
+SIMDDetect::SIMDDetect() {
+#if defined(X86_BUILD)
+#if defined(__GNUC__)
+  unsigned int eax, ebx, ecx, edx;
+  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
+    // Note that these tests all use hex because the older compilers don't have
+    // the newer flags.
+    sse_available_ = (ecx & 0x00080000) != 0;
+    avx_available_ = (ecx & 0x10000000) != 0;
+    if (avx_available_) {
+      // There is supposed to be a __get_cpuid_count function, but this is all
+      // there is in my cpuid.h. It is a macro for an asm statement and cannot
+      // be used inside an if.
+      __cpuid_count(7, 0, eax, ebx, ecx, edx);
+      avx2_available_ = (ebx & 0x00000020) != 0;
+      avx512F_available_ = (ebx & 0x00010000) != 0;
+      avx512BW_available_ = (ebx & 0x40000000) != 0;
+    }
+  }
+#elif defined(_WIN32)
+  int cpuInfo[4];
+  __cpuid(cpuInfo, 0);
+  if (cpuInfo[0] >= 1) {
+    __cpuid(cpuInfo, 1);
+    sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
+    avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
+  }
+#else
+#error "I don't know how to test for SIMD with this compiler"
+#endif
+#endif  // X86_BUILD
+}
--- a/src/arch/simddetect.h
+++ b/src/arch/simddetect.h
@ -1,54 +1,54 @@
-///////////////////////////////////////////////////////////////////////
-// File:        simddetect.h
-// Description: Architecture detector.
-// Author:      Stefan Weil (based on code from Ray Smith)
-//
-// (C) Copyright 2014, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-///////////////////////////////////////////////////////////////////////
-
-#include "platform.h"
-
-// Architecture detector. Add code here to detect any other architectures for
-// SIMD-based faster dot product functions. Intended to be a single static
-// object, but it does no real harm to have more than one.
-class SIMDDetect {
- public:
-  // Returns true if AVX is available on this system.
-  static inline bool IsAVXAvailable() { return detector.avx_available_; }
-  // Returns true if AVX2 (integer support) is available on this system.
-  static inline bool IsAVX2Available() { return detector.avx2_available_; }
-  // Returns true if AVX512 Foundation (float) is available on this system.
-  static inline bool IsAVX512FAvailable() {
-    return detector.avx512F_available_;
-  }
-  // Returns true if AVX512 integer is available on this system.
-  static inline bool IsAVX512BWAvailable() {
-    return detector.avx512BW_available_;
-  }
-  // Returns true if SSE4.1 is available on this system.
-  static inline bool IsSSEAvailable() { return detector.sse_available_; }
-
- private:
-  // Constructor, must set all static member variables.
-  SIMDDetect();
-
- private:
-  // Singleton.
-  static SIMDDetect detector;
-  // If true, then AVX has been detected.
-  static TESS_API bool avx_available_;
-  static TESS_API bool avx2_available_;
-  static TESS_API bool avx512F_available_;
-  static TESS_API bool avx512BW_available_;
-  // If true, then SSe4.1 has been detected.
-  static TESS_API bool sse_available_;
-};
+///////////////////////////////////////////////////////////////////////
+// File:        simddetect.h
+// Description: Architecture detector.
+// Author:      Stefan Weil (based on code from Ray Smith)
+//
+// (C) Copyright 2014, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#include "platform.h"
+
+// Architecture detector. Add code here to detect any other architectures for
+// SIMD-based faster dot product functions. Intended to be a single static
+// object, but it does no real harm to have more than one.
+class SIMDDetect {
+ public:
+  // Returns true if AVX is available on this system.
+  static inline bool IsAVXAvailable() { return detector.avx_available_; }
+  // Returns true if AVX2 (integer support) is available on this system.
+  static inline bool IsAVX2Available() { return detector.avx2_available_; }
+  // Returns true if AVX512 Foundation (float) is available on this system.
+  static inline bool IsAVX512FAvailable() {
+    return detector.avx512F_available_;
+  }
+  // Returns true if AVX512 integer is available on this system.
+  static inline bool IsAVX512BWAvailable() {
+    return detector.avx512BW_available_;
+  }
+  // Returns true if SSE4.1 is available on this system.
+  static inline bool IsSSEAvailable() { return detector.sse_available_; }
+
+ private:
+  // Constructor, must set all static member variables.
+  SIMDDetect();
+
+ private:
+  // Singleton.
+  static SIMDDetect detector;
+  // If true, then AVX has been detected.
+  static TESS_API bool avx_available_;
+  static TESS_API bool avx2_available_;
+  static TESS_API bool avx512F_available_;
+  static TESS_API bool avx512BW_available_;
+  // If true, then SSe4.1 has been detected.
+  static TESS_API bool sse_available_;
+};
--- a/src/ccmain/Makefile.am
+++ b/src/ccmain/Makefile.am
--- a/src/ccmain/adaptions.cpp
+++ b/src/ccmain/adaptions.cpp
--- a/src/ccmain/applybox.cpp
+++ b/src/ccmain/applybox.cpp
--- a/src/ccmain/control.cpp
+++ b/src/ccmain/control.cpp
--- a/src/ccmain/control.h
+++ b/src/ccmain/control.h
@ -1,44 +1,44 @@
-/**********************************************************************
- * File:        control.h  (Formerly control.h)
- * Description: Module-independent matcher controller.
- * Author:		Ray Smith
- * Created:		Thu Apr 23 11:09:58 BST 1992
- *
- * (C) Copyright 1992, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**
- * @file control.h
- * Module-independent matcher controller.
- */
-
-#ifndef           CONTROL_H
-#define           CONTROL_H
-
-#include          "params.h"
-#include          "ocrblock.h"
-#include          "ratngs.h"
-#include          "statistc.h"
-#include          "pageres.h"
-
-enum ACCEPTABLE_WERD_TYPE
-{
-  AC_UNACCEPTABLE,               ///< Unacceptable word
-  AC_LOWER_CASE,                 ///< ALL lower case
-  AC_UPPER_CASE,                 ///< ALL upper case
-  AC_INITIAL_CAP,                ///< ALL but initial lc
-  AC_LC_ABBREV,                  ///< a.b.c.
-  AC_UC_ABBREV                   ///< A.B.C.
-};
-
-#endif
+/**********************************************************************
+ * File:        control.h  (Formerly control.h)
+ * Description: Module-independent matcher controller.
+ * Author:		Ray Smith
+ * Created:		Thu Apr 23 11:09:58 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+/**
+ * @file control.h
+ * Module-independent matcher controller.
+ */
+
+#ifndef           CONTROL_H
+#define           CONTROL_H
+
+#include          "params.h"
+#include          "ocrblock.h"
+#include          "ratngs.h"
+#include          "statistc.h"
+#include          "pageres.h"
+
+enum ACCEPTABLE_WERD_TYPE
+{
+  AC_UNACCEPTABLE,               ///< Unacceptable word
+  AC_LOWER_CASE,                 ///< All lower case
+  AC_UPPER_CASE,                 ///< All upper case
+  AC_INITIAL_CAP,                ///< Initial capital, all other letters lower case
+  AC_LC_ABBREV,                  ///< Lower-case abbreviation, e.g. a.b.c.
+  AC_UC_ABBREV                   ///< Upper-case abbreviation, e.g. A.B.C.
+};
+
+#endif
--- a/src/ccmain/docqual.cpp
+++ b/src/ccmain/docqual.cpp
--- a/src/ccmain/docqual.h
+++ b/src/ccmain/docqual.h
--- a/src/ccmain/equationdetect.cpp
+++ b/src/ccmain/equationdetect.cpp
--- a/src/ccmain/equationdetect.h
+++ b/src/ccmain/equationdetect.h
--- a/src/ccmain/fixspace.cpp
+++ b/src/ccmain/fixspace.cpp
--- a/src/ccmain/fixspace.h
+++ b/src/ccmain/fixspace.h
@ -1,31 +1,31 @@
-/******************************************************************
- * File:        fixspace.h  (Formerly fixspace.h)
- * Description: Implements a pass over the page res, exploring the alternative
- *					spacing possibilities, trying to use context to improve the
-          word spacing
-* Author:		Phil Cheatle
-* Created:		Thu Oct 21 11:38:43 BST 1993
-*
-* (C) Copyright 1993, Hewlett-Packard Ltd.
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-** http://www.apache.org/licenses/LICENSE-2.0
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*
-**********************************************************************/
-
-#ifndef           FIXSPACE_H
-#define           FIXSPACE_H
-
-#include          "pageres.h"
-#include          "params.h"
-
-void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
-void transform_to_next_perm(WERD_RES_LIST &words);
-void fixspace_dbg(WERD_RES *word);
-#endif
+/******************************************************************
+ * File:        fixspace.h  (Formerly fixspace.h)
+ * Description: Implements a pass over the page res, exploring the alternative
+ *              spacing possibilities, trying to use context to improve the
+ *              word spacing.
+* Author:		Phil Cheatle
+* Created:		Thu Oct 21 11:38:43 BST 1993
+*
+* (C) Copyright 1993, Hewlett-Packard Ltd.
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+** http://www.apache.org/licenses/LICENSE-2.0
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*
+**********************************************************************/
+
+#ifndef           FIXSPACE_H
+#define           FIXSPACE_H
+
+#include          "pageres.h"
+#include          "params.h"
+
+void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
+void transform_to_next_perm(WERD_RES_LIST &words);
+void fixspace_dbg(WERD_RES *word);
+#endif
--- a/src/ccmain/fixxht.cpp
+++ b/src/ccmain/fixxht.cpp
--- a/src/ccmain/linerec.cpp
+++ b/src/ccmain/linerec.cpp
--- a/src/ccmain/ltrresultiterator.cpp
+++ b/src/ccmain/ltrresultiterator.cpp
--- a/src/ccmain/ltrresultiterator.h
+++ b/src/ccmain/ltrresultiterator.h
--- a/src/ccmain/mutableiterator.h
+++ b/src/ccmain/mutableiterator.h
@ -1,64 +1,64 @@
-///////////////////////////////////////////////////////////////////////
-// File:        mutableiterator.h
-// Description: Iterator for tesseract results providing access to
-//              both high-level API and Tesseract internal data structures.
-// Author:      David Eger
-// Created:     Thu Feb 24 19:01:06 PST 2011
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
-#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
-
-#include "resultiterator.h"
-
-class BLOB_CHOICE_IT;
-
-namespace tesseract {
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See apitypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// ResultIterator adds text-specific methods for access to OCR output.
-// MutableIterator adds access to internal data structures.
-
-class MutableIterator : public ResultIterator {
- public:
-  // See argument descriptions in ResultIterator()
-  MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
-                  int scale, int scaled_yres,
-                  int rect_left, int rect_top,
-                  int rect_width, int rect_height)
-      : ResultIterator(
-          LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
-                            rect_top, rect_width, rect_height)) {}
-  virtual ~MutableIterator() {}
-
-  // See PageIterator and ResultIterator for most calls.
-
-  // Return access to Tesseract internals.
-  const PAGE_RES_IT *PageResIt() const { return it_; }
-};
-
-}  // namespace tesseract.
-
-#endif  // TESSERACT_CCMAIN_MUTABLEITERATOR_H_
+///////////////////////////////////////////////////////////////////////
+// File:        mutableiterator.h
+// Description: Iterator for tesseract results providing access to
+//              both high-level API and Tesseract internal data structures.
+// Author:      David Eger
+// Created:     Thu Feb 24 19:01:06 PST 2011
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
+#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
+
+#include "resultiterator.h"
+
+class BLOB_CHOICE_IT;
+
+namespace tesseract {
+
+class Tesseract;
+
+// Class to iterate over tesseract results, providing access to all levels
+// of the page hierarchy, without including any tesseract headers or having
+// to handle any tesseract structures.
+// WARNING! This class points to data held within the TessBaseAPI class, and
+// therefore can only be used while the TessBaseAPI class still exists and
+// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
+// DetectOS, or anything else that changes the internal PAGE_RES.
+// See apitypes.h for the definition of PageIteratorLevel.
+// See also base class PageIterator, which contains the bulk of the interface.
+// ResultIterator adds text-specific methods for access to OCR output.
+// MutableIterator adds access to internal data structures (see PageResIt()).
+
+class MutableIterator : public ResultIterator {
+ public:
+  // See argument descriptions in ResultIterator(). All arguments are forwarded.
+  MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
+                  int scale, int scaled_yres,
+                  int rect_left, int rect_top,
+                  int rect_width, int rect_height)
+      : ResultIterator(  // Base is built from a temporary LTRResultIterator.
+          LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
+                            rect_top, rect_width, rect_height)) {}
+  virtual ~MutableIterator() {}  // Empty body: nothing is released here.
+
+  // See PageIterator and ResultIterator for most calls.
+
+  // Returns it_ (not declared in this class) as a pointer to const PAGE_RES_IT.
+  const PAGE_RES_IT *PageResIt() const { return it_; }
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_CCMAIN_MUTABLEITERATOR_H_
--- a/src/ccmain/osdetect.cpp
+++ b/src/ccmain/osdetect.cpp
--- a/src/ccmain/osdetect.h
+++ b/src/ccmain/osdetect.h
--- a/src/ccmain/output.cpp
+++ b/src/ccmain/output.cpp
--- a/src/ccmain/output.h
+++ b/src/ccmain/output.h
@ -1,33 +1,33 @@
-/******************************************************************
- * File:        output.h  (Formerly output.h)
- * Description: Output pass
- * Author:		Phil Cheatle
- * Created:		Thu Aug  4 10:56:08 BST 1994
- *
- * (C) Copyright 1994, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef           OUTPUT_H
-#define           OUTPUT_H
-
-#include          "params.h"
-//#include                                      "epapconv.h"
-#include          "pageres.h"
-
-/** test line ends */
-char determine_newline_type(WERD *word,        ///< word to do
-                            BLOCK *block,      ///< current block
-                            WERD *next_word,   ///< next word
-                            BLOCK *next_block  ///< block of next word
-                           );
-#endif
+/******************************************************************
+ * File:        output.h  (Formerly output.h)
+ * Description: Output pass
+ * Author:		Phil Cheatle
+ * Created:		Thu Aug  4 10:56:08 BST 1994
+ *
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           OUTPUT_H
+#define           OUTPUT_H
+
+#include          "params.h"
+//#include                                      "epapconv.h"
+#include          "pageres.h"
+
+/** test line ends */
+char determine_newline_type(WERD *word,        ///< word to do
+                            BLOCK *block,      ///< current block
+                            WERD *next_word,   ///< next word
+                            BLOCK *next_block  ///< block of next word
+                           );
+#endif
--- a/src/ccmain/pageiterator.cpp
+++ b/src/ccmain/pageiterator.cpp
--- a/src/ccmain/pageiterator.h
+++ b/src/ccmain/pageiterator.h
--- a/src/ccmain/pagesegmain.cpp
+++ b/src/ccmain/pagesegmain.cpp
--- a/src/ccmain/pagewalk.cpp
+++ b/src/ccmain/pagewalk.cpp
--- a/src/ccmain/par_control.cpp
+++ b/src/ccmain/par_control.cpp
--- a/src/ccmain/paragraphs.cpp
+++ b/src/ccmain/paragraphs.cpp
--- a/src/ccmain/paragraphs.h
+++ b/src/ccmain/paragraphs.h
@ -1,108 +1,108 @@
-/**********************************************************************
- * File:        paragraphs.h
- * Description: Paragraph Detection data structures.
- * Author:      David Eger
- * Created:     25 February 2011
- *
- * (C) Copyright 2011, Google Inc.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
-#define TESSERACT_CCMAIN_PARAGRAPHS_H_
-
-#include "rect.h"
-#include "ocrpara.h"
-#include "genericvector.h"
-#include "strngs.h"
-
-
-class WERD;
-class UNICHARSET;
-
-namespace tesseract {
-
-class MutableIterator;
-
-// This structure captures all information needed about a text line for the
-// purposes of paragraph detection.  It is meant to be exceedingly light-weight
-// so that we can easily test paragraph detection independent of the rest of
-// Tesseract.
-class RowInfo {
- public:
-  // Constant data derived from Tesseract output.
-  STRING text;        // the full UTF-8 text of the line.
-  bool ltr;           // whether the majority of the text is left-to-right
-                      // TODO(eger) make this more fine-grained.
-
-  bool has_leaders;   // does the line contain leader dots (.....)?
-  bool has_drop_cap;  // does the line have a drop cap?
-  int pix_ldistance;  // distance to the left pblock boundary in pixels
-  int pix_rdistance;  // distance to the right pblock boundary in pixels
-  float pix_xheight;  // guessed xheight for the line
-  int average_interword_space; // average space between words in pixels.
-
-  int num_words;
-  TBOX lword_box;     // in normalized (horiz text rows) space
-  TBOX rword_box;     // in normalized (horiz text rows) space
-
-  STRING lword_text;   // the UTF-8 text of the leftmost werd
-  STRING rword_text;   // the UTF-8 text of the rightmost werd
-
-  //   The text of a paragraph typically starts with the start of an idea and
-  // ends with the end of an idea.  Here we define paragraph as something that
-  // may have a first line indent and a body indent which may be different.
-  // Typical words that start an idea are:
-  //   1. Words in western scripts that start with
-  //      a capital letter, for example "The"
-  //   2. Bulleted or numbered list items, for
-  //      example "2."
-  // Typical words which end an idea are words ending in punctuation marks. In
-  // this vocabulary, each list item is represented as a paragraph.
-  bool lword_indicates_list_item;
-  bool lword_likely_starts_idea;
-  bool lword_likely_ends_idea;
-
-  bool rword_indicates_list_item;
-  bool rword_likely_starts_idea;
-  bool rword_likely_ends_idea;
-};
-
-// Main entry point for Paragraph Detection Algorithm.
-//
-// Given a set of equally spaced textlines (described by row_infos),
-// Split them into paragraphs.  See http://goto/paragraphstalk
-//
-// Output:
-//   row_owners - one pointer for each row, to the paragraph it belongs to.
-//   paragraphs - this is the actual list of PARA objects.
-//   models - the list of paragraph models referenced by the PARA objects.
-//            caller is responsible for deleting the models.
-void DetectParagraphs(int debug_level,
-                      GenericVector<RowInfo> *row_infos,
-                      GenericVector<PARA *> *row_owners,
-                      PARA_LIST *paragraphs,
-                      GenericVector<ParagraphModel *> *models);
-
-// Given a MutableIterator to the start of a block, run DetectParagraphs on
-// that block and commit the results to the underlying ROW and BLOCK structs,
-// saving the ParagraphModels in models.  Caller owns the models.
-// We use unicharset during the function to answer questions such as "is the
-// first letter of this word upper case?"
-void DetectParagraphs(int debug_level,
-                      bool after_text_recognition,
-                      const MutableIterator *block_start,
-                      GenericVector<ParagraphModel *> *models);
-
-}  // namespace
-
-#endif  // TESSERACT_CCMAIN_PARAGRAPHS_H_
+/**********************************************************************
+ * File:        paragraphs.h
+ * Description: Paragraph Detection data structures.
+ * Author:      David Eger
+ * Created:     25 February 2011
+ *
+ * (C) Copyright 2011, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
+#define TESSERACT_CCMAIN_PARAGRAPHS_H_
+
+#include "rect.h"
+#include "ocrpara.h"
+#include "genericvector.h"
+#include "strngs.h"
+
+
+class WERD;
+class UNICHARSET;
+
+namespace tesseract {
+
+class MutableIterator;
+
+// This class (all members public) captures all information needed about a
+// text line for the purposes of paragraph detection.  It is meant to be
+// exceedingly light-weight so that we can easily test paragraph detection
+// independent of the rest of Tesseract.
+class RowInfo {
+ public:
+  // Constant data derived from Tesseract output.
+  STRING text;        // the full UTF-8 text of the line.
+  bool ltr;           // whether the majority of the text is left-to-right
+                      // TODO(eger) make this more fine-grained.
+
+  bool has_leaders;   // does the line contain leader dots (.....)?
+  bool has_drop_cap;  // does the line have a drop cap?
+  int pix_ldistance;  // distance to the left pblock boundary in pixels
+  int pix_rdistance;  // distance to the right pblock boundary in pixels
+  float pix_xheight;  // guessed xheight for the line
+  int average_interword_space; // average space between words in pixels.
+
+  int num_words;  // NOTE(review): presumably the word count of the line; confirm
+  TBOX lword_box;     // leftmost word, in normalized (horiz text rows) space
+  TBOX rword_box;     // rightmost word, in normalized (horiz text rows) space
+
+  STRING lword_text;   // the UTF-8 text of the leftmost word
+  STRING rword_text;   // the UTF-8 text of the rightmost word
+
+  //   The text of a paragraph typically starts with the start of an idea and
+  // ends with the end of an idea.  Here we define paragraph as something that
+  // may have a first line indent and a body indent which may be different.
+  // Typical words that start an idea are:
+  //   1. Words in western scripts that start with
+  //      a capital letter, for example "The"
+  //   2. Bulleted or numbered list items, for
+  //      example "2."
+  // Typical words which end an idea are words ending in punctuation marks. In
+  // this vocabulary, each list item is represented as a paragraph.
+  bool lword_indicates_list_item;  // leftmost word is a list item, e.g. "2."
+  bool lword_likely_starts_idea;   // leftmost word typically starts an idea
+  bool lword_likely_ends_idea;     // leftmost word typically ends an idea
+
+  bool rword_indicates_list_item;  // rightmost word is a list item, e.g. "2."
+  bool rword_likely_starts_idea;   // rightmost word typically starts an idea
+  bool rword_likely_ends_idea;     // rightmost word typically ends an idea
+};
+
+// Main entry point for Paragraph Detection Algorithm.
+//
+// Given a set of equally spaced textlines (described by row_infos),
+// split them into paragraphs.  See http://goto/paragraphstalk
+//
+// Output:
+//   row_owners - one pointer for each row, to the paragraph it belongs to.
+//   paragraphs - this is the actual list of PARA objects.
+//   models - the list of paragraph models referenced by the PARA objects.
+//            caller is responsible for deleting the models.
+void DetectParagraphs(int debug_level,
+                      GenericVector<RowInfo> *row_infos,
+                      GenericVector<PARA *> *row_owners,
+                      PARA_LIST *paragraphs,
+                      GenericVector<ParagraphModel *> *models);
+
+// Given a MutableIterator to the start of a block, run DetectParagraphs on
+// that block and commit the results to the underlying ROW and BLOCK structs,
+// saving the ParagraphModels in models.  Caller owns the models.
+// We use unicharset during the function to answer questions such as "is the
+// first letter of this word upper case?"
+void DetectParagraphs(int debug_level,
+                      bool after_text_recognition,
+                      const MutableIterator *block_start,
+                      GenericVector<ParagraphModel *> *models);
+
+}  // namespace tesseract
+
+#endif  // TESSERACT_CCMAIN_PARAGRAPHS_H_
--- a/src/ccmain/paragraphs_internal.h
+++ b/src/ccmain/paragraphs_internal.h
--- a/src/ccmain/paramsd.cpp
+++ b/src/ccmain/paramsd.cpp
--- a/src/ccmain/paramsd.h
+++ b/src/ccmain/paramsd.h
--- a/src/ccmain/pgedit.cpp
+++ b/src/ccmain/pgedit.cpp
--- a/src/ccmain/pgedit.h
+++ b/src/ccmain/pgedit.h
@ -1,87 +1,87 @@
-///////////////////////////////////////////////////////////////////////
-// File:        pgedit.h
-// Description: Page structure file editor
-// Author:      Joern Wanke
-// Created:     Wed Jul 18 10:05:01 PDT 2007
-//
-// (C) Copyright 2007, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#ifndef           PGEDIT_H
-#define           PGEDIT_H
-
-#include          "ocrblock.h"
-#include          "ocrrow.h"
-#include          "werd.h"
-#include          "rect.h"
-#include          "params.h"
-#include          "tesseractclass.h"
-
-class ScrollView;
-class SVMenuNode;
-struct SVEvent;
-
-// A small event handler class to process incoming events to
-// this window.
-class PGEventHandler : public SVEventHandler {
-  public:
-   PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
-   }
-   void Notify(const SVEvent* sve);
-  private:
-    tesseract::Tesseract* tess_;
-};
-
-extern BLOCK_LIST *current_block_list;
-extern STRING_VAR_H (editor_image_win_name, "EditorImage",
-"Editor image window name");
-extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos");
-extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos");
-extern INT_VAR_H (editor_image_height, 680, "Editor image height");
-extern INT_VAR_H (editor_image_width, 655, "Editor image width");
-extern INT_VAR_H (editor_image_word_bb_color, BLUE,
-"Word bounding box colour");
-extern INT_VAR_H (editor_image_blob_bb_color, YELLOW,
-"Blob bounding box colour");
-extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour");
-extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin",
-"Editor debug window name");
-extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos");
-extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos");
-extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height");
-extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width");
-extern STRING_VAR_H (editor_word_name, "BlnWords",
-"BL normalised word window");
-extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos");
-extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos");
-extern INT_VAR_H (editor_word_height, 240, "Word window height");
-extern INT_VAR_H (editor_word_width, 655, "Word window width");
-extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image");
-
-ScrollView* bln_word_window_handle();  //return handle
-void build_image_window(int width, int height);
-void display_bln_lines(ScrollView window,
-                       ScrollView::Color colour,
-                       float scale_factor,
-                       float y_offset,
-                       float minx,
-                       float maxx);
-                                 //function to call
-void pgeditor_msg(  //message display
-                  const char *msg);
-void pgeditor_show_point(  //display coords
-                         SVEvent *event);
-                                 //put bln word in       box
-void show_point(PAGE_RES* page_res, float x, float y);
-
-#endif
+///////////////////////////////////////////////////////////////////////
+// File:        pgedit.h
+// Description: Page structure file editor
+// Author:      Joern Wanke
+// Created:     Wed Jul 18 10:05:01 PDT 2007
+//
+// (C) Copyright 2007, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef           PGEDIT_H
+#define           PGEDIT_H
+
+#include          "ocrblock.h"
+#include          "ocrrow.h"
+#include          "werd.h"
+#include          "rect.h"
+#include          "params.h"
+#include          "tesseractclass.h"
+
+class ScrollView;
+class SVMenuNode;
+struct SVEvent;
+
+// A small event handler class to process incoming events to this window.
+// It keeps the Tesseract pointer passed to its constructor in tess_.
+class PGEventHandler : public SVEventHandler {
+  public:
+   PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
+   }  // Only stores the pointer; no other work is done here.
+   void Notify(const SVEvent* sve);  // Declared only; no body in this header.
+  private:
+    tesseract::Tesseract* tess_;  // Pointer supplied at construction.
+};
+
+extern BLOCK_LIST *current_block_list;
+extern STRING_VAR_H (editor_image_win_name, "EditorImage",
+"Editor image window name");
+extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos");
+extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos");
+extern INT_VAR_H (editor_image_height, 680, "Editor image height");
+extern INT_VAR_H (editor_image_width, 655, "Editor image width");
+extern INT_VAR_H (editor_image_word_bb_color, BLUE,
+"Word bounding box colour");
+extern INT_VAR_H (editor_image_blob_bb_color, YELLOW,
+"Blob bounding box colour");
+extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour");
+extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin",
+"Editor debug window name");
+extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos");
+extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos");
+extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height");
+extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width");
+extern STRING_VAR_H (editor_word_name, "BlnWords",
+"BL normalised word window");
+extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos");
+extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos");
+extern INT_VAR_H (editor_word_height, 240, "Word window height");
+extern INT_VAR_H (editor_word_width, 655, "Word window width");
+extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image");
+
+ScrollView* bln_word_window_handle();  //return handle
+void build_image_window(int width, int height);
+void display_bln_lines(ScrollView window,
+                       ScrollView::Color colour,
+                       float scale_factor,
+                       float y_offset,
+                       float minx,
+                       float maxx);
+                                 //function to call
+void pgeditor_msg(  //message display
+                  const char *msg);
+void pgeditor_show_point(  //display coords
+                         SVEvent *event);
+                                 //put bln word in       box
+void show_point(PAGE_RES* page_res, float x, float y);
+
+#endif
--- a/src/ccmain/recogtraining.cpp
+++ b/src/ccmain/recogtraining.cpp
--- a/src/ccmain/reject.cpp
+++ b/src/ccmain/reject.cpp
--- a/src/ccmain/reject.h
+++ b/src/ccmain/reject.h
@ -1,34 +1,34 @@
-/**********************************************************************
- * File:        reject.h  (Formerly reject.h)
- * Description: Rejection functions used in tessedit
- * Author:		Phil Cheatle
- * Created:		Wed Sep 23 16:50:21 BST 1992
- *
- * (C) Copyright 1992, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef           REJECT_H
-#define           REJECT_H
-
-#include          "params.h"
-#include          "pageres.h"
-
-void reject_blanks(WERD_RES *word);
-void reject_poor_matches(WERD_RES *word);
-float compute_reject_threshold(WERD_CHOICE* word);
-BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
-void dont_allow_1Il(WERD_RES *word);
-void flip_hyphens(WERD_RES *word);
-void flip_0O(WERD_RES *word);
-BOOL8 non_0_digit(const char* str, int length);
-#endif
+/**********************************************************************
+ * File:        reject.h  (Formerly reject.h)
+ * Description: Rejection functions used in tessedit
+ * Author:		Phil Cheatle
+ * Created:		Wed Sep 23 16:50:21 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           REJECT_H
+#define           REJECT_H
+
+#include          "params.h"
+#include          "pageres.h"
+// Rejection functions used in tessedit; declarations only, no bodies in this header.
+void reject_blanks(WERD_RES *word);
+void reject_poor_matches(WERD_RES *word);
+float compute_reject_threshold(WERD_CHOICE* word);
+BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
+void dont_allow_1Il(WERD_RES *word);
+void flip_hyphens(WERD_RES *word);
+void flip_0O(WERD_RES *word);
+BOOL8 non_0_digit(const char* str, int length);
+#endif  // REJECT_H
--- a/src/ccmain/resultiterator.cpp
+++ b/src/ccmain/resultiterator.cpp
--- a/src/ccmain/resultiterator.h
+++ b/src/ccmain/resultiterator.h
--- a/src/ccmain/superscript.cpp
+++ b/src/ccmain/superscript.cpp
--- a/src/ccmain/tessbox.cpp
+++ b/src/ccmain/tessbox.cpp
--- a/src/ccmain/tessbox.h
+++ b/src/ccmain/tessbox.h
@ -1,28 +1,28 @@
-/**********************************************************************
- * File:        tessbox.h  (Formerly tessbox.h)
- * Description: Black boxed Tess for developing a resaljet.
- * Author:					Ray Smith
- * Created:					Thu Apr 23 11:03:36 BST 1992
- *
- * (C) Copyright 1992, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef           TESSBOX_H
-#define           TESSBOX_H
-
-#include          "ratngs.h"
-#include "tesseractclass.h"
-
-// TODO(ocr-team): Delete this along with other empty header files.
-
-#endif
+/**********************************************************************
+ * File:        tessbox.h  (Formerly tessbox.h)
+ * Description: Black boxed Tess for developing a resaljet.
+ * Author:					Ray Smith
+ * Created:					Thu Apr 23 11:03:36 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           TESSBOX_H
+#define           TESSBOX_H
+
+#include          "ratngs.h"
+#include "tesseractclass.h"
+
+// TODO(ocr-team): Delete this along with other empty header files.
+// This header declares nothing itself; it only pulls in the two includes above.
+#endif  // TESSBOX_H
--- a/src/ccmain/tessedit.cpp
+++ b/src/ccmain/tessedit.cpp
--- a/src/ccmain/tessedit.h
+++ b/src/ccmain/tessedit.h
@ -1,29 +1,29 @@
-/**********************************************************************
- * File:        tessedit.h  (Formerly tessedit.h)
- * Description: Main program for merge of tess and editor.
- * Author:		Ray Smith
- * Created:		Tue Jan 07 15:21:46 GMT 1992
- *
- * (C) Copyright 1992, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef           TESSEDIT_H
-#define           TESSEDIT_H
-
-#include          "blobs.h"
-#include          "pgedit.h"
-
-                                 //progress monitor
-extern ETEXT_DESC *global_monitor;
-
-#endif
+/**********************************************************************
+ * File:        tessedit.h  (Formerly tessedit.h)
+ * Description: Main program for merge of tess and editor.
+ * Author:		Ray Smith
+ * Created:		Tue Jan 07 15:21:46 GMT 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           TESSEDIT_H
+#define           TESSEDIT_H
+
+#include          "blobs.h"
+#include          "pgedit.h"
+
+// Progress monitor pointer; extern declaration only, the storage is defined elsewhere.
+extern ETEXT_DESC *global_monitor;
+
+#endif  // TESSEDIT_H
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
--- a/src/ccmain/tessvars.cpp
+++ b/src/ccmain/tessvars.cpp
@ -1,24 +1,24 @@
-/**********************************************************************
- * File:        tessvars.cpp  (Formerly tessvars.c)
- * Description: Variables and other globals for tessedit.
- * Author:		Ray Smith
- * Created:		Mon Apr 13 13:13:23 BST 1992
- *
- * (C) Copyright 1992, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#include <stdio.h>
-
-#include  "tessvars.h"
-
-FILE *debug_fp = stderr;  // write debug stuff here
+/**********************************************************************
+ * File:        tessvars.cpp  (Formerly tessvars.c)
+ * Description: Variables and other globals for tessedit.
+ * Author:		Ray Smith
+ * Created:		Mon Apr 13 13:13:23 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include <stdio.h>
+
+#include  "tessvars.h"
+
+FILE *debug_fp = stderr;  // global stream that debug output is written to; starts as stderr
--- a/src/ccmain/tessvars.h
+++ b/src/ccmain/tessvars.h
@ -1,27 +1,27 @@
-/**********************************************************************
- * File:        tessvars.h  (Formerly tessvars.h)
- * Description: Variables and other globals for tessedit.
- * Author:		Ray Smith
- * Created:		Mon Apr 13 13:13:23 BST 1992
- *
- * (C) Copyright 1992, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef           TESSVARS_H
-#define           TESSVARS_H
-
-#include <stdio.h>
-
-
-extern FILE *debug_fp;    // write debug stuff here
-#endif
+/**********************************************************************
+ * File:        tessvars.h  (Formerly tessvars.h)
+ * Description: Variables and other globals for tessedit.
+ * Author:		Ray Smith
+ * Created:		Mon Apr 13 13:13:23 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           TESSVARS_H
+#define           TESSVARS_H
+
+#include <stdio.h>
+
+// Declaration only: the stream that debug output is written to.
+extern FILE *debug_fp;    // write debug stuff here
+#endif  // TESSVARS_H
--- a/src/ccmain/tfacepp.cpp
+++ b/src/ccmain/tfacepp.cpp
--- a/src/ccmain/thresholder.cpp
+++ b/src/ccmain/thresholder.cpp
--- a/src/ccmain/thresholder.h
+++ b/src/ccmain/thresholder.h
--- a/src/ccmain/werdit.cpp
+++ b/src/ccmain/werdit.cpp
--- a/src/ccmain/werdit.h
+++ b/src/ccmain/werdit.h
@ -1,27 +1,27 @@
-/**********************************************************************
- * File:        wordit.h
- * Description: An iterator for passing over all the words in a document.
- * Author:      Ray Smith
- * Created:     Mon Apr 27 08:51:22 BST 1992
- *
- * (C) Copyright 1992, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef           WERDIT_H
-#define           WERDIT_H
-
-#include          "pageres.h"
-
-PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
-
-#endif
+/**********************************************************************
+ * File:        werdit.h
+ * Description: An iterator for passing over all the words in a document.
+ * Author:      Ray Smith
+ * Created:     Mon Apr 27 08:51:22 BST 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           WERDIT_H
+#define           WERDIT_H
+
+#include          "pageres.h"
+// NOTE(review): ownership of the returned PAGE_RES_IT* is not stated here -- confirm.
+PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
+
+#endif  // WERDIT_H
--- a/src/ccstruct/Makefile.am
+++ b/src/ccstruct/Makefile.am
--- a/src/ccstruct/blamer.cpp
+++ b/src/ccstruct/blamer.cpp
--- a/src/ccstruct/blamer.h
+++ b/src/ccstruct/blamer.h
--- a/src/ccstruct/blckerr.h
+++ b/src/ccstruct/blckerr.h
@ -1,29 +1,29 @@
-/**********************************************************************
- * File:        blckerr.h  (Formerly blockerr.h)
- * Description: Error codes for the page block classes.
- * Author:					Ray Smith
- * Created:					Tue Mar 19 17:43:30 GMT 1991
- *
- * (C) Copyright 1991, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef           BLCKERR_H
-#define           BLCKERR_H
-
-#include          "errcode.h"
-
-const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
-const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
-const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
-const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
-#endif
+/**********************************************************************
+ * File:        blckerr.h  (Formerly blockerr.h)
+ * Description: Error codes for the page block classes.
+ * Author:					Ray Smith
+ * Created:					Tue Mar 19 17:43:30 GMT 1991
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           BLCKERR_H
+#define           BLCKERR_H
+
+#include          "errcode.h"
+// Error codes for the page block classes; each ERRCODE is initialised from its message text.
+const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
+const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
+const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
+const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
+#endif  // BLCKERR_H
--- a/src/ccstruct/blobbox.cpp
+++ b/src/ccstruct/blobbox.cpp
--- a/src/ccstruct/blobbox.h
+++ b/src/ccstruct/blobbox.h
--- a/src/ccstruct/blobs.cpp
+++ b/src/ccstruct/blobs.cpp
--- a/src/ccstruct/blobs.h
+++ b/src/ccstruct/blobs.h
--- a/src/ccstruct/blread.cpp
+++ b/src/ccstruct/blread.cpp
--- a/src/ccstruct/blread.h
+++ b/src/ccstruct/blread.h
--- a/src/ccstruct/boxread.cpp
+++ b/src/ccstruct/boxread.cpp
--- a/src/ccstruct/boxread.h
+++ b/src/ccstruct/boxread.h
--- a/src/ccstruct/boxword.cpp
+++ b/src/ccstruct/boxword.cpp
--- a/src/ccstruct/boxword.h
+++ b/src/ccstruct/boxword.h
@ -1,100 +1,100 @@
-///////////////////////////////////////////////////////////////////////
-// File:        boxword.h
-// Description: Class to represent the bounding boxes of the output.
-// Author:      Ray Smith
-// Created:     Tue May 25 14:18:14 PDT 2010
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
-#define TESSERACT_CSTRUCT_BOXWORD_H_
-
-#include "genericvector.h"
-#include "rect.h"
-#include "unichar.h"
-
-class BLOCK;
-class DENORM;
-struct TWERD;
-class UNICHARSET;
-class WERD;
-class WERD_CHOICE;
-class WERD_RES;
-
-namespace tesseract {
-
-// Class to hold an array of bounding boxes for an output word and
-// the bounding box of the whole word.
-class BoxWord {
- public:
-  BoxWord();
-  explicit BoxWord(const BoxWord& src);
-  ~BoxWord();
-
-  BoxWord& operator=(const BoxWord& src);
-
-  void CopyFrom(const BoxWord& src);
-
-  // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
-  // switch back to original image coordinates.
-  static BoxWord* CopyFromNormalized(TWERD* tessword);
-
-  // Clean up the bounding boxes from the polygonal approximation by
-  // expanding slightly, then clipping to the blobs from the original_word
-  // that overlap. If not null, the block provides the inverse rotation.
-  void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
-
-  // Merges the boxes from start to end, not including end, and deletes
-  // the boxes between start and end.
-  void MergeBoxes(int start, int end);
-
-  // Inserts a new box before the given index.
-  // Recomputes the bounding box.
-  void InsertBox(int index, const TBOX& box);
-
-  // Changes the box at the given index to the new box.
-  // Recomputes the bounding box.
-  void ChangeBox(int index, const TBOX& box);
-
-  // Deletes the box with the given index, and shuffles up the rest.
-  // Recomputes the bounding box.
-  void DeleteBox(int index);
-
-  // Deletes all the boxes stored in BoxWord.
-  void DeleteAllBoxes();
-
-  // This and other putatively are the same, so call the (permanent) callback
-  // for each blob index where the bounding boxes match.
-  // The callback is deleted on completion.
-  void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
-
-  const TBOX& bounding_box() const {
-    return bbox_;
-  }
-  int length() const { return length_; }
-  const TBOX& BlobBox(int index) const {
-    return boxes_[index];
-  }
-
- private:
-  void ComputeBoundingBox();
-
-  TBOX bbox_;
-  int length_;
-  GenericVector<TBOX> boxes_;
-};
-
-}  // namespace tesseract.
-
-#endif  // TESSERACT_CSTRUCT_BOXWORD_H_
+///////////////////////////////////////////////////////////////////////
+// File:        boxword.h
+// Description: Class to represent the bounding boxes of the output.
+// Author:      Ray Smith
+// Created:     Tue May 25 14:18:14 PDT 2010
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
+#define TESSERACT_CSTRUCT_BOXWORD_H_
+
+#include "genericvector.h"
+#include "rect.h"
+#include "unichar.h"
+
+class BLOCK;
+class DENORM;
+struct TWERD;
+class UNICHARSET;
+class WERD;
+class WERD_CHOICE;
+class WERD_RES;
+
+namespace tesseract {
+
+// Class to hold an array of bounding boxes for an output word and
+// the bounding box of the whole word.
+class BoxWord {
+ public:
+  BoxWord();
+  explicit BoxWord(const BoxWord& src);  // NOTE(review): explicit copy ctor rules out implicit copies (e.g. pass-by-value) -- confirm intended
+  ~BoxWord();
+
+  BoxWord& operator=(const BoxWord& src);
+
+  void CopyFrom(const BoxWord& src);
+
+  // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
+  // switch back to original image coordinates.
+  static BoxWord* CopyFromNormalized(TWERD* tessword);
+
+  // Clean up the bounding boxes from the polygonal approximation by
+  // expanding slightly, then clipping to the blobs from the original_word
+  // that overlap. If not null, the block provides the inverse rotation.
+  void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
+
+  // Merges the boxes from start to end, not including end, and deletes
+  // the boxes between start and end.
+  void MergeBoxes(int start, int end);
+
+  // Inserts a new box before the given index.
+  // Recomputes the bounding box.
+  void InsertBox(int index, const TBOX& box);
+
+  // Changes the box at the given index to the new box.
+  // Recomputes the bounding box.
+  void ChangeBox(int index, const TBOX& box);
+
+  // Deletes the box with the given index, and shuffles up the rest.
+  // Recomputes the bounding box.
+  void DeleteBox(int index);
+
+  // Deletes all the boxes stored in BoxWord.
+  void DeleteAllBoxes();
+
+  // This and other putatively are the same, so call the (permanent) callback
+  // for each blob index where the bounding boxes match.
+  // The callback is deleted on completion.
+  void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
+
+  const TBOX& bounding_box() const {  // Accessor for bbox_.
+    return bbox_;
+  }
+  int length() const { return length_; }  // Accessor for length_.
+  const TBOX& BlobBox(int index) const {  // Element of boxes_; no range check in this accessor.
+    return boxes_[index];
+  }
+
+ private:
+  void ComputeBoundingBox();
+
+  TBOX bbox_;  // Returned by bounding_box().
+  int length_;  // Returned by length().
+  GenericVector<TBOX> boxes_;  // Per-index boxes, read through BlobBox().
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_CSTRUCT_BOXWORD_H_
--- a/src/ccstruct/ccstruct.cpp
+++ b/src/ccstruct/ccstruct.cpp
@ -1,36 +1,36 @@
-///////////////////////////////////////////////////////////////////////
-// File:        ccstruct.cpp
-// Description: ccstruct class.
-// Author:      Samuel Charron
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#include "ccstruct.h"
-
-namespace tesseract  {
-
-// APPROXIMATIONS of the fractions of the character cell taken by
-// the descenders, ascenders, and x-height.
-const double CCStruct::kDescenderFraction = 0.25;
-const double CCStruct::kXHeightFraction = 0.5;
-const double CCStruct::kAscenderFraction = 0.25;
-const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
-    (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
-
-CCStruct::CCStruct() {}
-
-CCStruct::~CCStruct() {
-}
-
-}
+///////////////////////////////////////////////////////////////////////
+// File:        ccstruct.cpp
+// Description: ccstruct class.
+// Author:      Samuel Charron
+//
+// (C) Copyright 2006, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "ccstruct.h"
+
+namespace tesseract  {
+
+// APPROXIMATIONS of the fractions of the character cell taken by
+// the descenders, ascenders, and x-height (the three values sum to 1.0).
+const double CCStruct::kDescenderFraction = 0.25;
+const double CCStruct::kXHeightFraction = 0.5;
+const double CCStruct::kAscenderFraction = 0.25;
+const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
+    (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);  // 0.5 / (0.5 + 0.25)
+
+CCStruct::CCStruct() {}  // Empty body.
+
+CCStruct::~CCStruct() {
+}
+
+}  // namespace tesseract
--- a/src/ccstruct/ccstruct.h
+++ b/src/ccstruct/ccstruct.h
@ -1,43 +1,43 @@
-///////////////////////////////////////////////////////////////////////
-// File:        ccstruct.h
-// Description: ccstruct class.
-// Author:      Samuel Charron
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
-#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
-
-#include "cutil.h"
-
-namespace tesseract {
-class CCStruct : public CUtil {
- public:
-  CCStruct();
-  ~CCStruct();
-
-  // Globally accessible constants.
-  // APPROXIMATIONS of the fractions of the character cell taken by
-  // the descenders, ascenders, and x-height.
-  static const double kDescenderFraction;  // = 0.25;
-  static const double kXHeightFraction;    // = 0.5;
-  static const double kAscenderFraction;   // = 0.25;
-  // Derived value giving the x-height as a fraction of cap-height.
-  static const double kXHeightCapRatio;    // = XHeight/(XHeight + Ascender).
-};
-
-class Tesseract;
-}  // namespace tesseract
-
-#endif  // TESSERACT_CCSTRUCT_CCSTRUCT_H_
+///////////////////////////////////////////////////////////////////////
+// File:        ccstruct.h
+// Description: ccstruct class.
+// Author:      Samuel Charron
+//
+// (C) Copyright 2006, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
+#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
+
+#include "cutil.h"
+
+namespace tesseract {
+class CCStruct : public CUtil {  // Adds shared character-cell constants on top of CUtil.
+ public:
+  CCStruct();
+  ~CCStruct();
+
+  // Globally accessible constants; declared here, given their values out of line.
+  // APPROXIMATIONS of the fractions of the character cell taken by
+  // the descenders, ascenders, and x-height.
+  static const double kDescenderFraction;  // = 0.25;
+  static const double kXHeightFraction;    // = 0.5;
+  static const double kAscenderFraction;   // = 0.25;
+  // Derived value giving the x-height as a fraction of cap-height.
+  static const double kXHeightCapRatio;    // = XHeight/(XHeight + Ascender).
+};
+
+class Tesseract;  // Forward declaration only.
+}  // namespace tesseract
+
+#endif  // TESSERACT_CCSTRUCT_CCSTRUCT_H_
--- a/src/ccstruct/coutln.cpp
+++ b/src/ccstruct/coutln.cpp
--- a/src/ccstruct/coutln.h
+++ b/src/ccstruct/coutln.h
--- a/src/ccstruct/crakedge.h
+++ b/src/ccstruct/crakedge.h
--- a/src/ccstruct/debugpixa.h
+++ b/src/ccstruct/debugpixa.h
@ -1,52 +1,52 @@
-#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
-#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
-
-#include "allheaders.h"
-
-namespace tesseract {
-
-// Class to hold a Pixa collection of debug images with captions and save them
-// to a PDF file.
-class DebugPixa {
- public:
-  // TODO(rays) add another constructor with size control.
-  DebugPixa() {
-    pixa_ = pixaCreate(0);
-    fonts_ = bmfCreate(nullptr, 14);
-  }
-  // If the filename_ has been set and there are any debug images, they are
-  // written to the set filename_.
-  ~DebugPixa() {
-    pixaDestroy(&pixa_);
-    bmfDestroy(&fonts_);
-  }
-
-  // Adds the given pix to the set of pages in the PDF file, with the given
-  // caption added to the top.
-  void AddPix(const Pix* pix, const char* caption) {
-    int depth = pixGetDepth(const_cast<Pix*>(pix));
-    int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
-    Pix* pix_debug = pixAddSingleTextblock(
-        const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
-    pixaAddPix(pixa_, pix_debug, L_INSERT);
-  }
-
-  // Sets the destination filename and enables images to be written to a PDF
-  // on destruction.
-  void WritePDF(const char* filename) {
-    if (pixaGetCount(pixa_) > 0) {
-      pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
-      pixaClear(pixa_);
-    }
-  }
-
- private:
-  // The collection of images to put in the PDF.
-  Pixa* pixa_;
-  // The fonts used to draw text captions.
-  L_Bmf* fonts_;
-};
-
-}  // namespace tesseract
-
-#endif  // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
+#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
+#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
+
+#include "allheaders.h"
+
+namespace tesseract {
+
+// Class to hold a Pixa collection of debug images with captions and save them
+// to a PDF file.
+class DebugPixa {
+ public:
+  // TODO(rays) add another constructor with size control.
+  DebugPixa() {
+    pixa_ = pixaCreate(0);
+    fonts_ = bmfCreate(nullptr, 14);
+  }
+  // Releases the image collection and the caption font. Nothing is written
+  // to disk here; call WritePDF beforehand to save the debug images.
+  ~DebugPixa() {
+    pixaDestroy(&pixa_);
+    bmfDestroy(&fonts_);
+  }
+
+  // Adds the given pix to the set of pages in the PDF file, with the given
+  // caption added below the image (L_ADD_BELOW).
+  void AddPix(const Pix* pix, const char* caption) {
+    int depth = pixGetDepth(const_cast<Pix*>(pix));
+    int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
+    Pix* pix_debug = pixAddSingleTextblock(
+        const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
+    pixaAddPix(pixa_, pix_debug, L_INSERT);
+  }
+
+  // Writes all accumulated debug images to the given PDF file and then clears
+  // the collection. Does nothing if no images have been added.
+  void WritePDF(const char* filename) {
+    if (pixaGetCount(pixa_) > 0) {
+      pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
+      pixaClear(pixa_);
+    }
+  }
+
+ private:
+  // The collection of images to put in the PDF.
+  Pixa* pixa_;
+  // The fonts used to draw text captions.
+  L_Bmf* fonts_;
+};
+
+}  // namespace tesseract
+
+#endif  // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
--- a/src/ccstruct/detlinefit.cpp
+++ b/src/ccstruct/detlinefit.cpp
@ -1,295 +1,295 @@
-///////////////////////////////////////////////////////////////////////
-// File:        detlinefit.cpp
-// Description: Deterministic least median squares line fitting.
-// Author:      Ray Smith
-// Created:     Thu Feb 28 14:45:01 PDT 2008
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#include "detlinefit.h"
-#include "statistc.h"
-#include "ndminx.h"
-#include "tprintf.h"
-
-namespace tesseract {
-
-// The number of points to consider at each end.
-const int kNumEndPoints = 3;
-// The minimum number of points at which to switch to number of points
-// for badly fitted lines.
-// To ensure a sensible error metric, kMinPointsForErrorCount should be at
-// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
-// ComputeUpperQuartileError.
-const int kMinPointsForErrorCount = 16;
-// The maximum real distance to use before switching to number of
-// mis-fitted points, which will get square-rooted for true distance.
-const int kMaxRealDistance = 2.0;
-
-DetLineFit::DetLineFit() : square_length_(0.0) {
-}
-
-DetLineFit::~DetLineFit() {
-}
-
-// Delete all Added points.
-void DetLineFit::Clear() {
-  pts_.clear();
-  distances_.clear();
-}
-
-// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
-void DetLineFit::Add(const ICOORD& pt) {
-  pts_.push_back(PointWidth(pt, 0));
-}
-// Associates a half-width with the given point if a point overlaps the
-// previous point by more than half the width, and its distance is further
-// than the previous point, then the more distant point is ignored in the
-// distance calculation. Useful for ignoring i dots and other diacritics.
-void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
-  pts_.push_back(PointWidth(pt, halfwidth));
-}
-
-// Fits a line to the points, ignoring the skip_first initial points and the
-// skip_last final points, returning the fitted line as a pair of points,
-// and the upper quartile error.
-double DetLineFit::Fit(int skip_first, int skip_last,
-                       ICOORD* pt1, ICOORD* pt2) {
-  // Do something sensible with no points.
-  if (pts_.empty()) {
-    pt1->set_x(0);
-    pt1->set_y(0);
-    *pt2 = *pt1;
-    return 0.0;
-  }
-  // Count the points and find the first and last kNumEndPoints.
-  int pt_count = pts_.size();
-  ICOORD* starts[kNumEndPoints];
-  if (skip_first >= pt_count) skip_first = pt_count - 1;
-  int start_count = 0;
-  int end_i = MIN(skip_first + kNumEndPoints, pt_count);
-  for (int i = skip_first; i < end_i; ++i) {
-    starts[start_count++] = &pts_[i].pt;
-  }
-  ICOORD* ends[kNumEndPoints];
-  if (skip_last >= pt_count) skip_last = pt_count - 1;
-  int end_count = 0;
-  end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
-  for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
-    ends[end_count++] = &pts_[i].pt;
-  }
-  // 1 or 2 points need special treatment.
-  if (pt_count <= 2) {
-    *pt1 = *starts[0];
-    if (pt_count > 1)
-      *pt2 = *ends[0];
-    else
-      *pt2 = *pt1;
-    return 0.0;
-  }
-  // Although with between 2 and 2*kNumEndPoints-1 points, there will be
-  // overlap in the starts, ends sets, this is OK and taken care of by the
-  // if (*start != *end) test below, which also tests for equal input points.
-  double best_uq = -1.0;
-  // Iterate each pair of points and find the best fitting line.
-  for (int i = 0; i < start_count; ++i) {
-    ICOORD* start = starts[i];
-    for (int j = 0; j < end_count; ++j) {
-      ICOORD* end = ends[j];
-      if (*start != *end) {
-        ComputeDistances(*start, *end);
-        // Compute the upper quartile error from the line.
-        double dist = EvaluateLineFit();
-        if (dist < best_uq || best_uq < 0.0) {
-          best_uq = dist;
-          *pt1 = *start;
-          *pt2 = *end;
-        }
-      }
-    }
-  }
-  // Finally compute the square root to return the true distance.
-  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
-}
-
-// Constrained fit with a supplied direction vector. Finds the best line_pt,
-// that is one of the supplied points having the median cross product with
-// direction, ignoring points that have a cross product outside of the range
-// [min_dist, max_dist]. Returns the resulting error metric using the same
-// reduced set of points.
-// *Makes use of floating point arithmetic*
-double DetLineFit::ConstrainedFit(const FCOORD& direction,
-                                  double min_dist, double max_dist,
-                                  bool debug, ICOORD* line_pt) {
-  ComputeConstrainedDistances(direction, min_dist, max_dist);
-  // Do something sensible with no points or computed distances.
-  if (pts_.empty() || distances_.empty()) {
-    line_pt->set_x(0);
-    line_pt->set_y(0);
-    return 0.0;
-  }
-  int median_index = distances_.choose_nth_item(distances_.size() / 2);
-  *line_pt = distances_[median_index].data;
-  if (debug) {
-    tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
-            direction.x(), direction.y(),
-            line_pt->x(), line_pt->y(), distances_.size());
-    for (int i = 0; i < distances_.size(); ++i) {
-      tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
-              distances_[i].data.y(), distances_[i].key);
-    }
-    tprintf("Result = %d\n", median_index);
-  }
-  // Center distances on the fitted point.
-  double dist_origin = direction * *line_pt;
-  for (int i = 0; i < distances_.size(); ++i) {
-    distances_[i].key -= dist_origin;
-  }
-  return sqrt(EvaluateLineFit());
-}
-
-// Returns true if there were enough points at the last call to Fit or
-// ConstrainedFit for the fitted points to be used on a badly fitted line.
-bool DetLineFit::SufficientPointsForIndependentFit() const {
-  return distances_.size() >= kMinPointsForErrorCount;
-}
-
-// Backwards compatible fit returning a gradient and constant.
-// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
-// function in preference to the LMS class.
-double DetLineFit::Fit(float* m, float* c) {
-  ICOORD start, end;
-  double error = Fit(&start, &end);
-  if (end.x() != start.x()) {
-    *m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
-    *c = start.y() - *m * start.x();
-  } else {
-    *m = 0.0f;
-    *c = 0.0f;
-  }
-  return error;
-}
-
-// Backwards compatible constrained fit with a supplied gradient.
-// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
-// to avoid potential difficulties with infinite gradients.
-double DetLineFit::ConstrainedFit(double m, float* c) {
-  // Do something sensible with no points.
-  if (pts_.empty()) {
-    *c = 0.0f;
-    return 0.0;
-  }
-  double cos = 1.0 / sqrt(1.0 + m * m);
-  FCOORD direction(cos, m * cos);
-  ICOORD line_pt;
-  double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
-                                &line_pt);
-  *c = line_pt.y() - line_pt.x() * m;
-  return error;
-}
-
-// Computes and returns the squared evaluation metric for a line fit.
-double DetLineFit::EvaluateLineFit() {
-  // Compute the upper quartile error from the line.
-  double dist = ComputeUpperQuartileError();
-  if (distances_.size() >= kMinPointsForErrorCount &&
-      dist > kMaxRealDistance * kMaxRealDistance) {
-    // Use the number of mis-fitted points as the error metric, as this
-    // gives a better measure of fit for badly fitted lines where more
-    // than a quarter are badly fitted.
-    double threshold = kMaxRealDistance * sqrt(square_length_);
-    dist = NumberOfMisfittedPoints(threshold);
-  }
-  return dist;
-}
-
-// Computes the absolute error distances of the points from the line,
-// and returns the squared upper-quartile error distance.
-double DetLineFit::ComputeUpperQuartileError() {
-  int num_errors = distances_.size();
-  if (num_errors == 0) return 0.0;
-  // Get the absolute values of the errors.
-  for (int i = 0; i < num_errors; ++i) {
-    if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
-  }
-  // Now get the upper quartile distance.
-  int index = distances_.choose_nth_item(3 * num_errors / 4);
-  double dist = distances_[index].key;
-  // The true distance is the square root of the dist squared / square_length.
-  // Don't bother with the square root. Just return the square distance.
-  return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
-}
-
-// Returns the number of sample points that have an error more than threshold.
-int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
-  int num_misfits = 0;
-  int num_dists = distances_.size();
-  // Get the absolute values of the errors.
-  for (int i = 0; i < num_dists; ++i) {
-    if (distances_[i].key > threshold)
-      ++num_misfits;
-  }
-  return num_misfits;
-}
-
-// Computes all the cross product distances of the points from the line,
-// storing the actual (signed) cross products in distances.
-// Ignores distances of points that are further away than the previous point,
-// and overlaps the previous point by at least half.
-void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
-  distances_.truncate(0);
-  ICOORD line_vector = end;
-  line_vector -= start;
-  square_length_ = line_vector.sqlength();
-  int line_length = IntCastRounded(sqrt(square_length_));
-  // Compute the distance of each point from the line.
-  int prev_abs_dist = 0;
-  int prev_dot = 0;
-  for (int i = 0; i < pts_.size(); ++i) {
-    ICOORD pt_vector = pts_[i].pt;
-    pt_vector -= start;
-    int dot = line_vector % pt_vector;
-    // Compute |line_vector||pt_vector|sin(angle between)
-    int dist = line_vector * pt_vector;
-    int abs_dist = dist < 0 ? -dist : dist;
-    if (abs_dist > prev_abs_dist && i > 0) {
-      // Ignore this point if it overlaps the previous one.
-      int separation = abs(dot - prev_dot);
-      if (separation < line_length * pts_[i].halfwidth ||
-          separation < line_length * pts_[i - 1].halfwidth)
-        continue;
-    }
-    distances_.push_back(DistPointPair(dist, pts_[i].pt));
-    prev_abs_dist = abs_dist;
-    prev_dot = dot;
-  }
-}
-
-// Computes all the cross product distances of the points perpendicular to
-// the given direction, ignoring distances outside of the give distance range,
-// storing the actual (signed) cross products in distances_.
-void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
-                                             double min_dist, double max_dist) {
-  distances_.truncate(0);
-  square_length_ = direction.sqlength();
-  // Compute the distance of each point from the line.
-  for (int i = 0; i < pts_.size(); ++i) {
-    FCOORD pt_vector = pts_[i].pt;
-    // Compute |line_vector||pt_vector|sin(angle between)
-    double dist = direction * pt_vector;
-    if (min_dist <= dist && dist <= max_dist)
-      distances_.push_back(DistPointPair(dist, pts_[i].pt));
-  }
-}
-
-}  // namespace tesseract.
+///////////////////////////////////////////////////////////////////////
+// File:        detlinefit.cpp
+// Description: Deterministic least median squares line fitting.
+// Author:      Ray Smith
+// Created:     Thu Feb 28 14:45:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "detlinefit.h"
+#include "statistc.h"
+#include "ndminx.h"
+#include "tprintf.h"
+
+namespace tesseract {
+
+// The number of points to consider at each end.
+const int kNumEndPoints = 3;
+// The minimum number of points at which to switch to number of points
+// for badly fitted lines.
+// To ensure a sensible error metric, kMinPointsForErrorCount should be at
+// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
+// ComputeUpperQuartileError.
+const int kMinPointsForErrorCount = 16;
+// The maximum real distance to use before switching to number of
+// mis-fitted points, which will get square-rooted for true distance.
+const int kMaxRealDistance = 2.0;
+
+DetLineFit::DetLineFit() : square_length_(0.0) {
+}
+
+DetLineFit::~DetLineFit() {
+}
+
+// Delete all Added points.
+void DetLineFit::Clear() {
+  pts_.clear();
+  distances_.clear();
+}
+
+// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
+void DetLineFit::Add(const ICOORD& pt) {
+  pts_.push_back(PointWidth(pt, 0));
+}
+// Associates a half-width with the given point. If a point overlaps the
+// previous point by more than half the width, and its distance is further
+// than the previous point, then the more distant point is ignored in the
+// distance calculation. Useful for ignoring i dots and other diacritics.
+void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
+  pts_.push_back(PointWidth(pt, halfwidth));
+}
+
+// Fits a line to the points, ignoring the skip_first initial points and the
+// skip_last final points, returning the fitted line as a pair of points,
+// and the upper quartile error.
+double DetLineFit::Fit(int skip_first, int skip_last,
+                       ICOORD* pt1, ICOORD* pt2) {
+  // Do something sensible with no points.
+  if (pts_.empty()) {
+    pt1->set_x(0);
+    pt1->set_y(0);
+    *pt2 = *pt1;
+    return 0.0;
+  }
+  // Count the points and find the first and last kNumEndPoints.
+  int pt_count = pts_.size();
+  ICOORD* starts[kNumEndPoints];
+  if (skip_first >= pt_count) skip_first = pt_count - 1;
+  int start_count = 0;
+  int end_i = MIN(skip_first + kNumEndPoints, pt_count);
+  for (int i = skip_first; i < end_i; ++i) {
+    starts[start_count++] = &pts_[i].pt;
+  }
+  ICOORD* ends[kNumEndPoints];
+  if (skip_last >= pt_count) skip_last = pt_count - 1;
+  int end_count = 0;
+  end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
+  for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
+    ends[end_count++] = &pts_[i].pt;
+  }
+  // 1 or 2 points need special treatment.
+  if (pt_count <= 2) {
+    *pt1 = *starts[0];
+    if (pt_count > 1)
+      *pt2 = *ends[0];
+    else
+      *pt2 = *pt1;
+    return 0.0;
+  }
+  // Although with between 2 and 2*kNumEndPoints-1 points, there will be
+  // overlap in the starts, ends sets, this is OK and taken care of by the
+  // if (*start != *end) test below, which also tests for equal input points.
+  double best_uq = -1.0;
+  // Iterate each pair of points and find the best fitting line.
+  for (int i = 0; i < start_count; ++i) {
+    ICOORD* start = starts[i];
+    for (int j = 0; j < end_count; ++j) {
+      ICOORD* end = ends[j];
+      if (*start != *end) {
+        ComputeDistances(*start, *end);
+        // Compute the upper quartile error from the line.
+        double dist = EvaluateLineFit();
+        if (dist < best_uq || best_uq < 0.0) {
+          best_uq = dist;
+          *pt1 = *start;
+          *pt2 = *end;
+        }
+      }
+    }
+  }
+  // Finally compute the square root to return the true distance.
+  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
+}
+
+// Constrained fit with a supplied direction vector. Finds the best line_pt,
+// that is one of the supplied points having the median cross product with
+// direction, ignoring points that have a cross product outside of the range
+// [min_dist, max_dist]. Returns the resulting error metric using the same
+// reduced set of points.
+// *Makes use of floating point arithmetic*
+double DetLineFit::ConstrainedFit(const FCOORD& direction,
+                                  double min_dist, double max_dist,
+                                  bool debug, ICOORD* line_pt) {
+  ComputeConstrainedDistances(direction, min_dist, max_dist);
+  // Do something sensible with no points or computed distances.
+  if (pts_.empty() || distances_.empty()) {
+    line_pt->set_x(0);
+    line_pt->set_y(0);
+    return 0.0;
+  }
+  int median_index = distances_.choose_nth_item(distances_.size() / 2);
+  *line_pt = distances_[median_index].data;
+  if (debug) {
+    tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
+            direction.x(), direction.y(),
+            line_pt->x(), line_pt->y(), distances_.size());
+    for (int i = 0; i < distances_.size(); ++i) {
+      tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
+              distances_[i].data.y(), distances_[i].key);
+    }
+    tprintf("Result = %d\n", median_index);
+  }
+  // Center distances on the fitted point.
+  double dist_origin = direction * *line_pt;
+  for (int i = 0; i < distances_.size(); ++i) {
+    distances_[i].key -= dist_origin;
+  }
+  return sqrt(EvaluateLineFit());
+}
+
+// Returns true if there were enough points at the last call to Fit or
+// ConstrainedFit for the fitted points to be used on a badly fitted line.
+bool DetLineFit::SufficientPointsForIndependentFit() const {
+  return distances_.size() >= kMinPointsForErrorCount;
+}
+
+// Backwards compatible fit returning a gradient and constant.
+// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
+// function in preference to the LMS class.
+double DetLineFit::Fit(float* m, float* c) {
+  ICOORD start, end;
+  double error = Fit(&start, &end);
+  if (end.x() != start.x()) {
+    *m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
+    *c = start.y() - *m * start.x();
+  } else {
+    *m = 0.0f;
+    *c = 0.0f;
+  }
+  return error;
+}
+
+// Backwards compatible constrained fit with a supplied gradient.
+// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
+// to avoid potential difficulties with infinite gradients.
+double DetLineFit::ConstrainedFit(double m, float* c) {
+  // Do something sensible with no points.
+  if (pts_.empty()) {
+    *c = 0.0f;
+    return 0.0;
+  }
+  double cos = 1.0 / sqrt(1.0 + m * m);
+  FCOORD direction(cos, m * cos);
+  ICOORD line_pt;
+  double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
+                                &line_pt);
+  *c = line_pt.y() - line_pt.x() * m;
+  return error;
+}
+
+// Computes and returns the squared evaluation metric for a line fit.
+double DetLineFit::EvaluateLineFit() {
+  // Compute the upper quartile error from the line.
+  double dist = ComputeUpperQuartileError();
+  if (distances_.size() >= kMinPointsForErrorCount &&
+      dist > kMaxRealDistance * kMaxRealDistance) {
+    // Use the number of mis-fitted points as the error metric, as this
+    // gives a better measure of fit for badly fitted lines where more
+    // than a quarter are badly fitted.
+    double threshold = kMaxRealDistance * sqrt(square_length_);
+    dist = NumberOfMisfittedPoints(threshold);
+  }
+  return dist;
+}
+
+// Computes the absolute error distances of the points from the line,
+// and returns the squared upper-quartile error distance.
+double DetLineFit::ComputeUpperQuartileError() {
+  int num_errors = distances_.size();
+  if (num_errors == 0) return 0.0;
+  // Get the absolute values of the errors.
+  for (int i = 0; i < num_errors; ++i) {
+    if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
+  }
+  // Now get the upper quartile distance.
+  int index = distances_.choose_nth_item(3 * num_errors / 4);
+  double dist = distances_[index].key;
+  // The true distance is the square root of the dist squared / square_length.
+  // Don't bother with the square root. Just return the square distance.
+  return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
+}
+
+// Returns the number of sample points that have an error more than threshold.
+int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
+  int num_misfits = 0;
+  int num_dists = distances_.size();
+  // Count the distances that exceed the threshold.
+  for (int i = 0; i < num_dists; ++i) {
+    if (distances_[i].key > threshold)
+      ++num_misfits;
+  }
+  return num_misfits;
+}
+
+// Computes all the cross product distances of the points from the line,
+// storing the actual (signed) cross products in distances.
+// Ignores distances of points that are further away than the previous point,
+// and overlap the previous point by at least half.
+void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
+  distances_.truncate(0);
+  ICOORD line_vector = end;
+  line_vector -= start;
+  square_length_ = line_vector.sqlength();
+  int line_length = IntCastRounded(sqrt(square_length_));
+  // Compute the distance of each point from the line.
+  int prev_abs_dist = 0;
+  int prev_dot = 0;
+  for (int i = 0; i < pts_.size(); ++i) {
+    ICOORD pt_vector = pts_[i].pt;
+    pt_vector -= start;
+    int dot = line_vector % pt_vector;
+    // Compute |line_vector||pt_vector|sin(angle between)
+    int dist = line_vector * pt_vector;
+    int abs_dist = dist < 0 ? -dist : dist;
+    if (abs_dist > prev_abs_dist && i > 0) {
+      // Ignore this point if it overlaps the previous one.
+      int separation = abs(dot - prev_dot);
+      if (separation < line_length * pts_[i].halfwidth ||
+          separation < line_length * pts_[i - 1].halfwidth)
+        continue;
+    }
+    distances_.push_back(DistPointPair(dist, pts_[i].pt));
+    prev_abs_dist = abs_dist;
+    prev_dot = dot;
+  }
+}
+
+// Computes all the cross product distances of the points perpendicular to
+// the given direction, ignoring distances outside of the given distance range,
+// storing the actual (signed) cross products in distances_.
+void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
+                                             double min_dist, double max_dist) {
+  distances_.truncate(0);
+  square_length_ = direction.sqlength();
+  // Compute the distance of each point from the line.
+  for (int i = 0; i < pts_.size(); ++i) {
+    FCOORD pt_vector = pts_[i].pt;
+    // Compute |direction||pt_vector|sin(angle between)
+    double dist = direction * pt_vector;
+    if (min_dist <= dist && dist <= max_dist)
+      distances_.push_back(DistPointPair(dist, pts_[i].pt));
+  }
+}
+
+}  // namespace tesseract.
--- a/src/ccstruct/detlinefit.h
+++ b/src/ccstruct/detlinefit.h
@ -1,164 +1,164 @@
-///////////////////////////////////////////////////////////////////////
-// File:        detlinefit.h
-// Description: Deterministic least upper-quartile squares line fitting.
-// Author:      Ray Smith
-// Created:     Thu Feb 28 14:35:01 PDT 2008
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
-#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
-
-#include "genericvector.h"
-#include "kdpair.h"
-#include "points.h"
-
-namespace tesseract {
-
-// This class fits a line to a set of ICOORD points.
-// There is no restriction on the direction of the line, as it
-// uses a vector method, ie no concern over infinite gradients.
-// The fitted line has the least upper quartile of squares of perpendicular
-// distances of all source points from the line, subject to the constraint
-// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
-// i.e. the 9 combinations of one of the first 3 and last 3 points.
-// A fundamental assumption of this algorithm is that one of the first 3 and
-// one of the last 3 points are near the best line fit.
-// The points must be Added in line order for the algorithm to work properly.
-// No floating point calculations are needed* to make an accurate fit,
-// and no random numbers are needed** so the algorithm is deterministic,
-// architecture-stable, and compiler-stable as well as stable to minor
-// changes in the input.
-// *A single floating point division is used to compute each line's distance.
-// This is unlikely to result in choice of a different line, but if it does,
-// it would be easy to replace with a 64 bit integer calculation.
-// **Random numbers are used in the nth_item function, but the worst
-// non-determinism that can result is picking a different result among equals,
-// and that wouldn't make any difference to the end-result distance, so the
-// randomness does not affect the determinism of the algorithm. The random
-// numbers are only there to guarantee average linear time.
-// Fitting time is linear, but with a high constant, as it tries 9 different
-// lines and computes the distance of all points each time.
-// This class is aimed at replacing the LLSQ (linear least squares) and
-// LMS (least median of squares) classes that are currently used for most
-// of the line fitting in Tesseract.
-class DetLineFit {
- public:
-  DetLineFit();
-  ~DetLineFit();
-
-  // Delete all Added points.
-  void Clear();
-
-  // Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
-  // Add must be called on points in sequence along the line.
-  void Add(const ICOORD& pt);
-  // Associates a half-width with the given point if a point overlaps the
-  // previous point by more than half the width, and its distance is further
-  // than the previous point, then the more distant point is ignored in the
-  // distance calculation. Useful for ignoring i dots and other diacritics.
-  void Add(const ICOORD& pt, int halfwidth);
-
-  // Fits a line to the points, returning the fitted line as a pair of
-  // points, and the upper quartile error.
-  double Fit(ICOORD* pt1, ICOORD* pt2) {
-    return Fit(0, 0, pt1, pt2);
-  }
-  // Fits a line to the points, ignoring the skip_first initial points and the
-  // skip_last final points, returning the fitted line as a pair of points,
-  // and the upper quartile error.
-  double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
-
-  // Constrained fit with a supplied direction vector. Finds the best line_pt,
-  // that is one of the supplied points having the median cross product with
-  // direction, ignoring points that have a cross product outside of the range
-  // [min_dist, max_dist]. Returns the resulting error metric using the same
-  // reduced set of points.
-  // *Makes use of floating point arithmetic*
-  double ConstrainedFit(const FCOORD& direction,
-                        double min_dist, double max_dist,
-                        bool debug, ICOORD* line_pt);
-
-  // Returns true if there were enough points at the last call to Fit or
-  // ConstrainedFit for the fitted points to be used on a badly fitted line.
-  bool SufficientPointsForIndependentFit() const;
-
-  // Backwards compatible fit returning a gradient and constant.
-  // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
-  // function in preference to the LMS class.
-  double Fit(float* m, float* c);
-
-  // Backwards compatible constrained fit with a supplied gradient.
-  // Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
-  // to avoid potential difficulties with infinite gradients.
-  double ConstrainedFit(double m, float* c);
-
- private:
-  // Simple struct to hold an ICOORD point and a halfwidth representing half
-  // the "width" (supposedly approximately parallel to the direction of the
-  // line) of each point, such that distant points can be discarded when they
-  // overlap nearer points. (Think i dot and other diacritics or noise.)
-  struct PointWidth {
-    PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
-    PointWidth(const ICOORD& pt0, int halfwidth0)
-      : pt(pt0), halfwidth(halfwidth0) {}
-
-    ICOORD pt;
-    int halfwidth;
-  };
-  // Type holds the distance of each point from the fitted line and the point
-  // itself. Use of double allows integer distances from ICOORDs to be stored
-  // exactly, and also the floating point results from ConstrainedFit.
-  typedef KDPairInc<double, ICOORD> DistPointPair;
-
-  // Computes and returns the squared evaluation metric for a line fit.
-  double EvaluateLineFit();
-
-  // Computes the absolute values of the precomputed distances_,
-  // and returns the squared upper-quartile error distance.
-  double ComputeUpperQuartileError();
-
-  // Returns the number of sample points that have an error more than threshold.
-  int NumberOfMisfittedPoints(double threshold) const;
-
-  // Computes all the cross product distances of the points from the line,
-  // storing the actual (signed) cross products in distances_.
-  // Ignores distances of points that are further away than the previous point,
-  // and overlaps the previous point by at least half.
-  void ComputeDistances(const ICOORD& start, const ICOORD& end);
-
-  // Computes all the cross product distances of the points perpendicular to
-  // the given direction, ignoring distances outside of the give distance range,
-  // storing the actual (signed) cross products in distances_.
-  void ComputeConstrainedDistances(const FCOORD& direction,
-                                   double min_dist, double max_dist);
-
-  // Stores all the source points in the order they were given and their
-  // halfwidths, if any.
-  GenericVector<PointWidth> pts_;
-  // Stores the computed perpendicular distances of (some of) the pts_ from a
-  // given vector (assuming it goes through the origin, making it a line).
-  // Since the distances may be a subset of the input points, and get
-  // re-ordered by the nth_item function, the original point is stored
-  // along side the distance.
-  GenericVector<DistPointPair> distances_;  // Distances of points.
-  // The squared length of the vector used to compute distances_.
-  double square_length_;
-};
-
-}  // namespace tesseract.
-
-#endif  // TESSERACT_CCSTRUCT_DETLINEFIT_H_
-
-
+///////////////////////////////////////////////////////////////////////
+// File:        detlinefit.h
+// Description: Deterministic least upper-quartile squares line fitting.
+// Author:      Ray Smith
+// Created:     Thu Feb 28 14:35:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
+#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
+
+#include "genericvector.h"
+#include "kdpair.h"
+#include "points.h"
+
+namespace tesseract {
+
+// This class fits a line to a set of ICOORD points.
+// There is no restriction on the direction of the line, as it
+// uses a vector method, i.e. no concern over infinite gradients.
+// The fitted line has the least upper quartile of squares of perpendicular
+// distances of all source points from the line, subject to the constraint
+// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
+// i.e. the 9 combinations of one of the first 3 and last 3 points.
+// A fundamental assumption of this algorithm is that one of the first 3 and
+// one of the last 3 points are near the best line fit.
+// The points must be Added in line order for the algorithm to work properly.
+// No floating point calculations are needed* to make an accurate fit,
+// and no random numbers are needed** so the algorithm is deterministic,
+// architecture-stable, and compiler-stable as well as stable to minor
+// changes in the input.
+// *A single floating point division is used to compute each line's distance.
+// This is unlikely to result in choice of a different line, but if it does,
+// it would be easy to replace with a 64 bit integer calculation.
+// **Random numbers are used in the nth_item function, but the worst
+// non-determinism that can result is picking a different result among equals,
+// and that wouldn't make any difference to the end-result distance, so the
+// randomness does not affect the determinism of the algorithm. The random
+// numbers are only there to guarantee average linear time.
+// Fitting time is linear, but with a high constant, as it tries 9 different
+// lines and computes the distance of all points each time.
+// This class is aimed at replacing the LLSQ (linear least squares) and
+// LMS (least median of squares) classes that are currently used for most
+// of the line fitting in Tesseract.
+class DetLineFit {
+ public:
+  DetLineFit();
+  ~DetLineFit();
+
+  // Deletes all Added points.
+  void Clear();
+
+  // Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
+  // Add must be called on points in sequence along the line.
+  void Add(const ICOORD& pt);
+  // Associates a half-width with the given point. If a point overlaps the
+  // previous point by more than half the width, and its distance is further
+  // than the previous point, then the more distant point is ignored in the
+  // distance calculation. Useful for ignoring i dots and other diacritics.
+  void Add(const ICOORD& pt, int halfwidth);
+
+  // Fits a line to the points via Fit(0, 0, pt1, pt2), returning the fitted
+  // line as a pair of points, and the upper quartile error.
+  double Fit(ICOORD* pt1, ICOORD* pt2) {
+    return Fit(0, 0, pt1, pt2);
+  }
+  // Fits a line to the points, ignoring the skip_first initial points and the
+  // skip_last final points, returning the fitted line as a pair of points,
+  // and the upper quartile error.
+  double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
+
+  // Constrained fit with a supplied direction vector. Finds the best line_pt,
+  // that is one of the supplied points having the median cross product with
+  // direction, ignoring points that have a cross product outside of the range
+  // [min_dist, max_dist]. Returns the resulting error metric using the same
+  // reduced set of points.
+  // *Makes use of floating point arithmetic*
+  double ConstrainedFit(const FCOORD& direction,
+                        double min_dist, double max_dist,
+                        bool debug, ICOORD* line_pt);
+
+  // Returns true if there were enough points at the last call to Fit or
+  // ConstrainedFit for the fitted points to be used on a badly fitted line.
+  bool SufficientPointsForIndependentFit() const;
+
+  // Backwards compatible fit returning a gradient and constant.
+  // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
+  // function in preference to the LMS class.
+  double Fit(float* m, float* c);
+
+  // Backwards compatible constrained fit with a supplied gradient.
+  // Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
+  // to avoid potential difficulties with infinite gradients.
+  double ConstrainedFit(double m, float* c);
+
+ private:
+  // Simple struct to hold an ICOORD point and a halfwidth representing half
+  // the "width" (supposedly approximately parallel to the direction of the
+  // line) of each point, such that distant points can be discarded when they
+  // overlap nearer points. (Think i dot and other diacritics or noise.)
+  struct PointWidth {
+    PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
+    PointWidth(const ICOORD& pt0, int halfwidth0)
+      : pt(pt0), halfwidth(halfwidth0) {}
+
+    ICOORD pt;
+    int halfwidth;
+  };
+  // Type holds the distance of each point from the fitted line and the point
+  // itself. Use of double allows integer distances from ICOORDs to be stored
+  // exactly, and also the floating point results from ConstrainedFit.
+  typedef KDPairInc<double, ICOORD> DistPointPair;
+
+  // Computes and returns the squared evaluation metric for a line fit.
+  double EvaluateLineFit();
+
+  // Computes the absolute values of the precomputed distances_,
+  // and returns the squared upper-quartile error distance.
+  double ComputeUpperQuartileError();
+
+  // Returns the number of sample points that have an error more than threshold.
+  int NumberOfMisfittedPoints(double threshold) const;
+
+  // Computes all the cross product distances of the points from the line,
+  // storing the actual (signed) cross products in distances_.
+  // Ignores distances of points that are further away than the previous point,
+  // and overlap the previous point by at least half.
+  void ComputeDistances(const ICOORD& start, const ICOORD& end);
+
+  // Computes all the cross product distances of the points perpendicular to
+  // the given direction, ignoring distances outside the given distance range,
+  // storing the actual (signed) cross products in distances_.
+  void ComputeConstrainedDistances(const FCOORD& direction,
+                                   double min_dist, double max_dist);
+
+  // Stores all the source points in the order they were given and their
+  // halfwidths, if any.
+  GenericVector<PointWidth> pts_;
+  // Stores the computed perpendicular distances of (some of) the pts_ from a
+  // given vector (assuming it goes through the origin, making it a line).
+  // Since the distances may be a subset of the input points, and get
+  // re-ordered by the nth_item function, the original point is stored
+  // alongside the distance.
+  GenericVector<DistPointPair> distances_;  // Distances of points.
+  // The squared length of the vector used to compute distances_.
+  double square_length_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_CCSTRUCT_DETLINEFIT_H_
+
+
--- a/Show More
+++ b/Show More