Move sources into src dir. Update build scripts.

This commit is contained in:
Egor Pugin 2018-04-25 11:02:54 +03:00
parent e8fceb58ab
commit e95ff1159e
518 changed files with 33887 additions and 33884 deletions

View File

@ -128,14 +128,14 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract") set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
configure_file( configure_file(
${CMAKE_SOURCE_DIR}/api/tess_version.h.in ${CMAKE_SOURCE_DIR}/src/api/tess_version.h.in
${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY) ${CMAKE_BINARY_DIR}/src/api/tess_version.h @ONLY)
configure_file( configure_file(
${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in ${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/tesseract.rc.in
${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY) ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/tesseract.rc @ONLY)
configure_file( configure_file(
${CMAKE_SOURCE_DIR}/vs2010/tesseract/libtesseract.rc.in ${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/libtesseract.rc.in
${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc @ONLY) ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc @ONLY)
configure_file( configure_file(
${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in ${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in
${CMAKE_BINARY_DIR}/TesseractConfig-version.cmake @ONLY) ${CMAKE_BINARY_DIR}/TesseractConfig-version.cmake @ONLY)
@ -160,101 +160,101 @@ include_directories(${Leptonica_INCLUDE_DIRS})
# Header search paths: the build tree (for generated headers) plus each relocated src/ subdirectory.
include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR})
include_directories(api) include_directories(src/api)
# tess_version.h is configured into <build>/src/api, so that is the directory that must be searched.
include_directories(${CMAKE_BINARY_DIR}/api) include_directories(${CMAKE_BINARY_DIR}/src/api)
include_directories(arch) include_directories(src/arch)
include_directories(ccmain) include_directories(src/ccmain)
include_directories(ccstruct) include_directories(src/ccstruct)
include_directories(ccutil) include_directories(src/ccutil)
include_directories(classify) include_directories(src/classify)
include_directories(cutil) include_directories(src/cutil)
include_directories(dict) include_directories(src/dict)
include_directories(lstm) include_directories(src/lstm)
include_directories(opencl) include_directories(src/opencl)
include_directories(textord) include_directories(src/textord)
include_directories(vs2010/port) include_directories(src/vs2010/port)
include_directories(viewer) include_directories(src/viewer)
include_directories(wordrec) include_directories(src/wordrec)
######################################## ########################################
# LIBRARY tesseract # LIBRARY tesseract
######################################## ########################################
file(GLOB tesseract_src file(GLOB tesseract_src
arch/*.cpp src/arch/*.cpp
ccmain/*.cpp src/ccmain/*.cpp
ccstruct/*.cpp src/ccstruct/*.cpp
ccutil/*.cpp src/ccutil/*.cpp
classify/*.cpp src/classify/*.cpp
cutil/*.cpp src/cutil/*.cpp
dict/*.cpp src/dict/*.cpp
lstm/*.cpp src/lstm/*.cpp
opencl/*.cpp src/opencl/*.cpp
textord/*.cpp src/textord/*.cpp
viewer/*.cpp src/viewer/*.cpp
wordrec/*.cpp src/wordrec/*.cpp
) )
file(GLOB tesseract_hdr file(GLOB tesseract_hdr
api/*.h src/api/*.h
arch/*.h src/arch/*.h
ccmain/*.h src/ccmain/*.h
ccstruct/*.h src/ccstruct/*.h
ccutil/*.h src/ccutil/*.h
classify/*.h src/classify/*.h
cutil/*.h src/cutil/*.h
dict/*.h src/dict/*.h
lstm/*.h src/lstm/*.h
opencl/*.h src/opencl/*.h
textord/*.h src/textord/*.h
viewer/*.h src/viewer/*.h
wordrec/*.h src/wordrec/*.h
) )
if (WIN32) if (WIN32)
file(GLOB tesseract_win32_src "vs2010/port/*.cpp") file(GLOB tesseract_win32_src "src/vs2010/port/*.cpp")
file(GLOB tesseract_win32_hdr "vs2010/port/*.h") file(GLOB tesseract_win32_hdr "src/vs2010/port/*.h")
set(tesseract_src ${tesseract_src} ${tesseract_win32_src}) set(tesseract_src ${tesseract_src} ${tesseract_win32_src})
set(tesseract_hdr ${tesseract_hdr} ${tesseract_win32_hdr}) set(tesseract_hdr ${tesseract_hdr} ${tesseract_win32_hdr})
endif() endif()
set(tesseract_src ${tesseract_src} set(tesseract_src ${tesseract_src}
api/baseapi.cpp src/api/baseapi.cpp
api/capi.cpp src/api/capi.cpp
api/renderer.cpp src/api/renderer.cpp
api/pdfrenderer.cpp src/api/pdfrenderer.cpp
) )
if (WIN32) if (WIN32)
if (MSVC) if (MSVC)
include_directories(vs2010/tesseract) include_directories(src/vs2010/tesseract)
set(tesseract_hdr set(tesseract_hdr
${tesseract_hdr} ${tesseract_hdr}
${CMAKE_CURRENT_SOURCE_DIR}/vs2010/tesseract/resource.h) ${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/resource.h)
set(tesseract_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc) set(tesseract_rsc ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc)
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__) PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__) PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX") PROPERTIES COMPILE_FLAGS "/arch:AVX")
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX2") PROPERTIES COMPILE_FLAGS "/arch:AVX2")
endif() endif()
else() else()
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
PROPERTIES COMPILE_FLAGS "-msse4.1") PROPERTIES COMPILE_FLAGS "-msse4.1")
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
PROPERTIES COMPILE_FLAGS "-msse4.1") PROPERTIES COMPILE_FLAGS "-msse4.1")
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS "-mavx") PROPERTIES COMPILE_FLAGS "-mavx")
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
PROPERTIES COMPILE_FLAGS "-mavx2") PROPERTIES COMPILE_FLAGS "-mavx2")
endif() endif()
@ -291,7 +291,7 @@ endif()
# EXECUTABLE tesseractmain # EXECUTABLE tesseractmain
######################################## ########################################
# Sources for the command-line executable; on MSVC the configured resource script is attached as well.
set(tesseractmain_src api/tesseractmain.cpp) set(tesseractmain_src src/api/tesseractmain.cpp)
if (MSVC) if (MSVC)
# tesseract.rc is generated by configure_file() into <build>/src/vs2010/tesseract, so reference that location.
set(tesseractmain_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc) set(tesseractmain_rsc ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/tesseract.rc)
endif() endif()
@ -326,74 +326,74 @@ install(FILES
# Install the public headers into include/tesseract; lists mirror the per-directory makefile.am files.
install(FILES install(FILES
# from api/makefile.am # from api/makefile.am
api/apitypes.h src/api/apitypes.h
api/baseapi.h src/api/baseapi.h
api/capi.h src/api/capi.h
api/renderer.h src/api/renderer.h
# generated header: configure_file() writes it to <build>/src/api, not <build>/api
${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h ${CMAKE_CURRENT_BINARY_DIR}/src/api/tess_version.h
#from arch/makefile.am #from arch/makefile.am
arch/dotproductavx.h src/arch/dotproductavx.h
arch/dotproductsse.h src/arch/dotproductsse.h
arch/intsimdmatrix.h src/arch/intsimdmatrix.h
arch/intsimdmatrixavx2.h src/arch/intsimdmatrixavx2.h
arch/intsimdmatrixsse.h src/arch/intsimdmatrixsse.h
arch/simddetect.h src/arch/simddetect.h
#from ccmain/makefile.am #from ccmain/makefile.am
ccmain/thresholder.h src/ccmain/thresholder.h
ccmain/ltrresultiterator.h src/ccmain/ltrresultiterator.h
ccmain/pageiterator.h src/ccmain/pageiterator.h
ccmain/resultiterator.h src/ccmain/resultiterator.h
ccmain/osdetect.h src/ccmain/osdetect.h
#from ccstruct/makefile.am #from ccstruct/makefile.am
ccstruct/publictypes.h src/ccstruct/publictypes.h
#from ccutil/makefile.am #from ccutil/makefile.am
ccutil/basedir.h src/ccutil/basedir.h
ccutil/errcode.h src/ccutil/errcode.h
ccutil/fileerr.h src/ccutil/fileerr.h
ccutil/genericvector.h src/ccutil/genericvector.h
ccutil/helpers.h src/ccutil/helpers.h
ccutil/host.h src/ccutil/host.h
ccutil/memry.h src/ccutil/memry.h
ccutil/ndminx.h src/ccutil/ndminx.h
ccutil/params.h src/ccutil/params.h
ccutil/ocrclass.h src/ccutil/ocrclass.h
ccutil/platform.h src/ccutil/platform.h
ccutil/serialis.h src/ccutil/serialis.h
ccutil/strngs.h src/ccutil/strngs.h
ccutil/tesscallback.h src/ccutil/tesscallback.h
ccutil/unichar.h src/ccutil/unichar.h
ccutil/unicharcompress.h src/ccutil/unicharcompress.h
ccutil/unicharmap.h src/ccutil/unicharmap.h
ccutil/unicharset.h src/ccutil/unicharset.h
#from lstm/makefile.am #from lstm/makefile.am
lstm/convolve.h src/lstm/convolve.h
lstm/ctc.h src/lstm/ctc.h
lstm/fullyconnected.h src/lstm/fullyconnected.h
lstm/functions.h src/lstm/functions.h
lstm/input.h src/lstm/input.h
lstm/lstm.h src/lstm/lstm.h
lstm/lstmrecognizer.h src/lstm/lstmrecognizer.h
lstm/lstmtrainer.h src/lstm/lstmtrainer.h
lstm/maxpool.h src/lstm/maxpool.h
lstm/networkbuilder.h src/lstm/networkbuilder.h
lstm/network.h src/lstm/network.h
lstm/networkio.h src/lstm/networkio.h
lstm/networkscratch.h src/lstm/networkscratch.h
lstm/parallel.h src/lstm/parallel.h
lstm/plumbing.h src/lstm/plumbing.h
lstm/recodebeam.h src/lstm/recodebeam.h
lstm/reconfig.h src/lstm/reconfig.h
lstm/reversed.h src/lstm/reversed.h
lstm/series.h src/lstm/series.h
lstm/static_shape.h src/lstm/static_shape.h
lstm/stridemap.h src/lstm/stridemap.h
lstm/tfnetwork.h src/lstm/tfnetwork.h
lstm/weightmatrix.h src/lstm/weightmatrix.h
#${CMAKE_BINARY_DIR}/src/endianness.h #${CMAKE_BINARY_DIR}/src/endianness.h
DESTINATION include/tesseract) DESTINATION include/tesseract)

View File

@ -16,7 +16,7 @@ AC_LANG_COMPILER_REQUIRE
CXXFLAGS=${CXXFLAGS:-""} CXXFLAGS=${CXXFLAGS:-""}
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([config]) AC_CONFIG_AUX_DIR([config])
AC_CONFIG_SRCDIR([api/tesseractmain.cpp]) AC_CONFIG_SRCDIR([src/api/tesseractmain.cpp])
AC_PREFIX_DEFAULT([/usr/local]) AC_PREFIX_DEFAULT([/usr/local])
# Automake configuration. Do not require README file (we use README.md). # Automake configuration. Do not require README file (we use README.md).
@ -476,20 +476,20 @@ fi
# Output files # Output files
AC_CONFIG_FILES([Makefile tesseract.pc]) AC_CONFIG_FILES([Makefile tesseract.pc])
AC_CONFIG_FILES([api/Makefile]) AC_CONFIG_FILES([src/api/Makefile])
AC_CONFIG_FILES([api/tess_version.h]) AC_CONFIG_FILES([src/api/tess_version.h])
AC_CONFIG_FILES([arch/Makefile]) AC_CONFIG_FILES([src/arch/Makefile])
AC_CONFIG_FILES([ccmain/Makefile]) AC_CONFIG_FILES([src/ccmain/Makefile])
AC_CONFIG_FILES([opencl/Makefile]) AC_CONFIG_FILES([src/opencl/Makefile])
AC_CONFIG_FILES([ccstruct/Makefile]) AC_CONFIG_FILES([src/ccstruct/Makefile])
AC_CONFIG_FILES([ccutil/Makefile]) AC_CONFIG_FILES([src/ccutil/Makefile])
AC_CONFIG_FILES([classify/Makefile]) AC_CONFIG_FILES([src/classify/Makefile])
AC_CONFIG_FILES([cutil/Makefile]) AC_CONFIG_FILES([src/cutil/Makefile])
AC_CONFIG_FILES([dict/Makefile]) AC_CONFIG_FILES([src/dict/Makefile])
AC_CONFIG_FILES([lstm/Makefile]) AC_CONFIG_FILES([src/lstm/Makefile])
AC_CONFIG_FILES([textord/Makefile]) AC_CONFIG_FILES([src/textord/Makefile])
AC_CONFIG_FILES([viewer/Makefile]) AC_CONFIG_FILES([src/viewer/Makefile])
AC_CONFIG_FILES([wordrec/Makefile]) AC_CONFIG_FILES([src/wordrec/Makefile])
AC_CONFIG_FILES([tessdata/Makefile]) AC_CONFIG_FILES([tessdata/Makefile])
AC_CONFIG_FILES([tessdata/configs/Makefile]) AC_CONFIG_FILES([tessdata/configs/Makefile])
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile]) AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])

103
cppan.yml
View File

@ -35,59 +35,59 @@ projects:
type: lib type: lib
export_all_symbols: true export_all_symbols: true
files: files:
- api/.*\.cpp - src/api/.*\.cpp
- arch/.*\.cpp - src/arch/.*\.cpp
- ccmain/.*\.cpp - src/ccmain/.*\.cpp
- ccstruct/.*\.cpp - src/ccstruct/.*\.cpp
- ccutil/.*\.cpp - src/ccutil/.*\.cpp
- classify/.*\.cpp - src/classify/.*\.cpp
- cutil/.*\.cpp - src/cutil/.*\.cpp
- dict/.*\.cpp - src/dict/.*\.cpp
- lstm/.*\.cpp - src/lstm/.*\.cpp
- opencl/.*\.cpp - src/opencl/.*\.cpp
- textord/.*\.cpp - src/textord/.*\.cpp
- viewer/.*\.cpp - src/viewer/.*\.cpp
- wordrec/.*\.cpp - src/wordrec/.*\.cpp
- api/.*\.h - src/api/.*\.h
- arch/.*\.h - src/arch/.*\.h
- ccmain/.*\.h - src/ccmain/.*\.h
- ccstruct/.*\.h - src/ccstruct/.*\.h
- ccutil/.*\.h - src/ccutil/.*\.h
- classify/.*\.h - src/classify/.*\.h
- cutil/.*\.h - src/cutil/.*\.h
- dict/.*\.h - src/dict/.*\.h
- lstm/.*\.h - src/lstm/.*\.h
- opencl/.*\.h - src/opencl/.*\.h
- textord/.*\.h - src/textord/.*\.h
- viewer/.*\.h - src/viewer/.*\.h
- wordrec/.*\.h - src/wordrec/.*\.h
- vs2010/port/.* - src/vs2010/port/.*
exclude_from_build: exclude_from_build:
- api/tesseractmain.cpp - src/api/tesseractmain.cpp
- viewer/svpaint.cpp - src/viewer/svpaint.cpp
include_directories: include_directories:
public: public:
#private: #private:
- arch - src/arch
- classify - src/classify
- cutil - src/cutil
- ccutil - src/ccutil
- dict - src/dict
- lstm - src/lstm
- opencl - src/opencl
- textord - src/textord
- vs2010/port - src/vs2010/port
- viewer - src/viewer
- wordrec - src/wordrec
#public: #public:
- api - src/api
- ccmain - src/ccmain
- ccstruct - src/ccstruct
- ccutil - src/ccutil
check_function_exists: check_function_exists:
- getline - getline
@ -125,23 +125,26 @@ projects:
file_write_once(${BDIR}/config_auto.h "") file_write_once(${BDIR}/config_auto.h "")
post_sources: | post_sources: |
configure_file(
${SDIR}/src/api/tess_version.h.in
${BDIR}/tess_version.h @ONLY)
if (WIN32) if (WIN32)
if (MSVC) if (MSVC)
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp ${SDIR}/src/arch/dotproductsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__) PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp ${SDIR}/src/arch/intsimdmatrixsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__) PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp ${SDIR}/src/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX") PROPERTIES COMPILE_FLAGS "/arch:AVX")
set_source_files_properties( set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp ${SDIR}/src/arch/intsimdmatrixavx2.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX2") PROPERTIES COMPILE_FLAGS "/arch:AVX2")
endif() endif()
else() else()
remove_src_dir(vs2010/port/*) remove_src_dir(src/vs2010/port/*)
endif() endif()
options: options:
@ -162,7 +165,7 @@ projects:
pvt.cppan.demo.danbloomberg.leptonica: 1 pvt.cppan.demo.danbloomberg.leptonica: 1
tesseract: tesseract:
files: api/tesseractmain.cpp files: src/api/tesseractmain.cpp
dependencies: dependencies:
- libtesseract - libtesseract

View File

@ -1,30 +1,30 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: dotproductavx.h // File: dotproductavx.h
// Description: Architecture-specific dot-product function. // Description: Architecture-specific dot-product function.
// Author: Ray Smith // Author: Ray Smith
// Created: Wed Jul 22 10:51:05 PDT 2015 // Created: Wed Jul 22 10:51:05 PDT 2015
// //
// (C) Copyright 2015, Google Inc. // (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_ #ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
#define TESSERACT_ARCH_DOTPRODUCTAVX_H_ #define TESSERACT_ARCH_DOTPRODUCTAVX_H_
namespace tesseract { namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v. // Computes and returns the dot product of the n-vectors u and v.
// Uses Intel AVX intrinsics to access the SIMD instruction set. // Uses Intel AVX intrinsics to access the SIMD instruction set.
double DotProductAVX(const double* u, const double* v, int n); double DotProductAVX(const double* u, const double* v, int n);
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_ #endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_

View File

@ -1,35 +1,35 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: dotproductsse.h // File: dotproductsse.h
// Description: Architecture-specific dot-product function. // Description: Architecture-specific dot-product function.
// Author: Ray Smith // Author: Ray Smith
// Created: Wed Jul 22 10:57:05 PDT 2015 // Created: Wed Jul 22 10:57:05 PDT 2015
// //
// (C) Copyright 2015, Google Inc. // (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_ #ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
#define TESSERACT_ARCH_DOTPRODUCTSSE_H_ #define TESSERACT_ARCH_DOTPRODUCTSSE_H_
#include "host.h" #include "host.h"
namespace tesseract { namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v. // Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set. // Uses Intel SSE intrinsics to access the SIMD instruction set.
double DotProductSSE(const double* u, const double* v, int n); double DotProductSSE(const double* u, const double* v, int n);
// Computes and returns the dot product of the n-vectors u and v. // Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set. // Uses Intel SSE intrinsics to access the SIMD instruction set.
int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n); int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n);
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_ #endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_

View File

@ -1,82 +1,82 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: simddetect.cpp // File: simddetect.cpp
// Description: Architecture detector. // Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith) // Author: Stefan Weil (based on code from Ray Smith)
// //
// (C) Copyright 2014, Google Inc. // (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#include "simddetect.h" #include "simddetect.h"
#include "tprintf.h" #include "tprintf.h"
#undef X86_BUILD #undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32) #if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
#if !defined(ANDROID_BUILD) #if !defined(ANDROID_BUILD)
#define X86_BUILD 1 #define X86_BUILD 1
#endif // !ANDROID_BUILD #endif // !ANDROID_BUILD
#endif // x86 target #endif // x86 target
#if defined(X86_BUILD) #if defined(X86_BUILD)
#if defined(__GNUC__) #if defined(__GNUC__)
#include <cpuid.h> #include <cpuid.h>
#elif defined(_WIN32) #elif defined(_WIN32)
#include <intrin.h> #include <intrin.h>
#endif #endif
#endif #endif
SIMDDetect SIMDDetect::detector; SIMDDetect SIMDDetect::detector;
// If true, then AVX has been detected. // If true, then AVX has been detected.
bool SIMDDetect::avx_available_; bool SIMDDetect::avx_available_;
bool SIMDDetect::avx2_available_; bool SIMDDetect::avx2_available_;
bool SIMDDetect::avx512F_available_; bool SIMDDetect::avx512F_available_;
bool SIMDDetect::avx512BW_available_; bool SIMDDetect::avx512BW_available_;
// If true, then SSe4.1 has been detected. // If true, then SSe4.1 has been detected.
bool SIMDDetect::sse_available_; bool SIMDDetect::sse_available_;
// Constructor. // Constructor.
// Tests the architecture in a system-dependent way to detect AVX, SSE and // Tests the architecture in a system-dependent way to detect AVX, SSE and
// any other available SIMD equipment. // any other available SIMD equipment.
// __GNUC__ is also defined by compilers that include GNU extensions such as // __GNUC__ is also defined by compilers that include GNU extensions such as
// clang. // clang.
SIMDDetect::SIMDDetect() { SIMDDetect::SIMDDetect() {
#if defined(X86_BUILD) #if defined(X86_BUILD)
#if defined(__GNUC__) #if defined(__GNUC__)
unsigned int eax, ebx, ecx, edx; unsigned int eax, ebx, ecx, edx;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) { if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
// Note that these tests all use hex because the older compilers don't have // Note that these tests all use hex because the older compilers don't have
// the newer flags. // the newer flags.
sse_available_ = (ecx & 0x00080000) != 0; sse_available_ = (ecx & 0x00080000) != 0;
avx_available_ = (ecx & 0x10000000) != 0; avx_available_ = (ecx & 0x10000000) != 0;
if (avx_available_) { if (avx_available_) {
// There is supposed to be a __get_cpuid_count function, but this is all // There is supposed to be a __get_cpuid_count function, but this is all
// there is in my cpuid.h. It is a macro for an asm statement and cannot // there is in my cpuid.h. It is a macro for an asm statement and cannot
// be used inside an if. // be used inside an if.
__cpuid_count(7, 0, eax, ebx, ecx, edx); __cpuid_count(7, 0, eax, ebx, ecx, edx);
avx2_available_ = (ebx & 0x00000020) != 0; avx2_available_ = (ebx & 0x00000020) != 0;
avx512F_available_ = (ebx & 0x00010000) != 0; avx512F_available_ = (ebx & 0x00010000) != 0;
avx512BW_available_ = (ebx & 0x40000000) != 0; avx512BW_available_ = (ebx & 0x40000000) != 0;
} }
} }
#elif defined(_WIN32) #elif defined(_WIN32)
int cpuInfo[4]; int cpuInfo[4];
__cpuid(cpuInfo, 0); __cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) { if (cpuInfo[0] >= 1) {
__cpuid(cpuInfo, 1); __cpuid(cpuInfo, 1);
sse_available_ = (cpuInfo[2] & 0x00080000) != 0; sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
avx_available_ = (cpuInfo[2] & 0x10000000) != 0; avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
} }
#else #else
#error "I don't know how to test for SIMD with this compiler" #error "I don't know how to test for SIMD with this compiler"
#endif #endif
#endif // X86_BUILD #endif // X86_BUILD
} }

View File

@ -1,54 +1,54 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: simddetect.h // File: simddetect.h
// Description: Architecture detector. // Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith) // Author: Stefan Weil (based on code from Ray Smith)
// //
// (C) Copyright 2014, Google Inc. // (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#include "platform.h" #include "platform.h"
// Architecture detector. Add code here to detect any other architectures for // Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static // SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one. // object, but it does no real harm to have more than one.
class SIMDDetect { class SIMDDetect {
public: public:
// Returns true if AVX is available on this system. // Returns true if AVX is available on this system.
static inline bool IsAVXAvailable() { return detector.avx_available_; } static inline bool IsAVXAvailable() { return detector.avx_available_; }
// Returns true if AVX2 (integer support) is available on this system. // Returns true if AVX2 (integer support) is available on this system.
static inline bool IsAVX2Available() { return detector.avx2_available_; } static inline bool IsAVX2Available() { return detector.avx2_available_; }
// Returns true if AVX512 Foundation (float) is available on this system. // Returns true if AVX512 Foundation (float) is available on this system.
static inline bool IsAVX512FAvailable() { static inline bool IsAVX512FAvailable() {
return detector.avx512F_available_; return detector.avx512F_available_;
} }
// Returns true if AVX512 integer is available on this system. // Returns true if AVX512 integer is available on this system.
static inline bool IsAVX512BWAvailable() { static inline bool IsAVX512BWAvailable() {
return detector.avx512BW_available_; return detector.avx512BW_available_;
} }
// Returns true if SSE4.1 is available on this system. // Returns true if SSE4.1 is available on this system.
static inline bool IsSSEAvailable() { return detector.sse_available_; } static inline bool IsSSEAvailable() { return detector.sse_available_; }
private: private:
// Constructor, must set all static member variables. // Constructor, must set all static member variables.
SIMDDetect(); SIMDDetect();
private: private:
// Singleton. // Singleton.
static SIMDDetect detector; static SIMDDetect detector;
// If true, then AVX has been detected. // If true, then AVX has been detected.
static TESS_API bool avx_available_; static TESS_API bool avx_available_;
static TESS_API bool avx2_available_; static TESS_API bool avx2_available_;
static TESS_API bool avx512F_available_; static TESS_API bool avx512F_available_;
static TESS_API bool avx512BW_available_; static TESS_API bool avx512BW_available_;
// If true, then SSe4.1 has been detected. // If true, then SSe4.1 has been detected.
static TESS_API bool sse_available_; static TESS_API bool sse_available_;
}; };

View File

@ -1,44 +1,44 @@
/********************************************************************** /**********************************************************************
* File: control.h (Formerly control.h) * File: control.h (Formerly control.h)
* Description: Module-independent matcher controller. * Description: Module-independent matcher controller.
* Author: Ray Smith * Author: Ray Smith
* Created: Thu Apr 23 11:09:58 BST 1992 * Created: Thu Apr 23 11:09:58 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
/** /**
* @file control.h * @file control.h
* Module-independent matcher controller. * Module-independent matcher controller.
*/ */
#ifndef CONTROL_H #ifndef CONTROL_H
#define CONTROL_H #define CONTROL_H
#include "params.h" #include "params.h"
#include "ocrblock.h" #include "ocrblock.h"
#include "ratngs.h" #include "ratngs.h"
#include "statistc.h" #include "statistc.h"
#include "pageres.h" #include "pageres.h"
enum ACCEPTABLE_WERD_TYPE enum ACCEPTABLE_WERD_TYPE
{ {
AC_UNACCEPTABLE, ///< Unacceptable word AC_UNACCEPTABLE, ///< Unacceptable word
AC_LOWER_CASE, ///< ALL lower case AC_LOWER_CASE, ///< ALL lower case
AC_UPPER_CASE, ///< ALL upper case AC_UPPER_CASE, ///< ALL upper case
AC_INITIAL_CAP, ///< ALL but initial lc AC_INITIAL_CAP, ///< ALL but initial lc
AC_LC_ABBREV, ///< a.b.c. AC_LC_ABBREV, ///< a.b.c.
AC_UC_ABBREV ///< A.B.C. AC_UC_ABBREV ///< A.B.C.
}; };
#endif #endif

View File

@ -1,31 +1,31 @@
/****************************************************************** /******************************************************************
* File: fixspace.h (Formerly fixspace.h) * File: fixspace.h (Formerly fixspace.h)
* Description: Implements a pass over the page res, exploring the alternative * Description: Implements a pass over the page res, exploring the alternative
* spacing possibilities, trying to use context to improve the * spacing possibilities, trying to use context to improve the
word spacing word spacing
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Thu Oct 21 11:38:43 BST 1993 * Created: Thu Oct 21 11:38:43 BST 1993
* *
* (C) Copyright 1993, Hewlett-Packard Ltd. * (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef FIXSPACE_H #ifndef FIXSPACE_H
#define FIXSPACE_H #define FIXSPACE_H
#include "pageres.h" #include "pageres.h"
#include "params.h" #include "params.h"
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list); void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
void transform_to_next_perm(WERD_RES_LIST &words); void transform_to_next_perm(WERD_RES_LIST &words);
void fixspace_dbg(WERD_RES *word); void fixspace_dbg(WERD_RES *word);
#endif #endif

View File

@ -1,64 +1,64 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: mutableiterator.h // File: mutableiterator.h
// Description: Iterator for tesseract results providing access to // Description: Iterator for tesseract results providing access to
// both high-level API and Tesseract internal data structures. // both high-level API and Tesseract internal data structures.
// Author: David Eger // Author: David Eger
// Created: Thu Feb 24 19:01:06 PST 2011 // Created: Thu Feb 24 19:01:06 PST 2011
// //
// (C) Copyright 2011, Google Inc. // (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_ #ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_ #define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#include "resultiterator.h" #include "resultiterator.h"
class BLOB_CHOICE_IT; class BLOB_CHOICE_IT;
namespace tesseract { namespace tesseract {
class Tesseract; class Tesseract;
// Class to iterate over tesseract results, providing access to all levels // Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having // of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures. // to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and // WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and // therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End // has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES. // DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel. // See apitypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface. // See also base class PageIterator, which contains the bulk of the interface.
// ResultIterator adds text-specific methods for access to OCR output. // ResultIterator adds text-specific methods for access to OCR output.
// MutableIterator adds access to internal data structures. // MutableIterator adds access to internal data structures.
class MutableIterator : public ResultIterator { class MutableIterator : public ResultIterator {
public: public:
// See argument descriptions in ResultIterator() // See argument descriptions in ResultIterator()
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract, MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres, int scale, int scaled_yres,
int rect_left, int rect_top, int rect_left, int rect_top,
int rect_width, int rect_height) int rect_width, int rect_height)
: ResultIterator( : ResultIterator(
LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left, LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
rect_top, rect_width, rect_height)) {} rect_top, rect_width, rect_height)) {}
virtual ~MutableIterator() {} virtual ~MutableIterator() {}
// See PageIterator and ResultIterator for most calls. // See PageIterator and ResultIterator for most calls.
// Return access to Tesseract internals. // Return access to Tesseract internals.
const PAGE_RES_IT *PageResIt() const { return it_; } const PAGE_RES_IT *PageResIt() const { return it_; }
}; };
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_ #endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_

View File

@ -1,33 +1,33 @@
/****************************************************************** /******************************************************************
* File: output.h (Formerly output.h) * File: output.h (Formerly output.h)
* Description: Output pass * Description: Output pass
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Thu Aug 4 10:56:08 BST 1994 * Created: Thu Aug 4 10:56:08 BST 1994
* *
* (C) Copyright 1994, Hewlett-Packard Ltd. * (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef OUTPUT_H #ifndef OUTPUT_H
#define OUTPUT_H #define OUTPUT_H
#include "params.h" #include "params.h"
//#include "epapconv.h" //#include "epapconv.h"
#include "pageres.h" #include "pageres.h"
/** test line ends */ /** test line ends */
char determine_newline_type(WERD *word, ///< word to do char determine_newline_type(WERD *word, ///< word to do
BLOCK *block, ///< current block BLOCK *block, ///< current block
WERD *next_word, ///< next word WERD *next_word, ///< next word
BLOCK *next_block ///< block of next word BLOCK *next_block ///< block of next word
); );
#endif #endif

View File

@ -1,108 +1,108 @@
/********************************************************************** /**********************************************************************
* File: paragraphs.h * File: paragraphs.h
* Description: Paragraph Detection data structures. * Description: Paragraph Detection data structures.
* Author: David Eger * Author: David Eger
* Created: 25 February 2011 * Created: 25 February 2011
* *
* (C) Copyright 2011, Google Inc. * (C) Copyright 2011, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_ #ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
#define TESSERACT_CCMAIN_PARAGRAPHS_H_ #define TESSERACT_CCMAIN_PARAGRAPHS_H_
#include "rect.h" #include "rect.h"
#include "ocrpara.h" #include "ocrpara.h"
#include "genericvector.h" #include "genericvector.h"
#include "strngs.h" #include "strngs.h"
class WERD; class WERD;
class UNICHARSET; class UNICHARSET;
namespace tesseract { namespace tesseract {
class MutableIterator; class MutableIterator;
// This structure captures all information needed about a text line for the // This structure captures all information needed about a text line for the
// purposes of paragraph detection. It is meant to be exceedingly light-weight // purposes of paragraph detection. It is meant to be exceedingly light-weight
// so that we can easily test paragraph detection independent of the rest of // so that we can easily test paragraph detection independent of the rest of
// Tesseract. // Tesseract.
class RowInfo { class RowInfo {
public: public:
// Constant data derived from Tesseract output. // Constant data derived from Tesseract output.
STRING text; // the full UTF-8 text of the line. STRING text; // the full UTF-8 text of the line.
bool ltr; // whether the majority of the text is left-to-right bool ltr; // whether the majority of the text is left-to-right
// TODO(eger) make this more fine-grained. // TODO(eger) make this more fine-grained.
bool has_leaders; // does the line contain leader dots (.....)? bool has_leaders; // does the line contain leader dots (.....)?
bool has_drop_cap; // does the line have a drop cap? bool has_drop_cap; // does the line have a drop cap?
int pix_ldistance; // distance to the left pblock boundary in pixels int pix_ldistance; // distance to the left pblock boundary in pixels
int pix_rdistance; // distance to the right pblock boundary in pixels int pix_rdistance; // distance to the right pblock boundary in pixels
float pix_xheight; // guessed xheight for the line float pix_xheight; // guessed xheight for the line
int average_interword_space; // average space between words in pixels. int average_interword_space; // average space between words in pixels.
int num_words; int num_words;
TBOX lword_box; // in normalized (horiz text rows) space TBOX lword_box; // in normalized (horiz text rows) space
TBOX rword_box; // in normalized (horiz text rows) space TBOX rword_box; // in normalized (horiz text rows) space
STRING lword_text; // the UTF-8 text of the leftmost werd STRING lword_text; // the UTF-8 text of the leftmost werd
STRING rword_text; // the UTF-8 text of the rightmost werd STRING rword_text; // the UTF-8 text of the rightmost werd
// The text of a paragraph typically starts with the start of an idea and // The text of a paragraph typically starts with the start of an idea and
// ends with the end of an idea. Here we define paragraph as something that // ends with the end of an idea. Here we define paragraph as something that
// may have a first line indent and a body indent which may be different. // may have a first line indent and a body indent which may be different.
// Typical words that start an idea are: // Typical words that start an idea are:
// 1. Words in western scripts that start with // 1. Words in western scripts that start with
// a capital letter, for example "The" // a capital letter, for example "The"
// 2. Bulleted or numbered list items, for // 2. Bulleted or numbered list items, for
// example "2." // example "2."
// Typical words which end an idea are words ending in punctuation marks. In // Typical words which end an idea are words ending in punctuation marks. In
// this vocabulary, each list item is represented as a paragraph. // this vocabulary, each list item is represented as a paragraph.
bool lword_indicates_list_item; bool lword_indicates_list_item;
bool lword_likely_starts_idea; bool lword_likely_starts_idea;
bool lword_likely_ends_idea; bool lword_likely_ends_idea;
bool rword_indicates_list_item; bool rword_indicates_list_item;
bool rword_likely_starts_idea; bool rword_likely_starts_idea;
bool rword_likely_ends_idea; bool rword_likely_ends_idea;
}; };
// Main entry point for Paragraph Detection Algorithm. // Main entry point for Paragraph Detection Algorithm.
// //
// Given a set of equally spaced textlines (described by row_infos), // Given a set of equally spaced textlines (described by row_infos),
// Split them into paragraphs. See http://goto/paragraphstalk // Split them into paragraphs. See http://goto/paragraphstalk
// //
// Output: // Output:
// row_owners - one pointer for each row, to the paragraph it belongs to. // row_owners - one pointer for each row, to the paragraph it belongs to.
// paragraphs - this is the actual list of PARA objects. // paragraphs - this is the actual list of PARA objects.
// models - the list of paragraph models referenced by the PARA objects. // models - the list of paragraph models referenced by the PARA objects.
// caller is responsible for deleting the models. // caller is responsible for deleting the models.
void DetectParagraphs(int debug_level, void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos, GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners, GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs, PARA_LIST *paragraphs,
GenericVector<ParagraphModel *> *models); GenericVector<ParagraphModel *> *models);
// Given a MutableIterator to the start of a block, run DetectParagraphs on // Given a MutableIterator to the start of a block, run DetectParagraphs on
// that block and commit the results to the underlying ROW and BLOCK structs, // that block and commit the results to the underlying ROW and BLOCK structs,
// saving the ParagraphModels in models. Caller owns the models. // saving the ParagraphModels in models. Caller owns the models.
// We use unicharset during the function to answer questions such as "is the // We use unicharset during the function to answer questions such as "is the
// first letter of this word upper case?" // first letter of this word upper case?"
void DetectParagraphs(int debug_level, void DetectParagraphs(int debug_level,
bool after_text_recognition, bool after_text_recognition,
const MutableIterator *block_start, const MutableIterator *block_start,
GenericVector<ParagraphModel *> *models); GenericVector<ParagraphModel *> *models);
} // namespace } // namespace
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_ #endif // TESSERACT_CCMAIN_PARAGRAPHS_H_

View File

@ -1,87 +1,87 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: pgedit.h // File: pgedit.h
// Description: Page structure file editor // Description: Page structure file editor
// Author: Joern Wanke // Author: Joern Wanke
// Created: Wed Jul 18 10:05:01 PDT 2007 // Created: Wed Jul 18 10:05:01 PDT 2007
// //
// (C) Copyright 2007, Google Inc. // (C) Copyright 2007, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef PGEDIT_H #ifndef PGEDIT_H
#define PGEDIT_H #define PGEDIT_H
#include "ocrblock.h" #include "ocrblock.h"
#include "ocrrow.h" #include "ocrrow.h"
#include "werd.h" #include "werd.h"
#include "rect.h" #include "rect.h"
#include "params.h" #include "params.h"
#include "tesseractclass.h" #include "tesseractclass.h"
class ScrollView; class ScrollView;
class SVMenuNode; class SVMenuNode;
struct SVEvent; struct SVEvent;
// A small event handler class to process incoming events to // A small event handler class to process incoming events to
// this window. // this window.
class PGEventHandler : public SVEventHandler { class PGEventHandler : public SVEventHandler {
public: public:
PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) { PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
} }
void Notify(const SVEvent* sve); void Notify(const SVEvent* sve);
private: private:
tesseract::Tesseract* tess_; tesseract::Tesseract* tess_;
}; };
extern BLOCK_LIST *current_block_list; extern BLOCK_LIST *current_block_list;
extern STRING_VAR_H (editor_image_win_name, "EditorImage", extern STRING_VAR_H (editor_image_win_name, "EditorImage",
"Editor image window name"); "Editor image window name");
extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos"); extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos");
extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos"); extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos");
extern INT_VAR_H (editor_image_height, 680, "Editor image height"); extern INT_VAR_H (editor_image_height, 680, "Editor image height");
extern INT_VAR_H (editor_image_width, 655, "Editor image width"); extern INT_VAR_H (editor_image_width, 655, "Editor image width");
extern INT_VAR_H (editor_image_word_bb_color, BLUE, extern INT_VAR_H (editor_image_word_bb_color, BLUE,
"Word bounding box colour"); "Word bounding box colour");
extern INT_VAR_H (editor_image_blob_bb_color, YELLOW, extern INT_VAR_H (editor_image_blob_bb_color, YELLOW,
"Blob bounding box colour"); "Blob bounding box colour");
extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour"); extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour");
extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin", extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin",
"Editor debug window name"); "Editor debug window name");
extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos"); extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos");
extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos"); extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos");
extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height"); extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height");
extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width"); extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width");
extern STRING_VAR_H (editor_word_name, "BlnWords", extern STRING_VAR_H (editor_word_name, "BlnWords",
"BL normalised word window"); "BL normalised word window");
extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos"); extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos");
extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos"); extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos");
extern INT_VAR_H (editor_word_height, 240, "Word window height"); extern INT_VAR_H (editor_word_height, 240, "Word window height");
extern INT_VAR_H (editor_word_width, 655, "Word window width"); extern INT_VAR_H (editor_word_width, 655, "Word window width");
extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image"); extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image");
ScrollView* bln_word_window_handle(); //return handle ScrollView* bln_word_window_handle(); //return handle
void build_image_window(int width, int height); void build_image_window(int width, int height);
void display_bln_lines(ScrollView window, void display_bln_lines(ScrollView window,
ScrollView::Color colour, ScrollView::Color colour,
float scale_factor, float scale_factor,
float y_offset, float y_offset,
float minx, float minx,
float maxx); float maxx);
//function to call //function to call
void pgeditor_msg( //message display void pgeditor_msg( //message display
const char *msg); const char *msg);
void pgeditor_show_point( //display coords void pgeditor_show_point( //display coords
SVEvent *event); SVEvent *event);
//put bln word in box //put bln word in box
void show_point(PAGE_RES* page_res, float x, float y); void show_point(PAGE_RES* page_res, float x, float y);
#endif #endif

View File

@ -1,34 +1,34 @@
/********************************************************************** /**********************************************************************
* File: reject.h (Formerly reject.h) * File: reject.h (Formerly reject.h)
* Description: Rejection functions used in tessedit * Description: Rejection functions used in tessedit
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Wed Sep 23 16:50:21 BST 1992 * Created: Wed Sep 23 16:50:21 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef REJECT_H #ifndef REJECT_H
#define REJECT_H #define REJECT_H
#include "params.h" #include "params.h"
#include "pageres.h" #include "pageres.h"
void reject_blanks(WERD_RES *word); void reject_blanks(WERD_RES *word);
void reject_poor_matches(WERD_RES *word); void reject_poor_matches(WERD_RES *word);
float compute_reject_threshold(WERD_CHOICE* word); float compute_reject_threshold(WERD_CHOICE* word);
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths); BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
void dont_allow_1Il(WERD_RES *word); void dont_allow_1Il(WERD_RES *word);
void flip_hyphens(WERD_RES *word); void flip_hyphens(WERD_RES *word);
void flip_0O(WERD_RES *word); void flip_0O(WERD_RES *word);
BOOL8 non_0_digit(const char* str, int length); BOOL8 non_0_digit(const char* str, int length);
#endif #endif

View File

@ -1,28 +1,28 @@
/********************************************************************** /**********************************************************************
* File: tessbox.h (Formerly tessbox.h) * File: tessbox.h (Formerly tessbox.h)
* Description: Black boxed Tess for developing a resaljet. * Description: Black boxed Tess for developing a resaljet.
* Author: Ray Smith * Author: Ray Smith
* Created: Thu Apr 23 11:03:36 BST 1992 * Created: Thu Apr 23 11:03:36 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef TESSBOX_H #ifndef TESSBOX_H
#define TESSBOX_H #define TESSBOX_H
#include "ratngs.h" #include "ratngs.h"
#include "tesseractclass.h" #include "tesseractclass.h"
// TODO(ocr-team): Delete this along with other empty header files. // TODO(ocr-team): Delete this along with other empty header files.
#endif #endif

View File

@ -1,29 +1,29 @@
/********************************************************************** /**********************************************************************
* File: tessedit.h (Formerly tessedit.h) * File: tessedit.h (Formerly tessedit.h)
* Description: Main program for merge of tess and editor. * Description: Main program for merge of tess and editor.
* Author: Ray Smith * Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992 * Created: Tue Jan 07 15:21:46 GMT 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef TESSEDIT_H #ifndef TESSEDIT_H
#define TESSEDIT_H #define TESSEDIT_H
#include "blobs.h" #include "blobs.h"
#include "pgedit.h" #include "pgedit.h"
// Progress monitor shared across the program; declared here, defined in
// another translation unit.
extern ETEXT_DESC* global_monitor;
#endif #endif

View File

@ -1,24 +1,24 @@
/********************************************************************** /**********************************************************************
* File: tessvars.cpp (Formerly tessvars.c) * File: tessvars.cpp (Formerly tessvars.c)
* Description: Variables and other globals for tessedit. * Description: Variables and other globals for tessedit.
* Author: Ray Smith * Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992 * Created: Mon Apr 13 13:13:23 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#include <stdio.h> #include <stdio.h>
#include "tessvars.h" #include "tessvars.h"
// Stream that receives debug output; starts out pointing at stderr.
FILE* debug_fp = stderr;

View File

@ -1,27 +1,27 @@
/********************************************************************** /**********************************************************************
* File: tessvars.h (Formerly tessvars.h) * File: tessvars.h (Formerly tessvars.h)
* Description: Variables and other globals for tessedit. * Description: Variables and other globals for tessedit.
* Author: Ray Smith * Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992 * Created: Mon Apr 13 13:13:23 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef TESSVARS_H #ifndef TESSVARS_H
#define TESSVARS_H #define TESSVARS_H
#include <stdio.h> #include <stdio.h>
// Stream that receives debug output (declaration only).
extern FILE* debug_fp;
#endif #endif

View File

@ -1,27 +1,27 @@
/********************************************************************** /**********************************************************************
* File: wordit.h * File: wordit.h
* Description: An iterator for passing over all the words in a document. * Description: An iterator for passing over all the words in a document.
* Author: Ray Smith * Author: Ray Smith
* Created: Mon Apr 27 08:51:22 BST 1992 * Created: Mon Apr 27 08:51:22 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef WERDIT_H #ifndef WERDIT_H
#define WERDIT_H #define WERDIT_H
#include "pageres.h" #include "pageres.h"
// Builds a pseudo word for the given selection box within page_res and
// returns an iterator for it.
// NOTE(review): ownership of the returned iterator is not visible from this
// header - confirm against the definition.
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res,
                              const TBOX& selection_box);
#endif #endif

View File

@ -1,29 +1,29 @@
/********************************************************************** /**********************************************************************
* File: blckerr.h (Formerly blockerr.h) * File: blckerr.h (Formerly blockerr.h)
* Description: Error codes for the page block classes. * Description: Error codes for the page block classes.
* Author: Ray Smith * Author: Ray Smith
* Created: Tue Mar 19 17:43:30 GMT 1991 * Created: Tue Mar 19 17:43:30 GMT 1991
* *
* (C) Copyright 1991, Hewlett-Packard Ltd. * (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License. ** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at ** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0 ** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software ** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS, ** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
* *
**********************************************************************/ **********************************************************************/
#ifndef BLCKERR_H #ifndef BLCKERR_H
#define BLCKERR_H #define BLCKERR_H
#include "errcode.h" #include "errcode.h"
// Error codes raised by the page block classes. Each one wraps the message
// text that identifies the failure.
const ERRCODE BADBLOCKLINE("Y coordinate in block out of bounds");
const ERRCODE LOSTBLOCKLINE("Can't find rectangle for line");
const ERRCODE ILLEGAL_GRADIENT("Gradient wrong side of edge step!");
const ERRCODE WRONG_WORD("Word doesn't have blobs of that type");
#endif #endif

View File

@ -1,100 +1,100 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: boxword.h // File: boxword.h
// Description: Class to represent the bounding boxes of the output. // Description: Class to represent the bounding boxes of the output.
// Author: Ray Smith // Author: Ray Smith
// Created: Tue May 25 14:18:14 PDT 2010 // Created: Tue May 25 14:18:14 PDT 2010
// //
// (C) Copyright 2010, Google Inc. // (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_ #ifndef TESSERACT_CSTRUCT_BOXWORD_H_
#define TESSERACT_CSTRUCT_BOXWORD_H_ #define TESSERACT_CSTRUCT_BOXWORD_H_
#include "genericvector.h" #include "genericvector.h"
#include "rect.h" #include "rect.h"
#include "unichar.h" #include "unichar.h"
class BLOCK; class BLOCK;
class DENORM; class DENORM;
struct TWERD; struct TWERD;
class UNICHARSET; class UNICHARSET;
class WERD; class WERD;
class WERD_CHOICE; class WERD_CHOICE;
class WERD_RES; class WERD_RES;
namespace tesseract { namespace tesseract {
// Class to hold an array of bounding boxes for an output word and // Class to hold an array of bounding boxes for an output word and
// the bounding box of the whole word. // the bounding box of the whole word.
class BoxWord { class BoxWord {
public: public:
BoxWord(); BoxWord();
explicit BoxWord(const BoxWord& src); explicit BoxWord(const BoxWord& src);
~BoxWord(); ~BoxWord();
BoxWord& operator=(const BoxWord& src); BoxWord& operator=(const BoxWord& src);
void CopyFrom(const BoxWord& src); void CopyFrom(const BoxWord& src);
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
// switch back to original image coordinates. // switch back to original image coordinates.
static BoxWord* CopyFromNormalized(TWERD* tessword); static BoxWord* CopyFromNormalized(TWERD* tessword);
// Clean up the bounding boxes from the polygonal approximation by // Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word // expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation. // that overlap. If not null, the block provides the inverse rotation.
void ClipToOriginalWord(const BLOCK* block, WERD* original_word); void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
// Merges the boxes from start to end, not including end, and deletes // Merges the boxes from start to end, not including end, and deletes
// the boxes between start and end. // the boxes between start and end.
void MergeBoxes(int start, int end); void MergeBoxes(int start, int end);
// Inserts a new box before the given index. // Inserts a new box before the given index.
// Recomputes the bounding box. // Recomputes the bounding box.
void InsertBox(int index, const TBOX& box); void InsertBox(int index, const TBOX& box);
// Changes the box at the given index to the new box. // Changes the box at the given index to the new box.
// Recomputes the bounding box. // Recomputes the bounding box.
void ChangeBox(int index, const TBOX& box); void ChangeBox(int index, const TBOX& box);
// Deletes the box with the given index, and shuffles up the rest. // Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box. // Recomputes the bounding box.
void DeleteBox(int index); void DeleteBox(int index);
// Deletes all the boxes stored in BoxWord. // Deletes all the boxes stored in BoxWord.
void DeleteAllBoxes(); void DeleteAllBoxes();
// This and other putatively are the same, so call the (permanent) callback // This and other putatively are the same, so call the (permanent) callback
// for each blob index where the bounding boxes match. // for each blob index where the bounding boxes match.
// The callback is deleted on completion. // The callback is deleted on completion.
void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const; void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
const TBOX& bounding_box() const { const TBOX& bounding_box() const {
return bbox_; return bbox_;
} }
int length() const { return length_; } int length() const { return length_; }
const TBOX& BlobBox(int index) const { const TBOX& BlobBox(int index) const {
return boxes_[index]; return boxes_[index];
} }
private: private:
void ComputeBoundingBox(); void ComputeBoundingBox();
TBOX bbox_; TBOX bbox_;
int length_; int length_;
GenericVector<TBOX> boxes_; GenericVector<TBOX> boxes_;
}; };
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CSTRUCT_BOXWORD_H_ #endif // TESSERACT_CSTRUCT_BOXWORD_H_

View File

@ -1,36 +1,36 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: ccstruct.cpp // File: ccstruct.cpp
// Description: ccstruct class. // Description: ccstruct class.
// Author: Samuel Charron // Author: Samuel Charron
// //
// (C) Copyright 2006, Google Inc. // (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#include "ccstruct.h" #include "ccstruct.h"
namespace tesseract { namespace tesseract {
// Rough split of the character cell between descenders, x-height and
// ascenders. These are approximations only.
const double CCStruct::kDescenderFraction = 0.25;
const double CCStruct::kXHeightFraction = 0.5;
const double CCStruct::kAscenderFraction = 0.25;
// x-height expressed as a fraction of the cap height (x-height + ascender).
const double CCStruct::kXHeightCapRatio =
    CCStruct::kXHeightFraction /
    (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
// Nothing to set up or tear down beyond what the base class does.
CCStruct::CCStruct() {
}

CCStruct::~CCStruct() = default;
} }

View File

@ -1,43 +1,43 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: ccstruct.h // File: ccstruct.h
// Description: ccstruct class. // Description: ccstruct class.
// Author: Samuel Charron // Author: Samuel Charron
// //
// (C) Copyright 2006, Google Inc. // (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_ #ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
#define TESSERACT_CCSTRUCT_CCSTRUCT_H_ #define TESSERACT_CCSTRUCT_CCSTRUCT_H_
#include "cutil.h" #include "cutil.h"
namespace tesseract { namespace tesseract {
// Adds the shared character-cell proportion constants on top of CUtil.
class CCStruct : public CUtil {
 public:
  CCStruct();
  ~CCStruct();

  // Constants available to all users of the class.
  // They approximate how the character cell is divided between the
  // descenders, the x-height and the ascenders.
  static const double kDescenderFraction;  // = 0.25;
  static const double kXHeightFraction;    // = 0.5;
  static const double kAscenderFraction;   // = 0.25;
  // Derived: x-height as a fraction of the cap height.
  static const double kXHeightCapRatio;    // = XHeight/(XHeight + Ascender).
};
class Tesseract; class Tesseract;
} // namespace tesseract } // namespace tesseract
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_ #endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_

View File

@ -1,52 +1,52 @@
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_ #ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_ #define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#include "allheaders.h" #include "allheaders.h"
namespace tesseract { namespace tesseract {
// Class to hold a Pixa collection of debug images with captions and save them // Class to hold a Pixa collection of debug images with captions and save them
// to a PDF file. // to a PDF file.
class DebugPixa { class DebugPixa {
public: public:
// TODO(rays) add another constructor with size control. // TODO(rays) add another constructor with size control.
DebugPixa() { DebugPixa() {
pixa_ = pixaCreate(0); pixa_ = pixaCreate(0);
fonts_ = bmfCreate(nullptr, 14); fonts_ = bmfCreate(nullptr, 14);
} }
// If the filename_ has been set and there are any debug images, they are // If the filename_ has been set and there are any debug images, they are
// written to the set filename_. // written to the set filename_.
~DebugPixa() { ~DebugPixa() {
pixaDestroy(&pixa_); pixaDestroy(&pixa_);
bmfDestroy(&fonts_); bmfDestroy(&fonts_);
} }
// Adds the given pix to the set of pages in the PDF file, with the given // Adds the given pix to the set of pages in the PDF file, with the given
// caption added to the top. // caption added to the top.
void AddPix(const Pix* pix, const char* caption) { void AddPix(const Pix* pix, const char* caption) {
int depth = pixGetDepth(const_cast<Pix*>(pix)); int depth = pixGetDepth(const_cast<Pix*>(pix));
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80); int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
Pix* pix_debug = pixAddSingleTextblock( Pix* pix_debug = pixAddSingleTextblock(
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr); const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
pixaAddPix(pixa_, pix_debug, L_INSERT); pixaAddPix(pixa_, pix_debug, L_INSERT);
} }
// Sets the destination filename and enables images to be written to a PDF // Sets the destination filename and enables images to be written to a PDF
// on destruction. // on destruction.
void WritePDF(const char* filename) { void WritePDF(const char* filename) {
if (pixaGetCount(pixa_) > 0) { if (pixaGetCount(pixa_) > 0) {
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename); pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
pixaClear(pixa_); pixaClear(pixa_);
} }
} }
private: private:
// The collection of images to put in the PDF. // The collection of images to put in the PDF.
Pixa* pixa_; Pixa* pixa_;
// The fonts used to draw text captions. // The fonts used to draw text captions.
L_Bmf* fonts_; L_Bmf* fonts_;
}; };
} // namespace tesseract } // namespace tesseract
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_ #endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_

View File

@ -1,295 +1,295 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: detlinefit.cpp // File: detlinefit.cpp
// Description: Deterministic least median squares line fitting. // Description: Deterministic least median squares line fitting.
// Author: Ray Smith // Author: Ray Smith
// Created: Thu Feb 28 14:45:01 PDT 2008 // Created: Thu Feb 28 14:45:01 PDT 2008
// //
// (C) Copyright 2008, Google Inc. // (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#include "detlinefit.h" #include "detlinefit.h"
#include "statistc.h" #include "statistc.h"
#include "ndminx.h" #include "ndminx.h"
#include "tprintf.h" #include "tprintf.h"
namespace tesseract { namespace tesseract {
// The number of points to consider at each end.
const int kNumEndPoints = 3;
// The minimum number of points at which to switch to number of points
// for badly fitted lines.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// The maximum real distance to use before switching to number of
// mis-fitted points, which will get square-rooted for true distance.
// Written as an integer literal: the constant is an int, so initialising it
// from 2.0 only added an implicit floating-to-integer conversion.
const int kMaxRealDistance = 2;
// Starts with a zero square length.
DetLineFit::DetLineFit()
    : square_length_(0.0) {}

DetLineFit::~DetLineFit() = default;
// Delete all Added points. // Delete all Added points.
void DetLineFit::Clear() { void DetLineFit::Clear() {
pts_.clear(); pts_.clear();
distances_.clear(); distances_.clear();
} }
// Add a new point. Takes a copy - the pt doesn't need to stay in scope. // Add a new point. Takes a copy - the pt doesn't need to stay in scope.
void DetLineFit::Add(const ICOORD& pt) { void DetLineFit::Add(const ICOORD& pt) {
pts_.push_back(PointWidth(pt, 0)); pts_.push_back(PointWidth(pt, 0));
} }
// Associates a half-width with the given point if a point overlaps the // Associates a half-width with the given point if a point overlaps the
// previous point by more than half the width, and its distance is further // previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the // than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics. // distance calculation. Useful for ignoring i dots and other diacritics.
void DetLineFit::Add(const ICOORD& pt, int halfwidth) { void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
pts_.push_back(PointWidth(pt, halfwidth)); pts_.push_back(PointWidth(pt, halfwidth));
} }
// Fits a line to the points, ignoring the skip_first initial points and the // Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points, // skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error. // and the upper quartile error.
double DetLineFit::Fit(int skip_first, int skip_last, double DetLineFit::Fit(int skip_first, int skip_last,
ICOORD* pt1, ICOORD* pt2) { ICOORD* pt1, ICOORD* pt2) {
// Do something sensible with no points. // Do something sensible with no points.
if (pts_.empty()) { if (pts_.empty()) {
pt1->set_x(0); pt1->set_x(0);
pt1->set_y(0); pt1->set_y(0);
*pt2 = *pt1; *pt2 = *pt1;
return 0.0; return 0.0;
} }
// Count the points and find the first and last kNumEndPoints. // Count the points and find the first and last kNumEndPoints.
int pt_count = pts_.size(); int pt_count = pts_.size();
ICOORD* starts[kNumEndPoints]; ICOORD* starts[kNumEndPoints];
if (skip_first >= pt_count) skip_first = pt_count - 1; if (skip_first >= pt_count) skip_first = pt_count - 1;
int start_count = 0; int start_count = 0;
int end_i = MIN(skip_first + kNumEndPoints, pt_count); int end_i = MIN(skip_first + kNumEndPoints, pt_count);
for (int i = skip_first; i < end_i; ++i) { for (int i = skip_first; i < end_i; ++i) {
starts[start_count++] = &pts_[i].pt; starts[start_count++] = &pts_[i].pt;
} }
ICOORD* ends[kNumEndPoints]; ICOORD* ends[kNumEndPoints];
if (skip_last >= pt_count) skip_last = pt_count - 1; if (skip_last >= pt_count) skip_last = pt_count - 1;
int end_count = 0; int end_count = 0;
end_i = MAX(0, pt_count - kNumEndPoints - skip_last); end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) { for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
ends[end_count++] = &pts_[i].pt; ends[end_count++] = &pts_[i].pt;
} }
// 1 or 2 points need special treatment. // 1 or 2 points need special treatment.
if (pt_count <= 2) { if (pt_count <= 2) {
*pt1 = *starts[0]; *pt1 = *starts[0];
if (pt_count > 1) if (pt_count > 1)
*pt2 = *ends[0]; *pt2 = *ends[0];
else else
*pt2 = *pt1; *pt2 = *pt1;
return 0.0; return 0.0;
} }
// Although with between 2 and 2*kNumEndPoints-1 points, there will be // Although with between 2 and 2*kNumEndPoints-1 points, there will be
// overlap in the starts, ends sets, this is OK and taken care of by the // overlap in the starts, ends sets, this is OK and taken care of by the
// if (*start != *end) test below, which also tests for equal input points. // if (*start != *end) test below, which also tests for equal input points.
double best_uq = -1.0; double best_uq = -1.0;
// Iterate each pair of points and find the best fitting line. // Iterate each pair of points and find the best fitting line.
for (int i = 0; i < start_count; ++i) { for (int i = 0; i < start_count; ++i) {
ICOORD* start = starts[i]; ICOORD* start = starts[i];
for (int j = 0; j < end_count; ++j) { for (int j = 0; j < end_count; ++j) {
ICOORD* end = ends[j]; ICOORD* end = ends[j];
if (*start != *end) { if (*start != *end) {
ComputeDistances(*start, *end); ComputeDistances(*start, *end);
// Compute the upper quartile error from the line. // Compute the upper quartile error from the line.
double dist = EvaluateLineFit(); double dist = EvaluateLineFit();
if (dist < best_uq || best_uq < 0.0) { if (dist < best_uq || best_uq < 0.0) {
best_uq = dist; best_uq = dist;
*pt1 = *start; *pt1 = *start;
*pt2 = *end; *pt2 = *end;
} }
} }
} }
} }
// Finally compute the square root to return the true distance. // Finally compute the square root to return the true distance.
return best_uq > 0.0 ? sqrt(best_uq) : best_uq; return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
} }
// Constrained fit with a supplied direction vector. Finds the best line_pt, // Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with // that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range // direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same // [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points. // reduced set of points.
// *Makes use of floating point arithmetic* // *Makes use of floating point arithmetic*
double DetLineFit::ConstrainedFit(const FCOORD& direction, double DetLineFit::ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist, double min_dist, double max_dist,
bool debug, ICOORD* line_pt) { bool debug, ICOORD* line_pt) {
ComputeConstrainedDistances(direction, min_dist, max_dist); ComputeConstrainedDistances(direction, min_dist, max_dist);
// Do something sensible with no points or computed distances. // Do something sensible with no points or computed distances.
if (pts_.empty() || distances_.empty()) { if (pts_.empty() || distances_.empty()) {
line_pt->set_x(0); line_pt->set_x(0);
line_pt->set_y(0); line_pt->set_y(0);
return 0.0; return 0.0;
} }
int median_index = distances_.choose_nth_item(distances_.size() / 2); int median_index = distances_.choose_nth_item(distances_.size() / 2);
*line_pt = distances_[median_index].data; *line_pt = distances_[median_index].data;
if (debug) { if (debug) {
tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n", tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
direction.x(), direction.y(), direction.x(), direction.y(),
line_pt->x(), line_pt->y(), distances_.size()); line_pt->x(), line_pt->y(), distances_.size());
for (int i = 0; i < distances_.size(); ++i) { for (int i = 0; i < distances_.size(); ++i) {
tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(), tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
distances_[i].data.y(), distances_[i].key); distances_[i].data.y(), distances_[i].key);
} }
tprintf("Result = %d\n", median_index); tprintf("Result = %d\n", median_index);
} }
// Center distances on the fitted point. // Center distances on the fitted point.
double dist_origin = direction * *line_pt; double dist_origin = direction * *line_pt;
for (int i = 0; i < distances_.size(); ++i) { for (int i = 0; i < distances_.size(); ++i) {
distances_[i].key -= dist_origin; distances_[i].key -= dist_origin;
} }
return sqrt(EvaluateLineFit()); return sqrt(EvaluateLineFit());
} }
// Returns true if there were enough points at the last call to Fit or // Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line. // ConstrainedFit for the fitted points to be used on a badly fitted line.
bool DetLineFit::SufficientPointsForIndependentFit() const { bool DetLineFit::SufficientPointsForIndependentFit() const {
return distances_.size() >= kMinPointsForErrorCount; return distances_.size() >= kMinPointsForErrorCount;
} }
// Backwards compatible fit returning a gradient and constant. // Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class. // function in preference to the LMS class.
double DetLineFit::Fit(float* m, float* c) { double DetLineFit::Fit(float* m, float* c) {
ICOORD start, end; ICOORD start, end;
double error = Fit(&start, &end); double error = Fit(&start, &end);
if (end.x() != start.x()) { if (end.x() != start.x()) {
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x()); *m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
*c = start.y() - *m * start.x(); *c = start.y() - *m * start.x();
} else { } else {
*m = 0.0f; *m = 0.0f;
*c = 0.0f; *c = 0.0f;
} }
return error; return error;
} }
// Backwards compatible constrained fit with a supplied gradient. // Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible // Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients. // to avoid potential difficulties with infinite gradients.
double DetLineFit::ConstrainedFit(double m, float* c) { double DetLineFit::ConstrainedFit(double m, float* c) {
// Do something sensible with no points. // Do something sensible with no points.
if (pts_.empty()) { if (pts_.empty()) {
*c = 0.0f; *c = 0.0f;
return 0.0; return 0.0;
} }
double cos = 1.0 / sqrt(1.0 + m * m); double cos = 1.0 / sqrt(1.0 + m * m);
FCOORD direction(cos, m * cos); FCOORD direction(cos, m * cos);
ICOORD line_pt; ICOORD line_pt;
double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false, double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
&line_pt); &line_pt);
*c = line_pt.y() - line_pt.x() * m; *c = line_pt.y() - line_pt.x() * m;
return error; return error;
} }
// Computes and returns the squared evaluation metric for a line fit. // Computes and returns the squared evaluation metric for a line fit.
double DetLineFit::EvaluateLineFit() { double DetLineFit::EvaluateLineFit() {
// Compute the upper quartile error from the line. // Compute the upper quartile error from the line.
double dist = ComputeUpperQuartileError(); double dist = ComputeUpperQuartileError();
if (distances_.size() >= kMinPointsForErrorCount && if (distances_.size() >= kMinPointsForErrorCount &&
dist > kMaxRealDistance * kMaxRealDistance) { dist > kMaxRealDistance * kMaxRealDistance) {
// Use the number of mis-fitted points as the error metric, as this // Use the number of mis-fitted points as the error metric, as this
// gives a better measure of fit for badly fitted lines where more // gives a better measure of fit for badly fitted lines where more
// than a quarter are badly fitted. // than a quarter are badly fitted.
double threshold = kMaxRealDistance * sqrt(square_length_); double threshold = kMaxRealDistance * sqrt(square_length_);
dist = NumberOfMisfittedPoints(threshold); dist = NumberOfMisfittedPoints(threshold);
} }
return dist; return dist;
} }
// Computes the absolute error distances of the points from the line, // Computes the absolute error distances of the points from the line,
// and returns the squared upper-quartile error distance. // and returns the squared upper-quartile error distance.
double DetLineFit::ComputeUpperQuartileError() { double DetLineFit::ComputeUpperQuartileError() {
int num_errors = distances_.size(); int num_errors = distances_.size();
if (num_errors == 0) return 0.0; if (num_errors == 0) return 0.0;
// Get the absolute values of the errors. // Get the absolute values of the errors.
for (int i = 0; i < num_errors; ++i) { for (int i = 0; i < num_errors; ++i) {
if (distances_[i].key < 0) distances_[i].key = -distances_[i].key; if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
} }
// Now get the upper quartile distance. // Now get the upper quartile distance.
int index = distances_.choose_nth_item(3 * num_errors / 4); int index = distances_.choose_nth_item(3 * num_errors / 4);
double dist = distances_[index].key; double dist = distances_[index].key;
// The true distance is the square root of the dist squared / square_length. // The true distance is the square root of the dist squared / square_length.
// Don't bother with the square root. Just return the square distance. // Don't bother with the square root. Just return the square distance.
return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0; return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
} }
// Returns the number of sample points that have an error more than threshold. // Returns the number of sample points that have an error more than threshold.
int DetLineFit::NumberOfMisfittedPoints(double threshold) const { int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
int num_misfits = 0; int num_misfits = 0;
int num_dists = distances_.size(); int num_dists = distances_.size();
// Get the absolute values of the errors. // Get the absolute values of the errors.
for (int i = 0; i < num_dists; ++i) { for (int i = 0; i < num_dists; ++i) {
if (distances_[i].key > threshold) if (distances_[i].key > threshold)
++num_misfits; ++num_misfits;
} }
return num_misfits; return num_misfits;
} }
// Computes all the cross product distances of the points from the line, // Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances. // storing the actual (signed) cross products in distances.
// Ignores distances of points that are further away than the previous point, // Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half. // and overlaps the previous point by at least half.
void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) { void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
distances_.truncate(0); distances_.truncate(0);
ICOORD line_vector = end; ICOORD line_vector = end;
line_vector -= start; line_vector -= start;
square_length_ = line_vector.sqlength(); square_length_ = line_vector.sqlength();
int line_length = IntCastRounded(sqrt(square_length_)); int line_length = IntCastRounded(sqrt(square_length_));
// Compute the distance of each point from the line. // Compute the distance of each point from the line.
int prev_abs_dist = 0; int prev_abs_dist = 0;
int prev_dot = 0; int prev_dot = 0;
for (int i = 0; i < pts_.size(); ++i) { for (int i = 0; i < pts_.size(); ++i) {
ICOORD pt_vector = pts_[i].pt; ICOORD pt_vector = pts_[i].pt;
pt_vector -= start; pt_vector -= start;
int dot = line_vector % pt_vector; int dot = line_vector % pt_vector;
// Compute |line_vector||pt_vector|sin(angle between) // Compute |line_vector||pt_vector|sin(angle between)
int dist = line_vector * pt_vector; int dist = line_vector * pt_vector;
int abs_dist = dist < 0 ? -dist : dist; int abs_dist = dist < 0 ? -dist : dist;
if (abs_dist > prev_abs_dist && i > 0) { if (abs_dist > prev_abs_dist && i > 0) {
// Ignore this point if it overlaps the previous one. // Ignore this point if it overlaps the previous one.
int separation = abs(dot - prev_dot); int separation = abs(dot - prev_dot);
if (separation < line_length * pts_[i].halfwidth || if (separation < line_length * pts_[i].halfwidth ||
separation < line_length * pts_[i - 1].halfwidth) separation < line_length * pts_[i - 1].halfwidth)
continue; continue;
} }
distances_.push_back(DistPointPair(dist, pts_[i].pt)); distances_.push_back(DistPointPair(dist, pts_[i].pt));
prev_abs_dist = abs_dist; prev_abs_dist = abs_dist;
prev_dot = dot; prev_dot = dot;
} }
} }
// Computes all the cross product distances of the points perpendicular to // Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the give distance range, // the given direction, ignoring distances outside of the give distance range,
// storing the actual (signed) cross products in distances_. // storing the actual (signed) cross products in distances_.
void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction, void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist) { double min_dist, double max_dist) {
distances_.truncate(0); distances_.truncate(0);
square_length_ = direction.sqlength(); square_length_ = direction.sqlength();
// Compute the distance of each point from the line. // Compute the distance of each point from the line.
for (int i = 0; i < pts_.size(); ++i) { for (int i = 0; i < pts_.size(); ++i) {
FCOORD pt_vector = pts_[i].pt; FCOORD pt_vector = pts_[i].pt;
// Compute |line_vector||pt_vector|sin(angle between) // Compute |line_vector||pt_vector|sin(angle between)
double dist = direction * pt_vector; double dist = direction * pt_vector;
if (min_dist <= dist && dist <= max_dist) if (min_dist <= dist && dist <= max_dist)
distances_.push_back(DistPointPair(dist, pts_[i].pt)); distances_.push_back(DistPointPair(dist, pts_[i].pt));
} }
} }
} // namespace tesseract. } // namespace tesseract.

View File

@ -1,164 +1,164 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: detlinefit.h // File: detlinefit.h
// Description: Deterministic least upper-quartile squares line fitting. // Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith // Author: Ray Smith
// Created: Thu Feb 28 14:35:01 PDT 2008 // Created: Thu Feb 28 14:35:01 PDT 2008
// //
// (C) Copyright 2008, Google Inc. // (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_ #ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_ #define TESSERACT_CCSTRUCT_DETLINEFIT_H_
#include "genericvector.h" #include "genericvector.h"
#include "kdpair.h" #include "kdpair.h"
#include "points.h" #include "points.h"
namespace tesseract { namespace tesseract {
// This class fits a line to a set of ICOORD points. // This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it // There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients. // uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular // The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint // distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}] // that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points. // i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and // A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit. // one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly. // The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit, // No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic, // and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor // architecture-stable, and compiler-stable as well as stable to minor
// changes in the input. // changes in the input.
// *A single floating point division is used to compute each line's distance. // *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does, // This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation. // it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst // **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals, // non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the // and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random // randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time. // numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different // Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time. // lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and // This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most // LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract. // of the line fitting in Tesseract.
class DetLineFit { class DetLineFit {
public: public:
DetLineFit(); DetLineFit();
~DetLineFit(); ~DetLineFit();
// Delete all Added points. // Delete all Added points.
void Clear(); void Clear();
// Adds a new point. Takes a copy - the pt doesn't need to stay in scope. // Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
// Add must be called on points in sequence along the line. // Add must be called on points in sequence along the line.
void Add(const ICOORD& pt); void Add(const ICOORD& pt);
// Associates a half-width with the given point if a point overlaps the // Associates a half-width with the given point if a point overlaps the
// previous point by more than half the width, and its distance is further // previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the // than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics. // distance calculation. Useful for ignoring i dots and other diacritics.
void Add(const ICOORD& pt, int halfwidth); void Add(const ICOORD& pt, int halfwidth);
// Fits a line to the points, returning the fitted line as a pair of // Fits a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error. // points, and the upper quartile error.
double Fit(ICOORD* pt1, ICOORD* pt2) { double Fit(ICOORD* pt1, ICOORD* pt2) {
return Fit(0, 0, pt1, pt2); return Fit(0, 0, pt1, pt2);
} }
// Fits a line to the points, ignoring the skip_first initial points and the // Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points, // skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error. // and the upper quartile error.
double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2); double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
// Constrained fit with a supplied direction vector. Finds the best line_pt, // Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with // that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range // direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same // [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points. // reduced set of points.
// *Makes use of floating point arithmetic* // *Makes use of floating point arithmetic*
double ConstrainedFit(const FCOORD& direction, double ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist, double min_dist, double max_dist,
bool debug, ICOORD* line_pt); bool debug, ICOORD* line_pt);
// Returns true if there were enough points at the last call to Fit or // Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line. // ConstrainedFit for the fitted points to be used on a badly fitted line.
bool SufficientPointsForIndependentFit() const; bool SufficientPointsForIndependentFit() const;
// Backwards compatible fit returning a gradient and constant. // Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class. // function in preference to the LMS class.
double Fit(float* m, float* c); double Fit(float* m, float* c);
// Backwards compatible constrained fit with a supplied gradient. // Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible // Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients. // to avoid potential difficulties with infinite gradients.
double ConstrainedFit(double m, float* c); double ConstrainedFit(double m, float* c);
private: private:
// Simple struct to hold an ICOORD point and a halfwidth representing half // Simple struct to hold an ICOORD point and a halfwidth representing half
// the "width" (supposedly approximately parallel to the direction of the // the "width" (supposedly approximately parallel to the direction of the
// line) of each point, such that distant points can be discarded when they // line) of each point, such that distant points can be discarded when they
// overlap nearer points. (Think i dot and other diacritics or noise.) // overlap nearer points. (Think i dot and other diacritics or noise.)
struct PointWidth { struct PointWidth {
PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {} PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
PointWidth(const ICOORD& pt0, int halfwidth0) PointWidth(const ICOORD& pt0, int halfwidth0)
: pt(pt0), halfwidth(halfwidth0) {} : pt(pt0), halfwidth(halfwidth0) {}
ICOORD pt; ICOORD pt;
int halfwidth; int halfwidth;
}; };
// Type holds the distance of each point from the fitted line and the point // Type holds the distance of each point from the fitted line and the point
// itself. Use of double allows integer distances from ICOORDs to be stored // itself. Use of double allows integer distances from ICOORDs to be stored
// exactly, and also the floating point results from ConstrainedFit. // exactly, and also the floating point results from ConstrainedFit.
typedef KDPairInc<double, ICOORD> DistPointPair; typedef KDPairInc<double, ICOORD> DistPointPair;
// Computes and returns the squared evaluation metric for a line fit. // Computes and returns the squared evaluation metric for a line fit.
double EvaluateLineFit(); double EvaluateLineFit();
// Computes the absolute values of the precomputed distances_, // Computes the absolute values of the precomputed distances_,
// and returns the squared upper-quartile error distance. // and returns the squared upper-quartile error distance.
double ComputeUpperQuartileError(); double ComputeUpperQuartileError();
// Returns the number of sample points that have an error more than threshold. // Returns the number of sample points that have an error more than threshold.
int NumberOfMisfittedPoints(double threshold) const; int NumberOfMisfittedPoints(double threshold) const;
// Computes all the cross product distances of the points from the line, // Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances_. // storing the actual (signed) cross products in distances_.
// Ignores distances of points that are further away than the previous point, // Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half. // and overlaps the previous point by at least half.
void ComputeDistances(const ICOORD& start, const ICOORD& end); void ComputeDistances(const ICOORD& start, const ICOORD& end);
// Computes all the cross product distances of the points perpendicular to // Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the give distance range, // the given direction, ignoring distances outside of the give distance range,
// storing the actual (signed) cross products in distances_. // storing the actual (signed) cross products in distances_.
void ComputeConstrainedDistances(const FCOORD& direction, void ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist); double min_dist, double max_dist);
// Stores all the source points in the order they were given and their // Stores all the source points in the order they were given and their
// halfwidths, if any. // halfwidths, if any.
GenericVector<PointWidth> pts_; GenericVector<PointWidth> pts_;
// Stores the computed perpendicular distances of (some of) the pts_ from a // Stores the computed perpendicular distances of (some of) the pts_ from a
// given vector (assuming it goes through the origin, making it a line). // given vector (assuming it goes through the origin, making it a line).
// Since the distances may be a subset of the input points, and get // Since the distances may be a subset of the input points, and get
// re-ordered by the nth_item function, the original point is stored // re-ordered by the nth_item function, the original point is stored
// along side the distance. // along side the distance.
GenericVector<DistPointPair> distances_; // Distances of points. GenericVector<DistPointPair> distances_; // Distances of points.
// The squared length of the vector used to compute distances_. // The squared length of the vector used to compute distances_.
double square_length_; double square_length_;
}; };
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_ #endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_

Some files were not shown because too many files have changed in this diff Show More