Move sources into src dir. Update build scripts.

This commit is contained in:
Egor Pugin 2018-04-25 11:02:54 +03:00
parent e8fceb58ab
commit e95ff1159e
518 changed files with 33887 additions and 33884 deletions

View File

@ -128,14 +128,14 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
configure_file(
${CMAKE_SOURCE_DIR}/api/tess_version.h.in
${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY)
${CMAKE_SOURCE_DIR}/src/api/tess_version.h.in
${CMAKE_BINARY_DIR}/src/api/tess_version.h @ONLY)
configure_file(
${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in
${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY)
${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/tesseract.rc.in
${CMAKE_BINARY_DIR}/src/vs2010/tesseract/tesseract.rc @ONLY)
configure_file(
${CMAKE_SOURCE_DIR}/vs2010/tesseract/libtesseract.rc.in
${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc @ONLY)
${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/libtesseract.rc.in
${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc @ONLY)
configure_file(
${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in
${CMAKE_BINARY_DIR}/TesseractConfig-version.cmake @ONLY)
@ -160,101 +160,101 @@ include_directories(${Leptonica_INCLUDE_DIRS})
include_directories(${CMAKE_BINARY_DIR})
include_directories(api)
include_directories(src/api)
include_directories(${CMAKE_BINARY_DIR}/api)
include_directories(arch)
include_directories(ccmain)
include_directories(ccstruct)
include_directories(ccutil)
include_directories(classify)
include_directories(cutil)
include_directories(dict)
include_directories(lstm)
include_directories(opencl)
include_directories(textord)
include_directories(vs2010/port)
include_directories(viewer)
include_directories(wordrec)
include_directories(src/arch)
include_directories(src/ccmain)
include_directories(src/ccstruct)
include_directories(src/ccutil)
include_directories(src/classify)
include_directories(src/cutil)
include_directories(src/dict)
include_directories(src/lstm)
include_directories(src/opencl)
include_directories(src/textord)
include_directories(src/vs2010/port)
include_directories(src/viewer)
include_directories(src/wordrec)
########################################
# LIBRARY tesseract
########################################
file(GLOB tesseract_src
arch/*.cpp
ccmain/*.cpp
ccstruct/*.cpp
ccutil/*.cpp
classify/*.cpp
cutil/*.cpp
dict/*.cpp
lstm/*.cpp
opencl/*.cpp
textord/*.cpp
viewer/*.cpp
wordrec/*.cpp
src/arch/*.cpp
src/ccmain/*.cpp
src/ccstruct/*.cpp
src/ccutil/*.cpp
src/classify/*.cpp
src/cutil/*.cpp
src/dict/*.cpp
src/lstm/*.cpp
src/opencl/*.cpp
src/textord/*.cpp
src/viewer/*.cpp
src/wordrec/*.cpp
)
file(GLOB tesseract_hdr
api/*.h
arch/*.h
ccmain/*.h
ccstruct/*.h
ccutil/*.h
classify/*.h
cutil/*.h
dict/*.h
lstm/*.h
opencl/*.h
textord/*.h
viewer/*.h
wordrec/*.h
src/api/*.h
src/arch/*.h
src/ccmain/*.h
src/ccstruct/*.h
src/ccutil/*.h
src/classify/*.h
src/cutil/*.h
src/dict/*.h
src/lstm/*.h
src/opencl/*.h
src/textord/*.h
src/viewer/*.h
src/wordrec/*.h
)
if (WIN32)
file(GLOB tesseract_win32_src "vs2010/port/*.cpp")
file(GLOB tesseract_win32_hdr "vs2010/port/*.h")
file(GLOB tesseract_win32_src "src/vs2010/port/*.cpp")
file(GLOB tesseract_win32_hdr "src/vs2010/port/*.h")
set(tesseract_src ${tesseract_src} ${tesseract_win32_src})
set(tesseract_hdr ${tesseract_hdr} ${tesseract_win32_hdr})
endif()
set(tesseract_src ${tesseract_src}
api/baseapi.cpp
api/capi.cpp
api/renderer.cpp
api/pdfrenderer.cpp
src/api/baseapi.cpp
src/api/capi.cpp
src/api/renderer.cpp
src/api/pdfrenderer.cpp
)
if (WIN32)
if (MSVC)
include_directories(vs2010/tesseract)
include_directories(src/vs2010/tesseract)
set(tesseract_hdr
${tesseract_hdr}
${CMAKE_CURRENT_SOURCE_DIR}/vs2010/tesseract/resource.h)
set(tesseract_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc)
${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/resource.h)
set(tesseract_rsc ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX")
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX2")
endif()
else()
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
PROPERTIES COMPILE_FLAGS "-msse4.1")
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
PROPERTIES COMPILE_FLAGS "-msse4.1")
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS "-mavx")
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
PROPERTIES COMPILE_FLAGS "-mavx2")
endif()
@ -291,7 +291,7 @@ endif()
# EXECUTABLE tesseractmain
########################################
set(tesseractmain_src api/tesseractmain.cpp)
set(tesseractmain_src src/api/tesseractmain.cpp)
if (MSVC)
set(tesseractmain_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc)
endif()
@ -326,74 +326,74 @@ install(FILES
install(FILES
# from api/makefile.am
api/apitypes.h
api/baseapi.h
api/capi.h
api/renderer.h
src/api/apitypes.h
src/api/baseapi.h
src/api/capi.h
src/api/renderer.h
${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h
#from arch/makefile.am
arch/dotproductavx.h
arch/dotproductsse.h
arch/intsimdmatrix.h
arch/intsimdmatrixavx2.h
arch/intsimdmatrixsse.h
arch/simddetect.h
src/arch/dotproductavx.h
src/arch/dotproductsse.h
src/arch/intsimdmatrix.h
src/arch/intsimdmatrixavx2.h
src/arch/intsimdmatrixsse.h
src/arch/simddetect.h
#from ccmain/makefile.am
ccmain/thresholder.h
ccmain/ltrresultiterator.h
ccmain/pageiterator.h
ccmain/resultiterator.h
ccmain/osdetect.h
src/ccmain/thresholder.h
src/ccmain/ltrresultiterator.h
src/ccmain/pageiterator.h
src/ccmain/resultiterator.h
src/ccmain/osdetect.h
#from ccstruct/makefile.am
ccstruct/publictypes.h
src/ccstruct/publictypes.h
#from ccutil/makefile.am
ccutil/basedir.h
ccutil/errcode.h
ccutil/fileerr.h
ccutil/genericvector.h
ccutil/helpers.h
ccutil/host.h
ccutil/memry.h
ccutil/ndminx.h
ccutil/params.h
ccutil/ocrclass.h
ccutil/platform.h
ccutil/serialis.h
ccutil/strngs.h
ccutil/tesscallback.h
ccutil/unichar.h
ccutil/unicharcompress.h
ccutil/unicharmap.h
ccutil/unicharset.h
src/ccutil/basedir.h
src/ccutil/errcode.h
src/ccutil/fileerr.h
src/ccutil/genericvector.h
src/ccutil/helpers.h
src/ccutil/host.h
src/ccutil/memry.h
src/ccutil/ndminx.h
src/ccutil/params.h
src/ccutil/ocrclass.h
src/ccutil/platform.h
src/ccutil/serialis.h
src/ccutil/strngs.h
src/ccutil/tesscallback.h
src/ccutil/unichar.h
src/ccutil/unicharcompress.h
src/ccutil/unicharmap.h
src/ccutil/unicharset.h
#from lstm/makefile.am
lstm/convolve.h
lstm/ctc.h
lstm/fullyconnected.h
lstm/functions.h
lstm/input.h
lstm/lstm.h
lstm/lstmrecognizer.h
lstm/lstmtrainer.h
lstm/maxpool.h
lstm/networkbuilder.h
lstm/network.h
lstm/networkio.h
lstm/networkscratch.h
lstm/parallel.h
lstm/plumbing.h
lstm/recodebeam.h
lstm/reconfig.h
lstm/reversed.h
lstm/series.h
lstm/static_shape.h
lstm/stridemap.h
lstm/tfnetwork.h
lstm/weightmatrix.h
src/lstm/convolve.h
src/lstm/ctc.h
src/lstm/fullyconnected.h
src/lstm/functions.h
src/lstm/input.h
src/lstm/lstm.h
src/lstm/lstmrecognizer.h
src/lstm/lstmtrainer.h
src/lstm/maxpool.h
src/lstm/networkbuilder.h
src/lstm/network.h
src/lstm/networkio.h
src/lstm/networkscratch.h
src/lstm/parallel.h
src/lstm/plumbing.h
src/lstm/recodebeam.h
src/lstm/reconfig.h
src/lstm/reversed.h
src/lstm/series.h
src/lstm/static_shape.h
src/lstm/stridemap.h
src/lstm/tfnetwork.h
src/lstm/weightmatrix.h
#${CMAKE_BINARY_DIR}/src/endianness.h
DESTINATION include/tesseract)

View File

@ -16,7 +16,7 @@ AC_LANG_COMPILER_REQUIRE
CXXFLAGS=${CXXFLAGS:-""}
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([config])
AC_CONFIG_SRCDIR([api/tesseractmain.cpp])
AC_CONFIG_SRCDIR([src/api/tesseractmain.cpp])
AC_PREFIX_DEFAULT([/usr/local])
# Automake configuration. Do not require README file (we use README.md).
@ -476,20 +476,20 @@ fi
# Output files
AC_CONFIG_FILES([Makefile tesseract.pc])
AC_CONFIG_FILES([api/Makefile])
AC_CONFIG_FILES([api/tess_version.h])
AC_CONFIG_FILES([arch/Makefile])
AC_CONFIG_FILES([ccmain/Makefile])
AC_CONFIG_FILES([opencl/Makefile])
AC_CONFIG_FILES([ccstruct/Makefile])
AC_CONFIG_FILES([ccutil/Makefile])
AC_CONFIG_FILES([classify/Makefile])
AC_CONFIG_FILES([cutil/Makefile])
AC_CONFIG_FILES([dict/Makefile])
AC_CONFIG_FILES([lstm/Makefile])
AC_CONFIG_FILES([textord/Makefile])
AC_CONFIG_FILES([viewer/Makefile])
AC_CONFIG_FILES([wordrec/Makefile])
AC_CONFIG_FILES([src/api/Makefile])
AC_CONFIG_FILES([src/api/tess_version.h])
AC_CONFIG_FILES([src/arch/Makefile])
AC_CONFIG_FILES([src/ccmain/Makefile])
AC_CONFIG_FILES([src/opencl/Makefile])
AC_CONFIG_FILES([src/ccstruct/Makefile])
AC_CONFIG_FILES([src/ccutil/Makefile])
AC_CONFIG_FILES([src/classify/Makefile])
AC_CONFIG_FILES([src/cutil/Makefile])
AC_CONFIG_FILES([src/dict/Makefile])
AC_CONFIG_FILES([src/lstm/Makefile])
AC_CONFIG_FILES([src/textord/Makefile])
AC_CONFIG_FILES([src/viewer/Makefile])
AC_CONFIG_FILES([src/wordrec/Makefile])
AC_CONFIG_FILES([tessdata/Makefile])
AC_CONFIG_FILES([tessdata/configs/Makefile])
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])

103
cppan.yml
View File

@ -35,59 +35,59 @@ projects:
type: lib
export_all_symbols: true
files:
- api/.*\.cpp
- arch/.*\.cpp
- ccmain/.*\.cpp
- ccstruct/.*\.cpp
- ccutil/.*\.cpp
- classify/.*\.cpp
- cutil/.*\.cpp
- dict/.*\.cpp
- lstm/.*\.cpp
- opencl/.*\.cpp
- textord/.*\.cpp
- viewer/.*\.cpp
- wordrec/.*\.cpp
- src/api/.*\.cpp
- src/arch/.*\.cpp
- src/ccmain/.*\.cpp
- src/ccstruct/.*\.cpp
- src/ccutil/.*\.cpp
- src/classify/.*\.cpp
- src/cutil/.*\.cpp
- src/dict/.*\.cpp
- src/lstm/.*\.cpp
- src/opencl/.*\.cpp
- src/textord/.*\.cpp
- src/viewer/.*\.cpp
- src/wordrec/.*\.cpp
- api/.*\.h
- arch/.*\.h
- ccmain/.*\.h
- ccstruct/.*\.h
- ccutil/.*\.h
- classify/.*\.h
- cutil/.*\.h
- dict/.*\.h
- lstm/.*\.h
- opencl/.*\.h
- textord/.*\.h
- viewer/.*\.h
- wordrec/.*\.h
- src/api/.*\.h
- src/arch/.*\.h
- src/ccmain/.*\.h
- src/ccstruct/.*\.h
- src/ccutil/.*\.h
- src/classify/.*\.h
- src/cutil/.*\.h
- src/dict/.*\.h
- src/lstm/.*\.h
- src/opencl/.*\.h
- src/textord/.*\.h
- src/viewer/.*\.h
- src/wordrec/.*\.h
- vs2010/port/.*
- src/vs2010/port/.*
exclude_from_build:
- api/tesseractmain.cpp
- viewer/svpaint.cpp
- src/api/tesseractmain.cpp
- src/viewer/svpaint.cpp
include_directories:
public:
#private:
- arch
- classify
- cutil
- ccutil
- dict
- lstm
- opencl
- textord
- vs2010/port
- viewer
- wordrec
- src/arch
- src/classify
- src/cutil
- src/ccutil
- src/dict
- src/lstm
- src/opencl
- src/textord
- src/vs2010/port
- src/viewer
- src/wordrec
#public:
- api
- ccmain
- ccstruct
- ccutil
- src/api
- src/ccmain
- src/ccstruct
- src/ccutil
check_function_exists:
- getline
@ -125,23 +125,26 @@ projects:
file_write_once(${BDIR}/config_auto.h "")
post_sources: |
configure_file(
${SDIR}/src/api/tess_version.h.in
${BDIR}/tess_version.h @ONLY)
if (WIN32)
if (MSVC)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
${SDIR}/src/arch/dotproductsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
${SDIR}/src/arch/intsimdmatrixsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
${SDIR}/src/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX")
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
${SDIR}/src/arch/intsimdmatrixavx2.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX2")
endif()
else()
remove_src_dir(vs2010/port/*)
remove_src_dir(src/vs2010/port/*)
endif()
options:
@ -162,7 +165,7 @@ projects:
pvt.cppan.demo.danbloomberg.leptonica: 1
tesseract:
files: api/tesseractmain.cpp
files: src/api/tesseractmain.cpp
dependencies:
- libtesseract

View File

@ -1,30 +1,30 @@
///////////////////////////////////////////////////////////////////////
// File: dotproductavx.h
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:51:05 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel AVX intrinsics to access the SIMD instruction set.
double DotProductAVX(const double* u, const double* v, int n);
} // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_
///////////////////////////////////////////////////////////////////////
// File: dotproductavx.h
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:51:05 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel AVX intrinsics to access the SIMD instruction set.
double DotProductAVX(const double* u, const double* v, int n);
} // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_

View File

@ -1,35 +1,35 @@
///////////////////////////////////////////////////////////////////////
// File: dotproductsse.h
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:57:05 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
#include "host.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
double DotProductSSE(const double* u, const double* v, int n);
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n);
} // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_
///////////////////////////////////////////////////////////////////////
// File: dotproductsse.h
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:57:05 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
#include "host.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
double DotProductSSE(const double* u, const double* v, int n);
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n);
} // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_

View File

@ -1,82 +1,82 @@
///////////////////////////////////////////////////////////////////////
// File: simddetect.cpp
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "simddetect.h"
#include "tprintf.h"
#undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
#if !defined(ANDROID_BUILD)
#define X86_BUILD 1
#endif // !ANDROID_BUILD
#endif // x86 target
#if defined(X86_BUILD)
#if defined(__GNUC__)
#include <cpuid.h>
#elif defined(_WIN32)
#include <intrin.h>
#endif
#endif
SIMDDetect SIMDDetect::detector;
// If true, then AVX has been detected.
bool SIMDDetect::avx_available_;
bool SIMDDetect::avx2_available_;
bool SIMDDetect::avx512F_available_;
bool SIMDDetect::avx512BW_available_;
// If true, then SSe4.1 has been detected.
bool SIMDDetect::sse_available_;
// Constructor.
// Tests the architecture in a system-dependent way to detect AVX, SSE and
// any other available SIMD equipment.
// __GNUC__ is also defined by compilers that include GNU extensions such as
// clang.
SIMDDetect::SIMDDetect() {
#if defined(X86_BUILD)
#if defined(__GNUC__)
unsigned int eax, ebx, ecx, edx;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
// Note that these tests all use hex because the older compilers don't have
// the newer flags.
sse_available_ = (ecx & 0x00080000) != 0;
avx_available_ = (ecx & 0x10000000) != 0;
if (avx_available_) {
// There is supposed to be a __get_cpuid_count function, but this is all
// there is in my cpuid.h. It is a macro for an asm statement and cannot
// be used inside an if.
__cpuid_count(7, 0, eax, ebx, ecx, edx);
avx2_available_ = (ebx & 0x00000020) != 0;
avx512F_available_ = (ebx & 0x00010000) != 0;
avx512BW_available_ = (ebx & 0x40000000) != 0;
}
}
#elif defined(_WIN32)
int cpuInfo[4];
__cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
__cpuid(cpuInfo, 1);
sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
}
#else
#error "I don't know how to test for SIMD with this compiler"
#endif
#endif // X86_BUILD
}
///////////////////////////////////////////////////////////////////////
// File: simddetect.cpp
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "simddetect.h"
#include "tprintf.h"
#undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
#if !defined(ANDROID_BUILD)
#define X86_BUILD 1
#endif // !ANDROID_BUILD
#endif // x86 target
#if defined(X86_BUILD)
#if defined(__GNUC__)
#include <cpuid.h>
#elif defined(_WIN32)
#include <intrin.h>
#endif
#endif
SIMDDetect SIMDDetect::detector;
// If true, then AVX has been detected.
bool SIMDDetect::avx_available_;
bool SIMDDetect::avx2_available_;
bool SIMDDetect::avx512F_available_;
bool SIMDDetect::avx512BW_available_;
// If true, then SSe4.1 has been detected.
bool SIMDDetect::sse_available_;
// Constructor.
// Tests the architecture in a system-dependent way to detect AVX, SSE and
// any other available SIMD equipment.
// __GNUC__ is also defined by compilers that include GNU extensions such as
// clang.
SIMDDetect::SIMDDetect() {
#if defined(X86_BUILD)
#if defined(__GNUC__)
unsigned int eax, ebx, ecx, edx;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
// Note that these tests all use hex because the older compilers don't have
// the newer flags.
sse_available_ = (ecx & 0x00080000) != 0;
avx_available_ = (ecx & 0x10000000) != 0;
if (avx_available_) {
// There is supposed to be a __get_cpuid_count function, but this is all
// there is in my cpuid.h. It is a macro for an asm statement and cannot
// be used inside an if.
__cpuid_count(7, 0, eax, ebx, ecx, edx);
avx2_available_ = (ebx & 0x00000020) != 0;
avx512F_available_ = (ebx & 0x00010000) != 0;
avx512BW_available_ = (ebx & 0x40000000) != 0;
}
}
#elif defined(_WIN32)
int cpuInfo[4];
__cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
__cpuid(cpuInfo, 1);
sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
}
#else
#error "I don't know how to test for SIMD with this compiler"
#endif
#endif // X86_BUILD
}

View File

@ -1,54 +1,54 @@
///////////////////////////////////////////////////////////////////////
// File: simddetect.h
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "platform.h"
// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
class SIMDDetect {
public:
// Returns true if AVX is available on this system.
static inline bool IsAVXAvailable() { return detector.avx_available_; }
// Returns true if AVX2 (integer support) is available on this system.
static inline bool IsAVX2Available() { return detector.avx2_available_; }
// Returns true if AVX512 Foundation (float) is available on this system.
static inline bool IsAVX512FAvailable() {
return detector.avx512F_available_;
}
// Returns true if AVX512 integer is available on this system.
static inline bool IsAVX512BWAvailable() {
return detector.avx512BW_available_;
}
// Returns true if SSE4.1 is available on this system.
static inline bool IsSSEAvailable() { return detector.sse_available_; }
private:
// Constructor, must set all static member variables.
SIMDDetect();
private:
// Singleton.
static SIMDDetect detector;
// If true, then AVX has been detected.
static TESS_API bool avx_available_;
static TESS_API bool avx2_available_;
static TESS_API bool avx512F_available_;
static TESS_API bool avx512BW_available_;
// If true, then SSe4.1 has been detected.
static TESS_API bool sse_available_;
};
///////////////////////////////////////////////////////////////////////
// File: simddetect.h
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "platform.h"
// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
class SIMDDetect {
public:
// Returns true if AVX is available on this system.
static inline bool IsAVXAvailable() { return detector.avx_available_; }
// Returns true if AVX2 (integer support) is available on this system.
static inline bool IsAVX2Available() { return detector.avx2_available_; }
// Returns true if AVX512 Foundation (float) is available on this system.
static inline bool IsAVX512FAvailable() {
return detector.avx512F_available_;
}
// Returns true if AVX512 integer is available on this system.
static inline bool IsAVX512BWAvailable() {
return detector.avx512BW_available_;
}
// Returns true if SSE4.1 is available on this system.
static inline bool IsSSEAvailable() { return detector.sse_available_; }
private:
// Constructor, must set all static member variables.
SIMDDetect();
private:
// Singleton.
static SIMDDetect detector;
// If true, then AVX has been detected.
static TESS_API bool avx_available_;
static TESS_API bool avx2_available_;
static TESS_API bool avx512F_available_;
static TESS_API bool avx512BW_available_;
// If true, then SSe4.1 has been detected.
static TESS_API bool sse_available_;
};

View File

@ -1,44 +1,44 @@
/**********************************************************************
* File: control.h (Formerly control.h)
* Description: Module-independent matcher controller.
* Author: Ray Smith
* Created: Thu Apr 23 11:09:58 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**
* @file control.h
* Module-independent matcher controller.
*/
#ifndef CONTROL_H
#define CONTROL_H
#include "params.h"
#include "ocrblock.h"
#include "ratngs.h"
#include "statistc.h"
#include "pageres.h"
enum ACCEPTABLE_WERD_TYPE
{
AC_UNACCEPTABLE, ///< Unacceptable word
AC_LOWER_CASE, ///< ALL lower case
AC_UPPER_CASE, ///< ALL upper case
AC_INITIAL_CAP, ///< ALL but initial lc
AC_LC_ABBREV, ///< a.b.c.
AC_UC_ABBREV ///< A.B.C.
};
#endif
/**********************************************************************
* File: control.h (Formerly control.h)
* Description: Module-independent matcher controller.
* Author: Ray Smith
* Created: Thu Apr 23 11:09:58 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**
* @file control.h
* Module-independent matcher controller.
*/
#ifndef CONTROL_H
#define CONTROL_H
#include "params.h"
#include "ocrblock.h"
#include "ratngs.h"
#include "statistc.h"
#include "pageres.h"
enum ACCEPTABLE_WERD_TYPE
{
AC_UNACCEPTABLE, ///< Unacceptable word
AC_LOWER_CASE, ///< ALL lower case
AC_UPPER_CASE, ///< ALL upper case
AC_INITIAL_CAP, ///< ALL but initial lc
AC_LC_ABBREV, ///< a.b.c.
AC_UC_ABBREV ///< A.B.C.
};
#endif

View File

@ -1,31 +1,31 @@
/******************************************************************
* File: fixspace.h (Formerly fixspace.h)
* Description: Implements a pass over the page res, exploring the alternative
* spacing possibilities, trying to use context to improve the
word spacing
* Author: Phil Cheatle
* Created: Thu Oct 21 11:38:43 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef FIXSPACE_H
#define FIXSPACE_H
#include "pageres.h"
#include "params.h"
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
void transform_to_next_perm(WERD_RES_LIST &words);
void fixspace_dbg(WERD_RES *word);
#endif
/******************************************************************
* File: fixspace.h (Formerly fixspace.h)
* Description: Implements a pass over the page res, exploring the alternative
* spacing possibilities, trying to use context to improve the
word spacing
* Author: Phil Cheatle
* Created: Thu Oct 21 11:38:43 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef FIXSPACE_H
#define FIXSPACE_H
#include "pageres.h"
#include "params.h"
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
void transform_to_next_perm(WERD_RES_LIST &words);
void fixspace_dbg(WERD_RES *word);
#endif

View File

@ -1,64 +1,64 @@
///////////////////////////////////////////////////////////////////////
// File: mutableiterator.h
// Description: Iterator for tesseract results providing access to
// both high-level API and Tesseract internal data structures.
// Author: David Eger
// Created: Thu Feb 24 19:01:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#include "resultiterator.h"
class BLOB_CHOICE_IT;
namespace tesseract {
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// ResultIterator adds text-specific methods for access to OCR output.
// MutableIterator adds access to internal data structures.
class MutableIterator : public ResultIterator {
public:
// See argument descriptions in ResultIterator()
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height)
: ResultIterator(
LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
rect_top, rect_width, rect_height)) {}
virtual ~MutableIterator() {}
// See PageIterator and ResultIterator for most calls.
// Return access to Tesseract internals.
const PAGE_RES_IT *PageResIt() const { return it_; }
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_
///////////////////////////////////////////////////////////////////////
// File: mutableiterator.h
// Description: Iterator for tesseract results providing access to
// both high-level API and Tesseract internal data structures.
// Author: David Eger
// Created: Thu Feb 24 19:01:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#include "resultiterator.h"
class BLOB_CHOICE_IT;
namespace tesseract {
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// ResultIterator adds text-specific methods for access to OCR output.
// MutableIterator adds access to internal data structures.
class MutableIterator : public ResultIterator {
public:
// See argument descriptions in ResultIterator()
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height)
: ResultIterator(
LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
rect_top, rect_width, rect_height)) {}
virtual ~MutableIterator() {}
// See PageIterator and ResultIterator for most calls.
// Return access to Tesseract internals.
const PAGE_RES_IT *PageResIt() const { return it_; }
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_

View File

@ -1,33 +1,33 @@
/******************************************************************
* File: output.h (Formerly output.h)
* Description: Output pass
* Author: Phil Cheatle
* Created: Thu Aug 4 10:56:08 BST 1994
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef OUTPUT_H
#define OUTPUT_H
#include "params.h"
//#include "epapconv.h"
#include "pageres.h"
/** test line ends */
char determine_newline_type(WERD *word, ///< word to do
BLOCK *block, ///< current block
WERD *next_word, ///< next word
BLOCK *next_block ///< block of next word
);
#endif
/******************************************************************
* File: output.h (Formerly output.h)
* Description: Output pass
* Author: Phil Cheatle
* Created: Thu Aug 4 10:56:08 BST 1994
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef OUTPUT_H
#define OUTPUT_H
#include "params.h"
//#include "epapconv.h"
#include "pageres.h"
/** test line ends */
char determine_newline_type(WERD *word, ///< word to do
BLOCK *block, ///< current block
WERD *next_word, ///< next word
BLOCK *next_block ///< block of next word
);
#endif

View File

@ -1,108 +1,108 @@
/**********************************************************************
* File: paragraphs.h
* Description: Paragraph Detection data structures.
* Author: David Eger
* Created: 25 February 2011
*
* (C) Copyright 2011, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
#include "rect.h"
#include "ocrpara.h"
#include "genericvector.h"
#include "strngs.h"
class WERD;
class UNICHARSET;
namespace tesseract {
class MutableIterator;
// This structure captures all information needed about a text line for the
// purposes of paragraph detection. It is meant to be exceedingly light-weight
// so that we can easily test paragraph detection independent of the rest of
// Tesseract.
class RowInfo {
public:
// Constant data derived from Tesseract output.
STRING text; // the full UTF-8 text of the line.
bool ltr; // whether the majority of the text is left-to-right
// TODO(eger) make this more fine-grained.
bool has_leaders; // does the line contain leader dots (.....)?
bool has_drop_cap; // does the line have a drop cap?
int pix_ldistance; // distance to the left pblock boundary in pixels
int pix_rdistance; // distance to the right pblock boundary in pixels
float pix_xheight; // guessed xheight for the line
int average_interword_space; // average space between words in pixels.
int num_words;
TBOX lword_box; // in normalized (horiz text rows) space
TBOX rword_box; // in normalized (horiz text rows) space
STRING lword_text; // the UTF-8 text of the leftmost werd
STRING rword_text; // the UTF-8 text of the rightmost werd
// The text of a paragraph typically starts with the start of an idea and
// ends with the end of an idea. Here we define paragraph as something that
// may have a first line indent and a body indent which may be different.
// Typical words that start an idea are:
// 1. Words in western scripts that start with
// a capital letter, for example "The"
// 2. Bulleted or numbered list items, for
// example "2."
// Typical words which end an idea are words ending in punctuation marks. In
// this vocabulary, each list item is represented as a paragraph.
bool lword_indicates_list_item;
bool lword_likely_starts_idea;
bool lword_likely_ends_idea;
bool rword_indicates_list_item;
bool rword_likely_starts_idea;
bool rword_likely_ends_idea;
};
// Main entry point for Paragraph Detection Algorithm.
//
// Given a set of equally spaced textlines (described by row_infos),
// Split them into paragraphs. See http://goto/paragraphstalk
//
// Output:
// row_owners - one pointer for each row, to the paragraph it belongs to.
// paragraphs - this is the actual list of PARA objects.
// models - the list of paragraph models referenced by the PARA objects.
// caller is responsible for deleting the models.
void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs,
GenericVector<ParagraphModel *> *models);
// Given a MutableIterator to the start of a block, run DetectParagraphs on
// that block and commit the results to the underlying ROW and BLOCK structs,
// saving the ParagraphModels in models. Caller owns the models.
// We use unicharset during the function to answer questions such as "is the
// first letter of this word upper case?"
void DetectParagraphs(int debug_level,
bool after_text_recognition,
const MutableIterator *block_start,
GenericVector<ParagraphModel *> *models);
} // namespace
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_
/**********************************************************************
* File: paragraphs.h
* Description: Paragraph Detection data structures.
* Author: David Eger
* Created: 25 February 2011
*
* (C) Copyright 2011, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
#include "rect.h"
#include "ocrpara.h"
#include "genericvector.h"
#include "strngs.h"
class WERD;
class UNICHARSET;
namespace tesseract {
class MutableIterator;
// This structure captures all information needed about a text line for the
// purposes of paragraph detection. It is meant to be exceedingly light-weight
// so that we can easily test paragraph detection independent of the rest of
// Tesseract.
class RowInfo {
public:
// Constant data derived from Tesseract output.
STRING text; // the full UTF-8 text of the line.
bool ltr; // whether the majority of the text is left-to-right
// TODO(eger) make this more fine-grained.
bool has_leaders; // does the line contain leader dots (.....)?
bool has_drop_cap; // does the line have a drop cap?
int pix_ldistance; // distance to the left pblock boundary in pixels
int pix_rdistance; // distance to the right pblock boundary in pixels
float pix_xheight; // guessed xheight for the line
int average_interword_space; // average space between words in pixels.
int num_words;
TBOX lword_box; // in normalized (horiz text rows) space
TBOX rword_box; // in normalized (horiz text rows) space
STRING lword_text; // the UTF-8 text of the leftmost werd
STRING rword_text; // the UTF-8 text of the rightmost werd
// The text of a paragraph typically starts with the start of an idea and
// ends with the end of an idea. Here we define paragraph as something that
// may have a first line indent and a body indent which may be different.
// Typical words that start an idea are:
// 1. Words in western scripts that start with
// a capital letter, for example "The"
// 2. Bulleted or numbered list items, for
// example "2."
// Typical words which end an idea are words ending in punctuation marks. In
// this vocabulary, each list item is represented as a paragraph.
bool lword_indicates_list_item;
bool lword_likely_starts_idea;
bool lword_likely_ends_idea;
bool rword_indicates_list_item;
bool rword_likely_starts_idea;
bool rword_likely_ends_idea;
};
// Main entry point for Paragraph Detection Algorithm.
//
// Given a set of equally spaced textlines (described by row_infos),
// Split them into paragraphs. See http://goto/paragraphstalk
//
// Output:
// row_owners - one pointer for each row, to the paragraph it belongs to.
// paragraphs - this is the actual list of PARA objects.
// models - the list of paragraph models referenced by the PARA objects.
// caller is responsible for deleting the models.
void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs,
GenericVector<ParagraphModel *> *models);
// Given a MutableIterator to the start of a block, run DetectParagraphs on
// that block and commit the results to the underlying ROW and BLOCK structs,
// saving the ParagraphModels in models. Caller owns the models.
// We use unicharset during the function to answer questions such as "is the
// first letter of this word upper case?"
void DetectParagraphs(int debug_level,
bool after_text_recognition,
const MutableIterator *block_start,
GenericVector<ParagraphModel *> *models);
} // namespace
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_

View File

@ -1,87 +1,87 @@
///////////////////////////////////////////////////////////////////////
// File: pgedit.h
// Description: Page structure file editor
// Author: Joern Wanke
// Created: Wed Jul 18 10:05:01 PDT 2007
//
// (C) Copyright 2007, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef PGEDIT_H
#define PGEDIT_H
#include "ocrblock.h"
#include "ocrrow.h"
#include "werd.h"
#include "rect.h"
#include "params.h"
#include "tesseractclass.h"
class ScrollView;
class SVMenuNode;
struct SVEvent;
// A small event handler class to process incoming events to
// this window.
class PGEventHandler : public SVEventHandler {
public:
PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
}
void Notify(const SVEvent* sve);
private:
tesseract::Tesseract* tess_;
};
extern BLOCK_LIST *current_block_list;
extern STRING_VAR_H (editor_image_win_name, "EditorImage",
"Editor image window name");
extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos");
extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos");
extern INT_VAR_H (editor_image_height, 680, "Editor image height");
extern INT_VAR_H (editor_image_width, 655, "Editor image width");
extern INT_VAR_H (editor_image_word_bb_color, BLUE,
"Word bounding box colour");
extern INT_VAR_H (editor_image_blob_bb_color, YELLOW,
"Blob bounding box colour");
extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour");
extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin",
"Editor debug window name");
extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos");
extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos");
extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height");
extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width");
extern STRING_VAR_H (editor_word_name, "BlnWords",
"BL normalised word window");
extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos");
extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos");
extern INT_VAR_H (editor_word_height, 240, "Word window height");
extern INT_VAR_H (editor_word_width, 655, "Word window width");
extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image");
ScrollView* bln_word_window_handle(); //return handle
void build_image_window(int width, int height);
void display_bln_lines(ScrollView window,
ScrollView::Color colour,
float scale_factor,
float y_offset,
float minx,
float maxx);
//function to call
void pgeditor_msg( //message display
const char *msg);
void pgeditor_show_point( //display coords
SVEvent *event);
//put bln word in box
void show_point(PAGE_RES* page_res, float x, float y);
#endif
///////////////////////////////////////////////////////////////////////
// File: pgedit.h
// Description: Page structure file editor
// Author: Joern Wanke
// Created: Wed Jul 18 10:05:01 PDT 2007
//
// (C) Copyright 2007, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef PGEDIT_H
#define PGEDIT_H
#include "ocrblock.h"
#include "ocrrow.h"
#include "werd.h"
#include "rect.h"
#include "params.h"
#include "tesseractclass.h"
class ScrollView;
class SVMenuNode;
struct SVEvent;
// A small event handler class to process incoming events to
// this window.
class PGEventHandler : public SVEventHandler {
public:
PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
}
void Notify(const SVEvent* sve);
private:
tesseract::Tesseract* tess_;
};
extern BLOCK_LIST *current_block_list;
extern STRING_VAR_H (editor_image_win_name, "EditorImage",
"Editor image window name");
extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos");
extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos");
extern INT_VAR_H (editor_image_height, 680, "Editor image height");
extern INT_VAR_H (editor_image_width, 655, "Editor image width");
extern INT_VAR_H (editor_image_word_bb_color, BLUE,
"Word bounding box colour");
extern INT_VAR_H (editor_image_blob_bb_color, YELLOW,
"Blob bounding box colour");
extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour");
extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin",
"Editor debug window name");
extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos");
extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos");
extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height");
extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width");
extern STRING_VAR_H (editor_word_name, "BlnWords",
"BL normalised word window");
extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos");
extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos");
extern INT_VAR_H (editor_word_height, 240, "Word window height");
extern INT_VAR_H (editor_word_width, 655, "Word window width");
extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image");
ScrollView* bln_word_window_handle(); //return handle
void build_image_window(int width, int height);
void display_bln_lines(ScrollView window,
ScrollView::Color colour,
float scale_factor,
float y_offset,
float minx,
float maxx);
//function to call
void pgeditor_msg( //message display
const char *msg);
void pgeditor_show_point( //display coords
SVEvent *event);
//put bln word in box
void show_point(PAGE_RES* page_res, float x, float y);
#endif

View File

@ -1,34 +1,34 @@
/**********************************************************************
* File: reject.h (Formerly reject.h)
* Description: Rejection functions used in tessedit
* Author: Phil Cheatle
* Created: Wed Sep 23 16:50:21 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef REJECT_H
#define REJECT_H
#include "params.h"
#include "pageres.h"
void reject_blanks(WERD_RES *word);
void reject_poor_matches(WERD_RES *word);
float compute_reject_threshold(WERD_CHOICE* word);
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
void dont_allow_1Il(WERD_RES *word);
void flip_hyphens(WERD_RES *word);
void flip_0O(WERD_RES *word);
BOOL8 non_0_digit(const char* str, int length);
#endif
/**********************************************************************
* File: reject.h (Formerly reject.h)
* Description: Rejection functions used in tessedit
* Author: Phil Cheatle
* Created: Wed Sep 23 16:50:21 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef REJECT_H
#define REJECT_H
#include "params.h"
#include "pageres.h"
void reject_blanks(WERD_RES *word);
void reject_poor_matches(WERD_RES *word);
float compute_reject_threshold(WERD_CHOICE* word);
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
void dont_allow_1Il(WERD_RES *word);
void flip_hyphens(WERD_RES *word);
void flip_0O(WERD_RES *word);
BOOL8 non_0_digit(const char* str, int length);
#endif

View File

@ -1,28 +1,28 @@
/**********************************************************************
* File: tessbox.h (Formerly tessbox.h)
* Description: Black boxed Tess for developing a resaljet.
* Author: Ray Smith
* Created: Thu Apr 23 11:03:36 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSBOX_H
#define TESSBOX_H
#include "ratngs.h"
#include "tesseractclass.h"
// TODO(ocr-team): Delete this along with other empty header files.
#endif
/**********************************************************************
* File: tessbox.h (Formerly tessbox.h)
* Description: Black boxed Tess for developing a resaljet.
* Author: Ray Smith
* Created: Thu Apr 23 11:03:36 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSBOX_H
#define TESSBOX_H
#include "ratngs.h"
#include "tesseractclass.h"
// TODO(ocr-team): Delete this along with other empty header files.
#endif

View File

@ -1,29 +1,29 @@
/**********************************************************************
* File: tessedit.h (Formerly tessedit.h)
* Description: Main program for merge of tess and editor.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSEDIT_H
#define TESSEDIT_H
#include "blobs.h"
#include "pgedit.h"
//progress monitor
extern ETEXT_DESC *global_monitor;
#endif
/**********************************************************************
* File: tessedit.h (Formerly tessedit.h)
* Description: Main program for merge of tess and editor.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSEDIT_H
#define TESSEDIT_H
#include "blobs.h"
#include "pgedit.h"
//progress monitor
extern ETEXT_DESC *global_monitor;
#endif

View File

@ -1,24 +1,24 @@
/**********************************************************************
* File: tessvars.cpp (Formerly tessvars.c)
* Description: Variables and other globals for tessedit.
* Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include "tessvars.h"
FILE *debug_fp = stderr; // write debug stuff here
/**********************************************************************
* File: tessvars.cpp (Formerly tessvars.c)
* Description: Variables and other globals for tessedit.
* Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include "tessvars.h"
FILE *debug_fp = stderr; // write debug stuff here

View File

@ -1,27 +1,27 @@
/**********************************************************************
* File: tessvars.h (Formerly tessvars.h)
* Description: Variables and other globals for tessedit.
* Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSVARS_H
#define TESSVARS_H
#include <stdio.h>
extern FILE *debug_fp; // write debug stuff here
#endif
/**********************************************************************
* File: tessvars.h (Formerly tessvars.h)
* Description: Variables and other globals for tessedit.
* Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSVARS_H
#define TESSVARS_H
#include <stdio.h>
extern FILE *debug_fp; // write debug stuff here
#endif

View File

@ -1,27 +1,27 @@
/**********************************************************************
* File: wordit.h
* Description: An iterator for passing over all the words in a document.
* Author: Ray Smith
* Created: Mon Apr 27 08:51:22 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef WERDIT_H
#define WERDIT_H
#include "pageres.h"
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
#endif
/**********************************************************************
* File: wordit.h
* Description: An iterator for passing over all the words in a document.
* Author: Ray Smith
* Created: Mon Apr 27 08:51:22 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef WERDIT_H
#define WERDIT_H
#include "pageres.h"
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
#endif

View File

@ -1,29 +1,29 @@
/**********************************************************************
* File: blckerr.h (Formerly blockerr.h)
* Description: Error codes for the page block classes.
* Author: Ray Smith
* Created: Tue Mar 19 17:43:30 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLCKERR_H
#define BLCKERR_H
#include "errcode.h"
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
#endif
/**********************************************************************
* File: blckerr.h (Formerly blockerr.h)
* Description: Error codes for the page block classes.
* Author: Ray Smith
* Created: Tue Mar 19 17:43:30 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLCKERR_H
#define BLCKERR_H
#include "errcode.h"
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
#endif

View File

@ -1,100 +1,100 @@
///////////////////////////////////////////////////////////////////////
// File: boxword.h
// Description: Class to represent the bounding boxes of the output.
// Author: Ray Smith
// Created: Tue May 25 14:18:14 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
#define TESSERACT_CSTRUCT_BOXWORD_H_
#include "genericvector.h"
#include "rect.h"
#include "unichar.h"
class BLOCK;
class DENORM;
struct TWERD;
class UNICHARSET;
class WERD;
class WERD_CHOICE;
class WERD_RES;
namespace tesseract {
// Class to hold an array of bounding boxes for an output word and
// the bounding box of the whole word.
class BoxWord {
public:
BoxWord();
explicit BoxWord(const BoxWord& src);
~BoxWord();
BoxWord& operator=(const BoxWord& src);
void CopyFrom(const BoxWord& src);
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
// switch back to original image coordinates.
static BoxWord* CopyFromNormalized(TWERD* tessword);
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation.
void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
// Merges the boxes from start to end, not including end, and deletes
// the boxes between start and end.
void MergeBoxes(int start, int end);
// Inserts a new box before the given index.
// Recomputes the bounding box.
void InsertBox(int index, const TBOX& box);
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void ChangeBox(int index, const TBOX& box);
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void DeleteBox(int index);
// Deletes all the boxes stored in BoxWord.
void DeleteAllBoxes();
// This and other putatively are the same, so call the (permanent) callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.
void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
const TBOX& bounding_box() const {
return bbox_;
}
int length() const { return length_; }
const TBOX& BlobBox(int index) const {
return boxes_[index];
}
private:
void ComputeBoundingBox();
TBOX bbox_;
int length_;
GenericVector<TBOX> boxes_;
};
} // namespace tesseract.
#endif // TESSERACT_CSTRUCT_BOXWORD_H_
///////////////////////////////////////////////////////////////////////
// File: boxword.h
// Description: Class to represent the bounding boxes of the output.
// Author: Ray Smith
// Created: Tue May 25 14:18:14 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
#define TESSERACT_CSTRUCT_BOXWORD_H_
#include "genericvector.h"
#include "rect.h"
#include "unichar.h"
class BLOCK;
class DENORM;
struct TWERD;
class UNICHARSET;
class WERD;
class WERD_CHOICE;
class WERD_RES;
namespace tesseract {
// Class to hold an array of bounding boxes for an output word and
// the bounding box of the whole word.
class BoxWord {
public:
BoxWord();
explicit BoxWord(const BoxWord& src);
~BoxWord();
BoxWord& operator=(const BoxWord& src);
void CopyFrom(const BoxWord& src);
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
// switch back to original image coordinates.
static BoxWord* CopyFromNormalized(TWERD* tessword);
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation.
void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
// Merges the boxes from start to end, not including end, and deletes
// the boxes between start and end.
void MergeBoxes(int start, int end);
// Inserts a new box before the given index.
// Recomputes the bounding box.
void InsertBox(int index, const TBOX& box);
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void ChangeBox(int index, const TBOX& box);
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void DeleteBox(int index);
// Deletes all the boxes stored in BoxWord.
void DeleteAllBoxes();
// This and other putatively are the same, so call the (permanent) callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.
void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
const TBOX& bounding_box() const {
return bbox_;
}
int length() const { return length_; }
const TBOX& BlobBox(int index) const {
return boxes_[index];
}
private:
void ComputeBoundingBox();
TBOX bbox_;
int length_;
GenericVector<TBOX> boxes_;
};
} // namespace tesseract.
#endif // TESSERACT_CSTRUCT_BOXWORD_H_

View File

@ -1,36 +1,36 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.cpp
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ccstruct.h"
namespace tesseract {
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
const double CCStruct::kDescenderFraction = 0.25;
const double CCStruct::kXHeightFraction = 0.5;
const double CCStruct::kAscenderFraction = 0.25;
const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
(CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
CCStruct::CCStruct() {}
CCStruct::~CCStruct() {
}
}
///////////////////////////////////////////////////////////////////////
// File: ccstruct.cpp
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ccstruct.h"
namespace tesseract {
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
const double CCStruct::kDescenderFraction = 0.25;
const double CCStruct::kXHeightFraction = 0.5;
const double CCStruct::kAscenderFraction = 0.25;
const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
(CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
CCStruct::CCStruct() {}
CCStruct::~CCStruct() {
}
}

View File

@ -1,43 +1,43 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.h
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
#include "cutil.h"
namespace tesseract {
class CCStruct : public CUtil {
public:
CCStruct();
~CCStruct();
// Globally accessible constants.
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
static const double kDescenderFraction; // = 0.25;
static const double kXHeightFraction; // = 0.5;
static const double kAscenderFraction; // = 0.25;
// Derived value giving the x-height as a fraction of cap-height.
static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
};
class Tesseract;
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
///////////////////////////////////////////////////////////////////////
// File: ccstruct.h
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
#include "cutil.h"
namespace tesseract {
class CCStruct : public CUtil {
public:
CCStruct();
~CCStruct();
// Globally accessible constants.
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
static const double kDescenderFraction; // = 0.25;
static const double kXHeightFraction; // = 0.5;
static const double kAscenderFraction; // = 0.25;
// Derived value giving the x-height as a fraction of cap-height.
static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
};
class Tesseract;
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_

View File

@ -1,52 +1,52 @@
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#include "allheaders.h"
namespace tesseract {
// Class to hold a Pixa collection of debug images with captions and save them
// to a PDF file.
class DebugPixa {
public:
// TODO(rays) add another constructor with size control.
DebugPixa() {
pixa_ = pixaCreate(0);
fonts_ = bmfCreate(nullptr, 14);
}
// If the filename_ has been set and there are any debug images, they are
// written to the set filename_.
~DebugPixa() {
pixaDestroy(&pixa_);
bmfDestroy(&fonts_);
}
// Adds the given pix to the set of pages in the PDF file, with the given
// caption added to the top.
void AddPix(const Pix* pix, const char* caption) {
int depth = pixGetDepth(const_cast<Pix*>(pix));
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
Pix* pix_debug = pixAddSingleTextblock(
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
pixaAddPix(pixa_, pix_debug, L_INSERT);
}
// Sets the destination filename and enables images to be written to a PDF
// on destruction.
void WritePDF(const char* filename) {
if (pixaGetCount(pixa_) > 0) {
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
pixaClear(pixa_);
}
}
private:
// The collection of images to put in the PDF.
Pixa* pixa_;
// The fonts used to draw text captions.
L_Bmf* fonts_;
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#include "allheaders.h"
namespace tesseract {
// Class to hold a Pixa collection of debug images with captions and save them
// to a PDF file.
class DebugPixa {
public:
// TODO(rays) add another constructor with size control.
DebugPixa() {
pixa_ = pixaCreate(0);
fonts_ = bmfCreate(nullptr, 14);
}
// If the filename_ has been set and there are any debug images, they are
// written to the set filename_.
~DebugPixa() {
pixaDestroy(&pixa_);
bmfDestroy(&fonts_);
}
// Adds the given pix to the set of pages in the PDF file, with the given
// caption added to the top.
void AddPix(const Pix* pix, const char* caption) {
int depth = pixGetDepth(const_cast<Pix*>(pix));
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
Pix* pix_debug = pixAddSingleTextblock(
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
pixaAddPix(pixa_, pix_debug, L_INSERT);
}
// Sets the destination filename and enables images to be written to a PDF
// on destruction.
void WritePDF(const char* filename) {
if (pixaGetCount(pixa_) > 0) {
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
pixaClear(pixa_);
}
}
private:
// The collection of images to put in the PDF.
Pixa* pixa_;
// The fonts used to draw text captions.
L_Bmf* fonts_;
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_

View File

@ -1,295 +1,295 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.cpp
// Description: Deterministic least median squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:45:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "detlinefit.h"
#include "statistc.h"
#include "ndminx.h"
#include "tprintf.h"
namespace tesseract {
// The number of points to consider at each end.
const int kNumEndPoints = 3;
// The minimum number of points at which to switch to number of points
// for badly fitted lines.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// The maximum real distance to use before switching to number of
// mis-fitted points, which will get square-rooted for true distance.
const int kMaxRealDistance = 2.0;
DetLineFit::DetLineFit() : square_length_(0.0) {
}
DetLineFit::~DetLineFit() {
}
// Delete all Added points.
void DetLineFit::Clear() {
pts_.clear();
distances_.clear();
}
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
void DetLineFit::Add(const ICOORD& pt) {
pts_.push_back(PointWidth(pt, 0));
}
// Associates a half-width with the given point if a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
pts_.push_back(PointWidth(pt, halfwidth));
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
double DetLineFit::Fit(int skip_first, int skip_last,
ICOORD* pt1, ICOORD* pt2) {
// Do something sensible with no points.
if (pts_.empty()) {
pt1->set_x(0);
pt1->set_y(0);
*pt2 = *pt1;
return 0.0;
}
// Count the points and find the first and last kNumEndPoints.
int pt_count = pts_.size();
ICOORD* starts[kNumEndPoints];
if (skip_first >= pt_count) skip_first = pt_count - 1;
int start_count = 0;
int end_i = MIN(skip_first + kNumEndPoints, pt_count);
for (int i = skip_first; i < end_i; ++i) {
starts[start_count++] = &pts_[i].pt;
}
ICOORD* ends[kNumEndPoints];
if (skip_last >= pt_count) skip_last = pt_count - 1;
int end_count = 0;
end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
ends[end_count++] = &pts_[i].pt;
}
// 1 or 2 points need special treatment.
if (pt_count <= 2) {
*pt1 = *starts[0];
if (pt_count > 1)
*pt2 = *ends[0];
else
*pt2 = *pt1;
return 0.0;
}
// Although with between 2 and 2*kNumEndPoints-1 points, there will be
// overlap in the starts, ends sets, this is OK and taken care of by the
// if (*start != *end) test below, which also tests for equal input points.
double best_uq = -1.0;
// Iterate each pair of points and find the best fitting line.
for (int i = 0; i < start_count; ++i) {
ICOORD* start = starts[i];
for (int j = 0; j < end_count; ++j) {
ICOORD* end = ends[j];
if (*start != *end) {
ComputeDistances(*start, *end);
// Compute the upper quartile error from the line.
double dist = EvaluateLineFit();
if (dist < best_uq || best_uq < 0.0) {
best_uq = dist;
*pt1 = *start;
*pt2 = *end;
}
}
}
}
// Finally compute the square root to return the true distance.
return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
}
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double DetLineFit::ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist,
bool debug, ICOORD* line_pt) {
ComputeConstrainedDistances(direction, min_dist, max_dist);
// Do something sensible with no points or computed distances.
if (pts_.empty() || distances_.empty()) {
line_pt->set_x(0);
line_pt->set_y(0);
return 0.0;
}
int median_index = distances_.choose_nth_item(distances_.size() / 2);
*line_pt = distances_[median_index].data;
if (debug) {
tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
direction.x(), direction.y(),
line_pt->x(), line_pt->y(), distances_.size());
for (int i = 0; i < distances_.size(); ++i) {
tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
distances_[i].data.y(), distances_[i].key);
}
tprintf("Result = %d\n", median_index);
}
// Center distances on the fitted point.
double dist_origin = direction * *line_pt;
for (int i = 0; i < distances_.size(); ++i) {
distances_[i].key -= dist_origin;
}
return sqrt(EvaluateLineFit());
}
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
bool DetLineFit::SufficientPointsForIndependentFit() const {
return distances_.size() >= kMinPointsForErrorCount;
}
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double DetLineFit::Fit(float* m, float* c) {
ICOORD start, end;
double error = Fit(&start, &end);
if (end.x() != start.x()) {
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
*c = start.y() - *m * start.x();
} else {
*m = 0.0f;
*c = 0.0f;
}
return error;
}
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double DetLineFit::ConstrainedFit(double m, float* c) {
// Do something sensible with no points.
if (pts_.empty()) {
*c = 0.0f;
return 0.0;
}
double cos = 1.0 / sqrt(1.0 + m * m);
FCOORD direction(cos, m * cos);
ICOORD line_pt;
double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
&line_pt);
*c = line_pt.y() - line_pt.x() * m;
return error;
}
// Computes and returns the squared evaluation metric for a line fit.
double DetLineFit::EvaluateLineFit() {
// Compute the upper quartile error from the line.
double dist = ComputeUpperQuartileError();
if (distances_.size() >= kMinPointsForErrorCount &&
dist > kMaxRealDistance * kMaxRealDistance) {
// Use the number of mis-fitted points as the error metric, as this
// gives a better measure of fit for badly fitted lines where more
// than a quarter are badly fitted.
double threshold = kMaxRealDistance * sqrt(square_length_);
dist = NumberOfMisfittedPoints(threshold);
}
return dist;
}
// Computes the absolute error distances of the points from the line,
// and returns the squared upper-quartile error distance.
double DetLineFit::ComputeUpperQuartileError() {
int num_errors = distances_.size();
if (num_errors == 0) return 0.0;
// Get the absolute values of the errors.
for (int i = 0; i < num_errors; ++i) {
if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
}
// Now get the upper quartile distance.
int index = distances_.choose_nth_item(3 * num_errors / 4);
double dist = distances_[index].key;
// The true distance is the square root of the dist squared / square_length.
// Don't bother with the square root. Just return the square distance.
return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
}
// Returns the number of sample points that have an error more than threshold.
int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
int num_misfits = 0;
int num_dists = distances_.size();
// Get the absolute values of the errors.
for (int i = 0; i < num_dists; ++i) {
if (distances_[i].key > threshold)
++num_misfits;
}
return num_misfits;
}
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances.
// Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half.
void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
distances_.truncate(0);
ICOORD line_vector = end;
line_vector -= start;
square_length_ = line_vector.sqlength();
int line_length = IntCastRounded(sqrt(square_length_));
// Compute the distance of each point from the line.
int prev_abs_dist = 0;
int prev_dot = 0;
for (int i = 0; i < pts_.size(); ++i) {
ICOORD pt_vector = pts_[i].pt;
pt_vector -= start;
int dot = line_vector % pt_vector;
// Compute |line_vector||pt_vector|sin(angle between)
int dist = line_vector * pt_vector;
int abs_dist = dist < 0 ? -dist : dist;
if (abs_dist > prev_abs_dist && i > 0) {
// Ignore this point if it overlaps the previous one.
int separation = abs(dot - prev_dot);
if (separation < line_length * pts_[i].halfwidth ||
separation < line_length * pts_[i - 1].halfwidth)
continue;
}
distances_.push_back(DistPointPair(dist, pts_[i].pt));
prev_abs_dist = abs_dist;
prev_dot = dot;
}
}
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the give distance range,
// storing the actual (signed) cross products in distances_.
void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist) {
distances_.truncate(0);
square_length_ = direction.sqlength();
// Compute the distance of each point from the line.
for (int i = 0; i < pts_.size(); ++i) {
FCOORD pt_vector = pts_[i].pt;
// Compute |line_vector||pt_vector|sin(angle between)
double dist = direction * pt_vector;
if (min_dist <= dist && dist <= max_dist)
distances_.push_back(DistPointPair(dist, pts_[i].pt));
}
}
} // namespace tesseract.
///////////////////////////////////////////////////////////////////////
// File: detlinefit.cpp
// Description: Deterministic least median squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:45:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "detlinefit.h"
#include "statistc.h"
#include "ndminx.h"
#include "tprintf.h"
namespace tesseract {
// The number of points to consider at each end.
const int kNumEndPoints = 3;
// The minimum number of points at which to switch to number of points
// for badly fitted lines.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// The maximum real distance to use before switching to number of
// mis-fitted points, which will get square-rooted for true distance.
const int kMaxRealDistance = 2.0;
DetLineFit::DetLineFit() : square_length_(0.0) {
}
DetLineFit::~DetLineFit() {
}
// Delete all Added points.
void DetLineFit::Clear() {
pts_.clear();
distances_.clear();
}
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
void DetLineFit::Add(const ICOORD& pt) {
pts_.push_back(PointWidth(pt, 0));
}
// Associates a half-width with the given point if a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
pts_.push_back(PointWidth(pt, halfwidth));
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
double DetLineFit::Fit(int skip_first, int skip_last,
ICOORD* pt1, ICOORD* pt2) {
// Do something sensible with no points.
if (pts_.empty()) {
pt1->set_x(0);
pt1->set_y(0);
*pt2 = *pt1;
return 0.0;
}
// Count the points and find the first and last kNumEndPoints.
int pt_count = pts_.size();
ICOORD* starts[kNumEndPoints];
if (skip_first >= pt_count) skip_first = pt_count - 1;
int start_count = 0;
int end_i = MIN(skip_first + kNumEndPoints, pt_count);
for (int i = skip_first; i < end_i; ++i) {
starts[start_count++] = &pts_[i].pt;
}
ICOORD* ends[kNumEndPoints];
if (skip_last >= pt_count) skip_last = pt_count - 1;
int end_count = 0;
end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
ends[end_count++] = &pts_[i].pt;
}
// 1 or 2 points need special treatment.
if (pt_count <= 2) {
*pt1 = *starts[0];
if (pt_count > 1)
*pt2 = *ends[0];
else
*pt2 = *pt1;
return 0.0;
}
// Although with between 2 and 2*kNumEndPoints-1 points, there will be
// overlap in the starts, ends sets, this is OK and taken care of by the
// if (*start != *end) test below, which also tests for equal input points.
double best_uq = -1.0;
// Iterate each pair of points and find the best fitting line.
for (int i = 0; i < start_count; ++i) {
ICOORD* start = starts[i];
for (int j = 0; j < end_count; ++j) {
ICOORD* end = ends[j];
if (*start != *end) {
ComputeDistances(*start, *end);
// Compute the upper quartile error from the line.
double dist = EvaluateLineFit();
if (dist < best_uq || best_uq < 0.0) {
best_uq = dist;
*pt1 = *start;
*pt2 = *end;
}
}
}
}
// Finally compute the square root to return the true distance.
return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
}
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double DetLineFit::ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist,
bool debug, ICOORD* line_pt) {
ComputeConstrainedDistances(direction, min_dist, max_dist);
// Do something sensible with no points or computed distances.
if (pts_.empty() || distances_.empty()) {
line_pt->set_x(0);
line_pt->set_y(0);
return 0.0;
}
int median_index = distances_.choose_nth_item(distances_.size() / 2);
*line_pt = distances_[median_index].data;
if (debug) {
tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
direction.x(), direction.y(),
line_pt->x(), line_pt->y(), distances_.size());
for (int i = 0; i < distances_.size(); ++i) {
tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
distances_[i].data.y(), distances_[i].key);
}
tprintf("Result = %d\n", median_index);
}
// Center distances on the fitted point.
double dist_origin = direction * *line_pt;
for (int i = 0; i < distances_.size(); ++i) {
distances_[i].key -= dist_origin;
}
return sqrt(EvaluateLineFit());
}
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
bool DetLineFit::SufficientPointsForIndependentFit() const {
return distances_.size() >= kMinPointsForErrorCount;
}
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double DetLineFit::Fit(float* m, float* c) {
ICOORD start, end;
double error = Fit(&start, &end);
if (end.x() != start.x()) {
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
*c = start.y() - *m * start.x();
} else {
*m = 0.0f;
*c = 0.0f;
}
return error;
}
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double DetLineFit::ConstrainedFit(double m, float* c) {
// Do something sensible with no points.
if (pts_.empty()) {
*c = 0.0f;
return 0.0;
}
double cos = 1.0 / sqrt(1.0 + m * m);
FCOORD direction(cos, m * cos);
ICOORD line_pt;
double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
&line_pt);
*c = line_pt.y() - line_pt.x() * m;
return error;
}
// Computes and returns the squared evaluation metric for a line fit.
double DetLineFit::EvaluateLineFit() {
// Compute the upper quartile error from the line.
double dist = ComputeUpperQuartileError();
if (distances_.size() >= kMinPointsForErrorCount &&
dist > kMaxRealDistance * kMaxRealDistance) {
// Use the number of mis-fitted points as the error metric, as this
// gives a better measure of fit for badly fitted lines where more
// than a quarter are badly fitted.
double threshold = kMaxRealDistance * sqrt(square_length_);
dist = NumberOfMisfittedPoints(threshold);
}
return dist;
}
// Computes the absolute error distances of the points from the line,
// and returns the squared upper-quartile error distance.
double DetLineFit::ComputeUpperQuartileError() {
int num_errors = distances_.size();
if (num_errors == 0) return 0.0;
// Get the absolute values of the errors.
for (int i = 0; i < num_errors; ++i) {
if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
}
// Now get the upper quartile distance.
int index = distances_.choose_nth_item(3 * num_errors / 4);
double dist = distances_[index].key;
// The true distance is the square root of the dist squared / square_length.
// Don't bother with the square root. Just return the square distance.
return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
}
// Returns the number of sample points that have an error more than threshold.
int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
int num_misfits = 0;
int num_dists = distances_.size();
// Get the absolute values of the errors.
for (int i = 0; i < num_dists; ++i) {
if (distances_[i].key > threshold)
++num_misfits;
}
return num_misfits;
}
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances.
// Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half.
void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
distances_.truncate(0);
ICOORD line_vector = end;
line_vector -= start;
square_length_ = line_vector.sqlength();
int line_length = IntCastRounded(sqrt(square_length_));
// Compute the distance of each point from the line.
int prev_abs_dist = 0;
int prev_dot = 0;
for (int i = 0; i < pts_.size(); ++i) {
ICOORD pt_vector = pts_[i].pt;
pt_vector -= start;
int dot = line_vector % pt_vector;
// Compute |line_vector||pt_vector|sin(angle between)
int dist = line_vector * pt_vector;
int abs_dist = dist < 0 ? -dist : dist;
if (abs_dist > prev_abs_dist && i > 0) {
// Ignore this point if it overlaps the previous one.
int separation = abs(dot - prev_dot);
if (separation < line_length * pts_[i].halfwidth ||
separation < line_length * pts_[i - 1].halfwidth)
continue;
}
distances_.push_back(DistPointPair(dist, pts_[i].pt));
prev_abs_dist = abs_dist;
prev_dot = dot;
}
}
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the give distance range,
// storing the actual (signed) cross products in distances_.
void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist) {
distances_.truncate(0);
square_length_ = direction.sqlength();
// Compute the distance of each point from the line.
for (int i = 0; i < pts_.size(); ++i) {
FCOORD pt_vector = pts_[i].pt;
// Compute |line_vector||pt_vector|sin(angle between)
double dist = direction * pt_vector;
if (min_dist <= dist && dist <= max_dist)
distances_.push_back(DistPointPair(dist, pts_[i].pt));
}
}
} // namespace tesseract.

View File

@ -1,164 +1,164 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.h
// Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:35:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
#include "genericvector.h"
#include "kdpair.h"
#include "points.h"
namespace tesseract {
// This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor
// changes in the input.
// *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract.
class DetLineFit {
public:
DetLineFit();
~DetLineFit();
// Delete all Added points.
void Clear();
// Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
// Add must be called on points in sequence along the line.
void Add(const ICOORD& pt);
// Associates a half-width with the given point if a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void Add(const ICOORD& pt, int halfwidth);
// Fits a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error.
double Fit(ICOORD* pt1, ICOORD* pt2) {
return Fit(0, 0, pt1, pt2);
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist,
bool debug, ICOORD* line_pt);
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
bool SufficientPointsForIndependentFit() const;
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double Fit(float* m, float* c);
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double ConstrainedFit(double m, float* c);
private:
// Simple struct to hold an ICOORD point and a halfwidth representing half
// the "width" (supposedly approximately parallel to the direction of the
// line) of each point, such that distant points can be discarded when they
// overlap nearer points. (Think i dot and other diacritics or noise.)
struct PointWidth {
PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
PointWidth(const ICOORD& pt0, int halfwidth0)
: pt(pt0), halfwidth(halfwidth0) {}
ICOORD pt;
int halfwidth;
};
// Type holds the distance of each point from the fitted line and the point
// itself. Use of double allows integer distances from ICOORDs to be stored
// exactly, and also the floating point results from ConstrainedFit.
typedef KDPairInc<double, ICOORD> DistPointPair;
// Computes and returns the squared evaluation metric for a line fit.
double EvaluateLineFit();
// Computes the absolute values of the precomputed distances_,
// and returns the squared upper-quartile error distance.
double ComputeUpperQuartileError();
// Returns the number of sample points that have an error more than threshold.
int NumberOfMisfittedPoints(double threshold) const;
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances_.
// Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half.
void ComputeDistances(const ICOORD& start, const ICOORD& end);
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the give distance range,
// storing the actual (signed) cross products in distances_.
void ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist);
// Stores all the source points in the order they were given and their
// halfwidths, if any.
GenericVector<PointWidth> pts_;
// Stores the computed perpendicular distances of (some of) the pts_ from a
// given vector (assuming it goes through the origin, making it a line).
// Since the distances may be a subset of the input points, and get
// re-ordered by the nth_item function, the original point is stored
// along side the distance.
GenericVector<DistPointPair> distances_; // Distances of points.
// The squared length of the vector used to compute distances_.
double square_length_;
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_
///////////////////////////////////////////////////////////////////////
// File: detlinefit.h
// Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:35:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
#include "genericvector.h"
#include "kdpair.h"
#include "points.h"
namespace tesseract {
// This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor
// changes in the input.
// *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract.
class DetLineFit {
public:
DetLineFit();
~DetLineFit();
// Delete all Added points.
void Clear();
// Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
// Add must be called on points in sequence along the line.
void Add(const ICOORD& pt);
// Associates a half-width with the given point if a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void Add(const ICOORD& pt, int halfwidth);
// Fits a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error.
double Fit(ICOORD* pt1, ICOORD* pt2) {
return Fit(0, 0, pt1, pt2);
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist,
bool debug, ICOORD* line_pt);
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
bool SufficientPointsForIndependentFit() const;
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double Fit(float* m, float* c);
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double ConstrainedFit(double m, float* c);
private:
// Simple struct to hold an ICOORD point and a halfwidth representing half
// the "width" (supposedly approximately parallel to the direction of the
// line) of each point, such that distant points can be discarded when they
// overlap nearer points. (Think i dot and other diacritics or noise.)
struct PointWidth {
PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
PointWidth(const ICOORD& pt0, int halfwidth0)
: pt(pt0), halfwidth(halfwidth0) {}
ICOORD pt;
int halfwidth;
};
// Type holds the distance of each point from the fitted line and the point
// itself. Use of double allows integer distances from ICOORDs to be stored
// exactly, and also the floating point results from ConstrainedFit.
typedef KDPairInc<double, ICOORD> DistPointPair;
// Computes and returns the squared evaluation metric for a line fit.
double EvaluateLineFit();
// Computes the absolute values of the precomputed distances_,
// and returns the squared upper-quartile error distance.
double ComputeUpperQuartileError();
// Returns the number of sample points that have an error more than threshold.
int NumberOfMisfittedPoints(double threshold) const;
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances_.
// Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half.
void ComputeDistances(const ICOORD& start, const ICOORD& end);
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the give distance range,
// storing the actual (signed) cross products in distances_.
void ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist);
// Stores all the source points in the order they were given and their
// halfwidths, if any.
GenericVector<PointWidth> pts_;
// Stores the computed perpendicular distances of (some of) the pts_ from a
// given vector (assuming it goes through the origin, making it a line).
// Since the distances may be a subset of the input points, and get
// re-ordered by the nth_item function, the original point is stored
// along side the distance.
GenericVector<DistPointPair> distances_; // Distances of points.
// The squared length of the vector used to compute distances_.
double square_length_;
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_

Some files were not shown because too many files have changed in this diff Show More