mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Move sources into src dir. Update build scripts.
This commit is contained in:
parent
e8fceb58ab
commit
e95ff1159e
240
CMakeLists.txt
240
CMakeLists.txt
@ -128,14 +128,14 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
|
||||
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
|
||||
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/api/tess_version.h.in
|
||||
${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY)
|
||||
${CMAKE_SOURCE_DIR}/src/api/tess_version.h.in
|
||||
${CMAKE_BINARY_DIR}/src/api/tess_version.h @ONLY)
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in
|
||||
${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY)
|
||||
${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/tesseract.rc.in
|
||||
${CMAKE_BINARY_DIR}/src/vs2010/tesseract/tesseract.rc @ONLY)
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/vs2010/tesseract/libtesseract.rc.in
|
||||
${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc @ONLY)
|
||||
${CMAKE_SOURCE_DIR}/src/vs2010/tesseract/libtesseract.rc.in
|
||||
${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc @ONLY)
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in
|
||||
${CMAKE_BINARY_DIR}/TesseractConfig-version.cmake @ONLY)
|
||||
@ -160,101 +160,101 @@ include_directories(${Leptonica_INCLUDE_DIRS})
|
||||
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
include_directories(api)
|
||||
include_directories(src/api)
|
||||
include_directories(${CMAKE_BINARY_DIR}/api)
|
||||
include_directories(arch)
|
||||
include_directories(ccmain)
|
||||
include_directories(ccstruct)
|
||||
include_directories(ccutil)
|
||||
include_directories(classify)
|
||||
include_directories(cutil)
|
||||
include_directories(dict)
|
||||
include_directories(lstm)
|
||||
include_directories(opencl)
|
||||
include_directories(textord)
|
||||
include_directories(vs2010/port)
|
||||
include_directories(viewer)
|
||||
include_directories(wordrec)
|
||||
include_directories(src/arch)
|
||||
include_directories(src/ccmain)
|
||||
include_directories(src/ccstruct)
|
||||
include_directories(src/ccutil)
|
||||
include_directories(src/classify)
|
||||
include_directories(src/cutil)
|
||||
include_directories(src/dict)
|
||||
include_directories(src/lstm)
|
||||
include_directories(src/opencl)
|
||||
include_directories(src/textord)
|
||||
include_directories(src/vs2010/port)
|
||||
include_directories(src/viewer)
|
||||
include_directories(src/wordrec)
|
||||
|
||||
########################################
|
||||
# LIBRARY tesseract
|
||||
########################################
|
||||
|
||||
file(GLOB tesseract_src
|
||||
arch/*.cpp
|
||||
ccmain/*.cpp
|
||||
ccstruct/*.cpp
|
||||
ccutil/*.cpp
|
||||
classify/*.cpp
|
||||
cutil/*.cpp
|
||||
dict/*.cpp
|
||||
lstm/*.cpp
|
||||
opencl/*.cpp
|
||||
textord/*.cpp
|
||||
viewer/*.cpp
|
||||
wordrec/*.cpp
|
||||
src/arch/*.cpp
|
||||
src/ccmain/*.cpp
|
||||
src/ccstruct/*.cpp
|
||||
src/ccutil/*.cpp
|
||||
src/classify/*.cpp
|
||||
src/cutil/*.cpp
|
||||
src/dict/*.cpp
|
||||
src/lstm/*.cpp
|
||||
src/opencl/*.cpp
|
||||
src/textord/*.cpp
|
||||
src/viewer/*.cpp
|
||||
src/wordrec/*.cpp
|
||||
)
|
||||
file(GLOB tesseract_hdr
|
||||
api/*.h
|
||||
arch/*.h
|
||||
ccmain/*.h
|
||||
ccstruct/*.h
|
||||
ccutil/*.h
|
||||
classify/*.h
|
||||
cutil/*.h
|
||||
dict/*.h
|
||||
lstm/*.h
|
||||
opencl/*.h
|
||||
textord/*.h
|
||||
viewer/*.h
|
||||
wordrec/*.h
|
||||
src/api/*.h
|
||||
src/arch/*.h
|
||||
src/ccmain/*.h
|
||||
src/ccstruct/*.h
|
||||
src/ccutil/*.h
|
||||
src/classify/*.h
|
||||
src/cutil/*.h
|
||||
src/dict/*.h
|
||||
src/lstm/*.h
|
||||
src/opencl/*.h
|
||||
src/textord/*.h
|
||||
src/viewer/*.h
|
||||
src/wordrec/*.h
|
||||
)
|
||||
if (WIN32)
|
||||
file(GLOB tesseract_win32_src "vs2010/port/*.cpp")
|
||||
file(GLOB tesseract_win32_hdr "vs2010/port/*.h")
|
||||
file(GLOB tesseract_win32_src "src/vs2010/port/*.cpp")
|
||||
file(GLOB tesseract_win32_hdr "src/vs2010/port/*.h")
|
||||
set(tesseract_src ${tesseract_src} ${tesseract_win32_src})
|
||||
set(tesseract_hdr ${tesseract_hdr} ${tesseract_win32_hdr})
|
||||
endif()
|
||||
|
||||
set(tesseract_src ${tesseract_src}
|
||||
api/baseapi.cpp
|
||||
api/capi.cpp
|
||||
api/renderer.cpp
|
||||
api/pdfrenderer.cpp
|
||||
src/api/baseapi.cpp
|
||||
src/api/capi.cpp
|
||||
src/api/renderer.cpp
|
||||
src/api/pdfrenderer.cpp
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
if (MSVC)
|
||||
include_directories(vs2010/tesseract)
|
||||
include_directories(src/vs2010/tesseract)
|
||||
set(tesseract_hdr
|
||||
${tesseract_hdr}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vs2010/tesseract/resource.h)
|
||||
set(tesseract_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/libtesseract.rc)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/vs2010/tesseract/resource.h)
|
||||
set(tesseract_rsc ${CMAKE_BINARY_DIR}/src/vs2010/tesseract/libtesseract.rc)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
|
||||
PROPERTIES COMPILE_FLAGS "/arch:AVX")
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
|
||||
PROPERTIES COMPILE_FLAGS "/arch:AVX2")
|
||||
endif()
|
||||
else()
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductsse.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-msse4.1")
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixsse.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-msse4.1")
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/dotproductavx.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-mavx")
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/arch/intsimdmatrixavx2.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-mavx2")
|
||||
endif()
|
||||
|
||||
@ -291,7 +291,7 @@ endif()
|
||||
# EXECUTABLE tesseractmain
|
||||
########################################
|
||||
|
||||
set(tesseractmain_src api/tesseractmain.cpp)
|
||||
set(tesseractmain_src src/api/tesseractmain.cpp)
|
||||
if (MSVC)
|
||||
set(tesseractmain_rsc ${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc)
|
||||
endif()
|
||||
@ -326,74 +326,74 @@ install(FILES
|
||||
|
||||
install(FILES
|
||||
# from api/makefile.am
|
||||
api/apitypes.h
|
||||
api/baseapi.h
|
||||
api/capi.h
|
||||
api/renderer.h
|
||||
src/api/apitypes.h
|
||||
src/api/baseapi.h
|
||||
src/api/capi.h
|
||||
src/api/renderer.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h
|
||||
|
||||
#from arch/makefile.am
|
||||
arch/dotproductavx.h
|
||||
arch/dotproductsse.h
|
||||
arch/intsimdmatrix.h
|
||||
arch/intsimdmatrixavx2.h
|
||||
arch/intsimdmatrixsse.h
|
||||
arch/simddetect.h
|
||||
src/arch/dotproductavx.h
|
||||
src/arch/dotproductsse.h
|
||||
src/arch/intsimdmatrix.h
|
||||
src/arch/intsimdmatrixavx2.h
|
||||
src/arch/intsimdmatrixsse.h
|
||||
src/arch/simddetect.h
|
||||
|
||||
#from ccmain/makefile.am
|
||||
ccmain/thresholder.h
|
||||
ccmain/ltrresultiterator.h
|
||||
ccmain/pageiterator.h
|
||||
ccmain/resultiterator.h
|
||||
ccmain/osdetect.h
|
||||
src/ccmain/thresholder.h
|
||||
src/ccmain/ltrresultiterator.h
|
||||
src/ccmain/pageiterator.h
|
||||
src/ccmain/resultiterator.h
|
||||
src/ccmain/osdetect.h
|
||||
|
||||
#from ccstruct/makefile.am
|
||||
ccstruct/publictypes.h
|
||||
src/ccstruct/publictypes.h
|
||||
|
||||
#from ccutil/makefile.am
|
||||
ccutil/basedir.h
|
||||
ccutil/errcode.h
|
||||
ccutil/fileerr.h
|
||||
ccutil/genericvector.h
|
||||
ccutil/helpers.h
|
||||
ccutil/host.h
|
||||
ccutil/memry.h
|
||||
ccutil/ndminx.h
|
||||
ccutil/params.h
|
||||
ccutil/ocrclass.h
|
||||
ccutil/platform.h
|
||||
ccutil/serialis.h
|
||||
ccutil/strngs.h
|
||||
ccutil/tesscallback.h
|
||||
ccutil/unichar.h
|
||||
ccutil/unicharcompress.h
|
||||
ccutil/unicharmap.h
|
||||
ccutil/unicharset.h
|
||||
src/ccutil/basedir.h
|
||||
src/ccutil/errcode.h
|
||||
src/ccutil/fileerr.h
|
||||
src/ccutil/genericvector.h
|
||||
src/ccutil/helpers.h
|
||||
src/ccutil/host.h
|
||||
src/ccutil/memry.h
|
||||
src/ccutil/ndminx.h
|
||||
src/ccutil/params.h
|
||||
src/ccutil/ocrclass.h
|
||||
src/ccutil/platform.h
|
||||
src/ccutil/serialis.h
|
||||
src/ccutil/strngs.h
|
||||
src/ccutil/tesscallback.h
|
||||
src/ccutil/unichar.h
|
||||
src/ccutil/unicharcompress.h
|
||||
src/ccutil/unicharmap.h
|
||||
src/ccutil/unicharset.h
|
||||
|
||||
#from lstm/makefile.am
|
||||
lstm/convolve.h
|
||||
lstm/ctc.h
|
||||
lstm/fullyconnected.h
|
||||
lstm/functions.h
|
||||
lstm/input.h
|
||||
lstm/lstm.h
|
||||
lstm/lstmrecognizer.h
|
||||
lstm/lstmtrainer.h
|
||||
lstm/maxpool.h
|
||||
lstm/networkbuilder.h
|
||||
lstm/network.h
|
||||
lstm/networkio.h
|
||||
lstm/networkscratch.h
|
||||
lstm/parallel.h
|
||||
lstm/plumbing.h
|
||||
lstm/recodebeam.h
|
||||
lstm/reconfig.h
|
||||
lstm/reversed.h
|
||||
lstm/series.h
|
||||
lstm/static_shape.h
|
||||
lstm/stridemap.h
|
||||
lstm/tfnetwork.h
|
||||
lstm/weightmatrix.h
|
||||
src/lstm/convolve.h
|
||||
src/lstm/ctc.h
|
||||
src/lstm/fullyconnected.h
|
||||
src/lstm/functions.h
|
||||
src/lstm/input.h
|
||||
src/lstm/lstm.h
|
||||
src/lstm/lstmrecognizer.h
|
||||
src/lstm/lstmtrainer.h
|
||||
src/lstm/maxpool.h
|
||||
src/lstm/networkbuilder.h
|
||||
src/lstm/network.h
|
||||
src/lstm/networkio.h
|
||||
src/lstm/networkscratch.h
|
||||
src/lstm/parallel.h
|
||||
src/lstm/plumbing.h
|
||||
src/lstm/recodebeam.h
|
||||
src/lstm/reconfig.h
|
||||
src/lstm/reversed.h
|
||||
src/lstm/series.h
|
||||
src/lstm/static_shape.h
|
||||
src/lstm/stridemap.h
|
||||
src/lstm/tfnetwork.h
|
||||
src/lstm/weightmatrix.h
|
||||
|
||||
#${CMAKE_BINARY_DIR}/src/endianness.h
|
||||
DESTINATION include/tesseract)
|
||||
|
30
configure.ac
30
configure.ac
@ -16,7 +16,7 @@ AC_LANG_COMPILER_REQUIRE
|
||||
CXXFLAGS=${CXXFLAGS:-""}
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_AUX_DIR([config])
|
||||
AC_CONFIG_SRCDIR([api/tesseractmain.cpp])
|
||||
AC_CONFIG_SRCDIR([src/api/tesseractmain.cpp])
|
||||
AC_PREFIX_DEFAULT([/usr/local])
|
||||
|
||||
# Automake configuration. Do not require README file (we use README.md).
|
||||
@ -476,20 +476,20 @@ fi
|
||||
|
||||
# Output files
|
||||
AC_CONFIG_FILES([Makefile tesseract.pc])
|
||||
AC_CONFIG_FILES([api/Makefile])
|
||||
AC_CONFIG_FILES([api/tess_version.h])
|
||||
AC_CONFIG_FILES([arch/Makefile])
|
||||
AC_CONFIG_FILES([ccmain/Makefile])
|
||||
AC_CONFIG_FILES([opencl/Makefile])
|
||||
AC_CONFIG_FILES([ccstruct/Makefile])
|
||||
AC_CONFIG_FILES([ccutil/Makefile])
|
||||
AC_CONFIG_FILES([classify/Makefile])
|
||||
AC_CONFIG_FILES([cutil/Makefile])
|
||||
AC_CONFIG_FILES([dict/Makefile])
|
||||
AC_CONFIG_FILES([lstm/Makefile])
|
||||
AC_CONFIG_FILES([textord/Makefile])
|
||||
AC_CONFIG_FILES([viewer/Makefile])
|
||||
AC_CONFIG_FILES([wordrec/Makefile])
|
||||
AC_CONFIG_FILES([src/api/Makefile])
|
||||
AC_CONFIG_FILES([src/api/tess_version.h])
|
||||
AC_CONFIG_FILES([src/arch/Makefile])
|
||||
AC_CONFIG_FILES([src/ccmain/Makefile])
|
||||
AC_CONFIG_FILES([src/opencl/Makefile])
|
||||
AC_CONFIG_FILES([src/ccstruct/Makefile])
|
||||
AC_CONFIG_FILES([src/ccutil/Makefile])
|
||||
AC_CONFIG_FILES([src/classify/Makefile])
|
||||
AC_CONFIG_FILES([src/cutil/Makefile])
|
||||
AC_CONFIG_FILES([src/dict/Makefile])
|
||||
AC_CONFIG_FILES([src/lstm/Makefile])
|
||||
AC_CONFIG_FILES([src/textord/Makefile])
|
||||
AC_CONFIG_FILES([src/viewer/Makefile])
|
||||
AC_CONFIG_FILES([src/wordrec/Makefile])
|
||||
AC_CONFIG_FILES([tessdata/Makefile])
|
||||
AC_CONFIG_FILES([tessdata/configs/Makefile])
|
||||
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])
|
||||
|
103
cppan.yml
103
cppan.yml
@ -35,59 +35,59 @@ projects:
|
||||
type: lib
|
||||
export_all_symbols: true
|
||||
files:
|
||||
- api/.*\.cpp
|
||||
- arch/.*\.cpp
|
||||
- ccmain/.*\.cpp
|
||||
- ccstruct/.*\.cpp
|
||||
- ccutil/.*\.cpp
|
||||
- classify/.*\.cpp
|
||||
- cutil/.*\.cpp
|
||||
- dict/.*\.cpp
|
||||
- lstm/.*\.cpp
|
||||
- opencl/.*\.cpp
|
||||
- textord/.*\.cpp
|
||||
- viewer/.*\.cpp
|
||||
- wordrec/.*\.cpp
|
||||
- src/api/.*\.cpp
|
||||
- src/arch/.*\.cpp
|
||||
- src/ccmain/.*\.cpp
|
||||
- src/ccstruct/.*\.cpp
|
||||
- src/ccutil/.*\.cpp
|
||||
- src/classify/.*\.cpp
|
||||
- src/cutil/.*\.cpp
|
||||
- src/dict/.*\.cpp
|
||||
- src/lstm/.*\.cpp
|
||||
- src/opencl/.*\.cpp
|
||||
- src/textord/.*\.cpp
|
||||
- src/viewer/.*\.cpp
|
||||
- src/wordrec/.*\.cpp
|
||||
|
||||
- api/.*\.h
|
||||
- arch/.*\.h
|
||||
- ccmain/.*\.h
|
||||
- ccstruct/.*\.h
|
||||
- ccutil/.*\.h
|
||||
- classify/.*\.h
|
||||
- cutil/.*\.h
|
||||
- dict/.*\.h
|
||||
- lstm/.*\.h
|
||||
- opencl/.*\.h
|
||||
- textord/.*\.h
|
||||
- viewer/.*\.h
|
||||
- wordrec/.*\.h
|
||||
- src/api/.*\.h
|
||||
- src/arch/.*\.h
|
||||
- src/ccmain/.*\.h
|
||||
- src/ccstruct/.*\.h
|
||||
- src/ccutil/.*\.h
|
||||
- src/classify/.*\.h
|
||||
- src/cutil/.*\.h
|
||||
- src/dict/.*\.h
|
||||
- src/lstm/.*\.h
|
||||
- src/opencl/.*\.h
|
||||
- src/textord/.*\.h
|
||||
- src/viewer/.*\.h
|
||||
- src/wordrec/.*\.h
|
||||
|
||||
- vs2010/port/.*
|
||||
- src/vs2010/port/.*
|
||||
|
||||
exclude_from_build:
|
||||
- api/tesseractmain.cpp
|
||||
- viewer/svpaint.cpp
|
||||
- src/api/tesseractmain.cpp
|
||||
- src/viewer/svpaint.cpp
|
||||
|
||||
include_directories:
|
||||
public:
|
||||
#private:
|
||||
- arch
|
||||
- classify
|
||||
- cutil
|
||||
- ccutil
|
||||
- dict
|
||||
- lstm
|
||||
- opencl
|
||||
- textord
|
||||
- vs2010/port
|
||||
- viewer
|
||||
- wordrec
|
||||
- src/arch
|
||||
- src/classify
|
||||
- src/cutil
|
||||
- src/ccutil
|
||||
- src/dict
|
||||
- src/lstm
|
||||
- src/opencl
|
||||
- src/textord
|
||||
- src/vs2010/port
|
||||
- src/viewer
|
||||
- src/wordrec
|
||||
#public:
|
||||
- api
|
||||
- ccmain
|
||||
- ccstruct
|
||||
- ccutil
|
||||
- src/api
|
||||
- src/ccmain
|
||||
- src/ccstruct
|
||||
- src/ccutil
|
||||
|
||||
check_function_exists:
|
||||
- getline
|
||||
@ -125,23 +125,26 @@ projects:
|
||||
file_write_once(${BDIR}/config_auto.h "")
|
||||
|
||||
post_sources: |
|
||||
configure_file(
|
||||
${SDIR}/src/api/tess_version.h.in
|
||||
${BDIR}/tess_version.h @ONLY)
|
||||
if (WIN32)
|
||||
if (MSVC)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
|
||||
${SDIR}/src/arch/dotproductsse.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
|
||||
${SDIR}/src/arch/intsimdmatrixsse.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
|
||||
${SDIR}/src/arch/dotproductavx.cpp
|
||||
PROPERTIES COMPILE_FLAGS "/arch:AVX")
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
|
||||
${SDIR}/src/arch/intsimdmatrixavx2.cpp
|
||||
PROPERTIES COMPILE_FLAGS "/arch:AVX2")
|
||||
endif()
|
||||
else()
|
||||
remove_src_dir(vs2010/port/*)
|
||||
remove_src_dir(src/vs2010/port/*)
|
||||
endif()
|
||||
|
||||
options:
|
||||
@ -162,7 +165,7 @@ projects:
|
||||
pvt.cppan.demo.danbloomberg.leptonica: 1
|
||||
|
||||
tesseract:
|
||||
files: api/tesseractmain.cpp
|
||||
files: src/api/tesseractmain.cpp
|
||||
dependencies:
|
||||
- libtesseract
|
||||
|
||||
|
@ -1,30 +1,30 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductavx.h
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:51:05 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
||||
#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
double DotProductAVX(const double* u, const double* v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductavx.h
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:51:05 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
||||
#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
double DotProductAVX(const double* u, const double* v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
@ -1,35 +1,35 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductsse.h
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:57:05 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
||||
#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
||||
|
||||
#include "host.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
double DotProductSSE(const double* u, const double* v, int n);
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductsse.h
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:57:05 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
||||
#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
||||
|
||||
#include "host.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
double DotProductSSE(const double* u, const double* v, int n);
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
@ -1,82 +1,82 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: simddetect.cpp
|
||||
// Description: Architecture detector.
|
||||
// Author: Stefan Weil (based on code from Ray Smith)
|
||||
//
|
||||
// (C) Copyright 2014, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "simddetect.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#undef X86_BUILD
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
|
||||
#if !defined(ANDROID_BUILD)
|
||||
#define X86_BUILD 1
|
||||
#endif // !ANDROID_BUILD
|
||||
#endif // x86 target
|
||||
|
||||
#if defined(X86_BUILD)
|
||||
#if defined(__GNUC__)
|
||||
#include <cpuid.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
SIMDDetect SIMDDetect::detector;
|
||||
|
||||
// If true, then AVX has been detected.
|
||||
bool SIMDDetect::avx_available_;
|
||||
bool SIMDDetect::avx2_available_;
|
||||
bool SIMDDetect::avx512F_available_;
|
||||
bool SIMDDetect::avx512BW_available_;
|
||||
// If true, then SSe4.1 has been detected.
|
||||
bool SIMDDetect::sse_available_;
|
||||
|
||||
// Constructor.
|
||||
// Tests the architecture in a system-dependent way to detect AVX, SSE and
|
||||
// any other available SIMD equipment.
|
||||
// __GNUC__ is also defined by compilers that include GNU extensions such as
|
||||
// clang.
|
||||
SIMDDetect::SIMDDetect() {
|
||||
#if defined(X86_BUILD)
|
||||
#if defined(__GNUC__)
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
|
||||
// Note that these tests all use hex because the older compilers don't have
|
||||
// the newer flags.
|
||||
sse_available_ = (ecx & 0x00080000) != 0;
|
||||
avx_available_ = (ecx & 0x10000000) != 0;
|
||||
if (avx_available_) {
|
||||
// There is supposed to be a __get_cpuid_count function, but this is all
|
||||
// there is in my cpuid.h. It is a macro for an asm statement and cannot
|
||||
// be used inside an if.
|
||||
__cpuid_count(7, 0, eax, ebx, ecx, edx);
|
||||
avx2_available_ = (ebx & 0x00000020) != 0;
|
||||
avx512F_available_ = (ebx & 0x00010000) != 0;
|
||||
avx512BW_available_ = (ebx & 0x40000000) != 0;
|
||||
}
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
int cpuInfo[4];
|
||||
__cpuid(cpuInfo, 0);
|
||||
if (cpuInfo[0] >= 1) {
|
||||
__cpuid(cpuInfo, 1);
|
||||
sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
|
||||
avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
|
||||
}
|
||||
#else
|
||||
#error "I don't know how to test for SIMD with this compiler"
|
||||
#endif
|
||||
#endif // X86_BUILD
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: simddetect.cpp
|
||||
// Description: Architecture detector.
|
||||
// Author: Stefan Weil (based on code from Ray Smith)
|
||||
//
|
||||
// (C) Copyright 2014, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "simddetect.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#undef X86_BUILD
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
|
||||
#if !defined(ANDROID_BUILD)
|
||||
#define X86_BUILD 1
|
||||
#endif // !ANDROID_BUILD
|
||||
#endif // x86 target
|
||||
|
||||
#if defined(X86_BUILD)
|
||||
#if defined(__GNUC__)
|
||||
#include <cpuid.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
SIMDDetect SIMDDetect::detector;
|
||||
|
||||
// If true, then AVX has been detected.
|
||||
bool SIMDDetect::avx_available_;
|
||||
bool SIMDDetect::avx2_available_;
|
||||
bool SIMDDetect::avx512F_available_;
|
||||
bool SIMDDetect::avx512BW_available_;
|
||||
// If true, then SSe4.1 has been detected.
|
||||
bool SIMDDetect::sse_available_;
|
||||
|
||||
// Constructor.
|
||||
// Tests the architecture in a system-dependent way to detect AVX, SSE and
|
||||
// any other available SIMD equipment.
|
||||
// __GNUC__ is also defined by compilers that include GNU extensions such as
|
||||
// clang.
|
||||
SIMDDetect::SIMDDetect() {
|
||||
#if defined(X86_BUILD)
|
||||
#if defined(__GNUC__)
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
|
||||
// Note that these tests all use hex because the older compilers don't have
|
||||
// the newer flags.
|
||||
sse_available_ = (ecx & 0x00080000) != 0;
|
||||
avx_available_ = (ecx & 0x10000000) != 0;
|
||||
if (avx_available_) {
|
||||
// There is supposed to be a __get_cpuid_count function, but this is all
|
||||
// there is in my cpuid.h. It is a macro for an asm statement and cannot
|
||||
// be used inside an if.
|
||||
__cpuid_count(7, 0, eax, ebx, ecx, edx);
|
||||
avx2_available_ = (ebx & 0x00000020) != 0;
|
||||
avx512F_available_ = (ebx & 0x00010000) != 0;
|
||||
avx512BW_available_ = (ebx & 0x40000000) != 0;
|
||||
}
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
int cpuInfo[4];
|
||||
__cpuid(cpuInfo, 0);
|
||||
if (cpuInfo[0] >= 1) {
|
||||
__cpuid(cpuInfo, 1);
|
||||
sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
|
||||
avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
|
||||
}
|
||||
#else
|
||||
#error "I don't know how to test for SIMD with this compiler"
|
||||
#endif
|
||||
#endif // X86_BUILD
|
||||
}
|
@ -1,54 +1,54 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: simddetect.h
|
||||
// Description: Architecture detector.
|
||||
// Author: Stefan Weil (based on code from Ray Smith)
|
||||
//
|
||||
// (C) Copyright 2014, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
// Architecture detector. Add code here to detect any other architectures for
|
||||
// SIMD-based faster dot product functions. Intended to be a single static
|
||||
// object, but it does no real harm to have more than one.
|
||||
class SIMDDetect {
|
||||
public:
|
||||
// Returns true if AVX is available on this system.
|
||||
static inline bool IsAVXAvailable() { return detector.avx_available_; }
|
||||
// Returns true if AVX2 (integer support) is available on this system.
|
||||
static inline bool IsAVX2Available() { return detector.avx2_available_; }
|
||||
// Returns true if AVX512 Foundation (float) is available on this system.
|
||||
static inline bool IsAVX512FAvailable() {
|
||||
return detector.avx512F_available_;
|
||||
}
|
||||
// Returns true if AVX512 integer is available on this system.
|
||||
static inline bool IsAVX512BWAvailable() {
|
||||
return detector.avx512BW_available_;
|
||||
}
|
||||
// Returns true if SSE4.1 is available on this system.
|
||||
static inline bool IsSSEAvailable() { return detector.sse_available_; }
|
||||
|
||||
private:
|
||||
// Constructor, must set all static member variables.
|
||||
SIMDDetect();
|
||||
|
||||
private:
|
||||
// Singleton.
|
||||
static SIMDDetect detector;
|
||||
// If true, then AVX has been detected.
|
||||
static TESS_API bool avx_available_;
|
||||
static TESS_API bool avx2_available_;
|
||||
static TESS_API bool avx512F_available_;
|
||||
static TESS_API bool avx512BW_available_;
|
||||
// If true, then SSe4.1 has been detected.
|
||||
static TESS_API bool sse_available_;
|
||||
};
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: simddetect.h
|
||||
// Description: Architecture detector.
|
||||
// Author: Stefan Weil (based on code from Ray Smith)
|
||||
//
|
||||
// (C) Copyright 2014, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_ARCH_SIMDDETECT_H_
#define TESSERACT_ARCH_SIMDDETECT_H_

#include "platform.h"

// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
//
// The surrounding include guard keeps the class from being redefined when
// this header reaches one translation unit through more than one include.
class SIMDDetect {
 public:
  // Returns true if AVX is available on this system.
  static inline bool IsAVXAvailable() { return detector.avx_available_; }
  // Returns true if AVX2 (integer support) is available on this system.
  static inline bool IsAVX2Available() { return detector.avx2_available_; }
  // Returns true if AVX512 Foundation (float) is available on this system.
  static inline bool IsAVX512FAvailable() {
    return detector.avx512F_available_;
  }
  // Returns true if AVX512 integer is available on this system.
  static inline bool IsAVX512BWAvailable() {
    return detector.avx512BW_available_;
  }
  // Returns true if SSE4.1 is available on this system.
  static inline bool IsSSEAvailable() { return detector.sse_available_; }

 private:
  // Constructor, must set all static member variables.
  SIMDDetect();

  // Singleton.
  static SIMDDetect detector;
  // If true, then AVX has been detected.
  static TESS_API bool avx_available_;
  // If true, then AVX2 has been detected.
  static TESS_API bool avx2_available_;
  // If true, then AVX512 Foundation has been detected.
  static TESS_API bool avx512F_available_;
  // If true, then AVX512 integer (BW) has been detected.
  static TESS_API bool avx512BW_available_;
  // If true, then SSE4.1 has been detected.
  static TESS_API bool sse_available_;
};

#endif  // TESSERACT_ARCH_SIMDDETECT_H_
|
@ -1,44 +1,44 @@
|
||||
/**********************************************************************
|
||||
* File: control.h (Formerly control.h)
|
||||
* Description: Module-independent matcher controller.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 11:09:58 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**
|
||||
* @file control.h
|
||||
* Module-independent matcher controller.
|
||||
*/
|
||||
|
||||
#ifndef CONTROL_H
|
||||
#define CONTROL_H
|
||||
|
||||
#include "params.h"
|
||||
#include "ocrblock.h"
|
||||
#include "ratngs.h"
|
||||
#include "statistc.h"
|
||||
#include "pageres.h"
|
||||
|
||||
/// Kinds of word acceptability, distinguished by letter-case pattern.
/// The explicit values spell out the numbering the enumerators already had.
enum ACCEPTABLE_WERD_TYPE {
  AC_UNACCEPTABLE = 0,  ///< Unacceptable word
  AC_LOWER_CASE = 1,    ///< ALL lower case
  AC_UPPER_CASE = 2,    ///< ALL upper case
  AC_INITIAL_CAP = 3,   ///< ALL but initial lc
  AC_LC_ABBREV = 4,     ///< a.b.c.
  AC_UC_ABBREV = 5      ///< A.B.C.
};
|
||||
|
||||
#endif
|
||||
/**********************************************************************
|
||||
* File: control.h (Formerly control.h)
|
||||
* Description: Module-independent matcher controller.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 11:09:58 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**
|
||||
* @file control.h
|
||||
* Module-independent matcher controller.
|
||||
*/
|
||||
|
||||
#ifndef CONTROL_H
|
||||
#define CONTROL_H
|
||||
|
||||
#include "params.h"
|
||||
#include "ocrblock.h"
|
||||
#include "ratngs.h"
|
||||
#include "statistc.h"
|
||||
#include "pageres.h"
|
||||
|
||||
/// Kinds of word acceptability, distinguished by letter-case pattern.
/// The explicit values spell out the numbering the enumerators already had.
enum ACCEPTABLE_WERD_TYPE {
  AC_UNACCEPTABLE = 0,  ///< Unacceptable word
  AC_LOWER_CASE = 1,    ///< ALL lower case
  AC_UPPER_CASE = 2,    ///< ALL upper case
  AC_INITIAL_CAP = 3,   ///< ALL but initial lc
  AC_LC_ABBREV = 4,     ///< a.b.c.
  AC_UC_ABBREV = 5      ///< A.B.C.
};
|
||||
|
||||
#endif
|
@ -1,31 +1,31 @@
|
||||
/******************************************************************
|
||||
* File: fixspace.h (Formerly fixspace.h)
|
||||
* Description: Implements a pass over the page res, exploring the alternative
|
||||
* spacing possibilities, trying to use context to improve the
|
||||
* word spacing
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Oct 21 11:38:43 BST 1993
|
||||
*
|
||||
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef FIXSPACE_H
|
||||
#define FIXSPACE_H
|
||||
|
||||
#include "pageres.h"
|
||||
#include "params.h"
|
||||
|
||||
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
|
||||
void transform_to_next_perm(WERD_RES_LIST &words);
|
||||
void fixspace_dbg(WERD_RES *word);
|
||||
#endif
|
||||
/******************************************************************
|
||||
* File: fixspace.h (Formerly fixspace.h)
|
||||
* Description: Implements a pass over the page res, exploring the alternative
|
||||
* spacing possibilities, trying to use context to improve the
|
||||
* word spacing
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Oct 21 11:38:43 BST 1993
|
||||
*
|
||||
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef FIXSPACE_H
|
||||
#define FIXSPACE_H
|
||||
|
||||
#include "pageres.h"
|
||||
#include "params.h"
|
||||
|
||||
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
|
||||
void transform_to_next_perm(WERD_RES_LIST &words);
|
||||
void fixspace_dbg(WERD_RES *word);
|
||||
#endif
|
@ -1,64 +1,64 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: mutableiterator.h
|
||||
// Description: Iterator for tesseract results providing access to
|
||||
// both high-level API and Tesseract internal data structures.
|
||||
// Author: David Eger
|
||||
// Created: Thu Feb 24 19:01:06 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
|
||||
#include "resultiterator.h"
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See apitypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// ResultIterator adds text-specific methods for access to OCR output.
|
||||
// MutableIterator adds access to internal data structures.
|
||||
|
||||
class MutableIterator : public ResultIterator {
|
||||
public:
|
||||
// See argument descriptions in ResultIterator()
|
||||
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
|
||||
int scale, int scaled_yres,
|
||||
int rect_left, int rect_top,
|
||||
int rect_width, int rect_height)
|
||||
: ResultIterator(
|
||||
LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
|
||||
rect_top, rect_width, rect_height)) {}
|
||||
virtual ~MutableIterator() {}
|
||||
|
||||
// See PageIterator and ResultIterator for most calls.
|
||||
|
||||
// Return access to Tesseract internals.
|
||||
const PAGE_RES_IT *PageResIt() const { return it_; }
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: mutableiterator.h
|
||||
// Description: Iterator for tesseract results providing access to
|
||||
// both high-level API and Tesseract internal data structures.
|
||||
// Author: David Eger
|
||||
// Created: Thu Feb 24 19:01:06 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
|
||||
#include "resultiterator.h"
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See apitypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// ResultIterator adds text-specific methods for access to OCR output.
|
||||
// MutableIterator adds access to internal data structures.
|
||||
|
||||
class MutableIterator : public ResultIterator {
|
||||
public:
|
||||
// See argument descriptions in ResultIterator()
|
||||
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
|
||||
int scale, int scaled_yres,
|
||||
int rect_left, int rect_top,
|
||||
int rect_width, int rect_height)
|
||||
: ResultIterator(
|
||||
LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
|
||||
rect_top, rect_width, rect_height)) {}
|
||||
virtual ~MutableIterator() {}
|
||||
|
||||
// See PageIterator and ResultIterator for most calls.
|
||||
|
||||
// Return access to Tesseract internals.
|
||||
const PAGE_RES_IT *PageResIt() const { return it_; }
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
@ -1,33 +1,33 @@
|
||||
/******************************************************************
|
||||
* File: output.h (Formerly output.h)
|
||||
* Description: Output pass
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Aug 4 10:56:08 BST 1994
|
||||
*
|
||||
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef OUTPUT_H
|
||||
#define OUTPUT_H
|
||||
|
||||
#include "params.h"
|
||||
//#include "epapconv.h"
|
||||
#include "pageres.h"
|
||||
|
||||
/** test line ends */
|
||||
char determine_newline_type(WERD *word, ///< word to do
|
||||
BLOCK *block, ///< current block
|
||||
WERD *next_word, ///< next word
|
||||
BLOCK *next_block ///< block of next word
|
||||
);
|
||||
#endif
|
||||
/******************************************************************
|
||||
* File: output.h (Formerly output.h)
|
||||
* Description: Output pass
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Aug 4 10:56:08 BST 1994
|
||||
*
|
||||
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef OUTPUT_H
|
||||
#define OUTPUT_H
|
||||
|
||||
#include "params.h"
|
||||
//#include "epapconv.h"
|
||||
#include "pageres.h"
|
||||
|
||||
/** test line ends */
|
||||
char determine_newline_type(WERD *word, ///< word to do
|
||||
BLOCK *block, ///< current block
|
||||
WERD *next_word, ///< next word
|
||||
BLOCK *next_block ///< block of next word
|
||||
);
|
||||
#endif
|
@ -1,108 +1,108 @@
|
||||
/**********************************************************************
|
||||
* File: paragraphs.h
|
||||
* Description: Paragraph Detection data structures.
|
||||
* Author: David Eger
|
||||
* Created: 25 February 2011
|
||||
*
|
||||
* (C) Copyright 2011, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
|
||||
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
|
||||
|
||||
#include "rect.h"
|
||||
#include "ocrpara.h"
|
||||
#include "genericvector.h"
|
||||
#include "strngs.h"
|
||||
|
||||
|
||||
class WERD;
|
||||
class UNICHARSET;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class MutableIterator;
|
||||
|
||||
// This structure captures all information needed about a text line for the
|
||||
// purposes of paragraph detection. It is meant to be exceedingly light-weight
|
||||
// so that we can easily test paragraph detection independent of the rest of
|
||||
// Tesseract.
|
||||
class RowInfo {
|
||||
public:
|
||||
// Constant data derived from Tesseract output.
|
||||
STRING text; // the full UTF-8 text of the line.
|
||||
bool ltr; // whether the majority of the text is left-to-right
|
||||
// TODO(eger) make this more fine-grained.
|
||||
|
||||
bool has_leaders; // does the line contain leader dots (.....)?
|
||||
bool has_drop_cap; // does the line have a drop cap?
|
||||
int pix_ldistance; // distance to the left pblock boundary in pixels
|
||||
int pix_rdistance; // distance to the right pblock boundary in pixels
|
||||
float pix_xheight; // guessed xheight for the line
|
||||
int average_interword_space; // average space between words in pixels.
|
||||
|
||||
int num_words;
|
||||
TBOX lword_box; // in normalized (horiz text rows) space
|
||||
TBOX rword_box; // in normalized (horiz text rows) space
|
||||
|
||||
STRING lword_text; // the UTF-8 text of the leftmost werd
|
||||
STRING rword_text; // the UTF-8 text of the rightmost werd
|
||||
|
||||
// The text of a paragraph typically starts with the start of an idea and
|
||||
// ends with the end of an idea. Here we define paragraph as something that
|
||||
// may have a first line indent and a body indent which may be different.
|
||||
// Typical words that start an idea are:
|
||||
// 1. Words in western scripts that start with
|
||||
// a capital letter, for example "The"
|
||||
// 2. Bulleted or numbered list items, for
|
||||
// example "2."
|
||||
// Typical words which end an idea are words ending in punctuation marks. In
|
||||
// this vocabulary, each list item is represented as a paragraph.
|
||||
bool lword_indicates_list_item;
|
||||
bool lword_likely_starts_idea;
|
||||
bool lword_likely_ends_idea;
|
||||
|
||||
bool rword_indicates_list_item;
|
||||
bool rword_likely_starts_idea;
|
||||
bool rword_likely_ends_idea;
|
||||
};
|
||||
|
||||
// Main entry point for Paragraph Detection Algorithm.
|
||||
//
|
||||
// Given a set of equally spaced textlines (described by row_infos),
|
||||
// Split them into paragraphs. See http://goto/paragraphstalk
|
||||
//
|
||||
// Output:
|
||||
// row_owners - one pointer for each row, to the paragraph it belongs to.
|
||||
// paragraphs - this is the actual list of PARA objects.
|
||||
// models - the list of paragraph models referenced by the PARA objects.
|
||||
// caller is responsible for deleting the models.
|
||||
void DetectParagraphs(int debug_level,
|
||||
GenericVector<RowInfo> *row_infos,
|
||||
GenericVector<PARA *> *row_owners,
|
||||
PARA_LIST *paragraphs,
|
||||
GenericVector<ParagraphModel *> *models);
|
||||
|
||||
// Given a MutableIterator to the start of a block, run DetectParagraphs on
|
||||
// that block and commit the results to the underlying ROW and BLOCK structs,
|
||||
// saving the ParagraphModels in models. Caller owns the models.
|
||||
// We use unicharset during the function to answer questions such as "is the
|
||||
// first letter of this word upper case?"
|
||||
void DetectParagraphs(int debug_level,
|
||||
bool after_text_recognition,
|
||||
const MutableIterator *block_start,
|
||||
GenericVector<ParagraphModel *> *models);
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_
|
||||
/**********************************************************************
|
||||
* File: paragraphs.h
|
||||
* Description: Paragraph Detection data structures.
|
||||
* Author: David Eger
|
||||
* Created: 25 February 2011
|
||||
*
|
||||
* (C) Copyright 2011, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
|
||||
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
|
||||
|
||||
#include "rect.h"
|
||||
#include "ocrpara.h"
|
||||
#include "genericvector.h"
|
||||
#include "strngs.h"
|
||||
|
||||
|
||||
class WERD;
|
||||
class UNICHARSET;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class MutableIterator;
|
||||
|
||||
// This structure captures all information needed about a text line for the
|
||||
// purposes of paragraph detection. It is meant to be exceedingly light-weight
|
||||
// so that we can easily test paragraph detection independent of the rest of
|
||||
// Tesseract.
|
||||
class RowInfo {
|
||||
public:
|
||||
// Constant data derived from Tesseract output.
|
||||
STRING text; // the full UTF-8 text of the line.
|
||||
bool ltr; // whether the majority of the text is left-to-right
|
||||
// TODO(eger) make this more fine-grained.
|
||||
|
||||
bool has_leaders; // does the line contain leader dots (.....)?
|
||||
bool has_drop_cap; // does the line have a drop cap?
|
||||
int pix_ldistance; // distance to the left pblock boundary in pixels
|
||||
int pix_rdistance; // distance to the right pblock boundary in pixels
|
||||
float pix_xheight; // guessed xheight for the line
|
||||
int average_interword_space; // average space between words in pixels.
|
||||
|
||||
int num_words;
|
||||
TBOX lword_box; // in normalized (horiz text rows) space
|
||||
TBOX rword_box; // in normalized (horiz text rows) space
|
||||
|
||||
STRING lword_text; // the UTF-8 text of the leftmost werd
|
||||
STRING rword_text; // the UTF-8 text of the rightmost werd
|
||||
|
||||
// The text of a paragraph typically starts with the start of an idea and
|
||||
// ends with the end of an idea. Here we define paragraph as something that
|
||||
// may have a first line indent and a body indent which may be different.
|
||||
// Typical words that start an idea are:
|
||||
// 1. Words in western scripts that start with
|
||||
// a capital letter, for example "The"
|
||||
// 2. Bulleted or numbered list items, for
|
||||
// example "2."
|
||||
// Typical words which end an idea are words ending in punctuation marks. In
|
||||
// this vocabulary, each list item is represented as a paragraph.
|
||||
bool lword_indicates_list_item;
|
||||
bool lword_likely_starts_idea;
|
||||
bool lword_likely_ends_idea;
|
||||
|
||||
bool rword_indicates_list_item;
|
||||
bool rword_likely_starts_idea;
|
||||
bool rword_likely_ends_idea;
|
||||
};
|
||||
|
||||
// Main entry point for Paragraph Detection Algorithm.
|
||||
//
|
||||
// Given a set of equally spaced textlines (described by row_infos),
|
||||
// Split them into paragraphs. See http://goto/paragraphstalk
|
||||
//
|
||||
// Output:
|
||||
// row_owners - one pointer for each row, to the paragraph it belongs to.
|
||||
// paragraphs - this is the actual list of PARA objects.
|
||||
// models - the list of paragraph models referenced by the PARA objects.
|
||||
// caller is responsible for deleting the models.
|
||||
void DetectParagraphs(int debug_level,
|
||||
GenericVector<RowInfo> *row_infos,
|
||||
GenericVector<PARA *> *row_owners,
|
||||
PARA_LIST *paragraphs,
|
||||
GenericVector<ParagraphModel *> *models);
|
||||
|
||||
// Given a MutableIterator to the start of a block, run DetectParagraphs on
|
||||
// that block and commit the results to the underlying ROW and BLOCK structs,
|
||||
// saving the ParagraphModels in models. Caller owns the models.
|
||||
// We use unicharset during the function to answer questions such as "is the
|
||||
// first letter of this word upper case?"
|
||||
void DetectParagraphs(int debug_level,
|
||||
bool after_text_recognition,
|
||||
const MutableIterator *block_start,
|
||||
GenericVector<ParagraphModel *> *models);
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_
|
@ -1,87 +1,87 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: pgedit.h
|
||||
// Description: Page structure file editor
|
||||
// Author: Joern Wanke
|
||||
// Created: Wed Jul 18 10:05:01 PDT 2007
|
||||
//
|
||||
// (C) Copyright 2007, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef PGEDIT_H
|
||||
#define PGEDIT_H
|
||||
|
||||
#include "ocrblock.h"
|
||||
#include "ocrrow.h"
|
||||
#include "werd.h"
|
||||
#include "rect.h"
|
||||
#include "params.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
class ScrollView;
|
||||
class SVMenuNode;
|
||||
struct SVEvent;
|
||||
|
||||
// A small event handler class to process incoming events to
|
||||
// this window.
|
||||
class PGEventHandler : public SVEventHandler {
|
||||
public:
|
||||
PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
|
||||
}
|
||||
void Notify(const SVEvent* sve);
|
||||
private:
|
||||
tesseract::Tesseract* tess_;
|
||||
};
|
||||
|
||||
extern BLOCK_LIST *current_block_list;
|
||||
extern STRING_VAR_H (editor_image_win_name, "EditorImage",
|
||||
"Editor image window name");
|
||||
extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos");
|
||||
extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos");
|
||||
extern INT_VAR_H (editor_image_height, 680, "Editor image height");
|
||||
extern INT_VAR_H (editor_image_width, 655, "Editor image width");
|
||||
extern INT_VAR_H (editor_image_word_bb_color, BLUE,
|
||||
"Word bounding box colour");
|
||||
extern INT_VAR_H (editor_image_blob_bb_color, YELLOW,
|
||||
"Blob bounding box colour");
|
||||
extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour");
|
||||
extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin",
|
||||
"Editor debug window name");
|
||||
extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos");
|
||||
extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos");
|
||||
extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height");
|
||||
extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width");
|
||||
extern STRING_VAR_H (editor_word_name, "BlnWords",
|
||||
"BL normalised word window");
|
||||
extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos");
|
||||
extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos");
|
||||
extern INT_VAR_H (editor_word_height, 240, "Word window height");
|
||||
extern INT_VAR_H (editor_word_width, 655, "Word window width");
|
||||
extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image");
|
||||
|
||||
ScrollView* bln_word_window_handle(); //return handle
|
||||
void build_image_window(int width, int height);
|
||||
void display_bln_lines(ScrollView window,
|
||||
ScrollView::Color colour,
|
||||
float scale_factor,
|
||||
float y_offset,
|
||||
float minx,
|
||||
float maxx);
|
||||
//function to call
|
||||
void pgeditor_msg( //message display
|
||||
const char *msg);
|
||||
void pgeditor_show_point( //display coords
|
||||
SVEvent *event);
|
||||
//put bln word in box
|
||||
void show_point(PAGE_RES* page_res, float x, float y);
|
||||
|
||||
#endif
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: pgedit.h
|
||||
// Description: Page structure file editor
|
||||
// Author: Joern Wanke
|
||||
// Created: Wed Jul 18 10:05:01 PDT 2007
|
||||
//
|
||||
// (C) Copyright 2007, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef PGEDIT_H
|
||||
#define PGEDIT_H
|
||||
|
||||
#include "ocrblock.h"
|
||||
#include "ocrrow.h"
|
||||
#include "werd.h"
|
||||
#include "rect.h"
|
||||
#include "params.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
class ScrollView;
|
||||
class SVMenuNode;
|
||||
struct SVEvent;
|
||||
|
||||
// A small event handler class to process incoming events to
|
||||
// this window.
|
||||
class PGEventHandler : public SVEventHandler {
|
||||
public:
|
||||
PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
|
||||
}
|
||||
void Notify(const SVEvent* sve);
|
||||
private:
|
||||
tesseract::Tesseract* tess_;
|
||||
};
|
||||
|
||||
extern BLOCK_LIST *current_block_list;
|
||||
extern STRING_VAR_H (editor_image_win_name, "EditorImage",
|
||||
"Editor image window name");
|
||||
extern INT_VAR_H (editor_image_xpos, 590, "Editor image X Pos");
|
||||
extern INT_VAR_H (editor_image_ypos, 10, "Editor image Y Pos");
|
||||
extern INT_VAR_H (editor_image_height, 680, "Editor image height");
|
||||
extern INT_VAR_H (editor_image_width, 655, "Editor image width");
|
||||
extern INT_VAR_H (editor_image_word_bb_color, BLUE,
|
||||
"Word bounding box colour");
|
||||
extern INT_VAR_H (editor_image_blob_bb_color, YELLOW,
|
||||
"Blob bounding box colour");
|
||||
extern INT_VAR_H (editor_image_text_color, WHITE, "Correct text colour");
|
||||
extern STRING_VAR_H (editor_dbwin_name, "EditorDBWin",
|
||||
"Editor debug window name");
|
||||
extern INT_VAR_H (editor_dbwin_xpos, 50, "Editor debug window X Pos");
|
||||
extern INT_VAR_H (editor_dbwin_ypos, 500, "Editor debug window Y Pos");
|
||||
extern INT_VAR_H (editor_dbwin_height, 24, "Editor debug window height");
|
||||
extern INT_VAR_H (editor_dbwin_width, 80, "Editor debug window width");
|
||||
extern STRING_VAR_H (editor_word_name, "BlnWords",
|
||||
"BL normalised word window");
|
||||
extern INT_VAR_H (editor_word_xpos, 60, "Word window X Pos");
|
||||
extern INT_VAR_H (editor_word_ypos, 510, "Word window Y Pos");
|
||||
extern INT_VAR_H (editor_word_height, 240, "Word window height");
|
||||
extern INT_VAR_H (editor_word_width, 655, "Word window width");
|
||||
extern double_VAR_H (editor_smd_scale_factor, 1.0, "Scaling for smd image");
|
||||
|
||||
ScrollView* bln_word_window_handle(); //return handle
|
||||
void build_image_window(int width, int height);
|
||||
void display_bln_lines(ScrollView window,
|
||||
ScrollView::Color colour,
|
||||
float scale_factor,
|
||||
float y_offset,
|
||||
float minx,
|
||||
float maxx);
|
||||
//function to call
|
||||
void pgeditor_msg( //message display
|
||||
const char *msg);
|
||||
void pgeditor_show_point( //display coords
|
||||
SVEvent *event);
|
||||
//put bln word in box
|
||||
void show_point(PAGE_RES* page_res, float x, float y);
|
||||
|
||||
#endif
|
@ -1,34 +1,34 @@
|
||||
/**********************************************************************
|
||||
* File: reject.h (Formerly reject.h)
|
||||
* Description: Rejection functions used in tessedit
|
||||
* Author: Phil Cheatle
|
||||
* Created: Wed Sep 23 16:50:21 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef REJECT_H
|
||||
#define REJECT_H
|
||||
|
||||
#include "params.h"
|
||||
#include "pageres.h"
|
||||
|
||||
void reject_blanks(WERD_RES *word);
|
||||
void reject_poor_matches(WERD_RES *word);
|
||||
float compute_reject_threshold(WERD_CHOICE* word);
|
||||
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
|
||||
void dont_allow_1Il(WERD_RES *word);
|
||||
void flip_hyphens(WERD_RES *word);
|
||||
void flip_0O(WERD_RES *word);
|
||||
BOOL8 non_0_digit(const char* str, int length);
|
||||
#endif
|
||||
/**********************************************************************
|
||||
* File: reject.h (Formerly reject.h)
|
||||
* Description: Rejection functions used in tessedit
|
||||
* Author: Phil Cheatle
|
||||
* Created: Wed Sep 23 16:50:21 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef REJECT_H
|
||||
#define REJECT_H
|
||||
|
||||
#include "params.h"
|
||||
#include "pageres.h"
|
||||
|
||||
void reject_blanks(WERD_RES *word);
|
||||
void reject_poor_matches(WERD_RES *word);
|
||||
float compute_reject_threshold(WERD_CHOICE* word);
|
||||
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
|
||||
void dont_allow_1Il(WERD_RES *word);
|
||||
void flip_hyphens(WERD_RES *word);
|
||||
void flip_0O(WERD_RES *word);
|
||||
BOOL8 non_0_digit(const char* str, int length);
|
||||
#endif
|
@ -1,28 +1,28 @@
|
||||
/**********************************************************************
|
||||
* File: tessbox.h (Formerly tessbox.h)
|
||||
* Description: Black boxed Tess for developing a resaljet.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 11:03:36 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSBOX_H
|
||||
#define TESSBOX_H
|
||||
|
||||
#include "ratngs.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
// TODO(ocr-team): Delete this along with other empty header files.
|
||||
|
||||
#endif
|
||||
/**********************************************************************
|
||||
* File: tessbox.h (Formerly tessbox.h)
|
||||
* Description: Black boxed Tess for developing a resaljet.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 11:03:36 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSBOX_H
|
||||
#define TESSBOX_H
|
||||
|
||||
#include "ratngs.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
// TODO(ocr-team): Delete this along with other empty header files.
|
||||
|
||||
#endif
|
@ -1,29 +1,29 @@
|
||||
/**********************************************************************
|
||||
* File: tessedit.h (Formerly tessedit.h)
|
||||
* Description: Main program for merge of tess and editor.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Jan 07 15:21:46 GMT 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSEDIT_H
|
||||
#define TESSEDIT_H
|
||||
|
||||
#include "blobs.h"
|
||||
#include "pgedit.h"
|
||||
|
||||
//progress monitor
|
||||
extern ETEXT_DESC *global_monitor;
|
||||
|
||||
#endif
|
||||
/**********************************************************************
|
||||
* File: tessedit.h (Formerly tessedit.h)
|
||||
* Description: Main program for merge of tess and editor.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Jan 07 15:21:46 GMT 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSEDIT_H
|
||||
#define TESSEDIT_H
|
||||
|
||||
#include "blobs.h"
|
||||
#include "pgedit.h"
|
||||
|
||||
//progress monitor
|
||||
extern ETEXT_DESC *global_monitor;
|
||||
|
||||
#endif
|
@ -1,24 +1,24 @@
|
||||
/**********************************************************************
|
||||
* File: tessvars.cpp (Formerly tessvars.c)
|
||||
* Description: Variables and other globals for tessedit.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 13 13:13:23 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "tessvars.h"
|
||||
|
||||
FILE *debug_fp = stderr; // write debug stuff here
|
||||
/**********************************************************************
|
||||
* File: tessvars.cpp (Formerly tessvars.c)
|
||||
* Description: Variables and other globals for tessedit.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 13 13:13:23 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "tessvars.h"
|
||||
|
||||
FILE *debug_fp = stderr; // write debug stuff here
|
@ -1,27 +1,27 @@
|
||||
/**********************************************************************
|
||||
* File: tessvars.h (Formerly tessvars.h)
|
||||
* Description: Variables and other globals for tessedit.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 13 13:13:23 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSVARS_H
|
||||
#define TESSVARS_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
extern FILE *debug_fp; // write debug stuff here
|
||||
#endif
|
||||
/**********************************************************************
|
||||
* File: tessvars.h (Formerly tessvars.h)
|
||||
* Description: Variables and other globals for tessedit.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 13 13:13:23 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSVARS_H
|
||||
#define TESSVARS_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
extern FILE *debug_fp; // write debug stuff here
|
||||
#endif
|
@ -1,27 +1,27 @@
|
||||
/**********************************************************************
|
||||
* File: wordit.h
|
||||
* Description: An iterator for passing over all the words in a document.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 27 08:51:22 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef WERDIT_H
|
||||
#define WERDIT_H
|
||||
|
||||
#include "pageres.h"
|
||||
|
||||
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
|
||||
|
||||
#endif
|
||||
/**********************************************************************
|
||||
* File: wordit.h
|
||||
* Description: An iterator for passing over all the words in a document.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 27 08:51:22 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef WERDIT_H
|
||||
#define WERDIT_H
|
||||
|
||||
#include "pageres.h"
|
||||
|
||||
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
|
||||
|
||||
#endif
|
@ -1,29 +1,29 @@
|
||||
/**********************************************************************
|
||||
* File: blckerr.h (Formerly blockerr.h)
|
||||
* Description: Error codes for the page block classes.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Mar 19 17:43:30 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLCKERR_H
|
||||
#define BLCKERR_H
|
||||
|
||||
#include "errcode.h"
|
||||
|
||||
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
|
||||
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
|
||||
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
|
||||
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
|
||||
#endif
|
||||
/**********************************************************************
|
||||
* File: blckerr.h (Formerly blockerr.h)
|
||||
* Description: Error codes for the page block classes.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Mar 19 17:43:30 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLCKERR_H
|
||||
#define BLCKERR_H
|
||||
|
||||
#include "errcode.h"
|
||||
|
||||
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
|
||||
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
|
||||
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
|
||||
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
|
||||
#endif
|
@ -1,100 +1,100 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: boxword.h
|
||||
// Description: Class to represent the bounding boxes of the output.
|
||||
// Author: Ray Smith
|
||||
// Created: Tue May 25 14:18:14 PDT 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
#define TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "rect.h"
|
||||
#include "unichar.h"
|
||||
|
||||
class BLOCK;
|
||||
class DENORM;
|
||||
struct TWERD;
|
||||
class UNICHARSET;
|
||||
class WERD;
|
||||
class WERD_CHOICE;
|
||||
class WERD_RES;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Class to hold an array of bounding boxes for an output word and
|
||||
// the bounding box of the whole word.
|
||||
class BoxWord {
|
||||
public:
|
||||
BoxWord();
|
||||
explicit BoxWord(const BoxWord& src);
|
||||
~BoxWord();
|
||||
|
||||
BoxWord& operator=(const BoxWord& src);
|
||||
|
||||
void CopyFrom(const BoxWord& src);
|
||||
|
||||
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
|
||||
// switch back to original image coordinates.
|
||||
static BoxWord* CopyFromNormalized(TWERD* tessword);
|
||||
|
||||
// Clean up the bounding boxes from the polygonal approximation by
|
||||
// expanding slightly, then clipping to the blobs from the original_word
|
||||
// that overlap. If not null, the block provides the inverse rotation.
|
||||
void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
|
||||
|
||||
// Merges the boxes from start to end, not including end, and deletes
|
||||
// the boxes between start and end.
|
||||
void MergeBoxes(int start, int end);
|
||||
|
||||
// Inserts a new box before the given index.
|
||||
// Recomputes the bounding box.
|
||||
void InsertBox(int index, const TBOX& box);
|
||||
|
||||
// Changes the box at the given index to the new box.
|
||||
// Recomputes the bounding box.
|
||||
void ChangeBox(int index, const TBOX& box);
|
||||
|
||||
// Deletes the box with the given index, and shuffles up the rest.
|
||||
// Recomputes the bounding box.
|
||||
void DeleteBox(int index);
|
||||
|
||||
// Deletes all the boxes stored in BoxWord.
|
||||
void DeleteAllBoxes();
|
||||
|
||||
// This and other putatively are the same, so call the (permanent) callback
|
||||
// for each blob index where the bounding boxes match.
|
||||
// The callback is deleted on completion.
|
||||
void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
|
||||
|
||||
const TBOX& bounding_box() const {
|
||||
return bbox_;
|
||||
}
|
||||
int length() const { return length_; }
|
||||
const TBOX& BlobBox(int index) const {
|
||||
return boxes_[index];
|
||||
}
|
||||
|
||||
private:
|
||||
void ComputeBoundingBox();
|
||||
|
||||
TBOX bbox_;
|
||||
int length_;
|
||||
GenericVector<TBOX> boxes_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: boxword.h
|
||||
// Description: Class to represent the bounding boxes of the output.
|
||||
// Author: Ray Smith
|
||||
// Created: Tue May 25 14:18:14 PDT 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
#define TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "rect.h"
|
||||
#include "unichar.h"
|
||||
|
||||
class BLOCK;
|
||||
class DENORM;
|
||||
struct TWERD;
|
||||
class UNICHARSET;
|
||||
class WERD;
|
||||
class WERD_CHOICE;
|
||||
class WERD_RES;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Class to hold an array of bounding boxes for an output word and
|
||||
// the bounding box of the whole word.
|
||||
class BoxWord {
|
||||
public:
|
||||
BoxWord();
|
||||
explicit BoxWord(const BoxWord& src);
|
||||
~BoxWord();
|
||||
|
||||
BoxWord& operator=(const BoxWord& src);
|
||||
|
||||
void CopyFrom(const BoxWord& src);
|
||||
|
||||
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
|
||||
// switch back to original image coordinates.
|
||||
static BoxWord* CopyFromNormalized(TWERD* tessword);
|
||||
|
||||
// Clean up the bounding boxes from the polygonal approximation by
|
||||
// expanding slightly, then clipping to the blobs from the original_word
|
||||
// that overlap. If not null, the block provides the inverse rotation.
|
||||
void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
|
||||
|
||||
// Merges the boxes from start to end, not including end, and deletes
|
||||
// the boxes between start and end.
|
||||
void MergeBoxes(int start, int end);
|
||||
|
||||
// Inserts a new box before the given index.
|
||||
// Recomputes the bounding box.
|
||||
void InsertBox(int index, const TBOX& box);
|
||||
|
||||
// Changes the box at the given index to the new box.
|
||||
// Recomputes the bounding box.
|
||||
void ChangeBox(int index, const TBOX& box);
|
||||
|
||||
// Deletes the box with the given index, and shuffles up the rest.
|
||||
// Recomputes the bounding box.
|
||||
void DeleteBox(int index);
|
||||
|
||||
// Deletes all the boxes stored in BoxWord.
|
||||
void DeleteAllBoxes();
|
||||
|
||||
// This and other putatively are the same, so call the (permanent) callback
|
||||
// for each blob index where the bounding boxes match.
|
||||
// The callback is deleted on completion.
|
||||
void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
|
||||
|
||||
const TBOX& bounding_box() const {
|
||||
return bbox_;
|
||||
}
|
||||
int length() const { return length_; }
|
||||
const TBOX& BlobBox(int index) const {
|
||||
return boxes_[index];
|
||||
}
|
||||
|
||||
private:
|
||||
void ComputeBoundingBox();
|
||||
|
||||
TBOX bbox_;
|
||||
int length_;
|
||||
GenericVector<TBOX> boxes_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CSTRUCT_BOXWORD_H_
|
@ -1,36 +1,36 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ccstruct.cpp
|
||||
// Description: ccstruct class.
|
||||
// Author: Samuel Charron
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "ccstruct.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// APPROXIMATIONS of the fractions of the character cell taken by
|
||||
// the descenders, ascenders, and x-height.
|
||||
const double CCStruct::kDescenderFraction = 0.25;
|
||||
const double CCStruct::kXHeightFraction = 0.5;
|
||||
const double CCStruct::kAscenderFraction = 0.25;
|
||||
const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
|
||||
(CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
|
||||
|
||||
CCStruct::CCStruct() {}
|
||||
|
||||
CCStruct::~CCStruct() {
|
||||
}
|
||||
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ccstruct.cpp
|
||||
// Description: ccstruct class.
|
||||
// Author: Samuel Charron
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "ccstruct.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// APPROXIMATIONS of the fractions of the character cell taken by
|
||||
// the descenders, ascenders, and x-height.
|
||||
const double CCStruct::kDescenderFraction = 0.25;
|
||||
const double CCStruct::kXHeightFraction = 0.5;
|
||||
const double CCStruct::kAscenderFraction = 0.25;
|
||||
const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
|
||||
(CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
|
||||
|
||||
CCStruct::CCStruct() {}
|
||||
|
||||
CCStruct::~CCStruct() {
|
||||
}
|
||||
|
||||
}
|
@ -1,43 +1,43 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ccstruct.h
|
||||
// Description: ccstruct class.
|
||||
// Author: Samuel Charron
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
|
||||
#include "cutil.h"
|
||||
|
||||
namespace tesseract {
|
||||
// Extends CUtil. As declared here it adds a set of shared typographic
// constants and nothing else.
class CCStruct : public CUtil {
 public:
  // Globally accessible constants. The values are set in the .cpp file; the
  // trailing notes repeat them for convenience.
  //
  // APPROXIMATIONS of the fractions of the character cell taken by the
  // descenders, the x-height and the ascenders.
  static const double kDescenderFraction;  // = 0.25
  static const double kXHeightFraction;    // = 0.5
  static const double kAscenderFraction;   // = 0.25
  // Derived value: the x-height as a fraction of the cap-height.
  static const double kXHeightCapRatio;    // = XHeight / (XHeight + Ascender)

  CCStruct();
  ~CCStruct();
};
|
||||
|
||||
class Tesseract;
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ccstruct.h
|
||||
// Description: ccstruct class.
|
||||
// Author: Samuel Charron
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
|
||||
#include "cutil.h"
|
||||
|
||||
namespace tesseract {
|
||||
// Extends CUtil. As declared here it adds a set of shared typographic
// constants and nothing else.
class CCStruct : public CUtil {
 public:
  // Globally accessible constants. The values are set in the .cpp file; the
  // trailing notes repeat them for convenience.
  //
  // APPROXIMATIONS of the fractions of the character cell taken by the
  // descenders, the x-height and the ascenders.
  static const double kDescenderFraction;  // = 0.25
  static const double kXHeightFraction;    // = 0.5
  static const double kAscenderFraction;   // = 0.25
  // Derived value: the x-height as a fraction of the cap-height.
  static const double kXHeightCapRatio;    // = XHeight / (XHeight + Ascender)

  CCStruct();
  ~CCStruct();
};
|
||||
|
||||
class Tesseract;
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
@ -1,52 +1,52 @@
|
||||
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Class to hold a Pixa collection of debug images with captions and save them
|
||||
// to a PDF file.
|
||||
class DebugPixa {
|
||||
public:
|
||||
// TODO(rays) add another constructor with size control.
|
||||
DebugPixa() {
|
||||
pixa_ = pixaCreate(0);
|
||||
fonts_ = bmfCreate(nullptr, 14);
|
||||
}
|
||||
// If the filename_ has been set and there are any debug images, they are
|
||||
// written to the set filename_.
|
||||
~DebugPixa() {
|
||||
pixaDestroy(&pixa_);
|
||||
bmfDestroy(&fonts_);
|
||||
}
|
||||
|
||||
// Adds the given pix to the set of pages in the PDF file, with the given
|
||||
// caption added to the top.
|
||||
void AddPix(const Pix* pix, const char* caption) {
|
||||
int depth = pixGetDepth(const_cast<Pix*>(pix));
|
||||
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
|
||||
Pix* pix_debug = pixAddSingleTextblock(
|
||||
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
|
||||
pixaAddPix(pixa_, pix_debug, L_INSERT);
|
||||
}
|
||||
|
||||
// Sets the destination filename and enables images to be written to a PDF
|
||||
// on destruction.
|
||||
void WritePDF(const char* filename) {
|
||||
if (pixaGetCount(pixa_) > 0) {
|
||||
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
|
||||
pixaClear(pixa_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// The collection of images to put in the PDF.
|
||||
Pixa* pixa_;
|
||||
// The fonts used to draw text captions.
|
||||
L_Bmf* fonts_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Class to hold a Pixa collection of debug images with captions and save them
|
||||
// to a PDF file.
|
||||
class DebugPixa {
|
||||
public:
|
||||
// TODO(rays) add another constructor with size control.
|
||||
DebugPixa() {
|
||||
pixa_ = pixaCreate(0);
|
||||
fonts_ = bmfCreate(nullptr, 14);
|
||||
}
|
||||
// If the filename_ has been set and there are any debug images, they are
|
||||
// written to the set filename_.
|
||||
~DebugPixa() {
|
||||
pixaDestroy(&pixa_);
|
||||
bmfDestroy(&fonts_);
|
||||
}
|
||||
|
||||
// Adds the given pix to the set of pages in the PDF file, with the given
|
||||
// caption added to the top.
|
||||
void AddPix(const Pix* pix, const char* caption) {
|
||||
int depth = pixGetDepth(const_cast<Pix*>(pix));
|
||||
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
|
||||
Pix* pix_debug = pixAddSingleTextblock(
|
||||
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
|
||||
pixaAddPix(pixa_, pix_debug, L_INSERT);
|
||||
}
|
||||
|
||||
// Sets the destination filename and enables images to be written to a PDF
|
||||
// on destruction.
|
||||
void WritePDF(const char* filename) {
|
||||
if (pixaGetCount(pixa_) > 0) {
|
||||
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
|
||||
pixaClear(pixa_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// The collection of images to put in the PDF.
|
||||
Pixa* pixa_;
|
||||
// The fonts used to draw text captions.
|
||||
L_Bmf* fonts_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
@ -1,295 +1,295 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: detlinefit.cpp
|
||||
// Description: Deterministic least median squares line fitting.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu Feb 28 14:45:01 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "detlinefit.h"
|
||||
#include "statistc.h"
|
||||
#include "ndminx.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The number of points to consider at each end.
const int kNumEndPoints = 3;
// The minimum number of points at which to switch to number of points
// for badly fitted lines.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// The maximum real distance to use before switching to number of
// mis-fitted points, which will get square-rooted for true distance.
// Declared as double: it is a distance used only in floating-point
// expressions, and an int would silently truncate a fractional setting.
const double kMaxRealDistance = 2.0;
|
||||
|
||||
// Starts with a zero squared line length.
DetLineFit::DetLineFit()
    : square_length_(0.0) {}

// Nothing to release explicitly.
DetLineFit::~DetLineFit() {}
|
||||
|
||||
// Delete all Added points.
|
||||
void DetLineFit::Clear() {
|
||||
pts_.clear();
|
||||
distances_.clear();
|
||||
}
|
||||
|
||||
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
|
||||
void DetLineFit::Add(const ICOORD& pt) {
|
||||
pts_.push_back(PointWidth(pt, 0));
|
||||
}
|
||||
// Associates a half-width with the given point if a point overlaps the
|
||||
// previous point by more than half the width, and its distance is further
|
||||
// than the previous point, then the more distant point is ignored in the
|
||||
// distance calculation. Useful for ignoring i dots and other diacritics.
|
||||
void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
|
||||
pts_.push_back(PointWidth(pt, halfwidth));
|
||||
}
|
||||
|
||||
// Fits a line to the points, ignoring the skip_first initial points and the
|
||||
// skip_last final points, returning the fitted line as a pair of points,
|
||||
// and the upper quartile error.
|
||||
double DetLineFit::Fit(int skip_first, int skip_last,
|
||||
ICOORD* pt1, ICOORD* pt2) {
|
||||
// Do something sensible with no points.
|
||||
if (pts_.empty()) {
|
||||
pt1->set_x(0);
|
||||
pt1->set_y(0);
|
||||
*pt2 = *pt1;
|
||||
return 0.0;
|
||||
}
|
||||
// Count the points and find the first and last kNumEndPoints.
|
||||
int pt_count = pts_.size();
|
||||
ICOORD* starts[kNumEndPoints];
|
||||
if (skip_first >= pt_count) skip_first = pt_count - 1;
|
||||
int start_count = 0;
|
||||
int end_i = MIN(skip_first + kNumEndPoints, pt_count);
|
||||
for (int i = skip_first; i < end_i; ++i) {
|
||||
starts[start_count++] = &pts_[i].pt;
|
||||
}
|
||||
ICOORD* ends[kNumEndPoints];
|
||||
if (skip_last >= pt_count) skip_last = pt_count - 1;
|
||||
int end_count = 0;
|
||||
end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
|
||||
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
|
||||
ends[end_count++] = &pts_[i].pt;
|
||||
}
|
||||
// 1 or 2 points need special treatment.
|
||||
if (pt_count <= 2) {
|
||||
*pt1 = *starts[0];
|
||||
if (pt_count > 1)
|
||||
*pt2 = *ends[0];
|
||||
else
|
||||
*pt2 = *pt1;
|
||||
return 0.0;
|
||||
}
|
||||
// Although with between 2 and 2*kNumEndPoints-1 points, there will be
|
||||
// overlap in the starts, ends sets, this is OK and taken care of by the
|
||||
// if (*start != *end) test below, which also tests for equal input points.
|
||||
double best_uq = -1.0;
|
||||
// Iterate each pair of points and find the best fitting line.
|
||||
for (int i = 0; i < start_count; ++i) {
|
||||
ICOORD* start = starts[i];
|
||||
for (int j = 0; j < end_count; ++j) {
|
||||
ICOORD* end = ends[j];
|
||||
if (*start != *end) {
|
||||
ComputeDistances(*start, *end);
|
||||
// Compute the upper quartile error from the line.
|
||||
double dist = EvaluateLineFit();
|
||||
if (dist < best_uq || best_uq < 0.0) {
|
||||
best_uq = dist;
|
||||
*pt1 = *start;
|
||||
*pt2 = *end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Finally compute the square root to return the true distance.
|
||||
return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
|
||||
}
|
||||
|
||||
// Constrained fit with a supplied direction vector. Finds the best line_pt,
|
||||
// that is one of the supplied points having the median cross product with
|
||||
// direction, ignoring points that have a cross product outside of the range
|
||||
// [min_dist, max_dist]. Returns the resulting error metric using the same
|
||||
// reduced set of points.
|
||||
// *Makes use of floating point arithmetic*
|
||||
double DetLineFit::ConstrainedFit(const FCOORD& direction,
|
||||
double min_dist, double max_dist,
|
||||
bool debug, ICOORD* line_pt) {
|
||||
ComputeConstrainedDistances(direction, min_dist, max_dist);
|
||||
// Do something sensible with no points or computed distances.
|
||||
if (pts_.empty() || distances_.empty()) {
|
||||
line_pt->set_x(0);
|
||||
line_pt->set_y(0);
|
||||
return 0.0;
|
||||
}
|
||||
int median_index = distances_.choose_nth_item(distances_.size() / 2);
|
||||
*line_pt = distances_[median_index].data;
|
||||
if (debug) {
|
||||
tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
|
||||
direction.x(), direction.y(),
|
||||
line_pt->x(), line_pt->y(), distances_.size());
|
||||
for (int i = 0; i < distances_.size(); ++i) {
|
||||
tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
|
||||
distances_[i].data.y(), distances_[i].key);
|
||||
}
|
||||
tprintf("Result = %d\n", median_index);
|
||||
}
|
||||
// Center distances on the fitted point.
|
||||
double dist_origin = direction * *line_pt;
|
||||
for (int i = 0; i < distances_.size(); ++i) {
|
||||
distances_[i].key -= dist_origin;
|
||||
}
|
||||
return sqrt(EvaluateLineFit());
|
||||
}
|
||||
|
||||
// Returns true if there were enough points at the last call to Fit or
|
||||
// ConstrainedFit for the fitted points to be used on a badly fitted line.
|
||||
bool DetLineFit::SufficientPointsForIndependentFit() const {
|
||||
return distances_.size() >= kMinPointsForErrorCount;
|
||||
}
|
||||
|
||||
// Backwards compatible fit returning a gradient and constant.
|
||||
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
|
||||
// function in preference to the LMS class.
|
||||
double DetLineFit::Fit(float* m, float* c) {
|
||||
ICOORD start, end;
|
||||
double error = Fit(&start, &end);
|
||||
if (end.x() != start.x()) {
|
||||
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
|
||||
*c = start.y() - *m * start.x();
|
||||
} else {
|
||||
*m = 0.0f;
|
||||
*c = 0.0f;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
// Backwards compatible constrained fit with a supplied gradient.
|
||||
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
|
||||
// to avoid potential difficulties with infinite gradients.
|
||||
double DetLineFit::ConstrainedFit(double m, float* c) {
|
||||
// Do something sensible with no points.
|
||||
if (pts_.empty()) {
|
||||
*c = 0.0f;
|
||||
return 0.0;
|
||||
}
|
||||
double cos = 1.0 / sqrt(1.0 + m * m);
|
||||
FCOORD direction(cos, m * cos);
|
||||
ICOORD line_pt;
|
||||
double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
|
||||
&line_pt);
|
||||
*c = line_pt.y() - line_pt.x() * m;
|
||||
return error;
|
||||
}
|
||||
|
||||
// Computes and returns the squared evaluation metric for a line fit.
|
||||
double DetLineFit::EvaluateLineFit() {
|
||||
// Compute the upper quartile error from the line.
|
||||
double dist = ComputeUpperQuartileError();
|
||||
if (distances_.size() >= kMinPointsForErrorCount &&
|
||||
dist > kMaxRealDistance * kMaxRealDistance) {
|
||||
// Use the number of mis-fitted points as the error metric, as this
|
||||
// gives a better measure of fit for badly fitted lines where more
|
||||
// than a quarter are badly fitted.
|
||||
double threshold = kMaxRealDistance * sqrt(square_length_);
|
||||
dist = NumberOfMisfittedPoints(threshold);
|
||||
}
|
||||
return dist;
|
||||
}
|
||||
|
||||
// Computes the absolute error distances of the points from the line,
|
||||
// and returns the squared upper-quartile error distance.
|
||||
double DetLineFit::ComputeUpperQuartileError() {
|
||||
int num_errors = distances_.size();
|
||||
if (num_errors == 0) return 0.0;
|
||||
// Get the absolute values of the errors.
|
||||
for (int i = 0; i < num_errors; ++i) {
|
||||
if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
|
||||
}
|
||||
// Now get the upper quartile distance.
|
||||
int index = distances_.choose_nth_item(3 * num_errors / 4);
|
||||
double dist = distances_[index].key;
|
||||
// The true distance is the square root of the dist squared / square_length.
|
||||
// Don't bother with the square root. Just return the square distance.
|
||||
return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
|
||||
}
|
||||
|
||||
// Returns the number of sample points that have an error more than threshold.
|
||||
int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
|
||||
int num_misfits = 0;
|
||||
int num_dists = distances_.size();
|
||||
// Get the absolute values of the errors.
|
||||
for (int i = 0; i < num_dists; ++i) {
|
||||
if (distances_[i].key > threshold)
|
||||
++num_misfits;
|
||||
}
|
||||
return num_misfits;
|
||||
}
|
||||
|
||||
// Computes all the cross product distances of the points from the line,
|
||||
// storing the actual (signed) cross products in distances.
|
||||
// Ignores distances of points that are further away than the previous point,
|
||||
// and overlaps the previous point by at least half.
|
||||
void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
|
||||
distances_.truncate(0);
|
||||
ICOORD line_vector = end;
|
||||
line_vector -= start;
|
||||
square_length_ = line_vector.sqlength();
|
||||
int line_length = IntCastRounded(sqrt(square_length_));
|
||||
// Compute the distance of each point from the line.
|
||||
int prev_abs_dist = 0;
|
||||
int prev_dot = 0;
|
||||
for (int i = 0; i < pts_.size(); ++i) {
|
||||
ICOORD pt_vector = pts_[i].pt;
|
||||
pt_vector -= start;
|
||||
int dot = line_vector % pt_vector;
|
||||
// Compute |line_vector||pt_vector|sin(angle between)
|
||||
int dist = line_vector * pt_vector;
|
||||
int abs_dist = dist < 0 ? -dist : dist;
|
||||
if (abs_dist > prev_abs_dist && i > 0) {
|
||||
// Ignore this point if it overlaps the previous one.
|
||||
int separation = abs(dot - prev_dot);
|
||||
if (separation < line_length * pts_[i].halfwidth ||
|
||||
separation < line_length * pts_[i - 1].halfwidth)
|
||||
continue;
|
||||
}
|
||||
distances_.push_back(DistPointPair(dist, pts_[i].pt));
|
||||
prev_abs_dist = abs_dist;
|
||||
prev_dot = dot;
|
||||
}
|
||||
}
|
||||
|
||||
// Computes all the cross product distances of the points perpendicular to
|
||||
// the given direction, ignoring distances outside of the give distance range,
|
||||
// storing the actual (signed) cross products in distances_.
|
||||
void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
|
||||
double min_dist, double max_dist) {
|
||||
distances_.truncate(0);
|
||||
square_length_ = direction.sqlength();
|
||||
// Compute the distance of each point from the line.
|
||||
for (int i = 0; i < pts_.size(); ++i) {
|
||||
FCOORD pt_vector = pts_[i].pt;
|
||||
// Compute |line_vector||pt_vector|sin(angle between)
|
||||
double dist = direction * pt_vector;
|
||||
if (min_dist <= dist && dist <= max_dist)
|
||||
distances_.push_back(DistPointPair(dist, pts_[i].pt));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: detlinefit.cpp
|
||||
// Description: Deterministic least median squares line fitting.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu Feb 28 14:45:01 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "detlinefit.h"
|
||||
#include "statistc.h"
|
||||
#include "ndminx.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The number of points to consider at each end.
const int kNumEndPoints = 3;
// The minimum number of points at which to switch to number of points
// for badly fitted lines.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// The maximum real distance to use before switching to number of
// mis-fitted points, which will get square-rooted for true distance.
// Declared as double: it is a distance used only in floating-point
// expressions, and an int would silently truncate a fractional setting.
const double kMaxRealDistance = 2.0;
|
||||
|
||||
// Starts with a zero squared line length.
DetLineFit::DetLineFit()
    : square_length_(0.0) {}

// Nothing to release explicitly.
DetLineFit::~DetLineFit() {}
|
||||
|
||||
// Delete all Added points.
|
||||
void DetLineFit::Clear() {
|
||||
pts_.clear();
|
||||
distances_.clear();
|
||||
}
|
||||
|
||||
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
|
||||
void DetLineFit::Add(const ICOORD& pt) {
|
||||
pts_.push_back(PointWidth(pt, 0));
|
||||
}
|
||||
// Associates a half-width with the given point if a point overlaps the
|
||||
// previous point by more than half the width, and its distance is further
|
||||
// than the previous point, then the more distant point is ignored in the
|
||||
// distance calculation. Useful for ignoring i dots and other diacritics.
|
||||
void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
|
||||
pts_.push_back(PointWidth(pt, halfwidth));
|
||||
}
|
||||
|
||||
// Fits a line to the points, ignoring the skip_first initial points and the
|
||||
// skip_last final points, returning the fitted line as a pair of points,
|
||||
// and the upper quartile error.
|
||||
double DetLineFit::Fit(int skip_first, int skip_last,
|
||||
ICOORD* pt1, ICOORD* pt2) {
|
||||
// Do something sensible with no points.
|
||||
if (pts_.empty()) {
|
||||
pt1->set_x(0);
|
||||
pt1->set_y(0);
|
||||
*pt2 = *pt1;
|
||||
return 0.0;
|
||||
}
|
||||
// Count the points and find the first and last kNumEndPoints.
|
||||
int pt_count = pts_.size();
|
||||
ICOORD* starts[kNumEndPoints];
|
||||
if (skip_first >= pt_count) skip_first = pt_count - 1;
|
||||
int start_count = 0;
|
||||
int end_i = MIN(skip_first + kNumEndPoints, pt_count);
|
||||
for (int i = skip_first; i < end_i; ++i) {
|
||||
starts[start_count++] = &pts_[i].pt;
|
||||
}
|
||||
ICOORD* ends[kNumEndPoints];
|
||||
if (skip_last >= pt_count) skip_last = pt_count - 1;
|
||||
int end_count = 0;
|
||||
end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
|
||||
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
|
||||
ends[end_count++] = &pts_[i].pt;
|
||||
}
|
||||
// 1 or 2 points need special treatment.
|
||||
if (pt_count <= 2) {
|
||||
*pt1 = *starts[0];
|
||||
if (pt_count > 1)
|
||||
*pt2 = *ends[0];
|
||||
else
|
||||
*pt2 = *pt1;
|
||||
return 0.0;
|
||||
}
|
||||
// Although with between 2 and 2*kNumEndPoints-1 points, there will be
|
||||
// overlap in the starts, ends sets, this is OK and taken care of by the
|
||||
// if (*start != *end) test below, which also tests for equal input points.
|
||||
double best_uq = -1.0;
|
||||
// Iterate each pair of points and find the best fitting line.
|
||||
for (int i = 0; i < start_count; ++i) {
|
||||
ICOORD* start = starts[i];
|
||||
for (int j = 0; j < end_count; ++j) {
|
||||
ICOORD* end = ends[j];
|
||||
if (*start != *end) {
|
||||
ComputeDistances(*start, *end);
|
||||
// Compute the upper quartile error from the line.
|
||||
double dist = EvaluateLineFit();
|
||||
if (dist < best_uq || best_uq < 0.0) {
|
||||
best_uq = dist;
|
||||
*pt1 = *start;
|
||||
*pt2 = *end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Finally compute the square root to return the true distance.
|
||||
return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
|
||||
}
|
||||
|
||||
// Constrained fit with a supplied direction vector. Finds the best line_pt,
|
||||
// that is one of the supplied points having the median cross product with
|
||||
// direction, ignoring points that have a cross product outside of the range
|
||||
// [min_dist, max_dist]. Returns the resulting error metric using the same
|
||||
// reduced set of points.
|
||||
// *Makes use of floating point arithmetic*
|
||||
double DetLineFit::ConstrainedFit(const FCOORD& direction,
|
||||
double min_dist, double max_dist,
|
||||
bool debug, ICOORD* line_pt) {
|
||||
ComputeConstrainedDistances(direction, min_dist, max_dist);
|
||||
// Do something sensible with no points or computed distances.
|
||||
if (pts_.empty() || distances_.empty()) {
|
||||
line_pt->set_x(0);
|
||||
line_pt->set_y(0);
|
||||
return 0.0;
|
||||
}
|
||||
int median_index = distances_.choose_nth_item(distances_.size() / 2);
|
||||
*line_pt = distances_[median_index].data;
|
||||
if (debug) {
|
||||
tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
|
||||
direction.x(), direction.y(),
|
||||
line_pt->x(), line_pt->y(), distances_.size());
|
||||
for (int i = 0; i < distances_.size(); ++i) {
|
||||
tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
|
||||
distances_[i].data.y(), distances_[i].key);
|
||||
}
|
||||
tprintf("Result = %d\n", median_index);
|
||||
}
|
||||
// Center distances on the fitted point.
|
||||
double dist_origin = direction * *line_pt;
|
||||
for (int i = 0; i < distances_.size(); ++i) {
|
||||
distances_[i].key -= dist_origin;
|
||||
}
|
||||
return sqrt(EvaluateLineFit());
|
||||
}
|
||||
|
||||
// Returns true if there were enough points at the last call to Fit or
|
||||
// ConstrainedFit for the fitted points to be used on a badly fitted line.
|
||||
bool DetLineFit::SufficientPointsForIndependentFit() const {
|
||||
return distances_.size() >= kMinPointsForErrorCount;
|
||||
}
|
||||
|
||||
// Backwards compatible fit returning a gradient and constant.
|
||||
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
|
||||
// function in preference to the LMS class.
|
||||
double DetLineFit::Fit(float* m, float* c) {
|
||||
ICOORD start, end;
|
||||
double error = Fit(&start, &end);
|
||||
if (end.x() != start.x()) {
|
||||
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
|
||||
*c = start.y() - *m * start.x();
|
||||
} else {
|
||||
*m = 0.0f;
|
||||
*c = 0.0f;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
// Backwards compatible constrained fit with a supplied gradient.
|
||||
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
|
||||
// to avoid potential difficulties with infinite gradients.
|
||||
double DetLineFit::ConstrainedFit(double m, float* c) {
|
||||
// Do something sensible with no points.
|
||||
if (pts_.empty()) {
|
||||
*c = 0.0f;
|
||||
return 0.0;
|
||||
}
|
||||
double cos = 1.0 / sqrt(1.0 + m * m);
|
||||
FCOORD direction(cos, m * cos);
|
||||
ICOORD line_pt;
|
||||
double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
|
||||
&line_pt);
|
||||
*c = line_pt.y() - line_pt.x() * m;
|
||||
return error;
|
||||
}
|
||||
|
||||
// Computes and returns the squared evaluation metric for a line fit.
|
||||
double DetLineFit::EvaluateLineFit() {
|
||||
// Compute the upper quartile error from the line.
|
||||
double dist = ComputeUpperQuartileError();
|
||||
if (distances_.size() >= kMinPointsForErrorCount &&
|
||||
dist > kMaxRealDistance * kMaxRealDistance) {
|
||||
// Use the number of mis-fitted points as the error metric, as this
|
||||
// gives a better measure of fit for badly fitted lines where more
|
||||
// than a quarter are badly fitted.
|
||||
double threshold = kMaxRealDistance * sqrt(square_length_);
|
||||
dist = NumberOfMisfittedPoints(threshold);
|
||||
}
|
||||
return dist;
|
||||
}
|
||||
|
||||
// Computes the absolute error distances of the points from the line,
|
||||
// and returns the squared upper-quartile error distance.
|
||||
double DetLineFit::ComputeUpperQuartileError() {
|
||||
int num_errors = distances_.size();
|
||||
if (num_errors == 0) return 0.0;
|
||||
// Get the absolute values of the errors.
|
||||
for (int i = 0; i < num_errors; ++i) {
|
||||
if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
|
||||
}
|
||||
// Now get the upper quartile distance.
|
||||
int index = distances_.choose_nth_item(3 * num_errors / 4);
|
||||
double dist = distances_[index].key;
|
||||
// The true distance is the square root of the dist squared / square_length.
|
||||
// Don't bother with the square root. Just return the square distance.
|
||||
return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
|
||||
}
|
||||
|
||||
// Returns the number of sample points that have an error more than threshold.
|
||||
int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
|
||||
int num_misfits = 0;
|
||||
int num_dists = distances_.size();
|
||||
// Get the absolute values of the errors.
|
||||
for (int i = 0; i < num_dists; ++i) {
|
||||
if (distances_[i].key > threshold)
|
||||
++num_misfits;
|
||||
}
|
||||
return num_misfits;
|
||||
}
|
||||
|
||||
// Computes all the cross product distances of the points from the line,
|
||||
// storing the actual (signed) cross products in distances.
|
||||
// Ignores distances of points that are further away than the previous point,
|
||||
// and overlaps the previous point by at least half.
|
||||
void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
|
||||
distances_.truncate(0);
|
||||
ICOORD line_vector = end;
|
||||
line_vector -= start;
|
||||
square_length_ = line_vector.sqlength();
|
||||
int line_length = IntCastRounded(sqrt(square_length_));
|
||||
// Compute the distance of each point from the line.
|
||||
int prev_abs_dist = 0;
|
||||
int prev_dot = 0;
|
||||
for (int i = 0; i < pts_.size(); ++i) {
|
||||
ICOORD pt_vector = pts_[i].pt;
|
||||
pt_vector -= start;
|
||||
int dot = line_vector % pt_vector;
|
||||
// Compute |line_vector||pt_vector|sin(angle between)
|
||||
int dist = line_vector * pt_vector;
|
||||
int abs_dist = dist < 0 ? -dist : dist;
|
||||
if (abs_dist > prev_abs_dist && i > 0) {
|
||||
// Ignore this point if it overlaps the previous one.
|
||||
int separation = abs(dot - prev_dot);
|
||||
if (separation < line_length * pts_[i].halfwidth ||
|
||||
separation < line_length * pts_[i - 1].halfwidth)
|
||||
continue;
|
||||
}
|
||||
distances_.push_back(DistPointPair(dist, pts_[i].pt));
|
||||
prev_abs_dist = abs_dist;
|
||||
prev_dot = dot;
|
||||
}
|
||||
}
|
||||
|
||||
// Computes all the cross product distances of the points perpendicular to
|
||||
// the given direction, ignoring distances outside of the give distance range,
|
||||
// storing the actual (signed) cross products in distances_.
|
||||
void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
|
||||
double min_dist, double max_dist) {
|
||||
distances_.truncate(0);
|
||||
square_length_ = direction.sqlength();
|
||||
// Compute the distance of each point from the line.
|
||||
for (int i = 0; i < pts_.size(); ++i) {
|
||||
FCOORD pt_vector = pts_[i].pt;
|
||||
// Compute |line_vector||pt_vector|sin(angle between)
|
||||
double dist = direction * pt_vector;
|
||||
if (min_dist <= dist && dist <= max_dist)
|
||||
distances_.push_back(DistPointPair(dist, pts_[i].pt));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
@ -1,164 +1,164 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: detlinefit.h
|
||||
// Description: Deterministic least upper-quartile squares line fitting.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu Feb 28 14:35:01 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "kdpair.h"
|
||||
#include "points.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This class fits a line to a set of ICOORD points.
|
||||
// There is no restriction on the direction of the line, as it
|
||||
// uses a vector method, ie no concern over infinite gradients.
|
||||
// The fitted line has the least upper quartile of squares of perpendicular
|
||||
// distances of all source points from the line, subject to the constraint
|
||||
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
|
||||
// i.e. the 9 combinations of one of the first 3 and last 3 points.
|
||||
// A fundamental assumption of this algorithm is that one of the first 3 and
|
||||
// one of the last 3 points are near the best line fit.
|
||||
// The points must be Added in line order for the algorithm to work properly.
|
||||
// No floating point calculations are needed* to make an accurate fit,
|
||||
// and no random numbers are needed** so the algorithm is deterministic,
|
||||
// architecture-stable, and compiler-stable as well as stable to minor
|
||||
// changes in the input.
|
||||
// *A single floating point division is used to compute each line's distance.
|
||||
// This is unlikely to result in choice of a different line, but if it does,
|
||||
// it would be easy to replace with a 64 bit integer calculation.
|
||||
// **Random numbers are used in the nth_item function, but the worst
|
||||
// non-determinism that can result is picking a different result among equals,
|
||||
// and that wouldn't make any difference to the end-result distance, so the
|
||||
// randomness does not affect the determinism of the algorithm. The random
|
||||
// numbers are only there to guarantee average linear time.
|
||||
// Fitting time is linear, but with a high constant, as it tries 9 different
|
||||
// lines and computes the distance of all points each time.
|
||||
// This class is aimed at replacing the LLSQ (linear least squares) and
|
||||
// LMS (least median of squares) classes that are currently used for most
|
||||
// of the line fitting in Tesseract.
|
||||
class DetLineFit {
|
||||
public:
|
||||
DetLineFit();
|
||||
~DetLineFit();
|
||||
|
||||
// Delete all Added points.
|
||||
void Clear();
|
||||
|
||||
// Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
|
||||
// Add must be called on points in sequence along the line.
|
||||
void Add(const ICOORD& pt);
|
||||
// Associates a half-width with the given point if a point overlaps the
|
||||
// previous point by more than half the width, and its distance is further
|
||||
// than the previous point, then the more distant point is ignored in the
|
||||
// distance calculation. Useful for ignoring i dots and other diacritics.
|
||||
void Add(const ICOORD& pt, int halfwidth);
|
||||
|
||||
// Fits a line to the points, returning the fitted line as a pair of
|
||||
// points, and the upper quartile error.
|
||||
double Fit(ICOORD* pt1, ICOORD* pt2) {
|
||||
return Fit(0, 0, pt1, pt2);
|
||||
}
|
||||
// Fits a line to the points, ignoring the skip_first initial points and the
|
||||
// skip_last final points, returning the fitted line as a pair of points,
|
||||
// and the upper quartile error.
|
||||
double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
|
||||
|
||||
// Constrained fit with a supplied direction vector. Finds the best line_pt,
|
||||
// that is one of the supplied points having the median cross product with
|
||||
// direction, ignoring points that have a cross product outside of the range
|
||||
// [min_dist, max_dist]. Returns the resulting error metric using the same
|
||||
// reduced set of points.
|
||||
// *Makes use of floating point arithmetic*
|
||||
double ConstrainedFit(const FCOORD& direction,
|
||||
double min_dist, double max_dist,
|
||||
bool debug, ICOORD* line_pt);
|
||||
|
||||
// Returns true if there were enough points at the last call to Fit or
|
||||
// ConstrainedFit for the fitted points to be used on a badly fitted line.
|
||||
bool SufficientPointsForIndependentFit() const;
|
||||
|
||||
// Backwards compatible fit returning a gradient and constant.
|
||||
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
|
||||
// function in preference to the LMS class.
|
||||
double Fit(float* m, float* c);
|
||||
|
||||
// Backwards compatible constrained fit with a supplied gradient.
|
||||
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
|
||||
// to avoid potential difficulties with infinite gradients.
|
||||
double ConstrainedFit(double m, float* c);
|
||||
|
||||
private:
|
||||
// Simple struct to hold an ICOORD point and a halfwidth representing half
|
||||
// the "width" (supposedly approximately parallel to the direction of the
|
||||
// line) of each point, such that distant points can be discarded when they
|
||||
// overlap nearer points. (Think i dot and other diacritics or noise.)
|
||||
struct PointWidth {
|
||||
PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
|
||||
PointWidth(const ICOORD& pt0, int halfwidth0)
|
||||
: pt(pt0), halfwidth(halfwidth0) {}
|
||||
|
||||
ICOORD pt;
|
||||
int halfwidth;
|
||||
};
|
||||
// Type holds the distance of each point from the fitted line and the point
|
||||
// itself. Use of double allows integer distances from ICOORDs to be stored
|
||||
// exactly, and also the floating point results from ConstrainedFit.
|
||||
typedef KDPairInc<double, ICOORD> DistPointPair;
|
||||
|
||||
// Computes and returns the squared evaluation metric for a line fit.
|
||||
double EvaluateLineFit();
|
||||
|
||||
// Computes the absolute values of the precomputed distances_,
|
||||
// and returns the squared upper-quartile error distance.
|
||||
double ComputeUpperQuartileError();
|
||||
|
||||
// Returns the number of sample points that have an error more than threshold.
|
||||
int NumberOfMisfittedPoints(double threshold) const;
|
||||
|
||||
// Computes all the cross product distances of the points from the line,
|
||||
// storing the actual (signed) cross products in distances_.
|
||||
// Ignores distances of points that are further away than the previous point,
|
||||
// and overlaps the previous point by at least half.
|
||||
void ComputeDistances(const ICOORD& start, const ICOORD& end);
|
||||
|
||||
// Computes all the cross product distances of the points perpendicular to
|
||||
// the given direction, ignoring distances outside of the give distance range,
|
||||
// storing the actual (signed) cross products in distances_.
|
||||
void ComputeConstrainedDistances(const FCOORD& direction,
|
||||
double min_dist, double max_dist);
|
||||
|
||||
// Stores all the source points in the order they were given and their
|
||||
// halfwidths, if any.
|
||||
GenericVector<PointWidth> pts_;
|
||||
// Stores the computed perpendicular distances of (some of) the pts_ from a
|
||||
// given vector (assuming it goes through the origin, making it a line).
|
||||
// Since the distances may be a subset of the input points, and get
|
||||
// re-ordered by the nth_item function, the original point is stored
|
||||
// along side the distance.
|
||||
GenericVector<DistPointPair> distances_; // Distances of points.
|
||||
// The squared length of the vector used to compute distances_.
|
||||
double square_length_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: detlinefit.h
|
||||
// Description: Deterministic least upper-quartile squares line fitting.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu Feb 28 14:35:01 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "kdpair.h"
|
||||
#include "points.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This class fits a line to a set of ICOORD points.
|
||||
// There is no restriction on the direction of the line, as it
|
||||
// uses a vector method, ie no concern over infinite gradients.
|
||||
// The fitted line has the least upper quartile of squares of perpendicular
|
||||
// distances of all source points from the line, subject to the constraint
|
||||
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
|
||||
// i.e. the 9 combinations of one of the first 3 and last 3 points.
|
||||
// A fundamental assumption of this algorithm is that one of the first 3 and
|
||||
// one of the last 3 points are near the best line fit.
|
||||
// The points must be Added in line order for the algorithm to work properly.
|
||||
// No floating point calculations are needed* to make an accurate fit,
|
||||
// and no random numbers are needed** so the algorithm is deterministic,
|
||||
// architecture-stable, and compiler-stable as well as stable to minor
|
||||
// changes in the input.
|
||||
// *A single floating point division is used to compute each line's distance.
|
||||
// This is unlikely to result in choice of a different line, but if it does,
|
||||
// it would be easy to replace with a 64 bit integer calculation.
|
||||
// **Random numbers are used in the nth_item function, but the worst
|
||||
// non-determinism that can result is picking a different result among equals,
|
||||
// and that wouldn't make any difference to the end-result distance, so the
|
||||
// randomness does not affect the determinism of the algorithm. The random
|
||||
// numbers are only there to guarantee average linear time.
|
||||
// Fitting time is linear, but with a high constant, as it tries 9 different
|
||||
// lines and computes the distance of all points each time.
|
||||
// This class is aimed at replacing the LLSQ (linear least squares) and
|
||||
// LMS (least median of squares) classes that are currently used for most
|
||||
// of the line fitting in Tesseract.
|
||||
class DetLineFit {
|
||||
public:
|
||||
DetLineFit();
|
||||
~DetLineFit();
|
||||
|
||||
// Delete all Added points.
|
||||
void Clear();
|
||||
|
||||
// Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
|
||||
// Add must be called on points in sequence along the line.
|
||||
void Add(const ICOORD& pt);
|
||||
// Associates a half-width with the given point if a point overlaps the
|
||||
// previous point by more than half the width, and its distance is further
|
||||
// than the previous point, then the more distant point is ignored in the
|
||||
// distance calculation. Useful for ignoring i dots and other diacritics.
|
||||
void Add(const ICOORD& pt, int halfwidth);
|
||||
|
||||
// Fits a line to the points, returning the fitted line as a pair of
|
||||
// points, and the upper quartile error.
|
||||
double Fit(ICOORD* pt1, ICOORD* pt2) {
|
||||
return Fit(0, 0, pt1, pt2);
|
||||
}
|
||||
// Fits a line to the points, ignoring the skip_first initial points and the
|
||||
// skip_last final points, returning the fitted line as a pair of points,
|
||||
// and the upper quartile error.
|
||||
double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
|
||||
|
||||
// Constrained fit with a supplied direction vector. Finds the best line_pt,
|
||||
// that is one of the supplied points having the median cross product with
|
||||
// direction, ignoring points that have a cross product outside of the range
|
||||
// [min_dist, max_dist]. Returns the resulting error metric using the same
|
||||
// reduced set of points.
|
||||
// *Makes use of floating point arithmetic*
|
||||
double ConstrainedFit(const FCOORD& direction,
|
||||
double min_dist, double max_dist,
|
||||
bool debug, ICOORD* line_pt);
|
||||
|
||||
// Returns true if there were enough points at the last call to Fit or
|
||||
// ConstrainedFit for the fitted points to be used on a badly fitted line.
|
||||
bool SufficientPointsForIndependentFit() const;
|
||||
|
||||
// Backwards compatible fit returning a gradient and constant.
|
||||
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
|
||||
// function in preference to the LMS class.
|
||||
double Fit(float* m, float* c);
|
||||
|
||||
// Backwards compatible constrained fit with a supplied gradient.
|
||||
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
|
||||
// to avoid potential difficulties with infinite gradients.
|
||||
double ConstrainedFit(double m, float* c);
|
||||
|
||||
private:
|
||||
// Simple struct to hold an ICOORD point and a halfwidth representing half
|
||||
// the "width" (supposedly approximately parallel to the direction of the
|
||||
// line) of each point, such that distant points can be discarded when they
|
||||
// overlap nearer points. (Think i dot and other diacritics or noise.)
|
||||
struct PointWidth {
|
||||
PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
|
||||
PointWidth(const ICOORD& pt0, int halfwidth0)
|
||||
: pt(pt0), halfwidth(halfwidth0) {}
|
||||
|
||||
ICOORD pt;
|
||||
int halfwidth;
|
||||
};
|
||||
// Type holds the distance of each point from the fitted line and the point
|
||||
// itself. Use of double allows integer distances from ICOORDs to be stored
|
||||
// exactly, and also the floating point results from ConstrainedFit.
|
||||
typedef KDPairInc<double, ICOORD> DistPointPair;
|
||||
|
||||
// Computes and returns the squared evaluation metric for a line fit.
|
||||
double EvaluateLineFit();
|
||||
|
||||
// Computes the absolute values of the precomputed distances_,
|
||||
// and returns the squared upper-quartile error distance.
|
||||
double ComputeUpperQuartileError();
|
||||
|
||||
// Returns the number of sample points that have an error more than threshold.
|
||||
int NumberOfMisfittedPoints(double threshold) const;
|
||||
|
||||
// Computes all the cross product distances of the points from the line,
|
||||
// storing the actual (signed) cross products in distances_.
|
||||
// Ignores distances of points that are further away than the previous point,
|
||||
// and overlaps the previous point by at least half.
|
||||
void ComputeDistances(const ICOORD& start, const ICOORD& end);
|
||||
|
||||
// Computes all the cross product distances of the points perpendicular to
|
||||
// the given direction, ignoring distances outside of the give distance range,
|
||||
// storing the actual (signed) cross products in distances_.
|
||||
void ComputeConstrainedDistances(const FCOORD& direction,
|
||||
double min_dist, double max_dist);
|
||||
|
||||
// Stores all the source points in the order they were given and their
|
||||
// halfwidths, if any.
|
||||
GenericVector<PointWidth> pts_;
|
||||
// Stores the computed perpendicular distances of (some of) the pts_ from a
|
||||
// given vector (assuming it goes through the origin, making it a line).
|
||||
// Since the distances may be a subset of the input points, and get
|
||||
// re-ordered by the nth_item function, the original point is stored
|
||||
// along side the distance.
|
||||
GenericVector<DistPointPair> distances_; // Distances of points.
|
||||
// The squared length of the vector used to compute distances_.
|
||||
double square_length_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user