mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Merge branch 'master' into ocricola-cleanup
This commit is contained in:
commit
3ee8db4e6b
9
.gitignore
vendored
9
.gitignore
vendored
@ -52,6 +52,8 @@ training/wordlist2dawg
|
||||
*.patch
|
||||
|
||||
# ignore compilation files
|
||||
build/*
|
||||
/bin
|
||||
*/.deps/*
|
||||
*/.libs/*
|
||||
*.lo
|
||||
@ -63,8 +65,6 @@ training/wordlist2dawg
|
||||
*.jar
|
||||
|
||||
# tessdata
|
||||
*.cube.*
|
||||
*.tesseract_cube.*
|
||||
*.traineddata
|
||||
|
||||
# OpenCL
|
||||
@ -73,5 +73,10 @@ kernel*.bin
|
||||
|
||||
# build dirs
|
||||
/build*
|
||||
/.cppan
|
||||
/cppan
|
||||
/*.dll
|
||||
/*.lib
|
||||
/*.exe
|
||||
/*.lnk
|
||||
/win*
|
22
.travis.yml
22
.travis.yml
@ -7,11 +7,11 @@ sudo: required
|
||||
|
||||
os:
|
||||
- linux
|
||||
#- osx
|
||||
- osx
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
#branches:
|
||||
#only:
|
||||
#- master
|
||||
|
||||
addons:
|
||||
apt:
|
||||
@ -25,16 +25,16 @@ before_install:
|
||||
- if [[ $TRAVIS_OS_NAME == linux ]]; then LINUX=true; fi
|
||||
- if [[ $TRAVIS_OS_NAME == osx ]]; then OSX=true; fi
|
||||
|
||||
- if [[ $OSX ]]; then brew update; fi
|
||||
#- if [[ $OSX ]]; then brew update; fi
|
||||
|
||||
- export LEPT_VER=1.73
|
||||
- export LEPT_VER=1.74.1
|
||||
|
||||
install:
|
||||
- if [[ $OSX ]]; then brew install icu4c pango; brew link --force gettext; fi
|
||||
- if [[ $OSX ]]; then export ICU_ROOT=/usr/local/opt/icu4c ; fi
|
||||
- wget https://www.cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh
|
||||
- sudo sh cmake-3.6.1-Linux-x86_64.sh --skip-license --prefix=/usr
|
||||
- wget -O leptonica.zip https://github.com/DanBloomberg/leptonica/archive/v$LEPT_VER.zip
|
||||
#- if [[ $OSX ]]; then brew install icu4c pango; brew link --force gettext; fi
|
||||
#- if [[ $OSX ]]; then export ICU_ROOT=/usr/local/opt/icu4c ; fi
|
||||
- if [[ $LINUX ]]; then wget https://www.cmake.org/files/v3.7/cmake-3.7.2-Linux-x86_64.sh; fi
|
||||
- if [[ $LINUX ]]; then sudo sh cmake-3.7.2-Linux-x86_64.sh --skip-license --prefix=/usr; fi
|
||||
- wget -O leptonica.zip https://github.com/DanBloomberg/leptonica/archive/$LEPT_VER.zip
|
||||
- unzip leptonica.zip -d .
|
||||
- cmake -Hleptonica-$LEPT_VER -Bleptonica-$LEPT_VER/build
|
||||
- make -C leptonica-$LEPT_VER/build
|
||||
|
14
AUTHORS
14
AUTHORS
@ -2,12 +2,14 @@ Ray Smith (lead developer) <theraysmith@gmail.com>
|
||||
Ahmad Abdulkader
|
||||
Rika Antonova
|
||||
Nicholas Beato
|
||||
Jeff Breidenbach
|
||||
Samuel Charron
|
||||
Phil Cheatle
|
||||
Simon Crouch
|
||||
David Eger
|
||||
Sheelagh Huddleston
|
||||
Dan Johnson
|
||||
Rajesh Katikam
|
||||
Thomas Kielbus
|
||||
Dar-Shyang Lee
|
||||
Zongyi (Joe) Liu
|
||||
@ -26,3 +28,15 @@ Joern Wanke
|
||||
Ping Ping Xiu
|
||||
Andrew Ziem
|
||||
Oscar Zuniga
|
||||
|
||||
Community Contributors:
|
||||
Zdenko Podobný (Maintainer)
|
||||
Jim Regan (Maintainer)
|
||||
James R Barlow
|
||||
Amit Dovev
|
||||
Martin Ettl
|
||||
Tom Morris
|
||||
Tobias Müller
|
||||
Egor Pugin
|
||||
Sundar M. Vaidya
|
||||
Stefan Weil
|
||||
|
180
CMakeLists.txt
180
CMakeLists.txt
@ -10,6 +10,12 @@
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.11)
|
||||
|
||||
if (NOT APPVEYOR)
|
||||
if (WIN32 AND (CMAKE_VERSION VERSION_EQUAL 3.6 OR (CMAKE_VERSION VERSION_GREATER 3.6 AND CMAKE_VERSION VERSION_LESS 3.7)))
|
||||
message(FATAL_ERROR "You have bugged CMake version 3.6 which is known to not work with tesseract. Please, upgrade CMake.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# In-source builds are disabled.
|
||||
if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
||||
message(FATAL_ERROR
|
||||
@ -40,25 +46,31 @@ set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "CMake Targets")
|
||||
|
||||
project(tesseract C CXX)
|
||||
|
||||
set(VERSION_MAJOR 3)
|
||||
set(VERSION_MINOR 05)
|
||||
set(VERSION_MAJOR 4)
|
||||
set(VERSION_MINOR 00)
|
||||
set(VERSION_PLAIN ${VERSION_MAJOR}.${VERSION_MINOR})
|
||||
|
||||
set(MINIMUM_LEPTONICA_VERSION 1.71)
|
||||
set(MINIMUM_LEPTONICA_VERSION 1.74)
|
||||
|
||||
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/cppan)
|
||||
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.cppan)
|
||||
if (NOT Leptonica_DIR AND NOT MSVC)
|
||||
find_package(PkgConfig REQUIRED)
|
||||
pkg_check_modules(Leptonica REQUIRED lept)
|
||||
pkg_check_modules(Leptonica REQUIRED lept>=${MINIMUM_LEPTONICA_VERSION})
|
||||
else()
|
||||
find_package(Leptonica ${MINIMUM_LEPTONICA_VERSION} REQUIRED CONFIG)
|
||||
endif()
|
||||
else()
|
||||
add_subdirectory(cppan)
|
||||
if (STATIC)
|
||||
set(CPPAN_BUILD_SHARED_LIBS 0)
|
||||
else()
|
||||
set(CPPAN_BUILD_SHARED_LIBS 1)
|
||||
endif()
|
||||
add_subdirectory(.cppan)
|
||||
endif()
|
||||
|
||||
find_package(OpenCL QUIET)
|
||||
find_package(PkgConfig)
|
||||
|
||||
option(BUILD_TRAINING_TOOLS "Build training tools" ON)
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
@ -76,6 +88,9 @@ if (WIN32)
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
|
||||
if (APPVEYOR)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W0")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(LIB_Ws2_32 Ws2_32)
|
||||
@ -104,12 +119,7 @@ include(Configure)
|
||||
|
||||
configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
|
||||
|
||||
set(INCLUDE_DIR
|
||||
${CMAKE_SOURCE_DIR}/api
|
||||
${CMAKE_SOURCE_DIR}/ccmain
|
||||
${CMAKE_SOURCE_DIR}/ccstruct
|
||||
${CMAKE_SOURCE_DIR}/ccutil
|
||||
)
|
||||
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
|
||||
|
||||
configure_file(
|
||||
${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in
|
||||
@ -137,14 +147,14 @@ include_directories(${Leptonica_INCLUDE_DIRS})
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
include_directories(api)
|
||||
include_directories(arch)
|
||||
include_directories(ccmain)
|
||||
include_directories(ccstruct)
|
||||
include_directories(ccutil)
|
||||
include_directories(classify)
|
||||
include_directories(cube)
|
||||
include_directories(cutil)
|
||||
include_directories(dict)
|
||||
include_directories(neural_networks/runtime)
|
||||
include_directories(lstm)
|
||||
include_directories(opencl)
|
||||
include_directories(textord)
|
||||
include_directories(vs2010/port)
|
||||
@ -159,14 +169,14 @@ string(SUBSTRING ${VERSION_MINOR} 0 1 VERSION_MINOR_0)
|
||||
string(SUBSTRING ${VERSION_MINOR} 1 1 VERSION_MINOR_1)
|
||||
|
||||
file(GLOB tesseract_src
|
||||
arch/*.cpp
|
||||
ccmain/*.cpp
|
||||
ccstruct/*.cpp
|
||||
ccutil/*.cpp
|
||||
classify/*.cpp
|
||||
cube/*.cpp
|
||||
cutil/*.cpp
|
||||
dict/*.cpp
|
||||
neural_networks/runtime/*.cpp
|
||||
lstm/*.cpp
|
||||
opencl/*.cpp
|
||||
textord/*.cpp
|
||||
viewer/*.cpp
|
||||
@ -174,14 +184,14 @@ file(GLOB tesseract_src
|
||||
)
|
||||
file(GLOB tesseract_hdr
|
||||
api/*.h
|
||||
arch/*.h
|
||||
ccmain/*.h
|
||||
ccstruct/*.h
|
||||
ccutil/*.h
|
||||
classify/*.h
|
||||
cube/*.h
|
||||
cutil/*.h
|
||||
dict/*.h
|
||||
neural_networks/runtime/*.h
|
||||
lstm/*.h
|
||||
opencl/*.h
|
||||
textord/*.h
|
||||
viewer/*.h
|
||||
@ -201,25 +211,40 @@ set(tesseract_src ${tesseract_src}
|
||||
api/pdfrenderer.cpp
|
||||
)
|
||||
|
||||
add_library (tesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
|
||||
if (NOT STATIC)
|
||||
target_compile_definitions (tesseract PUBLIC -DTESS_EXPORTS)
|
||||
endif()
|
||||
target_link_libraries (tesseract ${LIB_Ws2_32} ${LIB_pthread})
|
||||
set_target_properties (tesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
|
||||
set_target_properties (tesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
|
||||
if (WIN32)
|
||||
set_target_properties (tesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR})
|
||||
set_target_properties (tesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
|
||||
if (MSVC)
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
|
||||
PROPERTIES COMPILE_FLAGS "/arch:AVX")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_library (libtesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
|
||||
if (NOT STATIC)
|
||||
target_compile_definitions (libtesseract
|
||||
PRIVATE -DTESS_EXPORTS
|
||||
INTERFACE -DTESS_IMPORTS
|
||||
)
|
||||
set_target_properties (libtesseract PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS True)
|
||||
endif()
|
||||
target_link_libraries (libtesseract ${LIB_Ws2_32} ${LIB_pthread})
|
||||
set_target_properties (libtesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
|
||||
set_target_properties (libtesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
|
||||
if (WIN32)
|
||||
set_target_properties (libtesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR})
|
||||
set_target_properties (libtesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d)
|
||||
endif()
|
||||
|
||||
if (NOT CPPAN_BUILD)
|
||||
target_link_libraries (tesseract ${Leptonica_LIBRARIES})
|
||||
export(TARGETS tesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
|
||||
target_link_libraries (libtesseract ${Leptonica_LIBRARIES})
|
||||
export(TARGETS libtesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
|
||||
else()
|
||||
target_link_libraries (tesseract cppan)
|
||||
target_link_libraries (libtesseract pvt.cppan.demo.danbloomberg.leptonica)
|
||||
file(WRITE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_BINARY_DIR}/cppan.cmake)\n")
|
||||
export(TARGETS tesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
|
||||
export(TARGETS libtesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
|
||||
endif()
|
||||
|
||||
########################################
|
||||
@ -231,12 +256,97 @@ set(tesseractmain_src
|
||||
vs2010/tesseract/resource.h
|
||||
vs2010/tesseract/tesseract.rc
|
||||
)
|
||||
add_executable (tesseractmain ${tesseractmain_src})
|
||||
target_link_libraries (tesseractmain tesseract)
|
||||
set_target_properties (tesseractmain PROPERTIES OUTPUT_NAME tesseract)
|
||||
add_executable (tesseract ${tesseractmain_src})
|
||||
target_link_libraries (tesseract libtesseract)
|
||||
|
||||
########################################
|
||||
|
||||
if (BUILD_TRAINING_TOOLS)
|
||||
add_subdirectory(training)
|
||||
endif()
|
||||
|
||||
get_target_property(tesseract_NAME libtesseract NAME)
|
||||
get_target_property(tesseract_VERSION libtesseract VERSION)
|
||||
get_target_property(tesseract_OUTPUT_NAME libtesseract OUTPUT_NAME)
|
||||
configure_file(tesseract.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc @ONLY)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc DESTINATION lib/pkgconfig)
|
||||
install(TARGETS tesseract RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
|
||||
install(TARGETS libtesseract EXPORT TesseractTargets RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
|
||||
install(EXPORT TesseractTargets DESTINATION cmake)
|
||||
install(FILES
|
||||
${CMAKE_BINARY_DIR}/TesseractConfig.cmake
|
||||
${CMAKE_BINARY_DIR}/TesseractConfig-version.cmake
|
||||
DESTINATION cmake)
|
||||
|
||||
install(FILES
|
||||
# from api/makefile.am
|
||||
api/apitypes.h
|
||||
api/baseapi.h
|
||||
api/capi.h
|
||||
api/renderer.h
|
||||
|
||||
#from arch/makefile.am
|
||||
arch/dotproductavx.h
|
||||
arch/dotproductsse.h
|
||||
arch/simddetect.h
|
||||
|
||||
#from ccmain/makefile.am
|
||||
ccmain/thresholder.h
|
||||
ccmain/ltrresultiterator.h
|
||||
ccmain/pageiterator.h
|
||||
ccmain/resultiterator.h
|
||||
ccmain/osdetect.h
|
||||
|
||||
#from ccstruct/makefile.am
|
||||
ccstruct/publictypes.h
|
||||
|
||||
#from ccutil/makefile.am
|
||||
ccutil/basedir.h
|
||||
ccutil/errcode.h
|
||||
ccutil/fileerr.h
|
||||
ccutil/genericvector.h
|
||||
ccutil/helpers.h
|
||||
ccutil/host.h
|
||||
ccutil/memry.h
|
||||
ccutil/ndminx.h
|
||||
ccutil/params.h
|
||||
ccutil/ocrclass.h
|
||||
ccutil/platform.h
|
||||
ccutil/serialis.h
|
||||
ccutil/strngs.h
|
||||
ccutil/tesscallback.h
|
||||
ccutil/unichar.h
|
||||
ccutil/unicharcompress.h
|
||||
ccutil/unicharmap.h
|
||||
ccutil/unicharset.h
|
||||
|
||||
#from lstm/makefile.am
|
||||
lstm/convolve.h
|
||||
lstm/ctc.h
|
||||
lstm/fullyconnected.h
|
||||
lstm/functions.h
|
||||
lstm/input.h
|
||||
lstm/lstm.h
|
||||
lstm/lstmrecognizer.h
|
||||
lstm/lstmtrainer.h
|
||||
lstm/maxpool.h
|
||||
lstm/networkbuilder.h
|
||||
lstm/network.h
|
||||
lstm/networkio.h
|
||||
lstm/networkscratch.h
|
||||
lstm/parallel.h
|
||||
lstm/plumbing.h
|
||||
lstm/recodebeam.h
|
||||
lstm/reconfig.h
|
||||
lstm/reversed.h
|
||||
lstm/series.h
|
||||
lstm/static_shape.h
|
||||
lstm/stridemap.h
|
||||
lstm/tfnetwork.h
|
||||
lstm/weightmatrix.h
|
||||
|
||||
#${CMAKE_BINARY_DIR}/src/endianness.h
|
||||
DESTINATION include/tesseract)
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
@ -24,7 +24,7 @@ When creating an issue, please report your operating system, including its speci
|
||||
|
||||
Search through open and closed issues to see if a similar issue has been reported already (and sometimes also has been solved).
|
||||
|
||||
Similary, before you post your question in the forum, search through past threads to see if similar question has been asked already.
|
||||
Similarly, before you post your question in the forum, search through past threads to see if similar question has been asked already.
|
||||
|
||||
Read the [wiki](https://github.com/tesseract-ocr/tesseract/wiki) before you report your issue or ask a question in the forum.
|
||||
|
||||
@ -44,7 +44,7 @@ When attaching a file to the issue report / forum ...
|
||||
|
||||
Do not attach programs or libraries to your issues/posts.
|
||||
|
||||
For large files or for programs, add a link to a iocation where they can be downloaded (your site, Git repo, Google Drive, Dropbox etc.)
|
||||
For large files or for programs, add a link to a location where they can be downloaded (your site, Git repo, Google Drive, Dropbox etc.)
|
||||
|
||||
Attaching a multi-page TIFF image is useful only if you have a problem with multi-page functionality; otherwise attach only one or a few single page images.
|
||||
|
||||
|
4
COPYING
4
COPYING
@ -17,5 +17,5 @@ in this distribution is now licensed under the Apache License:
|
||||
Other Dependencies and Licenses:
|
||||
================================
|
||||
|
||||
Tesseract uses Leptonica library (http://leptonica.com/) with a very weakly
|
||||
restricted copyright license (http://leptonica.com/about-the-license.html)
|
||||
Tesseract uses Leptonica library (http://leptonica.com/) which essentially
|
||||
uses a BSD 2-clause license. (http://leptonica.com/about-the-license.html)
|
||||
|
43
ChangeLog
43
ChangeLog
@ -1,3 +1,46 @@
|
||||
2017-03-24 - V4.00.00-alpha
|
||||
* Added new neural network system based on LSTMs, with major accuracy gains.
|
||||
* Improvements to PDF rendering.
|
||||
* Fixes to trainingdata rendering.
|
||||
* Added LSTM models+lang models to 101 languages. (tessdata repository)
|
||||
* Improved multi-page TIFF handling.
|
||||
* Fixed damage to binary images when processing PDFs.
|
||||
* Fixes to training process to allow incremental training from a recognition model.
|
||||
* Made LSTM the default engine, pushed cube out.
|
||||
* Deleted cube code.
|
||||
* Changed OEModes --oem 0 for legacy tesseract engine, --oem 1 for LSTM, --oem 2 for both, --oem 3 for default.
|
||||
* Avoid use of Leptonica debug parameters or functions.
|
||||
* Fixed multi-language mode.
|
||||
* Removed support for VS2010.
|
||||
* Added Support for VS2015 and VS2017 with CPPAN.
|
||||
* Implemented invisible text only for PDF.
|
||||
* Added AVX / SSE support for Windows
|
||||
* Enabled OpenMP support.
|
||||
* Miscellaneous Fixes.
|
||||
|
||||
2017-02-16 - V3.05.00
|
||||
* Made some fine tuning to the hOCR output.
|
||||
* Added TSV as another optional output format.
|
||||
* Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method.
|
||||
* text2image tool - Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer.
|
||||
* Training tools - Replaced asserts with tprintf() and exit(1).
|
||||
* Fixed Cygwin compatibility.
|
||||
* Improved multipage tiff processing.
|
||||
* Improved the embedded pdf font (pdf.ttf).
|
||||
* Enable selection of OCR engine mode from command line.
|
||||
* Changed tesseract command line parameter '-psm' to '--psm'.
|
||||
* Write output of tesseract --help, --version and --list-langs to stdout instead of stderr.
|
||||
* Added new C API for orientation and script detection, removed the old one.
|
||||
* Increased minimum autoconf version to 2.59.
|
||||
* Removed dead code.
|
||||
* Require Leptonica 1.74 or higher.
|
||||
* Fixed many compiler warnings.
|
||||
* Fixed memory and resource leaks.
|
||||
* Fixed some issues with the 'Cube' OCR engine.
|
||||
* Fixed some openCL issues.
|
||||
* Added option to build Tesseract with CMake build system.
|
||||
* Implemented CPPAN support for easy Windows building.
|
||||
|
||||
2016-02-17 - V3.04.01
|
||||
* Added OSD renderer for psm 0. Works for single page and multi-page images.
|
||||
* Improve tesstrain.sh script.
|
||||
|
2
INSTALL
2
INSTALL
@ -45,7 +45,7 @@ The simplest way to compile this package is:
|
||||
`sh ./configure' instead to prevent `csh' from trying to execute
|
||||
`configure' itself.
|
||||
|
||||
Running `configure' takes awhile. While running, it prints some
|
||||
Running `configure' takes a while. While running, it prints some
|
||||
messages telling which features it is checking for.
|
||||
|
||||
2. Type `make' to compile the package.
|
||||
|
@ -3,11 +3,12 @@
|
||||
If you have cloned Tesseract from GitHub, you must generate
|
||||
the configure script.
|
||||
|
||||
If you have tesseract 3.0x installation in your system, please remove it
|
||||
If you have tesseract 4.0x installation in your system, please remove it
|
||||
before new build.
|
||||
|
||||
Known dependencies for training tools (excluding leptonica):
|
||||
* compiler with c++ support
|
||||
* compiler with c++11 support
|
||||
* autoconf-archive
|
||||
* pango-devel
|
||||
* cairo-devel
|
||||
* icu-devel
|
||||
@ -24,7 +25,7 @@ So, the steps for making Tesseract are:
|
||||
You need to install at least English language and OSD data files to TESSDATA_PREFIX
|
||||
directory. You can retrieve a single file with tools like [wget](https://www.gnu.org/software/wget/), [curl](https://curl.haxx.se/), [GithubDownloader](https://github.com/intezer/GithubDownloader) or a browser.
|
||||
|
||||
All language data files can be retrieved from git repository (usefull only for packagers!):
|
||||
All language data files can be retrieved from git repository (useful only for packagers!):
|
||||
|
||||
$ git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata
|
||||
|
||||
|
202
LICENSE
Normal file
202
LICENSE
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
24
Makefile.am
24
Makefile.am
@ -4,9 +4,12 @@ ACLOCAL_AMFLAGS = -I m4
|
||||
if ENABLE_TRAINING
|
||||
TRAINING_SUBDIR = training
|
||||
training:
|
||||
$(MAKE)
|
||||
@cd "$(top_builddir)/training" && $(MAKE)
|
||||
training-install:
|
||||
@cd "$(top_builddir)/training" && $(MAKE) install
|
||||
training-uninstall:
|
||||
@cd "$(top_builddir)/training" && $(MAKE) uninstall
|
||||
clean-local:
|
||||
@cd "$(top_builddir)/training" && $(MAKE) clean
|
||||
else
|
||||
@ -16,10 +19,7 @@ endif
|
||||
|
||||
.PHONY: install-langs ScrollView.jar install-jars training
|
||||
|
||||
SUBDIRS = ccutil viewer cutil opencl ccstruct dict classify wordrec textord
|
||||
if !NO_CUBE_BUILD
|
||||
SUBDIRS += neural_networks/runtime cube
|
||||
endif
|
||||
SUBDIRS = arch ccutil viewer cutil opencl ccstruct dict classify wordrec textord lstm
|
||||
SUBDIRS += ccmain api . tessdata doc
|
||||
|
||||
EXTRA_DIST = README.md\
|
||||
@ -35,14 +35,14 @@ dist-hook:
|
||||
# Need to remove .svn directories from directories
|
||||
# added using EXTRA_DIST. $(distdir)/tessdata would in
|
||||
# theory suffice.
|
||||
rm -rf `find $(distdir) -name .svn`
|
||||
rm -rf `find $(distdir) -name .git`
|
||||
rm -rf `find $(distdir) -name .deps`
|
||||
rm -rf `find $(distdir) -name .libs`
|
||||
rm -rf `find $(distdir) -name *.o`
|
||||
rm -rf `find $(distdir) -name *.lo`
|
||||
rm -rf `find $(distdir) -name *.la`
|
||||
rm -rf `find $(distdir)/training -executable -type f`
|
||||
# Strip VCS metadata and build by-products from the distribution tree.
|
||||
# `$$` makes make pass a literal `$(...)` command substitution to the shell;
|
||||
# a single `$(find ...)` is expanded by make itself as an undefined variable
|
||||
# (empty string), which turns every command below into a no-op `rm -rf`.
|
||||
# Glob patterns are quoted so that find, not the shell, interprets them.
|
||||
rm -rf $$(find $(distdir) -name .svn)
|
||||
rm -rf $$(find $(distdir) -name .git)
|
||||
rm -rf $$(find $(distdir) -name .deps)
|
||||
rm -rf $$(find $(distdir) -name .libs)
|
||||
rm -rf $$(find $(distdir) -name '*.o')
|
||||
rm -rf $$(find $(distdir) -name '*.lo')
|
||||
rm -rf $$(find $(distdir) -name '*.la')
|
||||
rm -rf $$(find $(distdir)/training -executable -type f)
|
||||
rm -rf $(distdir)/doc/html/*
|
||||
|
||||
ScrollView.jar:
|
||||
|
33
README.md
33
README.md
@ -1,29 +1,30 @@
|
||||
[![Build Status](https://travis-ci.org/tesseract-ocr/tesseract.svg?branch=master)](https://travis-ci.org/tesseract-ocr/tesseract)
|
||||
[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
|
||||
[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
|
||||
|
||||
For the latest online version of the README.md see:
|
||||
|
||||
https://github.com/tesseract-ocr/tesseract/blob/master/README.md
|
||||
|
||||
#About
|
||||
# About
|
||||
|
||||
This package contains an OCR engine - `libtesseract` and a command line program - `tesseract`.
|
||||
|
||||
The lead developer is Ray Smith. The maintainer is Zdenko Podobny.
|
||||
For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS) and github's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
|
||||
For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS)
|
||||
and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
|
||||
|
||||
Tesseract has Unicode (UTF-8) support, and can recognize more than 100
|
||||
languages "out of the box". It can be trained to recognize other languages. See [Tesseract Training](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) for more information.
|
||||
|
||||
Tesseract supports various output formats: plain text, hOCR (HTML), and PDF.
|
||||
|
||||
This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/3rdParty) wiki page.
|
||||
This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/User-Projects-%E2%80%93-3rdParty) wiki page.
|
||||
|
||||
You should note that in many cases, in order to get better OCR results, you'll need to [improve the quality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) of the image you are giving Tesseract.
|
||||
|
||||
The latest stable version is 3.04.01, released in February 2016.
|
||||
The latest stable version is 3.05.00, released in February 2017.
|
||||
|
||||
#Brief history
|
||||
# Brief history
|
||||
|
||||
Tesseract was originally developed at Hewlett-Packard Laboratories Bristol and
|
||||
at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some
|
||||
@ -33,13 +34,13 @@ In 2005 Tesseract was open sourced by HP. Since 2006 it is developed by Google.
|
||||
|
||||
[Release Notes](https://github.com/tesseract-ocr/tesseract/wiki/ReleaseNotes)
|
||||
|
||||
#For developers
|
||||
# For developers
|
||||
|
||||
Developers can use the `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/master/api/capi.h) or [C++](https://github.com/tesseract-ocr/tesseract/blob/master/api/baseapi.h) API to build their own applications. If you need bindings to `libtesseract` for other programming languages, please see the [wrapper](https://github.com/tesseract-ocr/tesseract/wiki/AddOns#tesseract-wrappers) section on the AddOns wiki page.
|
||||
|
||||
Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](http://tesseract-ocr.github.io/).
|
||||
|
||||
#License
|
||||
# License
|
||||
|
||||
The code in this repository is licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -55,19 +56,27 @@ Documentation of Tesseract generated from source code by doxygen can be found on
|
||||
|
||||
**NOTE**: This software depends on other packages that may be licensed under different open source licenses.
|
||||
|
||||
#Installing Tesseract
|
||||
# Installing Tesseract
|
||||
|
||||
You can either [install Tesseract via a pre-built binary package](https://github.com/tesseract-ocr/tesseract/wiki) or [build it from source](https://github.com/tesseract-ocr/tesseract/wiki/Compiling).
|
||||
|
||||
#Running Tesseract
|
||||
## Supported Compilers
|
||||
|
||||
* GCC 4.8 and above
|
||||
* Clang 3.4 and above
|
||||
* MSVC 2015, 2017
|
||||
|
||||
Other compilers might work, but are not officially supported.
|
||||
|
||||
# Running Tesseract
|
||||
|
||||
Basic command line usage:
|
||||
|
||||
tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...]
|
||||
tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfiles...]
|
||||
|
||||
For more information about the various command line options use `tesseract --help` or `man tesseract`.
|
||||
|
||||
#Support
|
||||
# Support
|
||||
|
||||
Mailing-lists:
|
||||
* [tesseract-ocr](https://groups.google.com/d/forum/tesseract-ocr) - For tesseract users.
|
||||
|
@ -4,7 +4,7 @@ include $(CLEAR_VARS)
|
||||
LOCAL_MODULE := tesseract-$(APP_ABI)
|
||||
|
||||
LOCAL_STATIC_LIBRARIES := \
|
||||
mobile_base \
|
||||
base \
|
||||
leptonica-$(APP_ABI)
|
||||
|
||||
LOCAL_C_INCLUDES := $(APP_C_INCLUDES)
|
||||
@ -30,13 +30,6 @@ $(info local path=$(LOCAL_PATH))
|
||||
LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/../../api/*.cpp $(LOCAL_PATH)/../../ccmain/*.cpp $(LOCAL_PATH)/../../ccstruct/*.cpp $(LOCAL_PATH)/../../ccutil/*.cpp $(LOCAL_PATH)/../../classify/*.cpp $(LOCAL_PATH)/../../cutil/*.cpp $(LOCAL_PATH)/../../dict/*.cpp $(LOCAL_PATH)/../../image/*.cpp $(LOCAL_PATH)/../../textord/*.cpp $(LOCAL_PATH)/../../viewer/*.cpp $(LOCAL_PATH)/../../wordrec/*.cpp)
|
||||
|
||||
EXPLICIT_SRC_EXCLUDES := \
|
||||
$(LOCAL_PATH)/../../ccmain/cubeclassifier.cpp \
|
||||
$(LOCAL_PATH)/../../ccmain/cubeclassifier.h \
|
||||
$(LOCAL_PATH)/../../ccmain/cube_control.cpp \
|
||||
$(LOCAL_PATH)/../../ccmain/cube_reco_context.cpp \
|
||||
$(LOCAL_PATH)/../../ccmain/cube_reco_context.h \
|
||||
$(LOCAL_PATH)/../../ccmain/tesseract_cube_combiner.cpp \
|
||||
$(LOCAL_PATH)/../../ccmain/tesseract_cube_combiner.h \
|
||||
$(LOCAL_PATH)/../../api/pdfrenderer.cpp \
|
||||
$(LOCAL_PATH)/../../api/tesseractmain.cpp \
|
||||
|
||||
@ -47,11 +40,10 @@ LOCAL_SRC_FILES := $(LOCAL_SRC_FILES:$(LOCAL_PATH)/%=%)
|
||||
$(info local src files = $(LOCAL_SRC_FILES))
|
||||
|
||||
LOCAL_LDLIBS := -ldl -llog -ljnigraphics
|
||||
LOCAL_CFLAGS := -DANDROID_BUILD -DNO_CUBE_BUILD -DGRAPHICS_DISABLED
|
||||
LOCAL_CFLAGS := -DANDROID_BUILD -DGRAPHICS_DISABLED
|
||||
|
||||
include $(BUILD_SHARED_LIBRARY)
|
||||
|
||||
$(call import-module,mobile/base)
|
||||
$(call import-module,mobile/base)
|
||||
$(call import-module,base/port)
|
||||
$(call import-module,mobile/util/hash)
|
||||
$(call import-module,third_party/leptonica/android/jni)
|
||||
|
@ -1,6 +1,7 @@
|
||||
AM_CPPFLAGS += -DLOCALEDIR=\"$(localedir)\"\
|
||||
-DUSE_STD_NAMESPACE \
|
||||
-I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct -I$(top_srcdir)/cube \
|
||||
-I$(top_srcdir)/arch -I$(top_srcdir)/lstm \
|
||||
-I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \
|
||||
-I$(top_srcdir)/viewer \
|
||||
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/classify -I$(top_srcdir)/ccmain \
|
||||
@ -27,15 +28,15 @@ libtesseract_api_la_LIBADD = \
|
||||
../wordrec/libtesseract_wordrec.la \
|
||||
../classify/libtesseract_classify.la \
|
||||
../dict/libtesseract_dict.la \
|
||||
../arch/libtesseract_arch.la \
|
||||
../arch/libtesseract_avx.la \
|
||||
../arch/libtesseract_sse.la \
|
||||
../lstm/libtesseract_lstm.la \
|
||||
../ccstruct/libtesseract_ccstruct.la \
|
||||
../cutil/libtesseract_cutil.la \
|
||||
../viewer/libtesseract_viewer.la \
|
||||
../ccutil/libtesseract_ccutil.la \
|
||||
../opencl/libtesseract_opencl.la
|
||||
if !NO_CUBE_BUILD
|
||||
libtesseract_api_la_LIBADD += ../cube/libtesseract_cube.la \
|
||||
../neural_networks/runtime/libtesseract_neural.la \
|
||||
endif
|
||||
endif
|
||||
|
||||
libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
@ -45,7 +46,7 @@ endif
|
||||
libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp
|
||||
|
||||
lib_LTLIBRARIES += libtesseract.la
|
||||
libtesseract_la_LDFLAGS =
|
||||
libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS)
|
||||
libtesseract_la_SOURCES =
|
||||
# Dummy C++ source to cause C++ linking.
|
||||
# see http://www.gnu.org/s/hello/manual/automake/Libtool-Convenience-Libraries.html#Libtool-Convenience-Libraries
|
||||
@ -57,15 +58,15 @@ libtesseract_la_LIBADD = \
|
||||
../wordrec/libtesseract_wordrec.la \
|
||||
../classify/libtesseract_classify.la \
|
||||
../dict/libtesseract_dict.la \
|
||||
../arch/libtesseract_arch.la \
|
||||
../arch/libtesseract_avx.la \
|
||||
../arch/libtesseract_sse.la \
|
||||
../lstm/libtesseract_lstm.la \
|
||||
../ccstruct/libtesseract_ccstruct.la \
|
||||
../cutil/libtesseract_cutil.la \
|
||||
../viewer/libtesseract_viewer.la \
|
||||
../ccutil/libtesseract_ccutil.la \
|
||||
../opencl/libtesseract_opencl.la
|
||||
if !NO_CUBE_BUILD
|
||||
libtesseract_la_LIBADD += ../cube/libtesseract_cube.la \
|
||||
../neural_networks/runtime/libtesseract_neural.la
|
||||
endif
|
||||
|
||||
libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION) -no-undefined
|
||||
|
||||
@ -81,9 +82,10 @@ tesseract_LDADD = libtesseract.la
|
||||
|
||||
tesseract_LDFLAGS = $(OPENCL_LDFLAGS)
|
||||
|
||||
if OPENMP
|
||||
tesseract_LDADD += $(OPENMP_CFLAGS)
|
||||
endif
|
||||
tesseract_LDADD += $(LEPTONICA_LIBS)
|
||||
tesseract_LDADD += $(OPENMP_CXXFLAGS)
|
||||
|
||||
tesseract_LDADD += -ltiff
|
||||
|
||||
if T_WIN
|
||||
tesseract_LDADD += -lws2_32
|
||||
@ -92,4 +94,3 @@ endif
|
||||
if ADD_RT
|
||||
tesseract_LDADD += -lrt
|
||||
endif
|
||||
|
||||
|
@ -17,8 +17,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_APITYPES_H__
|
||||
#define TESSERACT_API_APITYPES_H__
|
||||
#ifndef TESSERACT_API_APITYPES_H_
|
||||
#define TESSERACT_API_APITYPES_H_
|
||||
|
||||
#include "publictypes.h"
|
||||
|
||||
@ -30,4 +30,4 @@
|
||||
// than the lower-level one, and lower-level code should be sure to include
|
||||
// only the lower-level file.
|
||||
|
||||
#endif // TESSERACT_API_APITYPES_H__
|
||||
#endif // TESSERACT_API_APITYPES_H_
|
||||
|
377
api/baseapi.cpp
377
api/baseapi.cpp
@ -34,8 +34,6 @@
|
||||
// workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
|
||||
#undef __STRICT_ANSI__
|
||||
#endif // _MSC_VER
|
||||
#include <stdlib.h>
|
||||
#include <windows.h>
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#else
|
||||
@ -110,27 +108,30 @@ const int kMinCredibleResolution = 70;
|
||||
const int kMaxCredibleResolution = 2400;
|
||||
|
||||
TessBaseAPI::TessBaseAPI()
|
||||
: tesseract_(NULL),
|
||||
osd_tesseract_(NULL),
|
||||
equ_detect_(NULL),
|
||||
: tesseract_(nullptr),
|
||||
osd_tesseract_(nullptr),
|
||||
equ_detect_(nullptr),
|
||||
reader_(nullptr),
|
||||
// Thresholder is initialized to nullptr here, but will be set before use by:
|
||||
// A constructor of a derived API, SetThresholder(), or
|
||||
// created implicitly when used in InternalSetImage.
|
||||
thresholder_(NULL),
|
||||
paragraph_models_(NULL),
|
||||
block_list_(NULL),
|
||||
page_res_(NULL),
|
||||
input_file_(NULL),
|
||||
input_image_(NULL),
|
||||
output_file_(NULL),
|
||||
datapath_(NULL),
|
||||
language_(NULL),
|
||||
thresholder_(nullptr),
|
||||
paragraph_models_(nullptr),
|
||||
block_list_(nullptr),
|
||||
page_res_(nullptr),
|
||||
input_file_(nullptr),
|
||||
output_file_(nullptr),
|
||||
datapath_(nullptr),
|
||||
language_(nullptr),
|
||||
last_oem_requested_(OEM_DEFAULT),
|
||||
recognition_done_(false),
|
||||
truth_cb_(NULL),
|
||||
rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0),
|
||||
image_width_(0), image_height_(0) {
|
||||
}
|
||||
rect_left_(0),
|
||||
rect_top_(0),
|
||||
rect_width_(0),
|
||||
rect_height_(0),
|
||||
image_width_(0),
|
||||
image_height_(0) {}
|
||||
|
||||
TessBaseAPI::~TessBaseAPI() {
|
||||
End();
|
||||
@ -278,20 +279,33 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_non_debug_params) {
|
||||
return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
|
||||
vars_values, set_only_non_debug_params, nullptr);
|
||||
}
|
||||
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array. Also implements the version with a datapath in data,
|
||||
// flagged by data_size = 0.
|
||||
int TessBaseAPI::Init(const char* data, int data_size, const char* language,
|
||||
OcrEngineMode oem, char** configs, int configs_size,
|
||||
const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader) {
|
||||
PERF_COUNT_START("TessBaseAPI::Init")
|
||||
// Default language is "eng".
|
||||
if (language == NULL) language = "eng";
|
||||
if (language == nullptr) language = "eng";
|
||||
STRING datapath = data_size == 0 ? data : language;
|
||||
// If the datapath, OcrEngineMode or the language have changed - start again.
|
||||
// Note that the language_ field stores the last requested language that was
|
||||
// initialized successfully, while tesseract_->lang stores the language
|
||||
// actually used. They differ only if the requested language was NULL, in
|
||||
// which case tesseract_->lang is set to the Tesseract default ("eng").
|
||||
if (tesseract_ != NULL &&
|
||||
(datapath_ == NULL || language_ == NULL ||
|
||||
*datapath_ != datapath || last_oem_requested_ != oem ||
|
||||
if (tesseract_ != nullptr &&
|
||||
(datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
|
||||
last_oem_requested_ != oem ||
|
||||
(*language_ != language && tesseract_->lang != language))) {
|
||||
delete tesseract_;
|
||||
tesseract_ = NULL;
|
||||
tesseract_ = nullptr;
|
||||
}
|
||||
// PERF_COUNT_SUB("delete tesseract_")
|
||||
#ifdef USE_OPENCL
|
||||
@ -300,19 +314,25 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
|
||||
#endif
|
||||
PERF_COUNT_SUB("OD::InitEnv()")
|
||||
bool reset_classifier = true;
|
||||
if (tesseract_ == NULL) {
|
||||
if (tesseract_ == nullptr) {
|
||||
reset_classifier = false;
|
||||
tesseract_ = new Tesseract;
|
||||
if (reader != nullptr) reader_ = reader;
|
||||
TessdataManager mgr(reader_);
|
||||
if (data_size != 0) {
|
||||
mgr.LoadMemBuffer(language, data, data_size);
|
||||
}
|
||||
if (tesseract_->init_tesseract(
|
||||
datapath, output_file_ != NULL ? output_file_->string() : NULL,
|
||||
datapath.string(),
|
||||
output_file_ != nullptr ? output_file_->string() : nullptr,
|
||||
language, oem, configs, configs_size, vars_vec, vars_values,
|
||||
set_only_non_debug_params) != 0) {
|
||||
set_only_non_debug_params, &mgr) != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
PERF_COUNT_SUB("update tesseract_")
|
||||
// Update datapath and language requested for the last valid initialization.
|
||||
if (datapath_ == NULL)
|
||||
if (datapath_ == nullptr)
|
||||
datapath_ = new STRING(datapath);
|
||||
else
|
||||
*datapath_ = datapath;
|
||||
@ -320,7 +340,7 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
|
||||
(strcmp(tesseract_->datadir.string(), "") != 0))
|
||||
*datapath_ = tesseract_->datadir;
|
||||
|
||||
if (language_ == NULL)
|
||||
if (language_ == nullptr)
|
||||
language_ = new STRING(language);
|
||||
else
|
||||
*language_ = language;
|
||||
@ -424,7 +444,8 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
|
||||
tesseract_ = new Tesseract;
|
||||
else
|
||||
ParamUtils::ResetToDefaults(tesseract_->params());
|
||||
return tesseract_->init_tesseract_lm(datapath, NULL, language);
|
||||
TessdataManager mgr;
|
||||
return tesseract_->init_tesseract_lm(datapath, NULL, language, &mgr);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -434,7 +455,7 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
|
||||
void TessBaseAPI::InitForAnalysePage() {
|
||||
if (tesseract_ == NULL) {
|
||||
tesseract_ = new Tesseract;
|
||||
tesseract_->InitAdaptiveClassifier(false);
|
||||
tesseract_->InitAdaptiveClassifier(nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -515,9 +536,7 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Does not copy the image buffer, or take
|
||||
* ownership. The source image may be destroyed after Recognize is called,
|
||||
* either explicitly or implicitly via one of the Get*Text functions.
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
@ -525,9 +544,11 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
|
||||
void TessBaseAPI::SetImage(const unsigned char* imagedata,
|
||||
int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line) {
|
||||
if (InternalSetImage())
|
||||
if (InternalSetImage()) {
|
||||
thresholder_->SetImage(imagedata, width, height,
|
||||
bytes_per_pixel, bytes_per_line);
|
||||
SetInputImage(thresholder_->GetPixRect());
|
||||
}
|
||||
}
|
||||
|
||||
void TessBaseAPI::SetSourceResolution(int ppi) {
|
||||
@ -539,18 +560,17 @@ void TessBaseAPI::SetSourceResolution(int ppi) {
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract doesn't take a copy or ownership or pixDestroy the image, so
|
||||
* it must persist until after Recognize.
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. A future version of Tesseract may choose to use Pix
|
||||
* as its internal representation and discard IMAGE altogether.
|
||||
* Because of that, an implementation that sources and targets Pix may end up
|
||||
* with less copies than an implementation that does not.
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void TessBaseAPI::SetImage(Pix* pix) {
|
||||
if (InternalSetImage())
|
||||
if (InternalSetImage()) {
|
||||
thresholder_->SetImage(pix);
|
||||
SetInputImage(pix);
|
||||
SetInputImage(thresholder_->GetPixRect());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -693,8 +713,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
|
||||
if (pixa != NULL) {
|
||||
Pix* pix = NULL;
|
||||
if (raw_image) {
|
||||
pix = page_it->GetImage(level, raw_padding, input_image_,
|
||||
&left, &top);
|
||||
pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
|
||||
&top);
|
||||
} else {
|
||||
pix = page_it->GetBinaryImage(level);
|
||||
}
|
||||
@ -747,53 +767,6 @@ void TessBaseAPI::DumpPGM(const char* filename) {
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
/**
|
||||
* Placeholder for call to Cube and test that the input data is correct.
|
||||
* reskew is the direction of baselines in the skewed image in
|
||||
* normalized (cos theta, sin theta) form, so (0.866, 0.5) would represent
|
||||
* a 30 degree anticlockwise skew.
|
||||
*/
|
||||
int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
|
||||
Boxa* boxa_words, Pixa* pixa_words,
|
||||
const FCOORD& reskew, Pix* page_pix,
|
||||
PAGE_RES* page_res) {
|
||||
int block_count = boxaGetCount(boxa_blocks);
|
||||
ASSERT_HOST(block_count == pixaGetCount(pixa_blocks));
|
||||
// Write each block to the current directory as junk_write_display.nnn.png.
|
||||
for (int i = 0; i < block_count; ++i) {
|
||||
Pix* pix = pixaGetPix(pixa_blocks, i, L_CLONE);
|
||||
pixDisplayWrite(pix, 1);
|
||||
}
|
||||
int word_count = boxaGetCount(boxa_words);
|
||||
ASSERT_HOST(word_count == pixaGetCount(pixa_words));
|
||||
int pr_word = 0;
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
for (page_res_it.restart_page(); page_res_it.word () != NULL;
|
||||
page_res_it.forward(), ++pr_word) {
|
||||
WERD_RES *word = page_res_it.word();
|
||||
WERD_CHOICE* choice = word->best_choice;
|
||||
// Write the first 100 words to files names wordims/<wordstring>.tif.
|
||||
if (pr_word < 100) {
|
||||
STRING filename("wordims/");
|
||||
if (choice != NULL) {
|
||||
filename += choice->unichar_string();
|
||||
} else {
|
||||
char numbuf[32];
|
||||
filename += "unclassified";
|
||||
snprintf(numbuf, 32, "%03d", pr_word);
|
||||
filename += numbuf;
|
||||
}
|
||||
filename += ".tif";
|
||||
Pix* pix = pixaGetPix(pixa_words, pr_word, L_CLONE);
|
||||
pixWrite(filename.string(), pix, IFF_TIFF_G4);
|
||||
}
|
||||
}
|
||||
ASSERT_HOST(pr_word == word_count);
|
||||
return 0;
|
||||
}
|
||||
#endif // NO_CUBE_BUILD
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
@ -809,9 +782,7 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator* TessBaseAPI::AnalyseLayout() {
|
||||
return AnalyseLayout(false);
|
||||
}
|
||||
PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
|
||||
|
||||
PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
|
||||
if (FindLines() == 0) {
|
||||
@ -836,7 +807,6 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
return -1;
|
||||
if (FindLines() != 0)
|
||||
return -1;
|
||||
if (page_res_ != NULL)
|
||||
delete page_res_;
|
||||
if (block_list_->empty()) {
|
||||
page_res_ = new PAGE_RES(false, block_list_,
|
||||
@ -851,13 +821,17 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
} else if (tesseract_->tessedit_resegment_from_boxes) {
|
||||
page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
|
||||
} else {
|
||||
// TODO(rays) LSTM here.
|
||||
page_res_ = new PAGE_RES(false,
|
||||
page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
|
||||
block_list_, &tesseract_->prev_word_best_choice_);
|
||||
}
|
||||
if (page_res_ == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (tesseract_->tessedit_train_line_recognizer) {
|
||||
tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
|
||||
tesseract_->CorrectClassifyWords(page_res_);
|
||||
return 0;
|
||||
}
|
||||
if (tesseract_->tessedit_make_boxes_from_boxes) {
|
||||
tesseract_->CorrectClassifyWords(page_res_);
|
||||
return 0;
|
||||
@ -940,17 +914,10 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void TessBaseAPI::SetInputImage(Pix *pix) {
|
||||
if (input_image_)
|
||||
pixDestroy(&input_image_);
|
||||
input_image_ = NULL;
|
||||
if (pix)
|
||||
input_image_ = pixCopy(NULL, pix);
|
||||
}
|
||||
// Takes ownership of the input pix.
|
||||
void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); }
|
||||
|
||||
Pix* TessBaseAPI::GetInputImage() {
|
||||
return input_image_;
|
||||
}
|
||||
Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
|
||||
|
||||
const char * TessBaseAPI::GetInputName() {
|
||||
if (input_file_)
|
||||
@ -994,8 +961,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
|
||||
}
|
||||
|
||||
// Begin producing output
|
||||
const char* kUnknownTitle = "";
|
||||
if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
|
||||
if (renderer && !renderer->BeginDocument(unknown_title_)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1038,26 +1004,13 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
|
||||
int tessedit_page_number) {
|
||||
#ifndef ANDROID_BUILD
|
||||
Pix *pix = NULL;
|
||||
#ifdef USE_OPENCL
|
||||
OpenclDevice od;
|
||||
#endif // USE_OPENCL
|
||||
int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
|
||||
size_t offset = 0;
|
||||
for (; ; ++page) {
|
||||
if (tessedit_page_number >= 0)
|
||||
page = tessedit_page_number;
|
||||
#ifdef USE_OPENCL
|
||||
if ( od.selectedDeviceIsOpenCL() ) {
|
||||
pix = (data) ?
|
||||
od.pixReadMemTiffCl(data, size, page) :
|
||||
od.pixReadTiffCl(filename, page);
|
||||
} else {
|
||||
#endif // USE_OPENCL
|
||||
pix = (data) ?
|
||||
pixReadMemTiff(data, size, page) :
|
||||
pixReadTiff(filename, page);
|
||||
#ifdef USE_OPENCL
|
||||
}
|
||||
#endif // USE_OPENCL
|
||||
pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
|
||||
: pixReadFromMultipageTiff(filename, &offset);
|
||||
if (pix == NULL) break;
|
||||
tprintf("Page %d\n", page + 1);
|
||||
char page_str[kMaxIntSize];
|
||||
@ -1068,6 +1021,7 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
|
||||
pixDestroy(&pix);
|
||||
if (!r) return false;
|
||||
if (tessedit_page_number >= 0) break;
|
||||
if (!offset) break;
|
||||
}
|
||||
return true;
|
||||
#else
|
||||
@ -1107,7 +1061,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
||||
const char* retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer* renderer) {
|
||||
#ifndef ANDROID_BUILD
|
||||
PERF_COUNT_START("ProcessPages")
|
||||
bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
|
||||
if (stdInput) {
|
||||
@ -1142,7 +1095,15 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
||||
|
||||
// Maybe we have a filelist
|
||||
if (r != 0 || format == IFF_UNKNOWN) {
|
||||
STRING s(buf.c_str());
|
||||
STRING s;
|
||||
if (stdInput) {
|
||||
s = buf.c_str();
|
||||
} else {
|
||||
std::ifstream t(filename);
|
||||
std::string u((std::istreambuf_iterator<char>(t)),
|
||||
std::istreambuf_iterator<char>());
|
||||
s = u.c_str();
|
||||
}
|
||||
return ProcessPagesFileList(NULL, &s, retry_config,
|
||||
timeout_millisec, renderer,
|
||||
tesseract_->tessedit_page_number);
|
||||
@ -1164,8 +1125,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
||||
}
|
||||
|
||||
// Begin the output
|
||||
const char* kUnknownTitle = "";
|
||||
if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
|
||||
if (renderer && !renderer->BeginDocument(unknown_title_)) {
|
||||
pixDestroy(&pix);
|
||||
return false;
|
||||
}
|
||||
@ -1187,9 +1147,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
||||
}
|
||||
PERF_COUNT_END
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
|
||||
@ -1379,8 +1336,9 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it,
|
||||
hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
|
||||
}
|
||||
|
||||
static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) {
|
||||
const unsigned long BUFSIZE = 64;
|
||||
static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
|
||||
int num2) {
|
||||
const size_t BUFSIZE = 64;
|
||||
char id_buffer[BUFSIZE];
|
||||
if (num2 >= 0) {
|
||||
snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
|
||||
@ -1393,8 +1351,7 @@ static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int
|
||||
*hocr_str += "'";
|
||||
}
|
||||
|
||||
static void AddBoxTohOCR(const ResultIterator *it,
|
||||
PageIteratorLevel level,
|
||||
static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
|
||||
STRING* hocr_str) {
|
||||
int left, top, right, bottom;
|
||||
it->BoundingBox(level, &left, &top, &right, &bottom);
|
||||
@ -1410,7 +1367,7 @@ static void AddBoxTohOCR(const ResultIterator *it,
|
||||
// add custom height measures
|
||||
float row_height, descenders, ascenders; // row attributes
|
||||
it->RowAttributes(&row_height, &descenders, &ascenders);
|
||||
// TODO: Do we want to limit these to a single decimal place?
|
||||
// TODO(rays): Do we want to limit these to a single decimal place?
|
||||
hocr_str->add_str_double("; x_size ", row_height);
|
||||
hocr_str->add_str_double("; x_descenders ", descenders * -1);
|
||||
hocr_str->add_str_double("; x_ascenders ", ascenders);
|
||||
@ -1418,8 +1375,7 @@ static void AddBoxTohOCR(const ResultIterator *it,
|
||||
*hocr_str += "\">";
|
||||
}
|
||||
|
||||
static void AddBoxToTSV(const PageIterator *it,
|
||||
PageIteratorLevel level,
|
||||
static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
|
||||
STRING* hocr_str) {
|
||||
int left, top, right, bottom;
|
||||
it->BoundingBox(level, &left, &top, &right, &bottom);
|
||||
@ -1429,8 +1385,6 @@ static void AddBoxToTSV(const PageIterator *it,
|
||||
hocr_str->add_str_int("\t", bottom - top);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Make an HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
@ -1440,7 +1394,7 @@ static void AddBoxToTSV(const PageIterator *it,
|
||||
* STL removed from original patch submission and refactored by rays.
|
||||
*/
|
||||
char* TessBaseAPI::GetHOCRText(int page_number) {
|
||||
return GetHOCRText(NULL,page_number);
|
||||
return GetHOCRText(NULL, page_number);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1452,8 +1406,7 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
|
||||
* STL removed from original patch submission and refactored by rays.
|
||||
*/
|
||||
char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
if (tesseract_ == NULL ||
|
||||
(page_res_ == NULL && Recognize(monitor) < 0))
|
||||
if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0))
|
||||
return NULL;
|
||||
|
||||
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
|
||||
@ -1470,13 +1423,13 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
|
||||
#ifdef _WIN32
|
||||
// convert input name from ANSI encoding to utf-8
|
||||
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
|
||||
NULL, 0);
|
||||
int str16_len =
|
||||
MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0);
|
||||
wchar_t *uni16_str = new WCHAR[str16_len];
|
||||
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
|
||||
uni16_str, str16_len);
|
||||
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL,
|
||||
0, NULL, NULL);
|
||||
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0,
|
||||
NULL, NULL);
|
||||
char *utf8_str = new char[utf8_len];
|
||||
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
|
||||
utf8_len, NULL, NULL);
|
||||
@ -1567,8 +1520,12 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
}
|
||||
switch (res_it->WordDirection()) {
|
||||
// Only emit direction if different from current paragraph direction
|
||||
case DIR_LEFT_TO_RIGHT: if (!para_is_ltr) hocr_str += " dir='ltr'"; break;
|
||||
case DIR_RIGHT_TO_LEFT: if (para_is_ltr) hocr_str += " dir='rtl'"; break;
|
||||
case DIR_LEFT_TO_RIGHT:
|
||||
if (!para_is_ltr) hocr_str += " dir='ltr'";
|
||||
break;
|
||||
case DIR_RIGHT_TO_LEFT:
|
||||
if (para_is_ltr) hocr_str += " dir='rtl'";
|
||||
break;
|
||||
case DIR_MIX:
|
||||
case DIR_NEUTRAL:
|
||||
default: // Do nothing.
|
||||
@ -1620,8 +1577,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
*/
|
||||
char* TessBaseAPI::GetTSVText(int page_number) {
|
||||
if (tesseract_ == NULL ||
|
||||
(page_res_ == NULL && Recognize(NULL) < 0))
|
||||
if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0))
|
||||
return NULL;
|
||||
|
||||
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
|
||||
@ -1629,7 +1585,8 @@ char* TessBaseAPI::GetTSVText(int page_number) {
|
||||
|
||||
STRING tsv_str("");
|
||||
|
||||
int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, word_num = 0;
|
||||
int page_num = page_id, block_num = 0, par_num = 0, line_num = 0,
|
||||
word_num = 0;
|
||||
|
||||
tsv_str.add_str_int("1\t", page_num); // level 1 - page
|
||||
tsv_str.add_str_int("\t", block_num);
|
||||
@ -1642,7 +1599,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
|
||||
tsv_str.add_str_int("\t", rect_height_);
|
||||
tsv_str += "\t-1\t\n";
|
||||
|
||||
ResultIterator *res_it = GetIterator();
|
||||
ResultIterator* res_it = GetIterator();
|
||||
while (!res_it->Empty(RIL_BLOCK)) {
|
||||
if (res_it->Empty(RIL_WORD)) {
|
||||
res_it->Next(RIL_WORD);
|
||||
@ -1683,13 +1640,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
|
||||
|
||||
// Now, process the word...
|
||||
int left, top, right, bottom;
|
||||
bool bold, italic, underlined, monospace, serif, smallcaps;
|
||||
int pointsize, font_id;
|
||||
const char *font_name;
|
||||
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
|
||||
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
|
||||
&monospace, &serif, &smallcaps,
|
||||
&pointsize, &font_id);
|
||||
word_num++;
|
||||
tsv_str.add_str_int("5\t", page_num); // level 5 - word
|
||||
tsv_str.add_str_int("\t", block_num);
|
||||
@ -1716,7 +1667,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
|
||||
wcnt++;
|
||||
}
|
||||
|
||||
char *ret = new char[tsv_str.length() + 1];
|
||||
char* ret = new char[tsv_str.length() + 1];
|
||||
strcpy(ret, tsv_str.string());
|
||||
delete res_it;
|
||||
return ret;
|
||||
@ -1760,7 +1711,7 @@ char* TessBaseAPI::GetBoxText(int page_number) {
|
||||
int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
|
||||
kMaxBytesPerLine;
|
||||
char* result = new char[total_length];
|
||||
strcpy(result, "\0");
|
||||
result[0] = '\0';
|
||||
int output_length = 0;
|
||||
LTRResultIterator* it = GetLTRIterator();
|
||||
do {
|
||||
@ -1907,43 +1858,70 @@ char* TessBaseAPI::GetUNLVText() {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
// Runs DetectOS and copies selected fields of the result into the caller's
// output parameters. Every output pointer is optional: a null pointer is
// skipped rather than written through.
bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
|
||||
const char** script_name,
|
||||
float* script_conf) {
|
||||
OSResults osr;
|
||||
|
||||
bool osd = DetectOS(&osr);
|
||||
// On detection failure return before touching any output parameter.
if (!osd) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int orient_id = osr.best_result.orientation_id;
|
||||
// The script is looked up for the best orientation found above.
int script_id = osr.get_best_script(orient_id);
|
||||
if (orient_conf) *orient_conf = osr.best_result.oconfidence;
|
||||
if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees
|
||||
|
||||
if (script_name) {
|
||||
// No copy is made here: the caller receives the very pointer returned by
// get_script_from_script_id().
const char* script = osr.unicharset->get_script_from_script_id(script_id);
|
||||
|
||||
*script_name = script;
|
||||
}
|
||||
|
||||
if (script_conf) *script_conf = osr.best_result.sconfidence;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char* TessBaseAPI::GetOsdText(int page_number) {
|
||||
OSResults osr;
|
||||
int orient_deg;
|
||||
float orient_conf;
|
||||
const char* script_name;
|
||||
float script_conf;
|
||||
|
||||
bool osd = DetectOS(&osr);
|
||||
if (!osd) {
|
||||
if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
|
||||
&script_conf))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int orient_id = osr.best_result.orientation_id;
|
||||
int script_id = osr.get_best_script(orient_id);
|
||||
float orient_conf = osr.best_result.oconfidence;
|
||||
float script_conf = osr.best_result.sconfidence;
|
||||
const char* script_name =
|
||||
osr.unicharset->get_script_from_script_id(script_id);
|
||||
|
||||
// clockwise orientation of the input image, in degrees
|
||||
int orient_deg = orient_id * 90;
|
||||
|
||||
// clockwise rotation needed to make the page upright
|
||||
int rotate = OrientationIdToValue(orient_id);
|
||||
int rotate = OrientationIdToValue(orient_deg / 90);
|
||||
|
||||
char* osd_buf = new char[255];
|
||||
snprintf(osd_buf, 255,
|
||||
const int kOsdBufsize = 255;
|
||||
char* osd_buf = new char[kOsdBufsize];
|
||||
snprintf(osd_buf, kOsdBufsize,
|
||||
"Page number: %d\n"
|
||||
"Orientation in degrees: %d\n"
|
||||
"Rotate: %d\n"
|
||||
"Orientation confidence: %.2f\n"
|
||||
"Script: %s\n"
|
||||
"Script confidence: %.2f\n",
|
||||
page_number,
|
||||
orient_deg, rotate, orient_conf,
|
||||
script_name, script_conf);
|
||||
page_number, orient_deg, rotate, orient_conf, script_name,
|
||||
script_conf);
|
||||
|
||||
return osd_buf;
|
||||
}
|
||||
@ -2020,8 +1998,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
|
||||
for (t = 0; text[t] != '\0'; ++t) {
|
||||
if (text[t] == '\n' || text[t] == ' ')
|
||||
continue;
|
||||
while (wordstr[w] != '\0' && wordstr[w] == ' ')
|
||||
++w;
|
||||
while (wordstr[w] == ' ') ++w;
|
||||
if (text[t] != wordstr[w])
|
||||
break;
|
||||
++w;
|
||||
@ -2063,7 +2040,7 @@ void TessBaseAPI::Clear() {
|
||||
if (thresholder_ != NULL)
|
||||
thresholder_->Clear();
|
||||
ClearResults();
|
||||
SetInputImage(NULL);
|
||||
if (tesseract_ != NULL) SetInputImage(NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2073,6 +2050,7 @@ void TessBaseAPI::Clear() {
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void TessBaseAPI::End() {
|
||||
Clear();
|
||||
if (thresholder_ != NULL) {
|
||||
delete thresholder_;
|
||||
thresholder_ = NULL;
|
||||
@ -2108,10 +2086,6 @@ void TessBaseAPI::End() {
|
||||
delete input_file_;
|
||||
input_file_ = NULL;
|
||||
}
|
||||
if (input_image_ != NULL) {
|
||||
pixDestroy(&input_image_);
|
||||
input_image_ = NULL;
|
||||
}
|
||||
if (output_file_ != NULL) {
|
||||
delete output_file_;
|
||||
output_file_ = NULL;
|
||||
@ -2242,6 +2216,8 @@ void TessBaseAPI::Threshold(Pix** pix) {
|
||||
if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
|
||||
// Use the minimum default resolution, as it is safer to under-estimate
|
||||
// than over-estimate resolution.
|
||||
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res,
|
||||
kMinCredibleResolution);
|
||||
thresholder_->SetSourceYResolution(kMinCredibleResolution);
|
||||
}
|
||||
PageSegMode pageseg_mode =
|
||||
@ -2286,7 +2262,7 @@ int TessBaseAPI::FindLines() {
|
||||
}
|
||||
if (tesseract_ == NULL) {
|
||||
tesseract_ = new Tesseract;
|
||||
tesseract_->InitAdaptiveClassifier(false);
|
||||
tesseract_->InitAdaptiveClassifier(nullptr);
|
||||
}
|
||||
if (tesseract_->pix_binary() == NULL)
|
||||
Threshold(tesseract_->mutable_pix_binary());
|
||||
@ -2308,14 +2284,16 @@ int TessBaseAPI::FindLines() {
|
||||
|
||||
Tesseract* osd_tess = osd_tesseract_;
|
||||
OSResults osr;
|
||||
if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
|
||||
if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
|
||||
osd_tess == nullptr) {
|
||||
if (strcmp(language_->string(), "osd") == 0) {
|
||||
osd_tess = tesseract_;
|
||||
} else {
|
||||
osd_tesseract_ = new Tesseract;
|
||||
if (osd_tesseract_->init_tesseract(
|
||||
datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY,
|
||||
NULL, 0, NULL, NULL, false) == 0) {
|
||||
TessdataManager mgr(reader_);
|
||||
if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, "osd",
|
||||
OEM_TESSERACT_ONLY, nullptr, 0,
|
||||
nullptr, nullptr, false, &mgr) == 0) {
|
||||
osd_tess = osd_tesseract_;
|
||||
osd_tesseract_->set_source_resolution(
|
||||
thresholder_->GetSourceYResolution());
|
||||
@ -2323,7 +2301,7 @@ int TessBaseAPI::FindLines() {
|
||||
tprintf("Warning: Auto orientation and script detection requested,"
|
||||
" but osd language failed to load\n");
|
||||
delete osd_tesseract_;
|
||||
osd_tesseract_ = NULL;
|
||||
osd_tesseract_ = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2766,7 +2744,7 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
|
||||
INT_FX_RESULT_STRUCT fx_info;
|
||||
tesseract_->ExtractFeatures(*blob, false, &bl_features,
|
||||
&cn_features, &fx_info, &outline_counts);
|
||||
if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) {
|
||||
if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
|
||||
*num_features = 0;
|
||||
return; // Feature extraction failed.
|
||||
}
|
||||
@ -2847,13 +2825,6 @@ int TessBaseAPI::NumDawgs() const {
|
||||
return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
|
||||
}
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
/** Return a pointer to underlying CubeRecoContext object if present. */
|
||||
CubeRecoContext *TessBaseAPI::GetCubeRecoContext() const {
|
||||
return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
|
||||
}
|
||||
#endif // NO_CUBE_BUILD
|
||||
|
||||
/** Escape a char string - remove <>&"' with HTML codes. */
|
||||
STRING HOcrEscape(const char* text) {
|
||||
STRING ret;
|
||||
|
@ -17,11 +17,11 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H__
|
||||
#define TESSERACT_API_BASEAPI_H__
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#define TESSERACT_VERSION_STR "3.05.00dev"
|
||||
#define TESSERACT_VERSION 0x030500
|
||||
#define TESSERACT_VERSION_STR "4.00.00alpha"
|
||||
#define TESSERACT_VERSION 0x040000
|
||||
#define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
|
||||
(patch))
|
||||
|
||||
@ -29,14 +29,15 @@
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include "platform.h"
|
||||
#include "apitypes.h"
|
||||
#include "pageiterator.h"
|
||||
#include "platform.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "serialis.h"
|
||||
#include "tesscallback.h"
|
||||
#include "thresholder.h"
|
||||
#include "unichar.h"
|
||||
#include "tesscallback.h"
|
||||
#include "publictypes.h"
|
||||
#include "pageiterator.h"
|
||||
#include "resultiterator.h"
|
||||
|
||||
template <typename T> class GenericVector;
|
||||
class PAGE_RES;
|
||||
@ -65,9 +66,6 @@ struct TBLOB;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
class CubeRecoContext;
|
||||
#endif // NO_CUBE_BUILD
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
@ -142,6 +140,7 @@ class TESS_API TessBaseAPI {
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char* GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix* GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
@ -239,6 +238,13 @@ class TESS_API TessBaseAPI {
|
||||
int Init(const char* datapath, const char* language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char* data, int data_size, const char* language,
|
||||
OcrEngineMode mode, char** configs, int configs_size,
|
||||
const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
@ -333,9 +339,7 @@ class TESS_API TessBaseAPI {
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Does not copy the image buffer, or take
|
||||
* ownership. The source image may be destroyed after Recognize is called,
|
||||
* either explicitly or implicitly via one of the Get*Text functions.
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
@ -345,13 +349,11 @@ class TESS_API TessBaseAPI {
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract doesn't take a copy or ownership or pixDestroy the image, so
|
||||
* it must persist until after Recognize.
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. A future version of Tesseract may choose to use Pix
|
||||
* as its internal representation and discard IMAGE altogether.
|
||||
* Because of that, an implementation that sources and targets Pix may end up
|
||||
* with less copies than an implementation that does not.
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix* pix);
|
||||
|
||||
@ -376,7 +378,6 @@ class TESS_API TessBaseAPI {
|
||||
* delete it when it is replaced or the API is destructed.
|
||||
*/
|
||||
void SetThresholder(ImageThresholder* thresholder) {
|
||||
if (thresholder_ != NULL)
|
||||
delete thresholder_;
|
||||
thresholder_ = thresholder;
|
||||
ClearResults();
|
||||
@ -622,6 +623,18 @@ class TESS_API TessBaseAPI {
|
||||
*/
|
||||
char* GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int* orient_deg, float* orient_conf,
|
||||
const char** script_name, float* script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
@ -750,21 +763,12 @@ class TESS_API TessBaseAPI {
|
||||
*/
|
||||
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
|
||||
|
||||
Tesseract* tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
Tesseract* tesseract() const { return tesseract_; }
|
||||
|
||||
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
OcrEngineMode oem() const { return last_oem_requested_; }
|
||||
|
||||
void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
/** Return a pointer to underlying CubeRecoContext object if present. */
|
||||
CubeRecoContext *GetCubeRecoContext() const;
|
||||
#endif // NO_CUBE_BUILD
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
|
||||
/**
|
||||
@ -855,9 +859,7 @@ class TESS_API TessBaseAPI {
|
||||
int** y1,
|
||||
PAGE_RES* page_res);
|
||||
|
||||
TESS_LOCAL const PAGE_RES* GetPageRes() const {
|
||||
return page_res_;
|
||||
};
|
||||
TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
|
||||
/* @} */
|
||||
|
||||
|
||||
@ -865,12 +867,12 @@ class TESS_API TessBaseAPI {
|
||||
Tesseract* tesseract_; ///< The underlying data object.
|
||||
Tesseract* osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect* equ_detect_; ///<The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder* thresholder_; ///< Image thresholding module.
|
||||
GenericVector<ParagraphModel *>* paragraph_models_;
|
||||
BLOCK_LIST* block_list_; ///< The page layout.
|
||||
PAGE_RES* page_res_; ///< The page-level data.
|
||||
STRING* input_file_; ///< Name used by training code.
|
||||
Pix* input_image_; ///< Image used for searchable PDF
|
||||
STRING* output_file_; ///< Name used by debug code.
|
||||
STRING* datapath_; ///< Current location of tessdata.
|
||||
STRING* language_; ///< Last initialized language.
|
||||
@ -898,7 +900,7 @@ class TESS_API TessBaseAPI {
|
||||
const char* retry_config, int timeout_millisec,
|
||||
TessResultRenderer* renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data,
|
||||
size_t size,
|
||||
const char* filename,
|
||||
@ -906,10 +908,16 @@ class TESS_API TessBaseAPI {
|
||||
int timeout_millisec,
|
||||
TessResultRenderer* renderer,
|
||||
int tessedit_page_number);
|
||||
// There's currently no way to pass a document title from the
|
||||
// Tesseract command line, and we have multiple places that choose
|
||||
// to set the title to an empty string. Using a single named
|
||||
// variable will hopefully reduce confusion if the situation changes
|
||||
// in the future.
|
||||
const char *unknown_title_ = "";
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - remove &<>"' with HTML codes. */
|
||||
STRING HOcrEscape(const char* text);
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H__
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
||||
|
23
api/capi.cpp
23
api/capi.cpp
@ -64,9 +64,10 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu
|
||||
return new TessHOcrRenderer(outputbase, font_info);
|
||||
}
|
||||
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir)
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
||||
BOOL textonly)
|
||||
{
|
||||
return new TessPDFRenderer(outputbase, datadir);
|
||||
return new TessPDFRenderer(outputbase, datadir, textonly);
|
||||
}
|
||||
|
||||
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)
|
||||
@ -538,9 +539,18 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand
|
||||
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results)
|
||||
{
|
||||
return handle->DetectOS(results) ? TRUE : FALSE;
|
||||
return FALSE; // Unsafe ABI, return FALSE always
|
||||
}
|
||||
|
||||
// C wrapper: forwards all four output pointers unchanged to
// handle->DetectOrientationScript() and returns its bool result cast to BOOL.
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
|
||||
int* orient_deg, float* orient_conf, const char** script_name, float* script_conf)
|
||||
{
|
||||
bool success;
|
||||
success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf);
|
||||
return (BOOL)success;
|
||||
}
|
||||
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
|
||||
int* num_features, int* FeatureOutlineIndex)
|
||||
{
|
||||
@ -598,13 +608,6 @@ TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTr
|
||||
handle->InitTruthCallback(cb);
|
||||
}
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
TESS_API TessCubeRecoContext* TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle)
|
||||
{
|
||||
return handle->GetCubeRecoContext();
|
||||
}
|
||||
#endif // NO_CUBE_BUILD
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin)
|
||||
{
|
||||
handle->set_min_orientation_margin(margin);
|
||||
|
18
api/capi.h
18
api/capi.h
@ -68,9 +68,6 @@ typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc;
|
||||
typedef tesseract::FillLatticeFunc TessFillLatticeFunc;
|
||||
typedef tesseract::Dawg TessDawg;
|
||||
typedef tesseract::TruthCallback TessTruthCallback;
|
||||
#ifndef NO_CUBE_BUILD
|
||||
typedef tesseract::CubeRecoContext TessCubeRecoContext;
|
||||
#endif // NO_CUBE_BUILD
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
@ -88,7 +85,7 @@ typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_CUBE_ONLY, OEM_TESSERACT_CUBE_COMBINED, OEM_DEFAULT } TessOcrEngineMode;
|
||||
typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT } TessOcrEngineMode;
|
||||
typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD, PSM_COUNT } TessPageSegMode;
|
||||
@ -122,7 +119,8 @@ TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
|
||||
|
||||
@ -285,7 +283,10 @@ TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
|
||||
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results);
|
||||
|
||||
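// NOTE(review): *script_name receives the pointer returned by get_script_from_script_id() with no copy made for the caller, so it presumably must NOT be freed with TessDeleteText -- confirm ownership.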
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
|
||||
int* orient_deg, float* orient_conf, const char **script_name, float* script_conf);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
|
||||
int* num_features, int* FeatureOutlineIndex);
|
||||
@ -313,11 +314,6 @@ TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric
|
||||
TESS_API TessOcrEngineMode
|
||||
TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
|
||||
TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb);
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
TESS_API TessCubeRecoContext*
|
||||
TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle);
|
||||
#endif // NO_CUBE_BUILD
|
||||
#endif
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin);
|
||||
|
@ -20,12 +20,12 @@
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "baseapi.h"
|
||||
#include "renderer.h"
|
||||
#include "math.h"
|
||||
#include "renderer.h"
|
||||
#include "strngs.h"
|
||||
#include "tprintf.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include "mathfix.h"
|
||||
@ -159,7 +159,7 @@ CIDToGIDMap.
|
||||
|
||||
OK there is a small problem there, if I use GID 0 then Acrobat gets
|
||||
upset about it and complains it cannot extract the font. If I set the
|
||||
CIDToGIDMap so that all the entries are 1 instead, its happy. Totally
|
||||
CIDToGIDMap so that all the entries are 1 instead, it's happy. Totally
|
||||
mad......
|
||||
|
||||
*/
|
||||
@ -169,19 +169,26 @@ namespace tesseract {
|
||||
// Use for PDF object fragments. Must be large enough
|
||||
// to hold a colormap with 256 colors in the verbose
|
||||
// PDF representation.
|
||||
const int kBasicBufSize = 2048;
|
||||
static const int kBasicBufSize = 2048;
|
||||
|
||||
// If the font is 10 pts, nominal character width is 5 pts
|
||||
const int kCharWidth = 2;
|
||||
static const int kCharWidth = 2;
|
||||
|
||||
// Used for memory allocation. A codepoint must take no more than this
|
||||
// many bytes, when written in the PDF way. e.g. "<0063>" for the
|
||||
// letter 'c'
|
||||
static const int kMaxBytesPerCodepoint = 20;
|
||||
|
||||
/**********************************************************************
|
||||
* PDF Renderer interface implementation
|
||||
**********************************************************************/
|
||||
|
||||
TessPDFRenderer::TessPDFRenderer(const char* outputbase, const char *datadir)
|
||||
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly)
|
||||
: TessResultRenderer(outputbase, "pdf") {
|
||||
obj_ = 0;
|
||||
datadir_ = datadir;
|
||||
textonly_ = textonly;
|
||||
offsets_.push_back(0);
|
||||
}
|
||||
|
||||
@ -282,7 +289,7 @@ void AffineMatrix(int writing_direction,
|
||||
}
|
||||
}
|
||||
|
||||
// There are some really stupid PDF viewers in the wild, such as
|
||||
// There are some really awkward PDF viewers in the wild, such as
|
||||
// 'Preview' which ships with the Mac. They do a better job with text
|
||||
// selection and highlighting when given perfectly flat baseline
|
||||
// instead of very slightly tilted. We clip small tilts to appease
|
||||
@ -302,6 +309,23 @@ void ClipBaseline(int ppi, int x1, int y1, int x2, int y2,
|
||||
*line_y1 = *line_y2 = (y1 + y2) / 2;
|
||||
}
|
||||
|
||||
// Writes the UTF-16BE encoding of `code` into `utf16` as upper-case hex
// digits ("%04X" per 16-bit unit), limited to kMaxBytesPerCodepoint bytes by
// snprintf. Returns true when something was written; returns false, after
// logging through tprintf and without writing to `utf16`, when `code` is
// rejected.
bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
|
||||
// Reject the surrogate range 0xD800-0xDFFF and anything above 0x10FFFF.
if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) {
|
||||
tprintf("Dropping invalid codepoint %d\n", code);
|
||||
return false;
|
||||
}
|
||||
// Codes below 0x10000 are written as a single 16-bit unit (four hex digits).
if (code < 0x10000) {
|
||||
snprintf(utf16, kMaxBytesPerCodepoint, "%04X", code);
|
||||
} else {
|
||||
// Otherwise split the 20-bit offset from 0x10000 into a surrogate pair:
// the upper 10 bits go on 0xD800, the lower 10 bits on 0xDC00.
int a = code - 0x010000;
|
||||
int high_surrogate = (0x03FF & (a >> 10)) + 0xD800;
|
||||
int low_surrogate = (0x03FF & a) + 0xDC00;
|
||||
snprintf(utf16, kMaxBytesPerCodepoint,
|
||||
"%04X%04X", high_surrogate, low_surrogate);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
double width, double height) {
|
||||
STRING pdf_str("");
|
||||
@ -326,7 +350,11 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
pdf_str.add_str_double("", prec(width));
|
||||
pdf_str += " 0 0 ";
|
||||
pdf_str.add_str_double("", prec(height));
|
||||
pdf_str += " 0 0 cm /Im1 Do Q\n";
|
||||
pdf_str += " 0 0 cm";
|
||||
if (!textonly_) {
|
||||
pdf_str += " /Im1 Do";
|
||||
}
|
||||
pdf_str += " Q\n";
|
||||
|
||||
int line_x1 = 0;
|
||||
int line_y1 = 0;
|
||||
@ -436,27 +464,15 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
if (grapheme && grapheme[0] != '\0') {
|
||||
GenericVector<int> unicodes;
|
||||
UNICHAR::UTF8ToUnicode(grapheme, &unicodes);
|
||||
char utf16[20];
|
||||
char utf16[kMaxBytesPerCodepoint];
|
||||
for (int i = 0; i < unicodes.length(); i++) {
|
||||
int code = unicodes[i];
|
||||
// Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16
|
||||
if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) {
|
||||
tprintf("Dropping invalid codepoint %d\n", code);
|
||||
continue;
|
||||
}
|
||||
if (code < 0x10000) {
|
||||
snprintf(utf16, sizeof(utf16), "<%04X>", code);
|
||||
} else {
|
||||
int a = code - 0x010000;
|
||||
int high_surrogate = (0x03FF & (a >> 10)) + 0xD800;
|
||||
int low_surrogate = (0x03FF & a) + 0xDC00;
|
||||
snprintf(utf16, sizeof(utf16), "<%04X%04X>",
|
||||
high_surrogate, low_surrogate);
|
||||
}
|
||||
if (CodepointToUtf16be(code, utf16)) {
|
||||
pdf_word += utf16;
|
||||
pdf_word_len++;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete []grapheme;
|
||||
res_it->Next(RIL_SYMBOL);
|
||||
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
|
||||
@ -465,9 +481,9 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len));
|
||||
pdf_str.add_str_double("", h_stretch);
|
||||
pdf_str += " Tz"; // horizontal stretch
|
||||
pdf_str += " [ ";
|
||||
pdf_str += " [ <";
|
||||
pdf_str += pdf_word; // UTF-16BE representation
|
||||
pdf_str += " ] TJ"; // show the text
|
||||
pdf_str += "> ] TJ"; // show the text
|
||||
}
|
||||
if (last_word_in_line) {
|
||||
pdf_str += " \n";
|
||||
@ -567,7 +583,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
"<<\n"
|
||||
" /Length %lu /Filter /FlateDecode\n"
|
||||
">>\n"
|
||||
"stream\n", (unsigned long)len);
|
||||
"stream\n",
|
||||
(unsigned long)len);
|
||||
if (n >= sizeof(buf)) {
|
||||
lept_free(comp);
|
||||
return false;
|
||||
@ -619,7 +636,6 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
AppendPDFObject(buf);
|
||||
|
||||
// FONT DESCRIPTOR
|
||||
const int kCharHeight = 2; // Effect: highlights are half height
|
||||
n = snprintf(buf, sizeof(buf),
|
||||
"7 0 obj\n"
|
||||
"<<\n"
|
||||
@ -635,10 +651,10 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
" /Type /FontDescriptor\n"
|
||||
">>\n"
|
||||
"endobj\n",
|
||||
1000 / kCharHeight,
|
||||
1000 / kCharHeight,
|
||||
1000,
|
||||
1000,
|
||||
1000 / kCharWidth,
|
||||
1000 / kCharHeight,
|
||||
1000,
|
||||
8L // Font data
|
||||
);
|
||||
if (n >= sizeof(buf)) return false;
|
||||
@ -703,11 +719,6 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
|
||||
L_COMP_DATA *cid = NULL;
|
||||
const int kJpegQuality = 85;
|
||||
|
||||
// TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain
|
||||
// types of PNG files, especially if there are 2 samples per pixel.
|
||||
// We can get rid of this logic after Leptonica 1.72 is released and
|
||||
// has propagated everywhere. Bug discussion as follows.
|
||||
// https://code.google.com/p/tesseract-ocr/issues/detail?id=1300
|
||||
int format, sad;
|
||||
findFileFormat(filename, &format);
|
||||
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
|
||||
@ -819,10 +830,6 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
|
||||
*pdf_object_size =
|
||||
b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len;
|
||||
*pdf_object = new char[*pdf_object_size];
|
||||
if (!pdf_object) {
|
||||
l_CIDataDestroy(&cid);
|
||||
return false;
|
||||
}
|
||||
|
||||
char *p = *pdf_object;
|
||||
memcpy(p, b1, b1_len);
|
||||
@ -841,6 +848,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
|
||||
bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
size_t n;
|
||||
char buf[kBasicBufSize];
|
||||
char buf2[kBasicBufSize];
|
||||
Pix *pix = api->GetInputImage();
|
||||
char *filename = (char *)api->GetInputName();
|
||||
int ppi = api->GetSourceYResolution();
|
||||
@ -849,6 +857,9 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
double width = pixGetWidth(pix) * 72.0 / ppi;
|
||||
double height = pixGetHeight(pix) * 72.0 / ppi;
|
||||
|
||||
snprintf(buf2, sizeof(buf2), "/XObject << /Im1 %ld 0 R >>\n", obj_ + 2);
|
||||
const char *xobject = (textonly_) ? "" : buf2;
|
||||
|
||||
// PAGE
|
||||
n = snprintf(buf, sizeof(buf),
|
||||
"%ld 0 obj\n"
|
||||
@ -859,7 +870,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
" /Contents %ld 0 R\n"
|
||||
" /Resources\n"
|
||||
" <<\n"
|
||||
" /XObject << /Im1 %ld 0 R >>\n"
|
||||
" %s"
|
||||
" /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
|
||||
" /Font << /f-0-0 %ld 0 R >>\n"
|
||||
" >>\n"
|
||||
@ -867,10 +878,9 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
"endobj\n",
|
||||
obj_,
|
||||
2L, // Pages object
|
||||
width,
|
||||
height,
|
||||
width, height,
|
||||
obj_ + 1, // Contents object
|
||||
obj_ + 2, // Image object
|
||||
xobject, // Image object
|
||||
3L); // Type0 Font
|
||||
if (n >= sizeof(buf)) return false;
|
||||
pages_.push_back(obj_);
|
||||
@ -908,13 +918,15 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
objsize += strlen(b2);
|
||||
AppendPDFObjectDIY(objsize);
|
||||
|
||||
char *pdf_object;
|
||||
if (!textonly_) {
|
||||
char *pdf_object = nullptr;
|
||||
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
|
||||
return false;
|
||||
}
|
||||
AppendData(pdf_object, objsize);
|
||||
AppendPDFObjectDIY(objsize);
|
||||
delete[] pdf_object;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -958,15 +970,27 @@ bool TessPDFRenderer::EndDocumentHandler() {
|
||||
offsets_.back() += pages_objsize; // manipulation #2
|
||||
|
||||
// INFO
|
||||
STRING utf16_title = "FEFF"; // byte_order_marker
|
||||
GenericVector<int> unicodes;
|
||||
UNICHAR::UTF8ToUnicode(title(), &unicodes);
|
||||
char utf16[kMaxBytesPerCodepoint];
|
||||
for (int i = 0; i < unicodes.length(); i++) {
|
||||
int code = unicodes[i];
|
||||
if (CodepointToUtf16be(code, utf16)) {
|
||||
utf16_title += utf16;
|
||||
}
|
||||
}
|
||||
|
||||
char* datestr = l_getFormattedDate();
|
||||
n = snprintf(buf, sizeof(buf),
|
||||
"%ld 0 obj\n"
|
||||
"<<\n"
|
||||
" /Producer (Tesseract %s)\n"
|
||||
" /CreationDate (D:%s)\n"
|
||||
" /Title (%s)"
|
||||
" /Title <%s>\n"
|
||||
">>\n"
|
||||
"endobj\n", obj_, TESSERACT_VERSION_STR, datestr, title());
|
||||
"endobj\n",
|
||||
obj_, TESSERACT_VERSION_STR, datestr, utf16_title.c_str());
|
||||
lept_free(datestr);
|
||||
if (n >= sizeof(buf)) return false;
|
||||
AppendPDFObject(buf);
|
||||
|
@ -198,25 +198,25 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
/**********************************************************************
|
||||
* TSV Text Renderer interface implementation
|
||||
**********************************************************************/
|
||||
TessTsvRenderer::TessTsvRenderer(const char *outputbase)
|
||||
TessTsvRenderer::TessTsvRenderer(const char* outputbase)
|
||||
: TessResultRenderer(outputbase, "tsv") {
|
||||
font_info_ = false;
|
||||
}
|
||||
|
||||
TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info)
|
||||
TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
|
||||
: TessResultRenderer(outputbase, "tsv") {
|
||||
font_info_ = font_info;
|
||||
}
|
||||
|
||||
bool TessTsvRenderer::BeginDocumentHandler() {
|
||||
// Output TSV column headings
|
||||
AppendString("level\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext\n");
|
||||
AppendString(
|
||||
"level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
|
||||
"num\tleft\ttop\twidth\theight\tconf\ttext\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TessTsvRenderer::EndDocumentHandler() {
|
||||
return true;
|
||||
}
|
||||
bool TessTsvRenderer::EndDocumentHandler() { return true; }
|
||||
|
||||
bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
char* tsv = api->GetTSVText(imagenum());
|
||||
@ -266,8 +266,7 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
* Osd Text Renderer interface implementation
|
||||
**********************************************************************/
|
||||
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
|
||||
: TessResultRenderer(outputbase, "osd") {
|
||||
}
|
||||
: TessResultRenderer(outputbase, "osd") {}
|
||||
|
||||
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
char* osd = api->GetOsdText(imagenum());
|
||||
|
@ -15,8 +15,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H__
|
||||
#define TESSERACT_API_RENDERER_H__
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
@ -57,6 +57,7 @@ class TESS_API TessResultRenderer {
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char* title);
|
||||
|
||||
@ -77,7 +78,7 @@ class TESS_API TessResultRenderer {
|
||||
bool EndDocument();
|
||||
|
||||
const char* file_extension() const { return file_extension_; }
|
||||
const char* title() const { return title_; }
|
||||
const char* title() const { return title_.c_str(); }
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
@ -126,7 +127,7 @@ class TESS_API TessResultRenderer {
|
||||
|
||||
private:
|
||||
const char* file_extension_; // standard extension for generated output
|
||||
const char* title_; // title of document being renderered
|
||||
STRING title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
|
||||
FILE* fout_; // output file pointer
|
||||
@ -153,12 +154,12 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer {
|
||||
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessHOcrRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
protected:
|
||||
virtual bool BeginDocumentHandler();
|
||||
virtual bool AddImageHandler(TessBaseAPI* api);
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
private:
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
@ -167,15 +168,15 @@ private:
|
||||
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTsvRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessTsvRenderer(const char *outputbase);
|
||||
explicit TessTsvRenderer(const char* outputbase, bool font_info);
|
||||
explicit TessTsvRenderer(const char* outputbase);
|
||||
|
||||
protected:
|
||||
protected:
|
||||
virtual bool BeginDocumentHandler();
|
||||
virtual bool AddImageHandler(TessBaseAPI* api);
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
private:
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
@ -186,30 +187,30 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char *outputbase, const char *datadir);
|
||||
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
|
||||
|
||||
protected:
|
||||
protected:
|
||||
virtual bool BeginDocumentHandler();
|
||||
virtual bool AddImageHandler(TessBaseAPI* api);
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
private:
|
||||
private:
|
||||
// We don't want to have every image in memory at once,
|
||||
// so we store some metadata as we go along producing
|
||||
// PDFs one page at a time. At the end that metadata is
|
||||
// PDFs one page at a time. At the end, that metadata is
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
GenericVector<long int> offsets_; // offset of every PDF object in bytes
|
||||
GenericVector<long int> pages_; // object number for every /Page object
|
||||
const char *datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
static char* GetPDFTextObjects(TessBaseAPI* api,
|
||||
double width, double height);
|
||||
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size);
|
||||
@ -251,4 +252,4 @@ class TESS_API TessOsdRenderer : public TessResultRenderer {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H__
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
||||
|
@ -27,16 +27,16 @@
|
||||
#include "allheaders.h"
|
||||
#include "baseapi.h"
|
||||
#include "basedir.h"
|
||||
#include "renderer.h"
|
||||
#include "strngs.h"
|
||||
#include "tprintf.h"
|
||||
#include "openclwrapper.h"
|
||||
#include "osdetect.h"
|
||||
#include "renderer.h"
|
||||
#include "simddetect.h"
|
||||
#include "strngs.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
|
||||
|
||||
#include <tiffio.h>
|
||||
#include <windows.h>
|
||||
|
||||
static void Win32WarningHandler(const char* module, const char* fmt,
|
||||
va_list ap) {
|
||||
@ -51,7 +51,7 @@ static void Win32WarningHandler(const char* module, const char* fmt,
|
||||
#endif /* HAVE_TIFFIO_H && _WIN32 */
|
||||
|
||||
void PrintVersionInfo() {
|
||||
char *versionStrP;
|
||||
char* versionStrP;
|
||||
|
||||
printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
|
||||
|
||||
@ -64,33 +64,45 @@ void PrintVersionInfo() {
|
||||
lept_free(versionStrP);
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
cl_platform_id platform;
|
||||
cl_platform_id platform[4];
|
||||
cl_uint num_platforms;
|
||||
cl_device_id devices[2];
|
||||
cl_uint num_devices;
|
||||
char info[256];
|
||||
int i;
|
||||
|
||||
printf(" OpenCL info:\n");
|
||||
clGetPlatformIDs(1, &platform, &num_platforms);
|
||||
printf(" Found %d platforms.\n", num_platforms);
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0);
|
||||
printf(" Platform name: %s.\n", info);
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0);
|
||||
if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
|
||||
printf(" Found %u platform(s).\n", num_platforms);
|
||||
for (unsigned n = 0; n < num_platforms; n++) {
|
||||
char info[256];
|
||||
if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
|
||||
CL_SUCCESS) {
|
||||
printf(" Platform %u name: %s.\n", n + 1, info);
|
||||
}
|
||||
if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
|
||||
CL_SUCCESS) {
|
||||
printf(" Version: %s.\n", info);
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices);
|
||||
printf(" Found %d devices.\n", num_devices);
|
||||
for (i = 0; i < num_devices; ++i) {
|
||||
clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0);
|
||||
printf(" Device %d name: %s.\n", i+1, info);
|
||||
}
|
||||
cl_device_id devices[2];
|
||||
cl_uint num_devices;
|
||||
if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
|
||||
&num_devices) == CL_SUCCESS) {
|
||||
printf(" Found %u device(s).\n", num_devices);
|
||||
for (unsigned i = 0; i < num_devices; ++i) {
|
||||
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
|
||||
CL_SUCCESS) {
|
||||
printf(" Device %u name: %s.\n", i + 1, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n");
|
||||
if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n");
|
||||
}
|
||||
|
||||
void PrintUsage(const char* program) {
|
||||
printf(
|
||||
"Usage:\n"
|
||||
" %s --help | --help-psm | --version\n"
|
||||
" %s --help | --help-psm | --help-oem | --version\n"
|
||||
" %s --list-langs [--tessdata-dir PATH]\n"
|
||||
" %s --print-parameters [options...] [configfile...]\n"
|
||||
" %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
|
||||
@ -111,16 +123,22 @@ void PrintHelpForPSM() {
|
||||
" 8 Treat the image as a single word.\n"
|
||||
" 9 Treat the image as a single word in a circle.\n"
|
||||
" 10 Treat the image as a single character.\n"
|
||||
|
||||
//TODO: Consider publishing these modes.
|
||||
#if 0
|
||||
" 11 Sparse text. Find as much text as possible in no"
|
||||
" particular order.\n"
|
||||
" 12 Sparse text with OSD.\n"
|
||||
" 13 Raw line. Treat the image as a single text line,\n"
|
||||
"\t\t\tbypassing hacks that are Tesseract-specific.\n"
|
||||
#endif
|
||||
;
|
||||
"\t\t\tbypassing hacks that are Tesseract-specific.\n";
|
||||
|
||||
printf("%s", msg);
|
||||
}
|
||||
|
||||
// Writes the table of OCR engine modes (the values accepted by --oem) to
// stdout. Takes no arguments and returns nothing.
void PrintHelpForOEM() {
  static const char kEngineModeHelp[] =
      "OCR Engine modes:\n"
      " 0 Original Tesseract only.\n"
      " 1 Neural nets LSTM only.\n"
      " 2 Tesseract + LSTM.\n"
      " 3 Default, based on what is available.\n";

  // The text contains no conversions, so emit it verbatim.
  fputs(kEngineModeHelp, stdout);
}
|
||||
@ -136,32 +154,34 @@ void PrintHelpMessage(const char* program) {
|
||||
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
|
||||
" -c VAR=VALUE Set value for config variables.\n"
|
||||
" Multiple -c arguments are allowed.\n"
|
||||
" -psm NUM Specify page segmentation mode.\n"
|
||||
"NOTE: These options must occur before any configfile.\n"
|
||||
;
|
||||
" --psm NUM Specify page segmentation mode.\n"
|
||||
" --oem NUM Specify OCR Engine mode.\n"
|
||||
"NOTE: These options must occur before any configfile.\n";
|
||||
|
||||
printf("\n%s\n", ocr_options);
|
||||
PrintHelpForPSM();
|
||||
PrintHelpForOEM();
|
||||
|
||||
const char *single_options =
|
||||
const char* single_options =
|
||||
"Single options:\n"
|
||||
" -h, --help Show this help message.\n"
|
||||
" --help-psm Show page segmentation modes.\n"
|
||||
" --help-oem Show OCR Engine modes.\n"
|
||||
" -v, --version Show version information.\n"
|
||||
" --list-langs List available languages for tesseract engine.\n"
|
||||
" --print-parameters Print tesseract parameters to stdout.\n"
|
||||
;
|
||||
" --print-parameters Print tesseract parameters.\n";
|
||||
|
||||
printf("\n%s", single_options);
|
||||
}
|
||||
|
||||
void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) {
|
||||
void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
|
||||
char** argv) {
|
||||
char opt1[256], opt2[255];
|
||||
for (int i = 0; i < argc; i++) {
|
||||
if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
|
||||
strncpy(opt1, argv[i + 1], 255);
|
||||
opt1[255] = '\0';
|
||||
char *p = strchr(opt1, '=');
|
||||
char* p = strchr(opt1, '=');
|
||||
if (!p) {
|
||||
fprintf(stderr, "Missing = in configvar assignment\n");
|
||||
exit(1);
|
||||
@ -215,25 +235,20 @@ void FixPageSegMode(tesseract::TessBaseAPI* api,
|
||||
}
|
||||
|
||||
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
|
||||
void ParseArgs(const int argc, char** argv,
|
||||
const char** lang,
|
||||
const char** image,
|
||||
const char** outputbase,
|
||||
const char** datapath,
|
||||
bool* list_langs,
|
||||
bool* print_parameters,
|
||||
void ParseArgs(const int argc, char** argv, const char** lang,
|
||||
const char** image, const char** outputbase,
|
||||
const char** datapath, bool* list_langs, bool* print_parameters,
|
||||
GenericVector<STRING>* vars_vec,
|
||||
GenericVector<STRING>* vars_values,
|
||||
int* arg_i,
|
||||
tesseract::PageSegMode* pagesegmode) {
|
||||
GenericVector<STRING>* vars_values, int* arg_i,
|
||||
tesseract::PageSegMode* pagesegmode,
|
||||
tesseract::OcrEngineMode* enginemode) {
|
||||
if (argc == 1) {
|
||||
PrintHelpMessage(argv[0]);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (argc == 2) {
|
||||
if ((strcmp(argv[1], "-h") == 0) ||
|
||||
(strcmp(argv[1], "--help") == 0)) {
|
||||
if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
|
||||
PrintHelpMessage(argv[0]);
|
||||
exit(0);
|
||||
}
|
||||
@ -241,8 +256,11 @@ void ParseArgs(const int argc, char** argv,
|
||||
PrintHelpForPSM();
|
||||
exit(0);
|
||||
}
|
||||
if ((strcmp(argv[1], "-v") == 0) ||
|
||||
(strcmp(argv[1], "--version") == 0)) {
|
||||
if ((strcmp(argv[1], "--help-oem") == 0)) {
|
||||
PrintHelpForOEM();
|
||||
exit(0);
|
||||
}
|
||||
if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
|
||||
PrintVersionInfo();
|
||||
exit(0);
|
||||
}
|
||||
@ -269,8 +287,16 @@ void ParseArgs(const int argc, char** argv,
|
||||
noocr = true;
|
||||
*list_langs = true;
|
||||
} else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
|
||||
// The parameter -psm is deprecated and was replaced by --psm.
|
||||
// It is still supported for compatibility reasons.
|
||||
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
|
||||
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
|
||||
*enginemode = static_cast<tesseract::OcrEngineMode>(atoi(argv[i + 1]));
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--print-parameters") == 0) {
|
||||
noocr = true;
|
||||
*print_parameters = true;
|
||||
@ -298,10 +324,10 @@ void ParseArgs(const int argc, char** argv,
|
||||
}
|
||||
}
|
||||
|
||||
void PreloadRenderers(tesseract::TessBaseAPI* api,
|
||||
void PreloadRenderers(
|
||||
tesseract::TessBaseAPI* api,
|
||||
tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
|
||||
tesseract::PageSegMode pagesegmode,
|
||||
const char* outputbase) {
|
||||
tesseract::PageSegMode pagesegmode, const char* outputbase) {
|
||||
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
|
||||
renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
|
||||
} else {
|
||||
@ -324,8 +350,10 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
|
||||
|
||||
api->GetBoolVariable("tessedit_create_pdf", &b);
|
||||
if (b) {
|
||||
renderers->push_back(new tesseract::TessPDFRenderer(outputbase,
|
||||
api->GetDatapath()));
|
||||
bool textonly;
|
||||
api->GetBoolVariable("textonly_pdf", &textonly);
|
||||
renderers->push_back(new tesseract::TessPDFRenderer(
|
||||
outputbase, api->GetDatapath(), textonly));
|
||||
}
|
||||
|
||||
api->GetBoolVariable("tessedit_write_unlv", &b);
|
||||
@ -358,26 +386,36 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
|
||||
* main()
|
||||
*
|
||||
**********************************************************************/
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
const char* lang = "eng";
|
||||
const char* image = NULL;
|
||||
const char* outputbase = NULL;
|
||||
const char* datapath = NULL;
|
||||
bool list_langs = false;
|
||||
bool print_parameters = false;
|
||||
GenericVector<STRING> vars_vec, vars_values;
|
||||
int arg_i = 1;
|
||||
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
|
||||
tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
|
||||
/* main() calls functions like ParseArgs which call exit().
|
||||
* This results in memory leaks if vars_vec and vars_values are
|
||||
* declared as auto variables (destructor is not called then). */
|
||||
static GenericVector<STRING> vars_vec;
|
||||
static GenericVector<STRING> vars_values;
|
||||
|
||||
#if !defined(DEBUG)
|
||||
// Disable debugging and informational messages from Leptonica.
|
||||
setMsgSeverity(L_SEVERITY_ERROR);
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
|
||||
/* Show libtiff warnings on console (not in GUI). */
|
||||
TIFFSetWarningHandler(Win32WarningHandler);
|
||||
#endif /* HAVE_TIFFIO_H && _WIN32 */
|
||||
|
||||
ParseArgs(argc, argv,
|
||||
&lang, &image, &outputbase, &datapath,
|
||||
&list_langs, &print_parameters,
|
||||
&vars_vec, &vars_values, &arg_i, &pagesegmode);
|
||||
ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
|
||||
&print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
|
||||
&enginemode);
|
||||
|
||||
bool banner = false;
|
||||
if (outputbase != NULL && strcmp(outputbase, "-") &&
|
||||
@ -390,8 +428,8 @@ int main(int argc, char **argv) {
|
||||
|
||||
api.SetOutputName(outputbase);
|
||||
|
||||
int init_failed = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
|
||||
&(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false);
|
||||
int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
|
||||
argc - arg_i, &vars_vec, &vars_values, false);
|
||||
if (init_failed) {
|
||||
fprintf(stderr, "Could not initialize tesseract.\n");
|
||||
exit(1);
|
||||
@ -433,7 +471,8 @@ int main(int argc, char **argv) {
|
||||
tesseract::PageIterator* it = api.AnalyseLayout();
|
||||
if (it) {
|
||||
it->Orientation(&orientation, &direction, &order, &deskew_angle);
|
||||
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
|
||||
tprintf(
|
||||
"Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
|
||||
"Deskew angle: %.4f\n",
|
||||
orientation, direction, order, deskew_angle);
|
||||
} else {
|
||||
@ -456,8 +495,6 @@ int main(int argc, char **argv) {
|
||||
|
||||
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
|
||||
|
||||
|
||||
|
||||
if (in_training_mode) {
|
||||
renderers.push_back(NULL);
|
||||
} else {
|
||||
|
34
appveyor.yml
34
appveyor.yml
@ -1,4 +1,4 @@
|
||||
os: Visual Studio 2015
|
||||
os: Visual Studio 2017
|
||||
|
||||
platform:
|
||||
- Win32
|
||||
@ -7,19 +7,39 @@ platform:
|
||||
configuration:
|
||||
- Release
|
||||
|
||||
# for curl
|
||||
install:
|
||||
- set PATH=C:\Program Files\Git\mingw64\bin;%PATH%
|
||||
|
||||
before_build:
|
||||
- if %platform%==Win32 set generator=Visual Studio 14
|
||||
- if %platform%==Win64 set generator=Visual Studio 14 Win64
|
||||
- if %platform%==Win32 set generator=Visual Studio 15 2017
|
||||
- if %platform%==Win64 set generator=Visual Studio 15 2017 Win64
|
||||
- if %platform%==Win32 set vcplatform=Win32
|
||||
- if %platform%==Win64 set vcplatform=x64
|
||||
|
||||
- curl -fsS -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip
|
||||
- curl -fsS -L -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip
|
||||
- 7z x cppan.zip
|
||||
- set PATH=%PATH%;%cd%
|
||||
|
||||
- cppan # dummy run to create %USERPROFILE%\.cppan\cppan.yml
|
||||
- ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_warning_level: 0`n"'
|
||||
- ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_system_verbose: false`n"'
|
||||
- ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nvar_check_jobs: 1`n"'
|
||||
|
||||
build_script:
|
||||
- cppan
|
||||
- mkdir build
|
||||
- mkdir build\bin
|
||||
- mkdir build\bin\Release
|
||||
- cd build
|
||||
- cmake .. -G "%generator%" -DSTATIC=1
|
||||
- msbuild tesseract.sln /p:Platform=%vcplatform% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
|
||||
#- cmd: 'echo local_settings: > cppan.yml'
|
||||
#- cmd: 'echo generator: %generator% >> cppan.yml'
|
||||
#- cmd: 'echo use_shared_libs: true >> cppan.yml'
|
||||
#- cppan --build ..
|
||||
- cmake .. -G "%generator%" -DBUILD_TRAINING_TOOLS=Off -DAPPVEYOR=1
|
||||
- cmake --build . --config Release > bin\Release\log.txt 2>&1
|
||||
|
||||
artifacts:
|
||||
- path: build\bin\Release
|
||||
#- path: build
|
||||
name: tesseract-$(APPVEYOR_BUILD_VERSION)
|
||||
|
||||
|
38
arch/Makefile.am
Normal file
38
arch/Makefile.am
Normal file
@ -0,0 +1,38 @@
|
||||
AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer
|
||||
AUTOMAKE_OPTIONS = subdir-objects
|
||||
SUBDIRS =
|
||||
AM_CXXFLAGS =
|
||||
|
||||
if VISIBILITY
|
||||
AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS
|
||||
endif
|
||||
|
||||
include_HEADERS = dotproductavx.h dotproductsse.h simddetect.h
|
||||
|
||||
noinst_HEADERS =
|
||||
|
||||
if !USING_MULTIPLELIBS
|
||||
noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
|
||||
noinst_LTLIBRARIES += libtesseract_arch.la
|
||||
else
|
||||
lib_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
|
||||
lib_LTLIBRARIES += libtesseract_arch.la
|
||||
libtesseract_arch_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||
libtesseract_avx_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||
libtesseract_sse_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||
endif
|
||||
|
||||
if AVX_OPT
|
||||
libtesseract_avx_la_CXXFLAGS = -mavx
|
||||
endif
|
||||
if SSE41_OPT
|
||||
libtesseract_sse_la_CXXFLAGS = -msse4.1
|
||||
endif
|
||||
|
||||
libtesseract_arch_la_SOURCES = simddetect.cpp
|
||||
|
||||
libtesseract_avx_la_SOURCES = dotproductavx.cpp
|
||||
|
||||
libtesseract_sse_la_SOURCES = dotproductsse.cpp
|
||||
|
112
arch/dotproductavx.cpp
Normal file
112
arch/dotproductavx.cpp
Normal file
@ -0,0 +1,112 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductavx.cpp
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:48:05 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if !defined(__AVX__)
|
||||
// Implementation for non-avx archs.
|
||||
|
||||
#include "dotproductavx.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace tesseract {
|
||||
// Placeholder used when this file is compiled without AVX support
// (__AVX__ is not defined). It exists only so that callers still link;
// it must never be reached at run time.
//
// u, v, n: ignored.
// Never returns: prints a diagnostic to stderr and aborts the process.
double DotProductAVX(const double* u, const double* v, int n) {
  // This branch is selected for every non-AVX build, not only Android, so the
  // diagnostic names the actual cause.
  fprintf(stderr, "DotProductAVX can't be used: built without AVX support\n");
  abort();
}
|
||||
} // namespace tesseract
|
||||
|
||||
#else // !defined(__AVX__)
|
||||
// Implementation for avx capable archs.
|
||||
#include <immintrin.h>
|
||||
#include <stdint.h>
|
||||
#include "dotproductavx.h"
|
||||
#include "host.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
double DotProductAVX(const double* u, const double* v, int n) {
|
||||
int max_offset = n - 4;
|
||||
int offset = 0;
|
||||
// Accumulate a set of 4 sums in sum, by loading pairs of 4 values from u and
|
||||
// v, and multiplying them together in parallel.
|
||||
__m256d sum = _mm256_setzero_pd();
|
||||
if (offset <= max_offset) {
|
||||
offset = 4;
|
||||
// Aligned load is reputedly faster but requires 32 byte aligned input.
|
||||
if ((reinterpret_cast<const uintptr_t>(u) & 31) == 0 &&
|
||||
(reinterpret_cast<const uintptr_t>(v) & 31) == 0) {
|
||||
// Use aligned load.
|
||||
__m256d floats1 = _mm256_load_pd(u);
|
||||
__m256d floats2 = _mm256_load_pd(v);
|
||||
// Multiply.
|
||||
sum = _mm256_mul_pd(floats1, floats2);
|
||||
while (offset <= max_offset) {
|
||||
floats1 = _mm256_load_pd(u + offset);
|
||||
floats2 = _mm256_load_pd(v + offset);
|
||||
offset += 4;
|
||||
__m256d product = _mm256_mul_pd(floats1, floats2);
|
||||
sum = _mm256_add_pd(sum, product);
|
||||
}
|
||||
} else {
|
||||
// Use unaligned load.
|
||||
__m256d floats1 = _mm256_loadu_pd(u);
|
||||
__m256d floats2 = _mm256_loadu_pd(v);
|
||||
// Multiply.
|
||||
sum = _mm256_mul_pd(floats1, floats2);
|
||||
while (offset <= max_offset) {
|
||||
floats1 = _mm256_loadu_pd(u + offset);
|
||||
floats2 = _mm256_loadu_pd(v + offset);
|
||||
offset += 4;
|
||||
__m256d product = _mm256_mul_pd(floats1, floats2);
|
||||
sum = _mm256_add_pd(sum, product);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add the 4 product sums together horizontally. Not so easy as with sse, as
|
||||
// there is no add across the upper/lower 128 bit boundary, so permute to
|
||||
// move the upper 128 bits to lower in another register.
|
||||
__m256d sum2 = _mm256_permute2f128_pd(sum, sum, 1);
|
||||
sum = _mm256_hadd_pd(sum, sum2);
|
||||
sum = _mm256_hadd_pd(sum, sum);
|
||||
double result;
|
||||
// _mm256_extract_f64 doesn't exist, but resist the temptation to use an sse
|
||||
// instruction, as that introduces a 70 cycle delay. All this casting is to
|
||||
// fool the intrinsics into thinking we are extracting the bottom int64.
|
||||
auto cast_sum = _mm256_castpd_si256(sum);
|
||||
*(reinterpret_cast<inT64*>(&result)) =
|
||||
#if defined(_WIN32) || defined(__i386__)
|
||||
// This is a very simple workaround that is activated
|
||||
// for all platforms that do not have _mm256_extract_epi64.
|
||||
// _mm256_extract_epi64(X, Y) == ((uint64_t*)&X)[Y]
|
||||
((uint64_t*)&cast_sum)[0]
|
||||
#else
|
||||
_mm256_extract_epi64(cast_sum, 0)
|
||||
#endif
|
||||
;
|
||||
while (offset < n) {
|
||||
result += u[offset] * v[offset];
|
||||
++offset;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // !defined(__AVX__)
|
30
arch/dotproductavx.h
Normal file
30
arch/dotproductavx.h
Normal file
@ -0,0 +1,30 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductavx.h
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:51:05 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
||||
#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
double DotProductAVX(const double* u, const double* v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_
|
141
arch/dotproductsse.cpp
Normal file
141
arch/dotproductsse.cpp
Normal file
@ -0,0 +1,141 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductsse.cpp
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:57:45 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if !defined(__SSE4_1__)
|
||||
// This code can't be compiled without "-msse4.1", so use dummy stubs.
|
||||
|
||||
#include "dotproductsse.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace tesseract {
|
||||
// Placeholder built when this file is compiled without SSE4.1 support
// (__SSE4_1__ is not defined). It must never be called: it reports the misuse
// on stderr and aborts the process. The arguments are ignored.
double DotProductSSE(const double* u, const double* v, int n) {
  // The stub is selected by the missing SSE4.1 compiler support, not by the
  // target OS, so the message names the real cause.
  fprintf(stderr, "DotProductSSE can't be used without SSE4.1 support\n");
  abort();
}
|
||||
// Placeholder built when this file is compiled without SSE4.1 support
// (__SSE4_1__ is not defined). It must never be called: it reports the misuse
// on stderr and aborts the process. The arguments are ignored.
inT32 IntDotProductSSE(const inT8* u, const inT8* v, int n) {
  // The stub is selected by the missing SSE4.1 compiler support, not by the
  // target OS, so the message names the real cause.
  fprintf(stderr, "IntDotProductSSE can't be used without SSE4.1 support\n");
  abort();
}
|
||||
} // namespace tesseract
|
||||
|
||||
#else // !defined(__SSE4_1__)
|
||||
// SSE4.1 is enabled for this compilation: the real implementation follows.
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include <stdint.h>
|
||||
#include "dotproductsse.h"
|
||||
#include "host.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
double DotProductSSE(const double* u, const double* v, int n) {
|
||||
int max_offset = n - 2;
|
||||
int offset = 0;
|
||||
// Accumulate a set of 2 sums in sum, by loading pairs of 2 values from u and
|
||||
// v, and multiplying them together in parallel.
|
||||
__m128d sum = _mm_setzero_pd();
|
||||
if (offset <= max_offset) {
|
||||
offset = 2;
|
||||
// Aligned load is reputedly faster but requires 16 byte aligned input.
|
||||
if ((reinterpret_cast<const uintptr_t>(u) & 15) == 0 &&
|
||||
(reinterpret_cast<const uintptr_t>(v) & 15) == 0) {
|
||||
// Use aligned load.
|
||||
sum = _mm_load_pd(u);
|
||||
__m128d floats2 = _mm_load_pd(v);
|
||||
// Multiply.
|
||||
sum = _mm_mul_pd(sum, floats2);
|
||||
while (offset <= max_offset) {
|
||||
__m128d floats1 = _mm_load_pd(u + offset);
|
||||
floats2 = _mm_load_pd(v + offset);
|
||||
offset += 2;
|
||||
floats1 = _mm_mul_pd(floats1, floats2);
|
||||
sum = _mm_add_pd(sum, floats1);
|
||||
}
|
||||
} else {
|
||||
// Use unaligned load.
|
||||
sum = _mm_loadu_pd(u);
|
||||
__m128d floats2 = _mm_loadu_pd(v);
|
||||
// Multiply.
|
||||
sum = _mm_mul_pd(sum, floats2);
|
||||
while (offset <= max_offset) {
|
||||
__m128d floats1 = _mm_loadu_pd(u + offset);
|
||||
floats2 = _mm_loadu_pd(v + offset);
|
||||
offset += 2;
|
||||
floats1 = _mm_mul_pd(floats1, floats2);
|
||||
sum = _mm_add_pd(sum, floats1);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add the 2 sums in sum horizontally.
|
||||
sum = _mm_hadd_pd(sum, sum);
|
||||
// Extract the low result.
|
||||
double result = _mm_cvtsd_f64(sum);
|
||||
// Add on any left-over products.
|
||||
while (offset < n) {
|
||||
result += u[offset] * v[offset];
|
||||
++offset;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
inT32 IntDotProductSSE(const inT8* u, const inT8* v, int n) {
  // Largest index at which a full group of 8 bytes can still be read.
  const int last_group = n - 8;
  // Index of the first element that has not been consumed yet.
  int pos = 0;
  // Four running 32-bit partial sums.
  __m128i acc = _mm_setzero_si128();
  if (last_group >= 0) {
    // Load 8 signed bytes from each input and sign-extend them to 16 bits.
    __m128i lhs = _mm_cvtepi8_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(u)));
    __m128i rhs = _mm_cvtepi8_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(v)));
    // _mm_madd_epi16 multiplies the 8 pairs of 16-bit ints into 32-bit
    // products and adds adjacent products, leaving 4 32-bit sums that fit in
    // one 128-bit register. The first group initialises the accumulator.
    acc = _mm_madd_epi16(lhs, rhs);
    for (pos = 8; pos <= last_group; pos += 8) {
      lhs = _mm_cvtepi8_epi16(
          _mm_loadl_epi64(reinterpret_cast<const __m128i*>(u + pos)));
      rhs = _mm_cvtepi8_epi16(
          _mm_loadl_epi64(reinterpret_cast<const __m128i*>(v + pos)));
      acc = _mm_add_epi32(acc, _mm_madd_epi16(lhs, rhs));
    }
  }
  // Two horizontal adds collapse the 4 partial sums into the low lane.
  acc = _mm_hadd_epi32(acc, acc);
  acc = _mm_hadd_epi32(acc, acc);
  inT32 total = _mm_cvtsi128_si32(acc);
  // Handle the up-to-7 elements that did not fill a whole group.
  for (; pos < n; ++pos) {
    total += u[pos] * v[pos];
  }
  return total;
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // !defined(__SSE4_1__)
|
35
arch/dotproductsse.h
Normal file
35
arch/dotproductsse.h
Normal file
@ -0,0 +1,35 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductsse.h
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 22 10:57:05 PDT 2015
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
||||
#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
||||
|
||||
#include "host.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
double DotProductSSE(const double* u, const double* v, int n);
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
inT32 IntDotProductSSE(const inT8* u, const inT8* v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_
|
68
arch/simddetect.cpp
Normal file
68
arch/simddetect.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: simddetect.cpp
|
||||
// Description: Architecture detector.
|
||||
// Author: Stefan Weil (based on code from Ray Smith)
|
||||
//
|
||||
// (C) Copyright 2014, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "simddetect.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#undef X86_BUILD
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
|
||||
#if !defined(ANDROID_BUILD)
|
||||
#define X86_BUILD 1
|
||||
#endif // !ANDROID_BUILD
|
||||
#endif // x86 target
|
||||
|
||||
#if defined(X86_BUILD)
|
||||
#if defined(__GNUC__)
|
||||
#include <cpuid.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
SIMDDetect SIMDDetect::detector;
|
||||
|
||||
// If true, then AVX has been detected.
|
||||
bool SIMDDetect::avx_available_;
|
||||
// If true, then SSE4.1 has been detected.
|
||||
bool SIMDDetect::sse_available_;
|
||||
|
||||
// Constructor.
|
||||
// Tests the architecture in a system-dependent way to detect AVX, SSE and
|
||||
// any other available SIMD equipment.
|
||||
// __GNUC__ is also defined by compilers that include GNU extensions such as
|
||||
// clang.
|
||||
SIMDDetect::SIMDDetect() {
#if defined(X86_BUILD)
  // Feature flags tested in the ECX value returned by CPUID function 1.
  const int kSSE41Mask = 0x00080000;
  const int kAVXMask = 0x10000000;
#if defined(__GNUC__)
  // GCC-compatible compilers (clang included): use the <cpuid.h> helper and
  // only trust the registers when it reports success.
  unsigned int eax, ebx, ecx, edx;
  const bool query_ok = __get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0;
  if (query_ok) {
    sse_available_ = (ecx & kSSE41Mask) != 0;
    avx_available_ = (ecx & kAVXMask) != 0;
  }
#elif defined(_WIN32)
  // __cpuid from <intrin.h>: function 0 returns the highest supported
  // function id in cpuInfo[0]; function 1 is queried only when that is >= 1.
  int cpuInfo[4];
  __cpuid(cpuInfo, 0);
  const bool has_function_1 = cpuInfo[0] >= 1;
  if (has_function_1) {
    __cpuid(cpuInfo, 1);
    // cpuInfo[2] holds ECX.
    sse_available_ = (cpuInfo[2] & kSSE41Mask) != 0;
    avx_available_ = (cpuInfo[2] & kAVXMask) != 0;
  }
#else
#error "I don't know how to test for SIMD with this compiler"
#endif
#endif // X86_BUILD
}
|
41
arch/simddetect.h
Normal file
41
arch/simddetect.h
Normal file
@ -0,0 +1,41 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: simddetect.h
|
||||
// Description: Architecture detector.
|
||||
// Author: Stefan Weil (based on code from Ray Smith)
|
||||
//
|
||||
// (C) Copyright 2014, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
// Architecture detector. Add code here to detect any other architectures for
|
||||
// SIMD-based faster dot product functions. Intended to be a single static
|
||||
// object, but it does no real harm to have more than one.
|
||||
class SIMDDetect {
|
||||
public:
|
||||
// Returns true if AVX is available on this system.
|
||||
static inline bool IsAVXAvailable() { return detector.avx_available_; }
|
||||
// Returns true if SSE4.1 is available on this system.
|
||||
static inline bool IsSSEAvailable() { return detector.sse_available_; }
|
||||
|
||||
private:
|
||||
// Constructor, must set all static member variables.
|
||||
SIMDDetect();
|
||||
|
||||
private:
|
||||
// Singleton.
|
||||
static SIMDDetect detector;
|
||||
// If true, then AVX has been detected.
|
||||
static TESS_API bool avx_available_;
|
||||
// If true, then SSe4.1 has been detected.
|
||||
static TESS_API bool sse_available_;
|
||||
};
|
30
autogen.sh
30
autogen.sh
@ -1,4 +1,13 @@
|
||||
#!/bin/sh
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is a simple script which is meant to help developers
|
||||
# better deal with the GNU autotools, specifically:
|
||||
@ -37,7 +46,20 @@ if [ "$1" = "clean" ]; then
|
||||
find . -iname "Makefile.in" -type f -exec rm '{}' +
|
||||
fi
|
||||
|
||||
# create m4 directory if it not exists
|
||||
# Prevent any errors that might result from failing to properly invoke
|
||||
# `libtoolize` or `glibtoolize,` whichever is present on your system,
|
||||
# from occurring by testing for its existence and capturing the absolute path to
|
||||
# its location for caching purposes prior to using it later on in 'Step 2:'
|
||||
if command -v libtoolize >/dev/null 2>&1; then
|
||||
LIBTOOLIZE="$(command -v libtoolize)"
|
||||
elif command -v glibtoolize >/dev/null 2>&1; then
|
||||
LIBTOOLIZE="$(command -v glibtoolize)"
|
||||
else
|
||||
echo "Unable to find a valid copy of libtoolize or glibtoolize in your PATH!"
|
||||
bail_out
|
||||
fi
|
||||
|
||||
# create m4 directory if it does not exist
|
||||
if [ ! -d m4 ]; then
|
||||
mkdir m4
|
||||
fi
|
||||
@ -61,9 +83,9 @@ aclocal -I config || bail_out
|
||||
|
||||
# --- Step 2:
|
||||
|
||||
echo "Running libtoolize"
|
||||
libtoolize -f -c || glibtoolize -f -c || bail_out
|
||||
libtoolize --automake || glibtoolize --automake || bail_out
|
||||
echo "Running $LIBTOOLIZE"
|
||||
$LIBTOOLIZE -f -c || bail_out
|
||||
$LIBTOOLIZE --automake || bail_out
|
||||
|
||||
# --- Step 3: Generate config.h.in from:
|
||||
# . configure.ac (look for AM_CONFIG_HEADER tag or AC_CONFIG_HEADER tag)
|
||||
|
@ -1,12 +1,14 @@
|
||||
AM_CPPFLAGS += \
|
||||
-DUSE_STD_NAMESPACE \
|
||||
-I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \
|
||||
-I$(top_srcdir)/arch -I$(top_srcdir)/lstm \
|
||||
-I$(top_srcdir)/viewer \
|
||||
-I$(top_srcdir)/classify -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
|
||||
-I$(top_srcdir)/textord -I$(top_srcdir)/opencl
|
||||
|
||||
AM_CPPFLAGS += $(OPENCL_CPPFLAGS)
|
||||
AM_CPPFLAGS += $(OPENMP_CXXFLAGS)
|
||||
|
||||
if VISIBILITY
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS \
|
||||
@ -33,18 +35,18 @@ libtesseract_main_la_LIBADD = \
|
||||
../ccstruct/libtesseract_ccstruct.la \
|
||||
../viewer/libtesseract_viewer.la \
|
||||
../dict/libtesseract_dict.la \
|
||||
../arch/libtesseract_avx.la \
|
||||
../arch/libtesseract_sse.la \
|
||||
../lstm/libtesseract_lstm.la \
|
||||
../classify/libtesseract_classify.la \
|
||||
../cutil/libtesseract_cutil.la \
|
||||
../opencl/libtesseract_opencl.la
|
||||
if !NO_CUBE_BUILD
|
||||
libtesseract_main_la_LIBADD += ../cube/libtesseract_cube.la
|
||||
endif
|
||||
endif
|
||||
|
||||
libtesseract_main_la_SOURCES = \
|
||||
adaptions.cpp applybox.cpp control.cpp \
|
||||
docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \
|
||||
ltrresultiterator.cpp \
|
||||
linerec.cpp ltrresultiterator.cpp \
|
||||
osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \
|
||||
pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
|
||||
reject.cpp resultiterator.cpp superscript.cpp \
|
||||
@ -52,12 +54,3 @@ libtesseract_main_la_SOURCES = \
|
||||
tfacepp.cpp thresholder.cpp \
|
||||
werdit.cpp
|
||||
|
||||
if !NO_CUBE_BUILD
|
||||
AM_CPPFLAGS += \
|
||||
-I$(top_srcdir)/neural_networks/runtime -I$(top_srcdir)/cube
|
||||
noinst_HEADERS += \
|
||||
cube_reco_context.h cubeclassifier.h tesseract_cube_combiner.h
|
||||
libtesseract_main_la_SOURCES += \
|
||||
cube_control.cpp cube_reco_context.cpp cubeclassifier.cpp \
|
||||
tesseract_cube_combiner.cpp
|
||||
endif
|
||||
|
@ -31,21 +31,22 @@
|
||||
#include <errno.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
#include "ocrclass.h"
|
||||
#include "werdit.h"
|
||||
#include "callcpp.h"
|
||||
#include "control.h"
|
||||
#include "docqual.h"
|
||||
#include "drawfx.h"
|
||||
#include "tessbox.h"
|
||||
#include "tessvars.h"
|
||||
#include "fixspace.h"
|
||||
#include "globals.h"
|
||||
#include "lstmrecognizer.h"
|
||||
#include "ocrclass.h"
|
||||
#include "output.h"
|
||||
#include "pgedit.h"
|
||||
#include "reject.h"
|
||||
#include "fixspace.h"
|
||||
#include "docqual.h"
|
||||
#include "control.h"
|
||||
#include "output.h"
|
||||
#include "callcpp.h"
|
||||
#include "globals.h"
|
||||
#include "sorthelper.h"
|
||||
#include "tessbox.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tessvars.h"
|
||||
#include "werdit.h"
|
||||
|
||||
#define MIN_FONT_ROW_COUNT 8
|
||||
#define MAX_XHEIGHT_DIFF 3
|
||||
@ -73,7 +74,6 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Recognize a single word in interactive mode.
|
||||
*
|
||||
@ -85,7 +85,12 @@ BOOL8 Tesseract::recog_interactive(PAGE_RES_IT* pr_it) {
|
||||
|
||||
WordData word_data(*pr_it);
|
||||
SetupWordPassN(2, &word_data);
|
||||
// LSTM doesn't run on pass2, but we want to run pass2 for tesseract.
|
||||
if (lstm_recognizer_ == NULL) {
|
||||
classify_word_and_language(2, pr_it, &word_data);
|
||||
} else {
|
||||
classify_word_and_language(1, pr_it, &word_data);
|
||||
}
|
||||
if (tessedit_debug_quality_metrics) {
|
||||
WERD_RES* word_res = pr_it->word();
|
||||
word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual);
|
||||
@ -188,8 +193,8 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
|
||||
WERD_RES* word_res = new WERD_RES;
|
||||
word_res->InitForRetryRecognition(*word->word);
|
||||
word->lang_words.push_back(word_res);
|
||||
// Cube doesn't get setup for pass2.
|
||||
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
|
||||
// LSTM doesn't get setup for pass2.
|
||||
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
|
||||
word_res->SetupForRecognition(
|
||||
lang_t->unicharset, lang_t, BestPix(),
|
||||
lang_t->tessedit_ocr_engine_mode, NULL,
|
||||
@ -220,15 +225,13 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
|
||||
monitor->progress = 70 * w / words->size();
|
||||
if (monitor->progress_callback != NULL) {
|
||||
TBOX box = pr_it->word()->word->bounding_box();
|
||||
(*monitor->progress_callback)(monitor->progress,
|
||||
box.left(), box.right(),
|
||||
box.top(), box.bottom());
|
||||
(*monitor->progress_callback)(monitor->progress, box.left(),
|
||||
box.right(), box.top(), box.bottom());
|
||||
}
|
||||
} else {
|
||||
monitor->progress = 70 + 30 * w / words->size();
|
||||
if (monitor->progress_callback!=NULL) {
|
||||
(*monitor->progress_callback)(monitor->progress,
|
||||
0, 0, 0, 0);
|
||||
if (monitor->progress_callback != NULL) {
|
||||
(*monitor->progress_callback)(monitor->progress, 0, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
if (monitor->deadline_exceeded() ||
|
||||
@ -253,7 +256,8 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
|
||||
pr_it->forward();
|
||||
ASSERT_HOST(pr_it->word() != NULL);
|
||||
bool make_next_word_fuzzy = false;
|
||||
if (ReassignDiacritics(pass_n, pr_it, &make_next_word_fuzzy)) {
|
||||
if (!AnyLSTMLang() &&
|
||||
ReassignDiacritics(pass_n, pr_it, &make_next_word_fuzzy)) {
|
||||
// Needs to be setup again to see the new outlines in the chopped_word.
|
||||
SetupWordPassN(pass_n, word);
|
||||
}
|
||||
@ -384,9 +388,8 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
|
||||
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false;
|
||||
}
|
||||
|
||||
// The next passes can only be run if tesseract has been used, as cube
|
||||
// doesn't set all the necessary outputs in WERD_RES.
|
||||
if (AnyTessLang()) {
|
||||
// The next passes are only required for Tess-only.
|
||||
if (AnyTessLang() && !AnyLSTMLang()) {
|
||||
// ****************** Pass 3 *******************
|
||||
// Fix fuzzy spaces.
|
||||
set_global_loc_code(LOC_FUZZY_SPACE);
|
||||
@ -402,15 +405,6 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
|
||||
// ****************** Pass 5,6 *******************
|
||||
rejection_passes(page_res, monitor, target_word_box, word_config);
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
// ****************** Pass 7 *******************
|
||||
// Cube combiner.
|
||||
// If cube is loaded and its combiner is present, run it.
|
||||
if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
|
||||
run_cube_combiner(page_res);
|
||||
}
|
||||
#endif
|
||||
|
||||
// ****************** Pass 8 *******************
|
||||
font_recognition_pass(page_res);
|
||||
|
||||
@ -438,9 +432,14 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
|
||||
for (page_res_it.restart_page(); page_res_it.word() != NULL;
|
||||
page_res_it.forward()) {
|
||||
WERD_RES* word = page_res_it.word();
|
||||
if (word->best_choice == NULL || word->best_choice->length() == 0)
|
||||
POLY_BLOCK* pb = page_res_it.block()->block != NULL
|
||||
? page_res_it.block()->block->poly_block()
|
||||
: NULL;
|
||||
if (word->best_choice == NULL || word->best_choice->length() == 0 ||
|
||||
(word->best_choice->IsAllSpaces() && (pb == NULL || pb->IsText()))) {
|
||||
page_res_it.DeleteCurrentWord();
|
||||
}
|
||||
}
|
||||
|
||||
if (monitor != NULL) {
|
||||
monitor->progress = 100;
|
||||
@ -539,7 +538,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (overrides_word1.size() >= 1) {
|
||||
if (!overrides_word1.empty()) {
|
||||
// Excellent, we have some bigram matches.
|
||||
if (EqualIgnoringCaseAndTerminalPunct(*w_prev->best_choice,
|
||||
*overrides_word1[best_idx]) &&
|
||||
@ -755,16 +754,32 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) {
|
||||
}
|
||||
}
|
||||
|
||||
// Factored helper considers the indexed word and updates all the pointed
|
||||
// values.
|
||||
static void EvaluateWord(const PointerVector<WERD_RES>& words, int index,
|
||||
float* rating, float* certainty, bool* bad,
|
||||
bool* valid_permuter, int* right, int* next_left) {
|
||||
// Helper finds the gap between the index word and the next.
|
||||
static void WordGap(const PointerVector<WERD_RES>& words, int index, int* right,
|
||||
int* next_left) {
|
||||
*right = -MAX_INT32;
|
||||
*next_left = MAX_INT32;
|
||||
if (index < words.size()) {
|
||||
*right = words[index]->word->bounding_box().right();
|
||||
if (index + 1 < words.size())
|
||||
*next_left = words[index + 1]->word->bounding_box().left();
|
||||
}
|
||||
}
|
||||
|
||||
// Factored helper computes the rating, certainty, badness and validity of
|
||||
// the permuter of the words in [first_index, end_index).
|
||||
static void EvaluateWordSpan(const PointerVector<WERD_RES>& words,
|
||||
int first_index, int end_index, float* rating,
|
||||
float* certainty, bool* bad,
|
||||
bool* valid_permuter) {
|
||||
if (end_index <= first_index) {
|
||||
*bad = true;
|
||||
*valid_permuter = false;
|
||||
}
|
||||
for (int index = first_index; index < end_index && index < words.size();
|
||||
++index) {
|
||||
WERD_CHOICE* choice = words[index]->best_choice;
|
||||
if (choice == NULL) {
|
||||
if (choice == nullptr) {
|
||||
*bad = true;
|
||||
} else {
|
||||
*rating += choice->rating();
|
||||
@ -772,12 +787,6 @@ static void EvaluateWord(const PointerVector<WERD_RES>& words, int index,
|
||||
if (!Dict::valid_word_permuter(choice->permuter(), false))
|
||||
*valid_permuter = false;
|
||||
}
|
||||
*right = words[index]->word->bounding_box().right();
|
||||
if (index + 1 < words.size())
|
||||
*next_left = words[index + 1]->word->bounding_box().left();
|
||||
} else {
|
||||
*valid_permuter = false;
|
||||
*bad = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -802,24 +811,13 @@ static int SelectBestWords(double rating_ratio,
|
||||
while (b < best_words->size() || n < new_words->size()) {
|
||||
// Start of the current run in each.
|
||||
int start_b = b, start_n = n;
|
||||
// Rating of the current run in each.
|
||||
float b_rating = 0.0f, n_rating = 0.0f;
|
||||
// Certainty of the current run in each.
|
||||
float b_certainty = 0.0f, n_certainty = 0.0f;
|
||||
// True if any word is missing its best choice.
|
||||
bool b_bad = false, n_bad = false;
|
||||
// True if all words have a valid permuter.
|
||||
bool b_valid_permuter = true, n_valid_permuter = true;
|
||||
|
||||
while (b < best_words->size() || n < new_words->size()) {
|
||||
int b_right = -MAX_INT32;
|
||||
int next_b_left = MAX_INT32;
|
||||
EvaluateWord(*best_words, b, &b_rating, &b_certainty, &b_bad,
|
||||
&b_valid_permuter, &b_right, &next_b_left);
|
||||
WordGap(*best_words, b, &b_right, &next_b_left);
|
||||
int n_right = -MAX_INT32;
|
||||
int next_n_left = MAX_INT32;
|
||||
EvaluateWord(*new_words, n, &n_rating, &n_certainty, &n_bad,
|
||||
&n_valid_permuter, &n_right, &next_n_left);
|
||||
WordGap(*new_words, n, &n_right, &next_n_left);
|
||||
if (MAX(b_right, n_right) < MIN(next_b_left, next_n_left)) {
|
||||
// The word breaks overlap. [start_b,b] and [start_n, n] match.
|
||||
break;
|
||||
@ -831,6 +829,20 @@ static int SelectBestWords(double rating_ratio,
|
||||
else
|
||||
++n;
|
||||
}
|
||||
// Rating of the current run in each.
|
||||
float b_rating = 0.0f, n_rating = 0.0f;
|
||||
// Certainty of the current run in each.
|
||||
float b_certainty = 0.0f, n_certainty = 0.0f;
|
||||
// True if any word is missing its best choice.
|
||||
bool b_bad = false, n_bad = false;
|
||||
// True if all words have a valid permuter.
|
||||
bool b_valid_permuter = true, n_valid_permuter = true;
|
||||
int end_b = b < best_words->size() ? b + 1 : b;
|
||||
int end_n = n < new_words->size() ? n + 1 : n;
|
||||
EvaluateWordSpan(*best_words, start_b, end_b, &b_rating, &b_certainty,
|
||||
&b_bad, &b_valid_permuter);
|
||||
EvaluateWordSpan(*new_words, start_n, end_n, &n_rating, &n_certainty,
|
||||
&n_bad, &n_valid_permuter);
|
||||
bool new_better = false;
|
||||
if (!n_bad && (b_bad || (n_certainty > b_certainty &&
|
||||
n_rating < b_rating) ||
|
||||
@ -838,7 +850,7 @@ static int SelectBestWords(double rating_ratio,
|
||||
n_rating < b_rating * rating_ratio &&
|
||||
n_certainty > b_certainty - certainty_margin))) {
|
||||
// New is better.
|
||||
for (int i = start_n; i <= n; ++i) {
|
||||
for (int i = start_n; i < end_n; ++i) {
|
||||
out_words.push_back((*new_words)[i]);
|
||||
(*new_words)[i] = NULL;
|
||||
++num_new;
|
||||
@ -846,14 +858,12 @@ static int SelectBestWords(double rating_ratio,
|
||||
new_better = true;
|
||||
} else if (!b_bad) {
|
||||
// Current best is better.
|
||||
for (int i = start_b; i <= b; ++i) {
|
||||
for (int i = start_b; i < end_b; ++i) {
|
||||
out_words.push_back((*best_words)[i]);
|
||||
(*best_words)[i] = NULL;
|
||||
++num_best;
|
||||
}
|
||||
}
|
||||
int end_b = b < best_words->size() ? b + 1 : b;
|
||||
int end_n = n < new_words->size() ? n + 1 : n;
|
||||
if (debug) {
|
||||
tprintf("%d new words %s than %d old words: r: %g v %g c: %g v %g"
|
||||
" valid dict: %d v %d\n",
|
||||
@ -876,10 +886,9 @@ static int SelectBestWords(double rating_ratio,
|
||||
// Returns positive if this recognizer found more new best words than the
|
||||
// number kept from best_words.
|
||||
int Tesseract::RetryWithLanguage(const WordData& word_data,
|
||||
WordRecognizer recognizer,
|
||||
WordRecognizer recognizer, bool debug,
|
||||
WERD_RES** in_word,
|
||||
PointerVector<WERD_RES>* best_words) {
|
||||
bool debug = classify_debug_level || cube_debug_level;
|
||||
if (debug) {
|
||||
tprintf("Trying word using lang %s, oem %d\n",
|
||||
lang.string(), static_cast<int>(tessedit_ocr_engine_mode));
|
||||
@ -898,8 +907,7 @@ int Tesseract::RetryWithLanguage(const WordData& word_data,
|
||||
new_words[i]->DebugTopChoice("Lang result");
|
||||
}
|
||||
// Initial version is a bit of a hack based on better certainty and rating
|
||||
// (to reduce false positives from cube) or a dictionary vs non-dictionary
|
||||
// word.
|
||||
// or a dictionary vs non-dictionary word.
|
||||
return SelectBestWords(classify_max_rating_ratio,
|
||||
classify_max_certainty_margin,
|
||||
debug, &new_words, best_words);
|
||||
@ -1283,7 +1291,8 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
|
||||
// Points to the best result. May be word or in lang_words.
|
||||
WERD_RES* word = word_data->word;
|
||||
clock_t start_t = clock();
|
||||
if (classify_debug_level || cube_debug_level) {
|
||||
bool debug = classify_debug_level > 0 || multilang_debug_level > 0;
|
||||
if (debug) {
|
||||
tprintf("%s word with lang %s at:",
|
||||
word->done ? "Already done" : "Processing",
|
||||
most_recently_used_->lang.string());
|
||||
@ -1302,12 +1311,12 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
|
||||
most_recently_used_ != sub_langs_[sub]; ++sub) {}
|
||||
}
|
||||
most_recently_used_->RetryWithLanguage(
|
||||
*word_data, recognizer, &word_data->lang_words[sub], &best_words);
|
||||
*word_data, recognizer, debug, &word_data->lang_words[sub], &best_words);
|
||||
Tesseract* best_lang_tess = most_recently_used_;
|
||||
if (!WordsAcceptable(best_words)) {
|
||||
// Try all the other languages to see if they are any better.
|
||||
if (most_recently_used_ != this &&
|
||||
this->RetryWithLanguage(*word_data, recognizer,
|
||||
this->RetryWithLanguage(*word_data, recognizer, debug,
|
||||
&word_data->lang_words[sub_langs_.size()],
|
||||
&best_words) > 0) {
|
||||
best_lang_tess = this;
|
||||
@ -1315,7 +1324,7 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
|
||||
for (int i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size();
|
||||
++i) {
|
||||
if (most_recently_used_ != sub_langs_[i] &&
|
||||
sub_langs_[i]->RetryWithLanguage(*word_data, recognizer,
|
||||
sub_langs_[i]->RetryWithLanguage(*word_data, recognizer, debug,
|
||||
&word_data->lang_words[i],
|
||||
&best_words) > 0) {
|
||||
best_lang_tess = sub_langs_[i];
|
||||
@ -1357,12 +1366,26 @@ void Tesseract::classify_word_pass1(const WordData& word_data,
|
||||
BLOCK* block = word_data.block;
|
||||
prev_word_best_choice_ = word_data.prev_word != NULL
|
||||
? word_data.prev_word->word->best_choice : NULL;
|
||||
#ifndef NO_CUBE_BUILD
|
||||
// If we only intend to run cube - run it and return.
|
||||
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
|
||||
cube_word_pass1(block, row, *in_word);
|
||||
#ifndef ANDROID_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
|
||||
if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
LSTMRecognizeWord(*block, row, *in_word, out_words);
|
||||
if (!out_words->empty())
|
||||
return; // Successful lstm recognition.
|
||||
}
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
// No fallback allowed, so use a fake.
|
||||
(*in_word)->SetupFake(lstm_recognizer_->GetUnicharset());
|
||||
return;
|
||||
}
|
||||
// Fall back to tesseract for failed words or odd words.
|
||||
(*in_word)->SetupForRecognition(unicharset, this, BestPix(),
|
||||
OEM_TESSERACT_ONLY, NULL,
|
||||
classify_bln_numeric_mode,
|
||||
textord_use_cjk_fp_model,
|
||||
poly_allow_detailed_fx, row, block);
|
||||
}
|
||||
#endif
|
||||
WERD_RES* word = *in_word;
|
||||
match_word_pass_n(1, word, row, block);
|
||||
@ -1497,11 +1520,7 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
|
||||
WERD_RES** in_word,
|
||||
PointerVector<WERD_RES>* out_words) {
|
||||
// Return if we do not want to run Tesseract.
|
||||
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY &&
|
||||
tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED &&
|
||||
word_data.word->best_choice != NULL)
|
||||
return;
|
||||
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
return;
|
||||
}
|
||||
ROW* row = word_data.row;
|
||||
@ -1886,7 +1905,7 @@ static void find_modal_font( //good chars in word
|
||||
* Get the fonts for the word.
|
||||
*/
|
||||
void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
// Don't try to set the word fonts for a cube word, as the configs
|
||||
// Don't try to set the word fonts for an lstm word, as the configs
|
||||
// will be meaningless.
|
||||
if (word->chopped_word == NULL) return;
|
||||
ASSERT_HOST(word->best_choice != NULL);
|
||||
|
@ -1,432 +0,0 @@
|
||||
/******************************************************************
|
||||
* File: cube_control.cpp
|
||||
* Description: Tesseract class methods for invoking cube convolutional
|
||||
* neural network word recognizer.
|
||||
* Author: Raquel Romano
|
||||
* Created: September 2009
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tesseract_cube_combiner.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
 * @name convert_prob_to_tess_certainty
 *
 * Linearly maps a probability onto Tesseract's certainty scale: an input of
 * 1.0 yields 0.0 and an input of 0.0 yields -20.0. The input is not clamped,
 * so values outside [0.0, 1.0] map outside [-20.0, 0.0].
 */
static float convert_prob_to_tess_certainty(float prob) {
  // The literals are doubles, so the arithmetic is done in double precision
  // and only the final result is narrowed back to float.
  const double distance_from_certain = prob - 1.0;
  return static_cast<float>(distance_from_certain * 20.0);
}
|
||||
|
||||
/**
|
||||
* @name char_box_to_tbox
|
||||
*
|
||||
* Create a TBOX from a character bounding box. If nonzero, the
|
||||
* x_offset accounts for any additional padding of the word box that
|
||||
* should be taken into account.
|
||||
*
|
||||
*/
|
||||
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
|
||||
l_int32 left;
|
||||
l_int32 top;
|
||||
l_int32 width;
|
||||
l_int32 height;
|
||||
l_int32 right;
|
||||
l_int32 bottom;
|
||||
|
||||
boxGetGeometry(char_box, &left, &top, &width, &height);
|
||||
left += word_box.left() - x_offset;
|
||||
right = left + width;
|
||||
top = word_box.bottom() + word_box.height() - top;
|
||||
bottom = top - height;
|
||||
return TBOX(left, bottom, right, top);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name extract_cube_state
|
||||
*
|
||||
* Extract CharSamp objects and character bounding boxes from the
|
||||
* CubeObject's state. The caller should free both structres.
|
||||
*
|
||||
*/
|
||||
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
|
||||
int* num_chars,
|
||||
Boxa** char_boxes,
|
||||
CharSamp*** char_samples) {
|
||||
if (!cube_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
|
||||
"passed to extract_cube_state\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note that the CubeObject accessors return either the deslanted or
|
||||
// regular objects search object or beam search object, whichever
|
||||
// was used in the last call to Recognize()
|
||||
CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
|
||||
if (!cube_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
BeamSearch *beam_search_obj = cube_obj->BeamObj();
|
||||
if (!beam_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's beam search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the character samples and bounding boxes by backtracking
|
||||
// through the beam search path
|
||||
int best_node_index = beam_search_obj->BestPresortedNodeIndex();
|
||||
*char_samples = beam_search_obj->BackTrack(
|
||||
cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
|
||||
if (!*char_samples)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name create_cube_box_word
|
||||
*
|
||||
* Fill the given BoxWord with boxes from character bounding
|
||||
* boxes. The char_boxes have local coordinates w.r.t. the
|
||||
* word bounding box, i.e., the left-most character bbox of each word
|
||||
* has (0,0) left-top coord, but the BoxWord must be defined in page
|
||||
* coordinates.
|
||||
*/
|
||||
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
|
||||
int num_chars,
|
||||
TBOX word_box,
|
||||
BoxWord* box_word) {
|
||||
if (!box_word) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the x-coordinate of left-most char_box, which could be
|
||||
// nonzero if the word image was padded before recognition took place.
|
||||
int x_offset = -1;
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
if (x_offset < 0 || char_box->x < x_offset) {
|
||||
x_offset = char_box->x;
|
||||
}
|
||||
boxDestroy(&char_box);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
|
||||
boxDestroy(&char_box);
|
||||
box_word->InsertBox(i, tbox);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name init_cube_objects
|
||||
*
|
||||
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
|
||||
* Returns false if cube context could not be created or if load_combiner is
|
||||
* true, but the combiner could not be loaded.
|
||||
*/
|
||||
bool Tesseract::init_cube_objects(bool load_combiner,
|
||||
TessdataManager *tessdata_manager) {
|
||||
ASSERT_HOST(cube_cntxt_ == NULL);
|
||||
ASSERT_HOST(tess_cube_combiner_ == NULL);
|
||||
|
||||
// Create the cube context object
|
||||
cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
|
||||
if (cube_cntxt_ == NULL) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
|
||||
"instantiate CubeRecoContext\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the combiner object and load the combiner net for target languages.
|
||||
if (load_combiner) {
|
||||
tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
|
||||
if (!tess_cube_combiner_ || !tess_cube_combiner_->LoadCombinerNet()) {
|
||||
delete cube_cntxt_;
|
||||
cube_cntxt_ = NULL;
|
||||
if (tess_cube_combiner_ != NULL) {
|
||||
delete tess_cube_combiner_;
|
||||
tess_cube_combiner_ = NULL;
|
||||
}
|
||||
if (cube_debug_level > 0)
|
||||
tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name run_cube_combiner
|
||||
*
|
||||
* Iterates through tesseract's results and calls cube on each word,
|
||||
* combining the results with the existing tesseract result.
|
||||
*/
|
||||
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
|
||||
if (page_res == NULL || tess_cube_combiner_ == NULL)
|
||||
return;
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
// Iterate through the word results and call cube on each word.
|
||||
for (page_res_it.restart_page(); page_res_it.word () != NULL;
|
||||
page_res_it.forward()) {
|
||||
BLOCK* block = page_res_it.block()->block;
|
||||
if (block->poly_block() != NULL && !block->poly_block()->IsText())
|
||||
continue; // Don't deal with non-text blocks.
|
||||
WERD_RES* word = page_res_it.word();
|
||||
// Skip cube entirely if tesseract's certainty is greater than threshold.
|
||||
int combiner_run_thresh = convert_prob_to_tess_certainty(
|
||||
cube_cntxt_->Params()->CombinerRunThresh());
|
||||
if (word->best_choice->certainty() >= combiner_run_thresh) {
|
||||
continue;
|
||||
}
|
||||
// Use the same language as Tesseract used for the word.
|
||||
Tesseract* lang_tess = word->tesseract;
|
||||
|
||||
// Setup a trial WERD_RES in which to classify with cube.
|
||||
WERD_RES cube_word;
|
||||
cube_word.InitForRetryRecognition(*word);
|
||||
cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
|
||||
OEM_CUBE_ONLY,
|
||||
NULL, false, false, false,
|
||||
page_res_it.row()->row,
|
||||
page_res_it.block()->block);
|
||||
CubeObject *cube_obj = lang_tess->cube_recognize_word(
|
||||
page_res_it.block()->block, &cube_word);
|
||||
if (cube_obj != NULL)
|
||||
lang_tess->cube_combine_word(cube_obj, &cube_word, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_word_pass1
|
||||
*
|
||||
* Recognizes a single word using (only) cube. Compatible with
|
||||
* Tesseract's classify_word_pass1/classify_word_pass2.
|
||||
*/
|
||||
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
|
||||
CubeObject *cube_obj = cube_recognize_word(block, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize_word
|
||||
*
|
||||
* Cube recognizer to recognize a single word as with classify_word_pass1
|
||||
* but also returns the cube object in case the combiner is needed.
|
||||
*/
|
||||
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
|
||||
if (!cube_binary_ || !cube_cntxt_) {
|
||||
if (cube_debug_level > 0 && !cube_binary_)
|
||||
tprintf("Tesseract::run_cube(): NULL binary image.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
TBOX word_box = word->word->bounding_box();
|
||||
if (block != NULL && (block->re_rotation().x() != 1.0f ||
|
||||
block->re_rotation().y() != 0.0f)) {
|
||||
// TODO(rays) We have to rotate the bounding box to get the true coords.
|
||||
// This will be achieved in the future via DENORM.
|
||||
// In the mean time, cube can't process this word.
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube can't process rotated word at:");
|
||||
word_box.print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, cube_binary_, word_box.left(),
|
||||
pixGetHeight(cube_binary_) - word_box.top(),
|
||||
word_box.width(), word_box.height());
|
||||
if (!cube_recognize(cube_obj, block, word)) {
|
||||
delete cube_obj;
|
||||
return NULL;
|
||||
}
|
||||
return cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_combine_word
|
||||
*
|
||||
* Combines the cube and tesseract results for a single word, leaving the
|
||||
* result in tess_word.
|
||||
*/
|
||||
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
|
||||
WERD_RES* tess_word) {
|
||||
float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
|
||||
cube_obj);
|
||||
// If combiner probability is greater than tess/cube combiner
|
||||
// classifier threshold, i.e. tesseract wins, then just return the
|
||||
// tesseract result unchanged, as the combiner knows nothing about how
|
||||
// correct the answer is. If cube and tesseract agree, then improve the
|
||||
// scores before returning.
|
||||
WERD_CHOICE* tess_best = tess_word->best_choice;
|
||||
WERD_CHOICE* cube_best = cube_word->best_choice;
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Combiner prob = %g vs threshold %g\n",
|
||||
combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
|
||||
}
|
||||
if (combiner_prob >=
|
||||
cube_cntxt_->Params()->CombinerClassifierThresh()) {
|
||||
if (tess_best->unichar_string() == cube_best->unichar_string()) {
|
||||
// Cube and tess agree, so improve the scores.
|
||||
tess_best->set_rating(tess_best->rating() / 2);
|
||||
tess_best->set_certainty(tess_best->certainty() / 2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Cube wins.
|
||||
// It is better for the language combiner to have all tesseract scores,
|
||||
// so put them in the cube result.
|
||||
cube_best->set_rating(tess_best->rating());
|
||||
cube_best->set_certainty(tess_best->certainty());
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
|
||||
tess_best->unichar_string().string(),
|
||||
cube_best->unichar_string().string());
|
||||
}
|
||||
tess_word->ConsumeWordResults(cube_word);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize
|
||||
*
|
||||
* Call cube on the current word, and write the result to word.
|
||||
* Sets up a fake result and returns false if something goes wrong.
|
||||
*/
|
||||
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
|
||||
WERD_RES *word) {
|
||||
// Run cube
|
||||
WordAltList *cube_alt_list = cube_obj->RecognizeWord();
|
||||
if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube returned nothing for word at:");
|
||||
word->word->bounding_box().print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get cube's best result and its probability, mapped to tesseract's
|
||||
// certainty range
|
||||
char_32 *cube_best_32 = cube_alt_list->Alt(0);
|
||||
double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
|
||||
float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
|
||||
string cube_best_str;
|
||||
CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
|
||||
|
||||
// Retrieve Cube's character bounding boxes and CharSamples,
|
||||
// corresponding to the most recent call to RecognizeWord().
|
||||
Boxa *char_boxes = NULL;
|
||||
CharSamp **char_samples = NULL;;
|
||||
int num_chars;
|
||||
if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
|
||||
&& cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
|
||||
"cube state.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert cube's character bounding boxes to a BoxWord.
|
||||
BoxWord cube_box_word;
|
||||
TBOX tess_word_box = word->word->bounding_box();
|
||||
if (word->denorm.block() != NULL)
|
||||
tess_word_box.rotate(word->denorm.block()->re_rotation());
|
||||
bool box_word_success = create_cube_box_word(char_boxes, num_chars,
|
||||
tess_word_box,
|
||||
&cube_box_word);
|
||||
boxaDestroy(&char_boxes);
|
||||
if (!box_word_success) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
|
||||
"create cube BoxWord\n");
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Fill tesseract result's fields with cube results
|
||||
fill_werd_res(cube_box_word, cube_best_str.c_str(), word);
|
||||
|
||||
// Create cube's best choice.
|
||||
BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
UNICHAR_ID uch_id =
|
||||
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
|
||||
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
|
||||
-1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
|
||||
}
|
||||
word->FakeClassifyWord(num_chars, choices);
|
||||
// within a word, cube recognizes the word in reading order.
|
||||
word->best_choice->set_unichars_in_script_order(true);
|
||||
delete [] choices;
|
||||
delete [] char_samples;
|
||||
|
||||
// Some sanity checks
|
||||
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
|
||||
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube result: %s r=%g, c=%g\n",
|
||||
word->best_choice->unichar_string().string(),
|
||||
word->best_choice->rating(),
|
||||
word->best_choice->certainty());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name fill_werd_res
|
||||
*
|
||||
* Fill Tesseract's word result fields with cube's.
|
||||
*
|
||||
*/
|
||||
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
|
||||
const char* cube_best_str,
|
||||
WERD_RES* tess_werd_res) {
|
||||
delete tess_werd_res->box_word;
|
||||
tess_werd_res->box_word = new BoxWord(cube_box_word);
|
||||
tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
|
||||
tess_werd_res->word);
|
||||
// Fill text and remaining fields
|
||||
tess_werd_res->word->set_text(cube_best_str);
|
||||
tess_werd_res->tess_failed = FALSE;
|
||||
tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
|
||||
// There is no output word, so we can' call AdaptableWord, but then I don't
|
||||
// think we need to. Fudge the result with accepted.
|
||||
tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
|
||||
|
||||
// Set word to done, i.e., ignore all of tesseract's tests for rejection
|
||||
tess_werd_res->done = tess_werd_res->tess_accepted;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
@ -1,208 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_reco_context.cpp
|
||||
* Description: Implementation of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <limits.h>
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
#include "classifier_factory.h"
|
||||
#include "cube_tuning_params.h"
|
||||
#include "dict.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* Instantiate a CubeRecoContext object using a Tesseract object.
|
||||
* CubeRecoContext will not take ownership of tess_obj, but will
|
||||
* record the pointer to it and will make use of various Tesseract
|
||||
* components (language model, flags, etc). Thus the caller should
|
||||
* keep tess_obj alive so long as the instantiated CubeRecoContext is used.
|
||||
*/
|
||||
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
|
||||
tess_obj_ = tess_obj;
|
||||
lang_ = "";
|
||||
loaded_ = false;
|
||||
lang_mod_ = NULL;
|
||||
params_ = NULL;
|
||||
char_classifier_ = NULL;
|
||||
char_set_ = NULL;
|
||||
word_size_model_ = NULL;
|
||||
char_bigrams_ = NULL;
|
||||
word_unigrams_ = NULL;
|
||||
noisy_input_ = false;
|
||||
size_normalization_ = false;
|
||||
}
|
||||
|
||||
// Destroys every component this context created. tess_obj_ and
// tess_unicharset_ are not deleted here.
CubeRecoContext::~CubeRecoContext() {
  // `delete` on a NULL pointer is a no-op, so members that were never
  // created need no explicit guard.
  delete char_classifier_;
  char_classifier_ = NULL;

  delete word_size_model_;
  word_size_model_ = NULL;

  delete char_set_;
  char_set_ = NULL;

  delete char_bigrams_;
  char_bigrams_ = NULL;

  delete word_unigrams_;
  word_unigrams_ = NULL;

  delete lang_mod_;
  lang_mod_ = NULL;

  delete params_;
  params_ = NULL;
}
|
||||
|
||||
/**
|
||||
* Returns the path of the data files by looking up the TESSDATA_PREFIX
|
||||
* environment variable and appending a "tessdata" directory to it
|
||||
*/
|
||||
bool CubeRecoContext::GetDataFilePath(string *path) const {
|
||||
*path = tess_obj_->datadir.string();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* The object initialization function that loads all the necessary
|
||||
* components of a RecoContext. TessdataManager is used to load the
|
||||
* data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
|
||||
* component is present, Cube will be instantiated with the unicharset
|
||||
* specified in this component and the corresponding dictionary
|
||||
* (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
|
||||
* Tesseract's. Otherwise, TessdataManager will assume that Cube will
|
||||
* be using Tesseract's unicharset and dawgs, and will load the
|
||||
* unicharset from the TESSDATA_UNICHARSET component and will load the
|
||||
* dawgs from TESSDATA_*_DAWG components.
|
||||
*/
|
||||
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
ASSERT_HOST(tess_obj_ != NULL);
|
||||
tess_unicharset_ = tess_unicharset;
|
||||
string data_file_path;
|
||||
|
||||
// Get the data file path.
|
||||
if (GetDataFilePath(&data_file_path) == false) {
|
||||
fprintf(stderr, "Unable to get data file path\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the language from the Tesseract object.
|
||||
lang_ = tess_obj_->lang.string();
|
||||
|
||||
// Create the char set.
|
||||
if ((char_set_ =
|
||||
CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharSet\n");
|
||||
return false;
|
||||
}
|
||||
// Create the language model.
|
||||
string lm_file_name = data_file_path + lang_ + ".cube.lm";
|
||||
string lm_params;
|
||||
if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
|
||||
"language model params from %s\n", lm_file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
lang_mod_ = new TessLangModel(lm_params, data_file_path,
|
||||
tess_obj_->getDict().load_system_dawg,
|
||||
tessdata_manager, this);
|
||||
if (lang_mod_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create "
|
||||
"TessLangModel\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the optional char bigrams object.
|
||||
char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional word unigrams object.
|
||||
word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional size model.
|
||||
word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
|
||||
char_set_, Contextual());
|
||||
|
||||
// Load tuning params.
|
||||
params_ = CubeTuningParams::Create(data_file_path, lang_);
|
||||
if (params_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
|
||||
"CubeTuningParams from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the char classifier.
|
||||
char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
|
||||
lang_mod_, char_set_,
|
||||
params_);
|
||||
if (char_classifier_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharClassifierFactory object from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
loaded_ = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Creates a CubeRecoContext object using a tesseract object */
|
||||
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
// create the object
|
||||
CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
|
||||
if (cntxt == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create "
|
||||
"CubeRecoContext object\n");
|
||||
return NULL;
|
||||
}
|
||||
// load the necessary components
|
||||
if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
|
||||
"CubeRecoContext object\n");
|
||||
delete cntxt;
|
||||
return NULL;
|
||||
}
|
||||
// success
|
||||
return cntxt;
|
||||
}
|
||||
}  // namespace tesseract
|
@ -1,157 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_reco_context.h
|
||||
* Description: Declaration of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
|
||||
// (or a thread) would create one CubeRecoContext object per language.
|
||||
// The CubeRecoContext object also provides methods to get and set the
|
||||
// different attributes of the Cube OCR Engine.
|
||||
|
||||
#ifndef CUBE_RECO_CONTEXT_H
|
||||
#define CUBE_RECO_CONTEXT_H
|
||||
|
||||
#include <string>
|
||||
#include "neural_net.h"
|
||||
#include "lang_model.h"
|
||||
#include "classifier_base.h"
|
||||
#include "feature_base.h"
|
||||
#include "char_set.h"
|
||||
#include "word_size_model.h"
|
||||
#include "char_bigrams.h"
|
||||
#include "word_unigrams.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
class TessdataManager;
|
||||
|
||||
class CubeRecoContext {
|
||||
public:
|
||||
// Reading order enum type
|
||||
enum ReadOrder {
|
||||
L2R,
|
||||
R2L
|
||||
};
|
||||
|
||||
// Instantiate using a Tesseract object
|
||||
CubeRecoContext(Tesseract *tess_obj);
|
||||
|
||||
~CubeRecoContext();
|
||||
|
||||
// accessor functions
|
||||
inline const string & Lang() const { return lang_; }
|
||||
inline CharSet *CharacterSet() const { return char_set_; }
|
||||
const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
|
||||
inline CharClassifier *Classifier() const { return char_classifier_; }
|
||||
inline WordSizeModel *SizeModel() const { return word_size_model_; }
|
||||
inline CharBigrams *Bigrams() const { return char_bigrams_; }
|
||||
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
|
||||
inline TuningParams *Params() const { return params_; }
|
||||
inline LangModel *LangMod() const { return lang_mod_; }
|
||||
|
||||
// the reading order of the language
|
||||
inline ReadOrder ReadingOrder() const {
|
||||
return ((lang_ == "ara") ? R2L : L2R);
|
||||
}
|
||||
|
||||
// does the language support case
|
||||
inline bool HasCase() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Cursive() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
inline bool HasItalics() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Contextual() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
// RecoContext runtime flags accessor functions
|
||||
inline bool SizeNormalization() const { return size_normalization_; }
|
||||
inline bool NoisyInput() const { return noisy_input_; }
|
||||
inline bool OOD() const { return lang_mod_->OOD(); }
|
||||
inline bool Numeric() const { return lang_mod_->Numeric(); }
|
||||
inline bool WordList() const { return lang_mod_->WordList(); }
|
||||
inline bool Punc() const { return lang_mod_->Punc(); }
|
||||
inline bool CaseSensitive() const {
|
||||
return char_classifier_->CaseSensitive();
|
||||
}
|
||||
|
||||
inline void SetSizeNormalization(bool size_normalization) {
|
||||
size_normalization_ = size_normalization;
|
||||
}
|
||||
inline void SetNoisyInput(bool noisy_input) {
|
||||
noisy_input_ = noisy_input;
|
||||
}
|
||||
inline void SetOOD(bool ood_enabled) {
|
||||
lang_mod_->SetOOD(ood_enabled);
|
||||
}
|
||||
inline void SetNumeric(bool numeric_enabled) {
|
||||
lang_mod_->SetNumeric(numeric_enabled);
|
||||
}
|
||||
inline void SetWordList(bool word_list_enabled) {
|
||||
lang_mod_->SetWordList(word_list_enabled);
|
||||
}
|
||||
inline void SetPunc(bool punc_enabled) {
|
||||
lang_mod_->SetPunc(punc_enabled);
|
||||
}
|
||||
inline void SetCaseSensitive(bool case_sensitive) {
|
||||
char_classifier_->SetCaseSensitive(case_sensitive);
|
||||
}
|
||||
inline tesseract::Tesseract *TesseractObject() const {
|
||||
return tess_obj_;
|
||||
}
|
||||
|
||||
// Returns the path of the data files
|
||||
bool GetDataFilePath(string *path) const;
|
||||
// Creates a CubeRecoContext object using a tesseract object. Data
|
||||
// files are loaded via the tessdata_manager, and the tesseract
|
||||
// unicharset is provided in order to map Cube's unicharset to
|
||||
// Tesseract's in the case where the two unicharsets differ.
|
||||
static CubeRecoContext *Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
|
||||
private:
|
||||
bool loaded_;
|
||||
string lang_;
|
||||
CharSet *char_set_;
|
||||
UNICHARSET *tess_unicharset_;
|
||||
WordSizeModel *word_size_model_;
|
||||
CharClassifier *char_classifier_;
|
||||
CharBigrams *char_bigrams_;
|
||||
WordUnigrams *word_unigrams_;
|
||||
TuningParams *params_;
|
||||
LangModel *lang_mod_;
|
||||
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
|
||||
bool size_normalization_;
|
||||
bool noisy_input_;
|
||||
|
||||
// Loads and initialized all the necessary components of a
|
||||
// CubeRecoContext. See .cpp for more details.
|
||||
bool Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_RECO_CONTEXT_H
|
@ -1,134 +0,0 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.cpp
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:39:45 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "cubeclassifier.h"
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tessclassifier.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "trainingsample.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Captures the Cube recognition context and the shape table of the given
// Tesseract instance. Neither is copied: the shape table is held by reference.
CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
    : cube_cntxt_(tesseract->GetCubeRecoContext()),
      shape_table_(*tesseract->shape_table()) {}
|
||||
// Nothing to release: this class deletes none of its members.
CubeClassifier::~CubeClassifier() {}
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
results->clear();
|
||||
if (page_pix == NULL) return 0;
|
||||
|
||||
ASSERT_HOST(cube_cntxt_ != NULL);
|
||||
const TBOX& char_box = sample.bounding_box();
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, page_pix, char_box.left(),
|
||||
pixGetHeight(page_pix) - char_box.top(),
|
||||
char_box.width(), char_box.height());
|
||||
CharAltList* alt_list = cube_obj->RecognizeChar();
|
||||
if (alt_list != NULL) {
|
||||
alt_list->Sort();
|
||||
CharSet* char_set = cube_cntxt_->CharacterSet();
|
||||
for (int i = 0; i < alt_list->AltCount(); ++i) {
|
||||
// Convert cube representation to a shape_id.
|
||||
int alt_id = alt_list->Alt(i);
|
||||
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
|
||||
if (unichar_id >= 0)
|
||||
results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
|
||||
}
|
||||
delete alt_list;
|
||||
}
|
||||
delete cube_obj;
|
||||
return results->size();
|
||||
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
|
||||
// Captures the Cube recognition context and shape table of the given
// Tesseract instance and creates the TessClassifier used as the pruner.
// The pruner is owned by this object and deleted in the destructor.
CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
    : cube_cntxt_(tesseract->GetCubeRecoContext()),
      shape_table_(*tesseract->shape_table()),
      pruner_(new TessClassifier(true, tesseract)) {}
|
||||
// Releases the pruner created in the constructor.
CubeTessClassifier::~CubeTessClassifier() { delete pruner_; }
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeTessClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
|
||||
keep_this, results);
|
||||
if (page_pix == NULL) return num_results;
|
||||
|
||||
ASSERT_HOST(cube_cntxt_ != NULL);
|
||||
const TBOX& char_box = sample.bounding_box();
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, page_pix, char_box.left(),
|
||||
pixGetHeight(page_pix) - char_box.top(),
|
||||
char_box.width(), char_box.height());
|
||||
CharAltList* alt_list = cube_obj->RecognizeChar();
|
||||
CharSet* char_set = cube_cntxt_->CharacterSet();
|
||||
if (alt_list != NULL) {
|
||||
for (int r = 0; r < num_results; ++r) {
|
||||
// Get the best cube probability of the unichar in the result.
|
||||
double best_prob = 0.0;
|
||||
for (int i = 0; i < alt_list->AltCount(); ++i) {
|
||||
int alt_id = alt_list->Alt(i);
|
||||
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
|
||||
if (unichar_id == (*results)[r].unichar_id &&
|
||||
alt_list->AltProb(i) > best_prob) {
|
||||
best_prob = alt_list->AltProb(i);
|
||||
}
|
||||
}
|
||||
(*results)[r].rating = best_prob;
|
||||
}
|
||||
delete alt_list;
|
||||
// Re-sort by rating.
|
||||
results->sort(&UnicharRating::SortDescendingRating);
|
||||
}
|
||||
delete cube_obj;
|
||||
return results->size();
|
||||
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
|
@ -1,80 +0,0 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.h
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:36:32 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
#define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
|
||||
#include "shapeclassifier.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Classify;
|
||||
class CubeRecoContext;
|
||||
class ShapeTable;
|
||||
class TessClassifier;
|
||||
class Tesseract;
|
||||
class TrainingSample;
|
||||
struct UnicharRating;
|
||||
|
||||
// Cube implementation of a ShapeClassifier.
|
||||
class CubeClassifier : public ShapeClassifier {
|
||||
public:
|
||||
explicit CubeClassifier(Tesseract* tesseract);
|
||||
virtual ~CubeClassifier();
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
|
||||
int debug, UNICHAR_ID keep_this,
|
||||
GenericVector<UnicharRating>* results);
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
virtual const ShapeTable* GetShapeTable() const;
|
||||
|
||||
private:
|
||||
// Cube objects.
|
||||
CubeRecoContext* cube_cntxt_;
|
||||
const ShapeTable& shape_table_;
|
||||
};
|
||||
|
||||
// Combination of Tesseract class pruner with scoring by cube.
|
||||
class CubeTessClassifier : public ShapeClassifier {
|
||||
public:
|
||||
explicit CubeTessClassifier(Tesseract* tesseract);
|
||||
virtual ~CubeTessClassifier();
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
|
||||
int debug, UNICHAR_ID keep_this,
|
||||
GenericVector<UnicharRating>* results);
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
virtual const ShapeTable* GetShapeTable() const;
|
||||
|
||||
private:
|
||||
// Cube objects.
|
||||
CubeRecoContext* cube_cntxt_;
|
||||
const ShapeTable& shape_table_;
|
||||
TessClassifier* pruner_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif /* THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ */
|
@ -98,8 +98,8 @@ void Tesseract::word_char_quality(WERD_RES *word,
|
||||
ROW *row,
|
||||
inT16 *match_count,
|
||||
inT16 *accepted_match_count) {
|
||||
if (word->bln_boxes == NULL ||
|
||||
word->rebuild_word == NULL || word->rebuild_word->blobs.empty()) {
|
||||
if (word->bln_boxes == NULL || word->rebuild_word == NULL ||
|
||||
word->rebuild_word->blobs.empty()) {
|
||||
*match_count = 0;
|
||||
*accepted_match_count = 0;
|
||||
return;
|
||||
@ -132,7 +132,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) {
|
||||
int expected_outline_count;
|
||||
|
||||
if (STRING (outlines_odd).contains (c))
|
||||
return 0; //Don't use this char
|
||||
return 0; // Don't use this char
|
||||
else if (STRING (outlines_2).contains (c))
|
||||
expected_outline_count = 2;
|
||||
else
|
||||
@ -151,7 +151,6 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* unrej_good_quality_words()
|
||||
* Accept potential rejects in words which pass the following checks:
|
||||
@ -403,7 +402,6 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* reject_whole_page()
|
||||
* Don't believe any of it - set the reject map to 00..00 in all words
|
||||
|
@ -624,10 +624,6 @@ void EquationDetect::IdentifySeedParts() {
|
||||
}
|
||||
|
||||
float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) {
|
||||
#if LIBLEPT_MINOR_VERSION < 69 && LIBLEPT_MAJOR_VERSION <= 1
|
||||
// This will disable the detector because no seed will be identified.
|
||||
return 1.0f;
|
||||
#else
|
||||
Pix *pix_bi = lang_tesseract_->pix_binary();
|
||||
int pix_height = pixGetHeight(pix_bi);
|
||||
Box* box = boxCreate(tbox.left(), pix_height - tbox.top(),
|
||||
@ -639,7 +635,6 @@ float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) {
|
||||
boxDestroy(&box);
|
||||
|
||||
return fract;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool EquationDetect::CheckSeedFgDensity(const float density_th,
|
||||
|
@ -17,8 +17,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H__
|
||||
#define TESSERACT_CCMAIN_EQUATIONDETECT_H__
|
||||
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H_
|
||||
#define TESSERACT_CCMAIN_EQUATIONDETECT_H_
|
||||
|
||||
#include "blobbox.h"
|
||||
#include "equationdetectbase.h"
|
||||
|
@ -211,7 +211,6 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name eval_word_spacing()
|
||||
* The basic measure is the number of characters in contextually confirmed
|
||||
|
333
ccmain/linerec.cpp
Normal file
333
ccmain/linerec.cpp
Normal file
@ -0,0 +1,333 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: linerec.cpp
|
||||
// Description: Top-level line-based recognition module for Tesseract.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu May 02 09:47:06 PST 2013
|
||||
//
|
||||
// (C) Copyright 2013, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "tesseractclass.h"
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "boxread.h"
|
||||
#include "imagedata.h"
|
||||
#ifndef ANDROID_BUILD
|
||||
#include "lstmrecognizer.h"
|
||||
#include "recodebeam.h"
|
||||
#endif
|
||||
#include "ndminx.h"
|
||||
#include "pageres.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Arbitrary penalty for non-dictionary words.
|
||||
// TODO(rays) How to learn this?
|
||||
const float kNonDictionaryPenalty = 5.0f;
|
||||
// Scale factor to make certainty more comparable to Tesseract.
|
||||
const float kCertaintyScale = 7.0f;
|
||||
// Worst acceptable certainty for a dictionary word.
|
||||
const float kWorstDictCertainty = -25.0f;
|
||||
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||
// serialized DocumentData based on output_basename.
|
||||
void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||
const STRING& output_basename,
|
||||
BLOCK_LIST *block_list) {
|
||||
STRING lstmf_name = output_basename + ".lstmf";
|
||||
DocumentData images(lstmf_name);
|
||||
if (applybox_page > 0) {
|
||||
// Load existing document for the previous pages.
|
||||
if (!images.LoadDocument(lstmf_name.string(), "eng", 0, 0, NULL)) {
|
||||
tprintf("Failed to read training data from %s!\n", lstmf_name.string());
|
||||
return;
|
||||
}
|
||||
}
|
||||
GenericVector<TBOX> boxes;
|
||||
GenericVector<STRING> texts;
|
||||
// Get the boxes for this page, if there are any.
|
||||
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, NULL,
|
||||
NULL) ||
|
||||
boxes.empty()) {
|
||||
tprintf("Failed to read boxes from %s\n", input_imagename.string());
|
||||
return;
|
||||
}
|
||||
TrainFromBoxes(boxes, texts, block_list, &images);
|
||||
images.Shuffle();
|
||||
if (!images.SaveDocument(lstmf_name.string(), NULL)) {
|
||||
tprintf("Failed to write training data to %s!\n", lstmf_name.string());
|
||||
}
|
||||
}
|
||||
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
// Breaks the boxes into lines, normalizes them, converts to ImageData and
|
||||
// appends them to the given training_data.
|
||||
void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
|
||||
const GenericVector<STRING>& texts,
|
||||
BLOCK_LIST *block_list,
|
||||
DocumentData* training_data) {
|
||||
int box_count = boxes.size();
|
||||
// Process all the text lines in this page, as defined by the boxes.
|
||||
int end_box = 0;
|
||||
// Don't let \t, which marks newlines in the box file, get into the line
|
||||
// content, as that makes the line unusable in training.
|
||||
while (end_box < texts.size() && texts[end_box] == "\t") ++end_box;
|
||||
for (int start_box = end_box; start_box < box_count; start_box = end_box) {
|
||||
// Find the textline of boxes starting at start and their bounding box.
|
||||
TBOX line_box = boxes[start_box];
|
||||
STRING line_str = texts[start_box];
|
||||
for (end_box = start_box + 1; end_box < box_count && texts[end_box] != "\t";
|
||||
++end_box) {
|
||||
line_box += boxes[end_box];
|
||||
line_str += texts[end_box];
|
||||
}
|
||||
// Find the most overlapping block.
|
||||
BLOCK* best_block = NULL;
|
||||
int best_overlap = 0;
|
||||
BLOCK_IT b_it(block_list);
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
BLOCK* block = b_it.data();
|
||||
if (block->poly_block() != NULL && !block->poly_block()->IsText())
|
||||
continue; // Not a text block.
|
||||
TBOX block_box = block->bounding_box();
|
||||
block_box.rotate(block->re_rotation());
|
||||
if (block_box.major_overlap(line_box)) {
|
||||
TBOX overlap_box = line_box.intersection(block_box);
|
||||
if (overlap_box.area() > best_overlap) {
|
||||
best_overlap = overlap_box.area();
|
||||
best_block = block;
|
||||
}
|
||||
}
|
||||
}
|
||||
ImageData* imagedata = NULL;
|
||||
if (best_block == NULL) {
|
||||
tprintf("No block overlapping textline: %s\n", line_str.string());
|
||||
} else {
|
||||
imagedata = GetLineData(line_box, boxes, texts, start_box, end_box,
|
||||
*best_block);
|
||||
}
|
||||
if (imagedata != NULL)
|
||||
training_data->AddPageToDocument(imagedata);
|
||||
// Don't let \t, which marks newlines in the box file, get into the line
|
||||
// content, as that makes the line unusable in training.
|
||||
while (end_box < texts.size() && texts[end_box] == "\t") ++end_box;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns an Imagedata containing the image of the given box,
|
||||
// and ground truth boxes/truth text if available in the input.
|
||||
// The image is not normalized in any way.
|
||||
ImageData* Tesseract::GetLineData(const TBOX& line_box,
|
||||
const GenericVector<TBOX>& boxes,
|
||||
const GenericVector<STRING>& texts,
|
||||
int start_box, int end_box,
|
||||
const BLOCK& block) {
|
||||
TBOX revised_box;
|
||||
ImageData* image_data = GetRectImage(line_box, block, kImagePadding,
|
||||
&revised_box);
|
||||
if (image_data == NULL) return NULL;
|
||||
image_data->set_page_number(applybox_page);
|
||||
// Copy the boxes and shift them so they are relative to the image.
|
||||
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
|
||||
ICOORD shift = -revised_box.botleft();
|
||||
GenericVector<TBOX> line_boxes;
|
||||
GenericVector<STRING> line_texts;
|
||||
for (int b = start_box; b < end_box; ++b) {
|
||||
TBOX box = boxes[b];
|
||||
box.rotate(block_rotation);
|
||||
box.move(shift);
|
||||
line_boxes.push_back(box);
|
||||
line_texts.push_back(texts[b]);
|
||||
}
|
||||
GenericVector<int> page_numbers;
|
||||
page_numbers.init_to_size(line_boxes.size(), applybox_page);
|
||||
image_data->AddBoxes(line_boxes, line_texts, page_numbers);
|
||||
return image_data;
|
||||
}
|
||||
|
||||
// Helper gets the image of a rectangle, using the block.re_rotation() if
|
||||
// needed to get to the image, and rotating the result back to horizontal
|
||||
// layout. (CJK characters will be on their left sides) The vertical text flag
|
||||
// is set in the returned ImageData if the text was originally vertical, which
|
||||
// can be used to invoke a different CJK recognition engine. The revised_box
|
||||
// is also returned to enable calculation of output bounding boxes.
|
||||
ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
|
||||
int padding, TBOX* revised_box) const {
|
||||
TBOX wbox = box;
|
||||
wbox.pad(padding, padding);
|
||||
*revised_box = wbox;
|
||||
// Number of clockwise 90 degree rotations needed to get back to tesseract
|
||||
// coords from the clipped image.
|
||||
int num_rotations = 0;
|
||||
if (block.re_rotation().y() > 0.0f)
|
||||
num_rotations = 1;
|
||||
else if (block.re_rotation().x() < 0.0f)
|
||||
num_rotations = 2;
|
||||
else if (block.re_rotation().y() < 0.0f)
|
||||
num_rotations = 3;
|
||||
// Handle two cases automatically: 1 the box came from the block, 2 the box
|
||||
// came from a box file, and refers to the image, which the block may not.
|
||||
if (block.bounding_box().major_overlap(*revised_box))
|
||||
revised_box->rotate(block.re_rotation());
|
||||
// Now revised_box always refers to the image.
|
||||
// BestPix is never colormapped, but may be of any depth.
|
||||
Pix* pix = BestPix();
|
||||
int width = pixGetWidth(pix);
|
||||
int height = pixGetHeight(pix);
|
||||
TBOX image_box(0, 0, width, height);
|
||||
// Clip to image bounds;
|
||||
*revised_box &= image_box;
|
||||
if (revised_box->null_box()) return NULL;
|
||||
Box* clip_box = boxCreate(revised_box->left(), height - revised_box->top(),
|
||||
revised_box->width(), revised_box->height());
|
||||
Pix* box_pix = pixClipRectangle(pix, clip_box, NULL);
|
||||
if (box_pix == NULL) return NULL;
|
||||
boxDestroy(&clip_box);
|
||||
if (num_rotations > 0) {
|
||||
Pix* rot_pix = pixRotateOrth(box_pix, num_rotations);
|
||||
pixDestroy(&box_pix);
|
||||
box_pix = rot_pix;
|
||||
}
|
||||
// Convert sub-8-bit images to 8 bit.
|
||||
int depth = pixGetDepth(box_pix);
|
||||
if (depth < 8) {
|
||||
Pix* grey;
|
||||
grey = pixConvertTo8(box_pix, false);
|
||||
pixDestroy(&box_pix);
|
||||
box_pix = grey;
|
||||
}
|
||||
bool vertical_text = false;
|
||||
if (num_rotations > 0) {
|
||||
// Rotated the clipped revised box back to internal coordinates.
|
||||
FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y());
|
||||
revised_box->rotate(rotation);
|
||||
if (num_rotations != 2)
|
||||
vertical_text = true;
|
||||
}
|
||||
return new ImageData(vertical_text, box_pix);
|
||||
}
|
||||
|
||||
#ifndef ANDROID_BUILD
|
||||
// Recognizes a word or group of words, converting to WERD_RES in *words.
|
||||
// Analogous to classify_word_pass1, but can handle a group of words as well.
|
||||
void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
|
||||
PointerVector<WERD_RES>* words) {
|
||||
TBOX word_box = word->word->bounding_box();
|
||||
// Get the word image - no frills.
|
||||
if (tessedit_pageseg_mode == PSM_SINGLE_WORD ||
|
||||
tessedit_pageseg_mode == PSM_RAW_LINE) {
|
||||
// In single word mode, use the whole image without any other row/word
|
||||
// interpretation.
|
||||
word_box = TBOX(0, 0, ImageWidth(), ImageHeight());
|
||||
} else {
|
||||
float baseline = row->base_line((word_box.left() + word_box.right()) / 2);
|
||||
if (baseline + row->descenders() < word_box.bottom())
|
||||
word_box.set_bottom(baseline + row->descenders());
|
||||
if (baseline + row->x_height() + row->ascenders() > word_box.top())
|
||||
word_box.set_top(baseline + row->x_height() + row->ascenders());
|
||||
}
|
||||
ImageData* im_data = GetRectImage(word_box, block, kImagePadding, &word_box);
|
||||
if (im_data == NULL) return;
|
||||
lstm_recognizer_->RecognizeLine(*im_data, true, classify_debug_level > 0,
|
||||
kWorstDictCertainty / kCertaintyScale,
|
||||
lstm_use_matrix, &unicharset, word_box, 2.0,
|
||||
false, words);
|
||||
delete im_data;
|
||||
SearchWords(words);
|
||||
}
|
||||
|
||||
// Apply segmentation search to the given set of words, within the constraints
|
||||
// of the existing ratings matrix. If there is already a best_choice on a word
|
||||
// leaves it untouched and just sets the done/accepted etc flags.
|
||||
void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
|
||||
// Run the segmentation search on the network outputs and make a BoxWord
|
||||
// for each of the output words.
|
||||
// If we drop a word as junk, then there is always a space in front of the
|
||||
// next.
|
||||
const Dict* stopper_dict = lstm_recognizer_->GetDict();
|
||||
if (stopper_dict == nullptr) stopper_dict = &getDict();
|
||||
bool any_nonspace_delimited = false;
|
||||
for (int w = 0; w < words->size(); ++w) {
|
||||
WERD_RES* word = (*words)[w];
|
||||
if (word->best_choice != nullptr &&
|
||||
word->best_choice->ContainsAnyNonSpaceDelimited()) {
|
||||
any_nonspace_delimited = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int w = 0; w < words->size(); ++w) {
|
||||
WERD_RES* word = (*words)[w];
|
||||
if (word->best_choice == NULL) {
|
||||
// If we are using the beam search, the unicharset had better match!
|
||||
word->SetupWordScript(unicharset);
|
||||
WordSearch(word);
|
||||
} else if (word->best_choice->unicharset() == &unicharset &&
|
||||
!lstm_recognizer_->IsRecoding()) {
|
||||
// We set up the word without using the dictionary, so set the permuter
|
||||
// now, but we can only do it because the unicharsets match.
|
||||
word->best_choice->set_permuter(
|
||||
getDict().valid_word(*word->best_choice, true));
|
||||
}
|
||||
if (word->best_choice == NULL) {
|
||||
// It is a dud.
|
||||
word->SetupFake(lstm_recognizer_->GetUnicharset());
|
||||
} else {
|
||||
// Set the best state.
|
||||
for (int i = 0; i < word->best_choice->length(); ++i) {
|
||||
int length = word->best_choice->state(i);
|
||||
word->best_state.push_back(length);
|
||||
}
|
||||
word->reject_map.initialise(word->best_choice->length());
|
||||
word->tess_failed = false;
|
||||
word->tess_accepted = true;
|
||||
word->tess_would_adapt = false;
|
||||
word->done = true;
|
||||
word->tesseract = this;
|
||||
float word_certainty = MIN(word->space_certainty,
|
||||
word->best_choice->certainty());
|
||||
word_certainty *= kCertaintyScale;
|
||||
// Arbitrary ding factor for non-dictionary words.
|
||||
if (!lstm_recognizer_->IsRecoding() &&
|
||||
!Dict::valid_word_permuter(word->best_choice->permuter(), true))
|
||||
word_certainty -= kNonDictionaryPenalty;
|
||||
if (getDict().stopper_debug_level >= 1) {
|
||||
tprintf("Best choice certainty=%g, space=%g, scaled=%g, final=%g\n",
|
||||
word->best_choice->certainty(), word->space_certainty,
|
||||
MIN(word->space_certainty, word->best_choice->certainty()) *
|
||||
kCertaintyScale,
|
||||
word_certainty);
|
||||
word->best_choice->print();
|
||||
}
|
||||
word->best_choice->set_certainty(word_certainty);
|
||||
// Discard words that are impossibly bad, but allow a bit more for
|
||||
// dictionary words, and keep bad words in non-space-delimited langs.
|
||||
if (word_certainty >= RecodeBeamSearch::kMinCertainty ||
|
||||
any_nonspace_delimited ||
|
||||
(word_certainty >= kWorstDictCertainty &&
|
||||
Dict::valid_word_permuter(word->best_choice->permuter(), true))) {
|
||||
word->tess_accepted = stopper_dict->AcceptableResult(word);
|
||||
} else {
|
||||
if (getDict().stopper_debug_level >= 1) {
|
||||
tprintf("Deleting word with certainty %g\n", word_certainty);
|
||||
word->best_choice->print();
|
||||
}
|
||||
// It is a dud.
|
||||
word->SetupFake(lstm_recognizer_->GetUnicharset());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // ANDROID_BUILD
|
||||
|
||||
} // namespace tesseract.
|
@ -145,11 +145,10 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const {
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
void LTRResultIterator::RowAttributes(float* row_height,
|
||||
float* descenders,
|
||||
void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
|
||||
float* ascenders) const {
|
||||
*row_height = it_->row()->row->x_height() + it_->row()-> row->ascenders()
|
||||
- it_->row()->row->descenders();
|
||||
*row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
|
||||
it_->row()->row->descenders();
|
||||
*descenders = it_->row()->row->descenders();
|
||||
*ascenders = it_->row()->row->ascenders();
|
||||
}
|
||||
@ -221,6 +220,12 @@ bool LTRResultIterator::WordIsFromDictionary() const {
|
||||
permuter == USER_DAWG_PERM;
|
||||
}
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int LTRResultIterator::BlanksBeforeWord() const {
|
||||
if (it_->word() == NULL) return 1;
|
||||
return it_->word()->word->space();
|
||||
}
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool LTRResultIterator::WordIsNumeric() const {
|
||||
if (it_->word() == NULL) return false; // Already at the end!
|
||||
|
@ -18,8 +18,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "platform.h"
|
||||
#include "pageiterator.h"
|
||||
@ -92,8 +92,7 @@ class TESS_API LTRResultIterator : public PageIterator {
|
||||
float Confidence(PageIteratorLevel level) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float* row_height,
|
||||
float* descenders,
|
||||
void RowAttributes(float* row_height, float* descenders,
|
||||
float* ascenders) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
@ -125,6 +124,9 @@ class TESS_API LTRResultIterator : public PageIterator {
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool WordIsFromDictionary() const;
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool WordIsNumeric() const;
|
||||
|
||||
@ -216,4 +218,4 @@ class ChoiceIterator {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
@ -18,8 +18,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H__
|
||||
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
|
||||
#include "resultiterator.h"
|
||||
|
||||
@ -61,4 +61,4 @@ class MutableIterator : public ResultIterator {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H__
|
||||
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_
|
||||
|
@ -164,13 +164,19 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
|
||||
int vertical_y = 1;
|
||||
tesseract::TabVector_LIST v_lines;
|
||||
tesseract::TabVector_LIST h_lines;
|
||||
int resolution = (kMinCredibleResolution > pixGetXRes(pix)) ?
|
||||
kMinCredibleResolution : pixGetXRes(pix);
|
||||
int resolution;
|
||||
if (kMinCredibleResolution > pixGetXRes(pix)) {
|
||||
resolution = kMinCredibleResolution;
|
||||
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
|
||||
pixGetXRes(pix), resolution);
|
||||
} else {
|
||||
resolution = pixGetXRes(pix);
|
||||
}
|
||||
|
||||
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
|
||||
&vertical_x, &vertical_y,
|
||||
NULL, &v_lines, &h_lines);
|
||||
Pix* im_pix = tesseract::ImageFind::FindImages(pix);
|
||||
Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
|
||||
if (im_pix != NULL) {
|
||||
pixSubtract(pix, pix, im_pix);
|
||||
pixDestroy(&im_pix);
|
||||
|
@ -17,8 +17,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H__
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H__
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "strngs.h"
|
||||
#include "unicharset.h"
|
||||
@ -135,4 +135,4 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int& id);
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H__
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
@ -78,17 +78,15 @@ void Tesseract::output_pass( //Tess output pass //send to api
|
||||
while (page_res_it.word () != NULL) {
|
||||
check_debug_pt (page_res_it.word (), 120);
|
||||
|
||||
if (target_word_box)
|
||||
{
|
||||
|
||||
TBOX current_word_box=page_res_it.word ()->word->bounding_box();
|
||||
FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2);
|
||||
if (!target_word_box->contains(center_pt))
|
||||
{
|
||||
page_res_it.forward ();
|
||||
if (target_word_box) {
|
||||
TBOX current_word_box = page_res_it.word()->word->bounding_box();
|
||||
FCOORD center_pt(
|
||||
(current_word_box.right() + current_word_box.left()) / 2,
|
||||
(current_word_box.bottom() + current_word_box.top()) / 2);
|
||||
if (!target_word_box->contains(center_pt)) {
|
||||
page_res_it.forward();
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
if (tessedit_write_block_separators &&
|
||||
block_of_last_word != page_res_it.block ()) {
|
||||
@ -337,7 +335,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
|
||||
rating_per_ch = word.rating() / word_res->reject_map.length();
|
||||
|
||||
if (rating_per_ch >= suspect_rating_per_ch)
|
||||
return; //Don't touch bad ratings
|
||||
return; // Don't touch bad ratings
|
||||
|
||||
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
|
||||
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/
|
||||
|
@ -87,7 +87,7 @@ const PageIterator& PageIterator::operator=(const PageIterator& src) {
|
||||
rect_top_ = src.rect_top_;
|
||||
rect_width_ = src.rect_width_;
|
||||
rect_height_ = src.rect_height_;
|
||||
if (it_ != NULL) delete it_;
|
||||
delete it_;
|
||||
it_ = new PAGE_RES_IT(*src.it_);
|
||||
BeginWord(src.blob_index_);
|
||||
return *this;
|
||||
@ -597,10 +597,8 @@ void PageIterator::BeginWord(int offset) {
|
||||
}
|
||||
word_ = NULL;
|
||||
// We will be iterating the box_word.
|
||||
if (cblob_it_ != NULL) {
|
||||
delete cblob_it_;
|
||||
cblob_it_ = NULL;
|
||||
}
|
||||
} else {
|
||||
// No recognition yet, so a "symbol" is a cblob.
|
||||
word_ = word_res->word;
|
||||
|
@ -18,8 +18,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H__
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "publictypes.h"
|
||||
#include "platform.h"
|
||||
@ -361,4 +361,4 @@ class TESS_API PageIterator {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H__
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
@ -18,9 +18,6 @@
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef __GNUC__
|
||||
#include <windows.h>
|
||||
#endif // __GNUC__
|
||||
#ifndef unlink
|
||||
#include <io.h>
|
||||
#endif
|
||||
@ -40,6 +37,7 @@
|
||||
#include "blobbox.h"
|
||||
#include "blread.h"
|
||||
#include "colfind.h"
|
||||
#include "debugpixa.h"
|
||||
#include "equationdetect.h"
|
||||
#include "imagefind.h"
|
||||
#include "linefind.h"
|
||||
@ -179,28 +177,6 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
|
||||
return auto_page_seg_ret_val;
|
||||
}
|
||||
|
||||
// Helper writes a grey image to a file for use by scrollviewer.
|
||||
// Normally for speed we don't display the image in the layout debug windows.
|
||||
// If textord_debug_images is true, we draw the image as a background to some
|
||||
// of the debug windows. printable determines whether these
|
||||
// images are optimized for printing instead of screen display.
|
||||
static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
|
||||
Pix* grey_pix = pixCreate(pixGetWidth(pix_binary),
|
||||
pixGetHeight(pix_binary), 8);
|
||||
// Printable images are light grey on white, but for screen display
|
||||
// they are black on dark grey so the other colors show up well.
|
||||
if (printable) {
|
||||
pixSetAll(grey_pix);
|
||||
pixSetMasked(grey_pix, pix_binary, 192);
|
||||
} else {
|
||||
pixSetAllArbitrary(grey_pix, 64);
|
||||
pixSetMasked(grey_pix, pix_binary, 0);
|
||||
}
|
||||
AlignedBlob::IncrementDebugPix();
|
||||
pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
|
||||
pixDestroy(&grey_pix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Auto page segmentation. Divide the page image into blocks of uniform
|
||||
* text linespacing and images.
|
||||
@ -229,9 +205,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
|
||||
TO_BLOCK_LIST* to_blocks,
|
||||
BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
|
||||
OSResults* osr) {
|
||||
if (textord_debug_images) {
|
||||
WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
|
||||
}
|
||||
Pix* photomask_pix = NULL;
|
||||
Pix* musicmask_pix = NULL;
|
||||
// The blocks made by the ColumnFinder. Moved to blocks before return.
|
||||
@ -253,9 +226,10 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
|
||||
if (equ_detect_) {
|
||||
finder->SetEquationDetect(equ_detect_);
|
||||
}
|
||||
result = finder->FindBlocks(
|
||||
pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix,
|
||||
pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks);
|
||||
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
|
||||
to_block, photomask_pix, pix_thresholds_,
|
||||
pix_grey_, &pixa_debug_, &found_blocks,
|
||||
diacritic_blobs, to_blocks);
|
||||
if (result >= 0)
|
||||
finder->GetDeskewVectors(&deskew_, &reskew_);
|
||||
delete finder;
|
||||
@ -268,11 +242,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
|
||||
BLOCK_IT block_it(blocks);
|
||||
// Move the found blocks to the input/output blocks.
|
||||
block_it.add_list_after(&found_blocks);
|
||||
|
||||
if (textord_debug_images) {
|
||||
// The debug image is no longer needed so delete it.
|
||||
unlink(AlignedBlob::textord_debug_pix().string());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -314,19 +283,21 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
|
||||
ASSERT_HOST(pix_binary_ != NULL);
|
||||
if (tessedit_dump_pageseg_images) {
|
||||
pixWrite("tessinput.png", pix_binary_, IFF_PNG);
|
||||
pixa_debug_.AddPix(pix_binary_, "PageSegInput");
|
||||
}
|
||||
// Leptonica is used to find the rule/separator lines in the input.
|
||||
LineFinder::FindAndRemoveLines(source_resolution_,
|
||||
textord_tabfind_show_vlines, pix_binary_,
|
||||
&vertical_x, &vertical_y, music_mask_pix,
|
||||
&v_lines, &h_lines);
|
||||
if (tessedit_dump_pageseg_images)
|
||||
pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
|
||||
if (tessedit_dump_pageseg_images) {
|
||||
pixa_debug_.AddPix(pix_binary_, "NoLines");
|
||||
}
|
||||
// Leptonica is used to find a mask of the photo regions in the input.
|
||||
*photo_mask_pix = ImageFind::FindImages(pix_binary_);
|
||||
if (tessedit_dump_pageseg_images)
|
||||
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
|
||||
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
|
||||
if (tessedit_dump_pageseg_images) {
|
||||
pixa_debug_.AddPix(pix_binary_, "NoImages");
|
||||
}
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
|
||||
|
||||
// The rest of the algorithm uses the usual connected components.
|
||||
@ -412,7 +383,8 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
"Don't rotate.\n", osd_margin);
|
||||
osd_orientation = 0;
|
||||
} else {
|
||||
tprintf("OSD: Weak margin (%.2f) for %d blob text block, "
|
||||
tprintf(
|
||||
"OSD: Weak margin (%.2f) for %d blob text block, "
|
||||
"but using orientation anyway: %d\n",
|
||||
osd_margin, osd_blobs.length(), osd_orientation);
|
||||
}
|
||||
|
@ -18,9 +18,9 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "tesseractclass.h"
|
||||
#ifdef OPENMP
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif // OPENMP
|
||||
#endif // _OPENMP
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -53,7 +53,9 @@ void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
|
||||
}
|
||||
// Pre-classify all the blobs.
|
||||
if (tessedit_parallelize > 1) {
|
||||
#pragma omp parallel for num_threads(10)
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(10)
|
||||
#endif // _OPENMP
|
||||
for (int b = 0; b < blobs.size(); ++b) {
|
||||
*blobs[b].choices =
|
||||
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
|
||||
|
@ -2052,7 +2052,7 @@ void ConvertHypothesizedModelRunsToParagraphs(
|
||||
bool single_line_paragraph = false;
|
||||
SetOfModels models;
|
||||
rows[start].NonNullHypotheses(&models);
|
||||
if (models.size() > 0) {
|
||||
if (!models.empty()) {
|
||||
model = models[0];
|
||||
if (rows[start].GetLineType(model) != LT_BODY)
|
||||
single_line_paragraph = true;
|
||||
@ -2113,6 +2113,7 @@ void ConvertHypothesizedModelRunsToParagraphs(
|
||||
if ((*row_owners)[row] != NULL) {
|
||||
tprintf("Memory leak! ConvertHypothesizeModelRunsToParagraphs() called "
|
||||
"more than once!\n");
|
||||
delete (*row_owners)[row];
|
||||
}
|
||||
(*row_owners)[row] = p;
|
||||
}
|
||||
@ -2189,17 +2190,17 @@ void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,
|
||||
SetOfModels models_w_crowns;
|
||||
rows[i].StrongHypotheses(&models);
|
||||
rows[i].NonNullHypotheses(&models_w_crowns);
|
||||
if (models.empty() && models_w_crowns.size() > 0) {
|
||||
if (models.empty() && !models_w_crowns.empty()) {
|
||||
// Crown paragraph. Is it followed by a modeled line?
|
||||
for (int end = i + 1; end < rows.size(); end++) {
|
||||
SetOfModels end_models;
|
||||
SetOfModels strong_end_models;
|
||||
rows[end].NonNullHypotheses(&end_models);
|
||||
rows[end].StrongHypotheses(&strong_end_models);
|
||||
if (end_models.size() == 0) {
|
||||
if (end_models.empty()) {
|
||||
needs_fixing = true;
|
||||
break;
|
||||
} else if (strong_end_models.size() > 0) {
|
||||
} else if (!strong_end_models.empty()) {
|
||||
needs_fixing = false;
|
||||
break;
|
||||
}
|
||||
@ -2484,7 +2485,7 @@ void InitializeRowInfo(bool after_recognition,
|
||||
info->ltr = ltr >= rtl;
|
||||
info->has_leaders = num_leaders > 3;
|
||||
info->num_words = werds.size();
|
||||
if (werds.size() > 0) {
|
||||
if (!werds.empty()) {
|
||||
WERD_RES *lword = werds[0], *rword = werds[werds.size() - 1];
|
||||
info->lword_text = lword->best_choice->unichar_string().string();
|
||||
info->rword_text = rword->best_choice->unichar_string().string();
|
||||
@ -2537,7 +2538,7 @@ void DetectParagraphs(int debug_level,
|
||||
|
||||
// If we're called before text recognition, we might not have
|
||||
// tight block bounding boxes, so trim by the minimum on each side.
|
||||
if (row_infos.size() > 0) {
|
||||
if (!row_infos.empty()) {
|
||||
int min_lmargin = row_infos[0].pix_ldistance;
|
||||
int min_rmargin = row_infos[0].pix_rdistance;
|
||||
for (int i = 1; i < row_infos.size(); i++) {
|
||||
|
@ -329,13 +329,19 @@ void ParamsEditor::WriteParams(char *filename,
|
||||
fclose(fp);
|
||||
sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
|
||||
int a = sv_window_->ShowYesNoDialog(msg_str);
|
||||
if (a == 'n') { return; } // don't write
|
||||
if (a == 'n') {
|
||||
return;
|
||||
} // don't write
|
||||
}
|
||||
|
||||
|
||||
fp = fopen (filename, "wb"); // can we write to it?
|
||||
if (fp == NULL) {
|
||||
sv_window_->AddMessage("Can't write to file " "%s" "", filename);
|
||||
sv_window_->AddMessage(
|
||||
"Can't write to file "
|
||||
"%s"
|
||||
"",
|
||||
filename);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -19,14 +19,12 @@
|
||||
//
|
||||
// Tesseract parameter editor is used to edit all the parameters used
|
||||
// within tesseract from the ui.
|
||||
#ifndef TESSERACT_CCMAIN_PARAMSD_H_
|
||||
#define TESSERACT_CCMAIN_PARAMSD_H_
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
#ifndef VARABLED_H
|
||||
#define VARABLED_H
|
||||
|
||||
#include "elst.h"
|
||||
#ifndef NO_CUBE_BUILD
|
||||
#include "scrollview.h"
|
||||
#endif
|
||||
#include "params.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
@ -122,5 +120,5 @@ class ParamsEditor : public SVEventHandler {
|
||||
ScrollView* sv_window_;
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif // GRAPHICS_DISABLED
|
||||
#endif // TESSERACT_CCMAIN_PARAMSD_H_
|
||||
|
@ -191,7 +191,7 @@ ScrollView* bln_word_window_handle() { // return handle
|
||||
*/
|
||||
|
||||
void build_image_window(int width, int height) {
|
||||
if (image_win != NULL) { delete image_win; }
|
||||
delete image_win;
|
||||
image_win = new ScrollView(editor_image_win_name.string(),
|
||||
editor_image_xpos, editor_image_ypos,
|
||||
width + 1,
|
||||
|
@ -19,8 +19,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H__
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "platform.h"
|
||||
#include "ltrresultiterator.h"
|
||||
@ -241,4 +241,4 @@ class TESS_API ResultIterator : public LTRResultIterator {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H__
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
@ -40,11 +40,14 @@
|
||||
#include "efio.h"
|
||||
#include "danerror.h"
|
||||
#include "globals.h"
|
||||
#ifndef ANDROID_BUILD
|
||||
#include "lstmrecognizer.h"
|
||||
#endif
|
||||
#include "tesseractclass.h"
|
||||
#include "params.h"
|
||||
|
||||
#define VARDIR "configs/" /*variables files */
|
||||
//config under api
|
||||
// config under api
|
||||
#define API_CONFIG "configs/api_config"
|
||||
|
||||
ETEXT_DESC *global_monitor = NULL; // progress monitor
|
||||
@ -89,8 +92,8 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
const char *arg0, const char *textbase, const char *language,
|
||||
OcrEngineMode oem, char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_non_debug_params) {
|
||||
const GenericVector<STRING> *vars_values, bool set_only_non_debug_params,
|
||||
TessdataManager *mgr) {
|
||||
// Set the basename, compute the data directory.
|
||||
main_setup(arg0, textbase);
|
||||
|
||||
@ -102,20 +105,39 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
|
||||
// Initialize TessdataManager.
|
||||
STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
|
||||
if (!tessdata_manager.Init(tessdata_path.string(),
|
||||
tessdata_manager_debug_level)) {
|
||||
if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string())) {
|
||||
// Try without tessdata.
|
||||
m_data_sub_dir.set_value("");
|
||||
main_setup(arg0, textbase);
|
||||
language_data_path_prefix = datadir;
|
||||
language_data_path_prefix += lang;
|
||||
language_data_path_prefix += ".";
|
||||
tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
|
||||
if (!mgr->Init(tessdata_path.string())) {
|
||||
tprintf("Error opening data file %s\n", tessdata_path.string());
|
||||
tprintf(
|
||||
"Please make sure the TESSDATA_PREFIX environment variable is set"
|
||||
" to your \"tessdata\" directory.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (oem == OEM_DEFAULT) {
|
||||
// Set the engine mode from availability, which can then be overidden by
|
||||
// the config file when we read it below.
|
||||
if (!mgr->IsLSTMAvailable()) {
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
} else if (!mgr->IsBaseAvailable()) {
|
||||
tessedit_ocr_engine_mode.set_value(OEM_LSTM_ONLY);
|
||||
} else {
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_LSTM_COMBINED);
|
||||
}
|
||||
}
|
||||
|
||||
// If a language specific config file (lang.config) exists, load it in.
|
||||
if (tessdata_manager.SeekToStart(TESSDATA_LANG_CONFIG)) {
|
||||
ParamUtils::ReadParamsFromFp(
|
||||
tessdata_manager.GetDataFilePtr(),
|
||||
tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG),
|
||||
SET_PARAM_CONSTRAINT_NONE, this->params());
|
||||
if (tessdata_manager_debug_level) {
|
||||
tprintf("Loaded language config file\n");
|
||||
}
|
||||
TFile fp;
|
||||
if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) {
|
||||
ParamUtils::ReadParamsFromFp(SET_PARAM_CONSTRAINT_NONE, &fp,
|
||||
this->params());
|
||||
}
|
||||
|
||||
SetParamConstraint set_params_constraint = set_only_non_debug_params ?
|
||||
@ -145,10 +167,6 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
if (params_file != NULL) {
|
||||
ParamUtils::PrintParams(params_file, this->params());
|
||||
fclose(params_file);
|
||||
if (tessdata_manager_debug_level > 0) {
|
||||
tprintf("Wrote parameters to %s\n",
|
||||
tessedit_write_params_to_file.string());
|
||||
}
|
||||
} else {
|
||||
tprintf("Failed to open %s for writing params.\n",
|
||||
tessedit_write_params_to_file.string());
|
||||
@ -157,30 +175,48 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
|
||||
// Determine which ocr engine(s) should be loaded and used for recognition.
|
||||
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
|
||||
if (tessdata_manager_debug_level) {
|
||||
tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
|
||||
static_cast<int>(tessedit_ocr_engine_mode));
|
||||
}
|
||||
|
||||
// If we are only loading the config file (and so not planning on doing any
|
||||
// recognition) then there's nothing else do here.
|
||||
if (tessedit_init_config_only) {
|
||||
if (tessdata_manager_debug_level) {
|
||||
tprintf("Returning after loading config file\n");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// The various OcrEngineMode settings (see publictypes.h) determine which
|
||||
// engine-specific data files need to be loaded.
|
||||
// If LSTM_ONLY is requested, the base Tesseract files are *Not* required.
|
||||
#ifndef ANDROID_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
|
||||
if (mgr->swap()) {
|
||||
tprintf("Error: LSTM requested on big-endian hardware!!\n");
|
||||
tprintf("Big-endian not yet supported! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
} else if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
|
||||
lstm_recognizer_ = new LSTMRecognizer;
|
||||
ASSERT_HOST(lstm_recognizer_->DeSerialize(mgr->swap(), &fp));
|
||||
if (lstm_use_matrix) lstm_recognizer_->LoadDictionary(language, mgr);
|
||||
} else {
|
||||
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Load the unicharset
|
||||
if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
|
||||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
// Avoid requiring a unicharset when we aren't running base tesseract.
|
||||
#ifndef ANDROID_BUILD
|
||||
unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
|
||||
#endif
|
||||
} else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) ||
|
||||
!unicharset.load_from_file(&fp, false)) {
|
||||
return false;
|
||||
}
|
||||
if (unicharset.size() > MAX_NUM_CLASSES) {
|
||||
tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
|
||||
return false;
|
||||
}
|
||||
if (tessdata_manager_debug_level) tprintf("Loaded unicharset\n");
|
||||
right_to_left_ = unicharset.major_right_to_left();
|
||||
|
||||
// Setup initial unichar ambigs table and read universal ambigs.
|
||||
@ -189,33 +225,11 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
unichar_ambigs.InitUnicharAmbigs(unicharset, use_ambigs_for_adaption);
|
||||
unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset);
|
||||
|
||||
if (!tessedit_ambigs_training &&
|
||||
tessdata_manager.SeekToStart(TESSDATA_AMBIGS)) {
|
||||
TFile ambigs_file;
|
||||
ambigs_file.Open(tessdata_manager.GetDataFilePtr(),
|
||||
tessdata_manager.GetEndOffset(TESSDATA_AMBIGS) + 1);
|
||||
unichar_ambigs.LoadUnicharAmbigs(
|
||||
encoder_unicharset,
|
||||
&ambigs_file,
|
||||
ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
|
||||
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
|
||||
if (!tessedit_ambigs_training && mgr->GetComponent(TESSDATA_AMBIGS, &fp)) {
|
||||
unichar_ambigs.LoadUnicharAmbigs(encoder_unicharset, &fp,
|
||||
ambigs_debug_level,
|
||||
use_ambigs_for_adaption, &unicharset);
|
||||
}
|
||||
|
||||
// The various OcrEngineMode settings (see publictypes.h) determine which
|
||||
// engine-specific data files need to be loaded. Currently everything needs
|
||||
// the base tesseract data, which supplies other useful information, but
|
||||
// alternative engines, such as cube and LSTM are optional.
|
||||
#ifndef NO_CUBE_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
|
||||
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Loaded Cube w/out combiner\n");
|
||||
} else if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
|
||||
ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Loaded Cube with combiner\n");
|
||||
}
|
||||
#endif
|
||||
// Init ParamsModel.
|
||||
// Load pass1 and pass2 weights (for now these two sets are the same, but in
|
||||
// the future separate sets of weights can be generated).
|
||||
@ -223,15 +237,12 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
p < ParamsModel::PTRAIN_NUM_PASSES; ++p) {
|
||||
language_model_->getParamsModel().SetPass(
|
||||
static_cast<ParamsModel::PassEnum>(p));
|
||||
if (tessdata_manager.SeekToStart(TESSDATA_PARAMS_MODEL)) {
|
||||
if (!language_model_->getParamsModel().LoadFromFp(
|
||||
lang.string(), tessdata_manager.GetDataFilePtr(),
|
||||
tessdata_manager.GetEndOffset(TESSDATA_PARAMS_MODEL))) {
|
||||
if (mgr->GetComponent(TESSDATA_PARAMS_MODEL, &fp)) {
|
||||
if (!language_model_->getParamsModel().LoadFromFp(lang.string(), &fp)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (tessdata_manager_debug_level) language_model_->getParamsModel().Print();
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -276,8 +287,6 @@ void Tesseract::ParseLanguageString(const char* lang_str,
|
||||
remains = next;
|
||||
// Check whether lang_code is already in the target vector and add.
|
||||
if (!IsStrInList(lang_code, *target)) {
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Adding language '%s' to list\n", lang_code.string());
|
||||
target->push_back(lang_code);
|
||||
}
|
||||
}
|
||||
@ -287,12 +296,13 @@ void Tesseract::ParseLanguageString(const char* lang_str,
|
||||
// string and recursively any additional languages required by any language
|
||||
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
|
||||
// See init_tesseract_internal for args.
|
||||
int Tesseract::init_tesseract(
|
||||
const char *arg0, const char *textbase, const char *language,
|
||||
OcrEngineMode oem, char **configs, int configs_size,
|
||||
int Tesseract::init_tesseract(const char *arg0, const char *textbase,
|
||||
const char *language, OcrEngineMode oem,
|
||||
char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_non_debug_params) {
|
||||
bool set_only_non_debug_params,
|
||||
TessdataManager *mgr) {
|
||||
GenericVector<STRING> langs_to_load;
|
||||
GenericVector<STRING> langs_not_to_load;
|
||||
ParseLanguageString(language, &langs_to_load, &langs_not_to_load);
|
||||
@ -314,15 +324,15 @@ int Tesseract::init_tesseract(
|
||||
}
|
||||
|
||||
int result = tess_to_init->init_tesseract_internal(
|
||||
arg0, textbase, lang_str, oem, configs, configs_size,
|
||||
vars_vec, vars_values, set_only_non_debug_params);
|
||||
arg0, textbase, lang_str, oem, configs, configs_size, vars_vec,
|
||||
vars_values, set_only_non_debug_params, mgr);
|
||||
// Forget that language, but keep any reader we were given.
|
||||
mgr->Clear();
|
||||
|
||||
if (!loaded_primary) {
|
||||
if (result < 0) {
|
||||
tprintf("Failed loading language '%s'\n", lang_str);
|
||||
} else {
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Loaded language '%s' as main language\n", lang_str);
|
||||
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
|
||||
&langs_to_load, &langs_not_to_load);
|
||||
loaded_primary = true;
|
||||
@ -332,8 +342,6 @@ int Tesseract::init_tesseract(
|
||||
tprintf("Failed loading language '%s'\n", lang_str);
|
||||
delete tess_to_init;
|
||||
} else {
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Loaded language '%s' as secondary language\n", lang_str);
|
||||
sub_langs_.push_back(tess_to_init);
|
||||
// Add any languages that this language requires
|
||||
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
|
||||
@ -358,16 +366,11 @@ int Tesseract::init_tesseract(
|
||||
this->language_model_->getParamsModel());
|
||||
}
|
||||
tprintf("Using params model of the primary language\n");
|
||||
if (tessdata_manager_debug_level) {
|
||||
this->language_model_->getParamsModel().Print();
|
||||
}
|
||||
} else {
|
||||
this->language_model_->getParamsModel().Clear();
|
||||
for (int s = 0; s < sub_langs_.size(); ++s) {
|
||||
sub_langs_[s]->language_model_->getParamsModel().Clear();
|
||||
}
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Using default language params\n");
|
||||
}
|
||||
}
|
||||
|
||||
@ -391,33 +394,26 @@ int Tesseract::init_tesseract(
|
||||
// in vars_vec.
|
||||
// If set_only_init_params is true, then only the initialization variables
|
||||
// will be set.
|
||||
int Tesseract::init_tesseract_internal(
|
||||
const char *arg0, const char *textbase, const char *language,
|
||||
OcrEngineMode oem, char **configs, int configs_size,
|
||||
int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase,
|
||||
const char *language, OcrEngineMode oem,
|
||||
char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_non_debug_params) {
|
||||
bool set_only_non_debug_params,
|
||||
TessdataManager *mgr) {
|
||||
if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
|
||||
configs_size, vars_vec, vars_values,
|
||||
set_only_non_debug_params)) {
|
||||
set_only_non_debug_params, mgr)) {
|
||||
return -1;
|
||||
}
|
||||
if (tessedit_init_config_only) {
|
||||
tessdata_manager.End();
|
||||
return 0;
|
||||
}
|
||||
// If only Cube will be used, skip loading Tesseract classifier's
|
||||
// pre-trained templates.
|
||||
bool init_tesseract_classifier =
|
||||
(tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED);
|
||||
// If only Cube will be used and if it has its own Unicharset,
|
||||
// skip initializing permuter and loading Tesseract Dawgs.
|
||||
bool init_dict =
|
||||
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
|
||||
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
|
||||
program_editup(textbase, init_tesseract_classifier, init_dict);
|
||||
tessdata_manager.End();
|
||||
// If only LSTM will be used, skip loading Tesseract classifier's
|
||||
// pre-trained templates and dictionary.
|
||||
bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
|
||||
program_editup(textbase, init_tesseract ? mgr : nullptr,
|
||||
init_tesseract ? mgr : nullptr);
|
||||
return 0; //Normal exit
|
||||
}
|
||||
|
||||
@ -462,14 +458,14 @@ void Tesseract::SetupUniversalFontIds() {
|
||||
}
|
||||
|
||||
// init the LM component
|
||||
int Tesseract::init_tesseract_lm(const char *arg0,
|
||||
const char *textbase,
|
||||
const char *language) {
|
||||
int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase,
|
||||
const char *language, TessdataManager *mgr) {
|
||||
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
|
||||
NULL, 0, NULL, NULL, false))
|
||||
NULL, 0, NULL, NULL, false, mgr))
|
||||
return -1;
|
||||
getDict().Load(Dict::GlobalDawgCache());
|
||||
tessdata_manager.End();
|
||||
getDict().SetupForLoad(Dict::GlobalDawgCache());
|
||||
getDict().Load(lang, mgr);
|
||||
getDict().FinishLoad();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1,306 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tesseract_cube_combiner.h
|
||||
* Description: Declaration of the Tesseract & Cube results combiner Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TesseractCubeCombiner class provides the functionality of combining
|
||||
// the recognition results of Tesseract and Cube at the word level
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "tesseract_cube_combiner.h"
|
||||
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_utils.h"
|
||||
#include "neural_net.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "word_altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
TesseractCubeCombiner::TesseractCubeCombiner(CubeRecoContext *cube_cntxt) {
|
||||
cube_cntxt_ = cube_cntxt;
|
||||
combiner_net_ = NULL;
|
||||
}
|
||||
|
||||
TesseractCubeCombiner::~TesseractCubeCombiner() {
|
||||
if (combiner_net_ != NULL) {
|
||||
delete combiner_net_;
|
||||
combiner_net_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool TesseractCubeCombiner::LoadCombinerNet() {
|
||||
ASSERT_HOST(cube_cntxt_);
|
||||
// Compute the path of the combiner net
|
||||
string data_path;
|
||||
cube_cntxt_->GetDataFilePath(&data_path);
|
||||
string net_file_name = data_path + cube_cntxt_->Lang() +
|
||||
".tesseract_cube.nn";
|
||||
|
||||
// Return false if file does not exist
|
||||
FILE *fp = fopen(net_file_name.c_str(), "rb");
|
||||
if (fp == NULL)
|
||||
return false;
|
||||
else
|
||||
fclose(fp);
|
||||
|
||||
// Load and validate net
|
||||
combiner_net_ = NeuralNet::FromFile(net_file_name);
|
||||
if (combiner_net_ == NULL) {
|
||||
tprintf("Could not read combiner net file %s", net_file_name.c_str());
|
||||
return false;
|
||||
} else if (combiner_net_->out_cnt() != 2) {
|
||||
tprintf("Invalid combiner net file %s! Output count != 2\n",
|
||||
net_file_name.c_str());
|
||||
delete combiner_net_;
|
||||
combiner_net_ = NULL;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
|
||||
// strips punc and/or normalizes case and then converts back
|
||||
string TesseractCubeCombiner::NormalizeString(const string &str,
|
||||
bool remove_punc,
|
||||
bool norm_case) {
|
||||
// convert to UTF32
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
|
||||
// strip punc and normalize
|
||||
string_32 new_str32;
|
||||
for (int idx = 0; idx < str32.length(); idx++) {
|
||||
// if no punc removal is required or not a punctuation character
|
||||
if (!remove_punc || iswpunct(str32[idx]) == 0) {
|
||||
char_32 norm_char = str32[idx];
|
||||
// normalize case if required
|
||||
if (norm_case && iswalpha(norm_char)) {
|
||||
norm_char = towlower(norm_char);
|
||||
}
|
||||
new_str32.push_back(norm_char);
|
||||
}
|
||||
}
|
||||
// convert back to UTF8
|
||||
string new_str;
|
||||
CubeUtils::UTF32ToUTF8(new_str32.c_str(), &new_str);
|
||||
return new_str;
|
||||
}
|
||||
|
||||
// Compares 2 strings optionally ignoring punctuation
|
||||
int TesseractCubeCombiner::CompareStrings(const string &str1,
|
||||
const string &str2,
|
||||
bool ignore_punc,
|
||||
bool ignore_case) {
|
||||
if (!ignore_punc && !ignore_case) {
|
||||
return str1.compare(str2);
|
||||
}
|
||||
string norm_str1 = NormalizeString(str1, ignore_punc, ignore_case);
|
||||
string norm_str2 = NormalizeString(str2, ignore_punc, ignore_case);
|
||||
return norm_str1.compare(norm_str2);
|
||||
}
|
||||
|
||||
// Check if a string is a valid Tess dict word or not
|
||||
bool TesseractCubeCombiner::ValidWord(const string &str) {
|
||||
return (cube_cntxt_->TesseractObject()->getDict().valid_word(str.c_str())
|
||||
> 0);
|
||||
}
|
||||
|
||||
// Public method for computing the combiner features. The agreement
|
||||
// output parameter will be true if both answers are identical,
|
||||
// and false otherwise.
|
||||
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
|
||||
int tess_confidence,
|
||||
CubeObject *cube_obj,
|
||||
WordAltList *cube_alt_list,
|
||||
vector<double> *features,
|
||||
bool *agreement) {
|
||||
features->clear();
|
||||
*agreement = false;
|
||||
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
|
||||
return false;
|
||||
|
||||
// Get Cube's best string; return false if empty
|
||||
char_32 *cube_best_str32 = cube_alt_list->Alt(0);
|
||||
if (cube_best_str32 == NULL || CubeUtils::StrLen(cube_best_str32) < 1)
|
||||
return false;
|
||||
string cube_best_str;
|
||||
int cube_best_cost = cube_alt_list->AltCost(0);
|
||||
int cube_best_bigram_cost = 0;
|
||||
bool cube_best_bigram_cost_valid = true;
|
||||
if (cube_cntxt_->Bigrams())
|
||||
cube_best_bigram_cost = cube_cntxt_->Bigrams()->
|
||||
Cost(cube_best_str32, cube_cntxt_->CharacterSet());
|
||||
else
|
||||
cube_best_bigram_cost_valid = false;
|
||||
CubeUtils::UTF32ToUTF8(cube_best_str32, &cube_best_str);
|
||||
|
||||
// Get Tesseract's UTF32 string
|
||||
string_32 tess_str32;
|
||||
CubeUtils::UTF8ToUTF32(tess_str.c_str(), &tess_str32);
|
||||
|
||||
// Compute agreement flag
|
||||
*agreement = (tess_str.compare(cube_best_str) == 0);
|
||||
|
||||
// Get Cube's second best string; if empty, return false
|
||||
char_32 *cube_next_best_str32;
|
||||
string cube_next_best_str;
|
||||
int cube_next_best_cost = WORST_COST;
|
||||
if (cube_alt_list->AltCount() > 1) {
|
||||
cube_next_best_str32 = cube_alt_list->Alt(1);
|
||||
if (cube_next_best_str32 == NULL ||
|
||||
CubeUtils::StrLen(cube_next_best_str32) == 0) {
|
||||
return false;
|
||||
}
|
||||
cube_next_best_cost = cube_alt_list->AltCost(1);
|
||||
CubeUtils::UTF32ToUTF8(cube_next_best_str32, &cube_next_best_str);
|
||||
}
|
||||
// Rank of Tesseract's top result in Cube's alternate list
|
||||
int tess_rank = 0;
|
||||
for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) {
|
||||
string alt_str;
|
||||
CubeUtils::UTF32ToUTF8(cube_alt_list->Alt(tess_rank), &alt_str);
|
||||
if (alt_str == tess_str)
|
||||
break;
|
||||
}
|
||||
|
||||
// Cube's cost for tesseract's result. Note that this modifies the
|
||||
// state of cube_obj, including its alternate list by calling RecognizeWord()
|
||||
int tess_cost = cube_obj->WordCost(tess_str.c_str());
|
||||
// Cube's bigram cost of Tesseract's string
|
||||
int tess_bigram_cost = 0;
|
||||
int tess_bigram_cost_valid = true;
|
||||
if (cube_cntxt_->Bigrams())
|
||||
tess_bigram_cost = cube_cntxt_->Bigrams()->
|
||||
Cost(tess_str32.c_str(), cube_cntxt_->CharacterSet());
|
||||
else
|
||||
tess_bigram_cost_valid = false;
|
||||
|
||||
// Tesseract confidence
|
||||
features->push_back(tess_confidence);
|
||||
// Cube cost of Tesseract string
|
||||
features->push_back(tess_cost);
|
||||
// Cube Rank of Tesseract string
|
||||
features->push_back(tess_rank);
|
||||
// length of Tesseract OCR string
|
||||
features->push_back(tess_str.length());
|
||||
// Tesseract OCR string in dictionary
|
||||
features->push_back(ValidWord(tess_str));
|
||||
if (tess_bigram_cost_valid) {
|
||||
// bigram cost of Tesseract string
|
||||
features->push_back(tess_bigram_cost);
|
||||
}
|
||||
// Cube tess_cost of Cube best string
|
||||
features->push_back(cube_best_cost);
|
||||
// Cube tess_cost of Cube next best string
|
||||
features->push_back(cube_next_best_cost);
|
||||
// length of Cube string
|
||||
features->push_back(cube_best_str.length());
|
||||
// Cube string in dictionary
|
||||
features->push_back(ValidWord(cube_best_str));
|
||||
if (cube_best_bigram_cost_valid) {
|
||||
// bigram cost of Cube string
|
||||
features->push_back(cube_best_bigram_cost);
|
||||
}
|
||||
// case-insensitive string comparison, including punctuation
|
||||
int compare_nocase_punc = CompareStrings(cube_best_str,
|
||||
tess_str, false, true);
|
||||
features->push_back(compare_nocase_punc == 0);
|
||||
// case-sensitive string comparison, ignoring punctuation
|
||||
int compare_case_nopunc = CompareStrings(cube_best_str,
|
||||
tess_str, true, false);
|
||||
features->push_back(compare_case_nopunc == 0);
|
||||
// case-insensitive string comparison, ignoring punctuation
|
||||
int compare_nocase_nopunc = CompareStrings(cube_best_str,
|
||||
tess_str, true, true);
|
||||
features->push_back(compare_nocase_nopunc == 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// The CubeObject parameter is used for 2 purposes: 1) to retrieve
|
||||
// cube's alt list, and 2) to compute cube's word cost for the
|
||||
// tesseract result. The call to CubeObject::WordCost() modifies
|
||||
// the object's alternate list, so previous state will be lost.
|
||||
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
|
||||
CubeObject *cube_obj) {
|
||||
// If no combiner is loaded or the cube object is undefined,
|
||||
// tesseract wins with probability 1.0
|
||||
if (combiner_net_ == NULL || cube_obj == NULL) {
|
||||
tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
|
||||
"Cube objects not initialized; defaulting to Tesseract\n");
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
// Retrieve the alternate list from the CubeObject's current state.
|
||||
// If the alt list empty, tesseract wins with probability 1.0
|
||||
WordAltList *cube_alt_list = cube_obj->AlternateList();
|
||||
if (cube_alt_list == NULL)
|
||||
cube_alt_list = cube_obj->RecognizeWord();
|
||||
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) {
|
||||
tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
|
||||
"Cube returned no results; defaulting to Tesseract\n");
|
||||
return 1.0;
|
||||
}
|
||||
return CombineResults(tess_res, cube_obj, cube_alt_list);
|
||||
}
|
||||
|
||||
// The alt_list parameter is expected to have been extracted from the
|
||||
// CubeObject that recognized the word to be combined. The cube_obj
|
||||
// parameter passed may be either same instance or a separate instance to
|
||||
// be used only by the combiner. In both cases, its alternate
|
||||
// list will be modified by an internal call to RecognizeWord().
|
||||
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
|
||||
CubeObject *cube_obj,
|
||||
WordAltList *cube_alt_list) {
|
||||
// If no combiner is loaded or the cube object is undefined, or the
|
||||
// alt list is empty, tesseract wins with probability 1.0
|
||||
if (combiner_net_ == NULL || cube_obj == NULL ||
|
||||
cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) {
|
||||
tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
|
||||
"Cube result cannot be retrieved; defaulting to Tesseract\n");
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
// Tesseract result string, tesseract confidence, and cost of
|
||||
// tesseract result according to cube
|
||||
string tess_str = tess_res->best_choice->unichar_string().string();
|
||||
// Map certainty [-20.0, 0.0] to confidence [0, 100]
|
||||
int tess_confidence = MIN(100, MAX(1, static_cast<int>(
|
||||
100 + (5 * tess_res->best_choice->certainty()))));
|
||||
|
||||
// Compute the combiner features. If feature computation fails or
|
||||
// answers are identical, tesseract wins with probability 1.0
|
||||
vector<double> features;
|
||||
bool agreement;
|
||||
bool combiner_success = ComputeCombinerFeatures(tess_str, tess_confidence,
|
||||
cube_obj, cube_alt_list,
|
||||
&features, &agreement);
|
||||
if (!combiner_success || agreement)
|
||||
return 1.0;
|
||||
|
||||
// Classify combiner feature vector and return output (probability
|
||||
// of tesseract class).
|
||||
double net_out[2];
|
||||
if (!combiner_net_->FeedForward(&features[0], net_out))
|
||||
return 1.0;
|
||||
return net_out[1];
|
||||
}
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tesseract_cube_combiner.h
|
||||
* Description: Declaration of the Tesseract & Cube results combiner Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TesseractCubeCombiner class provides the functionality of combining
|
||||
// the recognition results of Tesseract and Cube at the word level
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
|
||||
#define TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "pageres.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
using namespace std;
|
||||
#endif
|
||||
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::string;
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CubeObject;
|
||||
class NeuralNet;
|
||||
class CubeRecoContext;
|
||||
class WordAltList;
|
||||
|
||||
class TesseractCubeCombiner {
|
||||
public:
|
||||
explicit TesseractCubeCombiner(CubeRecoContext *cube_cntxt);
|
||||
virtual ~TesseractCubeCombiner();
|
||||
|
||||
// There are 2 public methods for combining the results of tesseract
|
||||
// and cube. Both return the probability that the Tesseract result is
|
||||
// correct. The difference between the two interfaces is in how the
|
||||
// passed-in CubeObject is used.
|
||||
|
||||
// The CubeObject parameter is used for 2 purposes: 1) to retrieve
|
||||
// cube's alt list, and 2) to compute cube's word cost for the
|
||||
// tesseract result. Both uses may modify the state of the
|
||||
// CubeObject (including the BeamSearch state) with a call to
|
||||
// RecognizeWord().
|
||||
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj);
|
||||
|
||||
// The alt_list parameter is expected to have been extracted from the
|
||||
// CubeObject that recognized the word to be combined. The cube_obj
|
||||
// parameter passed in is a separate instance to be used only by
|
||||
// the combiner.
|
||||
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj,
|
||||
WordAltList *alt_list);
|
||||
|
||||
// Public method for computing the combiner features. The agreement
|
||||
// output parameter will be true if both answers are identical,
|
||||
// false otherwise. Modifies the cube_alt_list, so no assumptions
|
||||
// should be made about its state upon return.
|
||||
bool ComputeCombinerFeatures(const string &tess_res,
|
||||
int tess_confidence,
|
||||
CubeObject *cube_obj,
|
||||
WordAltList *cube_alt_list,
|
||||
vector<double> *features,
|
||||
bool *agreement);
|
||||
|
||||
// Is the word valid according to Tesseract's language model
|
||||
bool ValidWord(const string &str);
|
||||
|
||||
// Loads the combiner neural network from file, using cube_cntxt_
|
||||
// to find path.
|
||||
bool LoadCombinerNet();
|
||||
private:
|
||||
// Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
|
||||
// strips punc and/or normalizes case and then converts back
|
||||
string NormalizeString(const string &str, bool remove_punc, bool norm_case);
|
||||
|
||||
// Compares 2 strings after optionally normalizing them and or stripping
|
||||
// punctuation
|
||||
int CompareStrings(const string &str1, const string &str2, bool ignore_punc,
|
||||
bool norm_case);
|
||||
|
||||
NeuralNet *combiner_net_; // pointer to the combiner NeuralNet object
|
||||
CubeRecoContext *cube_cntxt_; // used for language ID and data paths
|
||||
};
|
||||
}
|
||||
|
||||
#endif // TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
|
@ -42,14 +42,11 @@
|
||||
#include "tesseractclass.h"
|
||||
|
||||
#include "allheaders.h"
|
||||
#ifndef NO_CUBE_BUILD
|
||||
#include "cube_reco_context.h"
|
||||
#endif
|
||||
#include "edgblob.h"
|
||||
#include "equationdetect.h"
|
||||
#include "globals.h"
|
||||
#ifndef NO_CUBE_BUILD
|
||||
#include "tesseract_cube_combiner.h"
|
||||
#ifndef ANDROID_BUILD
|
||||
#include "lstmrecognizer.h"
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
@ -65,6 +62,9 @@ Tesseract::Tesseract()
|
||||
"Generate training data from boxed chars", this->params()),
|
||||
BOOL_MEMBER(tessedit_make_boxes_from_boxes, false,
|
||||
"Generate more boxes from boxed chars", this->params()),
|
||||
BOOL_MEMBER(tessedit_train_line_recognizer, false,
|
||||
"Break input into lines and remap boxes if present",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_dump_pageseg_images, false,
|
||||
"Dump intermediate images made during page segmentation",
|
||||
this->params()),
|
||||
@ -76,11 +76,10 @@ Tesseract::Tesseract()
|
||||
" 5=line, 6=word, 7=char"
|
||||
" (Values from PageSegMode enum in publictypes.h)",
|
||||
this->params()),
|
||||
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
|
||||
"Which OCR engine(s) to run (Tesseract, Cube, both)."
|
||||
" Defaults to loading and running only Tesseract"
|
||||
" (no Cube,no combiner)."
|
||||
" Values from OcrEngineMode enum in tesseractclass.h)",
|
||||
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
|
||||
"Which OCR engine(s) to run (Tesseract, LSTM, both)."
|
||||
" Defaults to loading and running the most accurate"
|
||||
" available.",
|
||||
this->params()),
|
||||
STRING_MEMBER(tessedit_char_blacklist, "",
|
||||
"Blacklist of chars not to recognize", this->params()),
|
||||
@ -215,13 +214,16 @@ Tesseract::Tesseract()
|
||||
BOOL_MEMBER(test_pt, false, "Test for point", this->params()),
|
||||
double_MEMBER(test_pt_x, 99999.99, "xcoord", this->params()),
|
||||
double_MEMBER(test_pt_y, 99999.99, "ycoord", this->params()),
|
||||
INT_MEMBER(multilang_debug_level, 0, "Print multilang debug info.",
|
||||
this->params()),
|
||||
INT_MEMBER(paragraph_debug_level, 0, "Print paragraph debug info.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(paragraph_text_based, true,
|
||||
"Run paragraph detection on the post-text-recognition "
|
||||
"(more accurate)",
|
||||
this->params()),
|
||||
INT_MEMBER(cube_debug_level, 0, "Print cube debug info.", this->params()),
|
||||
BOOL_MEMBER(lstm_use_matrix, 1,
|
||||
"Use ratings matrix/beam search with lstm", this->params()),
|
||||
STRING_MEMBER(outlines_odd, "%| ", "Non standard number of outlines",
|
||||
this->params()),
|
||||
STRING_MEMBER(outlines_2, "ij!?%\":;", "Non standard number of outlines",
|
||||
@ -265,7 +267,7 @@ Tesseract::Tesseract()
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_debug_quality_metrics, false,
|
||||
"Output data to debug file", this->params()),
|
||||
BOOL_MEMBER(bland_unrej, false, "unrej potential with no chekcs",
|
||||
BOOL_MEMBER(bland_unrej, false, "unrej potential with no checks",
|
||||
this->params()),
|
||||
double_MEMBER(quality_rowrej_pc, 1.1,
|
||||
"good_quality_doc gte good char limit", this->params()),
|
||||
@ -389,6 +391,9 @@ Tesseract::Tesseract()
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(textonly_pdf, false,
|
||||
"Create PDF with only one invisible text layer",
|
||||
this->params()),
|
||||
STRING_MEMBER(unrecognised_char, "|",
|
||||
"Output char for unidentified blobs", this->params()),
|
||||
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
|
||||
@ -398,8 +403,8 @@ Tesseract::Tesseract()
|
||||
"Don't suspect dict wds longer than this", this->params()),
|
||||
BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
|
||||
this->params()),
|
||||
double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit",
|
||||
this->params()),
|
||||
double_MEMBER(suspect_rating_per_ch, 999.9,
|
||||
"Don't touch bad rating limit", this->params()),
|
||||
double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_minimal_rejection, false,
|
||||
@ -452,7 +457,7 @@ Tesseract::Tesseract()
|
||||
this->params()),
|
||||
INT_MEMBER(tessedit_page_number, -1,
|
||||
"-1 -> All pages"
|
||||
" , else specifc page to process",
|
||||
" , else specific page to process",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_write_images, false,
|
||||
"Capture the image from the IPE", this->params()),
|
||||
@ -461,10 +466,6 @@ Tesseract::Tesseract()
|
||||
STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()),
|
||||
BOOL_MEMBER(tessedit_override_permuter, true, "According to dict_word",
|
||||
this->params()),
|
||||
INT_MEMBER(tessdata_manager_debug_level, 0,
|
||||
"Debug level for"
|
||||
" TessdataManager functions.",
|
||||
this->params()),
|
||||
STRING_MEMBER(tessedit_load_sublangs, "",
|
||||
"List of languages to load with this one", this->params()),
|
||||
BOOL_MEMBER(tessedit_use_primary_params_model, false,
|
||||
@ -512,7 +513,6 @@ Tesseract::Tesseract()
|
||||
"Page separator (default is form feed control character)",
|
||||
this->params()),
|
||||
|
||||
|
||||
// The following parameters were deprecated and removed from their
|
||||
// original
|
||||
// locations. The parameters are temporarily kept here to give Tesseract
|
||||
@ -604,8 +604,8 @@ Tesseract::Tesseract()
|
||||
|
||||
backup_config_file_(NULL),
|
||||
pix_binary_(NULL),
|
||||
cube_binary_(NULL),
|
||||
pix_grey_(NULL),
|
||||
pix_original_(NULL),
|
||||
pix_thresholds_(NULL),
|
||||
source_resolution_(0),
|
||||
textord_(this),
|
||||
@ -616,33 +616,28 @@ Tesseract::Tesseract()
|
||||
reskew_(1.0f, 0.0f),
|
||||
most_recently_used_(this),
|
||||
font_table_size_(0),
|
||||
#ifndef NO_CUBE_BUILD
|
||||
cube_cntxt_(NULL),
|
||||
tess_cube_combiner_(NULL),
|
||||
equ_detect_(NULL),
|
||||
#ifndef ANDROID_BUILD
|
||||
lstm_recognizer_(NULL),
|
||||
#endif
|
||||
equ_detect_(NULL) {
|
||||
train_line_page_num_(0) {
|
||||
}
|
||||
|
||||
Tesseract::~Tesseract() {
|
||||
Clear();
|
||||
pixDestroy(&pix_original_);
|
||||
end_tesseract();
|
||||
sub_langs_.delete_data_pointers();
|
||||
#ifndef NO_CUBE_BUILD
|
||||
// Delete cube objects.
|
||||
if (cube_cntxt_ != NULL) {
|
||||
delete cube_cntxt_;
|
||||
cube_cntxt_ = NULL;
|
||||
}
|
||||
if (tess_cube_combiner_ != NULL) {
|
||||
delete tess_cube_combiner_;
|
||||
tess_cube_combiner_ = NULL;
|
||||
}
|
||||
#ifndef ANDROID_BUILD
|
||||
delete lstm_recognizer_;
|
||||
lstm_recognizer_ = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Tesseract::Clear() {
|
||||
STRING debug_name = imagebasename + "_debug.pdf";
|
||||
pixa_debug_.WritePDF(debug_name.string());
|
||||
pixDestroy(&pix_binary_);
|
||||
pixDestroy(&cube_binary_);
|
||||
pixDestroy(&pix_grey_);
|
||||
pixDestroy(&pix_thresholds_);
|
||||
pixDestroy(&scaled_color_);
|
||||
@ -692,8 +687,6 @@ void Tesseract::SetBlackAndWhitelist() {
|
||||
// page segmentation.
|
||||
void Tesseract::PrepareForPageseg() {
|
||||
textord_.set_use_cjk_fp_model(textord_use_cjk_fp_model);
|
||||
pixDestroy(&cube_binary_);
|
||||
cube_binary_ = pixClone(pix_binary());
|
||||
// Find the max splitter strategy over all langs.
|
||||
ShiroRekhaSplitter::SplitStrategy max_pageseg_strategy =
|
||||
static_cast<ShiroRekhaSplitter::SplitStrategy>(
|
||||
@ -704,9 +697,6 @@ void Tesseract::PrepareForPageseg() {
|
||||
static_cast<inT32>(sub_langs_[i]->pageseg_devanagari_split_strategy));
|
||||
if (pageseg_strategy > max_pageseg_strategy)
|
||||
max_pageseg_strategy = pageseg_strategy;
|
||||
// Clone the cube image to all the sub langs too.
|
||||
pixDestroy(&sub_langs_[i]->cube_binary_);
|
||||
sub_langs_[i]->cube_binary_ = pixClone(pix_binary());
|
||||
pixDestroy(&sub_langs_[i]->pix_binary_);
|
||||
sub_langs_[i]->pix_binary_ = pixClone(pix_binary());
|
||||
}
|
||||
@ -714,7 +704,7 @@ void Tesseract::PrepareForPageseg() {
|
||||
// the newly splitted image.
|
||||
splitter_.set_orig_pix(pix_binary());
|
||||
splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
|
||||
if (splitter_.Split(true)) {
|
||||
if (splitter_.Split(true, &pixa_debug_)) {
|
||||
ASSERT_HOST(splitter_.splitted_image());
|
||||
pixDestroy(&pix_binary_);
|
||||
pix_binary_ = pixClone(splitter_.splitted_image());
|
||||
@ -743,7 +733,7 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list,
|
||||
splitter_.set_segmentation_block_list(block_list);
|
||||
splitter_.set_ocr_split_strategy(max_ocr_strategy);
|
||||
// Run the splitter for OCR
|
||||
bool split_for_ocr = splitter_.Split(false);
|
||||
bool split_for_ocr = splitter_.Split(false, &pixa_debug_);
|
||||
// Restore pix_binary to the binarized original pix for future reference.
|
||||
ASSERT_HOST(splitter_.orig_pix());
|
||||
pixDestroy(&pix_binary_);
|
||||
|
@ -23,22 +23,22 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__
|
||||
#define TESSERACT_CCMAIN_TESSERACTCLASS_H__
|
||||
#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H_
|
||||
#define TESSERACT_CCMAIN_TESSERACTCLASS_H_
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "control.h"
|
||||
#include "docqual.h"
|
||||
#include "debugpixa.h"
|
||||
#include "devanagari_processing.h"
|
||||
#include "docqual.h"
|
||||
#include "genericvector.h"
|
||||
#include "params.h"
|
||||
#include "ocrclass.h"
|
||||
#include "params.h"
|
||||
#include "textord.h"
|
||||
#include "wordrec.h"
|
||||
|
||||
class BLOB_CHOICE_LIST_CLIST;
|
||||
class BLOCK_LIST;
|
||||
class CharSamp;
|
||||
struct OSResults;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
@ -77,8 +77,7 @@ class WERD_RES;
|
||||
// WordRec (wordrec/wordrec.h)
|
||||
// ^ Members include: WERD*, DENORM*
|
||||
// Tesseract (ccmain/tesseractclass.h)
|
||||
// Members include: Pix*, CubeRecoContext*,
|
||||
// TesseractCubeCombiner*
|
||||
// Members include: Pix*
|
||||
//
|
||||
// Other important classes:
|
||||
//
|
||||
@ -97,16 +96,11 @@ class WERD_RES;
|
||||
namespace tesseract {
|
||||
|
||||
class ColumnFinder;
|
||||
#ifndef NO_CUBE_BUILD
|
||||
class CubeLineObject;
|
||||
class CubeObject;
|
||||
class CubeRecoContext;
|
||||
#endif
|
||||
class DocumentData;
|
||||
class EquationDetect;
|
||||
class ImageData;
|
||||
class LSTMRecognizer;
|
||||
class Tesseract;
|
||||
#ifndef NO_CUBE_BUILD
|
||||
class TesseractCubeCombiner;
|
||||
#endif
|
||||
|
||||
// A collection of various variables for statistics and debugging.
|
||||
struct TesseractStats {
|
||||
@ -189,7 +183,7 @@ class Tesseract : public Wordrec {
|
||||
}
|
||||
// Destroy any existing pix and return a pointer to the pointer.
|
||||
Pix** mutable_pix_binary() {
|
||||
Clear();
|
||||
pixDestroy(&pix_binary_);
|
||||
return &pix_binary_;
|
||||
}
|
||||
Pix* pix_binary() const {
|
||||
@ -202,16 +196,24 @@ class Tesseract : public Wordrec {
|
||||
pixDestroy(&pix_grey_);
|
||||
pix_grey_ = grey_pix;
|
||||
}
|
||||
// Returns a pointer to a Pix representing the best available image of the
|
||||
// page. The image will be 8-bit grey if the input was grey or color. Note
|
||||
// that in grey 0 is black and 255 is white. If the input was binary, then
|
||||
// the returned Pix will be binary. Note that here black is 1 and white is 0.
|
||||
// To tell the difference pixGetDepth() will return 8 or 1.
|
||||
// In either case, the return value is a borrowed Pix, and should not be
|
||||
// deleted or pixDestroyed.
|
||||
Pix* BestPix() const {
|
||||
return pix_grey_ != NULL ? pix_grey_ : pix_binary_;
|
||||
Pix* pix_original() const { return pix_original_; }
|
||||
// Takes ownership of the given original_pix.
|
||||
void set_pix_original(Pix* original_pix) {
|
||||
pixDestroy(&pix_original_);
|
||||
pix_original_ = original_pix;
|
||||
// Clone to sublangs as well.
|
||||
for (int i = 0; i < sub_langs_.size(); ++i)
|
||||
sub_langs_[i]->set_pix_original(original_pix ? pixClone(original_pix)
|
||||
: nullptr);
|
||||
}
|
||||
// Returns a pointer to a Pix representing the best available (original) image
|
||||
// of the page. Can be of any bit depth, but never color-mapped, as that has
|
||||
// always been dealt with. Note that in grey and color, 0 is black and 255 is
|
||||
// white. If the input was binary, then black is 1 and white is 0.
|
||||
// To tell the difference pixGetDepth() will return 32, 8 or 1.
|
||||
// In any case, the return value is a borrowed Pix, and should not be
|
||||
// deleted or pixDestroyed.
|
||||
Pix* BestPix() const { return pix_original_; }
|
||||
void set_pix_thresholds(Pix* thresholds) {
|
||||
pixDestroy(&pix_thresholds_);
|
||||
pix_thresholds_ = thresholds;
|
||||
@ -254,11 +256,19 @@ class Tesseract : public Wordrec {
|
||||
Tesseract* get_sub_lang(int index) const {
|
||||
return sub_langs_[index];
|
||||
}
|
||||
// Returns true if any language uses Tesseract (as opposed to cube).
|
||||
// Returns true if any language uses Tesseract (as opposed to LSTM).
|
||||
bool AnyTessLang() const {
|
||||
if (tessedit_ocr_engine_mode != OEM_CUBE_ONLY) return true;
|
||||
if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_CUBE_ONLY)
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if any language uses the LSTM.
|
||||
bool AnyLSTMLang() const {
|
||||
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) return true;
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -293,6 +303,46 @@ class Tesseract : public Wordrec {
|
||||
// par_control.cpp
|
||||
void PrerecAllWordsPar(const GenericVector<WordData>& words);
|
||||
|
||||
//// linerec.cpp
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||
// serialized DocumentData based on output_basename.
|
||||
void TrainLineRecognizer(const STRING& input_imagename,
|
||||
const STRING& output_basename,
|
||||
BLOCK_LIST *block_list);
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
// Breaks the boxes into lines, normalizes them, converts to ImageData and
|
||||
// appends them to the given training_data.
|
||||
void TrainFromBoxes(const GenericVector<TBOX>& boxes,
|
||||
const GenericVector<STRING>& texts,
|
||||
BLOCK_LIST *block_list,
|
||||
DocumentData* training_data);
|
||||
|
||||
// Returns an Imagedata containing the image of the given textline,
|
||||
// and ground truth boxes/truth text if available in the input.
|
||||
// The image is not normalized in any way.
|
||||
ImageData* GetLineData(const TBOX& line_box,
|
||||
const GenericVector<TBOX>& boxes,
|
||||
const GenericVector<STRING>& texts,
|
||||
int start_box, int end_box,
|
||||
const BLOCK& block);
|
||||
// Helper gets the image of a rectangle, using the block.re_rotation() if
|
||||
// needed to get to the image, and rotating the result back to horizontal
|
||||
// layout. (CJK characters will be on their left sides) The vertical text flag
|
||||
// is set in the returned ImageData if the text was originally vertical, which
|
||||
// can be used to invoke a different CJK recognition engine. The revised_box
|
||||
// is also returned to enable calculation of output bounding boxes.
|
||||
ImageData* GetRectImage(const TBOX& box, const BLOCK& block, int padding,
|
||||
TBOX* revised_box) const;
|
||||
// Recognizes a word or group of words, converting to WERD_RES in *words.
|
||||
// Analogous to classify_word_pass1, but can handle a group of words as well.
|
||||
void LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
|
||||
PointerVector<WERD_RES>* words);
|
||||
// Apply segmentation search to the given set of words, within the constraints
|
||||
// of the existing ratings matrix. If there is already a best_choice on a word
|
||||
// leaves it untouched and just sets the done/accepted etc flags.
|
||||
void SearchWords(PointerVector<WERD_RES>* words);
|
||||
|
||||
//// control.h /////////////////////////////////////////////////////////
|
||||
bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
|
||||
const char* word_config, int pass);
|
||||
@ -324,9 +374,8 @@ class Tesseract : public Wordrec {
|
||||
// Helper to recognize the word using the given (language-specific) tesseract.
|
||||
// Returns positive if this recognizer found more new best words than the
|
||||
// number kept from best_words.
|
||||
int RetryWithLanguage(const WordData& word_data,
|
||||
WordRecognizer recognizer,
|
||||
WERD_RES** in_word,
|
||||
int RetryWithLanguage(const WordData& word_data, WordRecognizer recognizer,
|
||||
bool debug, WERD_RES** in_word,
|
||||
PointerVector<WERD_RES>* best_words);
|
||||
// Moves good-looking "noise"/diacritics from the reject list to the main
|
||||
// blob list on the current word. Returns true if anything was done, and
|
||||
@ -428,34 +477,6 @@ class Tesseract : public Wordrec {
|
||||
int *left_ok,
|
||||
int *right_ok) const;
|
||||
|
||||
//// cube_control.cpp ///////////////////////////////////////////////////
|
||||
#ifndef NO_CUBE_BUILD
|
||||
bool init_cube_objects(bool load_combiner,
|
||||
TessdataManager *tessdata_manager);
|
||||
// Iterates through tesseract's results and calls cube on each word,
|
||||
// combining the results with the existing tesseract result.
|
||||
void run_cube_combiner(PAGE_RES *page_res);
|
||||
// Recognizes a single word using (only) cube. Compatible with
|
||||
// Tesseract's classify_word_pass1/classify_word_pass2.
|
||||
void cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);
|
||||
// Cube recognizer to recognize a single word as with classify_word_pass1
|
||||
// but also returns the cube object in case the combiner is needed.
|
||||
CubeObject* cube_recognize_word(BLOCK* block, WERD_RES* word);
|
||||
// Combines the cube and tesseract results for a single word, leaving the
|
||||
// result in tess_word.
|
||||
void cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
|
||||
WERD_RES* tess_word);
|
||||
// Call cube on the current word, and write the result to word.
|
||||
// Sets up a fake result and returns false if something goes wrong.
|
||||
bool cube_recognize(CubeObject *cube_obj, BLOCK* block, WERD_RES *word);
|
||||
void fill_werd_res(const BoxWord& cube_box_word,
|
||||
const char* cube_best_str,
|
||||
WERD_RES* tess_werd_res);
|
||||
bool extract_cube_state(CubeObject* cube_obj, int* num_chars,
|
||||
Boxa** char_boxes, CharSamp*** char_samples);
|
||||
bool create_cube_box_word(Boxa *char_boxes, int num_chars,
|
||||
TBOX word_box, BoxWord* box_word);
|
||||
#endif
|
||||
//// output.h //////////////////////////////////////////////////////////
|
||||
|
||||
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box);
|
||||
@ -475,20 +496,17 @@ class Tesseract : public Wordrec {
|
||||
// string and recursively any additional languages required by any language
|
||||
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
|
||||
// See init_tesseract_internal for args.
|
||||
int init_tesseract(const char *arg0,
|
||||
const char *textbase,
|
||||
const char *language,
|
||||
OcrEngineMode oem,
|
||||
char **configs,
|
||||
int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_init_params);
|
||||
int init_tesseract(const char* arg0, const char* textbase,
|
||||
const char* language, OcrEngineMode oem, char** configs,
|
||||
int configs_size, const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values,
|
||||
bool set_only_init_params, TessdataManager* mgr);
|
||||
int init_tesseract(const char *datapath,
|
||||
const char *language,
|
||||
OcrEngineMode oem) {
|
||||
return init_tesseract(datapath, NULL, language, oem,
|
||||
NULL, 0, NULL, NULL, false);
|
||||
TessdataManager mgr;
|
||||
return init_tesseract(datapath, NULL, language, oem, NULL, 0, NULL, NULL,
|
||||
false, &mgr);
|
||||
}
|
||||
// Common initialization for a single language.
|
||||
// arg0 is the datapath for the tessdata directory, which could be the
|
||||
@ -506,36 +524,30 @@ class Tesseract : public Wordrec {
|
||||
// in vars_vec.
|
||||
// If set_only_init_params is true, then only the initialization variables
|
||||
// will be set.
|
||||
int init_tesseract_internal(const char *arg0,
|
||||
const char *textbase,
|
||||
const char *language,
|
||||
OcrEngineMode oem,
|
||||
char **configs,
|
||||
int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_init_params);
|
||||
int init_tesseract_internal(const char* arg0, const char* textbase,
|
||||
const char* language, OcrEngineMode oem,
|
||||
char** configs, int configs_size,
|
||||
const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values,
|
||||
bool set_only_init_params, TessdataManager* mgr);
|
||||
|
||||
// Set the universal_id member of each font to be unique among all
|
||||
// instances of the same font loaded.
|
||||
void SetupUniversalFontIds();
|
||||
|
||||
int init_tesseract_lm(const char *arg0,
|
||||
const char *textbase,
|
||||
const char *language);
|
||||
int init_tesseract_lm(const char* arg0, const char* textbase,
|
||||
const char* language, TessdataManager* mgr);
|
||||
|
||||
void recognize_page(STRING& image_name);
|
||||
void end_tesseract();
|
||||
|
||||
bool init_tesseract_lang_data(const char *arg0,
|
||||
const char *textbase,
|
||||
const char *language,
|
||||
OcrEngineMode oem,
|
||||
char **configs,
|
||||
int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_init_params);
|
||||
bool init_tesseract_lang_data(const char* arg0, const char* textbase,
|
||||
const char* language, OcrEngineMode oem,
|
||||
char** configs, int configs_size,
|
||||
const GenericVector<STRING>* vars_vec,
|
||||
const GenericVector<STRING>* vars_values,
|
||||
bool set_only_init_params,
|
||||
TessdataManager* mgr);
|
||||
|
||||
void ParseLanguageString(const char* lang_str,
|
||||
GenericVector<STRING>* to_load,
|
||||
@ -783,16 +795,17 @@ class Tesseract : public Wordrec {
|
||||
"Generate training data from boxed chars");
|
||||
BOOL_VAR_H(tessedit_make_boxes_from_boxes, false,
|
||||
"Generate more boxes from boxed chars");
|
||||
BOOL_VAR_H(tessedit_train_line_recognizer, false,
|
||||
"Break input into lines and remap boxes if present");
|
||||
BOOL_VAR_H(tessedit_dump_pageseg_images, false,
|
||||
"Dump intermediate images made during page segmentation");
|
||||
INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK,
|
||||
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
|
||||
" 5=line, 6=word, 7=char"
|
||||
" (Values from PageSegMode enum in publictypes.h)");
|
||||
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
|
||||
"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults"
|
||||
" to loading and running only Tesseract (no Cube, no combiner)."
|
||||
" (Values from OcrEngineMode enum in tesseractclass.h)");
|
||||
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
|
||||
"Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
|
||||
" to loading and running the most accurate available.");
|
||||
STRING_VAR_H(tessedit_char_blacklist, "",
|
||||
"Blacklist of chars not to recognize");
|
||||
STRING_VAR_H(tessedit_char_whitelist, "",
|
||||
@ -886,11 +899,12 @@ class Tesseract : public Wordrec {
|
||||
BOOL_VAR_H(test_pt, false, "Test for point");
|
||||
double_VAR_H(test_pt_x, 99999.99, "xcoord");
|
||||
double_VAR_H(test_pt_y, 99999.99, "ycoord");
|
||||
INT_VAR_H(multilang_debug_level, 0, "Print multilang debug info.");
|
||||
INT_VAR_H(paragraph_debug_level, 0, "Print paragraph debug info.");
|
||||
BOOL_VAR_H(paragraph_text_based, true,
|
||||
"Run paragraph detection on the post-text-recognition "
|
||||
"(more accurate)");
|
||||
INT_VAR_H(cube_debug_level, 1, "Print cube debug info.");
|
||||
BOOL_VAR_H(lstm_use_matrix, 1, "Use ratings matrix/beam searct with lstm");
|
||||
STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines");
|
||||
STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines");
|
||||
BOOL_VAR_H(docqual_excuse_outline_errs, false,
|
||||
@ -926,7 +940,7 @@ class Tesseract : public Wordrec {
|
||||
BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats");
|
||||
BOOL_VAR_H(tessedit_debug_quality_metrics, false,
|
||||
"Output data to debug file");
|
||||
BOOL_VAR_H(bland_unrej, false, "unrej potential with no chekcs");
|
||||
BOOL_VAR_H(bland_unrej, false, "unrej potential with no checks");
|
||||
double_VAR_H(quality_rowrej_pc, 1.1,
|
||||
"good_quality_doc gte good char limit");
|
||||
BOOL_VAR_H(unlv_tilde_crunching, true,
|
||||
@ -1005,13 +1019,14 @@ class Tesseract : public Wordrec {
|
||||
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
|
||||
BOOL_VAR_H(tessedit_create_tsv, false, "Write .tsv output file");
|
||||
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
|
||||
BOOL_VAR_H(textonly_pdf, false,
|
||||
"Create PDF with only one invisible text layer");
|
||||
STRING_VAR_H(unrecognised_char, "|",
|
||||
"Output char for unidentified blobs");
|
||||
INT_VAR_H(suspect_level, 99, "Suspect marker level");
|
||||
INT_VAR_H(suspect_space_level, 100,
|
||||
"Min suspect level for rejecting spaces");
|
||||
INT_VAR_H(suspect_short_words, 2,
|
||||
"Don't Suspect dict wds longer than this");
|
||||
INT_VAR_H(suspect_short_words, 2, "Don't Suspect dict wds longer than this");
|
||||
BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
|
||||
double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit");
|
||||
double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
|
||||
@ -1045,13 +1060,11 @@ class Tesseract : public Wordrec {
|
||||
INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this");
|
||||
BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes");
|
||||
INT_VAR_H(tessedit_page_number, -1,
|
||||
"-1 -> All pages, else specifc page to process");
|
||||
"-1 -> All pages, else specific page to process");
|
||||
BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE");
|
||||
BOOL_VAR_H(interactive_display_mode, false, "Run interactively?");
|
||||
STRING_VAR_H(file_type, ".tif", "Filename extension");
|
||||
BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word");
|
||||
INT_VAR_H(tessdata_manager_debug_level, 0,
|
||||
"Debug level for TessdataManager functions.");
|
||||
STRING_VAR_H(tessedit_load_sublangs, "",
|
||||
"List of languages to load with this one");
|
||||
BOOL_VAR_H(tessedit_use_primary_params_model, false,
|
||||
@ -1157,10 +1170,6 @@ class Tesseract : public Wordrec {
|
||||
PAGE_RES_IT* pr_it,
|
||||
FILE *output_file);
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
inline CubeRecoContext *GetCubeRecoContext() { return cube_cntxt_; }
|
||||
#endif
|
||||
|
||||
private:
|
||||
// The filename of a backup config file. If not null, then we currently
|
||||
// have a temporary debug config file loaded, and backup_config_file_
|
||||
@ -1171,12 +1180,14 @@ class Tesseract : public Wordrec {
|
||||
// Image used for input to layout analysis and tesseract recognition.
|
||||
// May be modified by the ShiroRekhaSplitter to eliminate the top-line.
|
||||
Pix* pix_binary_;
|
||||
// Unmodified image used for input to cube. Always valid.
|
||||
Pix* cube_binary_;
|
||||
// Grey-level input image if the input was not binary, otherwise NULL.
|
||||
Pix* pix_grey_;
|
||||
// Original input image. Color if the input was color.
|
||||
Pix* pix_original_;
|
||||
// Thresholds that were used to generate the thresholded image from grey.
|
||||
Pix* pix_thresholds_;
|
||||
// Debug images. If non-empty, will be written on destruction.
|
||||
DebugPixa pixa_debug_;
|
||||
// Input image resolution after any scaling. The resolution is not well
|
||||
// transmitted by operations on Pix, so we keep an independent record here.
|
||||
int source_resolution_;
|
||||
@ -1199,16 +1210,14 @@ class Tesseract : public Wordrec {
|
||||
Tesseract* most_recently_used_;
|
||||
// The size of the font table, ie max possible font id + 1.
|
||||
int font_table_size_;
|
||||
#ifndef NO_CUBE_BUILD
|
||||
// Cube objects.
|
||||
CubeRecoContext* cube_cntxt_;
|
||||
TesseractCubeCombiner *tess_cube_combiner_;
|
||||
#endif
|
||||
// Equation detector. Note: this pointer is NOT owned by the class.
|
||||
EquationDetect* equ_detect_;
|
||||
// LSTM recognizer, if available.
|
||||
LSTMRecognizer* lstm_recognizer_;
|
||||
// Output "page" number (actually line number) using TrainLineRecognizer.
|
||||
int train_line_page_num_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__
|
||||
#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H_
|
||||
|
@ -152,19 +152,27 @@ void ImageThresholder::SetImage(const Pix* pix) {
|
||||
int depth;
|
||||
pixGetDimensions(src, &image_width_, &image_height_, &depth);
|
||||
// Convert the image as necessary so it is one of binary, plain RGB, or
|
||||
// 8 bit with no colormap.
|
||||
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
|
||||
// not just a clone of the input.
|
||||
if (pixGetColormap(src)) {
|
||||
Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
|
||||
depth = pixGetDepth(tmp);
|
||||
if (depth > 1 && depth < 8) {
|
||||
pix_ = pixConvertTo8(src, false);
|
||||
} else if (pixGetColormap(src)) {
|
||||
pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
|
||||
pix_ = pixConvertTo8(tmp, false);
|
||||
pixDestroy(&tmp);
|
||||
} else {
|
||||
pix_ = pixClone(src);
|
||||
pix_ = tmp;
|
||||
}
|
||||
} else if (depth > 1 && depth < 8) {
|
||||
pix_ = pixConvertTo8(src, false);
|
||||
} else {
|
||||
pix_ = pixCopy(NULL, src);
|
||||
}
|
||||
depth = pixGetDepth(pix_);
|
||||
pix_channels_ = depth / 8;
|
||||
pix_wpl_ = pixGetWpl(pix_);
|
||||
scale_ = 1;
|
||||
estimated_res_ = yres_ = pixGetYRes(src);
|
||||
estimated_res_ = yres_ = pixGetYRes(pix_);
|
||||
Init();
|
||||
}
|
||||
|
||||
@ -173,8 +181,11 @@ void ImageThresholder::SetImage(const Pix* pix) {
|
||||
// Caller must use pixDestroy to free the created Pix.
|
||||
void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
|
||||
if (pix_channels_ == 0) {
|
||||
// We have a binary image, so it just has to be cloned.
|
||||
*pix = GetPixRect();
|
||||
// We have a binary image, but it still has to be copied, as this API
|
||||
// allows the caller to modify the output.
|
||||
Pix* original = GetPixRect();
|
||||
*pix = pixCopy(nullptr, original);
|
||||
pixDestroy(&original);
|
||||
} else {
|
||||
OtsuThresholdRectToPix(pix_, pix);
|
||||
}
|
||||
@ -257,10 +268,10 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
|
||||
OpenclDevice od;
|
||||
if ((num_channels == 4 || num_channels == 1) &&
|
||||
od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) {
|
||||
od.ThresholdRectToPixOCL((const unsigned char*)pixGetData(src_pix),
|
||||
num_channels, pixGetWpl(src_pix) * 4,
|
||||
thresholds, hi_values, out_pix /*pix_OCL*/,
|
||||
rect_height_, rect_width_, rect_top_, rect_left_);
|
||||
od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
|
||||
pixGetWpl(src_pix) * 4, thresholds, hi_values,
|
||||
out_pix /*pix_OCL*/, rect_height_, rect_width_,
|
||||
rect_top_, rect_left_);
|
||||
} else {
|
||||
#endif
|
||||
ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
|
||||
|
@ -17,8 +17,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
|
||||
#define TESSERACT_CCMAIN_THRESHOLDER_H__
|
||||
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
|
||||
#define TESSERACT_CCMAIN_THRESHOLDER_H_
|
||||
|
||||
#include "platform.h"
|
||||
#include "publictypes.h"
|
||||
@ -186,4 +186,4 @@ class TESS_API ImageThresholder {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_THRESHOLDER_H__
|
||||
#endif // TESSERACT_CCMAIN_THRESHOLDER_H_
|
||||
|
@ -12,7 +12,7 @@ endif
|
||||
include_HEADERS = publictypes.h
|
||||
noinst_HEADERS = \
|
||||
blamer.h blckerr.h blobbox.h blobs.h blread.h boxread.h boxword.h ccstruct.h coutln.h crakedge.h \
|
||||
detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \
|
||||
debugpixa.h detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \
|
||||
imagedata.h \
|
||||
ipoints.h \
|
||||
linlsq.h matrix.h mod128.h normalis.h \
|
||||
|
@ -317,7 +317,7 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
|
||||
int num_blobs = word->chopped_word->blobs.size();
|
||||
int box_index = 0;
|
||||
int blob_index = 0;
|
||||
inT16 truth_x;
|
||||
inT16 truth_x = -1;
|
||||
while (box_index < truth_word_.length() && blob_index < num_blobs) {
|
||||
truth_x = norm_truth_word_.BlobBox(box_index).right();
|
||||
TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
|
||||
|
@ -31,7 +31,9 @@
|
||||
#define PROJECTION_MARGIN 10 //arbitrary
|
||||
#define EXTERN
|
||||
|
||||
ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)
|
||||
ELISTIZE(BLOBNBOX)
|
||||
ELIST2IZE(TO_ROW)
|
||||
ELISTIZE(TO_BLOCK)
|
||||
|
||||
// Up to 30 degrees is allowed for rotations of diacritic blobs.
|
||||
const double kCosSmallAngle = 0.866;
|
||||
@ -176,7 +178,7 @@ void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
|
||||
gaps[dir] = MAX_INT16;
|
||||
BLOBNBOX* neighbour = neighbours_[dir];
|
||||
if (neighbour != NULL) {
|
||||
TBOX n_box = neighbour->bounding_box();
|
||||
const TBOX& n_box = neighbour->bounding_box();
|
||||
if (dir == BND_LEFT || dir == BND_RIGHT) {
|
||||
gaps[dir] = box.x_gap(n_box);
|
||||
} else {
|
||||
|
@ -815,12 +815,10 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
|
||||
float input_y_offset = 0.0f;
|
||||
float final_y_offset = static_cast<float>(kBlnBaselineOffset);
|
||||
float scale = kBlnXHeight / x_height;
|
||||
if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) {
|
||||
if (row == NULL) {
|
||||
word_middle = word_box.left();
|
||||
input_y_offset = word_box.bottom();
|
||||
final_y_offset = 0.0f;
|
||||
if (hint == tesseract::OEM_CUBE_ONLY)
|
||||
scale = 1.0f;
|
||||
} else {
|
||||
input_y_offset = row->base_line(word_middle) + baseline_shift;
|
||||
}
|
||||
@ -834,7 +832,7 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
|
||||
baseline = blob_box.bottom();
|
||||
blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
|
||||
scale, scale * 1.5f);
|
||||
} else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) {
|
||||
} else if (row != NULL) {
|
||||
baseline = row->base_line(mid_x) + baseline_shift;
|
||||
}
|
||||
// The image will be 8-bit grey if the input was grey or color. Note that in
|
||||
|
@ -34,8 +34,7 @@ FILE* OpenBoxFile(const STRING& fname) {
|
||||
STRING filename = BoxFileName(fname);
|
||||
FILE* box_file = NULL;
|
||||
if (!(box_file = fopen(filename.string(), "rb"))) {
|
||||
CANTOPENFILE.error("read_next_box", TESSEXIT,
|
||||
"Can't open box file %s",
|
||||
CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s",
|
||||
filename.string());
|
||||
}
|
||||
return box_file;
|
||||
@ -56,6 +55,8 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
|
||||
GenericVector<char> box_data;
|
||||
if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data))
|
||||
return false;
|
||||
// Convert the array of bytes to a string, so it can be used by the parser.
|
||||
box_data.push_back('\0');
|
||||
return ReadMemBoxes(target_page, skip_blanks, &box_data[0], boxes, texts,
|
||||
box_texts, pages);
|
||||
}
|
||||
|
@ -17,8 +17,8 @@
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_BOXREAD_H__
|
||||
#define TESSERACT_CCUTIL_BOXREAD_H__
|
||||
#ifndef TESSERACT_CCUTIL_BOXREAD_H_
|
||||
#define TESSERACT_CCUTIL_BOXREAD_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include "genericvector.h"
|
||||
@ -82,4 +82,4 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
|
||||
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
|
||||
STRING* box_str);
|
||||
|
||||
#endif // TESSERACT_CCUTIL_BOXREAD_H__
|
||||
#endif // TESSERACT_CCUTIL_BOXREAD_H_
|
||||
|
@ -17,8 +17,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CSTRUCT_BOXWORD_H__
|
||||
#define TESSERACT_CSTRUCT_BOXWORD_H__
|
||||
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
#define TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "rect.h"
|
||||
@ -82,9 +82,7 @@ class BoxWord {
|
||||
const TBOX& bounding_box() const {
|
||||
return bbox_;
|
||||
}
|
||||
int length() const {
|
||||
return length_;
|
||||
}
|
||||
int length() const { return length_; }
|
||||
const TBOX& BlobBox(int index) const {
|
||||
return boxes_[index];
|
||||
}
|
||||
@ -99,5 +97,4 @@ class BoxWord {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
||||
#endif // TESSERACT_CSTRUCT_BOXWORD_H__
|
||||
#endif // TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
|
@ -16,8 +16,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H__
|
||||
#define TESSERACT_CCSTRUCT_CCSTRUCT_H__
|
||||
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
#define TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
|
||||
#include "cutil.h"
|
||||
|
||||
@ -40,5 +40,4 @@ class CCStruct : public CUtil {
|
||||
class Tesseract;
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H__
|
||||
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
|
||||
|
@ -48,9 +48,9 @@ ICOORD C_OUTLINE::step_coords[4] = {
|
||||
* @param length length of loop
|
||||
*/
|
||||
|
||||
C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left,
|
||||
ICOORD top_right, inT16 length)
|
||||
: box (bot_left, top_right), start (startpt->pos), offsets(NULL) {
|
||||
C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right,
|
||||
inT16 length)
|
||||
: box(bot_left, top_right), start(startpt->pos), offsets(NULL) {
|
||||
inT16 stepindex; //index to step
|
||||
CRACKEDGE *edgept; //current point
|
||||
|
||||
@ -71,7 +71,6 @@ C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::C_OUTLINE
|
||||
*
|
||||
@ -139,7 +138,7 @@ inT16 length //length of loop
|
||||
* @param rotation rotate to coord
|
||||
*/
|
||||
|
||||
C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(NULL) {
|
||||
C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(NULL) {
|
||||
TBOX new_box; //easy bounding
|
||||
inT16 stepindex; //index to step
|
||||
inT16 dirdiff; //direction change
|
||||
@ -300,7 +299,6 @@ inT32 C_OUTLINE::perimeter() const {
|
||||
return total_steps;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::outer_area
|
||||
*
|
||||
@ -332,7 +330,6 @@ inT32 C_OUTLINE::outer_area() const {
|
||||
return total;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::count_transitions
|
||||
*
|
||||
@ -459,7 +456,6 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) {
|
||||
return total;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::operator<
|
||||
*
|
||||
@ -468,8 +464,7 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) {
|
||||
*/
|
||||
|
||||
BOOL8
|
||||
C_OUTLINE::operator< (const C_OUTLINE & other) const
|
||||
{
|
||||
C_OUTLINE::operator<(const C_OUTLINE& other) const {
|
||||
inT16 count = 0; //winding count
|
||||
ICOORD pos; //position of point
|
||||
inT32 stepindex; //index to cstep
|
||||
@ -495,7 +490,6 @@ C_OUTLINE::operator< (const C_OUTLINE & other) const
|
||||
return count != 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::winding_number
|
||||
*
|
||||
@ -534,7 +528,6 @@ inT16 C_OUTLINE::winding_number(ICOORD point) const {
|
||||
return count; //winding number
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* C_OUTLINE::turn_direction
|
||||
*
|
||||
@ -563,7 +556,6 @@ inT16 C_OUTLINE::turn_direction() const { //winding number
|
||||
return count; //winding number
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::reverse
|
||||
*
|
||||
@ -586,7 +578,6 @@ void C_OUTLINE::reverse() { //reverse drection
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::move
|
||||
*
|
||||
@ -661,14 +652,27 @@ static void ComputeGradient(const l_uint32* data, int wpl,
|
||||
int x, int y, int width, int height,
|
||||
ICOORD* gradient) {
|
||||
const l_uint32* line = data + y * wpl;
|
||||
int pix_x_y = x < width && y < height ?
|
||||
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x) : 255;
|
||||
int pix_x_prevy = x < width && y > 0 ?
|
||||
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line - wpl)), x) : 255;
|
||||
int pix_prevx_prevy = x > 0 && y > 0 ?
|
||||
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<void const*>(line - wpl)), x - 1) : 255;
|
||||
int pix_prevx_y = x > 0 && y < height ?
|
||||
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x - 1) : 255;
|
||||
int pix_x_y =
|
||||
x < width && y < height
|
||||
? GET_DATA_BYTE(
|
||||
const_cast<void*>(reinterpret_cast<const void*>(line)), x)
|
||||
: 255;
|
||||
int pix_x_prevy =
|
||||
x < width && y > 0
|
||||
? GET_DATA_BYTE(
|
||||
const_cast<void*>(reinterpret_cast<const void*>(line - wpl)), x)
|
||||
: 255;
|
||||
int pix_prevx_prevy =
|
||||
x > 0 && y > 0
|
||||
? GET_DATA_BYTE(
|
||||
const_cast<void*>(reinterpret_cast<void const*>(line - wpl)),
|
||||
x - 1)
|
||||
: 255;
|
||||
int pix_prevx_y =
|
||||
x > 0 && y < height
|
||||
? GET_DATA_BYTE(
|
||||
const_cast<void*>(reinterpret_cast<const void*>(line)), x - 1)
|
||||
: 255;
|
||||
gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy));
|
||||
gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y));
|
||||
}
|
||||
@ -684,8 +688,10 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign,
|
||||
if (y <= 0 || y >= height)
|
||||
return false;
|
||||
const l_uint32* line = data + y * wpl;
|
||||
int pixel1 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line - wpl)), x);
|
||||
int pixel2 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x);
|
||||
int pixel1 = GET_DATA_BYTE(
|
||||
const_cast<void*>(reinterpret_cast<const void*>(line - wpl)), x);
|
||||
int pixel2 =
|
||||
GET_DATA_BYTE(const_cast<void*>(reinterpret_cast<const void*>(line)), x);
|
||||
int diff = (pixel2 - pixel1) * diff_sign;
|
||||
if (diff > *best_diff) {
|
||||
*best_diff = diff;
|
||||
@ -705,8 +711,10 @@ static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign,
|
||||
int* best_diff, int* best_sum, int* best_x) {
|
||||
if (x <= 0 || x >= width)
|
||||
return false;
|
||||
int pixel1 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x - 1);
|
||||
int pixel2 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x);
|
||||
int pixel1 = GET_DATA_BYTE(
|
||||
const_cast<void*>(reinterpret_cast<const void*>(line)), x - 1);
|
||||
int pixel2 =
|
||||
GET_DATA_BYTE(const_cast<void*>(reinterpret_cast<const void*>(line)), x);
|
||||
int diff = (pixel2 - pixel1) * diff_sign;
|
||||
if (diff > *best_diff) {
|
||||
*best_diff = diff;
|
||||
@ -954,8 +962,7 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
|
||||
*/
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void C_OUTLINE::plot(ScrollView* window,
|
||||
ScrollView::Color colour) const {
|
||||
void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const {
|
||||
inT16 stepindex; // index to cstep
|
||||
ICOORD pos; // current position
|
||||
DIR128 stepdir; // direction of step
|
||||
@ -1016,7 +1023,6 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* @name C_OUTLINE::operator=
|
||||
*
|
||||
@ -1024,7 +1030,7 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
|
||||
* @param source assign from this
|
||||
*/
|
||||
|
||||
C_OUTLINE & C_OUTLINE::operator= (const C_OUTLINE & source) {
|
||||
C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) {
|
||||
box = source.box;
|
||||
start = source.start;
|
||||
if (steps != NULL)
|
||||
|
52
ccstruct/debugpixa.h
Normal file
52
ccstruct/debugpixa.h
Normal file
@ -0,0 +1,52 @@
|
||||
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Class to hold a Pixa collection of debug images with captions and save them
|
||||
// to a PDF file.
// The object creates pixa_ and fonts_ in its constructor and destroys both in
// its destructor.
// NOTE(review): no copy constructor/assignment is declared although the class
// destroys raw pointers in its destructor; copying an instance would
// presumably destroy the same Pixa/L_Bmf twice -- confirm nothing copies it.
|
||||
class DebugPixa {
|
||||
public:
|
||||
// TODO(rays) add another constructor with size control.
// Creates an empty image collection (pixaCreate(0)) and the caption font via
// bmfCreate(nullptr, 14); 14 is presumably the font size -- see Leptonica.
|
||||
DebugPixa() {
|
||||
pixa_ = pixaCreate(0);
|
||||
fonts_ = bmfCreate(nullptr, 14);
|
||||
}
|
||||
// Destroys the image collection and the caption font. Nothing is written
|
||||
// here: any images still held that were not flushed by WritePDF are dropped.
|
||||
~DebugPixa() {
|
||||
pixaDestroy(&pixa_);
|
||||
bmfDestroy(&fonts_);
|
||||
}
|
||||
|
||||
// Adds a captioned version of the given pix to the collection. The caption is
// drawn by pixAddSingleTextblock with the L_ADD_BELOW location flag, so it is
|
||||
// presumably placed below the image rather than at the top -- TODO confirm.
// The text color value is chosen from the image depth: 1 for depth < 8,
// 0x00ff0000 for depth > 8, and 0x80 for depth == 8.
// The const_casts only satisfy the Leptonica signatures; presumably the input
// pix itself is not modified -- verify against the Leptonica docs.
// The new image is added with L_INSERT and is not destroyed here; per
// Leptonica convention the Pixa then owns it -- TODO confirm.
|
||||
void AddPix(const Pix* pix, const char* caption) {
|
||||
int depth = pixGetDepth(const_cast<Pix*>(pix));
|
||||
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
|
||||
Pix* pix_debug = pixAddSingleTextblock(
|
||||
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
|
||||
pixaAddPix(pixa_, pix_debug, L_INSERT);
|
||||
}
|
||||
|
||||
// If any images have been added, converts them to a PDF at the given filename
|
||||
// right away (not on destruction) and then clears the collection. Does
// nothing when the collection is empty. The result of pixaConvertToPdf is
// not checked, so a failed write is not reported to the caller.
// NOTE(review): 300 and 1.0f look like the resolution and scale factor, and
// the two zeros like default encoding type/quality -- confirm with Leptonica.
|
||||
void WritePDF(const char* filename) {
|
||||
if (pixaGetCount(pixa_) > 0) {
|
||||
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
|
||||
pixaClear(pixa_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// The collection of images to put in the PDF. Created in the constructor,
// emptied by WritePDF and destroyed in the destructor.
|
||||
Pixa* pixa_;
|
||||
// The fonts used to draw text captions. Created in the constructor and
// destroyed in the destructor.
|
||||
L_Bmf* fonts_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
@ -17,8 +17,8 @@
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_DPPOINT_H__
|
||||
#define TESSERACT_CCSTRUCT_DPPOINT_H__
|
||||
#ifndef TESSERACT_CCSTRUCT_DPPOINT_H_
|
||||
#define TESSERACT_CCSTRUCT_DPPOINT_H_
|
||||
|
||||
#include "host.h"
|
||||
|
||||
@ -98,5 +98,4 @@ class DPPoint {
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DPPOINT_H__
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DPPOINT_H_
|
||||
|
@ -31,7 +31,7 @@ bool FontInfo::Serialize(FILE* fp) const {
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool FontInfo::DeSerialize(bool swap, FILE* fp) {
|
||||
bool FontInfo::DeSerialize(bool swap, TFile* fp) {
|
||||
if (!read_info(fp, this, swap)) return false;
|
||||
if (!read_spacing_info(fp, this, swap)) return false;
|
||||
return true;
|
||||
@ -51,7 +51,7 @@ bool FontInfoTable::Serialize(FILE* fp) const {
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool FontInfoTable::DeSerialize(bool swap, FILE* fp) {
|
||||
bool FontInfoTable::DeSerialize(bool swap, TFile* fp) {
|
||||
truncate(0);
|
||||
return this->DeSerializeClasses(swap, fp);
|
||||
}
|
||||
@ -149,19 +149,15 @@ void FontSetDeleteCallback(FontSet fs) {
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
|
||||
bool read_info(FILE* f, FontInfo* fi, bool swap) {
|
||||
bool read_info(TFile* f, FontInfo* fi, bool swap) {
|
||||
inT32 size;
|
||||
if (fread(&size, sizeof(size), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&size);
|
||||
if (f->FReadEndian(&size, sizeof(size), 1, swap) != 1) return false;
|
||||
char* font_name = new char[size + 1];
|
||||
fi->name = font_name;
|
||||
if (static_cast<int>(fread(font_name, sizeof(*font_name), size, f)) != size)
|
||||
return false;
|
||||
if (f->FRead(font_name, sizeof(*font_name), size) != size) return false;
|
||||
font_name[size] = '\0';
|
||||
if (fread(&fi->properties, sizeof(fi->properties), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&fi->properties);
|
||||
if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1, swap) != 1)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -174,26 +170,22 @@ bool write_info(FILE* f, const FontInfo& fi) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) {
|
||||
bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) {
|
||||
inT32 vec_size, kern_size;
|
||||
if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
|
||||
if (swap) Reverse32(&vec_size);
|
||||
if (f->FReadEndian(&vec_size, sizeof(vec_size), 1, swap) != 1) return false;
|
||||
ASSERT_HOST(vec_size >= 0);
|
||||
if (vec_size == 0) return true;
|
||||
fi->init_spacing(vec_size);
|
||||
for (int i = 0; i < vec_size; ++i) {
|
||||
FontSpacingInfo *fs = new FontSpacingInfo();
|
||||
if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 ||
|
||||
fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 ||
|
||||
fread(&kern_size, sizeof(kern_size), 1, f) != 1) {
|
||||
if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, swap) !=
|
||||
1 ||
|
||||
f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, swap) !=
|
||||
1 ||
|
||||
f->FReadEndian(&kern_size, sizeof(kern_size), 1, swap) != 1) {
|
||||
delete fs;
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before));
|
||||
ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after));
|
||||
Reverse32(&kern_size);
|
||||
}
|
||||
if (kern_size < 0) { // indication of a NULL entry in fi->spacing_vec
|
||||
delete fs;
|
||||
continue;
|
||||
@ -237,16 +229,12 @@ bool write_spacing_info(FILE* f, const FontInfo& fi) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_set(FILE* f, FontSet* fs, bool swap) {
|
||||
if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&fs->size);
|
||||
bool read_set(TFile* f, FontSet* fs, bool swap) {
|
||||
if (f->FReadEndian(&fs->size, sizeof(fs->size), 1, swap) != 1) return false;
|
||||
fs->configs = new int[fs->size];
|
||||
for (int i = 0; i < fs->size; ++i) {
|
||||
if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&fs->configs[i]);
|
||||
}
|
||||
if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size, swap) !=
|
||||
fs->size)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,7 @@ struct FontInfo {
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
|
||||
// Reserves unicharset_size spots in spacing_vec.
|
||||
void init_spacing(int unicharset_size) {
|
||||
@ -152,7 +152,7 @@ class FontInfoTable : public GenericVector<FontInfo> {
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
|
||||
// Returns true if the given set of fonts includes one with the same
|
||||
// properties as font_id.
|
||||
@ -177,11 +177,11 @@ void FontInfoDeleteCallback(FontInfo f);
|
||||
void FontSetDeleteCallback(FontSet fs);
|
||||
|
||||
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
|
||||
bool read_info(FILE* f, FontInfo* fi, bool swap);
|
||||
bool read_info(TFile* f, FontInfo* fi, bool swap);
|
||||
bool write_info(FILE* f, const FontInfo& fi);
|
||||
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap);
|
||||
bool read_spacing_info(TFile* f, FontInfo* fi, bool swap);
|
||||
bool write_spacing_info(FILE* f, const FontInfo& fi);
|
||||
bool read_set(FILE* f, FontSet* fs, bool swap);
|
||||
bool read_set(TFile* f, FontSet* fs, bool swap);
|
||||
bool write_set(FILE* f, const FontSet& fs);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -1,3 +1,12 @@
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef HPDSIZES_H
|
||||
#define HPDSIZES_H
|
||||
|
||||
|
@ -24,12 +24,22 @@
|
||||
|
||||
#include "imagedata.h"
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "boxread.h"
|
||||
#include "callcpp.h"
|
||||
#include "helpers.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
// Number of documents to read ahead while training. Doesn't need to be very
|
||||
// large.
|
||||
const int kMaxReadAhead = 8;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
WordFeature::WordFeature() : x_(0), y_(0), dir_(0) {
|
||||
@ -182,6 +192,19 @@ bool ImageData::DeSerialize(bool swap, TFile* fp) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
bool ImageData::SkipDeSerialize(bool swap, TFile* fp) {
|
||||
if (!STRING::SkipDeSerialize(swap, fp)) return false;
|
||||
inT32 page_number;
|
||||
if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false;
|
||||
if (!GenericVector<char>::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!STRING::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!GenericVector<TBOX>::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!GenericVector<STRING>::SkipDeSerializeClasses(swap, fp)) return false;
|
||||
inT8 vertical = 0;
|
||||
return fp->FRead(&vertical, sizeof(vertical), 1) == 1;
|
||||
}
|
||||
|
||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||
void ImageData::SetPix(Pix* pix) {
|
||||
SetPixInternal(pix, &image_data_);
|
||||
@ -195,9 +218,10 @@ Pix* ImageData::GetPix() const {
|
||||
// Gets anything and everything with a non-NULL pointer, prescaled to a
|
||||
// given target_height (if 0, then the original image height), and aligned.
|
||||
// Also returns (if not NULL) the width and height of the scaled image.
|
||||
// The return value is the scale factor that was applied to the image to
|
||||
// achieve the target_height.
|
||||
float ImageData::PreScale(int target_height, Pix** pix,
|
||||
// The return value is the scaled Pix, which must be pixDestroyed after use,
|
||||
// and scale_factor (if not NULL) is set to the scale factor that was applied
|
||||
// to the image to achieve the target_height.
|
||||
Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor,
|
||||
int* scaled_width, int* scaled_height,
|
||||
GenericVector<TBOX>* boxes) const {
|
||||
int input_width = 0;
|
||||
@ -206,26 +230,22 @@ float ImageData::PreScale(int target_height, Pix** pix,
|
||||
ASSERT_HOST(src_pix != NULL);
|
||||
input_width = pixGetWidth(src_pix);
|
||||
input_height = pixGetHeight(src_pix);
|
||||
if (target_height == 0)
|
||||
target_height = input_height;
|
||||
if (target_height == 0) {
|
||||
target_height = MIN(input_height, max_height);
|
||||
}
|
||||
float im_factor = static_cast<float>(target_height) / input_height;
|
||||
if (scaled_width != NULL)
|
||||
*scaled_width = IntCastRounded(im_factor * input_width);
|
||||
if (scaled_height != NULL)
|
||||
*scaled_height = target_height;
|
||||
if (pix != NULL) {
|
||||
// Get the scaled image.
|
||||
pixDestroy(pix);
|
||||
*pix = pixScale(src_pix, im_factor, im_factor);
|
||||
if (*pix == NULL) {
|
||||
Pix* pix = pixScale(src_pix, im_factor, im_factor);
|
||||
if (pix == NULL) {
|
||||
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
|
||||
input_width, input_height, im_factor);
|
||||
}
|
||||
if (scaled_width != NULL)
|
||||
*scaled_width = pixGetWidth(*pix);
|
||||
if (scaled_height != NULL)
|
||||
*scaled_height = pixGetHeight(*pix);
|
||||
}
|
||||
if (scaled_width != NULL) *scaled_width = pixGetWidth(pix);
|
||||
if (scaled_height != NULL) *scaled_height = pixGetHeight(pix);
|
||||
pixDestroy(&src_pix);
|
||||
if (boxes != NULL) {
|
||||
// Get the boxes.
|
||||
@ -241,7 +261,8 @@ float ImageData::PreScale(int target_height, Pix** pix,
|
||||
boxes->push_back(box);
|
||||
}
|
||||
}
|
||||
return im_factor;
|
||||
if (scale_factor != NULL) *scale_factor = im_factor;
|
||||
return pix;
|
||||
}
|
||||
|
||||
int ImageData::MemoryUsed() const {
|
||||
@ -266,19 +287,20 @@ void ImageData::Display() const {
|
||||
// Draw the boxes.
|
||||
win->Pen(ScrollView::RED);
|
||||
win->Brush(ScrollView::NONE);
|
||||
win->TextAttributes("Arial", kTextSize, false, false, false);
|
||||
int text_size = kTextSize;
|
||||
if (!boxes_.empty() && boxes_[0].height() * 2 < text_size)
|
||||
text_size = boxes_[0].height() * 2;
|
||||
win->TextAttributes("Arial", text_size, false, false, false);
|
||||
if (!boxes_.empty()) {
|
||||
for (int b = 0; b < boxes_.size(); ++b) {
|
||||
boxes_[b].plot(win);
|
||||
win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string());
|
||||
TBOX scaled(boxes_[b]);
|
||||
scaled.scale(256.0 / height);
|
||||
scaled.plot(win);
|
||||
}
|
||||
} else {
|
||||
// The full transcription.
|
||||
win->Pen(ScrollView::CYAN);
|
||||
win->Text(0, height + kTextSize * 2, transcription_.string());
|
||||
// Add the features.
|
||||
win->Pen(ScrollView::GREEN);
|
||||
}
|
||||
win->Update();
|
||||
window_wait(win);
|
||||
#endif
|
||||
@ -340,27 +362,51 @@ bool ImageData::AddBoxes(const char* box_text) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DocumentData::DocumentData(const STRING& name)
|
||||
: document_name_(name), pages_offset_(0), total_pages_(0),
|
||||
memory_used_(0), max_memory_(0), reader_(NULL) {}
|
||||
// Thread function to call ReCachePages.
|
||||
void* ReCachePagesFunc(void* data) {
|
||||
DocumentData* document_data = reinterpret_cast<DocumentData*>(data);
|
||||
document_data->ReCachePages();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DocumentData::~DocumentData() {}
|
||||
DocumentData::DocumentData(const STRING& name)
|
||||
: document_name_(name),
|
||||
pages_offset_(-1),
|
||||
total_pages_(-1),
|
||||
memory_used_(0),
|
||||
max_memory_(0),
|
||||
reader_(NULL) {}
|
||||
|
||||
DocumentData::~DocumentData() {
|
||||
SVAutoLock lock_p(&pages_mutex_);
|
||||
SVAutoLock lock_g(&general_mutex_);
|
||||
}
|
||||
|
||||
// Reads all the pages in the given lstmf filename to the cache. The reader
|
||||
// is used to read the file.
|
||||
bool DocumentData::LoadDocument(const char* filename, const char* lang,
|
||||
int start_page, inT64 max_memory,
|
||||
FileReader reader) {
|
||||
SetDocument(filename, lang, max_memory, reader);
|
||||
pages_offset_ = start_page;
|
||||
return ReCachePages();
|
||||
}
|
||||
|
||||
// Sets up the document, without actually loading it.
|
||||
void DocumentData::SetDocument(const char* filename, const char* lang,
|
||||
inT64 max_memory, FileReader reader) {
|
||||
SVAutoLock lock_p(&pages_mutex_);
|
||||
SVAutoLock lock(&general_mutex_);
|
||||
document_name_ = filename;
|
||||
lang_ = lang;
|
||||
pages_offset_ = start_page;
|
||||
pages_offset_ = -1;
|
||||
max_memory_ = max_memory;
|
||||
reader_ = reader;
|
||||
return ReCachePages();
|
||||
}
|
||||
|
||||
// Writes all the pages to the given filename. Returns false on error.
|
||||
bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
TFile fp;
|
||||
fp.OpenWrite(NULL);
|
||||
if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
|
||||
@ -370,112 +416,184 @@ bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
|
||||
return true;
|
||||
}
|
||||
bool DocumentData::SaveToBuffer(GenericVector<char>* buffer) {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
TFile fp;
|
||||
fp.OpenWrite(buffer);
|
||||
return pages_.Serialize(&fp);
|
||||
}
|
||||
|
||||
// Returns a pointer to the page with the given index, modulo the total
|
||||
// number of pages, recaching if needed.
|
||||
const ImageData* DocumentData::GetPage(int index) {
|
||||
index = Modulo(index, total_pages_);
|
||||
if (index < pages_offset_ || index >= pages_offset_ + pages_.size()) {
|
||||
pages_offset_ = index;
|
||||
if (!ReCachePages()) return NULL;
|
||||
}
|
||||
return pages_[index - pages_offset_];
|
||||
// Adds the given page data to this document, counting up memory.
|
||||
void DocumentData::AddPageToDocument(ImageData* page) {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
pages_.push_back(page);
|
||||
set_memory_used(memory_used() + page->MemoryUsed());
|
||||
}
|
||||
|
||||
// Loads as many pages can fit in max_memory_ starting at index pages_offset_.
|
||||
// If the given index is not currently loaded, loads it using a separate
|
||||
// thread.
|
||||
void DocumentData::LoadPageInBackground(int index) {
|
||||
ImageData* page = NULL;
|
||||
if (IsPageAvailable(index, &page)) return;
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
if (pages_offset_ == index) return;
|
||||
pages_offset_ = index;
|
||||
pages_.clear();
|
||||
SVSync::StartThread(ReCachePagesFunc, this);
|
||||
}
|
||||
|
||||
// Returns a pointer to the page with the given index, modulo the total
|
||||
// number of pages. Blocks until the background load is completed.
|
||||
const ImageData* DocumentData::GetPage(int index) {
|
||||
ImageData* page = NULL;
|
||||
while (!IsPageAvailable(index, &page)) {
|
||||
// If there is no background load scheduled, schedule one now.
|
||||
pages_mutex_.Lock();
|
||||
bool needs_loading = pages_offset_ != index;
|
||||
pages_mutex_.Unlock();
|
||||
if (needs_loading) LoadPageInBackground(index);
|
||||
// We can't directly load the page, or the background load will delete it
|
||||
// while the caller is using it, so give it a chance to work.
|
||||
#if defined(__MINGW32__)
|
||||
sleep(1);
|
||||
#else
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
#endif
|
||||
}
|
||||
return page;
|
||||
}
|
||||
|
||||
// Returns true if the requested page is available, and provides a pointer,
|
||||
// which may be NULL if the document is empty. May block, even though it
|
||||
// doesn't guarantee to return true.
|
||||
bool DocumentData::IsPageAvailable(int index, ImageData** page) {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
int num_pages = NumPages();
|
||||
if (num_pages == 0 || index < 0) {
|
||||
*page = NULL; // Empty Document.
|
||||
return true;
|
||||
}
|
||||
if (num_pages > 0) {
|
||||
index = Modulo(index, num_pages);
|
||||
if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
|
||||
*page = pages_[index - pages_offset_]; // Page is available already.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Removes all pages from memory and frees the memory, but does not forget
|
||||
// the document metadata.
|
||||
inT64 DocumentData::UnCache() {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
inT64 memory_saved = memory_used();
|
||||
pages_.clear();
|
||||
pages_offset_ = -1;
|
||||
set_total_pages(-1);
|
||||
set_memory_used(0);
|
||||
tprintf("Unloaded document %s, saving %d memory\n", document_name_.string(),
|
||||
memory_saved);
|
||||
return memory_saved;
|
||||
}
|
||||
|
||||
// Shuffles all the pages in the document.
|
||||
void DocumentData::Shuffle() {
|
||||
TRand random;
|
||||
// Different documents get shuffled differently, but the same for the same
|
||||
// name.
|
||||
random.set_seed(document_name_.string());
|
||||
int num_pages = pages_.size();
|
||||
// Execute one random swap for each page in the document.
|
||||
for (int i = 0; i < num_pages; ++i) {
|
||||
int src = random.IntRand() % num_pages;
|
||||
int dest = random.IntRand() % num_pages;
|
||||
std::swap(pages_[src], pages_[dest]);
|
||||
}
|
||||
}
|
||||
|
||||
// Locks the pages_mutex_ and Loads as many pages can fit in max_memory_
|
||||
// starting at index pages_offset_.
|
||||
bool DocumentData::ReCachePages() {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
// Read the file.
|
||||
TFile fp;
|
||||
if (!fp.Open(document_name_, reader_)) return false;
|
||||
memory_used_ = 0;
|
||||
if (!pages_.DeSerialize(false, &fp)) {
|
||||
tprintf("Deserialize failed: %s\n", document_name_.string());
|
||||
set_total_pages(0);
|
||||
set_memory_used(0);
|
||||
int loaded_pages = 0;
|
||||
pages_.truncate(0);
|
||||
TFile fp;
|
||||
if (!fp.Open(document_name_, reader_) ||
|
||||
!PointerVector<ImageData>::DeSerializeSize(false, &fp, &loaded_pages) ||
|
||||
loaded_pages <= 0) {
|
||||
tprintf("Deserialize header failed: %s\n", document_name_.string());
|
||||
return false;
|
||||
}
|
||||
total_pages_ = pages_.size();
|
||||
pages_offset_ %= total_pages_;
|
||||
// Delete pages before the first one we want, and relocate the rest.
|
||||
pages_offset_ %= loaded_pages;
|
||||
// Skip pages before the first one we want, and load the rest until max
|
||||
// memory and skip the rest after that.
|
||||
int page;
|
||||
for (page = 0; page < pages_.size(); ++page) {
|
||||
if (page < pages_offset_) {
|
||||
delete pages_[page];
|
||||
pages_[page] = NULL;
|
||||
for (page = 0; page < loaded_pages; ++page) {
|
||||
if (page < pages_offset_ ||
|
||||
(max_memory_ > 0 && memory_used() > max_memory_)) {
|
||||
if (!PointerVector<ImageData>::DeSerializeSkip(false, &fp)) break;
|
||||
} else {
|
||||
ImageData* image_data = pages_[page];
|
||||
if (max_memory_ > 0 && page > pages_offset_ &&
|
||||
memory_used_ + image_data->MemoryUsed() > max_memory_)
|
||||
break; // Don't go over memory quota unless the first image.
|
||||
if (!pages_.DeSerializeElement(false, &fp)) break;
|
||||
ImageData* image_data = pages_.back();
|
||||
if (image_data->imagefilename().length() == 0) {
|
||||
image_data->set_imagefilename(document_name_);
|
||||
image_data->set_page_number(page);
|
||||
}
|
||||
image_data->set_language(lang_);
|
||||
memory_used_ += image_data->MemoryUsed();
|
||||
if (pages_offset_ != 0) {
|
||||
pages_[page - pages_offset_] = image_data;
|
||||
pages_[page] = NULL;
|
||||
set_memory_used(memory_used() + image_data->MemoryUsed());
|
||||
}
|
||||
}
|
||||
if (page < loaded_pages) {
|
||||
tprintf("Deserialize failed: %s read %d/%d pages\n",
|
||||
document_name_.string(), page, loaded_pages);
|
||||
pages_.truncate(0);
|
||||
} else {
|
||||
tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", pages_.size(),
|
||||
loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(),
|
||||
document_name_.string());
|
||||
}
|
||||
pages_.truncate(page - pages_offset_);
|
||||
tprintf("Loaded %d/%d pages (%d-%d) of document %s\n",
|
||||
pages_.size(), total_pages_, pages_offset_,
|
||||
pages_offset_ + pages_.size(), document_name_.string());
|
||||
set_total_pages(loaded_pages);
|
||||
return !pages_.empty();
|
||||
}
|
||||
|
||||
// Adds the given page data to this document, counting up memory.
|
||||
void DocumentData::AddPageToDocument(ImageData* page) {
|
||||
pages_.push_back(page);
|
||||
memory_used_ += page->MemoryUsed();
|
||||
}
|
||||
|
||||
// A collection of DocumentData that knows roughly how much memory it is using.
|
||||
DocumentCache::DocumentCache(inT64 max_memory)
|
||||
: total_pages_(0), memory_used_(0), max_memory_(max_memory) {}
|
||||
: num_pages_per_doc_(0), max_memory_(max_memory) {}
|
||||
DocumentCache::~DocumentCache() {}
|
||||
|
||||
// Adds all the documents in the list of filenames, counting memory.
|
||||
// The reader is used to read the files.
|
||||
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
|
||||
const char* lang, FileReader reader) {
|
||||
inT64 fair_share_memory = max_memory_ / filenames.size();
|
||||
const char* lang,
|
||||
CachingStrategy cache_strategy,
|
||||
FileReader reader) {
|
||||
cache_strategy_ = cache_strategy;
|
||||
inT64 fair_share_memory = 0;
|
||||
// In the round-robin case, each DocumentData handles restricting its content
|
||||
// to its fair share of memory. In the sequential case, DocumentCache
|
||||
// determines which DocumentDatas are held entirely in memory.
|
||||
if (cache_strategy_ == CS_ROUND_ROBIN)
|
||||
fair_share_memory = max_memory_ / filenames.size();
|
||||
for (int arg = 0; arg < filenames.size(); ++arg) {
|
||||
STRING filename = filenames[arg];
|
||||
DocumentData* document = new DocumentData(filename);
|
||||
if (document->LoadDocument(filename.string(), lang, 0,
|
||||
fair_share_memory, reader)) {
|
||||
document->SetDocument(filename.string(), lang, fair_share_memory, reader);
|
||||
AddToCache(document);
|
||||
} else {
|
||||
tprintf("Failed to load image %s!\n", filename.string());
|
||||
delete document;
|
||||
}
|
||||
if (!documents_.empty()) {
|
||||
// Try to get the first page now to verify the list of filenames.
|
||||
if (GetPageBySerial(0) != NULL) return true;
|
||||
tprintf("Load of page 0 failed!\n");
|
||||
}
|
||||
tprintf("Loaded %d pages, total %gMB\n",
|
||||
total_pages_, memory_used_ / 1048576.0);
|
||||
return total_pages_ > 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Adds document to the cache, throwing out other documents if needed.
|
||||
// Adds document to the cache.
|
||||
bool DocumentCache::AddToCache(DocumentData* data) {
|
||||
inT64 new_memory = data->memory_used();
|
||||
memory_used_ += new_memory;
|
||||
documents_.push_back(data);
|
||||
total_pages_ += data->NumPages();
|
||||
// Delete the first item in the array, and other pages of the same name
|
||||
// while memory is full.
|
||||
while (memory_used_ >= max_memory_ && max_memory_ > 0) {
|
||||
tprintf("Memory used=%lld vs max=%lld, discarding doc of size %lld\n",
|
||||
memory_used_ , max_memory_, documents_[0]->memory_used());
|
||||
memory_used_ -= documents_[0]->memory_used();
|
||||
total_pages_ -= documents_[0]->NumPages();
|
||||
documents_.remove(0);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -488,11 +606,104 @@ DocumentData* DocumentCache::FindDocument(const STRING& document_name) const {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
|
||||
// strategy, could take a long time.
|
||||
int DocumentCache::TotalPages() {
|
||||
if (cache_strategy_ == CS_SEQUENTIAL) {
|
||||
// In sequential mode, we assume each doc has the same number of pages
|
||||
// whether it is true or not.
|
||||
if (num_pages_per_doc_ == 0) GetPageSequential(0);
|
||||
return num_pages_per_doc_ * documents_.size();
|
||||
}
|
||||
int total_pages = 0;
|
||||
int num_docs = documents_.size();
|
||||
for (int d = 0; d < num_docs; ++d) {
|
||||
// We have to load a page to make NumPages() valid.
|
||||
documents_[d]->GetPage(0);
|
||||
total_pages += documents_[d]->NumPages();
|
||||
}
|
||||
return total_pages;
|
||||
}
|
||||
|
||||
// Returns a page by serial number, selecting them in a round-robin fashion
|
||||
// from all the documents.
|
||||
const ImageData* DocumentCache::GetPageBySerial(int serial) {
|
||||
int document_index = serial % documents_.size();
|
||||
return documents_[document_index]->GetPage(serial / documents_.size());
|
||||
// from all the documents. Highly disk-intensive, but doesn't need samples
|
||||
// to be shuffled between files to begin with.
|
||||
const ImageData* DocumentCache::GetPageRoundRobin(int serial) {
|
||||
int num_docs = documents_.size();
|
||||
int doc_index = serial % num_docs;
|
||||
const ImageData* doc = documents_[doc_index]->GetPage(serial / num_docs);
|
||||
for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
|
||||
doc_index = (serial + offset) % num_docs;
|
||||
int page = (serial + offset) / num_docs;
|
||||
documents_[doc_index]->LoadPageInBackground(page);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
// Returns a page by serial number, selecting them in sequence from each file.
|
||||
// Requires the samples to be shuffled between the files to give a random or
|
||||
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
|
||||
const ImageData* DocumentCache::GetPageSequential(int serial) {
|
||||
int num_docs = documents_.size();
|
||||
ASSERT_HOST(num_docs > 0);
|
||||
if (num_pages_per_doc_ == 0) {
|
||||
// Use the pages in the first doc as the number of pages in each doc.
|
||||
documents_[0]->GetPage(0);
|
||||
num_pages_per_doc_ = documents_[0]->NumPages();
|
||||
if (num_pages_per_doc_ == 0) {
|
||||
tprintf("First document cannot be empty!!\n");
|
||||
ASSERT_HOST(num_pages_per_doc_ > 0);
|
||||
}
|
||||
// Get rid of zero now if we don't need it.
|
||||
if (serial / num_pages_per_doc_ % num_docs > 0) documents_[0]->UnCache();
|
||||
}
|
||||
int doc_index = serial / num_pages_per_doc_ % num_docs;
|
||||
const ImageData* doc =
|
||||
documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
|
||||
// Count up total memory. Background loading makes it more complicated to
|
||||
// keep a running count.
|
||||
inT64 total_memory = 0;
|
||||
for (int d = 0; d < num_docs; ++d) {
|
||||
total_memory += documents_[d]->memory_used();
|
||||
}
|
||||
if (total_memory >= max_memory_) {
|
||||
// Find something to un-cache.
|
||||
// If there are more than 3 in front, then serial is from the back reader
|
||||
// of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then
|
||||
// we create a hole between them and then un-caching the backmost occupied
|
||||
// will work for both.
|
||||
int num_in_front = CountNeighbourDocs(doc_index, 1);
|
||||
for (int offset = num_in_front - 2;
|
||||
offset > 1 && total_memory >= max_memory_; --offset) {
|
||||
int next_index = (doc_index + offset) % num_docs;
|
||||
total_memory -= documents_[next_index]->UnCache();
|
||||
}
|
||||
// If that didn't work, the best solution is to un-cache from the back. If
|
||||
// we take away the document that a 2nd reader is using, it will put it
|
||||
// back and make a hole between.
|
||||
int num_behind = CountNeighbourDocs(doc_index, -1);
|
||||
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_;
|
||||
++offset) {
|
||||
int next_index = (doc_index + offset + num_docs) % num_docs;
|
||||
total_memory -= documents_[next_index]->UnCache();
|
||||
}
|
||||
}
|
||||
int next_index = (doc_index + 1) % num_docs;
|
||||
if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
|
||||
documents_[next_index]->LoadPageInBackground(0);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
// Helper counts the number of adjacent cached neighbours of index looking in
|
||||
// direction dir, ie index+dir, index+2*dir etc.
|
||||
int DocumentCache::CountNeighbourDocs(int index, int dir) {
|
||||
int num_docs = documents_.size();
|
||||
for (int offset = dir; abs(offset) < num_docs; offset += dir) {
|
||||
int offset_index = (index + offset + num_docs) % num_docs;
|
||||
if (!documents_[offset_index]->IsCached()) return offset - dir;
|
||||
}
|
||||
return num_docs;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "normalis.h"
|
||||
#include "rect.h"
|
||||
#include "strngs.h"
|
||||
#include "svutil.h"
|
||||
|
||||
struct Pix;
|
||||
|
||||
@ -34,8 +35,22 @@ namespace tesseract {
|
||||
const int kFeaturePadding = 2;
|
||||
// Number of pixels to pad around text boxes.
|
||||
const int kImagePadding = 4;
|
||||
// Number of training images to combine into a mini-batch for training.
|
||||
const int kNumPagesPerMiniBatch = 100;
|
||||
|
||||
// Enum to determine the caching and data sequencing strategy.
|
||||
enum CachingStrategy {
|
||||
// Reads all of one file before moving on to the next. Requires samples to be
|
||||
// shuffled across files. Uses the count of samples in the first file as
|
||||
// the count in all the files to achieve high-speed random access. As a
|
||||
// consequence, if subsequent files are smaller, they get entries used more
|
||||
// than once, and if subsequent files are larger, some entries are not used.
|
||||
// Best for larger data sets that don't fit in memory.
|
||||
CS_SEQUENTIAL,
|
||||
// Reads one sample from each file in rotation. Does not require shuffled
|
||||
// samples, but is extremely disk-intensive. Samples in smaller files also
|
||||
// get used more often than samples in larger files.
|
||||
// Best for smaller data sets that mostly fit in memory.
|
||||
CS_ROUND_ROBIN,
|
||||
};
|
||||
|
||||
class WordFeature {
|
||||
public:
|
||||
@ -103,6 +118,8 @@ class ImageData {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
|
||||
|
||||
// Other accessors.
|
||||
const STRING& imagefilename() const {
|
||||
@ -145,9 +162,10 @@ class ImageData {
|
||||
// Gets anything and everything with a non-NULL pointer, prescaled to a
|
||||
// given target_height (if 0, then the original image height), and aligned.
|
||||
// Also returns (if not NULL) the width and height of the scaled image.
|
||||
// The return value is the scale factor that was applied to the image to
|
||||
// achieve the target_height.
|
||||
float PreScale(int target_height, Pix** pix,
|
||||
// The return value is the scaled Pix, which must be pixDestroyed after use,
|
||||
// and scale_factor (if not NULL) is set to the scale factor that was applied
|
||||
// to the image to achieve the target_height.
|
||||
Pix* PreScale(int target_height, int max_height, float* scale_factor,
|
||||
int* scaled_width, int* scaled_height,
|
||||
GenericVector<TBOX>* boxes) const;
|
||||
|
||||
@ -184,6 +202,8 @@ class ImageData {
|
||||
|
||||
// A collection of ImageData that knows roughly how much memory it is using.
|
||||
class DocumentData {
|
||||
friend void* ReCachePagesFunc(void* data);
|
||||
|
||||
public:
|
||||
explicit DocumentData(const STRING& name);
|
||||
~DocumentData();
|
||||
@ -192,6 +212,9 @@ class DocumentData {
|
||||
// is used to read the file.
|
||||
bool LoadDocument(const char* filename, const char* lang, int start_page,
|
||||
inT64 max_memory, FileReader reader);
|
||||
// Sets up the document, without actually loading it.
|
||||
void SetDocument(const char* filename, const char* lang, inT64 max_memory,
|
||||
FileReader reader);
|
||||
// Writes all the pages to the given filename. Returns false on error.
|
||||
bool SaveDocument(const char* filename, FileWriter writer);
|
||||
bool SaveToBuffer(GenericVector<char>* buffer);
|
||||
@ -200,26 +223,64 @@ class DocumentData {
|
||||
void AddPageToDocument(ImageData* page);
|
||||
|
||||
const STRING& document_name() const {
|
||||
SVAutoLock lock(&general_mutex_);
|
||||
return document_name_;
|
||||
}
|
||||
int NumPages() const {
|
||||
SVAutoLock lock(&general_mutex_);
|
||||
return total_pages_;
|
||||
}
|
||||
inT64 memory_used() const {
|
||||
SVAutoLock lock(&general_mutex_);
|
||||
return memory_used_;
|
||||
}
|
||||
// If the given index is not currently loaded, loads it using a separate
|
||||
// thread. Note: there are 4 cases:
|
||||
// Document uncached: IsCached() returns false, total_pages_ < 0.
|
||||
// Required page is available: IsPageAvailable returns true. In this case,
|
||||
// total_pages_ > 0 and
|
||||
// pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size()
|
||||
// Pages are loaded, but the required one is not.
|
||||
// The requested page is being loaded by LoadPageInBackground. In this case,
|
||||
// index == pages_offset_. Once the loading starts, the pages lock is held
|
||||
// until it completes, at which point IsPageAvailable will unblock and return
|
||||
// true.
|
||||
void LoadPageInBackground(int index);
|
||||
// Returns a pointer to the page with the given index, modulo the total
|
||||
// number of pages, recaching if needed.
|
||||
// number of pages. Blocks until the background load is completed.
|
||||
const ImageData* GetPage(int index);
|
||||
// Returns true if the requested page is available, and provides a pointer,
|
||||
// which may be NULL if the document is empty. May block, even though it
|
||||
// doesn't guarantee to return true.
|
||||
bool IsPageAvailable(int index, ImageData** page);
|
||||
// Takes ownership of the given page index. The page is made NULL in *this.
|
||||
ImageData* TakePage(int index) {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
ImageData* page = pages_[index];
|
||||
pages_[index] = NULL;
|
||||
return page;
|
||||
}
|
||||
// Returns true if the document is currently loaded or in the process of
|
||||
// loading.
|
||||
bool IsCached() const { return NumPages() >= 0; }
|
||||
// Removes all pages from memory and frees the memory, but does not forget
|
||||
// the document metadata. Returns the memory saved.
|
||||
inT64 UnCache();
|
||||
// Shuffles all the pages in the document.
|
||||
void Shuffle();
|
||||
|
||||
private:
|
||||
// Loads as many pages can fit in max_memory_ starting at index pages_offset_.
|
||||
// Sets the value of total_pages_ behind a mutex.
|
||||
void set_total_pages(int total) {
|
||||
SVAutoLock lock(&general_mutex_);
|
||||
total_pages_ = total;
|
||||
}
|
||||
// Stores memory_used in memory_used_, with an SVAutoLock on general_mutex_
// constructed before the write.
void set_memory_used(inT64 memory_used) {
  SVAutoLock usage_guard(&general_mutex_);
  memory_used_ = memory_used;
}
|
||||
// Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
|
||||
// starting at index pages_offset_.
|
||||
bool ReCachePages();
|
||||
|
||||
private:
|
||||
@ -239,43 +300,77 @@ class DocumentData {
|
||||
inT64 max_memory_;
|
||||
// Saved reader from LoadDocument to allow re-caching.
|
||||
FileReader reader_;
|
||||
// Mutex that protects pages_ and pages_offset_ against multiple parallel
|
||||
// loads, and provides a wait for page.
|
||||
SVMutex pages_mutex_;
|
||||
// Mutex that protects other data members that callers want to access without
|
||||
// waiting for a load operation.
|
||||
mutable SVMutex general_mutex_;
|
||||
};
|
||||
|
||||
// A collection of DocumentData that knows roughly how much memory it is using.
|
||||
// Note that while it supports background read-ahead, it assumes that a single
|
||||
// thread is accessing documents, ie it is not safe for multiple threads to
|
||||
// access different documents in parallel, as one may de-cache the other's
|
||||
// content.
|
||||
class DocumentCache {
|
||||
public:
|
||||
explicit DocumentCache(inT64 max_memory);
|
||||
~DocumentCache();
|
||||
|
||||
// Deletes all existing documents from the cache.
|
||||
void Clear() {
|
||||
documents_.clear();
|
||||
num_pages_per_doc_ = 0;
|
||||
}
|
||||
// Adds all the documents in the list of filenames, counting memory.
|
||||
// The reader is used to read the files.
|
||||
bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
|
||||
FileReader reader);
|
||||
CachingStrategy cache_strategy, FileReader reader);
|
||||
|
||||
// Adds document to the cache, throwing out other documents if needed.
|
||||
// Adds document to the cache.
|
||||
bool AddToCache(DocumentData* data);
|
||||
|
||||
// Finds and returns a document by name.
|
||||
DocumentData* FindDocument(const STRING& document_name) const;
|
||||
|
||||
// Returns a page by serial number, selecting them in a round-robin fashion
|
||||
// from all the documents.
|
||||
const ImageData* GetPageBySerial(int serial);
|
||||
// Returns a page by serial number using the current cache_strategy_ to
|
||||
// determine the mapping from serial number to page.
|
||||
const ImageData* GetPageBySerial(int serial) {
|
||||
if (cache_strategy_ == CS_SEQUENTIAL)
|
||||
return GetPageSequential(serial);
|
||||
else
|
||||
return GetPageRoundRobin(serial);
|
||||
}
|
||||
|
||||
// Read-only access to the vector of documents held by the cache.
const PointerVector<DocumentData>& documents() const { return documents_; }
|
||||
int total_pages() const {
|
||||
return total_pages_;
|
||||
}
|
||||
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
|
||||
// strategy, could take a long time.
|
||||
int TotalPages();
|
||||
|
||||
private:
|
||||
// Returns a page by serial number, selecting them in a round-robin fashion
|
||||
// from all the documents. Highly disk-intensive, but doesn't need samples
|
||||
// to be shuffled between files to begin with.
|
||||
const ImageData* GetPageRoundRobin(int serial);
|
||||
// Returns a page by serial number, selecting them in sequence from each file.
|
||||
// Requires the samples to be shuffled between the files to give a random or
|
||||
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
|
||||
const ImageData* GetPageSequential(int serial);
|
||||
|
||||
// Helper counts the number of adjacent cached neighbour documents_ of index
|
||||
// looking in direction dir, ie index+dir, index+2*dir etc.
|
||||
int CountNeighbourDocs(int index, int dir);
|
||||
|
||||
// A group of pages that corresponds in some loose way to a document.
|
||||
PointerVector<DocumentData> documents_;
|
||||
// Total of all pages.
|
||||
int total_pages_;
|
||||
// Total of all memory used by the cache.
|
||||
inT64 memory_used_;
|
||||
// Strategy to use for caching and serializing data samples.
|
||||
CachingStrategy cache_strategy_;
|
||||
// Number of pages in the first document, used as a divisor in
|
||||
// GetPageSequential to determine the document index.
|
||||
int num_pages_per_doc_;
|
||||
// Max memory allowed in this cache.
|
||||
inT64 max_memory_;
|
||||
};
|
||||
|
@ -1,8 +1,12 @@
|
||||
/* -*-C-*-
|
||||
******************************************************************************
|
||||
*
|
||||
* File: matrix.h (Formerly matrix.h)
|
||||
* Description: Ratings matrix code. (Used by associator)
|
||||
* Description: Generic 2-d array/matrix and banded triangular matrix class.
|
||||
* Author: Ray Smith
|
||||
* TODO(rays) Separate from ratings matrix, which it also contains:
|
||||
*
|
||||
* Description: Ratings matrix class (specialization of banded matrix).
|
||||
* Segmentation search matrix of lists of BLOB_CHOICE.
|
||||
* Author: Mark Seaman, OCR Technology
|
||||
* Created: Wed May 16 13:22:06 1990
|
||||
* Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
|
||||
@ -22,12 +26,16 @@
|
||||
** limitations under the License.
|
||||
*
|
||||
*********************************************************************************/
|
||||
#ifndef TESSERACT_CCSTRUCT_MATRIX_H__
|
||||
#define TESSERACT_CCSTRUCT_MATRIX_H__
|
||||
#ifndef TESSERACT_CCSTRUCT_MATRIX_H_
|
||||
#define TESSERACT_CCSTRUCT_MATRIX_H_
|
||||
|
||||
#include <math.h>
|
||||
#include "kdpair.h"
|
||||
#include "points.h"
|
||||
#include "serialis.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
class BLOB_CHOICE;
|
||||
class BLOB_CHOICE_LIST;
|
||||
|
||||
#define NOT_CLASSIFIED reinterpret_cast<BLOB_CHOICE_LIST*>(0)
|
||||
@ -44,34 +52,60 @@ class GENERIC_2D_ARRAY {
|
||||
// either pass the memory in, or allocate after by calling Resize().
|
||||
GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array)
|
||||
: empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) {
|
||||
size_allocated_ = dim1 * dim2;
|
||||
}
|
||||
// Original constructor for a full rectangular matrix DOES allocate memory
|
||||
// and initialize it to empty.
|
||||
GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty)
|
||||
: empty_(empty), dim1_(dim1), dim2_(dim2) {
|
||||
array_ = new T[dim1_ * dim2_];
|
||||
for (int x = 0; x < dim1_; x++)
|
||||
for (int y = 0; y < dim2_; y++)
|
||||
this->put(x, y, empty_);
|
||||
int new_size = dim1 * dim2;
|
||||
array_ = new T[new_size];
|
||||
size_allocated_ = new_size;
|
||||
for (int i = 0; i < size_allocated_; ++i)
|
||||
array_[i] = empty_;
|
||||
}
|
||||
// Default constructor for array allocation. Use Resize to set the size.
|
||||
GENERIC_2D_ARRAY()
|
||||
: array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
|
||||
size_allocated_(0) {
|
||||
}
|
||||
GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY<T>& src)
|
||||
: array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
|
||||
size_allocated_(0) {
|
||||
*this = src;
|
||||
}
|
||||
// Releases the element storage. The elements themselves are not touched
// beyond what delete[] does for type T.
virtual ~GENERIC_2D_ARRAY() {
  delete[] array_;
}
|
||||
|
||||
// Copies the dimensions and the raw element bytes of src into *this.
// The storage is resized with ResizeNoInit, so it is only reallocated when
// it has to grow. empty_ is deliberately left as it was: this function has
// never assigned it, and that behaviour is preserved.
// The copy is a memcpy, so T must be safe to copy bitwise.
void operator=(const GENERIC_2D_ARRAY<T>& src) {
  // Self-assignment would hand memcpy two identical (overlapping) ranges,
  // which is undefined; there is nothing to copy anyway.
  if (this == &src) return;
  ResizeNoInit(src.dim1(), src.dim2());
  int size = num_elements();
  // An array that never allocated has a NULL array_. Passing NULL to memcpy
  // is undefined even for a zero byte count, so skip the call when empty.
  if (size > 0) {
    memcpy(array_, src.array_, size * sizeof(array_[0]));
  }
}
|
||||
|
||||
// Reallocate the array to the given size. Does not keep old data, but does
|
||||
// not initialize the array either.
|
||||
void ResizeNoInit(int size1, int size2) {
|
||||
int new_size = size1 * size2;
|
||||
if (new_size > size_allocated_) {
|
||||
delete [] array_;
|
||||
array_ = new T[new_size];
|
||||
size_allocated_ = new_size;
|
||||
}
|
||||
dim1_ = size1;
|
||||
dim2_ = size2;
|
||||
}
|
||||
|
||||
// Reallocate the array to the given size. Does not keep old data.
|
||||
void Resize(int size1, int size2, const T& empty) {
|
||||
empty_ = empty;
|
||||
if (size1 != dim1_ || size2 != dim2_) {
|
||||
dim1_ = size1;
|
||||
dim2_ = size2;
|
||||
delete [] array_;
|
||||
array_ = new T[dim1_ * dim2_];
|
||||
}
|
||||
ResizeNoInit(size1, size2);
|
||||
Clear();
|
||||
}
|
||||
|
||||
// Reallocate the array to the given size, keeping old data.
|
||||
void ResizeWithCopy(int size1, int size2) {
|
||||
if (size1 != dim1_ || size2 != dim2_) {
|
||||
T* new_array = new T[size1 * size2];
|
||||
int new_size = size1 * size2;
|
||||
T* new_array = new T[new_size];
|
||||
for (int col = 0; col < size1; ++col) {
|
||||
for (int row = 0; row < size2; ++row) {
|
||||
int old_index = col * dim2() + row;
|
||||
@ -87,6 +121,7 @@ class GENERIC_2D_ARRAY {
|
||||
array_ = new_array;
|
||||
dim1_ = size1;
|
||||
dim2_ = size2;
|
||||
size_allocated_ = new_size;
|
||||
}
|
||||
}
|
||||
|
||||
@ -106,9 +141,16 @@ class GENERIC_2D_ARRAY {
|
||||
if (fwrite(array_, sizeof(*array_), size, fp) != size) return false;
|
||||
return true;
|
||||
}
|
||||
// Writes the dimensions, then empty_, then the raw element array to fp.
// Returns false as soon as any of the writes comes up short.
// Elements are written bitwise via FWrite.
bool Serialize(tesseract::TFile* fp) const {
  const int count = num_elements();
  return SerializeSize(fp) &&
         fp->FWrite(&empty_, sizeof(empty_), 1) == 1 &&
         fp->FWrite(array_, sizeof(*array_), count) == count;
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// Only works with bitwise-serializeable typ
|
||||
// Only works with bitwise-serializeable types!
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp) {
|
||||
if (!DeSerializeSize(swap, fp)) return false;
|
||||
@ -122,6 +164,18 @@ class GENERIC_2D_ARRAY {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool DeSerialize(bool swap, tesseract::TFile* fp) {
|
||||
if (!DeSerializeSize(swap, fp)) return false;
|
||||
if (fp->FRead(&empty_, sizeof(empty_), 1) != 1) return false;
|
||||
if (swap) ReverseN(&empty_, sizeof(empty_));
|
||||
int size = num_elements();
|
||||
if (fp->FRead(array_, sizeof(*array_), size) != size) return false;
|
||||
if (swap) {
|
||||
for (int i = 0; i < size; ++i)
|
||||
ReverseN(&array_[i], sizeof(array_[i]));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
// Assumes a T::Serialize(FILE*) const function.
|
||||
@ -163,11 +217,17 @@ class GENERIC_2D_ARRAY {
|
||||
}
|
||||
|
||||
// Put a list element into the matrix at a specific location.
|
||||
// ICOORD overload: stores thing at (pos.x(), pos.y()) by forwarding to the
// (column, row) overload.
void put(ICOORD pos, const T& thing) {
  this->put(pos.x(), pos.y(), thing);
}
|
||||
void put(int column, int row, const T& thing) {
|
||||
array_[this->index(column, row)] = thing;
|
||||
}
|
||||
|
||||
// Get the item at a specified location from the matrix.
|
||||
// ICOORD overload: returns a copy of the cell at (pos.x(), pos.y()) by
// forwarding to the (column, row) overload.
T get(ICOORD pos) const {
  return this->get(pos.x(), pos.y());
}
|
||||
// Returns a copy of the cell that index(column, row) selects.
T get(int column, int row) const {
  const T& cell = array_[this->index(column, row)];
  return cell;
}
|
||||
@ -187,6 +247,207 @@ class GENERIC_2D_ARRAY {
|
||||
return &array_[this->index(column, 0)];
|
||||
}
|
||||
|
||||
// Adds addend to *this, element-by-element.
|
||||
void operator+=(const GENERIC_2D_ARRAY<T>& addend) {
|
||||
if (dim2_ == addend.dim2_) {
|
||||
// Faster if equal size in the major dimension.
|
||||
int size = MIN(num_elements(), addend.num_elements());
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] += addend.array_[i];
|
||||
}
|
||||
} else {
|
||||
for (int x = 0; x < dim1_; x++) {
|
||||
for (int y = 0; y < dim2_; y++) {
|
||||
(*this)(x, y) += addend(x, y);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Subtracts minuend from *this, element-by-element.
|
||||
void operator-=(const GENERIC_2D_ARRAY<T>& minuend) {
|
||||
if (dim2_ == minuend.dim2_) {
|
||||
// Faster if equal size in the major dimension.
|
||||
int size = MIN(num_elements(), minuend.num_elements());
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] -= minuend.array_[i];
|
||||
}
|
||||
} else {
|
||||
for (int x = 0; x < dim1_; x++) {
|
||||
for (int y = 0; y < dim2_; y++) {
|
||||
(*this)(x, y) -= minuend(x, y);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Adds addend to all elements.
|
||||
void operator+=(const T& addend) {
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] += addend;
|
||||
}
|
||||
}
|
||||
// Multiplies *this by factor, element-by-element.
|
||||
void operator*=(const T& factor) {
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] *= factor;
|
||||
}
|
||||
}
|
||||
// Clips *this to the given range.
|
||||
void Clip(const T& rangemin, const T& rangemax) {
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] = ClipToRange(array_[i], rangemin, rangemax);
|
||||
}
|
||||
}
|
||||
// Returns true if all elements of *this are within the given range.
|
||||
// Only uses operator<
|
||||
bool WithinBounds(const T& rangemin, const T& rangemax) const {
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const T& value = array_[i];
|
||||
if (value < rangemin || rangemax < value)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Normalize the whole array.
|
||||
double Normalize() {
|
||||
int size = num_elements();
|
||||
if (size <= 0) return 0.0;
|
||||
// Compute the mean.
|
||||
double mean = 0.0;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
mean += array_[i];
|
||||
}
|
||||
mean /= size;
|
||||
// Subtract the mean and compute the standard deviation.
|
||||
double sd = 0.0;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
double normed = array_[i] - mean;
|
||||
array_[i] = normed;
|
||||
sd += normed * normed;
|
||||
}
|
||||
sd = sqrt(sd / size);
|
||||
if (sd > 0.0) {
|
||||
// Divide by the sd.
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] /= sd;
|
||||
}
|
||||
}
|
||||
return sd;
|
||||
}
|
||||
|
||||
// Returns the maximum value of the array.
|
||||
T Max() const {
|
||||
int size = num_elements();
|
||||
if (size <= 0) return empty_;
|
||||
// Compute the max.
|
||||
T max_value = array_[0];
|
||||
for (int i = 1; i < size; ++i) {
|
||||
const T& value = array_[i];
|
||||
if (value > max_value) max_value = value;
|
||||
}
|
||||
return max_value;
|
||||
}
|
||||
|
||||
// Returns the maximum absolute value of the array.
|
||||
T MaxAbs() const {
|
||||
int size = num_elements();
|
||||
if (size <= 0) return empty_;
|
||||
// Compute the max.
|
||||
T max_abs = static_cast<T>(0);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
T value = static_cast<T>(fabs(array_[i]));
|
||||
if (value > max_abs) max_abs = value;
|
||||
}
|
||||
return max_abs;
|
||||
}
|
||||
|
||||
// Accumulates the element-wise sums of squares of src into *this.
|
||||
void SumSquares(const GENERIC_2D_ARRAY<T>& src) {
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] += src.array_[i] * src.array_[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Scales each element using the ada-grad algorithm, ie array_[i] by
|
||||
// sqrt(num_samples/max(1,sqsum[i])).
|
||||
void AdaGradScaling(const GENERIC_2D_ARRAY<T>& sqsum, int num_samples) {
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
array_[i] *= sqrt(num_samples / MAX(1.0, sqsum.array_[i]));
|
||||
}
|
||||
}
|
||||
|
||||
void AssertFinite() const {
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
ASSERT_HOST(isfinite(array_[i]));
|
||||
}
|
||||
}
|
||||
|
||||
// REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a
|
||||
// num_dims-dimensional array/tensor with dimensions given by dims, (ordered
|
||||
// from most significant to least significant, the same as standard C arrays)
|
||||
// and moves src_dim to dest_dim, with the initial dest_dim and any dimensions
|
||||
// in between shifted towards the hole left by src_dim. Example:
|
||||
// Current data content: array_=[0, 1, 2, ....119]
|
||||
// perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]...
|
||||
// but the current dimensions are irrelevant.
|
||||
// num_dims = 4, dims=[5, 4, 3, 2]
|
||||
// src_dim=3, dest_dim=1
|
||||
// tensor=[[[[0, 1][2, 3][4, 5]]
|
||||
// [[6, 7][8, 9][10, 11]]
|
||||
// [[12, 13][14, 15][16, 17]]
|
||||
// [[18, 19][20, 21][22, 23]]]
|
||||
// [[[24, 25]...
|
||||
// output dims =[5, 2, 4, 3]
|
||||
// output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
|
||||
// [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
|
||||
// [[[24, 26, 28]...
|
||||
// which is stored in the array_ as:
|
||||
// [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...]
|
||||
// NOTE: the 2 stored matrix dimensions are simply copied from *this. To
|
||||
// change the dimensions after the transpose, use ResizeNoInit.
|
||||
// Higher dimensions above 2 are strictly the responsibility of the caller.
|
||||
void RotatingTranspose(const int* dims, int num_dims, int src_dim,
|
||||
int dest_dim, GENERIC_2D_ARRAY<T>* result) const {
|
||||
int max_d = MAX(src_dim, dest_dim);
|
||||
int min_d = MIN(src_dim, dest_dim);
|
||||
// In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the
|
||||
// ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1]
|
||||
// being contiguous blocks of data that will move together, and
|
||||
// [d0, min_d -1] being replicas of the transpose operation.
|
||||
// num_replicas represents the large dimensions unchanged by the operation.
|
||||
// move_size represents the small dimensions unchanged by the operation.
|
||||
// src_step represents the stride in the src between each adjacent group
|
||||
// in the destination.
|
||||
int num_replicas = 1, move_size = 1, src_step = 1;
|
||||
for (int d = 0; d < min_d; ++d) num_replicas *= dims[d];
|
||||
for (int d = max_d + 1; d < num_dims; ++d) move_size *= dims[d];
|
||||
for (int d = src_dim + 1; d < num_dims; ++d) src_step *= dims[d];
|
||||
if (src_dim > dest_dim) src_step *= dims[src_dim];
|
||||
// wrap_size is the size of a single replica, being the amount that is
|
||||
// handled num_replicas times.
|
||||
int wrap_size = move_size;
|
||||
for (int d = min_d; d <= max_d; ++d) wrap_size *= dims[d];
|
||||
result->ResizeNoInit(dim1_, dim2_);
|
||||
result->empty_ = empty_;
|
||||
const T* src = array_;
|
||||
T* dest = result->array_;
|
||||
for (int replica = 0; replica < num_replicas; ++replica) {
|
||||
for (int start = 0; start < src_step; start += move_size) {
|
||||
for (int pos = start; pos < wrap_size; pos += src_step) {
|
||||
memcpy(dest, src + pos, sizeof(*dest) * move_size);
|
||||
dest += move_size;
|
||||
}
|
||||
}
|
||||
src += wrap_size;
|
||||
}
|
||||
}
|
||||
|
||||
// Delete objects pointed to by array_[i].
|
||||
void delete_matrix_pointers() {
|
||||
int size = num_elements();
|
||||
@ -206,6 +467,13 @@ class GENERIC_2D_ARRAY {
|
||||
if (fwrite(&size, sizeof(size), 1, fp) != 1) return false;
|
||||
return true;
|
||||
}
|
||||
// TFile variant of the size serializer: writes dim1_ and then dim2_, each
// as an inT32. Returns false if either write comes up short.
bool SerializeSize(tesseract::TFile* fp) const {
  const inT32 first_dim = dim1_;
  const inT32 second_dim = dim2_;
  return fp->FWrite(&first_dim, sizeof(first_dim), 1) == 1 &&
         fp->FWrite(&second_dim, sizeof(second_dim), 1) == 1;
}
|
||||
// Factored helper to deserialize the size.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerializeSize(bool swap, FILE* fp) {
|
||||
@ -219,11 +487,26 @@ class GENERIC_2D_ARRAY {
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
bool DeSerializeSize(bool swap, tesseract::TFile* fp) {
|
||||
inT32 size1, size2;
|
||||
if (fp->FRead(&size1, sizeof(size1), 1) != 1) return false;
|
||||
if (fp->FRead(&size2, sizeof(size2), 1) != 1) return false;
|
||||
if (swap) {
|
||||
ReverseN(&size1, sizeof(size1));
|
||||
ReverseN(&size2, sizeof(size2));
|
||||
}
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
|
||||
T* array_;
|
||||
T empty_; // The unused cell.
|
||||
int dim1_; // Size of the 1st dimension in indexing functions.
|
||||
int dim2_; // Size of the 2nd dimension in indexing functions.
|
||||
// The total size to which the array can be expanded before a realloc is
|
||||
// needed. If Resize is used, memory is retained so it can be re-expanded
|
||||
// without a further alloc, and this stores the allocated size.
|
||||
int size_allocated_;
|
||||
};
|
||||
|
||||
// A generic class to store a banded triangular matrix with entries of type T.
|
||||
@ -349,4 +632,4 @@ struct MATRIX_COORD {
|
||||
// The MatrixCoordPair contains a MATRIX_COORD and its priority.
|
||||
typedef tesseract::KDPairInc<float, MATRIX_COORD> MatrixCoordPair;
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_MATRIX_H__
|
||||
#endif // TESSERACT_CCSTRUCT_MATRIX_H_
|
||||
|
@ -86,16 +86,3 @@ DIR128::DIR128( //from fcoord
|
||||
while (high - low > 1);
|
||||
dir = low;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* dir_to_gradient
|
||||
*
|
||||
* Convert a direction to a vector.
|
||||
**********************************************************************/
|
||||
|
||||
#if 0 // code is buggy for negative dir and unused
|
||||
ICOORD DIR128::vector() const { //convert to vector
|
||||
return dirtab[dir]; //easy really
|
||||
}
|
||||
#endif
|
||||
|
@ -77,7 +77,6 @@ class DLLSYM DIR128
|
||||
inT8 get_dir() const { //access function
|
||||
return dir;
|
||||
}
|
||||
ICOORD vector() const; //turn to vector
|
||||
|
||||
private:
|
||||
inT8 dir; //a direction
|
||||
|
@ -52,22 +52,15 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height,
|
||||
// only use opencl if compiled w/ OpenCL and selected device is opencl
|
||||
#ifdef USE_OPENCL
|
||||
// all of channel 0 then all of channel 1...
|
||||
int *histogramAllChannels = new int[kHistogramSize * num_channels];
|
||||
int* histogramAllChannels = new int[kHistogramSize * num_channels];
|
||||
|
||||
// Calculate Histogram on GPU
|
||||
OpenclDevice od;
|
||||
if (od.selectedDeviceIsOpenCL() &&
|
||||
(num_channels == 1 || num_channels == 4) && top == 0 && left == 0 ) {
|
||||
od.HistogramRectOCL(
|
||||
(const unsigned char*)pixGetData(src_pix),
|
||||
num_channels,
|
||||
pixGetWpl(src_pix) * 4,
|
||||
left,
|
||||
top,
|
||||
width,
|
||||
height,
|
||||
kHistogramSize,
|
||||
histogramAllChannels);
|
||||
if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) &&
|
||||
top == 0 && left == 0) {
|
||||
od.HistogramRectOCL((unsigned char*)pixGetData(src_pix), num_channels,
|
||||
pixGetWpl(src_pix) * 4, left, top, width, height,
|
||||
kHistogramSize, histogramAllChannels);
|
||||
|
||||
// Calculate Threshold from Histogram on cpu
|
||||
for (int ch = 0; ch < num_channels; ++ch) {
|
||||
@ -143,7 +136,6 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height,
|
||||
delete[] histogramAllChannels;
|
||||
#endif // USE_OPENCL
|
||||
|
||||
|
||||
if (!any_good_hivalue) {
|
||||
// Use the best of the ones that were not good enough.
|
||||
(*hi_values)[best_hi_index] = best_hi_value;
|
||||
|
@ -17,8 +17,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OTSUTHR_H__
|
||||
#define TESSERACT_CCMAIN_OTSUTHR_H__
|
||||
#ifndef TESSERACT_CCMAIN_OTSUTHR_H_
|
||||
#define TESSERACT_CCMAIN_OTSUTHR_H_
|
||||
|
||||
struct Pix;
|
||||
|
||||
@ -53,4 +53,4 @@ int OtsuStats(const int* histogram, int* H_out, int* omega0_out);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OTSUTHR_H__
|
||||
#endif // TESSERACT_CCMAIN_OTSUTHR_H_
|
||||
|
@ -303,8 +303,9 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in,
|
||||
static_cast<tesseract::OcrEngineMode>(norm_mode);
|
||||
tesseract = tess;
|
||||
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
|
||||
if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
|
||||
word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
|
||||
if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
|
||||
word->cblob_list()->empty()) ||
|
||||
(pb != NULL && !pb->IsText())) {
|
||||
// Empty words occur when all the blobs have been moved to the rej_blobs
|
||||
// list, which seems to occur frequently in junk.
|
||||
SetupFake(unicharset_in);
|
||||
@ -528,13 +529,12 @@ void WERD_RES::FilterWordChoices(int debug_level) {
|
||||
if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
|
||||
choice->certainty(i) - best_choice->certainty(j) < threshold) {
|
||||
if (debug_level >= 2) {
|
||||
STRING label;
|
||||
label.add_str_int("\nDiscarding bad choice #", index);
|
||||
choice->print(label.string());
|
||||
tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g"
|
||||
" BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n",
|
||||
i, j, chunk, choice->certainty(i),
|
||||
best_choice->certainty(j), threshold);
|
||||
choice->print("WorstCertaintyDiffWorseThan");
|
||||
tprintf(
|
||||
"i %d j %d Choice->Blob[i].Certainty %.4g"
|
||||
" WorstOtherChoiceCertainty %g Threshold %g\n",
|
||||
i, j, choice->certainty(i), best_choice->certainty(j), threshold);
|
||||
tprintf("Discarding bad choice #%d\n", index);
|
||||
}
|
||||
delete it.extract();
|
||||
break;
|
||||
@ -882,17 +882,18 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
|
||||
choice_it.add_after_then_move(choices[c]);
|
||||
ratings->put(c, c, choice_list);
|
||||
}
|
||||
FakeWordFromRatings();
|
||||
FakeWordFromRatings(TOP_CHOICE_PERM);
|
||||
reject_map.initialise(blob_count);
|
||||
best_state.init_to_size(blob_count, 1);
|
||||
done = true;
|
||||
}
|
||||
|
||||
// Creates a WERD_CHOICE for the word using the top choices from the leading
|
||||
// diagonal of the ratings matrix.
|
||||
void WERD_RES::FakeWordFromRatings() {
|
||||
void WERD_RES::FakeWordFromRatings(PermuterType permuter) {
|
||||
int num_blobs = ratings->dimension();
|
||||
WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
|
||||
word_choice->set_permuter(TOP_CHOICE_PERM);
|
||||
word_choice->set_permuter(permuter);
|
||||
for (int b = 0; b < num_blobs; ++b) {
|
||||
UNICHAR_ID unichar_id = UNICHAR_SPACE;
|
||||
float rating = MAX_INT32;
|
||||
@ -1105,6 +1106,7 @@ void WERD_RES::InitNonPointers() {
|
||||
x_height = 0.0;
|
||||
caps_height = 0.0;
|
||||
baseline_shift = 0.0f;
|
||||
space_certainty = 0.0f;
|
||||
guessed_x_ht = TRUE;
|
||||
guessed_caps_ht = TRUE;
|
||||
combination = FALSE;
|
||||
|
@ -295,6 +295,9 @@ class WERD_RES : public ELIST_LINK {
|
||||
float x_height; // post match estimate
|
||||
float caps_height; // post match estimate
|
||||
float baseline_shift; // post match estimate.
|
||||
// Certainty score for the spaces either side of this word (LSTM mode).
|
||||
// MIN this value with the actual word certainty.
|
||||
float space_certainty;
|
||||
|
||||
/*
|
||||
To deal with fuzzy spaces we need to be able to combine "words" to form
|
||||
@ -327,7 +330,7 @@ class WERD_RES : public ELIST_LINK {
|
||||
}
|
||||
// Deep copies everything except the ratings MATRIX.
|
||||
// To get that use deep_copy below.
|
||||
WERD_RES(const WERD_RES &source) : ELIST_LINK(source) {
|
||||
WERD_RES(const WERD_RES& source) : ELIST_LINK(source) {
|
||||
InitPointers();
|
||||
*this = source; // see operator=
|
||||
}
|
||||
@ -590,7 +593,7 @@ class WERD_RES : public ELIST_LINK {
|
||||
|
||||
// Creates a WERD_CHOICE for the word using the top choices from the leading
|
||||
// diagonal of the ratings matrix.
|
||||
void FakeWordFromRatings();
|
||||
void FakeWordFromRatings(PermuterType permuter);
|
||||
|
||||
// Copies the best_choice strings to the correct_text for adaption/training.
|
||||
void BestChoiceToCorrectText();
|
||||
@ -630,7 +633,7 @@ class WERD_RES : public ELIST_LINK {
|
||||
static WERD_RES* deep_copy(const WERD_RES* src) {
|
||||
WERD_RES* result = new WERD_RES(*src);
|
||||
// That didn't copy the ratings, but we want a copy if there is one to
|
||||
// begin width.
|
||||
// begin with.
|
||||
if (src->ratings != NULL)
|
||||
result->ratings = src->ratings->DeepCopy();
|
||||
return result;
|
||||
|
@ -126,7 +126,7 @@ typedef GenericVector<ParamsTrainingHypothesis> ParamsTrainingHypothesisList;
|
||||
// explored on PASS1, PASS2, fix xheight pass, etc).
|
||||
class ParamsTrainingBundle {
|
||||
public:
|
||||
ParamsTrainingBundle() {};
|
||||
ParamsTrainingBundle() {}
|
||||
// Starts a new hypothesis list.
|
||||
// Should be called at the beginning of a new run of the segmentation search.
|
||||
void StartHypothesisList() {
|
||||
|
@ -29,59 +29,45 @@ struct Pix;
|
||||
|
||||
CLISTIZEH (PDBLK)
|
||||
///page block
|
||||
class PDBLK
|
||||
{
|
||||
class PDBLK {
|
||||
friend class BLOCK_RECT_IT; //< block iterator
|
||||
|
||||
public:
|
||||
///empty constructor
|
||||
/// empty constructor
|
||||
PDBLK() {
|
||||
hand_poly = NULL;
|
||||
index_ = 0;
|
||||
}
|
||||
///simple constructor
|
||||
/// simple constructor
|
||||
PDBLK(inT16 xmin, //< bottom left
|
||||
inT16 ymin,
|
||||
inT16 xmax, //< top right
|
||||
inT16 ymax);
|
||||
|
||||
///set vertex lists
|
||||
/// set vertex lists
|
||||
///@param left list of left vertices
|
||||
///@param right list of right vertices
|
||||
void set_sides(ICOORDELT_LIST *left,
|
||||
ICOORDELT_LIST *right);
|
||||
void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right);
|
||||
|
||||
///destructor
|
||||
~PDBLK () {
|
||||
if (hand_poly) delete hand_poly;
|
||||
}
|
||||
/// destructor
|
||||
~PDBLK() { delete hand_poly; }
|
||||
|
||||
POLY_BLOCK *poly_block() const {
|
||||
return hand_poly;
|
||||
}
|
||||
///set the poly block
|
||||
void set_poly_block(POLY_BLOCK *blk) {
|
||||
hand_poly = blk;
|
||||
}
|
||||
///get box
|
||||
void bounding_box(ICOORD &bottom_left, //bottom left
|
||||
ICOORD &top_right) const { //topright
|
||||
bottom_left = box.botleft ();
|
||||
top_right = box.topright ();
|
||||
}
|
||||
///get real box
|
||||
const TBOX &bounding_box() const {
|
||||
return box;
|
||||
POLY_BLOCK *poly_block() const { return hand_poly; }
|
||||
/// set the poly block
|
||||
void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; }
|
||||
/// get box
|
||||
void bounding_box(ICOORD &bottom_left, // bottom left
|
||||
ICOORD &top_right) const { // topright
|
||||
bottom_left = box.botleft();
|
||||
top_right = box.topright();
|
||||
}
|
||||
/// get real box
|
||||
const TBOX &bounding_box() const { return box; }
|
||||
|
||||
int index() const {
|
||||
return index_;
|
||||
}
|
||||
void set_index(int value) {
|
||||
index_ = value;
|
||||
}
|
||||
int index() const { return index_; }
|
||||
void set_index(int value) { index_ = value; }
|
||||
|
||||
///is pt inside block
|
||||
/// is pt inside block
|
||||
BOOL8 contains(ICOORD pt);
|
||||
|
||||
/// reposition block
|
||||
@ -93,19 +79,17 @@ class PDBLK
|
||||
// mask image.
|
||||
Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box);
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
///draw histogram
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
/// draw histogram
|
||||
///@param window window to draw in
|
||||
///@param serial serial number
|
||||
///@param colour colour to draw in
|
||||
void plot(ScrollView* window,
|
||||
inT32 serial,
|
||||
ScrollView::Color colour);
|
||||
#endif // GRAPHICS_DISABLED
|
||||
void plot(ScrollView *window, inT32 serial, ScrollView::Color colour);
|
||||
#endif // GRAPHICS_DISABLED
|
||||
|
||||
///assignment
|
||||
/// assignment
|
||||
///@param source from this
|
||||
PDBLK & operator= (const PDBLK & source);
|
||||
PDBLK &operator=(const PDBLK &source);
|
||||
|
||||
protected:
|
||||
POLY_BLOCK *hand_poly; //< weird as well
|
||||
|
@ -294,6 +294,8 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete lines;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -17,8 +17,8 @@
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H__
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H__
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
@ -213,7 +213,7 @@ enum PageIteratorLevel {
|
||||
};
|
||||
|
||||
/**
|
||||
* JUSTIFICATION_UNKNONW
|
||||
* JUSTIFICATION_UNKNOWN
|
||||
* The alignment is not clearly one of the other options. This could happen
|
||||
* for example if there are only one or two lines of text or the text looks
|
||||
* like source code or poetry.
|
||||
@ -235,7 +235,7 @@ enum PageIteratorLevel {
|
||||
*
|
||||
* JUSTIFICATION_RIGHT
|
||||
* Each line, except possibly the first, is flush to the same right tab stop.
|
||||
*/
|
||||
*/
|
||||
enum ParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
@ -255,17 +255,20 @@ enum ParagraphJustification {
|
||||
*/
|
||||
enum OcrEngineMode {
|
||||
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest
|
||||
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower
|
||||
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy
|
||||
OEM_DEFAULT // Specify this mode when calling init_*(),
|
||||
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
|
||||
OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
|
||||
// to Tesseract when things get difficult.
|
||||
OEM_DEFAULT, // Specify this mode when calling init_*(),
|
||||
// to indicate that any of the above modes
|
||||
// should be automatically inferred from the
|
||||
// variables in the language-specific config,
|
||||
// command-line configs, or if not specified
|
||||
// in any of the above should be set to the
|
||||
// default OEM_TESSERACT_ONLY.
|
||||
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower
|
||||
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H__
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
@ -288,7 +288,8 @@ class WERD_CHOICE : public ELIST_LINK {
|
||||
src_certainty, src_permuter);
|
||||
}
|
||||
WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
|
||||
WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) {
|
||||
WERD_CHOICE(const WERD_CHOICE &word)
|
||||
: ELIST_LINK(word), unicharset_(word.unicharset_) {
|
||||
this->init(word.length());
|
||||
this->operator=(word);
|
||||
}
|
||||
@ -507,6 +508,20 @@ class WERD_CHOICE : public ELIST_LINK {
|
||||
}
|
||||
return word_str;
|
||||
}
|
||||
// Returns true if any unichar_id in the word is a non-space-delimited char.
|
||||
bool ContainsAnyNonSpaceDelimited() const {
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if the word is all spaces.
|
||||
bool IsAllSpaces() const {
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
if (unichar_ids_[i] != UNICHAR_SPACE) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Call this to override the default (strict left to right graphemes)
|
||||
// with the fact that some engine produces a "reading order" set of
|
||||
|
@ -29,9 +29,9 @@
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
TBOX::TBOX( //constructor
|
||||
const ICOORD pt1, //one corner
|
||||
const ICOORD pt2 //the other corner
|
||||
TBOX::TBOX( // constructor
|
||||
const ICOORD pt1, // one corner
|
||||
const ICOORD pt2 // the other corner
|
||||
) {
|
||||
if (pt1.x () <= pt2.x ()) {
|
||||
if (pt1.y () <= pt2.y ()) {
|
||||
|
@ -267,10 +267,10 @@ void REJ::full_print(FILE *fp) {
|
||||
|
||||
//The REJMAP class has been hacked to use alloc_struct instead of new [].
|
||||
//This is to reduce memory fragmentation only as it is rather kludgy.
|
||||
//alloc_struct by-passes the call to the contsructor of REJ on each
|
||||
//array element. Although the constructor is empty, the BITS16 members
|
||||
//do have a constructor which sets all the flags to 0. The memset
|
||||
//replaces this functionality.
|
||||
// alloc_struct by-passes the call to the constructor of REJ on each
|
||||
// array element. Although the constructor is empty, the BITS16 members
|
||||
// do have a constructor which sets all the flags to 0. The memset
|
||||
// replaces this functionality.
|
||||
|
||||
REJMAP::REJMAP( //classwise copy
|
||||
const REJMAP &source) {
|
||||
|
@ -48,8 +48,7 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
|
||||
#include "bits16.h"
|
||||
#include "params.h"
|
||||
|
||||
enum REJ_FLAGS
|
||||
{
|
||||
enum REJ_FLAGS {
|
||||
/* Reject modes which are NEVER overridden */
|
||||
R_TESS_FAILURE, // PERM Tess didn't classify
|
||||
R_SMALL_XHT, // PERM Xht too small
|
||||
@ -83,11 +82,11 @@ enum REJ_FLAGS
|
||||
R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space
|
||||
|
||||
/* Accept modes which occur between the above rejection groups */
|
||||
R_NN_ACCEPT, //NN acceptance
|
||||
R_HYPHEN_ACCEPT, //Hyphen acceptance
|
||||
R_MM_ACCEPT, //Matrix match acceptance
|
||||
R_QUALITY_ACCEPT, //Accept word in good quality doc
|
||||
R_MINIMAL_REJ_ACCEPT //Accept EVERYTHING except tess failures
|
||||
R_NN_ACCEPT, // NN acceptance
|
||||
R_HYPHEN_ACCEPT, // Hyphen acceptance
|
||||
R_MM_ACCEPT, // Matrix match acceptance
|
||||
R_QUALITY_ACCEPT, // Accept word in good quality doc
|
||||
R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures
|
||||
};
|
||||
|
||||
/* REJECT MAP VALUES */
|
||||
|
@ -215,7 +215,6 @@ inT32 STATS::min_bucket() const { // Find min
|
||||
return rangemin_ + min;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* STATS::max_bucket
|
||||
*
|
||||
|
@ -14,11 +14,11 @@ endif
|
||||
include_HEADERS = \
|
||||
basedir.h errcode.h fileerr.h genericvector.h helpers.h host.h memry.h \
|
||||
ndminx.h params.h ocrclass.h platform.h serialis.h strngs.h \
|
||||
tesscallback.h unichar.h unicharmap.h unicharset.h
|
||||
tesscallback.h unichar.h unicharcompress.h unicharmap.h unicharset.h
|
||||
|
||||
noinst_HEADERS = \
|
||||
ambigs.h bits16.h bitvector.h ccutil.h clst.h doubleptr.h elst2.h \
|
||||
elst.h genericheap.h globaloc.h hashfn.h indexmapbidi.h kdpair.h lsterr.h \
|
||||
elst.h genericheap.h globaloc.h indexmapbidi.h kdpair.h lsterr.h \
|
||||
nwmain.h object_cache.h qrsequence.h sorthelper.h stderr.h \
|
||||
scanutils.h tessdatamanager.h tprintf.h unicity_table.h unicodes.h \
|
||||
universalambigs.h
|
||||
@ -38,7 +38,7 @@ libtesseract_ccutil_la_SOURCES = \
|
||||
mainblk.cpp memry.cpp \
|
||||
serialis.cpp strngs.cpp scanutils.cpp \
|
||||
tessdatamanager.cpp tprintf.cpp \
|
||||
unichar.cpp unicharmap.cpp unicharset.cpp unicodes.cpp \
|
||||
unichar.cpp unicharcompress.cpp unicharmap.cpp unicharset.cpp unicodes.cpp \
|
||||
params.cpp universalambigs.cpp
|
||||
|
||||
if T_WIN
|
||||
|
@ -59,17 +59,18 @@ class UnicharIdArrayUtils {
|
||||
// less than length of array2, if any array1[i] is less than array2[i].
|
||||
// Returns 0 if the arrays are equal, 1 otherwise.
|
||||
// The function assumes that the arrays are terminated by INVALID_UNICHAR_ID.
|
||||
static inline int compare(const UNICHAR_ID array1[],
|
||||
const UNICHAR_ID array2[]) {
|
||||
const UNICHAR_ID *ptr1 = array1;
|
||||
const UNICHAR_ID *ptr2 = array2;
|
||||
while (*ptr1 != INVALID_UNICHAR_ID && *ptr2 != INVALID_UNICHAR_ID) {
|
||||
if (*ptr1 != *ptr2) return *ptr1 < *ptr2 ? -1 : 1;
|
||||
++ptr1;
|
||||
++ptr2;
|
||||
static inline int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2) {
|
||||
for (;;) {
|
||||
const UNICHAR_ID val1 = *ptr1++;
|
||||
const UNICHAR_ID val2 = *ptr2++;
|
||||
if (val1 != val2) {
|
||||
if (val1 == INVALID_UNICHAR_ID) return -1;
|
||||
if (val2 == INVALID_UNICHAR_ID) return 1;
|
||||
if (val1 < val2) return -1;
|
||||
return 1;
|
||||
}
|
||||
if (val1 == INVALID_UNICHAR_ID) return 0;
|
||||
}
|
||||
if (*ptr1 == INVALID_UNICHAR_ID && *ptr2 == INVALID_UNICHAR_ID) return 0;
|
||||
return *ptr1 == INVALID_UNICHAR_ID ? -1 : 1;
|
||||
}
|
||||
|
||||
// Look uid in the vector of uids. If found, the index of the matched
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user