Merge branch 'master' into ocricola-cleanup

This commit is contained in:
Jim Regan 2017-04-28 23:38:52 +01:00 committed by GitHub
commit 3ee8db4e6b
504 changed files with 34505 additions and 39395 deletions

11
.gitignore vendored
View File

@ -52,6 +52,8 @@ training/wordlist2dawg
*.patch *.patch
# ignore compilation files # ignore compilation files
build/*
/bin
*/.deps/* */.deps/*
*/.libs/* */.libs/*
*.lo *.lo
@ -63,8 +65,6 @@ training/wordlist2dawg
*.jar *.jar
# tessdata # tessdata
*.cube.*
*.tesseract_cube.*
*.traineddata *.traineddata
# OpenCL # OpenCL
@ -73,5 +73,10 @@ kernel*.bin
# build dirs # build dirs
/build* /build*
/.cppan
/cppan /cppan
/win* /*.dll
/*.lib
/*.exe
/*.lnk
/win*

View File

@ -2,16 +2,16 @@ language: cpp
notifications: notifications:
email: false email: false
sudo: required sudo: required
os: os:
- linux - linux
#- osx - osx
branches: #branches:
only: #only:
- master #- master
addons: addons:
apt: apt:
@ -24,17 +24,17 @@ addons:
before_install: before_install:
- if [[ $TRAVIS_OS_NAME == linux ]]; then LINUX=true; fi - if [[ $TRAVIS_OS_NAME == linux ]]; then LINUX=true; fi
- if [[ $TRAVIS_OS_NAME == osx ]]; then OSX=true; fi - if [[ $TRAVIS_OS_NAME == osx ]]; then OSX=true; fi
- if [[ $OSX ]]; then brew update; fi
- export LEPT_VER=1.73 #- if [[ $OSX ]]; then brew update; fi
- export LEPT_VER=1.74.1
install: install:
- if [[ $OSX ]]; then brew install icu4c pango; brew link --force gettext; fi #- if [[ $OSX ]]; then brew install icu4c pango; brew link --force gettext; fi
- if [[ $OSX ]]; then export ICU_ROOT=/usr/local/opt/icu4c ; fi #- if [[ $OSX ]]; then export ICU_ROOT=/usr/local/opt/icu4c ; fi
- wget https://www.cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh - if [[ $LINUX ]]; then wget https://www.cmake.org/files/v3.7/cmake-3.7.2-Linux-x86_64.sh; fi
- sudo sh cmake-3.6.1-Linux-x86_64.sh --skip-license --prefix=/usr - if [[ $LINUX ]]; then sudo sh cmake-3.7.2-Linux-x86_64.sh --skip-license --prefix=/usr; fi
- wget -O leptonica.zip https://github.com/DanBloomberg/leptonica/archive/v$LEPT_VER.zip - wget -O leptonica.zip https://github.com/DanBloomberg/leptonica/archive/$LEPT_VER.zip
- unzip leptonica.zip -d . - unzip leptonica.zip -d .
- cmake -Hleptonica-$LEPT_VER -Bleptonica-$LEPT_VER/build - cmake -Hleptonica-$LEPT_VER -Bleptonica-$LEPT_VER/build
- make -C leptonica-$LEPT_VER/build - make -C leptonica-$LEPT_VER/build

14
AUTHORS
View File

@ -2,12 +2,14 @@ Ray Smith (lead developer) <theraysmith@gmail.com>
Ahmad Abdulkader Ahmad Abdulkader
Rika Antonova Rika Antonova
Nicholas Beato Nicholas Beato
Jeff Breidenbach
Samuel Charron Samuel Charron
Phil Cheatle Phil Cheatle
Simon Crouch Simon Crouch
David Eger David Eger
Sheelagh Huddleston Sheelagh Huddleston
Dan Johnson Dan Johnson
Rajesh Katikam
Thomas Kielbus Thomas Kielbus
Dar-Shyang Lee Dar-Shyang Lee
Zongyi (Joe) Liu Zongyi (Joe) Liu
@ -26,3 +28,15 @@ Joern Wanke
Ping Ping Xiu Ping Ping Xiu
Andrew Ziem Andrew Ziem
Oscar Zuniga Oscar Zuniga
Community Contributors:
Zdenko Podobný (Maintainer)
Jim Regan (Maintainer)
James R Barlow
Amit Dovev
Martin Ettl
Tom Morris
Tobias Müller
Egor Pugin
Sundar M. Vaidya
Stefan Weil

View File

@ -10,6 +10,12 @@
cmake_minimum_required(VERSION 2.8.11) cmake_minimum_required(VERSION 2.8.11)
if (NOT APPVEYOR)
if (WIN32 AND (CMAKE_VERSION VERSION_EQUAL 3.6 OR (CMAKE_VERSION VERSION_GREATER 3.6 AND CMAKE_VERSION VERSION_LESS 3.7)))
message(FATAL_ERROR "You have bugged CMake version 3.6 which is known to not work with tesseract. Please, upgrade CMake.")
endif()
endif()
# In-source builds are disabled. # In-source builds are disabled.
if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
message(FATAL_ERROR message(FATAL_ERROR
@ -40,25 +46,31 @@ set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "CMake Targets")
project(tesseract C CXX) project(tesseract C CXX)
set(VERSION_MAJOR 3) set(VERSION_MAJOR 4)
set(VERSION_MINOR 05) set(VERSION_MINOR 00)
set(VERSION_PLAIN ${VERSION_MAJOR}.${VERSION_MINOR}) set(VERSION_PLAIN ${VERSION_MAJOR}.${VERSION_MINOR})
set(MINIMUM_LEPTONICA_VERSION 1.71) set(MINIMUM_LEPTONICA_VERSION 1.74)
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/cppan) if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.cppan)
if (NOT Leptonica_DIR AND NOT MSVC) if (NOT Leptonica_DIR AND NOT MSVC)
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
pkg_check_modules(Leptonica REQUIRED lept) pkg_check_modules(Leptonica REQUIRED lept>=${MINIMUM_LEPTONICA_VERSION})
else() else()
find_package(Leptonica ${MINIMUM_LEPTONICA_VERSION} REQUIRED CONFIG) find_package(Leptonica ${MINIMUM_LEPTONICA_VERSION} REQUIRED CONFIG)
endif() endif()
else() else()
add_subdirectory(cppan) if (STATIC)
set(CPPAN_BUILD_SHARED_LIBS 0)
else()
set(CPPAN_BUILD_SHARED_LIBS 1)
endif()
add_subdirectory(.cppan)
endif() endif()
find_package(OpenCL QUIET) find_package(OpenCL QUIET)
find_package(PkgConfig)
option(BUILD_TRAINING_TOOLS "Build training tools" ON)
############################################################################### ###############################################################################
# #
@ -76,6 +88,9 @@ if (WIN32)
add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_CRT_SECURE_NO_WARNINGS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
if (APPVEYOR)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W0")
endif()
endif() endif()
set(LIB_Ws2_32 Ws2_32) set(LIB_Ws2_32 Ws2_32)
@ -104,12 +119,7 @@ include(Configure)
configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY) configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
set(INCLUDE_DIR set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")
${CMAKE_SOURCE_DIR}/api
${CMAKE_SOURCE_DIR}/ccmain
${CMAKE_SOURCE_DIR}/ccstruct
${CMAKE_SOURCE_DIR}/ccutil
)
configure_file( configure_file(
${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in ${CMAKE_SOURCE_DIR}/cmake/templates/TesseractConfig-version.cmake.in
@ -137,14 +147,14 @@ include_directories(${Leptonica_INCLUDE_DIRS})
include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR})
include_directories(api) include_directories(api)
include_directories(arch)
include_directories(ccmain) include_directories(ccmain)
include_directories(ccstruct) include_directories(ccstruct)
include_directories(ccutil) include_directories(ccutil)
include_directories(classify) include_directories(classify)
include_directories(cube)
include_directories(cutil) include_directories(cutil)
include_directories(dict) include_directories(dict)
include_directories(neural_networks/runtime) include_directories(lstm)
include_directories(opencl) include_directories(opencl)
include_directories(textord) include_directories(textord)
include_directories(vs2010/port) include_directories(vs2010/port)
@ -159,14 +169,14 @@ string(SUBSTRING ${VERSION_MINOR} 0 1 VERSION_MINOR_0)
string(SUBSTRING ${VERSION_MINOR} 1 1 VERSION_MINOR_1) string(SUBSTRING ${VERSION_MINOR} 1 1 VERSION_MINOR_1)
file(GLOB tesseract_src file(GLOB tesseract_src
arch/*.cpp
ccmain/*.cpp ccmain/*.cpp
ccstruct/*.cpp ccstruct/*.cpp
ccutil/*.cpp ccutil/*.cpp
classify/*.cpp classify/*.cpp
cube/*.cpp
cutil/*.cpp cutil/*.cpp
dict/*.cpp dict/*.cpp
neural_networks/runtime/*.cpp lstm/*.cpp
opencl/*.cpp opencl/*.cpp
textord/*.cpp textord/*.cpp
viewer/*.cpp viewer/*.cpp
@ -174,14 +184,14 @@ file(GLOB tesseract_src
) )
file(GLOB tesseract_hdr file(GLOB tesseract_hdr
api/*.h api/*.h
arch/*.h
ccmain/*.h ccmain/*.h
ccstruct/*.h ccstruct/*.h
ccutil/*.h ccutil/*.h
classify/*.h classify/*.h
cube/*.h
cutil/*.h cutil/*.h
dict/*.h dict/*.h
neural_networks/runtime/*.h lstm/*.h
opencl/*.h opencl/*.h
textord/*.h textord/*.h
viewer/*.h viewer/*.h
@ -201,25 +211,40 @@ set(tesseract_src ${tesseract_src}
api/pdfrenderer.cpp api/pdfrenderer.cpp
) )
add_library (tesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
if (NOT STATIC)
target_compile_definitions (tesseract PUBLIC -DTESS_EXPORTS)
endif()
target_link_libraries (tesseract ${LIB_Ws2_32} ${LIB_pthread})
set_target_properties (tesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
set_target_properties (tesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
if (WIN32) if (WIN32)
set_target_properties (tesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}) set_source_files_properties(
set_target_properties (tesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d) ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
if (MSVC)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
PROPERTIES COMPILE_FLAGS "/arch:AVX")
endif()
endif()
add_library (libtesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
if (NOT STATIC)
target_compile_definitions (libtesseract
PRIVATE -DTESS_EXPORTS
INTERFACE -DTESS_IMPORTS
)
set_target_properties (libtesseract PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS True)
endif()
target_link_libraries (libtesseract ${LIB_Ws2_32} ${LIB_pthread})
set_target_properties (libtesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
set_target_properties (libtesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1})
if (WIN32)
set_target_properties (libtesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR})
set_target_properties (libtesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d)
endif() endif()
if (NOT CPPAN_BUILD) if (NOT CPPAN_BUILD)
target_link_libraries (tesseract ${Leptonica_LIBRARIES}) target_link_libraries (libtesseract ${Leptonica_LIBRARIES})
export(TARGETS tesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) export(TARGETS libtesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
else() else()
target_link_libraries (tesseract cppan) target_link_libraries (libtesseract pvt.cppan.demo.danbloomberg.leptonica)
file(WRITE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_BINARY_DIR}/cppan.cmake)\n") file(WRITE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_BINARY_DIR}/cppan.cmake)\n")
export(TARGETS tesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) export(TARGETS libtesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake)
endif() endif()
######################################## ########################################
@ -231,12 +256,97 @@ set(tesseractmain_src
vs2010/tesseract/resource.h vs2010/tesseract/resource.h
vs2010/tesseract/tesseract.rc vs2010/tesseract/tesseract.rc
) )
add_executable (tesseractmain ${tesseractmain_src}) add_executable (tesseract ${tesseractmain_src})
target_link_libraries (tesseractmain tesseract) target_link_libraries (tesseract libtesseract)
set_target_properties (tesseractmain PROPERTIES OUTPUT_NAME tesseract)
######################################## ########################################
if (BUILD_TRAINING_TOOLS)
add_subdirectory(training) add_subdirectory(training)
endif()
get_target_property(tesseract_NAME libtesseract NAME)
get_target_property(tesseract_VERSION libtesseract VERSION)
get_target_property(tesseract_OUTPUT_NAME libtesseract OUTPUT_NAME)
configure_file(tesseract.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc @ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc DESTINATION lib/pkgconfig)
install(TARGETS tesseract RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install(TARGETS libtesseract EXPORT TesseractTargets RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install(EXPORT TesseractTargets DESTINATION cmake)
install(FILES
${CMAKE_BINARY_DIR}/TesseractConfig.cmake
${CMAKE_BINARY_DIR}/TesseractConfig-version.cmake
DESTINATION cmake)
install(FILES
# from api/makefile.am
api/apitypes.h
api/baseapi.h
api/capi.h
api/renderer.h
#from arch/makefile.am
arch/dotproductavx.h
arch/dotproductsse.h
arch/simddetect.h
#from ccmain/makefile.am
ccmain/thresholder.h
ccmain/ltrresultiterator.h
ccmain/pageiterator.h
ccmain/resultiterator.h
ccmain/osdetect.h
#from ccstruct/makefile.am
ccstruct/publictypes.h
#from ccutil/makefile.am
ccutil/basedir.h
ccutil/errcode.h
ccutil/fileerr.h
ccutil/genericvector.h
ccutil/helpers.h
ccutil/host.h
ccutil/memry.h
ccutil/ndminx.h
ccutil/params.h
ccutil/ocrclass.h
ccutil/platform.h
ccutil/serialis.h
ccutil/strngs.h
ccutil/tesscallback.h
ccutil/unichar.h
ccutil/unicharcompress.h
ccutil/unicharmap.h
ccutil/unicharset.h
#from lstm/makefile.am
lstm/convolve.h
lstm/ctc.h
lstm/fullyconnected.h
lstm/functions.h
lstm/input.h
lstm/lstm.h
lstm/lstmrecognizer.h
lstm/lstmtrainer.h
lstm/maxpool.h
lstm/networkbuilder.h
lstm/network.h
lstm/networkio.h
lstm/networkscratch.h
lstm/parallel.h
lstm/plumbing.h
lstm/recodebeam.h
lstm/reconfig.h
lstm/reversed.h
lstm/series.h
lstm/static_shape.h
lstm/stridemap.h
lstm/tfnetwork.h
lstm/weightmatrix.h
#${CMAKE_BINARY_DIR}/src/endianness.h
DESTINATION include/tesseract)
############################################################################### ###############################################################################

View File

@ -24,7 +24,7 @@ When creating an issue, please report your operating system, including its speci
Search through open and closed issues to see if similar issue has been reported already (and sometimes also has been solved). Search through open and closed issues to see if similar issue has been reported already (and sometimes also has been solved).
Similary, before you post your question in the forum, search through past threads to see if similar question has been asked already. Similarly, before you post your question in the forum, search through past threads to see if similar question has been asked already.
Read the [wiki](https://github.com/tesseract-ocr/tesseract/wiki) before you report your issue or ask a question in the forum. Read the [wiki](https://github.com/tesseract-ocr/tesseract/wiki) before you report your issue or ask a question in the forum.
@ -44,7 +44,7 @@ When attaching a file to the issue report / forum ...
Do not attach programs or libraries to your issues/posts. Do not attach programs or libraries to your issues/posts.
For large files or for programs, add a link to a iocation where they can be downloaded (your site, Git repo, Google Drive, Dropbox etc.) For large files or for programs, add a link to a location where they can be downloaded (your site, Git repo, Google Drive, Dropbox etc.)
Attaching a multi-page TIFF image is useful only if you have problem with multi-page functionality, otherwise attach only one or a few single page images. Attaching a multi-page TIFF image is useful only if you have problem with multi-page functionality, otherwise attach only one or a few single page images.

View File

@ -17,5 +17,5 @@ in this distribution is now licensed under the Apache License:
Other Dependencies and Licenses: Other Dependencies and Licenses:
================================ ================================
Tesseract uses Leptonica library (http://leptonica.com/) with a very weakly Tesseract uses Leptonica library (http://leptonica.com/) which essentially
restricted copyright license (http://leptonica.com/about-the-license.html) uses a BSD 2-clause license. (http://leptonica.com/about-the-license.html)

View File

@ -1,3 +1,46 @@
2017-03-24 - V4.00.00-alpha
* Added new neural network system based on LSTMs, with major accuracy gains.
* Improvements to PDF rendering.
* Fixes to trainingdata rendering.
* Added LSTM models+lang models to 101 languages. (tessdata repository)
* Improved multi-page TIFF handling.
* Fixed damage to binary images when processing PDFs.
* Fixes to training process to allow incremental training from a recognition model.
* Made LSTM the default engine, pushed cube out.
* Deleted cube code.
* Changed OEModes: --oem 0 for legacy tesseract engine, --oem 1 for LSTM, --oem 2 for both, --oem 3 for default.
* Avoid use of Leptonica debug parameters or functions.
* Fixed multi-language mode.
* Removed support for VS2010.
* Added support for VS2015 and VS2017 with CPPAN.
* Implemented invisible text only for PDF.
* Added AVX / SSE support for Windows.
* Enabled OpenMP support.
* Miscellaneous Fixes.
2017-02-16 - V3.05.00
* Made some fine tuning to the hOCR output.
* Added TSV as another optional output format.
* Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method.
* text2image tool - Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer.
* Training tools - Replaced asserts with tprintf() and exit(1).
* Fixed Cygwin compatibility.
* Improved multipage tiff processing.
* Improved the embedded pdf font (pdf.ttf).
* Enable selection of OCR engine mode from command line.
* Changed tesseract command line parameter '-psm' to '--psm'.
* Write output of tesseract --help, --version and --list-langs to stdout instead of stderr.
* Added new C API for orientation and script detection, removed the old one.
* Increased minimum autoconf version to 2.59.
* Removed dead code.
* Require Leptonica 1.74 or higher.
* Fixed many compiler warnings.
* Fixed memory and resource leaks.
* Fixed some issues with the 'Cube' OCR engine.
* Fixed some openCL issues.
* Added option to build Tesseract with CMake build system.
* Implemented CPPAN support for easy Windows building.
2016-02-17 - V3.04.01 2016-02-17 - V3.04.01
* Added OSD renderer for psm 0. Works for single page and multi-page images. * Added OSD renderer for psm 0. Works for single page and multi-page images.
* Improve tesstrain.sh script. * Improve tesstrain.sh script.

View File

@ -45,7 +45,7 @@ The simplest way to compile this package is:
`sh ./configure' instead to prevent `csh' from trying to execute `sh ./configure' instead to prevent `csh' from trying to execute
`configure' itself. `configure' itself.
Running `configure' takes awhile. While running, it prints some Running `configure' takes a while. While running, it prints some
messages telling which features it is checking for. messages telling which features it is checking for.
2. Type `make' to compile the package. 2. Type `make' to compile the package.

View File

@ -3,11 +3,12 @@
If you have cloned Tesseract from GitHub, you must generate If you have cloned Tesseract from GitHub, you must generate
the configure script. the configure script.
If you have tesseract 3.0x installation in your system, please remove it If you have tesseract 4.0x installation in your system, please remove it
before new build. before new build.
Known dependencies for training tools (excluding leptonica): Known dependencies for training tools (excluding leptonica):
* compiler with c++ support * compiler with c++11 support
* autoconf-archive
* pango-devel * pango-devel
* cairo-devel * cairo-devel
* icu-devel * icu-devel
@ -24,7 +25,7 @@ So, the steps for making Tesseract are:
You need to install at least English language and OSD data files to TESSDATA_PREFIX You need to install at least English language and OSD data files to TESSDATA_PREFIX
directory. You can retrieve single file with tools like [wget](https://www.gnu.org/software/wget/), [curl](https://curl.haxx.se/), [GithubDownloader](https://github.com/intezer/GithubDownloader) or browser. directory. You can retrieve single file with tools like [wget](https://www.gnu.org/software/wget/), [curl](https://curl.haxx.se/), [GithubDownloader](https://github.com/intezer/GithubDownloader) or browser.
All language data files can be retrieved from git repository (usefull only for packagers!): All language data files can be retrieved from git repository (useful only for packagers!):
$ git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata $ git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata

202
LICENSE Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -4,9 +4,12 @@ ACLOCAL_AMFLAGS = -I m4
if ENABLE_TRAINING if ENABLE_TRAINING
TRAINING_SUBDIR = training TRAINING_SUBDIR = training
training: training:
$(MAKE)
@cd "$(top_builddir)/training" && $(MAKE) @cd "$(top_builddir)/training" && $(MAKE)
training-install: training-install:
@cd "$(top_builddir)/training" && $(MAKE) install @cd "$(top_builddir)/training" && $(MAKE) install
training-uninstall:
@cd "$(top_builddir)/training" && $(MAKE) uninstall
clean-local: clean-local:
@cd "$(top_builddir)/training" && $(MAKE) clean @cd "$(top_builddir)/training" && $(MAKE) clean
else else
@ -16,10 +19,7 @@ endif
.PHONY: install-langs ScrollView.jar install-jars training .PHONY: install-langs ScrollView.jar install-jars training
SUBDIRS = ccutil viewer cutil opencl ccstruct dict classify wordrec textord SUBDIRS = arch ccutil viewer cutil opencl ccstruct dict classify wordrec textord lstm
if !NO_CUBE_BUILD
SUBDIRS += neural_networks/runtime cube
endif
SUBDIRS += ccmain api . tessdata doc SUBDIRS += ccmain api . tessdata doc
EXTRA_DIST = README.md\ EXTRA_DIST = README.md\
@ -35,14 +35,14 @@ dist-hook:
# Need to remove .svn directories from directories # Need to remove .svn directories from directories
# added using EXTRA_DIST. $(distdir)/tessdata would in # added using EXTRA_DIST. $(distdir)/tessdata would in
# theory suffice. # theory suffice.
rm -rf `find $(distdir) -name .svn` rm -rf $(find $(distdir) -name .svn)
rm -rf `find $(distdir) -name .git` rm -rf $(find $(distdir) -name .git)
rm -rf `find $(distdir) -name .deps` rm -rf $(find $(distdir) -name .deps)
rm -rf `find $(distdir) -name .libs` rm -rf $(find $(distdir) -name .libs)
rm -rf `find $(distdir) -name *.o` rm -rf $(find $(distdir) -name *.o)
rm -rf `find $(distdir) -name *.lo` rm -rf $(find $(distdir) -name *.lo)
rm -rf `find $(distdir) -name *.la` rm -rf $(find $(distdir) -name *.la)
rm -rf `find $(distdir)/training -executable -type f` rm -rf $(find $(distdir)/training -executable -type f)
rm -rf $(distdir)/doc/html/* rm -rf $(distdir)/doc/html/*
ScrollView.jar: ScrollView.jar:

View File

@ -1,29 +1,30 @@
[![Build Status](https://travis-ci.org/tesseract-ocr/tesseract.svg?branch=master)](https://travis-ci.org/tesseract-ocr/tesseract) [![Build Status](https://travis-ci.org/tesseract-ocr/tesseract.svg?branch=master)](https://travis-ci.org/tesseract-ocr/tesseract)
[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/) [![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
For the latest online version of the README.md see: For the latest online version of the README.md see:
https://github.com/tesseract-ocr/tesseract/blob/master/README.md https://github.com/tesseract-ocr/tesseract/blob/master/README.md
#About # About
This package contains an OCR engine - `libtesseract` and a command line program - `tesseract`. This package contains an OCR engine - `libtesseract` and a command line program - `tesseract`.
The lead developer is Ray Smith. The maintainer is Zdenko Podobny. The lead developer is Ray Smith. The maintainer is Zdenko Podobny.
For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS) and github's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors). For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS)
and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
Tesseract has unicode (UTF-8) support, and can recognize more than 100 Tesseract has unicode (UTF-8) support, and can recognize more than 100
languages "out of the box". It can be trained to recognize other languages. See [Tesseract Training](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) for more information. languages "out of the box". It can be trained to recognize other languages. See [Tesseract Training](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) for more information.
Tesseract supports various output formats: plain-text, hocr(html), pdf. Tesseract supports various output formats: plain-text, hocr(html), pdf.
This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/3rdParty) wiki page. This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/User-Projects-%E2%80%93-3rdParty) wiki page.
You should note that in many cases, in order to get better OCR results, you'll need to [improve the quality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) of the image you are giving Tesseract. You should note that in many cases, in order to get better OCR results, you'll need to [improve the quality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) of the image you are giving Tesseract.
The latest stable version is 3.04.01, released in February 2016. The latest stable version is 3.05.00, released in February 2017.
#Brief history # Brief history
Tesseract was originally developed at Hewlett-Packard Laboratories Bristol and Tesseract was originally developed at Hewlett-Packard Laboratories Bristol and
at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some
@ -33,13 +34,13 @@ In 2005 Tesseract was open sourced by HP. Since 2006 it is developed by Google.
[Release Notes](https://github.com/tesseract-ocr/tesseract/wiki/ReleaseNotes) [Release Notes](https://github.com/tesseract-ocr/tesseract/wiki/ReleaseNotes)
#For developers # For developers
Developers can use `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/master/api/capi.h) or [C++](https://github.com/tesseract-ocr/tesseract/blob/master/api/baseapi.h) API to build their own application. If you need bindings to `libtesseract` for other programming languages, please see the [wrapper](https://github.com/tesseract-ocr/tesseract/wiki/AddOns#tesseract-wrappers) section on AddOns wiki page. Developers can use `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/master/api/capi.h) or [C++](https://github.com/tesseract-ocr/tesseract/blob/master/api/baseapi.h) API to build their own application. If you need bindings to `libtesseract` for other programming languages, please see the [wrapper](https://github.com/tesseract-ocr/tesseract/wiki/AddOns#tesseract-wrappers) section on AddOns wiki page.
Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](http://tesseract-ocr.github.io/). Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](http://tesseract-ocr.github.io/).
#License # License
The code in this repository is licensed under the Apache License, Version 2.0 (the "License"); The code in this repository is licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
@ -55,19 +56,27 @@ Documentation of Tesseract generated from source code by doxygen can be found on
**NOTE**: This software depends on other packages that may be licensed under different open source licenses. **NOTE**: This software depends on other packages that may be licensed under different open source licenses.
#Installing Tesseract # Installing Tesseract
You can either [Install Tesseract via pre-built binary package](https://github.com/tesseract-ocr/tesseract/wiki) or [build it from source](https://github.com/tesseract-ocr/tesseract/wiki/Compiling). You can either [Install Tesseract via pre-built binary package](https://github.com/tesseract-ocr/tesseract/wiki) or [build it from source](https://github.com/tesseract-ocr/tesseract/wiki/Compiling).
#Running Tesseract ## Supported Compilers
* GCC 4.8 and above
* Clang 3.4 and above
* MSVC 2015, 2017
Other compilers might work, but are not officially supported.
# Running Tesseract
Basic command line usage: Basic command line usage:
tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...] tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfiles...]
For more information about the various command line options use `tesseract --help` or `man tesseract`. For more information about the various command line options use `tesseract --help` or `man tesseract`.
#Support # Support
Mailing-lists: Mailing-lists:
* [tesseract-ocr](https://groups.google.com/d/forum/tesseract-ocr) - For tesseract users. * [tesseract-ocr](https://groups.google.com/d/forum/tesseract-ocr) - For tesseract users.

View File

@ -4,7 +4,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := tesseract-$(APP_ABI) LOCAL_MODULE := tesseract-$(APP_ABI)
LOCAL_STATIC_LIBRARIES := \ LOCAL_STATIC_LIBRARIES := \
mobile_base \ base \
leptonica-$(APP_ABI) leptonica-$(APP_ABI)
LOCAL_C_INCLUDES := $(APP_C_INCLUDES) LOCAL_C_INCLUDES := $(APP_C_INCLUDES)
@ -30,13 +30,6 @@ $(info local path=$(LOCAL_PATH))
LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/../../api/*.cpp $(LOCAL_PATH)/../../ccmain/*.cpp $(LOCAL_PATH)/../../ccstruct/*.cpp $(LOCAL_PATH)/../../ccutil/*.cpp $(LOCAL_PATH)/../../classify/*.cpp $(LOCAL_PATH)/../../cutil/*.cpp $(LOCAL_PATH)/../../dict/*.cpp $(LOCAL_PATH)/../../image/*.cpp $(LOCAL_PATH)/../../textord/*.cpp $(LOCAL_PATH)/../../viewer/*.cpp $(LOCAL_PATH)/../../wordrec/*.cpp) LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/../../api/*.cpp $(LOCAL_PATH)/../../ccmain/*.cpp $(LOCAL_PATH)/../../ccstruct/*.cpp $(LOCAL_PATH)/../../ccutil/*.cpp $(LOCAL_PATH)/../../classify/*.cpp $(LOCAL_PATH)/../../cutil/*.cpp $(LOCAL_PATH)/../../dict/*.cpp $(LOCAL_PATH)/../../image/*.cpp $(LOCAL_PATH)/../../textord/*.cpp $(LOCAL_PATH)/../../viewer/*.cpp $(LOCAL_PATH)/../../wordrec/*.cpp)
EXPLICIT_SRC_EXCLUDES := \ EXPLICIT_SRC_EXCLUDES := \
$(LOCAL_PATH)/../../ccmain/cubeclassifier.cpp \
$(LOCAL_PATH)/../../ccmain/cubeclassifier.h \
$(LOCAL_PATH)/../../ccmain/cube_control.cpp \
$(LOCAL_PATH)/../../ccmain/cube_reco_context.cpp \
$(LOCAL_PATH)/../../ccmain/cube_reco_context.h \
$(LOCAL_PATH)/../../ccmain/tesseract_cube_combiner.cpp \
$(LOCAL_PATH)/../../ccmain/tesseract_cube_combiner.h \
$(LOCAL_PATH)/../../api/pdfrenderer.cpp \ $(LOCAL_PATH)/../../api/pdfrenderer.cpp \
$(LOCAL_PATH)/../../api/tesseractmain.cpp \ $(LOCAL_PATH)/../../api/tesseractmain.cpp \
@ -47,11 +40,10 @@ LOCAL_SRC_FILES := $(LOCAL_SRC_FILES:$(LOCAL_PATH)/%=%)
$(info local src files = $(LOCAL_SRC_FILES)) $(info local src files = $(LOCAL_SRC_FILES))
LOCAL_LDLIBS := -ldl -llog -ljnigraphics LOCAL_LDLIBS := -ldl -llog -ljnigraphics
LOCAL_CFLAGS := -DANDROID_BUILD -DNO_CUBE_BUILD -DGRAPHICS_DISABLED LOCAL_CFLAGS := -DANDROID_BUILD -DGRAPHICS_DISABLED
include $(BUILD_SHARED_LIBRARY) include $(BUILD_SHARED_LIBRARY)
$(call import-module,mobile/base) $(call import-module,base/port)
$(call import-module,mobile/base)
$(call import-module,mobile/util/hash) $(call import-module,mobile/util/hash)
$(call import-module,third_party/leptonica/android/jni) $(call import-module,third_party/leptonica/android/jni)

View File

@ -1,6 +1,7 @@
AM_CPPFLAGS += -DLOCALEDIR=\"$(localedir)\"\ AM_CPPFLAGS += -DLOCALEDIR=\"$(localedir)\"\
-DUSE_STD_NAMESPACE \ -DUSE_STD_NAMESPACE \
-I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct -I$(top_srcdir)/cube \ -I$(top_srcdir)/arch -I$(top_srcdir)/lstm \
-I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \
-I$(top_srcdir)/viewer \ -I$(top_srcdir)/viewer \
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \ -I$(top_srcdir)/textord -I$(top_srcdir)/dict \
-I$(top_srcdir)/classify -I$(top_srcdir)/ccmain \ -I$(top_srcdir)/classify -I$(top_srcdir)/ccmain \
@ -27,15 +28,15 @@ libtesseract_api_la_LIBADD = \
../wordrec/libtesseract_wordrec.la \ ../wordrec/libtesseract_wordrec.la \
../classify/libtesseract_classify.la \ ../classify/libtesseract_classify.la \
../dict/libtesseract_dict.la \ ../dict/libtesseract_dict.la \
../arch/libtesseract_arch.la \
../arch/libtesseract_avx.la \
../arch/libtesseract_sse.la \
../lstm/libtesseract_lstm.la \
../ccstruct/libtesseract_ccstruct.la \ ../ccstruct/libtesseract_ccstruct.la \
../cutil/libtesseract_cutil.la \ ../cutil/libtesseract_cutil.la \
../viewer/libtesseract_viewer.la \ ../viewer/libtesseract_viewer.la \
../ccutil/libtesseract_ccutil.la \ ../ccutil/libtesseract_ccutil.la \
../opencl/libtesseract_opencl.la ../opencl/libtesseract_opencl.la
if !NO_CUBE_BUILD
libtesseract_api_la_LIBADD += ../cube/libtesseract_cube.la \
../neural_networks/runtime/libtesseract_neural.la \
endif
endif endif
libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS) libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS)
@ -45,7 +46,7 @@ endif
libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp
lib_LTLIBRARIES += libtesseract.la lib_LTLIBRARIES += libtesseract.la
libtesseract_la_LDFLAGS = libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS)
libtesseract_la_SOURCES = libtesseract_la_SOURCES =
# Dummy C++ source to cause C++ linking. # Dummy C++ source to cause C++ linking.
# see http://www.gnu.org/s/hello/manual/automake/Libtool-Convenience-Libraries.html#Libtool-Convenience-Libraries # see http://www.gnu.org/s/hello/manual/automake/Libtool-Convenience-Libraries.html#Libtool-Convenience-Libraries
@ -57,15 +58,15 @@ libtesseract_la_LIBADD = \
../wordrec/libtesseract_wordrec.la \ ../wordrec/libtesseract_wordrec.la \
../classify/libtesseract_classify.la \ ../classify/libtesseract_classify.la \
../dict/libtesseract_dict.la \ ../dict/libtesseract_dict.la \
../arch/libtesseract_arch.la \
../arch/libtesseract_avx.la \
../arch/libtesseract_sse.la \
../lstm/libtesseract_lstm.la \
../ccstruct/libtesseract_ccstruct.la \ ../ccstruct/libtesseract_ccstruct.la \
../cutil/libtesseract_cutil.la \ ../cutil/libtesseract_cutil.la \
../viewer/libtesseract_viewer.la \ ../viewer/libtesseract_viewer.la \
../ccutil/libtesseract_ccutil.la \ ../ccutil/libtesseract_ccutil.la \
../opencl/libtesseract_opencl.la ../opencl/libtesseract_opencl.la
if !NO_CUBE_BUILD
libtesseract_la_LIBADD += ../cube/libtesseract_cube.la \
../neural_networks/runtime/libtesseract_neural.la
endif
libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION) -no-undefined libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION) -no-undefined
@ -81,9 +82,10 @@ tesseract_LDADD = libtesseract.la
tesseract_LDFLAGS = $(OPENCL_LDFLAGS) tesseract_LDFLAGS = $(OPENCL_LDFLAGS)
if OPENMP tesseract_LDADD += $(LEPTONICA_LIBS)
tesseract_LDADD += $(OPENMP_CFLAGS) tesseract_LDADD += $(OPENMP_CXXFLAGS)
endif
tesseract_LDADD += -ltiff
if T_WIN if T_WIN
tesseract_LDADD += -lws2_32 tesseract_LDADD += -lws2_32
@ -92,4 +94,3 @@ endif
if ADD_RT if ADD_RT
tesseract_LDADD += -lrt tesseract_LDADD += -lrt
endif endif

View File

@ -17,8 +17,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_APITYPES_H__ #ifndef TESSERACT_API_APITYPES_H_
#define TESSERACT_API_APITYPES_H__ #define TESSERACT_API_APITYPES_H_
#include "publictypes.h" #include "publictypes.h"
@ -30,4 +30,4 @@
// than the lower-level one, and lower-level code should be sure to include // than the lower-level one, and lower-level code should be sure to include
// only the lower-level file. // only the lower-level file.
#endif // TESSERACT_API_APITYPES_H__ #endif // TESSERACT_API_APITYPES_H_

View File

@ -34,8 +34,6 @@
// workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
#undef __STRICT_ANSI__ #undef __STRICT_ANSI__
#endif // _MSC_VER #endif // _MSC_VER
#include <stdlib.h>
#include <windows.h>
#include <fcntl.h> #include <fcntl.h>
#include <io.h> #include <io.h>
#else #else
@ -110,27 +108,30 @@ const int kMinCredibleResolution = 70;
const int kMaxCredibleResolution = 2400; const int kMaxCredibleResolution = 2400;
TessBaseAPI::TessBaseAPI() TessBaseAPI::TessBaseAPI()
: tesseract_(NULL), : tesseract_(nullptr),
osd_tesseract_(NULL), osd_tesseract_(nullptr),
equ_detect_(NULL), equ_detect_(nullptr),
// Thresholder is initialized to NULL here, but will be set before use by: reader_(nullptr),
// A constructor of a derived API, SetThresholder(), or // Thresholder is initialized to NULL here, but will be set before use by:
// created implicitly when used in InternalSetImage. // A constructor of a derived API, SetThresholder(), or
thresholder_(NULL), // created implicitly when used in InternalSetImage.
paragraph_models_(NULL), thresholder_(nullptr),
block_list_(NULL), paragraph_models_(nullptr),
page_res_(NULL), block_list_(nullptr),
input_file_(NULL), page_res_(nullptr),
input_image_(NULL), input_file_(nullptr),
output_file_(NULL), output_file_(nullptr),
datapath_(NULL), datapath_(nullptr),
language_(NULL), language_(nullptr),
last_oem_requested_(OEM_DEFAULT), last_oem_requested_(OEM_DEFAULT),
recognition_done_(false), recognition_done_(false),
truth_cb_(NULL), truth_cb_(NULL),
rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0), rect_left_(0),
image_width_(0), image_height_(0) { rect_top_(0),
} rect_width_(0),
rect_height_(0),
image_width_(0),
image_height_(0) {}
TessBaseAPI::~TessBaseAPI() { TessBaseAPI::~TessBaseAPI() {
End(); End();
@ -278,20 +279,33 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values, const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params) { bool set_only_non_debug_params) {
return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
vars_values, set_only_non_debug_params, nullptr);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array. Also implements the version with a datapath in data,
// flagged by data_size = 0.
int TessBaseAPI::Init(const char* data, int data_size, const char* language,
OcrEngineMode oem, char** configs, int configs_size,
const GenericVector<STRING>* vars_vec,
const GenericVector<STRING>* vars_values,
bool set_only_non_debug_params, FileReader reader) {
PERF_COUNT_START("TessBaseAPI::Init") PERF_COUNT_START("TessBaseAPI::Init")
// Default language is "eng". // Default language is "eng".
if (language == NULL) language = "eng"; if (language == nullptr) language = "eng";
STRING datapath = data_size == 0 ? data : language;
// If the datapath, OcrEngineMode or the language have changed - start again. // If the datapath, OcrEngineMode or the language have changed - start again.
// Note that the language_ field stores the last requested language that was // Note that the language_ field stores the last requested language that was
// initialized successfully, while tesseract_->lang stores the language // initialized successfully, while tesseract_->lang stores the language
// actually used. They differ only if the requested language was NULL, in // actually used. They differ only if the requested language was NULL, in
// which case tesseract_->lang is set to the Tesseract default ("eng"). // which case tesseract_->lang is set to the Tesseract default ("eng").
if (tesseract_ != NULL && if (tesseract_ != nullptr &&
(datapath_ == NULL || language_ == NULL || (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
*datapath_ != datapath || last_oem_requested_ != oem || last_oem_requested_ != oem ||
(*language_ != language && tesseract_->lang != language))) { (*language_ != language && tesseract_->lang != language))) {
delete tesseract_; delete tesseract_;
tesseract_ = NULL; tesseract_ = nullptr;
} }
// PERF_COUNT_SUB("delete tesseract_") // PERF_COUNT_SUB("delete tesseract_")
#ifdef USE_OPENCL #ifdef USE_OPENCL
@ -300,19 +314,25 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
#endif #endif
PERF_COUNT_SUB("OD::InitEnv()") PERF_COUNT_SUB("OD::InitEnv()")
bool reset_classifier = true; bool reset_classifier = true;
if (tesseract_ == NULL) { if (tesseract_ == nullptr) {
reset_classifier = false; reset_classifier = false;
tesseract_ = new Tesseract; tesseract_ = new Tesseract;
if (reader != nullptr) reader_ = reader;
TessdataManager mgr(reader_);
if (data_size != 0) {
mgr.LoadMemBuffer(language, data, data_size);
}
if (tesseract_->init_tesseract( if (tesseract_->init_tesseract(
datapath, output_file_ != NULL ? output_file_->string() : NULL, datapath.string(),
language, oem, configs, configs_size, vars_vec, vars_values, output_file_ != nullptr ? output_file_->string() : nullptr,
set_only_non_debug_params) != 0) { language, oem, configs, configs_size, vars_vec, vars_values,
set_only_non_debug_params, &mgr) != 0) {
return -1; return -1;
} }
} }
PERF_COUNT_SUB("update tesseract_") PERF_COUNT_SUB("update tesseract_")
// Update datapath and language requested for the last valid initialization. // Update datapath and language requested for the last valid initialization.
if (datapath_ == NULL) if (datapath_ == nullptr)
datapath_ = new STRING(datapath); datapath_ = new STRING(datapath);
else else
*datapath_ = datapath; *datapath_ = datapath;
@ -320,7 +340,7 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
(strcmp(tesseract_->datadir.string(), "") != 0)) (strcmp(tesseract_->datadir.string(), "") != 0))
*datapath_ = tesseract_->datadir; *datapath_ = tesseract_->datadir;
if (language_ == NULL) if (language_ == nullptr)
language_ = new STRING(language); language_ = new STRING(language);
else else
*language_ = language; *language_ = language;
@ -424,7 +444,8 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
tesseract_ = new Tesseract; tesseract_ = new Tesseract;
else else
ParamUtils::ResetToDefaults(tesseract_->params()); ParamUtils::ResetToDefaults(tesseract_->params());
return tesseract_->init_tesseract_lm(datapath, NULL, language); TessdataManager mgr;
return tesseract_->init_tesseract_lm(datapath, NULL, language, &mgr);
} }
/** /**
@ -434,7 +455,7 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
void TessBaseAPI::InitForAnalysePage() { void TessBaseAPI::InitForAnalysePage() {
if (tesseract_ == NULL) { if (tesseract_ == NULL) {
tesseract_ = new Tesseract; tesseract_ = new Tesseract;
tesseract_->InitAdaptiveClassifier(false); tesseract_->InitAdaptiveClassifier(nullptr);
} }
} }
@ -515,9 +536,7 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
/** /**
* Provide an image for Tesseract to recognize. Format is as * Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Does not copy the image buffer, or take * TesseractRect above. Copies the image buffer and converts to Pix.
* ownership. The source image may be destroyed after Recognize is called,
* either explicitly or implicitly via one of the Get*Text functions.
* SetImage clears all recognition results, and sets the rectangle to the * SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it * full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition. * will automatically perform recognition.
@ -525,9 +544,11 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
void TessBaseAPI::SetImage(const unsigned char* imagedata, void TessBaseAPI::SetImage(const unsigned char* imagedata,
int width, int height, int width, int height,
int bytes_per_pixel, int bytes_per_line) { int bytes_per_pixel, int bytes_per_line) {
if (InternalSetImage()) if (InternalSetImage()) {
thresholder_->SetImage(imagedata, width, height, thresholder_->SetImage(imagedata, width, height,
bytes_per_pixel, bytes_per_line); bytes_per_pixel, bytes_per_line);
SetInputImage(thresholder_->GetPixRect());
}
} }
void TessBaseAPI::SetSourceResolution(int ppi) { void TessBaseAPI::SetSourceResolution(int ppi) {
@ -539,18 +560,17 @@ void TessBaseAPI::SetSourceResolution(int ppi) {
/** /**
* Provide an image for Tesseract to recognize. As with SetImage above, * Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract doesn't take a copy or ownership or pixDestroy the image, so * Tesseract takes its own copy of the image, so it need not persist until
* it must persist until after Recognize. * after Recognize.
* Pix vs raw, which to use? * Pix vs raw, which to use?
* Use Pix where possible. A future version of Tesseract may choose to use Pix * Use Pix where possible. Tesseract uses Pix as its internal representation
* as its internal representation and discard IMAGE altogether. * and it is therefore more efficient to provide a Pix directly.
* Because of that, an implementation that sources and targets Pix may end up
* with less copies than an implementation that does not.
*/ */
void TessBaseAPI::SetImage(Pix* pix) { void TessBaseAPI::SetImage(Pix* pix) {
if (InternalSetImage()) if (InternalSetImage()) {
thresholder_->SetImage(pix); thresholder_->SetImage(pix);
SetInputImage(pix); SetInputImage(thresholder_->GetPixRect());
}
} }
/** /**
@ -693,8 +713,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
if (pixa != NULL) { if (pixa != NULL) {
Pix* pix = NULL; Pix* pix = NULL;
if (raw_image) { if (raw_image) {
pix = page_it->GetImage(level, raw_padding, input_image_, pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
&left, &top); &top);
} else { } else {
pix = page_it->GetBinaryImage(level); pix = page_it->GetBinaryImage(level);
} }
@ -747,53 +767,6 @@ void TessBaseAPI::DumpPGM(const char* filename) {
fclose(fp); fclose(fp);
} }
#ifndef NO_CUBE_BUILD
/**
* Placeholder for call to Cube and test that the input data is correct.
* reskew is the direction of baselines in the skewed image in
* normalized (cos theta, sin theta) form, so (0.866, 0.5) would represent
* a 30 degree anticlockwise skew.
*/
int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
Boxa* boxa_words, Pixa* pixa_words,
const FCOORD& reskew, Pix* page_pix,
PAGE_RES* page_res) {
int block_count = boxaGetCount(boxa_blocks);
ASSERT_HOST(block_count == pixaGetCount(pixa_blocks));
// Write each block to the current directory as junk_write_display.nnn.png.
for (int i = 0; i < block_count; ++i) {
Pix* pix = pixaGetPix(pixa_blocks, i, L_CLONE);
pixDisplayWrite(pix, 1);
}
int word_count = boxaGetCount(boxa_words);
ASSERT_HOST(word_count == pixaGetCount(pixa_words));
int pr_word = 0;
PAGE_RES_IT page_res_it(page_res);
for (page_res_it.restart_page(); page_res_it.word () != NULL;
page_res_it.forward(), ++pr_word) {
WERD_RES *word = page_res_it.word();
WERD_CHOICE* choice = word->best_choice;
// Write the first 100 words to files names wordims/<wordstring>.tif.
if (pr_word < 100) {
STRING filename("wordims/");
if (choice != NULL) {
filename += choice->unichar_string();
} else {
char numbuf[32];
filename += "unclassified";
snprintf(numbuf, 32, "%03d", pr_word);
filename += numbuf;
}
filename += ".tif";
Pix* pix = pixaGetPix(pixa_words, pr_word, L_CLONE);
pixWrite(filename.string(), pix, IFF_TIFF_G4);
}
}
ASSERT_HOST(pr_word == word_count);
return 0;
}
#endif // NO_CUBE_BUILD
/** /**
* Runs page layout analysis in the mode set by SetPageSegMode. * Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just * May optionally be called prior to Recognize to get access to just
@ -809,9 +782,7 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES. * DetectOS, or anything else that changes the internal PAGE_RES.
*/ */
PageIterator* TessBaseAPI::AnalyseLayout() { PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); }
return AnalyseLayout(false);
}
PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
if (FindLines() == 0) { if (FindLines() == 0) {
@ -836,8 +807,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
return -1; return -1;
if (FindLines() != 0) if (FindLines() != 0)
return -1; return -1;
if (page_res_ != NULL) delete page_res_;
delete page_res_;
if (block_list_->empty()) { if (block_list_->empty()) {
page_res_ = new PAGE_RES(false, block_list_, page_res_ = new PAGE_RES(false, block_list_,
&tesseract_->prev_word_best_choice_); &tesseract_->prev_word_best_choice_);
@ -851,13 +821,17 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
} else if (tesseract_->tessedit_resegment_from_boxes) { } else if (tesseract_->tessedit_resegment_from_boxes) {
page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
} else { } else {
// TODO(rays) LSTM here. page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
page_res_ = new PAGE_RES(false,
block_list_, &tesseract_->prev_word_best_choice_); block_list_, &tesseract_->prev_word_best_choice_);
} }
if (page_res_ == NULL) { if (page_res_ == NULL) {
return -1; return -1;
} }
if (tesseract_->tessedit_train_line_recognizer) {
tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
tesseract_->CorrectClassifyWords(page_res_);
return 0;
}
if (tesseract_->tessedit_make_boxes_from_boxes) { if (tesseract_->tessedit_make_boxes_from_boxes) {
tesseract_->CorrectClassifyWords(page_res_); tesseract_->CorrectClassifyWords(page_res_);
return 0; return 0;
@ -940,17 +914,10 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
return 0; return 0;
} }
void TessBaseAPI::SetInputImage(Pix *pix) { // Takes ownership of the input pix.
if (input_image_) void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); }
pixDestroy(&input_image_);
input_image_ = NULL;
if (pix)
input_image_ = pixCopy(NULL, pix);
}
Pix* TessBaseAPI::GetInputImage() { Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); }
return input_image_;
}
const char * TessBaseAPI::GetInputName() { const char * TessBaseAPI::GetInputName() {
if (input_file_) if (input_file_)
@ -994,8 +961,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
} }
// Begin producing output // Begin producing output
const char* kUnknownTitle = ""; if (renderer && !renderer->BeginDocument(unknown_title_)) {
if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
return false; return false;
} }
@ -1038,26 +1004,13 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
int tessedit_page_number) { int tessedit_page_number) {
#ifndef ANDROID_BUILD #ifndef ANDROID_BUILD
Pix *pix = NULL; Pix *pix = NULL;
#ifdef USE_OPENCL
OpenclDevice od;
#endif // USE_OPENCL
int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
size_t offset = 0;
for (; ; ++page) { for (; ; ++page) {
if (tessedit_page_number >= 0) if (tessedit_page_number >= 0)
page = tessedit_page_number; page = tessedit_page_number;
#ifdef USE_OPENCL pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
if ( od.selectedDeviceIsOpenCL() ) { : pixReadFromMultipageTiff(filename, &offset);
pix = (data) ?
od.pixReadMemTiffCl(data, size, page) :
od.pixReadTiffCl(filename, page);
} else {
#endif // USE_OPENCL
pix = (data) ?
pixReadMemTiff(data, size, page) :
pixReadTiff(filename, page);
#ifdef USE_OPENCL
}
#endif // USE_OPENCL
if (pix == NULL) break; if (pix == NULL) break;
tprintf("Page %d\n", page + 1); tprintf("Page %d\n", page + 1);
char page_str[kMaxIntSize]; char page_str[kMaxIntSize];
@ -1068,6 +1021,7 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
pixDestroy(&pix); pixDestroy(&pix);
if (!r) return false; if (!r) return false;
if (tessedit_page_number >= 0) break; if (tessedit_page_number >= 0) break;
if (!offset) break;
} }
return true; return true;
#else #else
@ -1107,7 +1061,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
const char* retry_config, const char* retry_config,
int timeout_millisec, int timeout_millisec,
TessResultRenderer* renderer) { TessResultRenderer* renderer) {
#ifndef ANDROID_BUILD
PERF_COUNT_START("ProcessPages") PERF_COUNT_START("ProcessPages")
bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
if (stdInput) { if (stdInput) {
@ -1142,7 +1095,15 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
// Maybe we have a filelist // Maybe we have a filelist
if (r != 0 || format == IFF_UNKNOWN) { if (r != 0 || format == IFF_UNKNOWN) {
STRING s(buf.c_str()); STRING s;
if (stdInput) {
s = buf.c_str();
} else {
std::ifstream t(filename);
std::string u((std::istreambuf_iterator<char>(t)),
std::istreambuf_iterator<char>());
s = u.c_str();
}
return ProcessPagesFileList(NULL, &s, retry_config, return ProcessPagesFileList(NULL, &s, retry_config,
timeout_millisec, renderer, timeout_millisec, renderer,
tesseract_->tessedit_page_number); tesseract_->tessedit_page_number);
@ -1164,8 +1125,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
} }
// Begin the output // Begin the output
const char* kUnknownTitle = ""; if (renderer && !renderer->BeginDocument(unknown_title_)) {
if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
pixDestroy(&pix); pixDestroy(&pix);
return false; return false;
} }
@ -1187,9 +1147,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
} }
PERF_COUNT_END PERF_COUNT_END
return true; return true;
#else
return false;
#endif
} }
bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
@ -1379,8 +1336,9 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it,
hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
} }
static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) { static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
const unsigned long BUFSIZE = 64; int num2) {
const size_t BUFSIZE = 64;
char id_buffer[BUFSIZE]; char id_buffer[BUFSIZE];
if (num2 >= 0) { if (num2 >= 0) {
snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
@ -1393,8 +1351,7 @@ static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int
*hocr_str += "'"; *hocr_str += "'";
} }
static void AddBoxTohOCR(const ResultIterator *it, static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
PageIteratorLevel level,
STRING* hocr_str) { STRING* hocr_str) {
int left, top, right, bottom; int left, top, right, bottom;
it->BoundingBox(level, &left, &top, &right, &bottom); it->BoundingBox(level, &left, &top, &right, &bottom);
@ -1410,7 +1367,7 @@ static void AddBoxTohOCR(const ResultIterator *it,
// add custom height measures // add custom height measures
float row_height, descenders, ascenders; // row attributes float row_height, descenders, ascenders; // row attributes
it->RowAttributes(&row_height, &descenders, &ascenders); it->RowAttributes(&row_height, &descenders, &ascenders);
// TODO: Do we want to limit these to a single decimal place? // TODO(rays): Do we want to limit these to a single decimal place?
hocr_str->add_str_double("; x_size ", row_height); hocr_str->add_str_double("; x_size ", row_height);
hocr_str->add_str_double("; x_descenders ", descenders * -1); hocr_str->add_str_double("; x_descenders ", descenders * -1);
hocr_str->add_str_double("; x_ascenders ", ascenders); hocr_str->add_str_double("; x_ascenders ", ascenders);
@ -1418,9 +1375,8 @@ static void AddBoxTohOCR(const ResultIterator *it,
*hocr_str += "\">"; *hocr_str += "\">";
} }
static void AddBoxToTSV(const PageIterator *it, static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
PageIteratorLevel level, STRING* hocr_str) {
STRING* hocr_str) {
int left, top, right, bottom; int left, top, right, bottom;
it->BoundingBox(level, &left, &top, &right, &bottom); it->BoundingBox(level, &left, &top, &right, &bottom);
hocr_str->add_str_int("\t", left); hocr_str->add_str_int("\t", left);
@ -1429,8 +1385,6 @@ static void AddBoxToTSV(const PageIterator *it,
hocr_str->add_str_int("\t", bottom - top); hocr_str->add_str_int("\t", bottom - top);
} }
/** /**
* Make a HTML-formatted string with hOCR markup from the internal * Make a HTML-formatted string with hOCR markup from the internal
* data structures. * data structures.
@ -1440,7 +1394,7 @@ static void AddBoxToTSV(const PageIterator *it,
* STL removed from original patch submission and refactored by rays. * STL removed from original patch submission and refactored by rays.
*/ */
char* TessBaseAPI::GetHOCRText(int page_number) { char* TessBaseAPI::GetHOCRText(int page_number) {
return GetHOCRText(NULL,page_number); return GetHOCRText(NULL, page_number);
} }
/** /**
@ -1452,13 +1406,12 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
* STL removed from original patch submission and refactored by rays. * STL removed from original patch submission and refactored by rays.
*/ */
char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (tesseract_ == NULL || if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0))
(page_res_ == NULL && Recognize(monitor) < 0))
return NULL; return NULL;
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
int page_id = page_number + 1; // hOCR uses 1-based page numbers. int page_id = page_number + 1; // hOCR uses 1-based page numbers.
bool para_is_ltr = true; // Default direction is LTR bool para_is_ltr = true; // Default direction is LTR
const char* paragraph_lang = NULL; const char* paragraph_lang = NULL;
bool font_info = false; bool font_info = false;
GetBoolVariable("hocr_font_info", &font_info); GetBoolVariable("hocr_font_info", &font_info);
@ -1470,13 +1423,13 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
#ifdef _WIN32 #ifdef _WIN32
// convert input name from ANSI encoding to utf-8 // convert input name from ANSI encoding to utf-8
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, int str16_len =
NULL, 0); MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0);
wchar_t *uni16_str = new WCHAR[str16_len]; wchar_t *uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
uni16_str, str16_len); uni16_str, str16_len);
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0,
0, NULL, NULL); NULL, NULL);
char *utf8_str = new char[utf8_len]; char *utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
utf8_len, NULL, NULL); utf8_len, NULL, NULL);
@ -1509,7 +1462,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
// Open any new block/paragraph/textline. // Open any new block/paragraph/textline.
if (res_it->IsAtBeginningOf(RIL_BLOCK)) { if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
para_is_ltr = true; // reset to default direction para_is_ltr = true; // reset to default direction
hocr_str += " <div class='ocr_carea'"; hocr_str += " <div class='ocr_carea'";
AddIdTohOCR(&hocr_str, "block", page_id, bcnt); AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str); AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
@ -1523,9 +1476,9 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
AddIdTohOCR(&hocr_str, "par", page_id, pcnt); AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
paragraph_lang = res_it->WordRecognitionLanguage(); paragraph_lang = res_it->WordRecognitionLanguage();
if (paragraph_lang) { if (paragraph_lang) {
hocr_str += " lang='"; hocr_str += " lang='";
hocr_str += paragraph_lang; hocr_str += paragraph_lang;
hocr_str += "'"; hocr_str += "'";
} }
AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
} }
@ -1567,8 +1520,12 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
} }
switch (res_it->WordDirection()) { switch (res_it->WordDirection()) {
// Only emit direction if different from current paragraph direction // Only emit direction if different from current paragraph direction
case DIR_LEFT_TO_RIGHT: if (!para_is_ltr) hocr_str += " dir='ltr'"; break; case DIR_LEFT_TO_RIGHT:
case DIR_RIGHT_TO_LEFT: if (para_is_ltr) hocr_str += " dir='rtl'"; break; if (!para_is_ltr) hocr_str += " dir='ltr'";
break;
case DIR_RIGHT_TO_LEFT:
if (para_is_ltr) hocr_str += " dir='rtl'";
break;
case DIR_MIX: case DIR_MIX:
case DIR_NEUTRAL: case DIR_NEUTRAL:
default: // Do nothing. default: // Do nothing.
@ -1600,7 +1557,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (last_word_in_para) { if (last_word_in_para) {
hocr_str += "\n </p>\n"; hocr_str += "\n </p>\n";
pcnt++; pcnt++;
para_is_ltr = true; // back to default direction para_is_ltr = true; // back to default direction
} }
if (last_word_in_block) { if (last_word_in_block) {
hocr_str += " </div>\n"; hocr_str += " </div>\n";
@ -1620,8 +1577,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
* page_number is 0-based but will appear in the output as 1-based. * page_number is 0-based but will appear in the output as 1-based.
*/ */
char* TessBaseAPI::GetTSVText(int page_number) { char* TessBaseAPI::GetTSVText(int page_number) {
if (tesseract_ == NULL || if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0))
(page_res_ == NULL && Recognize(NULL) < 0))
return NULL; return NULL;
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
@ -1629,9 +1585,10 @@ char* TessBaseAPI::GetTSVText(int page_number) {
STRING tsv_str(""); STRING tsv_str("");
int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, word_num = 0; int page_num = page_id, block_num = 0, par_num = 0, line_num = 0,
word_num = 0;
tsv_str.add_str_int("1\t", page_num); // level 1 - page tsv_str.add_str_int("1\t", page_num); // level 1 - page
tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", block_num);
tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", par_num);
tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", line_num);
@ -1642,7 +1599,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
tsv_str.add_str_int("\t", rect_height_); tsv_str.add_str_int("\t", rect_height_);
tsv_str += "\t-1\t\n"; tsv_str += "\t-1\t\n";
ResultIterator *res_it = GetIterator(); ResultIterator* res_it = GetIterator();
while (!res_it->Empty(RIL_BLOCK)) { while (!res_it->Empty(RIL_BLOCK)) {
if (res_it->Empty(RIL_WORD)) { if (res_it->Empty(RIL_WORD)) {
res_it->Next(RIL_WORD); res_it->Next(RIL_WORD);
@ -1652,46 +1609,40 @@ char* TessBaseAPI::GetTSVText(int page_number) {
// Add rows for any new block/paragraph/textline. // Add rows for any new block/paragraph/textline.
if (res_it->IsAtBeginningOf(RIL_BLOCK)) { if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
block_num++, par_num = 0, line_num = 0, word_num = 0; block_num++, par_num = 0, line_num = 0, word_num = 0;
tsv_str.add_str_int("2\t", page_num); // level 2 - block tsv_str.add_str_int("2\t", page_num); // level 2 - block
tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", block_num);
tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", par_num);
tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", line_num);
tsv_str.add_str_int("\t", word_num); tsv_str.add_str_int("\t", word_num);
AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
tsv_str += "\t-1\t\n"; // end of row for block tsv_str += "\t-1\t\n"; // end of row for block
} }
if (res_it->IsAtBeginningOf(RIL_PARA)) { if (res_it->IsAtBeginningOf(RIL_PARA)) {
par_num++, line_num = 0, word_num = 0; par_num++, line_num = 0, word_num = 0;
tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", block_num);
tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", par_num);
tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", line_num);
tsv_str.add_str_int("\t", word_num); tsv_str.add_str_int("\t", word_num);
AddBoxToTSV(res_it, RIL_PARA, &tsv_str); AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
tsv_str += "\t-1\t\n"; // end of row for para tsv_str += "\t-1\t\n"; // end of row for para
} }
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
line_num++, word_num = 0; line_num++, word_num = 0;
tsv_str.add_str_int("4\t", page_num); // level 4 - line tsv_str.add_str_int("4\t", page_num); // level 4 - line
tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", block_num);
tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", par_num);
tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", line_num);
tsv_str.add_str_int("\t", word_num); tsv_str.add_str_int("\t", word_num);
AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
tsv_str += "\t-1\t\n"; // end of row for line tsv_str += "\t-1\t\n"; // end of row for line
} }
// Now, process the word... // Now, process the word...
int left, top, right, bottom; int left, top, right, bottom;
bool bold, italic, underlined, monospace, serif, smallcaps;
int pointsize, font_id;
const char *font_name;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
&monospace, &serif, &smallcaps,
&pointsize, &font_id);
word_num++; word_num++;
tsv_str.add_str_int("5\t", page_num); // level 5 - word tsv_str.add_str_int("5\t", page_num); // level 5 - word
tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", block_num);
tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", par_num);
tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", line_num);
@ -1712,11 +1663,11 @@ char* TessBaseAPI::GetTSVText(int page_number) {
tsv_str += res_it->GetUTF8Text(RIL_SYMBOL); tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
res_it->Next(RIL_SYMBOL); res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
tsv_str += "\n"; // end of row tsv_str += "\n"; // end of row
wcnt++; wcnt++;
} }
char *ret = new char[tsv_str.length() + 1]; char* ret = new char[tsv_str.length() + 1];
strcpy(ret, tsv_str.string()); strcpy(ret, tsv_str.string());
delete res_it; delete res_it;
return ret; return ret;
@ -1760,7 +1711,7 @@ char* TessBaseAPI::GetBoxText(int page_number) {
int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
kMaxBytesPerLine; kMaxBytesPerLine;
char* result = new char[total_length]; char* result = new char[total_length];
strcpy(result, "\0"); result[0] = '\0';
int output_length = 0; int output_length = 0;
LTRResultIterator* it = GetLTRIterator(); LTRResultIterator* it = GetLTRIterator();
do { do {
@ -1907,43 +1858,70 @@ char* TessBaseAPI::GetUNLVText() {
return result; return result;
} }
/** /**
* The recognized text is returned as a char* which is coded * Detect the orientation of the input image and apparent script (alphabet).
* as UTF8 and must be freed with the delete [] operator. * orient_deg is the detected clockwise rotation of the input image in degrees
* page_number is a 0-based page index that will appear in the osd file. * (0, 90, 180, 270)
*/ * orient_conf is the confidence (15.0 is reasonably confident)
char* TessBaseAPI::GetOsdText(int page_number) { * script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf,
const char** script_name,
float* script_conf) {
OSResults osr; OSResults osr;
bool osd = DetectOS(&osr); bool osd = DetectOS(&osr);
if (!osd) { if (!osd) {
return NULL; return false;
} }
int orient_id = osr.best_result.orientation_id; int orient_id = osr.best_result.orientation_id;
int script_id = osr.get_best_script(orient_id); int script_id = osr.get_best_script(orient_id);
float orient_conf = osr.best_result.oconfidence; if (orient_conf) *orient_conf = osr.best_result.oconfidence;
float script_conf = osr.best_result.sconfidence; if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees
const char* script_name =
osr.unicharset->get_script_from_script_id(script_id);
// clockwise orientation of the input image, in degrees if (script_name) {
int orient_deg = orient_id * 90; const char* script = osr.unicharset->get_script_from_script_id(script_id);
*script_name = script;
}
if (script_conf) *script_conf = osr.best_result.sconfidence;
return true;
}
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* TessBaseAPI::GetOsdText(int page_number) {
int orient_deg;
float orient_conf;
const char* script_name;
float script_conf;
if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name,
&script_conf))
return NULL;
// clockwise rotation needed to make the page upright // clockwise rotation needed to make the page upright
int rotate = OrientationIdToValue(orient_id); int rotate = OrientationIdToValue(orient_deg / 90);
char* osd_buf = new char[255]; const int kOsdBufsize = 255;
snprintf(osd_buf, 255, char* osd_buf = new char[kOsdBufsize];
"Page number: %d\n" snprintf(osd_buf, kOsdBufsize,
"Orientation in degrees: %d\n" "Page number: %d\n"
"Rotate: %d\n" "Orientation in degrees: %d\n"
"Orientation confidence: %.2f\n" "Rotate: %d\n"
"Script: %s\n" "Orientation confidence: %.2f\n"
"Script confidence: %.2f\n", "Script: %s\n"
page_number, "Script confidence: %.2f\n",
orient_deg, rotate, orient_conf, page_number, orient_deg, rotate, orient_conf, script_name,
script_name, script_conf); script_conf);
return osd_buf; return osd_buf;
} }
@ -2020,8 +1998,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
for (t = 0; text[t] != '\0'; ++t) { for (t = 0; text[t] != '\0'; ++t) {
if (text[t] == '\n' || text[t] == ' ') if (text[t] == '\n' || text[t] == ' ')
continue; continue;
while (wordstr[w] != '\0' && wordstr[w] == ' ') while (wordstr[w] == ' ') ++w;
++w;
if (text[t] != wordstr[w]) if (text[t] != wordstr[w])
break; break;
++w; ++w;
@ -2063,7 +2040,7 @@ void TessBaseAPI::Clear() {
if (thresholder_ != NULL) if (thresholder_ != NULL)
thresholder_->Clear(); thresholder_->Clear();
ClearResults(); ClearResults();
SetInputImage(NULL); if (tesseract_ != NULL) SetInputImage(NULL);
} }
/** /**
@ -2073,6 +2050,7 @@ void TessBaseAPI::Clear() {
* other than Init and anything declared above it in the class definition. * other than Init and anything declared above it in the class definition.
*/ */
void TessBaseAPI::End() { void TessBaseAPI::End() {
Clear();
if (thresholder_ != NULL) { if (thresholder_ != NULL) {
delete thresholder_; delete thresholder_;
thresholder_ = NULL; thresholder_ = NULL;
@ -2108,10 +2086,6 @@ void TessBaseAPI::End() {
delete input_file_; delete input_file_;
input_file_ = NULL; input_file_ = NULL;
} }
if (input_image_ != NULL) {
pixDestroy(&input_image_);
input_image_ = NULL;
}
if (output_file_ != NULL) { if (output_file_ != NULL) {
delete output_file_; delete output_file_;
output_file_ = NULL; output_file_ = NULL;
@ -2242,6 +2216,8 @@ void TessBaseAPI::Threshold(Pix** pix) {
if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) { if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
// Use the minimum default resolution, as it is safer to under-estimate // Use the minimum default resolution, as it is safer to under-estimate
// than over-estimate resolution. // than over-estimate resolution.
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res,
kMinCredibleResolution);
thresholder_->SetSourceYResolution(kMinCredibleResolution); thresholder_->SetSourceYResolution(kMinCredibleResolution);
} }
PageSegMode pageseg_mode = PageSegMode pageseg_mode =
@ -2286,7 +2262,7 @@ int TessBaseAPI::FindLines() {
} }
if (tesseract_ == NULL) { if (tesseract_ == NULL) {
tesseract_ = new Tesseract; tesseract_ = new Tesseract;
tesseract_->InitAdaptiveClassifier(false); tesseract_->InitAdaptiveClassifier(nullptr);
} }
if (tesseract_->pix_binary() == NULL) if (tesseract_->pix_binary() == NULL)
Threshold(tesseract_->mutable_pix_binary()); Threshold(tesseract_->mutable_pix_binary());
@ -2308,14 +2284,16 @@ int TessBaseAPI::FindLines() {
Tesseract* osd_tess = osd_tesseract_; Tesseract* osd_tess = osd_tesseract_;
OSResults osr; OSResults osr;
if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) { if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
osd_tess == nullptr) {
if (strcmp(language_->string(), "osd") == 0) { if (strcmp(language_->string(), "osd") == 0) {
osd_tess = tesseract_; osd_tess = tesseract_;
} else { } else {
osd_tesseract_ = new Tesseract; osd_tesseract_ = new Tesseract;
if (osd_tesseract_->init_tesseract( TessdataManager mgr(reader_);
datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY, if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, "osd",
NULL, 0, NULL, NULL, false) == 0) { OEM_TESSERACT_ONLY, nullptr, 0,
nullptr, nullptr, false, &mgr) == 0) {
osd_tess = osd_tesseract_; osd_tess = osd_tesseract_;
osd_tesseract_->set_source_resolution( osd_tesseract_->set_source_resolution(
thresholder_->GetSourceYResolution()); thresholder_->GetSourceYResolution());
@ -2323,7 +2301,7 @@ int TessBaseAPI::FindLines() {
tprintf("Warning: Auto orientation and script detection requested," tprintf("Warning: Auto orientation and script detection requested,"
" but osd language failed to load\n"); " but osd language failed to load\n");
delete osd_tesseract_; delete osd_tesseract_;
osd_tesseract_ = NULL; osd_tesseract_ = nullptr;
} }
} }
} }
@ -2766,7 +2744,7 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
INT_FX_RESULT_STRUCT fx_info; INT_FX_RESULT_STRUCT fx_info;
tesseract_->ExtractFeatures(*blob, false, &bl_features, tesseract_->ExtractFeatures(*blob, false, &bl_features,
&cn_features, &fx_info, &outline_counts); &cn_features, &fx_info, &outline_counts);
if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) { if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
*num_features = 0; *num_features = 0;
return; // Feature extraction failed. return; // Feature extraction failed.
} }
@ -2847,13 +2825,6 @@ int TessBaseAPI::NumDawgs() const {
return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs(); return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
} }
#ifndef NO_CUBE_BUILD
/** Return a pointer to underlying CubeRecoContext object if present. */
CubeRecoContext *TessBaseAPI::GetCubeRecoContext() const {
return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
}
#endif // NO_CUBE_BUILD
/** Escape a char string - remove <>&"' with HTML codes. */ /** Escape a char string - remove <>&"' with HTML codes. */
STRING HOcrEscape(const char* text) { STRING HOcrEscape(const char* text) {
STRING ret; STRING ret;

View File

@ -17,11 +17,11 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_BASEAPI_H__ #ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H__ #define TESSERACT_API_BASEAPI_H_
#define TESSERACT_VERSION_STR "3.05.00dev" #define TESSERACT_VERSION_STR "4.00.00alpha"
#define TESSERACT_VERSION 0x030500 #define TESSERACT_VERSION 0x040000
#define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \ #define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
(patch)) (patch))
@ -29,14 +29,15 @@
// To avoid collision with other typenames include the ABSOLUTE MINIMUM // To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible // complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp. // and hide includes of complex types in baseapi.cpp.
#include "platform.h"
#include "apitypes.h" #include "apitypes.h"
#include "pageiterator.h"
#include "platform.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "serialis.h"
#include "tesscallback.h"
#include "thresholder.h" #include "thresholder.h"
#include "unichar.h" #include "unichar.h"
#include "tesscallback.h"
#include "publictypes.h"
#include "pageiterator.h"
#include "resultiterator.h"
template <typename T> class GenericVector; template <typename T> class GenericVector;
class PAGE_RES; class PAGE_RES;
@ -65,9 +66,6 @@ struct TBLOB;
namespace tesseract { namespace tesseract {
#ifndef NO_CUBE_BUILD
class CubeRecoContext;
#endif // NO_CUBE_BUILD
class Dawg; class Dawg;
class Dict; class Dict;
class EquationDetect; class EquationDetect;
@ -142,6 +140,7 @@ class TESS_API TessBaseAPI {
* is stored in the PDF so we need that as well. * is stored in the PDF so we need that as well.
*/ */
const char* GetInputName(); const char* GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix); void SetInputImage(Pix *pix);
Pix* GetInputImage(); Pix* GetInputImage();
int GetSourceYResolution(); int GetSourceYResolution();
@ -239,6 +238,13 @@ class TESS_API TessBaseAPI {
int Init(const char* datapath, const char* language) { int Init(const char* datapath, const char* language) {
return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false); return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
} }
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char* data, int data_size, const char* language,
OcrEngineMode mode, char** configs, int configs_size,
const GenericVector<STRING>* vars_vec,
const GenericVector<STRING>* vars_values,
bool set_only_non_debug_params, FileReader reader);
/** /**
* Returns the languages string used in the last valid initialization. * Returns the languages string used in the last valid initialization.
@ -333,9 +339,7 @@ class TESS_API TessBaseAPI {
/** /**
* Provide an image for Tesseract to recognize. Format is as * Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Does not copy the image buffer, or take * TesseractRect above. Copies the image buffer and converts to Pix.
* ownership. The source image may be destroyed after Recognize is called,
* either explicitly or implicitly via one of the Get*Text functions.
* SetImage clears all recognition results, and sets the rectangle to the * SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it * full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition. * will automatically perform recognition.
@ -345,13 +349,11 @@ class TESS_API TessBaseAPI {
/** /**
* Provide an image for Tesseract to recognize. As with SetImage above, * Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract doesn't take a copy or ownership or pixDestroy the image, so * Tesseract takes its own copy of the image, so it need not persist until
* it must persist until after Recognize. * after Recognize.
* Pix vs raw, which to use? * Pix vs raw, which to use?
* Use Pix where possible. A future version of Tesseract may choose to use Pix * Use Pix where possible. Tesseract uses Pix as its internal representation
* as its internal representation and discard IMAGE altogether. * and it is therefore more efficient to provide a Pix directly.
* Because of that, an implementation that sources and targets Pix may end up
* with less copies than an implementation that does not.
*/ */
void SetImage(Pix* pix); void SetImage(Pix* pix);
@ -376,8 +378,7 @@ class TESS_API TessBaseAPI {
* delete it when it it is replaced or the API is destructed. * delete it when it it is replaced or the API is destructed.
*/ */
void SetThresholder(ImageThresholder* thresholder) { void SetThresholder(ImageThresholder* thresholder) {
if (thresholder_ != NULL) delete thresholder_;
delete thresholder_;
thresholder_ = thresholder; thresholder_ = thresholder;
ClearResults(); ClearResults();
} }
@ -588,8 +589,8 @@ class TESS_API TessBaseAPI {
* data structures. * data structures.
* page_number is 0-based but will appear in the output as 1-based. * page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to * monitor can be used to
* cancel the recognition * cancel the recognition
* receive progress callbacks * receive progress callbacks
*/ */
char* GetHOCRText(ETEXT_DESC* monitor, int page_number); char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
@ -622,6 +623,18 @@ class TESS_API TessBaseAPI {
*/ */
char* GetUNLVText(); char* GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int* orient_deg, float* orient_conf,
const char** script_name, float* script_conf);
/** /**
* The recognized text is returned as a char* which is coded * The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator. * as UTF8 and must be freed with the delete [] operator.
@ -750,21 +763,12 @@ class TESS_API TessBaseAPI {
*/ */
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
Tesseract* tesseract() const { Tesseract* tesseract() const { return tesseract_; }
return tesseract_;
}
OcrEngineMode oem() const { OcrEngineMode oem() const { return last_oem_requested_; }
return last_oem_requested_;
}
void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
#ifndef NO_CUBE_BUILD
/** Return a pointer to underlying CubeRecoContext object if present. */
CubeRecoContext *GetCubeRecoContext() const;
#endif // NO_CUBE_BUILD
void set_min_orientation_margin(double margin); void set_min_orientation_margin(double margin);
/** /**
@ -855,9 +859,7 @@ class TESS_API TessBaseAPI {
int** y1, int** y1,
PAGE_RES* page_res); PAGE_RES* page_res);
TESS_LOCAL const PAGE_RES* GetPageRes() const { TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
return page_res_;
};
/* @} */ /* @} */
@ -865,12 +867,12 @@ class TESS_API TessBaseAPI {
Tesseract* tesseract_; ///< The underlying data object. Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection. Tesseract* osd_tesseract_; ///< For orientation & script detection.
EquationDetect* equ_detect_; ///<The equation detector. EquationDetect* equ_detect_; ///<The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder* thresholder_; ///< Image thresholding module. ImageThresholder* thresholder_; ///< Image thresholding module.
GenericVector<ParagraphModel *>* paragraph_models_; GenericVector<ParagraphModel *>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout. BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data. PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code. STRING* input_file_; ///< Name used by training code.
Pix* input_image_; ///< Image used for searchable PDF
STRING* output_file_; ///< Name used by debug code. STRING* output_file_; ///< Name used by debug code.
STRING* datapath_; ///< Current location of tessdata. STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language. STRING* language_; ///< Last initialized language.
@ -898,7 +900,7 @@ class TESS_API TessBaseAPI {
const char* retry_config, int timeout_millisec, const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer, TessResultRenderer* renderer,
int tessedit_page_number); int tessedit_page_number);
// TIFF supports multipage so gets special consideration // TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, bool ProcessPagesMultipageTiff(const unsigned char *data,
size_t size, size_t size,
const char* filename, const char* filename,
@ -906,10 +908,16 @@ class TESS_API TessBaseAPI {
int timeout_millisec, int timeout_millisec,
TessResultRenderer* renderer, TessResultRenderer* renderer,
int tessedit_page_number); int tessedit_page_number);
// There's currently no way to pass a document title from the
// Tesseract command line, and we have multiple places that choose
// to set the title to an empty string. Using a single named
// variable will hopefully reduce confusion if the situation changes
// in the future.
const char *unknown_title_ = "";
}; // class TessBaseAPI. }; // class TessBaseAPI.
/** Escape a char string - remove &<>"' with HTML codes. */ /** Escape a char string - remove &<>"' with HTML codes. */
STRING HOcrEscape(const char* text); STRING HOcrEscape(const char* text);
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_API_BASEAPI_H__ #endif // TESSERACT_API_BASEAPI_H_

View File

@ -64,9 +64,10 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu
return new TessHOcrRenderer(outputbase, font_info); return new TessHOcrRenderer(outputbase, font_info);
} }
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir) TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
BOOL textonly)
{ {
return new TessPDFRenderer(outputbase, datadir); return new TessPDFRenderer(outputbase, datadir, textonly);
} }
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase) TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)
@ -538,9 +539,18 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results) TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results)
{ {
return handle->DetectOS(results) ? TRUE : FALSE; return FALSE; // Unsafe ABI, return FALSE always
} }
// C API wrapper: forwards the call to handle->DetectOrientationScript(),
// passing the four output pointers through untouched, and returns the
// method's boolean result converted to BOOL.
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
                                                            int* orient_deg, float* orient_conf,
                                                            const char** script_name, float* script_conf)
{
    // Hold the result as a bool first so the conversion to BOOL is applied
    // to a normalized true/false value.
    const bool detected =
        handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf);
    return (BOOL)detected;
}
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex) int* num_features, int* FeatureOutlineIndex)
{ {
@ -598,13 +608,6 @@ TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTr
handle->InitTruthCallback(cb); handle->InitTruthCallback(cb);
} }
#ifndef NO_CUBE_BUILD
TESS_API TessCubeRecoContext* TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle)
{
return handle->GetCubeRecoContext();
}
#endif // NO_CUBE_BUILD
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin) TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin)
{ {
handle->set_min_orientation_margin(margin); handle->set_min_orientation_margin(margin);

View File

@ -68,9 +68,6 @@ typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc;
typedef tesseract::FillLatticeFunc TessFillLatticeFunc; typedef tesseract::FillLatticeFunc TessFillLatticeFunc;
typedef tesseract::Dawg TessDawg; typedef tesseract::Dawg TessDawg;
typedef tesseract::TruthCallback TessTruthCallback; typedef tesseract::TruthCallback TessTruthCallback;
#ifndef NO_CUBE_BUILD
typedef tesseract::CubeRecoContext TessCubeRecoContext;
#endif // NO_CUBE_BUILD
typedef tesseract::Orientation TessOrientation; typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification; typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection; typedef tesseract::WritingDirection TessWritingDirection;
@ -88,7 +85,7 @@ typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator; typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator; typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator; typedef struct TessChoiceIterator TessChoiceIterator;
typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_CUBE_ONLY, OEM_TESSERACT_CUBE_COMBINED, OEM_DEFAULT } TessOcrEngineMode; typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT } TessOcrEngineMode;
typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT, typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT,
PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
PSM_SPARSE_TEXT_OSD, PSM_COUNT } TessPageSegMode; PSM_SPARSE_TEXT_OSD, PSM_COUNT } TessPageSegMode;
@ -122,7 +119,8 @@ TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list);
TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir); TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
BOOL textonly);
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
@ -285,7 +283,10 @@ TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f); TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f);
TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f); TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f);
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results);
// Call TessDeleteText(*script_name) to free memory allocated by this function
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
int* orient_deg, float* orient_conf, const char **script_name, float* script_conf);
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex); int* num_features, int* FeatureOutlineIndex);
@ -313,11 +314,6 @@ TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric
TESS_API TessOcrEngineMode TESS_API TessOcrEngineMode
TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb); TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb);
#ifndef NO_CUBE_BUILD
TESS_API TessCubeRecoContext*
TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle);
#endif // NO_CUBE_BUILD
#endif #endif
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin); TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin);

View File

@ -20,12 +20,12 @@
#include "config_auto.h" #include "config_auto.h"
#endif #endif
#include "allheaders.h"
#include "baseapi.h" #include "baseapi.h"
#include "renderer.h"
#include "math.h" #include "math.h"
#include "renderer.h"
#include "strngs.h" #include "strngs.h"
#include "tprintf.h" #include "tprintf.h"
#include "allheaders.h"
#ifdef _MSC_VER #ifdef _MSC_VER
#include "mathfix.h" #include "mathfix.h"
@ -159,7 +159,7 @@ CIDToGIDMap.
OK there is a small problem there, if I use GID 0 then Acrobat gets OK there is a small problem there, if I use GID 0 then Acrobat gets
upset about it and complains it cannot extract the font. If I set the upset about it and complains it cannot extract the font. If I set the
CIDToGIDMap so that all the entries are 1 instead, its happy. Totally CIDToGIDMap so that all the entries are 1 instead, it's happy. Totally
mad...... mad......
*/ */
@ -169,19 +169,26 @@ namespace tesseract {
// Use for PDF object fragments. Must be large enough // Use for PDF object fragments. Must be large enough
// to hold a colormap with 256 colors in the verbose // to hold a colormap with 256 colors in the verbose
// PDF representation. // PDF representation.
const int kBasicBufSize = 2048; static const int kBasicBufSize = 2048;
// If the font is 10 pts, nominal character width is 5 pts // If the font is 10 pts, nominal character width is 5 pts
const int kCharWidth = 2; static const int kCharWidth = 2;
// Used for memory allocation. A codepoint must take no more than this
// many bytes, when written in the PDF way. e.g. "<0063>" for the
// letter 'c'
static const int kMaxBytesPerCodepoint = 20;
/********************************************************************** /**********************************************************************
* PDF Renderer interface implementation * PDF Renderer interface implementation
**********************************************************************/ **********************************************************************/
TessPDFRenderer::TessPDFRenderer(const char* outputbase, const char *datadir) TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly)
: TessResultRenderer(outputbase, "pdf") { : TessResultRenderer(outputbase, "pdf") {
obj_ = 0; obj_ = 0;
datadir_ = datadir; datadir_ = datadir;
textonly_ = textonly;
offsets_.push_back(0); offsets_.push_back(0);
} }
@ -282,7 +289,7 @@ void AffineMatrix(int writing_direction,
} }
} }
// There are some really stupid PDF viewers in the wild, such as // There are some really awkward PDF viewers in the wild, such as
// 'Preview' which ships with the Mac. They do a better job with text // 'Preview' which ships with the Mac. They do a better job with text
// selection and highlighting when given perfectly flat baseline // selection and highlighting when given perfectly flat baseline
// instead of very slightly tilted. We clip small tilts to appease // instead of very slightly tilted. We clip small tilts to appease
@ -302,6 +309,23 @@ void ClipBaseline(int ppi, int x1, int y1, int x2, int y2,
*line_y1 = *line_y2 = (y1 + y2) / 2; *line_y1 = *line_y2 = (y1 + y2) / 2;
} }
// Writes the UTF-16BE encoding of the Unicode code point `code` into `utf16`
// as uppercase hexadecimal digits with no surrounding delimiters: four digits
// for a code point below 0x10000, otherwise eight digits (high surrogate
// followed by low surrogate). Output is bounded by kMaxBytesPerCodepoint.
//
// Returns true on success. Returns false, after reporting the value through
// tprintf and without writing to `utf16`, when `code` cannot be encoded:
// negative, inside the surrogate range 0xD800-0xDFFF, or above 0x10FFFF.
bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
  // Negative values are rejected explicitly: they would otherwise satisfy
  // "code < 0x10000" below and be formatted as a wide two's-complement hex
  // string instead of a four-digit code unit.
  if (code < 0 || (code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) {
    tprintf("Dropping invalid codepoint %d\n", code);
    return false;
  }
  if (code < 0x10000) {
    // Basic Multilingual Plane: a single 16-bit code unit.
    snprintf(utf16, kMaxBytesPerCodepoint, "%04X",
             static_cast<unsigned int>(code));
  } else {
    // Supplementary plane: split the 20-bit offset into a surrogate pair.
    int a = code - 0x010000;
    int high_surrogate = (0x03FF & (a >> 10)) + 0xD800;
    int low_surrogate = (0x03FF & a) + 0xDC00;
    snprintf(utf16, kMaxBytesPerCodepoint, "%04X%04X",
             static_cast<unsigned int>(high_surrogate),
             static_cast<unsigned int>(low_surrogate));
  }
  return true;
}
char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
double width, double height) { double width, double height) {
STRING pdf_str(""); STRING pdf_str("");
@ -326,7 +350,11 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
pdf_str.add_str_double("", prec(width)); pdf_str.add_str_double("", prec(width));
pdf_str += " 0 0 "; pdf_str += " 0 0 ";
pdf_str.add_str_double("", prec(height)); pdf_str.add_str_double("", prec(height));
pdf_str += " 0 0 cm /Im1 Do Q\n"; pdf_str += " 0 0 cm";
if (!textonly_) {
pdf_str += " /Im1 Do";
}
pdf_str += " Q\n";
int line_x1 = 0; int line_x1 = 0;
int line_y1 = 0; int line_y1 = 0;
@ -436,25 +464,13 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
if (grapheme && grapheme[0] != '\0') { if (grapheme && grapheme[0] != '\0') {
GenericVector<int> unicodes; GenericVector<int> unicodes;
UNICHAR::UTF8ToUnicode(grapheme, &unicodes); UNICHAR::UTF8ToUnicode(grapheme, &unicodes);
char utf16[20]; char utf16[kMaxBytesPerCodepoint];
for (int i = 0; i < unicodes.length(); i++) { for (int i = 0; i < unicodes.length(); i++) {
int code = unicodes[i]; int code = unicodes[i];
// Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16 if (CodepointToUtf16be(code, utf16)) {
if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) { pdf_word += utf16;
tprintf("Dropping invalid codepoint %d\n", code); pdf_word_len++;
continue;
} }
if (code < 0x10000) {
snprintf(utf16, sizeof(utf16), "<%04X>", code);
} else {
int a = code - 0x010000;
int high_surrogate = (0x03FF & (a >> 10)) + 0xD800;
int low_surrogate = (0x03FF & a) + 0xDC00;
snprintf(utf16, sizeof(utf16), "<%04X%04X>",
high_surrogate, low_surrogate);
}
pdf_word += utf16;
pdf_word_len++;
} }
} }
delete []grapheme; delete []grapheme;
@ -465,9 +481,9 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len)); kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len));
pdf_str.add_str_double("", h_stretch); pdf_str.add_str_double("", h_stretch);
pdf_str += " Tz"; // horizontal stretch pdf_str += " Tz"; // horizontal stretch
pdf_str += " [ "; pdf_str += " [ <";
pdf_str += pdf_word; // UTF-16BE representation pdf_str += pdf_word; // UTF-16BE representation
pdf_str += " ] TJ"; // show the text pdf_str += "> ] TJ"; // show the text
} }
if (last_word_in_line) { if (last_word_in_line) {
pdf_str += " \n"; pdf_str += " \n";
@ -567,7 +583,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
"<<\n" "<<\n"
" /Length %lu /Filter /FlateDecode\n" " /Length %lu /Filter /FlateDecode\n"
">>\n" ">>\n"
"stream\n", (unsigned long)len); "stream\n",
(unsigned long)len);
if (n >= sizeof(buf)) { if (n >= sizeof(buf)) {
lept_free(comp); lept_free(comp);
return false; return false;
@ -619,7 +636,6 @@ bool TessPDFRenderer::BeginDocumentHandler() {
AppendPDFObject(buf); AppendPDFObject(buf);
// FONT DESCRIPTOR // FONT DESCRIPTOR
const int kCharHeight = 2; // Effect: highlights are half height
n = snprintf(buf, sizeof(buf), n = snprintf(buf, sizeof(buf),
"7 0 obj\n" "7 0 obj\n"
"<<\n" "<<\n"
@ -635,10 +651,10 @@ bool TessPDFRenderer::BeginDocumentHandler() {
" /Type /FontDescriptor\n" " /Type /FontDescriptor\n"
">>\n" ">>\n"
"endobj\n", "endobj\n",
1000 / kCharHeight, 1000,
1000 / kCharHeight, 1000,
1000 / kCharWidth, 1000 / kCharWidth,
1000 / kCharHeight, 1000,
8L // Font data 8L // Font data
); );
if (n >= sizeof(buf)) return false; if (n >= sizeof(buf)) return false;
@ -703,11 +719,6 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
L_COMP_DATA *cid = NULL; L_COMP_DATA *cid = NULL;
const int kJpegQuality = 85; const int kJpegQuality = 85;
// TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain
// types of PNG files, especially if there are 2 samples per pixel.
// We can get rid of this logic after Leptonica 1.72 is released and
// has propagated everywhere. Bug discussion as follows.
// https://code.google.com/p/tesseract-ocr/issues/detail?id=1300
int format, sad; int format, sad;
findFileFormat(filename, &format); findFileFormat(filename, &format);
if (pixGetSpp(pix) == 4 && format == IFF_PNG) { if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
@ -819,10 +830,6 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
*pdf_object_size = *pdf_object_size =
b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len; b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len;
*pdf_object = new char[*pdf_object_size]; *pdf_object = new char[*pdf_object_size];
if (!pdf_object) {
l_CIDataDestroy(&cid);
return false;
}
char *p = *pdf_object; char *p = *pdf_object;
memcpy(p, b1, b1_len); memcpy(p, b1, b1_len);
@ -841,6 +848,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) { bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
size_t n; size_t n;
char buf[kBasicBufSize]; char buf[kBasicBufSize];
char buf2[kBasicBufSize];
Pix *pix = api->GetInputImage(); Pix *pix = api->GetInputImage();
char *filename = (char *)api->GetInputName(); char *filename = (char *)api->GetInputName();
int ppi = api->GetSourceYResolution(); int ppi = api->GetSourceYResolution();
@ -849,6 +857,9 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
double width = pixGetWidth(pix) * 72.0 / ppi; double width = pixGetWidth(pix) * 72.0 / ppi;
double height = pixGetHeight(pix) * 72.0 / ppi; double height = pixGetHeight(pix) * 72.0 / ppi;
snprintf(buf2, sizeof(buf2), "/XObject << /Im1 %ld 0 R >>\n", obj_ + 2);
const char *xobject = (textonly_) ? "" : buf2;
// PAGE // PAGE
n = snprintf(buf, sizeof(buf), n = snprintf(buf, sizeof(buf),
"%ld 0 obj\n" "%ld 0 obj\n"
@ -859,19 +870,18 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
" /Contents %ld 0 R\n" " /Contents %ld 0 R\n"
" /Resources\n" " /Resources\n"
" <<\n" " <<\n"
" /XObject << /Im1 %ld 0 R >>\n" " %s"
" /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n" " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
" /Font << /f-0-0 %ld 0 R >>\n" " /Font << /f-0-0 %ld 0 R >>\n"
" >>\n" " >>\n"
">>\n" ">>\n"
"endobj\n", "endobj\n",
obj_, obj_,
2L, // Pages object 2L, // Pages object
width, width, height,
height, obj_ + 1, // Contents object
obj_ + 1, // Contents object xobject, // Image object
obj_ + 2, // Image object 3L); // Type0 Font
3L); // Type0 Font
if (n >= sizeof(buf)) return false; if (n >= sizeof(buf)) return false;
pages_.push_back(obj_); pages_.push_back(obj_);
AppendPDFObject(buf); AppendPDFObject(buf);
@ -908,13 +918,15 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
objsize += strlen(b2); objsize += strlen(b2);
AppendPDFObjectDIY(objsize); AppendPDFObjectDIY(objsize);
char *pdf_object; if (!textonly_) {
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) { char *pdf_object = nullptr;
return false; if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
return false;
}
AppendData(pdf_object, objsize);
AppendPDFObjectDIY(objsize);
delete[] pdf_object;
} }
AppendData(pdf_object, objsize);
AppendPDFObjectDIY(objsize);
delete[] pdf_object;
return true; return true;
} }
@ -958,15 +970,27 @@ bool TessPDFRenderer::EndDocumentHandler() {
offsets_.back() += pages_objsize; // manipulation #2 offsets_.back() += pages_objsize; // manipulation #2
// INFO // INFO
STRING utf16_title = "FEFF"; // byte_order_marker
GenericVector<int> unicodes;
UNICHAR::UTF8ToUnicode(title(), &unicodes);
char utf16[kMaxBytesPerCodepoint];
for (int i = 0; i < unicodes.length(); i++) {
int code = unicodes[i];
if (CodepointToUtf16be(code, utf16)) {
utf16_title += utf16;
}
}
char* datestr = l_getFormattedDate(); char* datestr = l_getFormattedDate();
n = snprintf(buf, sizeof(buf), n = snprintf(buf, sizeof(buf),
"%ld 0 obj\n" "%ld 0 obj\n"
"<<\n" "<<\n"
" /Producer (Tesseract %s)\n" " /Producer (Tesseract %s)\n"
" /CreationDate (D:%s)\n" " /CreationDate (D:%s)\n"
" /Title (%s)" " /Title <%s>\n"
">>\n" ">>\n"
"endobj\n", obj_, TESSERACT_VERSION_STR, datestr, title()); "endobj\n",
obj_, TESSERACT_VERSION_STR, datestr, utf16_title.c_str());
lept_free(datestr); lept_free(datestr);
if (n >= sizeof(buf)) return false; if (n >= sizeof(buf)) return false;
AppendPDFObject(buf); AppendPDFObject(buf);

View File

@ -155,11 +155,11 @@ TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
bool TessHOcrRenderer::BeginDocumentHandler() { bool TessHOcrRenderer::BeginDocumentHandler() {
AppendString( AppendString(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n" "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" " "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
"lang=\"en\">\n <head>\n <title>"); "lang=\"en\">\n <head>\n <title>");
AppendString(title()); AppendString(title());
AppendString( AppendString(
"</title>\n" "</title>\n"
@ -198,25 +198,25 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
/********************************************************************** /**********************************************************************
* TSV Text Renderer interface implementation * TSV Text Renderer interface implementation
**********************************************************************/ **********************************************************************/
TessTsvRenderer::TessTsvRenderer(const char *outputbase) TessTsvRenderer::TessTsvRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "tsv") { : TessResultRenderer(outputbase, "tsv") {
font_info_ = false; font_info_ = false;
} }
TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info) TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
: TessResultRenderer(outputbase, "tsv") { : TessResultRenderer(outputbase, "tsv") {
font_info_ = font_info; font_info_ = font_info;
} }
bool TessTsvRenderer::BeginDocumentHandler() { bool TessTsvRenderer::BeginDocumentHandler() {
// Output TSV column headings // Output TSV column headings
AppendString("level\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext\n"); AppendString(
"level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
"num\tleft\ttop\twidth\theight\tconf\ttext\n");
return true; return true;
} }
bool TessTsvRenderer::EndDocumentHandler() { bool TessTsvRenderer::EndDocumentHandler() { return true; }
return true;
}
bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
char* tsv = api->GetTSVText(imagenum()); char* tsv = api->GetTSVText(imagenum());
@ -266,8 +266,7 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
* Osd Text Renderer interface implementation * Osd Text Renderer interface implementation
**********************************************************************/ **********************************************************************/
TessOsdRenderer::TessOsdRenderer(const char* outputbase) TessOsdRenderer::TessOsdRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "osd") { : TessResultRenderer(outputbase, "osd") {}
}
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
char* osd = api->GetOsdText(imagenum()); char* osd = api->GetOsdText(imagenum());

View File

@ -15,8 +15,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_RENDERER_H__ #ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H__ #define TESSERACT_API_RENDERER_H_
// To avoid collision with other typenames include the ABSOLUTE MINIMUM // To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible // complexity of includes here. Use forward declarations wherever possible
@ -57,6 +57,7 @@ class TESS_API TessResultRenderer {
/** /**
* Starts a new document with the given title. * Starts a new document with the given title.
* This clears the contents of the output data. * This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/ */
bool BeginDocument(const char* title); bool BeginDocument(const char* title);
@ -77,7 +78,7 @@ class TESS_API TessResultRenderer {
bool EndDocument(); bool EndDocument();
const char* file_extension() const { return file_extension_; } const char* file_extension() const { return file_extension_; }
const char* title() const { return title_; } const char* title() const { return title_.c_str(); }
/** /**
* Returns the index of the last image given to AddImage * Returns the index of the last image given to AddImage
@ -126,7 +127,7 @@ class TESS_API TessResultRenderer {
private: private:
const char* file_extension_; // standard extension for generated output const char* file_extension_; // standard extension for generated output
const char* title_; // title of document being renderered STRING title_; // title of document being renderered
int imagenum_; // index of last image added int imagenum_; // index of last image added
FILE* fout_; // output file pointer FILE* fout_; // output file pointer
@ -153,13 +154,13 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer {
explicit TessHOcrRenderer(const char *outputbase, bool font_info); explicit TessHOcrRenderer(const char *outputbase, bool font_info);
explicit TessHOcrRenderer(const char *outputbase); explicit TessHOcrRenderer(const char *outputbase);
protected: protected:
virtual bool BeginDocumentHandler(); virtual bool BeginDocumentHandler();
virtual bool AddImageHandler(TessBaseAPI* api); virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool EndDocumentHandler(); virtual bool EndDocumentHandler();
private: private:
bool font_info_; // whether to print font information bool font_info_; // whether to print font information
}; };
/** /**
@ -167,15 +168,15 @@ private:
*/ */
class TESS_API TessTsvRenderer : public TessResultRenderer { class TESS_API TessTsvRenderer : public TessResultRenderer {
public: public:
explicit TessTsvRenderer(const char *outputbase, bool font_info); explicit TessTsvRenderer(const char* outputbase, bool font_info);
explicit TessTsvRenderer(const char *outputbase); explicit TessTsvRenderer(const char* outputbase);
protected: protected:
virtual bool BeginDocumentHandler(); virtual bool BeginDocumentHandler();
virtual bool AddImageHandler(TessBaseAPI* api); virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool EndDocumentHandler(); virtual bool EndDocumentHandler();
private: private:
bool font_info_; // whether to print font information bool font_info_; // whether to print font information
}; };
@ -186,30 +187,30 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
public: public:
// datadir is the location of the TESSDATA. We need it because // datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location. // we load a custom PDF font from this location.
TessPDFRenderer(const char *outputbase, const char *datadir); TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
protected: protected:
virtual bool BeginDocumentHandler(); virtual bool BeginDocumentHandler();
virtual bool AddImageHandler(TessBaseAPI* api); virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool EndDocumentHandler(); virtual bool EndDocumentHandler();
private: private:
// We don't want to have every image in memory at once, // We don't want to have every image in memory at once,
// so we store some metadata as we go along producing // so we store some metadata as we go along producing
// PDFs one page at a time. At the end that metadata is // PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a // used to make everything that isn't easily handled in a
// streaming fashion. // streaming fashion.
long int obj_; // counter for PDF objects long int obj_; // counter for PDF objects
GenericVector<long int> offsets_; // offset of every PDF object in bytes GenericVector<long int> offsets_; // offset of every PDF object in bytes
GenericVector<long int> pages_; // object number for every /Page object GenericVector<long int> pages_; // object number for every /Page object
const char *datadir_; // where to find the custom font const char *datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself. // Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize); void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data. // Bookkeeping + emit data.
void AppendPDFObject(const char *data); void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page. // Create the /Contents object for an entire page.
static char* GetPDFTextObjects(TessBaseAPI* api, char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
double width, double height);
// Turn an image into a PDF object. Only transcode if we have to. // Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum, static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size); char **pdf_object, long int *pdf_object_size);
@ -251,4 +252,4 @@ class TESS_API TessOsdRenderer : public TessResultRenderer {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H__ #endif // TESSERACT_API_RENDERER_H_

View File

@ -27,70 +27,82 @@
#include "allheaders.h" #include "allheaders.h"
#include "baseapi.h" #include "baseapi.h"
#include "basedir.h" #include "basedir.h"
#include "renderer.h"
#include "strngs.h"
#include "tprintf.h"
#include "openclwrapper.h" #include "openclwrapper.h"
#include "osdetect.h" #include "osdetect.h"
#include "renderer.h"
#include "simddetect.h"
#include "strngs.h"
#include "tprintf.h"
#if defined(HAVE_TIFFIO_H) && defined(_WIN32) #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
#include <tiffio.h> #include <tiffio.h>
#include <windows.h>
static void Win32WarningHandler(const char* module, const char* fmt, static void Win32WarningHandler(const char* module, const char* fmt,
va_list ap) { va_list ap) {
if (module != NULL) { if (module != NULL) {
fprintf(stderr, "%s: ", module); fprintf(stderr, "%s: ", module);
} }
fprintf(stderr, "Warning, "); fprintf(stderr, "Warning, ");
vfprintf(stderr, fmt, ap); vfprintf(stderr, fmt, ap);
fprintf(stderr, ".\n"); fprintf(stderr, ".\n");
} }
#endif /* HAVE_TIFFIO_H && _WIN32 */ #endif /* HAVE_TIFFIO_H && _WIN32 */
void PrintVersionInfo() { void PrintVersionInfo() {
char *versionStrP; char* versionStrP;
printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
versionStrP = getLeptonicaVersion(); versionStrP = getLeptonicaVersion();
printf(" %s\n", versionStrP); printf(" %s\n", versionStrP);
lept_free(versionStrP); lept_free(versionStrP);
versionStrP = getImagelibVersions(); versionStrP = getImagelibVersions();
printf(" %s\n", versionStrP); printf(" %s\n", versionStrP);
lept_free(versionStrP); lept_free(versionStrP);
#ifdef USE_OPENCL #ifdef USE_OPENCL
cl_platform_id platform; cl_platform_id platform[4];
cl_uint num_platforms; cl_uint num_platforms;
cl_device_id devices[2];
cl_uint num_devices;
char info[256];
int i;
printf(" OpenCL info:\n"); printf(" OpenCL info:\n");
clGetPlatformIDs(1, &platform, &num_platforms); if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
printf(" Found %d platforms.\n", num_platforms); printf(" Found %u platform(s).\n", num_platforms);
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0); for (unsigned n = 0; n < num_platforms; n++) {
printf(" Platform name: %s.\n", info); char info[256];
clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0); if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
printf(" Version: %s.\n", info); CL_SUCCESS) {
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices); printf(" Platform %u name: %s.\n", n + 1, info);
printf(" Found %d devices.\n", num_devices); }
for (i = 0; i < num_devices; ++i) { if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0); CL_SUCCESS) {
printf(" Device %d name: %s.\n", i+1, info); printf(" Version: %s.\n", info);
}
cl_device_id devices[2];
cl_uint num_devices;
if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
&num_devices) == CL_SUCCESS) {
printf(" Found %u device(s).\n", num_devices);
for (unsigned i = 0; i < num_devices; ++i) {
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
CL_SUCCESS) {
printf(" Device %u name: %s.\n", i + 1, info);
}
}
}
}
} }
#endif #endif
if (SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n");
if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n");
} }
void PrintUsage(const char* program) { void PrintUsage(const char* program) {
printf( printf(
"Usage:\n" "Usage:\n"
" %s --help | --help-psm | --version\n" " %s --help | --help-psm | --help-oem | --version\n"
" %s --list-langs [--tessdata-dir PATH]\n" " %s --list-langs [--tessdata-dir PATH]\n"
" %s --print-parameters [options...] [configfile...]\n" " %s --print-parameters [options...] [configfile...]\n"
" %s imagename|stdin outputbase|stdout [options...] [configfile...]\n", " %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
@ -100,27 +112,33 @@ void PrintUsage(const char* program) {
void PrintHelpForPSM() { void PrintHelpForPSM() {
const char* msg = const char* msg =
"Page segmentation modes:\n" "Page segmentation modes:\n"
" 0 Orientation and script detection (OSD) only.\n" " 0 Orientation and script detection (OSD) only.\n"
" 1 Automatic page segmentation with OSD.\n" " 1 Automatic page segmentation with OSD.\n"
" 2 Automatic page segmentation, but no OSD, or OCR.\n" " 2 Automatic page segmentation, but no OSD, or OCR.\n"
" 3 Fully automatic page segmentation, but no OSD. (Default)\n" " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
" 4 Assume a single column of text of variable sizes.\n" " 4 Assume a single column of text of variable sizes.\n"
" 5 Assume a single uniform block of vertically aligned text.\n" " 5 Assume a single uniform block of vertically aligned text.\n"
" 6 Assume a single uniform block of text.\n" " 6 Assume a single uniform block of text.\n"
" 7 Treat the image as a single text line.\n" " 7 Treat the image as a single text line.\n"
" 8 Treat the image as a single word.\n" " 8 Treat the image as a single word.\n"
" 9 Treat the image as a single word in a circle.\n" " 9 Treat the image as a single word in a circle.\n"
" 10 Treat the image as a single character.\n" " 10 Treat the image as a single character.\n"
" 11 Sparse text. Find as much text as possible in no"
" particular order.\n"
" 12 Sparse text with OSD.\n"
" 13 Raw line. Treat the image as a single text line,\n"
"\t\t\tbypassing hacks that are Tesseract-specific.\n";
//TODO: Consider publishing these modes. printf("%s", msg);
#if 0 }
" 11 Sparse text. Find as much text as possible in no"
" particular order.\n" void PrintHelpForOEM() {
" 12 Sparse text with OSD.\n" const char* msg =
" 13 Raw line. Treat the image as a single text line,\n" "OCR Engine modes:\n"
"\t\t\tbypassing hacks that are Tesseract-specific.\n" " 0 Original Tesseract only.\n"
#endif " 1 Neural nets LSTM only.\n"
; " 2 Tesseract + LSTM.\n"
" 3 Default, based on what is available.\n";
printf("%s", msg); printf("%s", msg);
} }
@ -136,32 +154,34 @@ void PrintHelpMessage(const char* program) {
" -l LANG[+LANG] Specify language(s) used for OCR.\n" " -l LANG[+LANG] Specify language(s) used for OCR.\n"
" -c VAR=VALUE Set value for config variables.\n" " -c VAR=VALUE Set value for config variables.\n"
" Multiple -c arguments are allowed.\n" " Multiple -c arguments are allowed.\n"
" -psm NUM Specify page segmentation mode.\n" " --psm NUM Specify page segmentation mode.\n"
"NOTE: These options must occur before any configfile.\n" " --oem NUM Specify OCR Engine mode.\n"
; "NOTE: These options must occur before any configfile.\n";
printf("\n%s\n", ocr_options); printf("\n%s\n", ocr_options);
PrintHelpForPSM(); PrintHelpForPSM();
PrintHelpForOEM();
const char *single_options = const char* single_options =
"Single options:\n" "Single options:\n"
" -h, --help Show this help message.\n" " -h, --help Show this help message.\n"
" --help-psm Show page segmentation modes.\n" " --help-psm Show page segmentation modes.\n"
" --help-oem Show OCR Engine modes.\n"
" -v, --version Show version information.\n" " -v, --version Show version information.\n"
" --list-langs List available languages for tesseract engine.\n" " --list-langs List available languages for tesseract engine.\n"
" --print-parameters Print tesseract parameters to stdout.\n" " --print-parameters Print tesseract parameters.\n";
;
printf("\n%s", single_options); printf("\n%s", single_options);
} }
void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) { void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
char** argv) {
char opt1[256], opt2[255]; char opt1[256], opt2[255];
for (int i = 0; i < argc; i++) { for (int i = 0; i < argc; i++) {
if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
strncpy(opt1, argv[i + 1], 255); strncpy(opt1, argv[i + 1], 255);
opt1[255] = '\0'; opt1[255] = '\0';
char *p = strchr(opt1, '='); char* p = strchr(opt1, '=');
if (!p) { if (!p) {
fprintf(stderr, "Missing = in configvar assignment\n"); fprintf(stderr, "Missing = in configvar assignment\n");
exit(1); exit(1);
@ -190,8 +210,8 @@ void PrintLangsList(tesseract::TessBaseAPI* api) {
} }
void PrintBanner() { void PrintBanner() {
tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
tesseract::TessBaseAPI::Version()); tesseract::TessBaseAPI::Version());
} }
/** /**
@ -209,31 +229,26 @@ void PrintBanner() {
* but that doesn't work. * but that doesn't work.
*/ */
void FixPageSegMode(tesseract::TessBaseAPI* api, void FixPageSegMode(tesseract::TessBaseAPI* api,
tesseract::PageSegMode pagesegmode) { tesseract::PageSegMode pagesegmode) {
if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api->SetPageSegMode(pagesegmode); api->SetPageSegMode(pagesegmode);
} }
// NOTE: arg_i is used here to avoid ugly *i so many times in this function // NOTE: arg_i is used here to avoid ugly *i so many times in this function
void ParseArgs(const int argc, char** argv, void ParseArgs(const int argc, char** argv, const char** lang,
const char** lang, const char** image, const char** outputbase,
const char** image, const char** datapath, bool* list_langs, bool* print_parameters,
const char** outputbase, GenericVector<STRING>* vars_vec,
const char** datapath, GenericVector<STRING>* vars_values, int* arg_i,
bool* list_langs, tesseract::PageSegMode* pagesegmode,
bool* print_parameters, tesseract::OcrEngineMode* enginemode) {
GenericVector<STRING>* vars_vec,
GenericVector<STRING>* vars_values,
int* arg_i,
tesseract::PageSegMode* pagesegmode) {
if (argc == 1) { if (argc == 1) {
PrintHelpMessage(argv[0]); PrintHelpMessage(argv[0]);
exit(0); exit(0);
} }
if (argc == 2) { if (argc == 2) {
if ((strcmp(argv[1], "-h") == 0) || if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
(strcmp(argv[1], "--help") == 0)) {
PrintHelpMessage(argv[0]); PrintHelpMessage(argv[0]);
exit(0); exit(0);
} }
@ -241,8 +256,11 @@ void ParseArgs(const int argc, char** argv,
PrintHelpForPSM(); PrintHelpForPSM();
exit(0); exit(0);
} }
if ((strcmp(argv[1], "-v") == 0) || if ((strcmp(argv[1], "--help-oem") == 0)) {
(strcmp(argv[1], "--version") == 0)) { PrintHelpForOEM();
exit(0);
}
if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
PrintVersionInfo(); PrintVersionInfo();
exit(0); exit(0);
} }
@ -269,8 +287,16 @@ void ParseArgs(const int argc, char** argv,
noocr = true; noocr = true;
*list_langs = true; *list_langs = true;
} else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) { } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
// The parameter -psm is deprecated and was replaced by --psm.
// It is still supported for compatibility reasons.
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1])); *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
++i; ++i;
} else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
++i;
} else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
*enginemode = static_cast<tesseract::OcrEngineMode>(atoi(argv[i + 1]));
++i;
} else if (strcmp(argv[i], "--print-parameters") == 0) { } else if (strcmp(argv[i], "--print-parameters") == 0) {
noocr = true; noocr = true;
*print_parameters = true; *print_parameters = true;
@ -298,10 +324,10 @@ void ParseArgs(const int argc, char** argv,
} }
} }
void PreloadRenderers(tesseract::TessBaseAPI* api, void PreloadRenderers(
tesseract::PointerVector<tesseract::TessResultRenderer>* renderers, tesseract::TessBaseAPI* api,
tesseract::PageSegMode pagesegmode, tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
const char* outputbase) { tesseract::PageSegMode pagesegmode, const char* outputbase) {
if (pagesegmode == tesseract::PSM_OSD_ONLY) { if (pagesegmode == tesseract::PSM_OSD_ONLY) {
renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
} else { } else {
@ -311,7 +337,7 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
bool font_info; bool font_info;
api->GetBoolVariable("hocr_font_info", &font_info); api->GetBoolVariable("hocr_font_info", &font_info);
renderers->push_back( renderers->push_back(
new tesseract::TessHOcrRenderer(outputbase, font_info)); new tesseract::TessHOcrRenderer(outputbase, font_info));
} }
api->GetBoolVariable("tessedit_create_tsv", &b); api->GetBoolVariable("tessedit_create_tsv", &b);
@ -324,8 +350,10 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
api->GetBoolVariable("tessedit_create_pdf", &b); api->GetBoolVariable("tessedit_create_pdf", &b);
if (b) { if (b) {
renderers->push_back(new tesseract::TessPDFRenderer(outputbase, bool textonly;
api->GetDatapath())); api->GetBoolVariable("textonly_pdf", &textonly);
renderers->push_back(new tesseract::TessPDFRenderer(
outputbase, api->GetDatapath(), textonly));
} }
api->GetBoolVariable("tessedit_write_unlv", &b); api->GetBoolVariable("tessedit_write_unlv", &b);
@ -358,26 +386,36 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
* main() * main()
* *
**********************************************************************/ **********************************************************************/
int main(int argc, char **argv) {
int main(int argc, char** argv) {
const char* lang = "eng"; const char* lang = "eng";
const char* image = NULL; const char* image = NULL;
const char* outputbase = NULL; const char* outputbase = NULL;
const char* datapath = NULL; const char* datapath = NULL;
bool list_langs = false; bool list_langs = false;
bool print_parameters = false; bool print_parameters = false;
GenericVector<STRING> vars_vec, vars_values;
int arg_i = 1; int arg_i = 1;
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
/* main() calls functions like ParseArgs which call exit().
* This results in memory leaks if vars_vec and vars_values are
* declared as auto variables (destructor is not called then). */
static GenericVector<STRING> vars_vec;
static GenericVector<STRING> vars_values;
#if !defined(DEBUG)
// Disable debugging and informational messages from Leptonica.
setMsgSeverity(L_SEVERITY_ERROR);
#endif
#if defined(HAVE_TIFFIO_H) && defined(_WIN32) #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
/* Show libtiff warnings on console (not in GUI). */ /* Show libtiff warnings on console (not in GUI). */
TIFFSetWarningHandler(Win32WarningHandler); TIFFSetWarningHandler(Win32WarningHandler);
#endif /* HAVE_TIFFIO_H && _WIN32 */ #endif /* HAVE_TIFFIO_H && _WIN32 */
ParseArgs(argc, argv, ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
&lang, &image, &outputbase, &datapath, &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
&list_langs, &print_parameters, &enginemode);
&vars_vec, &vars_values, &arg_i, &pagesegmode);
bool banner = false; bool banner = false;
if (outputbase != NULL && strcmp(outputbase, "-") && if (outputbase != NULL && strcmp(outputbase, "-") &&
@ -390,8 +428,8 @@ int main(int argc, char **argv) {
api.SetOutputName(outputbase); api.SetOutputName(outputbase);
int init_failed = api.Init(datapath, lang, tesseract::OEM_DEFAULT, int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
&(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false); argc - arg_i, &vars_vec, &vars_values, false);
if (init_failed) { if (init_failed) {
fprintf(stderr, "Could not initialize tesseract.\n"); fprintf(stderr, "Could not initialize tesseract.\n");
exit(1); exit(1);
@ -400,8 +438,8 @@ int main(int argc, char **argv) {
SetVariablesFromCLArgs(&api, argc, argv); SetVariablesFromCLArgs(&api, argc, argv);
if (list_langs) { if (list_langs) {
PrintLangsList(&api); PrintLangsList(&api);
exit(0); exit(0);
} }
if (print_parameters) { if (print_parameters) {
@ -430,12 +468,13 @@ int main(int argc, char **argv) {
tesseract::TextlineOrder order; tesseract::TextlineOrder order;
float deskew_angle; float deskew_angle;
tesseract::PageIterator* it = api.AnalyseLayout(); tesseract::PageIterator* it = api.AnalyseLayout();
if (it) { if (it) {
it->Orientation(&orientation, &direction, &order, &deskew_angle); it->Orientation(&orientation, &direction, &order, &deskew_angle);
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \ tprintf(
"Deskew angle: %.4f\n", "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
orientation, direction, order, deskew_angle); "Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
} else { } else {
ret_val = 1; ret_val = 1;
} }
@ -450,14 +489,12 @@ int main(int argc, char **argv) {
// ambigs.train, box.train, box.train.stderr, linebox, rebox // ambigs.train, box.train, box.train.stderr, linebox, rebox
bool b = false; bool b = false;
bool in_training_mode = bool in_training_mode =
(api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
tesseract::PointerVector<tesseract::TessResultRenderer> renderers; tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
if (in_training_mode) { if (in_training_mode) {
renderers.push_back(NULL); renderers.push_back(NULL);
} else { } else {

View File

@ -1,4 +1,4 @@
os: Visual Studio 2015 os: Visual Studio 2017
platform: platform:
- Win32 - Win32
@ -6,20 +6,40 @@ platform:
configuration: configuration:
- Release - Release
# for curl
install:
- set PATH=C:\Program Files\Git\mingw64\bin;%PATH%
before_build: before_build:
- if %platform%==Win32 set generator=Visual Studio 14 - if %platform%==Win32 set generator=Visual Studio 15 2017
- if %platform%==Win64 set generator=Visual Studio 14 Win64 - if %platform%==Win64 set generator=Visual Studio 15 2017 Win64
- if %platform%==Win32 set vcplatform=Win32 - if %platform%==Win32 set vcplatform=Win32
- if %platform%==Win64 set vcplatform=x64 - if %platform%==Win64 set vcplatform=x64
- curl -fsS -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip - curl -fsS -L -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip
- 7z x cppan.zip - 7z x cppan.zip
- set PATH=%PATH%;%cd% - set PATH=%PATH%;%cd%
- cppan # dummy run to create %USERPROFILE%\.cppan\cppan.yml
- ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_warning_level: 0`n"'
- ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_system_verbose: false`n"'
- ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nvar_check_jobs: 1`n"'
build_script: build_script:
- cppan
- mkdir build - mkdir build
- mkdir build\bin
- mkdir build\bin\Release
- cd build - cd build
- cmake .. -G "%generator%" -DSTATIC=1 #- cmd: 'echo local_settings: > cppan.yml'
- msbuild tesseract.sln /p:Platform=%vcplatform% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" #- cmd: 'echo generator: %generator% >> cppan.yml'
#- cmd: 'echo use_shared_libs: true >> cppan.yml'
#- cppan --build ..
- cmake .. -G "%generator%" -DBUILD_TRAINING_TOOLS=Off -DAPPVEYOR=1
- cmake --build . --config Release > bin\Release\log.txt 2>&1
artifacts:
- path: build\bin\Release
#- path: build
name: tesseract-$(APPVEYOR_BUILD_VERSION)

38
arch/Makefile.am Normal file
View File

@ -0,0 +1,38 @@
# Automake rules for the architecture-specific (SIMD) code in arch/:
# the AVX and SSE dot-product implementations and the SIMD detector.
AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer
AUTOMAKE_OPTIONS = subdir-objects
SUBDIRS =
AM_CXXFLAGS =
# With VISIBILITY enabled, symbols are hidden by default and TESS_EXPORTS is
# defined for the sources built here.
if VISIBILITY
AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
AM_CPPFLAGS += -DTESS_EXPORTS
endif
# Headers installed for users of the library.
include_HEADERS = dotproductavx.h dotproductsse.h simddetect.h
noinst_HEADERS =
# Build the three libraries either as non-installed convenience libraries
# or, when USING_MULTIPLELIBS is set, as separately installed and versioned
# libraries.
if !USING_MULTIPLELIBS
noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
noinst_LTLIBRARIES += libtesseract_arch.la
else
lib_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la
lib_LTLIBRARIES += libtesseract_arch.la
libtesseract_arch_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
libtesseract_avx_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
libtesseract_sse_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
endif
# The instruction-set flags are applied per library, so only the matching
# dot-product source is compiled with -mavx or -msse4.1. Without the flag,
# the source file falls back to its aborting stub implementation.
if AVX_OPT
libtesseract_avx_la_CXXFLAGS = -mavx
endif
if SSE41_OPT
libtesseract_sse_la_CXXFLAGS = -msse4.1
endif
# One source file per library.
libtesseract_arch_la_SOURCES = simddetect.cpp
libtesseract_avx_la_SOURCES = dotproductavx.cpp
libtesseract_sse_la_SOURCES = dotproductsse.cpp

112
arch/dotproductavx.cpp Normal file
View File

@ -0,0 +1,112 @@
///////////////////////////////////////////////////////////////////////
// File: dotproductavx.cpp
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:48:05 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#if !defined(__AVX__)
// Implementation for non-avx archs.
#include "dotproductavx.h"
#include <stdio.h>
#include <stdlib.h>
namespace tesseract {
// Stub used when this file is compiled without AVX enabled (__AVX__ is not
// defined). It must never be called: it reports the problem on stderr and
// aborts. The arguments are ignored.
double DotProductAVX(const double* u, const double* v, int n) {
  // This stub is selected on any platform built without AVX, not only on
  // Android, so the message names the real cause.
  fprintf(stderr, "DotProductAVX was compiled without AVX support\n");
  abort();
}
} // namespace tesseract
#else // !defined(__AVX__)
// Implementation for avx capable archs.
#include <immintrin.h>
#include <stdint.h>
#include "dotproductavx.h"
#include "host.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel AVX intrinsics to access the SIMD instruction set.
double DotProductAVX(const double* u, const double* v, int n) {
int max_offset = n - 4;
int offset = 0;
// Accumulate a set of 4 sums in sum, by loading pairs of 4 values from u and
// v, and multiplying them together in parallel.
__m256d sum = _mm256_setzero_pd();
if (offset <= max_offset) {
offset = 4;
// Aligned load is reputedly faster but requires 32 byte aligned input.
if ((reinterpret_cast<const uintptr_t>(u) & 31) == 0 &&
(reinterpret_cast<const uintptr_t>(v) & 31) == 0) {
// Use aligned load.
__m256d floats1 = _mm256_load_pd(u);
__m256d floats2 = _mm256_load_pd(v);
// Multiply.
sum = _mm256_mul_pd(floats1, floats2);
while (offset <= max_offset) {
floats1 = _mm256_load_pd(u + offset);
floats2 = _mm256_load_pd(v + offset);
offset += 4;
__m256d product = _mm256_mul_pd(floats1, floats2);
sum = _mm256_add_pd(sum, product);
}
} else {
// Use unaligned load.
__m256d floats1 = _mm256_loadu_pd(u);
__m256d floats2 = _mm256_loadu_pd(v);
// Multiply.
sum = _mm256_mul_pd(floats1, floats2);
while (offset <= max_offset) {
floats1 = _mm256_loadu_pd(u + offset);
floats2 = _mm256_loadu_pd(v + offset);
offset += 4;
__m256d product = _mm256_mul_pd(floats1, floats2);
sum = _mm256_add_pd(sum, product);
}
}
}
// Add the 4 product sums together horizontally. Not so easy as with sse, as
// there is no add across the upper/lower 128 bit boundary, so permute to
// move the upper 128 bits to lower in another register.
__m256d sum2 = _mm256_permute2f128_pd(sum, sum, 1);
sum = _mm256_hadd_pd(sum, sum2);
sum = _mm256_hadd_pd(sum, sum);
double result;
// _mm256_extract_f64 doesn't exist, but resist the temptation to use an sse
// instruction, as that introduces a 70 cycle delay. All this casting is to
// fool the intrinsics into thinking we are extracting the bottom int64.
auto cast_sum = _mm256_castpd_si256(sum);
*(reinterpret_cast<inT64*>(&result)) =
#if defined(_WIN32) || defined(__i386__)
// This is a very simple workaround that is activated
// for all platforms that do not have _mm256_extract_epi64.
// _mm256_extract_epi64(X, Y) == ((uint64_t*)&X)[Y]
((uint64_t*)&cast_sum)[0]
#else
_mm256_extract_epi64(cast_sum, 0)
#endif
;
while (offset < n) {
result += u[offset] * v[offset];
++offset;
}
return result;
}
} // namespace tesseract.
#endif // !defined(__AVX__)

30
arch/dotproductavx.h Normal file
View File

@ -0,0 +1,30 @@
///////////////////////////////////////////////////////////////////////
// File: dotproductavx.h
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:51:05 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTAVX_H_
#define TESSERACT_ARCH_DOTPRODUCTAVX_H_
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel AVX intrinsics to access the SIMD instruction set.
// If dotproductavx.cpp is compiled without AVX enabled (__AVX__ not defined),
// the function is only a stub that prints an error to stderr and aborts.
double DotProductAVX(const double* u, const double* v, int n);
} // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTAVX_H_

141
arch/dotproductsse.cpp Normal file
View File

@ -0,0 +1,141 @@
///////////////////////////////////////////////////////////////////////
// File: dotproductsse.cpp
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:57:45 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#if !defined(__SSE4_1__)
// This file is not being compiled with "-msse4.1", so use dummy stubs.
#include "dotproductsse.h"
#include <stdio.h>
#include <stdlib.h>
namespace tesseract {
// Stub used when this file is compiled without SSE4.1 enabled (__SSE4_1__ is
// not defined). It must never be called: it reports the problem on stderr and
// aborts. The arguments are ignored.
double DotProductSSE(const double* u, const double* v, int n) {
  // This stub is selected on any platform built without SSE4.1, not only on
  // Android, so the message names the real cause.
  fprintf(stderr, "DotProductSSE was compiled without SSE4.1 support\n");
  abort();
}
// Stub used when this file is compiled without SSE4.1 enabled (__SSE4_1__ is
// not defined). It must never be called: it reports the problem on stderr and
// aborts. The arguments are ignored.
inT32 IntDotProductSSE(const inT8* u, const inT8* v, int n) {
  // This stub is selected on any platform built without SSE4.1, not only on
  // Android, so the message names the real cause.
  fprintf(stderr, "IntDotProductSSE was compiled without SSE4.1 support\n");
  abort();
}
} // namespace tesseract
#else // !defined(__SSE4_1__)
// Implementation for SSE4.1 capable builds.
#include <emmintrin.h>
#include <smmintrin.h>
#include <stdint.h>
#include "dotproductsse.h"
#include "host.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
double DotProductSSE(const double* u, const double* v, int n) {
int max_offset = n - 2;
int offset = 0;
// Accumulate a set of 2 sums in sum, by loading pairs of 2 values from u and
// v, and multiplying them together in parallel.
__m128d sum = _mm_setzero_pd();
if (offset <= max_offset) {
offset = 2;
// Aligned load is reputedly faster but requires 16 byte aligned input.
if ((reinterpret_cast<const uintptr_t>(u) & 15) == 0 &&
(reinterpret_cast<const uintptr_t>(v) & 15) == 0) {
// Use aligned load.
sum = _mm_load_pd(u);
__m128d floats2 = _mm_load_pd(v);
// Multiply.
sum = _mm_mul_pd(sum, floats2);
while (offset <= max_offset) {
__m128d floats1 = _mm_load_pd(u + offset);
floats2 = _mm_load_pd(v + offset);
offset += 2;
floats1 = _mm_mul_pd(floats1, floats2);
sum = _mm_add_pd(sum, floats1);
}
} else {
// Use unaligned load.
sum = _mm_loadu_pd(u);
__m128d floats2 = _mm_loadu_pd(v);
// Multiply.
sum = _mm_mul_pd(sum, floats2);
while (offset <= max_offset) {
__m128d floats1 = _mm_loadu_pd(u + offset);
floats2 = _mm_loadu_pd(v + offset);
offset += 2;
floats1 = _mm_mul_pd(floats1, floats2);
sum = _mm_add_pd(sum, floats1);
}
}
}
// Add the 2 sums in sum horizontally.
sum = _mm_hadd_pd(sum, sum);
// Extract the low result.
double result = _mm_cvtsd_f64(sum);
// Add on any left-over products.
while (offset < n) {
result += u[offset] * v[offset];
++offset;
}
return result;
}
// Returns the dot product of the two n-element 8-bit integer vectors u and v
// as a 32-bit integer, computed eight elements at a time with Intel SSE
// intrinsics.
inT32 IntDotProductSSE(const inT8* u, const inT8* v, int n) {
  // Largest index at which a full block of 8 values can still be loaded.
  const int last_block = n - 8;
  int pos = 0;
  // Four running 32-bit partial sums.
  __m128i acc = _mm_setzero_si128();
  for (; pos <= last_block; pos += 8) {
    // Load 8 bytes from each input and sign-extend them to 16 bit lanes.
    const __m128i lhs = _mm_cvtepi8_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(u + pos)));
    const __m128i rhs = _mm_cvtepi8_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(v + pos)));
    // _mm_madd_epi16 multiplies the 8 pairs of 16 bit ints into 32 bit
    // products and adds neighbouring products, giving 4 32 bit results that
    // still fit in a 128 bit register.
    acc = _mm_add_epi32(acc, _mm_madd_epi16(lhs, rhs));
  }
  // Collapse the 4 packed 32 bit sums into the low lane and extract it.
  acc = _mm_hadd_epi32(acc, acc);
  acc = _mm_hadd_epi32(acc, acc);
  inT32 total = _mm_cvtsi128_si32(acc);
  // Handle the fewer-than-8 trailing elements one by one.
  for (; pos < n; ++pos) {
    total += u[pos] * v[pos];
  }
  return total;
}
} // namespace tesseract.
#endif // !defined(__SSE4_1__)

35
arch/dotproductsse.h Normal file
View File

@ -0,0 +1,35 @@
///////////////////////////////////////////////////////////////////////
// File: dotproductsse.h
// Description: Architecture-specific dot-product function.
// Author: Ray Smith
// Created: Wed Jul 22 10:57:05 PDT 2015
//
// (C) Copyright 2015, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_DOTPRODUCTSSE_H_
#define TESSERACT_ARCH_DOTPRODUCTSSE_H_
#include "host.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
// If dotproductsse.cpp is compiled without SSE4.1 enabled (__SSE4_1__ not
// defined), the function is only a stub that prints an error and aborts.
double DotProductSSE(const double* u, const double* v, int n);
// Computes and returns the dot product of the n-vectors u and v, whose
// elements are 8-bit integers; the result is accumulated as a 32-bit integer.
// Uses Intel SSE intrinsics to access the SIMD instruction set.
// If dotproductsse.cpp is compiled without SSE4.1 enabled (__SSE4_1__ not
// defined), the function is only a stub that prints an error and aborts.
inT32 IntDotProductSSE(const inT8* u, const inT8* v, int n);
} // namespace tesseract.
#endif // TESSERACT_ARCH_DOTPRODUCTSSE_H_

68
arch/simddetect.cpp Normal file
View File

@ -0,0 +1,68 @@
///////////////////////////////////////////////////////////////////////
// File: simddetect.cpp
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "simddetect.h"
#include "tprintf.h"
#undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
#if !defined(ANDROID_BUILD)
#define X86_BUILD 1
#endif // !ANDROID_BUILD
#endif // x86 target
#if defined(X86_BUILD)
#if defined(__GNUC__)
#include <cpuid.h>
#elif defined(_WIN32)
#include <intrin.h>
#endif
#endif
SIMDDetect SIMDDetect::detector;
// If true, then AVX has been detected.
bool SIMDDetect::avx_available_;
// If true, then SSE4.1 has been detected.
bool SIMDDetect::sse_available_;
// Constructor.
// Tests the architecture in a system-dependent way to detect AVX, SSE4.1 and
// any other available SIMD equipment, and records the results in the static
// availability flags. On non-x86 builds the flags are left untouched.
// AVX is only reported when the CPU advertises it AND the operating system
// has enabled saving of the XMM and YMM register state; executing AVX
// instructions without that OS support raises an illegal instruction fault.
// __GNUC__ is also defined by compilers that include GNU extensions such as
// clang.
SIMDDetect::SIMDDetect() {
#if defined(X86_BUILD)
  // Feature bits reported in ECX by CPUID leaf 1.
  const unsigned int kSSE41Bit = 0x00080000;    // Bit 19: SSE4.1.
  const unsigned int kOSXSAVEBit = 0x08000000;  // Bit 27: XGETBV is usable.
  const unsigned int kAVXBit = 0x10000000;      // Bit 28: CPU supports AVX.
  const unsigned int kAVXBits = kOSXSAVEBit | kAVXBit;
  // XCR0 bits 1 and 2: the OS saves/restores XMM and YMM state respectively.
  const unsigned int kXCR0YmmState = 0x6;
#if defined(__GNUC__)
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
    sse_available_ = (ecx & kSSE41Bit) != 0;
    bool os_saves_ymm = false;
    // XGETBV may only be executed when OSXSAVE is set, so test that first.
    if ((ecx & kAVXBits) == kAVXBits) {
      unsigned int xcr0_low, xcr0_high;
      __asm__ __volatile__("xgetbv"
                           : "=a"(xcr0_low), "=d"(xcr0_high)
                           : "c"(0));
      os_saves_ymm = (xcr0_low & kXCR0YmmState) == kXCR0YmmState;
    }
    avx_available_ = os_saves_ymm;
  }
#elif defined(_WIN32)
  int cpuInfo[4];
  __cpuid(cpuInfo, 0);
  if (cpuInfo[0] >= 1) {
    __cpuid(cpuInfo, 1);
    const unsigned int ecx = static_cast<unsigned int>(cpuInfo[2]);
    sse_available_ = (ecx & kSSE41Bit) != 0;
    // The && short-circuit guarantees _xgetbv only runs when OSXSAVE is set.
    avx_available_ =
        (ecx & kAVXBits) == kAVXBits &&
        (static_cast<unsigned int>(_xgetbv(0)) & kXCR0YmmState) ==
            kXCR0YmmState;
  }
#else
#error "I don't know how to test for SIMD with this compiler"
#endif
#endif // X86_BUILD
}

41
arch/simddetect.h Normal file
View File

@ -0,0 +1,41 @@
///////////////////////////////////////////////////////////////////////
// File: simddetect.h
// Description: Architecture detector.
// Author: Stefan Weil (based on code from Ray Smith)
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_ARCH_SIMDDETECT_H_
#define TESSERACT_ARCH_SIMDDETECT_H_

#include "platform.h"

// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
// All state is static, so callers only ever use the static query functions;
// the constructor is private and runs for the one static instance below.
class SIMDDetect {
 public:
  // Returns true if AVX is available on this system.
  static inline bool IsAVXAvailable() { return detector.avx_available_; }
  // Returns true if SSE4.1 is available on this system.
  static inline bool IsSSEAvailable() { return detector.sse_available_; }

 private:
  // Constructor, must set all static member variables.
  SIMDDetect();

  // Singleton.
  static SIMDDetect detector;
  // If true, then AVX has been detected.
  static TESS_API bool avx_available_;
  // If true, then SSE4.1 has been detected.
  static TESS_API bool sse_available_;
};

#endif  // TESSERACT_ARCH_SIMDDETECT_H_

View File

@ -1,4 +1,13 @@
#!/bin/sh #!/bin/sh
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a simple script which is meant to help developers # This is a simple script which is meant to help developers
# better deal with the GNU autotools, specifically: # better deal with the GNU autotools, specifically:
@ -37,7 +46,20 @@ if [ "$1" = "clean" ]; then
find . -iname "Makefile.in" -type f -exec rm '{}' + find . -iname "Makefile.in" -type f -exec rm '{}' +
fi fi
# create m4 directory if it not exists # Prevent any errors that might result from failing to properly invoke
# `libtoolize` or `glibtoolize,` whichever is present on your system,
# from occurring by testing for its existence and capturing the absolute path to
# its location for caching purposes prior to using it later on in 'Step 2:'
if command -v libtoolize >/dev/null 2>&1; then
LIBTOOLIZE="$(command -v libtoolize)"
elif command -v glibtoolize >/dev/null 2>&1; then
LIBTOOLIZE="$(command -v glibtoolize)"
else
echo "Unable to find a valid copy of libtoolize or glibtoolize in your PATH!"
bail_out
fi
# create m4 directory if it does not exist
if [ ! -d m4 ]; then if [ ! -d m4 ]; then
mkdir m4 mkdir m4
fi fi
@ -61,9 +83,9 @@ aclocal -I config || bail_out
# --- Step 2: # --- Step 2:
echo "Running libtoolize" echo "Running $LIBTOOLIZE"
libtoolize -f -c || glibtoolize -f -c || bail_out $LIBTOOLIZE -f -c || bail_out
libtoolize --automake || glibtoolize --automake || bail_out $LIBTOOLIZE --automake || bail_out
# --- Step 3: Generate config.h.in from: # --- Step 3: Generate config.h.in from:
# . configure.ac (look for AM_CONFIG_HEADER tag or AC_CONFIG_HEADER tag) # . configure.ac (look for AM_CONFIG_HEADER tag or AC_CONFIG_HEADER tag)

View File

@ -1,12 +1,14 @@
AM_CPPFLAGS += \ AM_CPPFLAGS += \
-DUSE_STD_NAMESPACE \ -DUSE_STD_NAMESPACE \
-I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \ -I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \
-I$(top_srcdir)/arch -I$(top_srcdir)/lstm \
-I$(top_srcdir)/viewer \ -I$(top_srcdir)/viewer \
-I$(top_srcdir)/classify -I$(top_srcdir)/dict \ -I$(top_srcdir)/classify -I$(top_srcdir)/dict \
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
-I$(top_srcdir)/textord -I$(top_srcdir)/opencl -I$(top_srcdir)/textord -I$(top_srcdir)/opencl
AM_CPPFLAGS += $(OPENCL_CPPFLAGS) AM_CPPFLAGS += $(OPENCL_CPPFLAGS)
AM_CPPFLAGS += $(OPENMP_CXXFLAGS)
if VISIBILITY if VISIBILITY
AM_CPPFLAGS += -DTESS_EXPORTS \ AM_CPPFLAGS += -DTESS_EXPORTS \
@ -33,18 +35,18 @@ libtesseract_main_la_LIBADD = \
../ccstruct/libtesseract_ccstruct.la \ ../ccstruct/libtesseract_ccstruct.la \
../viewer/libtesseract_viewer.la \ ../viewer/libtesseract_viewer.la \
../dict/libtesseract_dict.la \ ../dict/libtesseract_dict.la \
../arch/libtesseract_avx.la \
../arch/libtesseract_sse.la \
../lstm/libtesseract_lstm.la \
../classify/libtesseract_classify.la \ ../classify/libtesseract_classify.la \
../cutil/libtesseract_cutil.la \ ../cutil/libtesseract_cutil.la \
../opencl/libtesseract_opencl.la ../opencl/libtesseract_opencl.la
if !NO_CUBE_BUILD
libtesseract_main_la_LIBADD += ../cube/libtesseract_cube.la
endif
endif endif
libtesseract_main_la_SOURCES = \ libtesseract_main_la_SOURCES = \
adaptions.cpp applybox.cpp control.cpp \ adaptions.cpp applybox.cpp control.cpp \
docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \ docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \
ltrresultiterator.cpp \ linerec.cpp ltrresultiterator.cpp \
osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \ osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \
pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \ pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
reject.cpp resultiterator.cpp superscript.cpp \ reject.cpp resultiterator.cpp superscript.cpp \
@ -52,12 +54,3 @@ libtesseract_main_la_SOURCES = \
tfacepp.cpp thresholder.cpp \ tfacepp.cpp thresholder.cpp \
werdit.cpp werdit.cpp
if !NO_CUBE_BUILD
AM_CPPFLAGS += \
-I$(top_srcdir)/neural_networks/runtime -I$(top_srcdir)/cube
noinst_HEADERS += \
cube_reco_context.h cubeclassifier.h tesseract_cube_combiner.h
libtesseract_main_la_SOURCES += \
cube_control.cpp cube_reco_context.cpp cubeclassifier.cpp \
tesseract_cube_combiner.cpp
endif

View File

@ -1,8 +1,8 @@
/****************************************************************** /******************************************************************
* File: control.cpp (Formerly control.c) * File: control.cpp (Formerly control.c)
* Description: Module-independent matcher controller. * Description: Module-independent matcher controller.
* Author: Ray Smith * Author: Ray Smith
* Created: Thu Apr 23 11:09:58 BST 1992 * Created: Thu Apr 23 11:09:58 BST 1992
* ReHacked: Tue Sep 22 08:42:49 BST 1992 Phil Cheatle * ReHacked: Tue Sep 22 08:42:49 BST 1992 Phil Cheatle
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
@ -31,21 +31,22 @@
#include <errno.h> #include <errno.h>
#endif #endif
#include <ctype.h> #include <ctype.h>
#include "ocrclass.h" #include "callcpp.h"
#include "werdit.h" #include "control.h"
#include "docqual.h"
#include "drawfx.h" #include "drawfx.h"
#include "tessbox.h" #include "fixspace.h"
#include "tessvars.h" #include "globals.h"
#include "lstmrecognizer.h"
#include "ocrclass.h"
#include "output.h"
#include "pgedit.h" #include "pgedit.h"
#include "reject.h" #include "reject.h"
#include "fixspace.h"
#include "docqual.h"
#include "control.h"
#include "output.h"
#include "callcpp.h"
#include "globals.h"
#include "sorthelper.h" #include "sorthelper.h"
#include "tessbox.h"
#include "tesseractclass.h" #include "tesseractclass.h"
#include "tessvars.h"
#include "werdit.h"
#define MIN_FONT_ROW_COUNT 8 #define MIN_FONT_ROW_COUNT 8
#define MAX_XHEIGHT_DIFF 3 #define MAX_XHEIGHT_DIFF 3
@ -73,7 +74,6 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
} }
} }
/** /**
* Recognize a single word in interactive mode. * Recognize a single word in interactive mode.
* *
@ -85,7 +85,12 @@ BOOL8 Tesseract::recog_interactive(PAGE_RES_IT* pr_it) {
WordData word_data(*pr_it); WordData word_data(*pr_it);
SetupWordPassN(2, &word_data); SetupWordPassN(2, &word_data);
classify_word_and_language(2, pr_it, &word_data); // LSTM doesn't run on pass2, but we want to run pass2 for tesseract.
if (lstm_recognizer_ == NULL) {
classify_word_and_language(2, pr_it, &word_data);
} else {
classify_word_and_language(1, pr_it, &word_data);
}
if (tessedit_debug_quality_metrics) { if (tessedit_debug_quality_metrics) {
WERD_RES* word_res = pr_it->word(); WERD_RES* word_res = pr_it->word();
word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual); word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual);
@ -188,8 +193,8 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
WERD_RES* word_res = new WERD_RES; WERD_RES* word_res = new WERD_RES;
word_res->InitForRetryRecognition(*word->word); word_res->InitForRetryRecognition(*word->word);
word->lang_words.push_back(word_res); word->lang_words.push_back(word_res);
// Cube doesn't get setup for pass2. // LSTM doesn't get setup for pass2.
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) { if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
word_res->SetupForRecognition( word_res->SetupForRecognition(
lang_t->unicharset, lang_t, BestPix(), lang_t->unicharset, lang_t, BestPix(),
lang_t->tessedit_ocr_engine_mode, NULL, lang_t->tessedit_ocr_engine_mode, NULL,
@ -219,16 +224,14 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
if (pass_n == 1) { if (pass_n == 1) {
monitor->progress = 70 * w / words->size(); monitor->progress = 70 * w / words->size();
if (monitor->progress_callback != NULL) { if (monitor->progress_callback != NULL) {
TBOX box = pr_it->word()->word->bounding_box(); TBOX box = pr_it->word()->word->bounding_box();
(*monitor->progress_callback)(monitor->progress, (*monitor->progress_callback)(monitor->progress, box.left(),
box.left(), box.right(), box.right(), box.top(), box.bottom());
box.top(), box.bottom());
} }
} else { } else {
monitor->progress = 70 + 30 * w / words->size(); monitor->progress = 70 + 30 * w / words->size();
if (monitor->progress_callback!=NULL) { if (monitor->progress_callback != NULL) {
(*monitor->progress_callback)(monitor->progress, (*monitor->progress_callback)(monitor->progress, 0, 0, 0, 0);
0, 0, 0, 0);
} }
} }
if (monitor->deadline_exceeded() || if (monitor->deadline_exceeded() ||
@ -253,7 +256,8 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
pr_it->forward(); pr_it->forward();
ASSERT_HOST(pr_it->word() != NULL); ASSERT_HOST(pr_it->word() != NULL);
bool make_next_word_fuzzy = false; bool make_next_word_fuzzy = false;
if (ReassignDiacritics(pass_n, pr_it, &make_next_word_fuzzy)) { if (!AnyLSTMLang() &&
ReassignDiacritics(pass_n, pr_it, &make_next_word_fuzzy)) {
// Needs to be setup again to see the new outlines in the chopped_word. // Needs to be setup again to see the new outlines in the chopped_word.
SetupWordPassN(pass_n, word); SetupWordPassN(pass_n, word);
} }
@ -384,9 +388,8 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false; if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false;
} }
// The next passes can only be run if tesseract has been used, as cube // The next passes are only required for Tess-only.
// doesn't set all the necessary outputs in WERD_RES. if (AnyTessLang() && !AnyLSTMLang()) {
if (AnyTessLang()) {
// ****************** Pass 3 ******************* // ****************** Pass 3 *******************
// Fix fuzzy spaces. // Fix fuzzy spaces.
set_global_loc_code(LOC_FUZZY_SPACE); set_global_loc_code(LOC_FUZZY_SPACE);
@ -402,15 +405,6 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
// ****************** Pass 5,6 ******************* // ****************** Pass 5,6 *******************
rejection_passes(page_res, monitor, target_word_box, word_config); rejection_passes(page_res, monitor, target_word_box, word_config);
#ifndef NO_CUBE_BUILD
// ****************** Pass 7 *******************
// Cube combiner.
// If cube is loaded and its combiner is present, run it.
if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
run_cube_combiner(page_res);
}
#endif
// ****************** Pass 8 ******************* // ****************** Pass 8 *******************
font_recognition_pass(page_res); font_recognition_pass(page_res);
@ -438,8 +432,13 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
for (page_res_it.restart_page(); page_res_it.word() != NULL; for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) { page_res_it.forward()) {
WERD_RES* word = page_res_it.word(); WERD_RES* word = page_res_it.word();
if (word->best_choice == NULL || word->best_choice->length() == 0) POLY_BLOCK* pb = page_res_it.block()->block != NULL
? page_res_it.block()->block->poly_block()
: NULL;
if (word->best_choice == NULL || word->best_choice->length() == 0 ||
(word->best_choice->IsAllSpaces() && (pb == NULL || pb->IsText()))) {
page_res_it.DeleteCurrentWord(); page_res_it.DeleteCurrentWord();
}
} }
if (monitor != NULL) { if (monitor != NULL) {
@ -539,7 +538,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
} }
} }
} }
if (overrides_word1.size() >= 1) { if (!overrides_word1.empty()) {
// Excellent, we have some bigram matches. // Excellent, we have some bigram matches.
if (EqualIgnoringCaseAndTerminalPunct(*w_prev->best_choice, if (EqualIgnoringCaseAndTerminalPunct(*w_prev->best_choice,
*overrides_word1[best_idx]) && *overrides_word1[best_idx]) &&
@ -755,16 +754,32 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) {
} }
} }
// Factored helper considers the indexed word and updates all the pointed // Helper finds the gap between the index word and the next.
// values. static void WordGap(const PointerVector<WERD_RES>& words, int index, int* right,
static void EvaluateWord(const PointerVector<WERD_RES>& words, int index, int* next_left) {
float* rating, float* certainty, bool* bad,
bool* valid_permuter, int* right, int* next_left) {
*right = -MAX_INT32; *right = -MAX_INT32;
*next_left = MAX_INT32; *next_left = MAX_INT32;
if (index < words.size()) { if (index < words.size()) {
*right = words[index]->word->bounding_box().right();
if (index + 1 < words.size())
*next_left = words[index + 1]->word->bounding_box().left();
}
}
// Factored helper computes the rating, certainty, badness and validity of
// the permuter of the words in [first_index, end_index).
static void EvaluateWordSpan(const PointerVector<WERD_RES>& words,
int first_index, int end_index, float* rating,
float* certainty, bool* bad,
bool* valid_permuter) {
if (end_index <= first_index) {
*bad = true;
*valid_permuter = false;
}
for (int index = first_index; index < end_index && index < words.size();
++index) {
WERD_CHOICE* choice = words[index]->best_choice; WERD_CHOICE* choice = words[index]->best_choice;
if (choice == NULL) { if (choice == nullptr) {
*bad = true; *bad = true;
} else { } else {
*rating += choice->rating(); *rating += choice->rating();
@ -772,12 +787,6 @@ static void EvaluateWord(const PointerVector<WERD_RES>& words, int index,
if (!Dict::valid_word_permuter(choice->permuter(), false)) if (!Dict::valid_word_permuter(choice->permuter(), false))
*valid_permuter = false; *valid_permuter = false;
} }
*right = words[index]->word->bounding_box().right();
if (index + 1 < words.size())
*next_left = words[index + 1]->word->bounding_box().left();
} else {
*valid_permuter = false;
*bad = true;
} }
} }
@ -802,24 +811,13 @@ static int SelectBestWords(double rating_ratio,
while (b < best_words->size() || n < new_words->size()) { while (b < best_words->size() || n < new_words->size()) {
// Start of the current run in each. // Start of the current run in each.
int start_b = b, start_n = n; int start_b = b, start_n = n;
// Rating of the current run in each.
float b_rating = 0.0f, n_rating = 0.0f;
// Certainty of the current run in each.
float b_certainty = 0.0f, n_certainty = 0.0f;
// True if any word is missing its best choice.
bool b_bad = false, n_bad = false;
// True if all words have a valid permuter.
bool b_valid_permuter = true, n_valid_permuter = true;
while (b < best_words->size() || n < new_words->size()) { while (b < best_words->size() || n < new_words->size()) {
int b_right = -MAX_INT32; int b_right = -MAX_INT32;
int next_b_left = MAX_INT32; int next_b_left = MAX_INT32;
EvaluateWord(*best_words, b, &b_rating, &b_certainty, &b_bad, WordGap(*best_words, b, &b_right, &next_b_left);
&b_valid_permuter, &b_right, &next_b_left);
int n_right = -MAX_INT32; int n_right = -MAX_INT32;
int next_n_left = MAX_INT32; int next_n_left = MAX_INT32;
EvaluateWord(*new_words, n, &n_rating, &n_certainty, &n_bad, WordGap(*new_words, n, &n_right, &next_n_left);
&n_valid_permuter, &n_right, &next_n_left);
if (MAX(b_right, n_right) < MIN(next_b_left, next_n_left)) { if (MAX(b_right, n_right) < MIN(next_b_left, next_n_left)) {
// The word breaks overlap. [start_b,b] and [start_n, n] match. // The word breaks overlap. [start_b,b] and [start_n, n] match.
break; break;
@ -831,6 +829,20 @@ static int SelectBestWords(double rating_ratio,
else else
++n; ++n;
} }
// Rating of the current run in each.
float b_rating = 0.0f, n_rating = 0.0f;
// Certainty of the current run in each.
float b_certainty = 0.0f, n_certainty = 0.0f;
// True if any word is missing its best choice.
bool b_bad = false, n_bad = false;
// True if all words have a valid permuter.
bool b_valid_permuter = true, n_valid_permuter = true;
int end_b = b < best_words->size() ? b + 1 : b;
int end_n = n < new_words->size() ? n + 1 : n;
EvaluateWordSpan(*best_words, start_b, end_b, &b_rating, &b_certainty,
&b_bad, &b_valid_permuter);
EvaluateWordSpan(*new_words, start_n, end_n, &n_rating, &n_certainty,
&n_bad, &n_valid_permuter);
bool new_better = false; bool new_better = false;
if (!n_bad && (b_bad || (n_certainty > b_certainty && if (!n_bad && (b_bad || (n_certainty > b_certainty &&
n_rating < b_rating) || n_rating < b_rating) ||
@ -838,7 +850,7 @@ static int SelectBestWords(double rating_ratio,
n_rating < b_rating * rating_ratio && n_rating < b_rating * rating_ratio &&
n_certainty > b_certainty - certainty_margin))) { n_certainty > b_certainty - certainty_margin))) {
// New is better. // New is better.
for (int i = start_n; i <= n; ++i) { for (int i = start_n; i < end_n; ++i) {
out_words.push_back((*new_words)[i]); out_words.push_back((*new_words)[i]);
(*new_words)[i] = NULL; (*new_words)[i] = NULL;
++num_new; ++num_new;
@ -846,14 +858,12 @@ static int SelectBestWords(double rating_ratio,
new_better = true; new_better = true;
} else if (!b_bad) { } else if (!b_bad) {
// Current best is better. // Current best is better.
for (int i = start_b; i <= b; ++i) { for (int i = start_b; i < end_b; ++i) {
out_words.push_back((*best_words)[i]); out_words.push_back((*best_words)[i]);
(*best_words)[i] = NULL; (*best_words)[i] = NULL;
++num_best; ++num_best;
} }
} }
int end_b = b < best_words->size() ? b + 1 : b;
int end_n = n < new_words->size() ? n + 1 : n;
if (debug) { if (debug) {
tprintf("%d new words %s than %d old words: r: %g v %g c: %g v %g" tprintf("%d new words %s than %d old words: r: %g v %g c: %g v %g"
" valid dict: %d v %d\n", " valid dict: %d v %d\n",
@ -876,10 +886,9 @@ static int SelectBestWords(double rating_ratio,
// Returns positive if this recognizer found more new best words than the // Returns positive if this recognizer found more new best words than the
// number kept from best_words. // number kept from best_words.
int Tesseract::RetryWithLanguage(const WordData& word_data, int Tesseract::RetryWithLanguage(const WordData& word_data,
WordRecognizer recognizer, WordRecognizer recognizer, bool debug,
WERD_RES** in_word, WERD_RES** in_word,
PointerVector<WERD_RES>* best_words) { PointerVector<WERD_RES>* best_words) {
bool debug = classify_debug_level || cube_debug_level;
if (debug) { if (debug) {
tprintf("Trying word using lang %s, oem %d\n", tprintf("Trying word using lang %s, oem %d\n",
lang.string(), static_cast<int>(tessedit_ocr_engine_mode)); lang.string(), static_cast<int>(tessedit_ocr_engine_mode));
@ -898,8 +907,7 @@ int Tesseract::RetryWithLanguage(const WordData& word_data,
new_words[i]->DebugTopChoice("Lang result"); new_words[i]->DebugTopChoice("Lang result");
} }
// Initial version is a bit of a hack based on better certainty and rating // Initial version is a bit of a hack based on better certainty and rating
// (to reduce false positives from cube) or a dictionary vs non-dictionary // or a dictionary vs non-dictionary word.
// word.
return SelectBestWords(classify_max_rating_ratio, return SelectBestWords(classify_max_rating_ratio,
classify_max_certainty_margin, classify_max_certainty_margin,
debug, &new_words, best_words); debug, &new_words, best_words);
@ -1283,7 +1291,8 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
// Points to the best result. May be word or in lang_words. // Points to the best result. May be word or in lang_words.
WERD_RES* word = word_data->word; WERD_RES* word = word_data->word;
clock_t start_t = clock(); clock_t start_t = clock();
if (classify_debug_level || cube_debug_level) { bool debug = classify_debug_level > 0 || multilang_debug_level > 0;
if (debug) {
tprintf("%s word with lang %s at:", tprintf("%s word with lang %s at:",
word->done ? "Already done" : "Processing", word->done ? "Already done" : "Processing",
most_recently_used_->lang.string()); most_recently_used_->lang.string());
@ -1302,12 +1311,12 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
most_recently_used_ != sub_langs_[sub]; ++sub) {} most_recently_used_ != sub_langs_[sub]; ++sub) {}
} }
most_recently_used_->RetryWithLanguage( most_recently_used_->RetryWithLanguage(
*word_data, recognizer, &word_data->lang_words[sub], &best_words); *word_data, recognizer, debug, &word_data->lang_words[sub], &best_words);
Tesseract* best_lang_tess = most_recently_used_; Tesseract* best_lang_tess = most_recently_used_;
if (!WordsAcceptable(best_words)) { if (!WordsAcceptable(best_words)) {
// Try all the other languages to see if they are any better. // Try all the other languages to see if they are any better.
if (most_recently_used_ != this && if (most_recently_used_ != this &&
this->RetryWithLanguage(*word_data, recognizer, this->RetryWithLanguage(*word_data, recognizer, debug,
&word_data->lang_words[sub_langs_.size()], &word_data->lang_words[sub_langs_.size()],
&best_words) > 0) { &best_words) > 0) {
best_lang_tess = this; best_lang_tess = this;
@ -1315,7 +1324,7 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
for (int i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size(); for (int i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size();
++i) { ++i) {
if (most_recently_used_ != sub_langs_[i] && if (most_recently_used_ != sub_langs_[i] &&
sub_langs_[i]->RetryWithLanguage(*word_data, recognizer, sub_langs_[i]->RetryWithLanguage(*word_data, recognizer, debug,
&word_data->lang_words[i], &word_data->lang_words[i],
&best_words) > 0) { &best_words) > 0) {
best_lang_tess = sub_langs_[i]; best_lang_tess = sub_langs_[i];
@ -1357,11 +1366,25 @@ void Tesseract::classify_word_pass1(const WordData& word_data,
BLOCK* block = word_data.block; BLOCK* block = word_data.block;
prev_word_best_choice_ = word_data.prev_word != NULL prev_word_best_choice_ = word_data.prev_word != NULL
? word_data.prev_word->word->best_choice : NULL; ? word_data.prev_word->word->best_choice : NULL;
#ifndef NO_CUBE_BUILD #ifndef ANDROID_BUILD
// If we only intend to run cube - run it and return. if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) { tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
cube_word_pass1(block, row, *in_word); if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
return; LSTMRecognizeWord(*block, row, *in_word, out_words);
if (!out_words->empty())
return; // Successful lstm recognition.
}
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
// No fallback allowed, so use a fake.
(*in_word)->SetupFake(lstm_recognizer_->GetUnicharset());
return;
}
// Fall back to tesseract for failed words or odd words.
(*in_word)->SetupForRecognition(unicharset, this, BestPix(),
OEM_TESSERACT_ONLY, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx, row, block);
} }
#endif #endif
WERD_RES* word = *in_word; WERD_RES* word = *in_word;
@ -1497,11 +1520,7 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
WERD_RES** in_word, WERD_RES** in_word,
PointerVector<WERD_RES>* out_words) { PointerVector<WERD_RES>* out_words) {
// Return if we do not want to run Tesseract. // Return if we do not want to run Tesseract.
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY && if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED &&
word_data.word->best_choice != NULL)
return;
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
return; return;
} }
ROW* row = word_data.row; ROW* row = word_data.row;
@ -1886,7 +1905,7 @@ static void find_modal_font( //good chars in word
* Get the fonts for the word. * Get the fonts for the word.
*/ */
void Tesseract::set_word_fonts(WERD_RES *word) { void Tesseract::set_word_fonts(WERD_RES *word) {
// Don't try to set the word fonts for a cube word, as the configs // Don't try to set the word fonts for an lstm word, as the configs
// will be meaningless. // will be meaningless.
if (word->chopped_word == NULL) return; if (word->chopped_word == NULL) return;
ASSERT_HOST(word->best_choice != NULL); ASSERT_HOST(word->best_choice != NULL);

View File

@ -1,432 +0,0 @@
/******************************************************************
* File: cube_control.cpp
* Description: Tesseract class methods for invoking cube convolutional
* neural network word recognizer.
* Author: Raquel Romano
* Created: September 2009
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "allheaders.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "tesseractclass.h"
#include "tesseract_cube_combiner.h"
namespace tesseract {
/**
 * @name convert_prob_to_tess_certainty
 *
 * Maps a probability onto the tesseract certainty scale with a linear
 * transform: 1.0 maps to 0.0 and 0.0 maps to -20.0, so inputs in [0.0, 1.0]
 * land in [-20.0, 0.0].
 */
static float convert_prob_to_tess_certainty(float prob) {
  // The arithmetic is carried out in double precision and narrowed to float
  // once, on return.
  const double shortfall = prob - 1.0;
  return shortfall * 20.0;
}
/**
 * @name char_box_to_tbox
 *
 * Builds a TBOX from a character bounding box given relative to a word box.
 * Horizontally the box is shifted by the word box's left edge less x_offset;
 * a nonzero x_offset accounts for padding that was added to the word box.
 * Vertically the box's y is subtracted from the word box's top edge
 * (bottom + height), and the TBOX bottom lies one box height below that.
 */
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
  l_int32 box_x;
  l_int32 box_y;
  l_int32 box_w;
  l_int32 box_h;
  boxGetGeometry(char_box, &box_x, &box_y, &box_w, &box_h);

  // Horizontal extent.
  const l_int32 tbox_left = box_x + (word_box.left() - x_offset);
  const l_int32 tbox_right = tbox_left + box_w;

  // Vertical extent.
  const l_int32 tbox_top = word_box.bottom() + word_box.height() - box_y;
  const l_int32 tbox_bottom = tbox_top - box_h;

  return TBOX(tbox_left, tbox_bottom, tbox_right, tbox_top);
}
/**
 * @name extract_cube_state
 *
 * Retrieves the CharSamp objects and character bounding boxes held in the
 * CubeObject's state by backtracking from the best node of its beam search.
 * Returns false if cube_obj, its search object or its beam search object is
 * missing, or if backtracking yields no samples. The caller should free both
 * structures.
 */
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
                                   int* num_chars,
                                   Boxa** char_boxes,
                                   CharSamp*** char_samples) {
  const bool verbose = cube_debug_level > 0;

  if (cube_obj == NULL) {
    if (verbose) {
      tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
              "passed to extract_cube_state\n");
    }
    return false;
  }

  // The CubeObject accessors hand back whichever search object and beam
  // search object (deslanted or regular) the last Recognize() call used.
  CubeSearchObject* search = cube_obj->SrchObj();
  if (search == NULL) {
    if (verbose) {
      tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
              "cube's search object in extract_cube_state.\n");
    }
    return false;
  }

  BeamSearch* beam = cube_obj->BeamObj();
  if (beam == NULL) {
    if (verbose) {
      tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
              "cube's beam search object in extract_cube_state.\n");
    }
    return false;
  }

  // Walk back along the beam search path from the best node to collect the
  // character samples and their bounding boxes.
  const int best_node = beam->BestPresortedNodeIndex();
  *char_samples = beam->BackTrack(search, best_node, num_chars, NULL,
                                  char_boxes);
  return *char_samples != NULL;
}
/**
 * @name create_cube_box_word
 *
 * Populates box_word with one TBOX per character box. The char_boxes are in
 * coordinates local to the word bounding box (the left-most character of a
 * word has (0,0) as its left-top), whereas the BoxWord must be defined in
 * page coordinates, so each box is converted with char_box_to_tbox().
 * Returns false only when box_word is NULL.
 */
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
                                     int num_chars,
                                     TBOX word_box,
                                     BoxWord* box_word) {
  if (box_word == NULL) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
    }
    return false;
  }

  // First pass: smallest x over all character boxes. It can be nonzero if
  // the word image was padded before recognition took place. -1 means no
  // box has been seen yet.
  int min_x = -1;
  for (int c = 0; c < num_chars; ++c) {
    Box* clone = boxaGetBox(char_boxes, c, L_CLONE);
    const bool is_new_minimum = min_x < 0 || clone->x < min_x;
    if (is_new_minimum) {
      min_x = clone->x;
    }
    boxDestroy(&clone);
  }

  // Second pass: convert every character box and store it at its index.
  int c = 0;
  while (c < num_chars) {
    Box* clone = boxaGetBox(char_boxes, c, L_CLONE);
    TBOX page_box = char_box_to_tbox(clone, word_box, min_x);
    boxDestroy(&clone);
    box_word->InsertBox(c, page_box);
    ++c;
  }
  return true;
}
/**
 * @name init_cube_objects
 *
 * Creates this Tesseract object's CubeRecoContext and, when load_combiner is
 * true, its TesseractCubeCombiner. Both members must be NULL on entry.
 * Returns false if the cube context could not be created, or if
 * load_combiner is true but the combiner net could not be loaded; in the
 * latter case both members are released and reset to NULL.
 */
bool Tesseract::init_cube_objects(bool load_combiner,
                                  TessdataManager *tessdata_manager) {
  ASSERT_HOST(cube_cntxt_ == NULL);
  ASSERT_HOST(tess_cube_combiner_ == NULL);

  // The cube context comes first; the combiner is built on top of it.
  cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
  if (cube_cntxt_ == NULL) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
              "instantiate CubeRecoContext\n");
    }
    return false;
  }

  if (!load_combiner) {
    return true;
  }

  // Create the combiner and load its net for the target languages.
  tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
  if (tess_cube_combiner_ != NULL && tess_cube_combiner_->LoadCombinerNet()) {
    return true;
  }

  // Loading failed: tear down everything that was created above.
  delete cube_cntxt_;
  cube_cntxt_ = NULL;
  delete tess_cube_combiner_;  // Deleting NULL is a no-op.
  tess_cube_combiner_ = NULL;
  if (cube_debug_level > 0)
    tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
  return false;
}
/**
 * @name run_cube_combiner
 *
 * Iterates through tesseract's results and calls cube on each word,
 * combining the results with the existing tesseract result.
 *
 * Does nothing when page_res is NULL or no combiner has been loaded. Words
 * in non-text blocks, words that have no best choice, and words whose
 * certainty already reaches the combiner run threshold are left untouched.
 */
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
  if (page_res == NULL || tess_cube_combiner_ == NULL)
    return;
  PAGE_RES_IT page_res_it(page_res);
  // Iterate through the word results and call cube on each word.
  for (page_res_it.restart_page(); page_res_it.word() != NULL;
       page_res_it.forward()) {
    BLOCK* block = page_res_it.block()->block;
    if (block->poly_block() != NULL && !block->poly_block()->IsText())
      continue;  // Don't deal with non-text blocks.
    WERD_RES* word = page_res_it.word();
    // Without a best choice there is no tesseract result to compare against
    // or combine with, so leave the word alone instead of dereferencing NULL.
    if (word->best_choice == NULL)
      continue;
    // Skip cube entirely if tesseract's certainty reaches the threshold.
    // The threshold is kept as a float: storing it in an int would truncate
    // it toward zero and make this test almost never succeed.
    const float combiner_run_thresh = convert_prob_to_tess_certainty(
        cube_cntxt_->Params()->CombinerRunThresh());
    if (word->best_choice->certainty() >= combiner_run_thresh) {
      continue;
    }
    // Use the same language as Tesseract used for the word.
    Tesseract* lang_tess = word->tesseract;

    // Setup a trial WERD_RES in which to classify with cube.
    WERD_RES cube_word;
    cube_word.InitForRetryRecognition(*word);
    cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
                                  OEM_CUBE_ONLY,
                                  NULL, false, false, false,
                                  page_res_it.row()->row,
                                  page_res_it.block()->block);
    CubeObject *cube_obj = lang_tess->cube_recognize_word(
        page_res_it.block()->block, &cube_word);
    if (cube_obj != NULL)
      lang_tess->cube_combine_word(cube_obj, &cube_word, word);
    delete cube_obj;
  }
}
/**
 * @name cube_word_pass1
 *
 * Recognizes one word with cube alone, writing the outcome into word.
 * Signature-compatible with classify_word_pass1/classify_word_pass2; the
 * row argument is unused here.
 */
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
  // Only the side effect on word matters; the returned CubeObject (possibly
  // NULL) is discarded straight away.
  delete cube_recognize_word(block, word);
}
/**
 * @name cube_recognize_word
 *
 * Runs cube on a single word and hands back the CubeObject that produced the
 * result so that a combiner can inspect it. The caller owns the returned
 * object. Returns NULL (after putting a fake result in word where the code
 * below does so) when cube cannot run: no binary image, no cube context, a
 * rotated block, or a failed recognition.
 */
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
  // Cube needs both the binary page image and a loaded context.
  if (cube_binary_ == NULL || cube_cntxt_ == NULL) {
    if (cube_binary_ == NULL && cube_debug_level > 0)
      tprintf("Tesseract::run_cube(): NULL binary image.\n");
    word->SetupFake(unicharset);
    return NULL;
  }

  TBOX word_box = word->word->bounding_box();

  // TODO(rays) We have to rotate the bounding box to get the true coords.
  // This will be achieved in the future via DENORM.
  // Until then any block whose re_rotation is not (1, 0) is rejected.
  const bool rotated =
      block != NULL && !(block->re_rotation().x() == 1.0f &&
                         block->re_rotation().y() == 0.0f);
  if (rotated) {
    if (cube_debug_level > 0) {
      tprintf("Cube can't process rotated word at:");
      word_box.print();
    }
    word->SetupFake(unicharset);
    return NULL;
  }

  // The y coordinate is flipped against the image height when handing the
  // word rectangle to cube.
  const int cube_top = pixGetHeight(cube_binary_) - word_box.top();
  CubeObject* recognizer = new tesseract::CubeObject(
      cube_cntxt_, cube_binary_, word_box.left(), cube_top,
      word_box.width(), word_box.height());
  if (cube_recognize(recognizer, block, word))
    return recognizer;

  delete recognizer;
  return NULL;
}
/**
 * @name cube_combine_word
 *
 * Asks the combiner to arbitrate between tesseract's and cube's answer for
 * one word and leaves the winner in tess_word.
 *  - Combiner probability at or above the classifier threshold: tesseract
 *    wins and tess_word is kept; if both engines produced the same string,
 *    its rating and certainty are halved first.
 *  - Otherwise cube wins: cube's best choice receives tesseract's rating and
 *    certainty and tess_word consumes cube_word.
 */
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
                                  WERD_RES* tess_word) {
  const float combiner_prob =
      tess_cube_combiner_->CombineResults(tess_word, cube_obj);
  WERD_CHOICE* tess_best = tess_word->best_choice;
  WERD_CHOICE* cube_best = cube_word->best_choice;
  const bool verbose = cube_debug_level || classify_debug_level;
  if (verbose) {
    tprintf("Combiner prob = %g vs threshold %g\n",
            combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
  }

  const bool tess_wins =
      combiner_prob >= cube_cntxt_->Params()->CombinerClassifierThresh();
  if (!tess_wins) {
    // Cube wins.
    // It is better for the language combiner to have all tesseract scores,
    // so put them in the cube result.
    cube_best->set_rating(tess_best->rating());
    cube_best->set_certainty(tess_best->certainty());
    if (verbose) {
      tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
              tess_best->unichar_string().string(),
              cube_best->unichar_string().string());
    }
    tess_word->ConsumeWordResults(cube_word);
    return;
  }

  // Tesseract wins. The combiner knows nothing about how correct the answer
  // is, so the result is only touched when both engines agree.
  if (tess_best->unichar_string() == cube_best->unichar_string()) {
    tess_best->set_rating(tess_best->rating() / 2);
    tess_best->set_certainty(tess_best->certainty() / 2);
  }
}
/**
 * @name cube_recognize
 *
 * Calls cube on the current word and writes the result to word.
 *
 * @param cube_obj  cube recognizer already bound to the word's image region.
 * @param block     unused by this function.
 * @param word      receives cube's result, or a fake result on failure.
 * @return true on success; false (with word->SetupFake() applied) if cube
 *         returned no alternatives, its state could not be extracted, or the
 *         character boxes could not be converted to a BoxWord.
 */
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                               WERD_RES *word) {
  // Run cube
  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
    if (cube_debug_level > 0) {
      tprintf("Cube returned nothing for word at:");
      word->word->bounding_box().print();
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Get cube's best result and its probability, mapped to tesseract's
  // certainty range
  char_32 *cube_best_32 = cube_alt_list->Alt(0);
  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
  string cube_best_str;
  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);

  // Retrieve Cube's character bounding boxes and CharSamples,
  // corresponding to the most recent call to RecognizeWord().
  Boxa *char_boxes = NULL;
  CharSamp **char_samples = NULL;
  int num_chars = 0;
  // A failed extraction must always abort, whatever the debug level is.
  // Only the diagnostic message is conditional. Otherwise the code below
  // would run with NULL char_samples and a meaningless num_chars.
  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
              "cube state.\n");
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Convert cube's character bounding boxes to a BoxWord.
  BoxWord cube_box_word;
  TBOX tess_word_box = word->word->bounding_box();
  if (word->denorm.block() != NULL)
    tess_word_box.rotate(word->denorm.block()->re_rotation());
  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
                                               tess_word_box,
                                               &cube_box_word);
  boxaDestroy(&char_boxes);
  if (!box_word_success) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube BoxWord\n");
    }
    // The sample array is released on the success path below, so it has to
    // be released here as well to avoid leaking it.
    delete [] char_samples;
    word->SetupFake(unicharset);
    return false;
  }

  // Fill tesseract result's fields with cube results
  fill_werd_res(cube_box_word, cube_best_str.c_str(), word);

  // Create cube's best choice: one BLOB_CHOICE per recognized character, all
  // carrying the word-level certainty.
  BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
  for (int i = 0; i < num_chars; ++i) {
    UNICHAR_ID uch_id =
        cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
    choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
                                 -1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
  }
  word->FakeClassifyWord(num_chars, choices);
  // within a word, cube recognizes the word in reading order.
  word->best_choice->set_unichars_in_script_order(true);
  // Only the arrays are freed here, not the objects they point to.
  delete [] choices;
  delete [] char_samples;

  // Some sanity checks
  ASSERT_HOST(word->best_choice->length() == word->reject_map.length());

  if (cube_debug_level || classify_debug_level) {
    tprintf("Cube result: %s r=%g, c=%g\n",
            word->best_choice->unichar_string().string(),
            word->best_choice->rating(),
            word->best_choice->certainty());
  }
  return true;
}
/**
* @name fill_werd_res
*
* Fill Tesseract's word result fields with cube's: replaces the box word
* with a clipped copy of cube_box_word, stores cube_best_str as the word
* text, and derives the accepted/adapt/done flags from
* tess_acceptable_word().
*/
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
const char* cube_best_str,
WERD_RES* tess_werd_res) {
// Drop any previous box word and install a private copy of cube's.
delete tess_werd_res->box_word;
tess_werd_res->box_word = new BoxWord(cube_box_word);
tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
tess_werd_res->word);
// Fill text and remaining fields
tess_werd_res->word->set_text(cube_best_str);
tess_werd_res->tess_failed = FALSE;
// Evaluated after the text and box word have been updated above.
tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
// There is no output word, so we can't call AdaptableWord, but then I don't
// think we need to. Fudge the result with accepted.
tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
// Set word to done, i.e., ignore all of tesseract's tests for rejection
tess_werd_res->done = tess_werd_res->tess_accepted;
}
} // namespace tesseract

View File

@ -1,208 +0,0 @@
/**********************************************************************
* File: cube_reco_context.cpp
* Description: Implementation of the Cube Recognition Context Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string>
#include <limits.h>
#include "cube_reco_context.h"
#include "classifier_factory.h"
#include "cube_tuning_params.h"
#include "dict.h"
#include "feature_bmp.h"
#include "tessdatamanager.h"
#include "tesseractclass.h"
#include "tess_lang_model.h"
namespace tesseract {
/**
 * Instantiate a CubeRecoContext object using a Tesseract object.
 * CubeRecoContext will not take ownership of tess_obj, but will
 * record the pointer to it and will make use of various Tesseract
 * components (language model, flags, etc). Thus the caller should
 * keep tess_obj alive so long as the instantiated CubeRecoContext is used.
 *
 * Every member starts in a defined "not loaded" state: all component
 * pointers are NULL (including tess_unicharset_, so TessUnicharset() is
 * well-defined before Load() runs), the language is empty and both runtime
 * flags are off.
 */
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj)
    // Initializers are listed in member declaration order.
    : loaded_(false),
      lang_(),
      char_set_(NULL),
      tess_unicharset_(NULL),
      word_size_model_(NULL),
      char_classifier_(NULL),
      char_bigrams_(NULL),
      word_unigrams_(NULL),
      params_(NULL),
      lang_mod_(NULL),
      tess_obj_(tess_obj),
      size_normalization_(false),
      noisy_input_(false) {
}
// Releases every component object this context holds. tess_obj_ and
// tess_unicharset_ are not deleted here.
CubeRecoContext::~CubeRecoContext() {
  // delete on a NULL pointer is a no-op, so no guards are needed.
  delete char_classifier_;
  char_classifier_ = NULL;

  delete word_size_model_;
  word_size_model_ = NULL;

  delete char_set_;
  char_set_ = NULL;

  delete char_bigrams_;
  char_bigrams_ = NULL;

  delete word_unigrams_;
  word_unigrams_ = NULL;

  delete lang_mod_;
  lang_mod_ = NULL;

  delete params_;
  params_ = NULL;
}
/**
 * Writes the data file path into *path and returns true. The path is taken
 * verbatim from the owning Tesseract object's datadir; this function itself
 * neither reads an environment variable nor appends anything to it.
 */
bool CubeRecoContext::GetDataFilePath(string *path) const {
  path->assign(tess_obj_->datadir.string());
  return true;
}
/**
 * Loads every component of the recognition context, in dependency order:
 * char set, language model, optional char bigrams / word unigrams / word
 * size model, tuning params and finally the char classifier.
 *
 * TessdataManager is used to load the data from [lang].traineddata file.
 * If TESSDATA_CUBE_UNICHARSET component is present, Cube will be
 * instantiated with the unicharset specified in this component and the
 * corresponding dictionary (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's
 * unicharset to Tesseract's. Otherwise, TessdataManager will assume that
 * Cube will be using Tesseract's unicharset and dawgs, and will load the
 * unicharset from the TESSDATA_UNICHARSET component and will load the
 * dawgs from TESSDATA_*_DAWG components.
 *
 * Returns false as soon as a mandatory component fails; components created
 * up to that point stay attached to the object and are released by the
 * destructor. loaded_ is set only on full success.
 */
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
                           UNICHARSET *tess_unicharset) {
  ASSERT_HOST(tess_obj_ != NULL);
  tess_unicharset_ = tess_unicharset;

  // Where the data files live.
  string data_file_path;
  if (!GetDataFilePath(&data_file_path)) {
    fprintf(stderr, "Unable to get data file path\n");
    return false;
  }

  // The language is whatever the Tesseract object was set up with.
  lang_ = tess_obj_->lang.string();

  // Char set (mandatory).
  char_set_ = CharSet::Create(tessdata_manager, tess_unicharset);
  if (char_set_ == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
            "CharSet\n");
    return false;
  }

  // Language model (mandatory): its parameters come from <lang>.cube.lm.
  string lm_file_name = data_file_path + lang_ + ".cube.lm";
  string lm_params;
  if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
            "language model params from %s\n", lm_file_name.c_str());
    return false;
  }
  lang_mod_ = new TessLangModel(lm_params, data_file_path,
                                tess_obj_->getDict().load_system_dawg,
                                tessdata_manager, this);
  if (lang_mod_ == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create "
            "TessLangModel\n");
    return false;
  }

  // Optional components: their results are stored without being checked.
  char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
  word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
  word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
                                           char_set_, Contextual());

  // Tuning params (mandatory).
  params_ = CubeTuningParams::Create(data_file_path, lang_);
  if (params_ == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
            "CubeTuningParams from %s\n", data_file_path.c_str());
    return false;
  }

  // Char classifier (mandatory): built from the language model, char set
  // and tuning params loaded above.
  char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
                                                   lang_mod_, char_set_,
                                                   params_);
  if (char_classifier_ == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
            "CharClassifierFactory object from %s\n", data_file_path.c_str());
    return false;
  }

  loaded_ = true;
  return true;
}
/**
 * Factory: allocates a CubeRecoContext bound to tess_obj and loads all of
 * its components. Returns the fully loaded context, which the caller owns,
 * or NULL if allocation or loading failed (nothing is leaked in that case).
 */
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
                                          TessdataManager *tessdata_manager,
                                          UNICHARSET *tess_unicharset) {
  CubeRecoContext *context = new CubeRecoContext(tess_obj);
  if (context == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create "
            "CubeRecoContext object\n");
    return NULL;
  }

  // A context is only handed out once every mandatory component is loaded.
  if (context->Load(tessdata_manager, tess_unicharset))
    return context;

  fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
          "CubeRecoContext object\n");
  delete context;
  return NULL;
}
}  // namespace tesseract

View File

@ -1,157 +0,0 @@
/**********************************************************************
* File: cube_reco_context.h
* Description: Declaration of the Cube Recognition Context Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
// (or a thread) would create one CubeRecoContext object per language.
// The CubeRecoContext object also provides methods to get and set the
// different attributes of the Cube OCR Engine.
#ifndef CUBE_RECO_CONTEXT_H
#define CUBE_RECO_CONTEXT_H
#include <string>
#include "neural_net.h"
#include "lang_model.h"
#include "classifier_base.h"
#include "feature_base.h"
#include "char_set.h"
#include "word_size_model.h"
#include "char_bigrams.h"
#include "word_unigrams.h"
namespace tesseract {
class Tesseract;
class TessdataManager;
// Bundles the components the Cube engine needs for one language (char set,
// language model, classifier, tuning params, optional n-gram/size models)
// together with a non-owning pointer to the Tesseract object it serves.
// Instances are normally obtained through Create().
class CubeRecoContext {
public:
// Reading order enum type
enum ReadOrder {
L2R,
R2L
};
// Instantiate using a Tesseract object. The pointer is recorded, not owned.
CubeRecoContext(Tesseract *tess_obj);
// Deletes the component objects held by this context.
~CubeRecoContext();
// accessor functions: each returns the stored member as-is.
inline const string & Lang() const { return lang_; }
inline CharSet *CharacterSet() const { return char_set_; }
const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
inline CharClassifier *Classifier() const { return char_classifier_; }
inline WordSizeModel *SizeModel() const { return word_size_model_; }
inline CharBigrams *Bigrams() const { return char_bigrams_; }
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
inline TuningParams *Params() const { return params_; }
inline LangModel *LangMod() const { return lang_mod_; }
// The language traits below are derived purely from the language code:
// "ara" and "hin" are the only codes treated specially.
// the reading order of the language
inline ReadOrder ReadingOrder() const {
return ((lang_ == "ara") ? R2L : L2R);
}
// does the language support case
inline bool HasCase() const {
return (lang_ != "ara" && lang_ != "hin");
}
// is the script cursive
inline bool Cursive() const {
return (lang_ == "ara");
}
// does the language have italics
inline bool HasItalics() const {
return (lang_ != "ara" && lang_ != "hin");
}
// is the script contextual
inline bool Contextual() const {
return (lang_ == "ara");
}
// RecoContext runtime flags accessor functions.
// OOD/Numeric/WordList/Punc forward to lang_mod_ and CaseSensitive forwards
// to char_classifier_, both without a NULL check.
inline bool SizeNormalization() const { return size_normalization_; }
inline bool NoisyInput() const { return noisy_input_; }
inline bool OOD() const { return lang_mod_->OOD(); }
inline bool Numeric() const { return lang_mod_->Numeric(); }
inline bool WordList() const { return lang_mod_->WordList(); }
inline bool Punc() const { return lang_mod_->Punc(); }
inline bool CaseSensitive() const {
return char_classifier_->CaseSensitive();
}
// RecoContext runtime flags setters. The first two store into this object;
// the rest forward to lang_mod_ / char_classifier_ without a NULL check.
inline void SetSizeNormalization(bool size_normalization) {
size_normalization_ = size_normalization;
}
inline void SetNoisyInput(bool noisy_input) {
noisy_input_ = noisy_input;
}
inline void SetOOD(bool ood_enabled) {
lang_mod_->SetOOD(ood_enabled);
}
inline void SetNumeric(bool numeric_enabled) {
lang_mod_->SetNumeric(numeric_enabled);
}
inline void SetWordList(bool word_list_enabled) {
lang_mod_->SetWordList(word_list_enabled);
}
inline void SetPunc(bool punc_enabled) {
lang_mod_->SetPunc(punc_enabled);
}
inline void SetCaseSensitive(bool case_sensitive) {
char_classifier_->SetCaseSensitive(case_sensitive);
}
// The Tesseract object this context was constructed with.
inline tesseract::Tesseract *TesseractObject() const {
return tess_obj_;
}
// Returns the path of the data files
bool GetDataFilePath(string *path) const;
// Creates a CubeRecoContext object using a tesseract object. Data
// files are loaded via the tessdata_manager, and the tesseract
// unicharset is provided in order to map Cube's unicharset to
// Tesseract's in the case where the two unicharsets differ.
static CubeRecoContext *Create(Tesseract *tess_obj,
TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset);
private:
bool loaded_;
string lang_;
CharSet *char_set_;
UNICHARSET *tess_unicharset_;
WordSizeModel *word_size_model_;
CharClassifier *char_classifier_;
CharBigrams *char_bigrams_;
WordUnigrams *word_unigrams_;
TuningParams *params_;
LangModel *lang_mod_;
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
bool size_normalization_;
bool noisy_input_;
// Loads and initializes all the necessary components of a
// CubeRecoContext. See .cpp for more details.
bool Load(TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset);
};
}
#endif // CUBE_RECO_CONTEXT_H

View File

@ -1,134 +0,0 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: cubeclassifier.cpp
// Description: Cube implementation of a ShapeClassifier.
// Author: Ray Smith
// Created: Wed Nov 23 10:39:45 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "cubeclassifier.h"
#include "char_altlist.h"
#include "char_set.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "tessclassifier.h"
#include "tesseractclass.h"
#include "trainingsample.h"
#include "unicharset.h"
namespace tesseract {
// Binds the classifier to the given Tesseract object's cube context and
// shape table. shape_table() is dereferenced here without a NULL check.
// NOTE(review): presumably the shape table is always loaded before this is
// constructed; confirm against callers.
CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
: cube_cntxt_(tesseract->GetCubeRecoContext()),
shape_table_(*tesseract->shape_table()) {
}
// Nothing to release: neither cube_cntxt_ nor shape_table_ is deleted here.
CubeClassifier::~CubeClassifier() {
}
/// Classifies the given [training] sample, writing to results.
/// See ShapeClassifier for a full description.
int CubeClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
results->clear();
if (page_pix == NULL) return 0;
ASSERT_HOST(cube_cntxt_ != NULL);
const TBOX& char_box = sample.bounding_box();
CubeObject* cube_obj = new tesseract::CubeObject(
cube_cntxt_, page_pix, char_box.left(),
pixGetHeight(page_pix) - char_box.top(),
char_box.width(), char_box.height());
CharAltList* alt_list = cube_obj->RecognizeChar();
if (alt_list != NULL) {
alt_list->Sort();
CharSet* char_set = cube_cntxt_->CharacterSet();
for (int i = 0; i < alt_list->AltCount(); ++i) {
// Convert cube representation to a shape_id.
int alt_id = alt_list->Alt(i);
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
if (unichar_id >= 0)
results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
}
delete alt_list;
}
delete cube_obj;
return results->size();
}
/**
* Provides access to the ShapeTable that this classifier works with.
* Returns the address of the table referenced at construction; never NULL.
*/
const ShapeTable* CubeClassifier::GetShapeTable() const {
return &shape_table_;
}
// Binds to the given Tesseract object's cube context and shape table, and
// allocates the TessClassifier used as the pruning stage (released in the
// destructor). shape_table() is dereferenced here without a NULL check.
// NOTE(review): the meaning of the 'true' passed to TessClassifier is not
// visible here; confirm against its declaration.
CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
: cube_cntxt_(tesseract->GetCubeRecoContext()),
shape_table_(*tesseract->shape_table()),
pruner_(new TessClassifier(true, tesseract)) {
}
// Releases the pruner allocated in the constructor; the cube context and
// shape table are not deleted here.
CubeTessClassifier::~CubeTessClassifier() {
delete pruner_;
}
/// Classifies the given [training] sample, writing to results.
/// See ShapeClassifier for a full description.
int CubeTessClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
keep_this, results);
if (page_pix == NULL) return num_results;
ASSERT_HOST(cube_cntxt_ != NULL);
const TBOX& char_box = sample.bounding_box();
CubeObject* cube_obj = new tesseract::CubeObject(
cube_cntxt_, page_pix, char_box.left(),
pixGetHeight(page_pix) - char_box.top(),
char_box.width(), char_box.height());
CharAltList* alt_list = cube_obj->RecognizeChar();
CharSet* char_set = cube_cntxt_->CharacterSet();
if (alt_list != NULL) {
for (int r = 0; r < num_results; ++r) {
// Get the best cube probability of the unichar in the result.
double best_prob = 0.0;
for (int i = 0; i < alt_list->AltCount(); ++i) {
int alt_id = alt_list->Alt(i);
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
if (unichar_id == (*results)[r].unichar_id &&
alt_list->AltProb(i) > best_prob) {
best_prob = alt_list->AltProb(i);
}
}
(*results)[r].rating = best_prob;
}
delete alt_list;
// Re-sort by rating.
results->sort(&UnicharRating::SortDescendingRating);
}
delete cube_obj;
return results->size();
}
/**
* Provides access to the ShapeTable that this classifier works with.
* Returns the address of the table referenced at construction; never NULL.
*/
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
return &shape_table_;
}
} // namespace tesseract

View File

@ -1,80 +0,0 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: cubeclassifier.h
// Description: Cube implementation of a ShapeClassifier.
// Author: Ray Smith
// Created: Wed Nov 23 10:36:32 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
#define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
#include "shapeclassifier.h"
namespace tesseract {
class Classify;
class CubeRecoContext;
class ShapeTable;
class TessClassifier;
class Tesseract;
class TrainingSample;
struct UnicharRating;
// Cube implementation of a ShapeClassifier: candidates and their ratings
// come from cube's character recognizer alone.
class CubeClassifier : public ShapeClassifier {
public:
// Takes its cube context and shape table from the given Tesseract object.
explicit CubeClassifier(Tesseract* tesseract);
virtual ~CubeClassifier();
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, UNICHAR_ID keep_this,
GenericVector<UnicharRating>* results);
// Provides access to the ShapeTable that this classifier works with.
virtual const ShapeTable* GetShapeTable() const;
private:
// Cube objects. Neither member is deleted by this class.
CubeRecoContext* cube_cntxt_;
const ShapeTable& shape_table_;
};
// Combination of Tesseract class pruner with scoring by cube: the pruner
// proposes the candidates and cube supplies their ratings.
class CubeTessClassifier : public ShapeClassifier {
public:
// Takes its cube context and shape table from the given Tesseract object
// and allocates its own pruner.
explicit CubeTessClassifier(Tesseract* tesseract);
virtual ~CubeTessClassifier();
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, UNICHAR_ID keep_this,
GenericVector<UnicharRating>* results);
// Provides access to the ShapeTable that this classifier works with.
virtual const ShapeTable* GetShapeTable() const;
private:
// Cube objects. Neither of these two members is deleted by this class.
CubeRecoContext* cube_cntxt_;
const ShapeTable& shape_table_;
// Pruning stage; allocated in the constructor and deleted in the destructor.
// NOTE(review): the class declares no copy constructor or assignment
// operator, so copying an instance would delete pruner_ twice.
TessClassifier* pruner_;
};
} // namespace tesseract
#endif /* THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ */

View File

@ -1,8 +1,8 @@
/****************************************************************** /******************************************************************
* File: docqual.cpp (Formerly docqual.c) * File: docqual.cpp (Formerly docqual.c)
* Description: Document Quality Metrics * Description: Document Quality Metrics
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Mon May 9 11:27:28 BST 1994 * Created: Mon May 9 11:27:28 BST 1994
* *
* (C) Copyright 1994, Hewlett-Packard Ltd. * (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -98,8 +98,8 @@ void Tesseract::word_char_quality(WERD_RES *word,
ROW *row, ROW *row,
inT16 *match_count, inT16 *match_count,
inT16 *accepted_match_count) { inT16 *accepted_match_count) {
if (word->bln_boxes == NULL || if (word->bln_boxes == NULL || word->rebuild_word == NULL ||
word->rebuild_word == NULL || word->rebuild_word->blobs.empty()) { word->rebuild_word->blobs.empty()) {
*match_count = 0; *match_count = 0;
*accepted_match_count = 0; *accepted_match_count = 0;
return; return;
@ -132,7 +132,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) {
int expected_outline_count; int expected_outline_count;
if (STRING (outlines_odd).contains (c)) if (STRING (outlines_odd).contains (c))
return 0; //Don't use this char return 0; // Don't use this char
else if (STRING (outlines_2).contains (c)) else if (STRING (outlines_2).contains (c))
expected_outline_count = 2; expected_outline_count = 2;
else else
@ -151,17 +151,16 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
} }
} }
/************************************************************************* /*************************************************************************
* unrej_good_quality_words() * unrej_good_quality_words()
* Accept potential rejects in words which pass the following checks: * Accept potential rejects in words which pass the following checks:
* - Contains a potential reject * - Contains a potential reject
* - Word looks like a sensible alpha word. * - Word looks like a sensible alpha word.
* - Word segmentation is the same as the original image * - Word segmentation is the same as the original image
* - All characters have the expected number of outlines * - All characters have the expected number of outlines
* NOTE - the rejection counts are recalculated after unrejection * NOTE - the rejection counts are recalculated after unrejection
* - CAN'T do it in a single pass without a bit of fiddling * - CAN'T do it in a single pass without a bit of fiddling
* - keep it simple but inefficient * - keep it simple but inefficient
*************************************************************************/ *************************************************************************/
void Tesseract::unrej_good_quality_words( //unreject potential void Tesseract::unrej_good_quality_words( //unreject potential
PAGE_RES_IT &page_res_it) { PAGE_RES_IT &page_res_it) {
@ -403,7 +402,6 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
} // namespace tesseract } // namespace tesseract
/************************************************************************* /*************************************************************************
* reject_whole_page() * reject_whole_page()
* Don't believe any of it - set the reject map to 00..00 in all words * Don't believe any of it - set the reject map to 00..00 in all words

View File

@ -624,10 +624,6 @@ void EquationDetect::IdentifySeedParts() {
} }
float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) { float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) {
#if LIBLEPT_MINOR_VERSION < 69 && LIBLEPT_MAJOR_VERSION <= 1
// This will disable the detector because no seed will be identified.
return 1.0f;
#else
Pix *pix_bi = lang_tesseract_->pix_binary(); Pix *pix_bi = lang_tesseract_->pix_binary();
int pix_height = pixGetHeight(pix_bi); int pix_height = pixGetHeight(pix_bi);
Box* box = boxCreate(tbox.left(), pix_height - tbox.top(), Box* box = boxCreate(tbox.left(), pix_height - tbox.top(),
@ -639,7 +635,6 @@ float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) {
boxDestroy(&box); boxDestroy(&box);
return fract; return fract;
#endif
} }
bool EquationDetect::CheckSeedFgDensity(const float density_th, bool EquationDetect::CheckSeedFgDensity(const float density_th,

View File

@ -17,8 +17,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H__ #ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H_
#define TESSERACT_CCMAIN_EQUATIONDETECT_H__ #define TESSERACT_CCMAIN_EQUATIONDETECT_H_
#include "blobbox.h" #include "blobbox.h"
#include "equationdetectbase.h" #include "equationdetectbase.h"

View File

@ -3,8 +3,8 @@
* Description: Implements a pass over the page res, exploring the alternative * Description: Implements a pass over the page res, exploring the alternative
* spacing possibilities, trying to use context to improve the * spacing possibilities, trying to use context to improve the
* word spacing * word spacing
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Thu Oct 21 11:38:43 BST 1993 * Created: Thu Oct 21 11:38:43 BST 1993
* *
* (C) Copyright 1993, Hewlett-Packard Ltd. * (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -211,7 +211,6 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
} }
} }
/** /**
* @name eval_word_spacing() * @name eval_word_spacing()
* The basic measure is the number of characters in contextually confirmed * The basic measure is the number of characters in contextually confirmed

333
ccmain/linerec.cpp Normal file
View File

@ -0,0 +1,333 @@
///////////////////////////////////////////////////////////////////////
// File: linerec.cpp
// Description: Top-level line-based recognition module for Tesseract.
// Author: Ray Smith
// Created: Thu May 02 09:47:06 PST 2013
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "tesseractclass.h"
#include "allheaders.h"
#include "boxread.h"
#include "imagedata.h"
#ifndef ANDROID_BUILD
#include "lstmrecognizer.h"
#include "recodebeam.h"
#endif
#include "ndminx.h"
#include "pageres.h"
#include "tprintf.h"
namespace tesseract {
// Arbitrary penalty for non-dictionary words.
// Subtracted from the scaled word certainty in SearchWords.
// TODO(rays) How to learn this?
const float kNonDictionaryPenalty = 5.0f;
// Scale factor to make certainty more comparable to Tesseract.
// SearchWords multiplies the LSTM word certainty by this value.
const float kCertaintyScale = 7.0f;
// Worst acceptable certainty for a dictionary word.
// Compared against the scaled certainty in SearchWords, and passed to
// RecognizeLine divided by kCertaintyScale in LSTMRecognizeWord.
const float kWorstDictCertainty = -25.0f;
// Generates training data for training a line recognizer, eg LSTM.
// Breaks the page into lines, according to the boxes, and writes them to a
// serialized DocumentData based on output_basename.
void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
const STRING& output_basename,
BLOCK_LIST *block_list) {
STRING lstmf_name = output_basename + ".lstmf";
DocumentData images(lstmf_name);
if (applybox_page > 0) {
// Load existing document for the previous pages.
if (!images.LoadDocument(lstmf_name.string(), "eng", 0, 0, NULL)) {
tprintf("Failed to read training data from %s!\n", lstmf_name.string());
return;
}
}
GenericVector<TBOX> boxes;
GenericVector<STRING> texts;
// Get the boxes for this page, if there are any.
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, NULL,
NULL) ||
boxes.empty()) {
tprintf("Failed to read boxes from %s\n", input_imagename.string());
return;
}
TrainFromBoxes(boxes, texts, block_list, &images);
images.Shuffle();
if (!images.SaveDocument(lstmf_name.string(), NULL)) {
tprintf("Failed to write training data to %s!\n", lstmf_name.string());
}
}
// Turns the boxes/texts of a page into training lines for a line recognizer,
// eg LSTM. Consecutive boxes are grouped into text lines (a text of "\t"
// marks a line break in the box file), each line is matched to the text block
// that it overlaps most, converted to an ImageData by GetLineData, and
// appended to training_data. Lines that overlap no block are reported with
// tprintf and skipped.
void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
                               const GenericVector<STRING>& texts,
                               BLOCK_LIST *block_list,
                               DocumentData* training_data) {
  const int box_count = boxes.size();
  int end_box = 0;
  // Never let a \t line-break marker become part of the line content, as
  // that would make the line unusable in training.
  while (end_box < texts.size() && texts[end_box] == "\t") ++end_box;

  int start_box = end_box;
  while (start_box < box_count) {
    // Accumulate the bounding box and text of the line beginning at start_box.
    TBOX line_box = boxes[start_box];
    STRING line_str = texts[start_box];
    end_box = start_box + 1;
    while (end_box < box_count && texts[end_box] != "\t") {
      line_box += boxes[end_box];
      line_str += texts[end_box];
      ++end_box;
    }

    // Choose the text block with the largest intersection with the line.
    BLOCK* best_block = NULL;
    int best_overlap = 0;
    BLOCK_IT b_it(block_list);
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      BLOCK* block = b_it.data();
      const bool is_text =
          block->poly_block() == NULL || block->poly_block()->IsText();
      if (!is_text) continue;  // Not a text block.
      TBOX block_box = block->bounding_box();
      block_box.rotate(block->re_rotation());
      if (!block_box.major_overlap(line_box)) continue;
      const int overlap_area = line_box.intersection(block_box).area();
      if (overlap_area > best_overlap) {
        best_overlap = overlap_area;
        best_block = block;
      }
    }

    if (best_block == NULL) {
      tprintf("No block overlapping textline: %s\n", line_str.string());
    } else {
      ImageData* imagedata = GetLineData(line_box, boxes, texts, start_box,
                                         end_box, *best_block);
      if (imagedata != NULL)
        training_data->AddPageToDocument(imagedata);
    }

    // Step over the \t markers that separate this line from the next one.
    while (end_box < texts.size() && texts[end_box] == "\t") ++end_box;
    start_box = end_box;
  }
}
// Creates the ImageData for a single text line: the image of line_box
// (obtained via GetRectImage with kImagePadding) together with the ground
// truth boxes and texts with indices in [start_box, end_box), moved into the
// coordinates of the returned image. The image is not normalized in any way.
// Returns NULL if no image could be obtained for the line.
ImageData* Tesseract::GetLineData(const TBOX& line_box,
                                  const GenericVector<TBOX>& boxes,
                                  const GenericVector<STRING>& texts,
                                  int start_box, int end_box,
                                  const BLOCK& block) {
  TBOX revised_box;
  ImageData* line_data =
      GetRectImage(line_box, block, kImagePadding, &revised_box);
  if (line_data == NULL) return NULL;

  const int page = applybox_page;
  line_data->set_page_number(page);

  // Each truth box is rotated by the block rotation with negated y and then
  // shifted so that it is relative to the bottom-left of the image.
  FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
  ICOORD shift = -revised_box.botleft();
  GenericVector<TBOX> line_boxes;
  GenericVector<STRING> line_texts;
  int index = start_box;
  while (index < end_box) {
    TBOX truth_box = boxes[index];
    truth_box.rotate(block_rotation);
    truth_box.move(shift);
    line_boxes.push_back(truth_box);
    line_texts.push_back(texts[index]);
    ++index;
  }

  // All boxes of the line belong to the current page.
  GenericVector<int> page_numbers;
  page_numbers.init_to_size(line_boxes.size(), page);
  line_data->AddBoxes(line_boxes, line_texts, page_numbers);
  return line_data;
}
// Helper gets the image of a rectangle, using the block.re_rotation() if
// needed to get to the image, and rotating the result back to horizontal
// layout. (CJK characters will be on their left sides) The vertical text flag
// is set in the returned ImageData if the text was originally vertical, which
// can be used to invoke a different CJK recognition engine. The revised_box
// is also returned to enable calculation of output bounding boxes.
// box is padded by padding on all sides and clipped to the image before the
// pixels are extracted.
// Returns NULL if the padded box does not intersect the image or the image
// could not be clipped; otherwise returns a new ImageData owned by the caller.
ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
                                   int padding, TBOX* revised_box) const {
  // Work on a copy, so that box and *revised_box may be the same object.
  TBOX wbox = box;
  wbox.pad(padding, padding);
  *revised_box = wbox;
  // Number of clockwise 90 degree rotations needed to get back to tesseract
  // coords from the clipped image.
  int num_rotations = 0;
  if (block.re_rotation().y() > 0.0f)
    num_rotations = 1;
  else if (block.re_rotation().x() < 0.0f)
    num_rotations = 2;
  else if (block.re_rotation().y() < 0.0f)
    num_rotations = 3;
  // Handle two cases automatically: 1 the box came from the block, 2 the box
  // came from a box file, and refers to the image, which the block may not.
  if (block.bounding_box().major_overlap(*revised_box))
    revised_box->rotate(block.re_rotation());
  // Now revised_box always refers to the image.
  // BestPix is never colormapped, but may be of any depth.
  Pix* pix = BestPix();
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  TBOX image_box(0, 0, width, height);
  // Clip to image bounds;
  *revised_box &= image_box;
  if (revised_box->null_box()) return NULL;
  // The Box is built with y measured from the top of the image, hence
  // height - top.
  Box* clip_box = boxCreate(revised_box->left(), height - revised_box->top(),
                            revised_box->width(), revised_box->height());
  Pix* box_pix = pixClipRectangle(pix, clip_box, NULL);
  // The clip box is only needed for the call above, so release it before the
  // failure check; otherwise it leaks whenever the clipping fails.
  boxDestroy(&clip_box);
  if (box_pix == NULL) return NULL;
  if (num_rotations > 0) {
    Pix* rot_pix = pixRotateOrth(box_pix, num_rotations);
    pixDestroy(&box_pix);
    box_pix = rot_pix;
  }
  // Convert sub-8-bit images to 8 bit.
  int depth = pixGetDepth(box_pix);
  if (depth < 8) {
    Pix* grey;
    grey = pixConvertTo8(box_pix, false);
    pixDestroy(&box_pix);
    box_pix = grey;
  }
  bool vertical_text = false;
  if (num_rotations > 0) {
    // Rotate the clipped revised box back to internal coordinates.
    FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y());
    revised_box->rotate(rotation);
    // A rotation by 2 quarter turns leaves the text horizontal; 1 or 3 means
    // that the text was originally vertical.
    if (num_rotations != 2)
      vertical_text = true;
  }
  return new ImageData(vertical_text, box_pix);
}
#ifndef ANDROID_BUILD
// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
PointerVector<WERD_RES>* words) {
TBOX word_box = word->word->bounding_box();
// Get the word image - no frills.
if (tessedit_pageseg_mode == PSM_SINGLE_WORD ||
tessedit_pageseg_mode == PSM_RAW_LINE) {
// In single word mode, use the whole image without any other row/word
// interpretation.
word_box = TBOX(0, 0, ImageWidth(), ImageHeight());
} else {
float baseline = row->base_line((word_box.left() + word_box.right()) / 2);
if (baseline + row->descenders() < word_box.bottom())
word_box.set_bottom(baseline + row->descenders());
if (baseline + row->x_height() + row->ascenders() > word_box.top())
word_box.set_top(baseline + row->x_height() + row->ascenders());
}
ImageData* im_data = GetRectImage(word_box, block, kImagePadding, &word_box);
if (im_data == NULL) return;
lstm_recognizer_->RecognizeLine(*im_data, true, classify_debug_level > 0,
kWorstDictCertainty / kCertaintyScale,
lstm_use_matrix, &unicharset, word_box, 2.0,
false, words);
delete im_data;
SearchWords(words);
}
// Finalizes the words produced by the line recognizer.
// A word without a best_choice gets one from a segmentation search over its
// existing ratings matrix; a word that already has a best_choice keeps it and
// only has its permuter, certainty and done/accepted etc flags set.
// A word that ends up with no best_choice, or with an impossibly bad
// certainty, is replaced by a fake word.
void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
  // If we drop a word as junk, then there is always a space in front of the
  // next.
  const Dict* stopper_dict = lstm_recognizer_->GetDict();
  if (stopper_dict == nullptr) stopper_dict = &getDict();

  // Does any word contain text from a non-space-delimited language?
  bool any_nonspace_delimited = false;
  for (int w = 0; w < words->size() && !any_nonspace_delimited; ++w) {
    WERD_RES* candidate = (*words)[w];
    any_nonspace_delimited =
        candidate->best_choice != nullptr &&
        candidate->best_choice->ContainsAnyNonSpaceDelimited();
  }

  for (int w = 0; w < words->size(); ++w) {
    WERD_RES* word = (*words)[w];
    if (word->best_choice == NULL) {
      // If we are using the beam search, the unicharset had better match!
      word->SetupWordScript(unicharset);
      WordSearch(word);
    } else if (word->best_choice->unicharset() == &unicharset &&
               !lstm_recognizer_->IsRecoding()) {
      // The word was set up without using the dictionary, so the permuter is
      // set now, which is only possible because the unicharsets match.
      word->best_choice->set_permuter(
          getDict().valid_word(*word->best_choice, true));
    }
    if (word->best_choice == NULL) {
      // It is a dud.
      word->SetupFake(lstm_recognizer_->GetUnicharset());
      continue;
    }

    // Set the best state.
    for (int i = 0; i < word->best_choice->length(); ++i) {
      word->best_state.push_back(word->best_choice->state(i));
    }
    word->reject_map.initialise(word->best_choice->length());
    word->tesseract = this;
    word->done = true;
    word->tess_failed = false;
    word->tess_would_adapt = false;
    word->tess_accepted = true;

    const bool is_dict_word =
        Dict::valid_word_permuter(word->best_choice->permuter(), true);
    float word_certainty =
        MIN(word->space_certainty, word->best_choice->certainty()) *
        kCertaintyScale;
    // Arbitrary ding factor for non-dictionary words.
    if (!lstm_recognizer_->IsRecoding() && !is_dict_word)
      word_certainty -= kNonDictionaryPenalty;
    if (getDict().stopper_debug_level >= 1) {
      tprintf("Best choice certainty=%g, space=%g, scaled=%g, final=%g\n",
              word->best_choice->certainty(), word->space_certainty,
              MIN(word->space_certainty, word->best_choice->certainty()) *
                  kCertaintyScale,
              word_certainty);
      word->best_choice->print();
    }
    word->best_choice->set_certainty(word_certainty);

    // Discard words that are impossibly bad, but allow a bit more for
    // dictionary words, and keep bad words in non-space-delimited langs.
    const bool keep_word =
        word_certainty >= RecodeBeamSearch::kMinCertainty ||
        any_nonspace_delimited ||
        (word_certainty >= kWorstDictCertainty && is_dict_word);
    if (keep_word) {
      word->tess_accepted = stopper_dict->AcceptableResult(word);
      continue;
    }
    if (getDict().stopper_debug_level >= 1) {
      tprintf("Deleting word with certainty %g\n", word_certainty);
      word->best_choice->print();
    }
    // It is a dud.
    word->SetupFake(lstm_recognizer_->GetUnicharset());
  }
}
#endif // ANDROID_BUILD
} // namespace tesseract.

View File

@ -145,13 +145,12 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const {
return 0.0f; return 0.0f;
} }
void LTRResultIterator::RowAttributes(float* row_height, void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
float* descenders,
float* ascenders) const { float* ascenders) const {
*row_height = it_->row()->row->x_height() + it_->row()-> row->ascenders() *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
- it_->row()->row->descenders(); it_->row()->row->descenders();
*descenders = it_->row()->row->descenders(); *descenders = it_->row()->row->descenders();
*ascenders = it_->row()->row->ascenders(); *ascenders = it_->row()->row->ascenders();
} }
// Returns the font attributes of the current word. If iterating at a higher // Returns the font attributes of the current word. If iterating at a higher
@ -221,6 +220,12 @@ bool LTRResultIterator::WordIsFromDictionary() const {
permuter == USER_DAWG_PERM; permuter == USER_DAWG_PERM;
} }
// Returns the number of blanks before the current word.
int LTRResultIterator::BlanksBeforeWord() const {
if (it_->word() == NULL) return 1;
return it_->word()->word->space();
}
// Returns true if the current word is numeric. // Returns true if the current word is numeric.
bool LTRResultIterator::WordIsNumeric() const { bool LTRResultIterator::WordIsNumeric() const {
if (it_->word() == NULL) return false; // Already at the end! if (it_->word() == NULL) return false; // Already at the end!

View File

@ -18,8 +18,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "platform.h" #include "platform.h"
#include "pageiterator.h" #include "pageiterator.h"
@ -92,8 +92,7 @@ class TESS_API LTRResultIterator : public PageIterator {
float Confidence(PageIteratorLevel level) const; float Confidence(PageIteratorLevel level) const;
// Returns the attributes of the current row. // Returns the attributes of the current row.
void RowAttributes(float* row_height, void RowAttributes(float* row_height, float* descenders,
float* descenders,
float* ascenders) const; float* ascenders) const;
// ============= Functions that refer to words only ============. // ============= Functions that refer to words only ============.
@ -125,6 +124,9 @@ class TESS_API LTRResultIterator : public PageIterator {
// Returns true if the current word was found in a dictionary. // Returns true if the current word was found in a dictionary.
bool WordIsFromDictionary() const; bool WordIsFromDictionary() const;
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// Returns true if the current word is numeric. // Returns true if the current word is numeric.
bool WordIsNumeric() const; bool WordIsNumeric() const;
@ -216,4 +218,4 @@ class ChoiceIterator {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ #endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -18,8 +18,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H__ #ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H__ #define TESSERACT_CCMAIN_MUTABLEITERATOR_H_
#include "resultiterator.h" #include "resultiterator.h"
@ -61,4 +61,4 @@ class MutableIterator : public ResultIterator {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H__ #endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H_

View File

@ -164,13 +164,19 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
int vertical_y = 1; int vertical_y = 1;
tesseract::TabVector_LIST v_lines; tesseract::TabVector_LIST v_lines;
tesseract::TabVector_LIST h_lines; tesseract::TabVector_LIST h_lines;
int resolution = (kMinCredibleResolution > pixGetXRes(pix)) ? int resolution;
kMinCredibleResolution : pixGetXRes(pix); if (kMinCredibleResolution > pixGetXRes(pix)) {
resolution = kMinCredibleResolution;
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
pixGetXRes(pix), resolution);
} else {
resolution = pixGetXRes(pix);
}
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
&vertical_x, &vertical_y, &vertical_x, &vertical_y,
NULL, &v_lines, &h_lines); NULL, &v_lines, &h_lines);
Pix* im_pix = tesseract::ImageFind::FindImages(pix); Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
if (im_pix != NULL) { if (im_pix != NULL) {
pixSubtract(pix, pix, im_pix); pixSubtract(pix, pix, im_pix);
pixDestroy(&im_pix); pixDestroy(&im_pix);

View File

@ -17,8 +17,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_OSDETECT_H__ #ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H__ #define TESSERACT_CCMAIN_OSDETECT_H_
#include "strngs.h" #include "strngs.h"
#include "unicharset.h" #include "unicharset.h"
@ -135,4 +135,4 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
// applied for the text to be upright (readable). // applied for the text to be upright (readable).
TESS_API int OrientationIdToValue(const int& id); TESS_API int OrientationIdToValue(const int& id);
#endif // TESSERACT_CCMAIN_OSDETECT_H__ #endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,8 +1,8 @@
/****************************************************************** /******************************************************************
* File: output.cpp (Formerly output.c) * File: output.cpp (Formerly output.c)
* Description: Output pass * Description: Output pass
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Thu Aug 4 10:56:08 BST 1994 * Created: Thu Aug 4 10:56:08 BST 1994
* *
* (C) Copyright 1994, Hewlett-Packard Ltd. * (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -78,18 +78,16 @@ void Tesseract::output_pass( //Tess output pass //send to api
while (page_res_it.word () != NULL) { while (page_res_it.word () != NULL) {
check_debug_pt (page_res_it.word (), 120); check_debug_pt (page_res_it.word (), 120);
if (target_word_box) if (target_word_box) {
{ TBOX current_word_box = page_res_it.word()->word->bounding_box();
FCOORD center_pt(
TBOX current_word_box=page_res_it.word ()->word->bounding_box(); (current_word_box.right() + current_word_box.left()) / 2,
FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2); (current_word_box.bottom() + current_word_box.top()) / 2);
if (!target_word_box->contains(center_pt)) if (!target_word_box->contains(center_pt)) {
{ page_res_it.forward();
page_res_it.forward (); continue;
continue; }
} }
}
if (tessedit_write_block_separators && if (tessedit_write_block_separators &&
block_of_last_word != page_res_it.block ()) { block_of_last_word != page_res_it.block ()) {
block_of_last_word = page_res_it.block (); block_of_last_word = page_res_it.block ();
@ -337,7 +335,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
rating_per_ch = word.rating() / word_res->reject_map.length(); rating_per_ch = word.rating() / word_res->reject_map.length();
if (rating_per_ch >= suspect_rating_per_ch) if (rating_per_ch >= suspect_rating_per_ch)
return; //Don't touch bad ratings return; // Don't touch bad ratings
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) { if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/ /* Unreject any Tess Acceptable word - but NOT tess reject chs*/

View File

@ -87,7 +87,7 @@ const PageIterator& PageIterator::operator=(const PageIterator& src) {
rect_top_ = src.rect_top_; rect_top_ = src.rect_top_;
rect_width_ = src.rect_width_; rect_width_ = src.rect_width_;
rect_height_ = src.rect_height_; rect_height_ = src.rect_height_;
if (it_ != NULL) delete it_; delete it_;
it_ = new PAGE_RES_IT(*src.it_); it_ = new PAGE_RES_IT(*src.it_);
BeginWord(src.blob_index_); BeginWord(src.blob_index_);
return *this; return *this;
@ -597,10 +597,8 @@ void PageIterator::BeginWord(int offset) {
} }
word_ = NULL; word_ = NULL;
// We will be iterating the box_word. // We will be iterating the box_word.
if (cblob_it_ != NULL) { delete cblob_it_;
delete cblob_it_; cblob_it_ = NULL;
cblob_it_ = NULL;
}
} else { } else {
// No recognition yet, so a "symbol" is a cblob. // No recognition yet, so a "symbol" is a cblob.
word_ = word_res->word; word_ = word_res->word;

View File

@ -18,8 +18,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H__ #ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H__ #define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "publictypes.h" #include "publictypes.h"
#include "platform.h" #include "platform.h"
@ -361,4 +361,4 @@ class TESS_API PageIterator {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H__ #endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -18,9 +18,6 @@
**********************************************************************/ **********************************************************************/
#ifdef _WIN32 #ifdef _WIN32
#ifndef __GNUC__
#include <windows.h>
#endif // __GNUC__
#ifndef unlink #ifndef unlink
#include <io.h> #include <io.h>
#endif #endif
@ -40,6 +37,7 @@
#include "blobbox.h" #include "blobbox.h"
#include "blread.h" #include "blread.h"
#include "colfind.h" #include "colfind.h"
#include "debugpixa.h"
#include "equationdetect.h" #include "equationdetect.h"
#include "imagefind.h" #include "imagefind.h"
#include "linefind.h" #include "linefind.h"
@ -179,28 +177,6 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
return auto_page_seg_ret_val; return auto_page_seg_ret_val;
} }
// Helper writes a grey image to a file for use by scrollviewer.
// Normally for speed we don't display the image in the layout debug windows.
// If textord_debug_images is true, we draw the image as a background to some
// of the debug windows. printable determines whether these
// images are optimized for printing instead of screen display.
static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
Pix* grey_pix = pixCreate(pixGetWidth(pix_binary),
pixGetHeight(pix_binary), 8);
// Printable images are light grey on white, but for screen display
// they are black on dark grey so the other colors show up well.
if (printable) {
pixSetAll(grey_pix);
pixSetMasked(grey_pix, pix_binary, 192);
} else {
pixSetAllArbitrary(grey_pix, 64);
pixSetMasked(grey_pix, pix_binary, 0);
}
AlignedBlob::IncrementDebugPix();
pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
pixDestroy(&grey_pix);
}
/** /**
* Auto page segmentation. Divide the page image into blocks of uniform * Auto page segmentation. Divide the page image into blocks of uniform
* text linespacing and images. * text linespacing and images.
@ -229,9 +205,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
TO_BLOCK_LIST* to_blocks, TO_BLOCK_LIST* to_blocks,
BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess, BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
OSResults* osr) { OSResults* osr) {
if (textord_debug_images) {
WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
}
Pix* photomask_pix = NULL; Pix* photomask_pix = NULL;
Pix* musicmask_pix = NULL; Pix* musicmask_pix = NULL;
// The blocks made by the ColumnFinder. Moved to blocks before return. // The blocks made by the ColumnFinder. Moved to blocks before return.
@ -253,9 +226,10 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
if (equ_detect_) { if (equ_detect_) {
finder->SetEquationDetect(equ_detect_); finder->SetEquationDetect(equ_detect_);
} }
result = finder->FindBlocks( result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix, to_block, photomask_pix, pix_thresholds_,
pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks); pix_grey_, &pixa_debug_, &found_blocks,
diacritic_blobs, to_blocks);
if (result >= 0) if (result >= 0)
finder->GetDeskewVectors(&deskew_, &reskew_); finder->GetDeskewVectors(&deskew_, &reskew_);
delete finder; delete finder;
@ -268,11 +242,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
BLOCK_IT block_it(blocks); BLOCK_IT block_it(blocks);
// Move the found blocks to the input/output blocks. // Move the found blocks to the input/output blocks.
block_it.add_list_after(&found_blocks); block_it.add_list_after(&found_blocks);
if (textord_debug_images) {
// The debug image is no longer needed so delete it.
unlink(AlignedBlob::textord_debug_pix().string());
}
return result; return result;
} }
@ -314,19 +283,21 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
ASSERT_HOST(pix_binary_ != NULL); ASSERT_HOST(pix_binary_ != NULL);
if (tessedit_dump_pageseg_images) { if (tessedit_dump_pageseg_images) {
pixWrite("tessinput.png", pix_binary_, IFF_PNG); pixa_debug_.AddPix(pix_binary_, "PageSegInput");
} }
// Leptonica is used to find the rule/separator lines in the input. // Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_, LineFinder::FindAndRemoveLines(source_resolution_,
textord_tabfind_show_vlines, pix_binary_, textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix, &vertical_x, &vertical_y, music_mask_pix,
&v_lines, &h_lines); &v_lines, &h_lines);
if (tessedit_dump_pageseg_images) if (tessedit_dump_pageseg_images) {
pixWrite("tessnolines.png", pix_binary_, IFF_PNG); pixa_debug_.AddPix(pix_binary_, "NoLines");
}
// Leptonica is used to find a mask of the photo regions in the input. // Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_); *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
if (tessedit_dump_pageseg_images) if (tessedit_dump_pageseg_images) {
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG); pixa_debug_.AddPix(pix_binary_, "NoImages");
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear(); if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
// The rest of the algorithm uses the usual connected components. // The rest of the algorithm uses the usual connected components.
@ -412,9 +383,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
"Don't rotate.\n", osd_margin); "Don't rotate.\n", osd_margin);
osd_orientation = 0; osd_orientation = 0;
} else { } else {
tprintf("OSD: Weak margin (%.2f) for %d blob text block, " tprintf(
"but using orientation anyway: %d\n", "OSD: Weak margin (%.2f) for %d blob text block, "
osd_margin, osd_blobs.length(), osd_orientation); "but using orientation anyway: %d\n",
osd_margin, osd_blobs.length(), osd_orientation);
} }
} }
} }

View File

@ -18,9 +18,9 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#include "tesseractclass.h" #include "tesseractclass.h"
#ifdef OPENMP #ifdef _OPENMP
#include <omp.h> #include <omp.h>
#endif // OPENMP #endif // _OPENMP
namespace tesseract { namespace tesseract {
@ -53,7 +53,9 @@ void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
} }
// Pre-classify all the blobs. // Pre-classify all the blobs.
if (tessedit_parallelize > 1) { if (tessedit_parallelize > 1) {
#pragma omp parallel for num_threads(10) #ifdef _OPENMP
#pragma omp parallel for num_threads(10)
#endif // _OPENMP
for (int b = 0; b < blobs.size(); ++b) { for (int b = 0; b < blobs.size(); ++b) {
*blobs[b].choices = *blobs[b].choices =
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL); blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);

View File

@ -2052,7 +2052,7 @@ void ConvertHypothesizedModelRunsToParagraphs(
bool single_line_paragraph = false; bool single_line_paragraph = false;
SetOfModels models; SetOfModels models;
rows[start].NonNullHypotheses(&models); rows[start].NonNullHypotheses(&models);
if (models.size() > 0) { if (!models.empty()) {
model = models[0]; model = models[0];
if (rows[start].GetLineType(model) != LT_BODY) if (rows[start].GetLineType(model) != LT_BODY)
single_line_paragraph = true; single_line_paragraph = true;
@ -2113,6 +2113,7 @@ void ConvertHypothesizedModelRunsToParagraphs(
if ((*row_owners)[row] != NULL) { if ((*row_owners)[row] != NULL) {
tprintf("Memory leak! ConvertHypothesizeModelRunsToParagraphs() called " tprintf("Memory leak! ConvertHypothesizeModelRunsToParagraphs() called "
"more than once!\n"); "more than once!\n");
delete (*row_owners)[row];
} }
(*row_owners)[row] = p; (*row_owners)[row] = p;
} }
@ -2189,17 +2190,17 @@ void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,
SetOfModels models_w_crowns; SetOfModels models_w_crowns;
rows[i].StrongHypotheses(&models); rows[i].StrongHypotheses(&models);
rows[i].NonNullHypotheses(&models_w_crowns); rows[i].NonNullHypotheses(&models_w_crowns);
if (models.empty() && models_w_crowns.size() > 0) { if (models.empty() && !models_w_crowns.empty()) {
// Crown paragraph. Is it followed by a modeled line? // Crown paragraph. Is it followed by a modeled line?
for (int end = i + 1; end < rows.size(); end++) { for (int end = i + 1; end < rows.size(); end++) {
SetOfModels end_models; SetOfModels end_models;
SetOfModels strong_end_models; SetOfModels strong_end_models;
rows[end].NonNullHypotheses(&end_models); rows[end].NonNullHypotheses(&end_models);
rows[end].StrongHypotheses(&strong_end_models); rows[end].StrongHypotheses(&strong_end_models);
if (end_models.size() == 0) { if (end_models.empty()) {
needs_fixing = true; needs_fixing = true;
break; break;
} else if (strong_end_models.size() > 0) { } else if (!strong_end_models.empty()) {
needs_fixing = false; needs_fixing = false;
break; break;
} }
@ -2484,7 +2485,7 @@ void InitializeRowInfo(bool after_recognition,
info->ltr = ltr >= rtl; info->ltr = ltr >= rtl;
info->has_leaders = num_leaders > 3; info->has_leaders = num_leaders > 3;
info->num_words = werds.size(); info->num_words = werds.size();
if (werds.size() > 0) { if (!werds.empty()) {
WERD_RES *lword = werds[0], *rword = werds[werds.size() - 1]; WERD_RES *lword = werds[0], *rword = werds[werds.size() - 1];
info->lword_text = lword->best_choice->unichar_string().string(); info->lword_text = lword->best_choice->unichar_string().string();
info->rword_text = rword->best_choice->unichar_string().string(); info->rword_text = rword->best_choice->unichar_string().string();
@ -2537,7 +2538,7 @@ void DetectParagraphs(int debug_level,
// If we're called before text recognition, we might not have // If we're called before text recognition, we might not have
// tight block bounding boxes, so trim by the minimum on each side. // tight block bounding boxes, so trim by the minimum on each side.
if (row_infos.size() > 0) { if (!row_infos.empty()) {
int min_lmargin = row_infos[0].pix_ldistance; int min_lmargin = row_infos[0].pix_ldistance;
int min_rmargin = row_infos[0].pix_rdistance; int min_rmargin = row_infos[0].pix_rdistance;
for (int i = 1; i < row_infos.size(); i++) { for (int i = 1; i < row_infos.size(); i++) {

View File

@ -329,13 +329,19 @@ void ParamsEditor::WriteParams(char *filename,
fclose(fp); fclose(fp);
sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename); sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
int a = sv_window_->ShowYesNoDialog(msg_str); int a = sv_window_->ShowYesNoDialog(msg_str);
if (a == 'n') { return; } // don't write if (a == 'n') {
return;
} // don't write
} }
fp = fopen (filename, "wb"); // can we write to it? fp = fopen (filename, "wb"); // can we write to it?
if (fp == NULL) { if (fp == NULL) {
sv_window_->AddMessage("Can't write to file " "%s" "", filename); sv_window_->AddMessage(
"Can't write to file "
"%s"
"",
filename);
return; return;
} }

View File

@ -19,14 +19,12 @@
// //
// Tesseract parameter editor is used to edit all the parameters used // Tesseract parameter editor is used to edit all the parameters used
// within tesseract from the ui. // within tesseract from the ui.
#ifndef TESSERACT_CCMAIN_PARAMSD_H_
#define TESSERACT_CCMAIN_PARAMSD_H_
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
#ifndef VARABLED_H
#define VARABLED_H
#include "elst.h" #include "elst.h"
#ifndef NO_CUBE_BUILD
#include "scrollview.h"
#endif
#include "params.h" #include "params.h"
#include "tesseractclass.h" #include "tesseractclass.h"
@ -122,5 +120,5 @@ class ParamsEditor : public SVEventHandler {
ScrollView* sv_window_; ScrollView* sv_window_;
}; };
#endif #endif // GRAPHICS_DISABLED
#endif #endif // TESSERACT_CCMAIN_PARAMSD_H_

View File

@ -191,7 +191,7 @@ ScrollView* bln_word_window_handle() { // return handle
*/ */
void build_image_window(int width, int height) { void build_image_window(int width, int height) {
if (image_win != NULL) { delete image_win; } delete image_win;
image_win = new ScrollView(editor_image_win_name.string(), image_win = new ScrollView(editor_image_win_name.string(),
editor_image_xpos, editor_image_ypos, editor_image_xpos, editor_image_ypos,
width + 1, width + 1,

View File

@ -1,8 +1,8 @@
/********************************************************************** /**********************************************************************
* File: reject.cpp (Formerly reject.c) * File: reject.cpp (Formerly reject.c)
* Description: Rejection functions used in tessedit * Description: Rejection functions used in tessedit
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Wed Sep 23 16:50:21 BST 1992 * Created: Wed Sep 23 16:50:21 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -19,8 +19,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H__ #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H__ #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "platform.h" #include "platform.h"
#include "ltrresultiterator.h" #include "ltrresultiterator.h"
@ -241,4 +241,4 @@ class TESS_API ResultIterator : public LTRResultIterator {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H__ #endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -40,11 +40,14 @@
#include "efio.h" #include "efio.h"
#include "danerror.h" #include "danerror.h"
#include "globals.h" #include "globals.h"
#ifndef ANDROID_BUILD
#include "lstmrecognizer.h"
#endif
#include "tesseractclass.h" #include "tesseractclass.h"
#include "params.h" #include "params.h"
#define VARDIR "configs/" /*variables files */ #define VARDIR "configs/" /*variables files */
//config under api // config under api
#define API_CONFIG "configs/api_config" #define API_CONFIG "configs/api_config"
ETEXT_DESC *global_monitor = NULL; // progress monitor ETEXT_DESC *global_monitor = NULL; // progress monitor
@ -89,8 +92,8 @@ bool Tesseract::init_tesseract_lang_data(
const char *arg0, const char *textbase, const char *language, const char *arg0, const char *textbase, const char *language,
OcrEngineMode oem, char **configs, int configs_size, OcrEngineMode oem, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values, const GenericVector<STRING> *vars_values, bool set_only_non_debug_params,
bool set_only_non_debug_params) { TessdataManager *mgr) {
// Set the basename, compute the data directory. // Set the basename, compute the data directory.
main_setup(arg0, textbase); main_setup(arg0, textbase);
@ -102,20 +105,39 @@ bool Tesseract::init_tesseract_lang_data(
// Initialize TessdataManager. // Initialize TessdataManager.
STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix; STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
if (!tessdata_manager.Init(tessdata_path.string(), if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string())) {
tessdata_manager_debug_level)) { // Try without tessdata.
return false; m_data_sub_dir.set_value("");
main_setup(arg0, textbase);
language_data_path_prefix = datadir;
language_data_path_prefix += lang;
language_data_path_prefix += ".";
tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
if (!mgr->Init(tessdata_path.string())) {
tprintf("Error opening data file %s\n", tessdata_path.string());
tprintf(
"Please make sure the TESSDATA_PREFIX environment variable is set"
" to your \"tessdata\" directory.\n");
return false;
}
}
if (oem == OEM_DEFAULT) {
// Set the engine mode from availability, which can then be overridden by
// the config file when we read it below.
if (!mgr->IsLSTMAvailable()) {
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
} else if (!mgr->IsBaseAvailable()) {
tessedit_ocr_engine_mode.set_value(OEM_LSTM_ONLY);
} else {
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_LSTM_COMBINED);
}
} }
// If a language specific config file (lang.config) exists, load it in. // If a language specific config file (lang.config) exists, load it in.
if (tessdata_manager.SeekToStart(TESSDATA_LANG_CONFIG)) { TFile fp;
ParamUtils::ReadParamsFromFp( if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) {
tessdata_manager.GetDataFilePtr(), ParamUtils::ReadParamsFromFp(SET_PARAM_CONSTRAINT_NONE, &fp,
tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG), this->params());
SET_PARAM_CONSTRAINT_NONE, this->params());
if (tessdata_manager_debug_level) {
tprintf("Loaded language config file\n");
}
} }
SetParamConstraint set_params_constraint = set_only_non_debug_params ? SetParamConstraint set_params_constraint = set_only_non_debug_params ?
@ -145,10 +167,6 @@ bool Tesseract::init_tesseract_lang_data(
if (params_file != NULL) { if (params_file != NULL) {
ParamUtils::PrintParams(params_file, this->params()); ParamUtils::PrintParams(params_file, this->params());
fclose(params_file); fclose(params_file);
if (tessdata_manager_debug_level > 0) {
tprintf("Wrote parameters to %s\n",
tessedit_write_params_to_file.string());
}
} else { } else {
tprintf("Failed to open %s for writing params.\n", tprintf("Failed to open %s for writing params.\n",
tessedit_write_params_to_file.string()); tessedit_write_params_to_file.string());
@ -157,30 +175,48 @@ bool Tesseract::init_tesseract_lang_data(
// Determine which ocr engine(s) should be loaded and used for recognition. // Determine which ocr engine(s) should be loaded and used for recognition.
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem); if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
if (tessdata_manager_debug_level) {
tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
static_cast<int>(tessedit_ocr_engine_mode));
}
// If we are only loading the config file (and so not planning on doing any // If we are only loading the config file (and so not planning on doing any
// recognition) then there's nothing else do here. // recognition) then there's nothing else do here.
if (tessedit_init_config_only) { if (tessedit_init_config_only) {
if (tessdata_manager_debug_level) {
tprintf("Returning after loading config file\n");
}
return true; return true;
} }
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded.
// If LSTM_ONLY is requested, the base Tesseract files are *Not* required.
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
if (mgr->swap()) {
tprintf("Error: LSTM requested on big-endian hardware!!\n");
tprintf("Big-endian not yet supported! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
} else if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
lstm_recognizer_ = new LSTMRecognizer;
ASSERT_HOST(lstm_recognizer_->DeSerialize(mgr->swap(), &fp));
if (lstm_use_matrix) lstm_recognizer_->LoadDictionary(language, mgr);
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
}
}
#endif
// Load the unicharset // Load the unicharset
if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) || if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) { // Avoid requiring a unicharset when we aren't running base tesseract.
#ifndef ANDROID_BUILD
unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
#endif
} else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) ||
!unicharset.load_from_file(&fp, false)) {
return false; return false;
} }
if (unicharset.size() > MAX_NUM_CLASSES) { if (unicharset.size() > MAX_NUM_CLASSES) {
tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n"); tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
return false; return false;
} }
if (tessdata_manager_debug_level) tprintf("Loaded unicharset\n");
right_to_left_ = unicharset.major_right_to_left(); right_to_left_ = unicharset.major_right_to_left();
// Setup initial unichar ambigs table and read universal ambigs. // Setup initial unichar ambigs table and read universal ambigs.
@ -189,33 +225,11 @@ bool Tesseract::init_tesseract_lang_data(
unichar_ambigs.InitUnicharAmbigs(unicharset, use_ambigs_for_adaption); unichar_ambigs.InitUnicharAmbigs(unicharset, use_ambigs_for_adaption);
unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset); unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset);
if (!tessedit_ambigs_training && if (!tessedit_ambigs_training && mgr->GetComponent(TESSDATA_AMBIGS, &fp)) {
tessdata_manager.SeekToStart(TESSDATA_AMBIGS)) { unichar_ambigs.LoadUnicharAmbigs(encoder_unicharset, &fp,
TFile ambigs_file; ambigs_debug_level,
ambigs_file.Open(tessdata_manager.GetDataFilePtr(), use_ambigs_for_adaption, &unicharset);
tessdata_manager.GetEndOffset(TESSDATA_AMBIGS) + 1);
unichar_ambigs.LoadUnicharAmbigs(
encoder_unicharset,
&ambigs_file,
ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
} }
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as cube and LSTM are optional.
#ifndef NO_CUBE_BUILD
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
if (tessdata_manager_debug_level)
tprintf("Loaded Cube w/out combiner\n");
} else if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
if (tessdata_manager_debug_level)
tprintf("Loaded Cube with combiner\n");
}
#endif
// Init ParamsModel. // Init ParamsModel.
// Load pass1 and pass2 weights (for now these two sets are the same, but in // Load pass1 and pass2 weights (for now these two sets are the same, but in
// the future separate sets of weights can be generated). // the future separate sets of weights can be generated).
@ -223,15 +237,12 @@ bool Tesseract::init_tesseract_lang_data(
p < ParamsModel::PTRAIN_NUM_PASSES; ++p) { p < ParamsModel::PTRAIN_NUM_PASSES; ++p) {
language_model_->getParamsModel().SetPass( language_model_->getParamsModel().SetPass(
static_cast<ParamsModel::PassEnum>(p)); static_cast<ParamsModel::PassEnum>(p));
if (tessdata_manager.SeekToStart(TESSDATA_PARAMS_MODEL)) { if (mgr->GetComponent(TESSDATA_PARAMS_MODEL, &fp)) {
if (!language_model_->getParamsModel().LoadFromFp( if (!language_model_->getParamsModel().LoadFromFp(lang.string(), &fp)) {
lang.string(), tessdata_manager.GetDataFilePtr(),
tessdata_manager.GetEndOffset(TESSDATA_PARAMS_MODEL))) {
return false; return false;
} }
} }
} }
if (tessdata_manager_debug_level) language_model_->getParamsModel().Print();
return true; return true;
} }
@ -276,8 +287,6 @@ void Tesseract::ParseLanguageString(const char* lang_str,
remains = next; remains = next;
// Check whether lang_code is already in the target vector and add. // Check whether lang_code is already in the target vector and add.
if (!IsStrInList(lang_code, *target)) { if (!IsStrInList(lang_code, *target)) {
if (tessdata_manager_debug_level)
tprintf("Adding language '%s' to list\n", lang_code.string());
target->push_back(lang_code); target->push_back(lang_code);
} }
} }
@ -287,12 +296,13 @@ void Tesseract::ParseLanguageString(const char* lang_str,
// string and recursively any additional languages required by any language // string and recursively any additional languages required by any language
// traineddata file (via tessedit_load_sublangs in its config) that is loaded. // traineddata file (via tessedit_load_sublangs in its config) that is loaded.
// See init_tesseract_internal for args. // See init_tesseract_internal for args.
int Tesseract::init_tesseract( int Tesseract::init_tesseract(const char *arg0, const char *textbase,
const char *arg0, const char *textbase, const char *language, const char *language, OcrEngineMode oem,
OcrEngineMode oem, char **configs, int configs_size, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values, const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params) { bool set_only_non_debug_params,
TessdataManager *mgr) {
GenericVector<STRING> langs_to_load; GenericVector<STRING> langs_to_load;
GenericVector<STRING> langs_not_to_load; GenericVector<STRING> langs_not_to_load;
ParseLanguageString(language, &langs_to_load, &langs_not_to_load); ParseLanguageString(language, &langs_to_load, &langs_not_to_load);
@ -314,15 +324,15 @@ int Tesseract::init_tesseract(
} }
int result = tess_to_init->init_tesseract_internal( int result = tess_to_init->init_tesseract_internal(
arg0, textbase, lang_str, oem, configs, configs_size, arg0, textbase, lang_str, oem, configs, configs_size, vars_vec,
vars_vec, vars_values, set_only_non_debug_params); vars_values, set_only_non_debug_params, mgr);
// Forget that language, but keep any reader we were given.
mgr->Clear();
if (!loaded_primary) { if (!loaded_primary) {
if (result < 0) { if (result < 0) {
tprintf("Failed loading language '%s'\n", lang_str); tprintf("Failed loading language '%s'\n", lang_str);
} else { } else {
if (tessdata_manager_debug_level)
tprintf("Loaded language '%s' as main language\n", lang_str);
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
&langs_to_load, &langs_not_to_load); &langs_to_load, &langs_not_to_load);
loaded_primary = true; loaded_primary = true;
@ -332,8 +342,6 @@ int Tesseract::init_tesseract(
tprintf("Failed loading language '%s'\n", lang_str); tprintf("Failed loading language '%s'\n", lang_str);
delete tess_to_init; delete tess_to_init;
} else { } else {
if (tessdata_manager_debug_level)
tprintf("Loaded language '%s' as secondary language\n", lang_str);
sub_langs_.push_back(tess_to_init); sub_langs_.push_back(tess_to_init);
// Add any languages that this language requires // Add any languages that this language requires
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
@ -358,16 +366,11 @@ int Tesseract::init_tesseract(
this->language_model_->getParamsModel()); this->language_model_->getParamsModel());
} }
tprintf("Using params model of the primary language\n"); tprintf("Using params model of the primary language\n");
if (tessdata_manager_debug_level) {
this->language_model_->getParamsModel().Print();
}
} else { } else {
this->language_model_->getParamsModel().Clear(); this->language_model_->getParamsModel().Clear();
for (int s = 0; s < sub_langs_.size(); ++s) { for (int s = 0; s < sub_langs_.size(); ++s) {
sub_langs_[s]->language_model_->getParamsModel().Clear(); sub_langs_[s]->language_model_->getParamsModel().Clear();
} }
if (tessdata_manager_debug_level)
tprintf("Using default language params\n");
} }
} }
@ -391,33 +394,26 @@ int Tesseract::init_tesseract(
// in vars_vec. // in vars_vec.
// If set_only_init_params is true, then only the initialization variables // If set_only_init_params is true, then only the initialization variables
// will be set. // will be set.
int Tesseract::init_tesseract_internal( int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase,
const char *arg0, const char *textbase, const char *language, const char *language, OcrEngineMode oem,
OcrEngineMode oem, char **configs, int configs_size, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values, const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params) { bool set_only_non_debug_params,
TessdataManager *mgr) {
if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs, if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
configs_size, vars_vec, vars_values, configs_size, vars_vec, vars_values,
set_only_non_debug_params)) { set_only_non_debug_params, mgr)) {
return -1; return -1;
} }
if (tessedit_init_config_only) { if (tessedit_init_config_only) {
tessdata_manager.End();
return 0; return 0;
} }
// If only Cube will be used, skip loading Tesseract classifier's // If only LSTM will be used, skip loading Tesseract classifier's
// pre-trained templates. // pre-trained templates and dictionary.
bool init_tesseract_classifier = bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
(tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY || program_editup(textbase, init_tesseract ? mgr : nullptr,
tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED); init_tesseract ? mgr : nullptr);
// If only Cube will be used and if it has its own Unicharset,
// skip initializing permuter and loading Tesseract Dawgs.
bool init_dict =
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
program_editup(textbase, init_tesseract_classifier, init_dict);
tessdata_manager.End();
return 0; //Normal exit return 0; //Normal exit
} }
@ -462,14 +458,14 @@ void Tesseract::SetupUniversalFontIds() {
} }
// init the LM component // init the LM component
int Tesseract::init_tesseract_lm(const char *arg0, int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase,
const char *textbase, const char *language, TessdataManager *mgr) {
const char *language) {
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
NULL, 0, NULL, NULL, false)) NULL, 0, NULL, NULL, false, mgr))
return -1; return -1;
getDict().Load(Dict::GlobalDawgCache()); getDict().SetupForLoad(Dict::GlobalDawgCache());
tessdata_manager.End(); getDict().Load(lang, mgr);
getDict().FinishLoad();
return 0; return 0;
} }

View File

@ -1,306 +0,0 @@
/**********************************************************************
* File: tesseract_cube_combiner.cpp
* Description: Implementation of the Tesseract & Cube results combiner Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The TesseractCubeCombiner class provides the functionality of combining
// the recognition results of Tesseract and Cube at the word level
#include <algorithm>
#include <string>
#include <vector>
#include <wctype.h>
#include "tesseract_cube_combiner.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "cube_utils.h"
#include "neural_net.h"
#include "tesseractclass.h"
#include "word_altlist.h"
namespace tesseract {
// Binds the combiner to the given Cube recognition context. The combiner
// net pointer starts out NULL; LoadCombinerNet() is what assigns it.
TesseractCubeCombiner::TesseractCubeCombiner(CubeRecoContext *cube_cntxt)
    : cube_cntxt_(cube_cntxt), combiner_net_(NULL) {}
// Releases the combiner net (if one was loaded) and clears the pointer.
TesseractCubeCombiner::~TesseractCubeCombiner() {
  // Deleting a null pointer is a no-op, so no explicit guard is needed.
  delete combiner_net_;
  combiner_net_ = NULL;
}
// Loads the combiner neural net from "<data path><lang>.tesseract_cube.nn".
// Returns true when a net with exactly two outputs was loaded. Returns false
// when the file cannot be opened, cannot be parsed, or has a different output
// count; in the last two cases combiner_net_ is left NULL.
// If the file cannot be opened, any previously loaded net is left untouched.
bool TesseractCubeCombiner::LoadCombinerNet() {
  ASSERT_HOST(cube_cntxt_);
  // Compute the path of the combiner net
  string data_path;
  cube_cntxt_->GetDataFilePath(&data_path);
  string net_file_name = data_path + cube_cntxt_->Lang() +
      ".tesseract_cube.nn";

  // Return false if file does not exist
  FILE *fp = fopen(net_file_name.c_str(), "rb");
  if (fp == NULL)
    return false;
  fclose(fp);

  // Free any net left over from an earlier call so that reloading does not
  // leak it when the pointer is overwritten below.
  delete combiner_net_;

  // Load and validate net
  combiner_net_ = NeuralNet::FromFile(net_file_name);
  if (combiner_net_ == NULL) {
    tprintf("Could not read combiner net file %s\n", net_file_name.c_str());
    return false;
  }
  if (combiner_net_->out_cnt() != 2) {
    tprintf("Invalid combiner net file %s! Output count != 2\n",
            net_file_name.c_str());
    delete combiner_net_;
    combiner_net_ = NULL;
    return false;
  }
  return true;
}
// Returns a normalized copy of a UTF-8 string. The input is converted to
// UTF-32; punctuation characters are dropped when remove_punc is set,
// alphabetic characters are lower-cased when norm_case is set, and the
// result is converted back to UTF-8.
string TesseractCubeCombiner::NormalizeString(const string &str,
                                              bool remove_punc,
                                              bool norm_case) {
  string_32 wide_in;
  CubeUtils::UTF8ToUTF32(str.c_str(), &wide_in);

  string_32 wide_out;
  for (int pos = 0; pos < wide_in.length(); ++pos) {
    char_32 ch = wide_in[pos];
    // Punctuation is skipped only when the caller asked for its removal.
    if (remove_punc && iswpunct(ch) != 0)
      continue;
    if (norm_case && iswalpha(ch))
      ch = towlower(ch);
    wide_out.push_back(ch);
  }

  string utf8_out;
  CubeUtils::UTF32ToUTF8(wide_out.c_str(), &utf8_out);
  return utf8_out;
}
// Compares 2 strings optionally ignoring punctuation
int TesseractCubeCombiner::CompareStrings(const string &str1,
const string &str2,
bool ignore_punc,
bool ignore_case) {
if (!ignore_punc && !ignore_case) {
return str1.compare(str2);
}
string norm_str1 = NormalizeString(str1, ignore_punc, ignore_case);
string norm_str2 = NormalizeString(str2, ignore_punc, ignore_case);
return norm_str1.compare(norm_str2);
}
// Returns true when valid_word() on the dictionary of the Tesseract object
// held by the Cube context yields a positive value for the given string.
bool TesseractCubeCombiner::ValidWord(const string &str) {
  const char *word = str.c_str();
  return cube_cntxt_->TesseractObject()->getDict().valid_word(word) > 0;
}
// Public method for computing the combiner features. The agreement
// output parameter will be true if both answers are identical,
// and false otherwise.
//
// Returns false, leaving *features empty, when Cube's alternate list is
// NULL or empty, when Cube's best string is NULL or empty, or when a second
// alternate exists but is NULL or empty.
//
// On success *features holds, in this order: Tesseract confidence, Cube cost
// of Tesseract's string, rank of Tesseract's string in Cube's alternates,
// length of Tesseract's string, Tesseract string in dictionary, [bigram cost
// of Tesseract's string], Cube best cost, Cube next-best cost, length of
// Cube's best string, Cube best string in dictionary, [bigram cost of Cube's
// best string], and three string-equality flags. The two bracketed entries
// are only present when cube_cntxt_->Bigrams() is non-NULL.
//
// cube_obj and cube_cntxt_ are dereferenced without a NULL check.
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
int tess_confidence,
CubeObject *cube_obj,
WordAltList *cube_alt_list,
vector<double> *features,
bool *agreement) {
// Reset both outputs first so every early return leaves them well defined.
features->clear();
*agreement = false;
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
return false;
// Get Cube's best string; return false if empty
char_32 *cube_best_str32 = cube_alt_list->Alt(0);
if (cube_best_str32 == NULL || CubeUtils::StrLen(cube_best_str32) < 1)
return false;
string cube_best_str;
int cube_best_cost = cube_alt_list->AltCost(0);
int cube_best_bigram_cost = 0;
bool cube_best_bigram_cost_valid = true;
// The bigram cost is only available when the context has a bigram model.
if (cube_cntxt_->Bigrams())
cube_best_bigram_cost = cube_cntxt_->Bigrams()->
Cost(cube_best_str32, cube_cntxt_->CharacterSet());
else
cube_best_bigram_cost_valid = false;
CubeUtils::UTF32ToUTF8(cube_best_str32, &cube_best_str);
// Get Tesseract's UTF32 string
string_32 tess_str32;
CubeUtils::UTF8ToUTF32(tess_str.c_str(), &tess_str32);
// Compute agreement flag
*agreement = (tess_str.compare(cube_best_str) == 0);
// Get Cube's second best string; if empty, return false
// With a single alternate, cube_next_best_cost stays at WORST_COST.
char_32 *cube_next_best_str32;
string cube_next_best_str;
int cube_next_best_cost = WORST_COST;
if (cube_alt_list->AltCount() > 1) {
cube_next_best_str32 = cube_alt_list->Alt(1);
if (cube_next_best_str32 == NULL ||
CubeUtils::StrLen(cube_next_best_str32) == 0) {
return false;
}
cube_next_best_cost = cube_alt_list->AltCost(1);
CubeUtils::UTF32ToUTF8(cube_next_best_str32, &cube_next_best_str);
}
// Rank of Tesseract's top result in Cube's alternate list
// If no alternate matches, tess_rank ends up equal to AltCount().
int tess_rank = 0;
for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) {
string alt_str;
CubeUtils::UTF32ToUTF8(cube_alt_list->Alt(tess_rank), &alt_str);
if (alt_str == tess_str)
break;
}
// Cube's cost for tesseract's result. Note that this modifies the
// state of cube_obj, including its alternate list by calling RecognizeWord()
// Everything read from cube_alt_list above was copied into locals before
// this call; cube_alt_list is not read again below.
int tess_cost = cube_obj->WordCost(tess_str.c_str());
// Cube's bigram cost of Tesseract's string
int tess_bigram_cost = 0;
int tess_bigram_cost_valid = true;
if (cube_cntxt_->Bigrams())
tess_bigram_cost = cube_cntxt_->Bigrams()->
Cost(tess_str32.c_str(), cube_cntxt_->CharacterSet());
else
tess_bigram_cost_valid = false;
// Tesseract confidence
features->push_back(tess_confidence);
// Cube cost of Tesseract string
features->push_back(tess_cost);
// Cube Rank of Tesseract string
features->push_back(tess_rank);
// length of Tesseract OCR string
features->push_back(tess_str.length());
// Tesseract OCR string in dictionary
features->push_back(ValidWord(tess_str));
if (tess_bigram_cost_valid) {
// bigram cost of Tesseract string
features->push_back(tess_bigram_cost);
}
// Cube cost of Cube best string
features->push_back(cube_best_cost);
// Cube cost of Cube next best string
features->push_back(cube_next_best_cost);
// length of Cube string
features->push_back(cube_best_str.length());
// Cube string in dictionary
features->push_back(ValidWord(cube_best_str));
if (cube_best_bigram_cost_valid) {
// bigram cost of Cube string
features->push_back(cube_best_bigram_cost);
}
// case-insensitive string comparison, including punctuation
int compare_nocase_punc = CompareStrings(cube_best_str,
tess_str, false, true);
features->push_back(compare_nocase_punc == 0);
// case-sensitive string comparison, ignoring punctuation
int compare_case_nopunc = CompareStrings(cube_best_str,
tess_str, true, false);
features->push_back(compare_case_nopunc == 0);
// case-insensitive string comparison, ignoring punctuation
int compare_nocase_nopunc = CompareStrings(cube_best_str,
tess_str, true, true);
features->push_back(compare_nocase_nopunc == 0);
return true;
}
// Two-argument overload: takes the alternate list from cube_obj itself and
// forwards to the three-argument overload. If cube_obj has no alternate list
// yet, RecognizeWord() is called to produce one. Returns 1.0 (Tesseract
// wins) when no combiner net is loaded, cube_obj is NULL, or Cube yields no
// alternates. The forwarded call goes on to use cube_obj for Cube's word
// cost, which modifies its alternate list, so previous state will be lost.
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
                                            CubeObject *cube_obj) {
  const bool combiner_ready = (combiner_net_ != NULL && cube_obj != NULL);
  if (!combiner_ready) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube objects not initialized; defaulting to Tesseract\n");
    return 1.0;
  }

  // Reuse the object's current alternates, recognizing only if there are none.
  WordAltList *alternates = cube_obj->AlternateList();
  if (alternates == NULL) {
    alternates = cube_obj->RecognizeWord();
  }

  const bool have_alternates =
      (alternates != NULL && alternates->AltCount() > 0);
  if (!have_alternates) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube returned no results; defaulting to Tesseract\n");
    return 1.0;
  }
  return CombineResults(tess_res, cube_obj, alternates);
}
// Three-argument overload. cube_alt_list is expected to come from the
// CubeObject that recognized the word; cube_obj may be that same instance or
// a separate one used only by the combiner. Either way its alternate list is
// modified while the features are computed.
// Returns the second output of the combiner net. Returns 1.0 (Tesseract
// wins) when the combiner net, cube_obj or the alternate list is missing or
// empty, when feature computation fails, when both engines agree, or when
// the net's feed-forward pass fails.
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
                                            CubeObject *cube_obj,
                                            WordAltList *cube_alt_list) {
  const bool inputs_usable =
      (combiner_net_ != NULL && cube_obj != NULL &&
       cube_alt_list != NULL && cube_alt_list->AltCount() > 0);
  if (!inputs_usable) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube result cannot be retrieved; defaulting to Tesseract\n");
    return 1.0;
  }

  // Tesseract's result string.
  string tess_str = tess_res->best_choice->unichar_string().string();

  // Scale the certainty as 100 + 5 * certainty, then clamp to [1, 100].
  int scaled_certainty =
      static_cast<int>(100 + (5 * tess_res->best_choice->certainty()));
  int tess_confidence = MIN(100, MAX(1, scaled_certainty));

  // Build the feature vector; on failure or agreement Tesseract wins.
  vector<double> features;
  bool agreement;
  if (!ComputeCombinerFeatures(tess_str, tess_confidence, cube_obj,
                               cube_alt_list, &features, &agreement) ||
      agreement) {
    return 1.0;
  }

  // Classify the feature vector; the second output is the returned value.
  double net_out[2];
  if (!combiner_net_->FeedForward(&features[0], net_out)) {
    return 1.0;
  }
  return net_out[1];
}
}

View File

@ -1,103 +0,0 @@
/**********************************************************************
* File: tesseract_cube_combiner.h
* Description: Declaration of the Tesseract & Cube results combiner Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The TesseractCubeCombiner class provides the functionality of combining
// the recognition results of Tesseract and Cube at the word level
#ifndef TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
#define TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
#include <string>
#include <vector>
#include "pageres.h"
#ifdef _WIN32
#include <windows.h>
using namespace std;
#endif
#ifdef USE_STD_NAMESPACE
using std::string;
using std::vector;
#endif
namespace tesseract {
// Forward declarations. These types are only used through pointers in this
// header, so their full definitions are not required here.
class CubeObject;
class NeuralNet;
class CubeRecoContext;
class WordAltList;
// Combines the word-level recognition results of Tesseract and Cube.
// The combiner holds a NeuralNet (combiner_net_) and a CubeRecoContext
// (cube_cntxt_); see the member comments at the bottom of the class.
class TesseractCubeCombiner {
public:
// Creates a combiner for the given Cube recognition context.
// NOTE(review): presumably cube_cntxt is stored in cube_cntxt_ and is not
// owned by the combiner -- confirm against the implementation file.
explicit TesseractCubeCombiner(CubeRecoContext *cube_cntxt);
// NOTE(review): presumably releases combiner_net_ -- confirm against the
// implementation file.
virtual ~TesseractCubeCombiner();
// There are 2 public methods for combining the results of tesseract
// and cube. Both return the probability that the Tesseract result is
// correct. The difference between the two interfaces is in how the
// passed-in CubeObject is used.
// The CubeObject parameter is used for 2 purposes: 1) to retrieve
// cube's alt list, and 2) to compute cube's word cost for the
// tesseract result. Both uses may modify the state of the
// CubeObject (including the BeamSearch state) with a call to
// RecognizeWord().
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj);
// The alt_list parameter is expected to have been extracted from the
// CubeObject that recognized the word to be combined. The cube_obj
// parameter passed in is a separate instance to be used only by
// the combiner.
// The implementation falls back to returning 1.0 (i.e. Tesseract wins)
// when the combiner net or the Cube result is unavailable, when feature
// computation fails, or when both engines agree.
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj,
WordAltList *alt_list);
// Public method for computing the combiner features. The agreement
// output parameter will be true if both answers are identical,
// false otherwise. Modifies the cube_alt_list, so no assumptions
// should be made about its state upon return.
// The bool return value is treated by CombineResults as a success flag:
// a false return makes the caller default to the Tesseract result.
// tess_res is the Tesseract result string and tess_confidence its
// confidence (CombineResults passes a value clamped to [1, 100]).
bool ComputeCombinerFeatures(const string &tess_res,
int tess_confidence,
CubeObject *cube_obj,
WordAltList *cube_alt_list,
vector<double> *features,
bool *agreement);
// Is the word valid according to Tesseract's language model
bool ValidWord(const string &str);
// Loads the combiner neural network from file, using cube_cntxt_
// to find path.
// NOTE(review): presumably returns true on success -- confirm.
bool LoadCombinerNet();
private:
// Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
// strips punc and/or normalizes case and then converts back
string NormalizeString(const string &str, bool remove_punc, bool norm_case);
// Compares 2 strings after optionally normalizing them and or stripping
// punctuation
// NOTE(review): the int result presumably follows strcmp-style ordering
// (0 when equal) -- confirm against the implementation file.
int CompareStrings(const string &str1, const string &str2, bool ignore_punc,
bool norm_case);
NeuralNet *combiner_net_; // pointer to the combiner NeuralNet object
CubeRecoContext *cube_cntxt_; // used for language ID and data paths
};
}
#endif // TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H

View File

@ -42,14 +42,11 @@
#include "tesseractclass.h" #include "tesseractclass.h"
#include "allheaders.h" #include "allheaders.h"
#ifndef NO_CUBE_BUILD
#include "cube_reco_context.h"
#endif
#include "edgblob.h" #include "edgblob.h"
#include "equationdetect.h" #include "equationdetect.h"
#include "globals.h" #include "globals.h"
#ifndef NO_CUBE_BUILD #ifndef ANDROID_BUILD
#include "tesseract_cube_combiner.h" #include "lstmrecognizer.h"
#endif #endif
namespace tesseract { namespace tesseract {
@ -65,6 +62,9 @@ Tesseract::Tesseract()
"Generate training data from boxed chars", this->params()), "Generate training data from boxed chars", this->params()),
BOOL_MEMBER(tessedit_make_boxes_from_boxes, false, BOOL_MEMBER(tessedit_make_boxes_from_boxes, false,
"Generate more boxes from boxed chars", this->params()), "Generate more boxes from boxed chars", this->params()),
BOOL_MEMBER(tessedit_train_line_recognizer, false,
"Break input into lines and remap boxes if present",
this->params()),
BOOL_MEMBER(tessedit_dump_pageseg_images, false, BOOL_MEMBER(tessedit_dump_pageseg_images, false,
"Dump intermediate images made during page segmentation", "Dump intermediate images made during page segmentation",
this->params()), this->params()),
@ -76,11 +76,10 @@ Tesseract::Tesseract()
" 5=line, 6=word, 7=char" " 5=line, 6=word, 7=char"
" (Values from PageSegMode enum in publictypes.h)", " (Values from PageSegMode enum in publictypes.h)",
this->params()), this->params()),
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY, INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
"Which OCR engine(s) to run (Tesseract, Cube, both)." "Which OCR engine(s) to run (Tesseract, LSTM, both)."
" Defaults to loading and running only Tesseract" " Defaults to loading and running the most accurate"
" (no Cube,no combiner)." " available.",
" Values from OcrEngineMode enum in tesseractclass.h)",
this->params()), this->params()),
STRING_MEMBER(tessedit_char_blacklist, "", STRING_MEMBER(tessedit_char_blacklist, "",
"Blacklist of chars not to recognize", this->params()), "Blacklist of chars not to recognize", this->params()),
@ -215,13 +214,16 @@ Tesseract::Tesseract()
BOOL_MEMBER(test_pt, false, "Test for point", this->params()), BOOL_MEMBER(test_pt, false, "Test for point", this->params()),
double_MEMBER(test_pt_x, 99999.99, "xcoord", this->params()), double_MEMBER(test_pt_x, 99999.99, "xcoord", this->params()),
double_MEMBER(test_pt_y, 99999.99, "ycoord", this->params()), double_MEMBER(test_pt_y, 99999.99, "ycoord", this->params()),
INT_MEMBER(multilang_debug_level, 0, "Print multilang debug info.",
this->params()),
INT_MEMBER(paragraph_debug_level, 0, "Print paragraph debug info.", INT_MEMBER(paragraph_debug_level, 0, "Print paragraph debug info.",
this->params()), this->params()),
BOOL_MEMBER(paragraph_text_based, true, BOOL_MEMBER(paragraph_text_based, true,
"Run paragraph detection on the post-text-recognition " "Run paragraph detection on the post-text-recognition "
"(more accurate)", "(more accurate)",
this->params()), this->params()),
INT_MEMBER(cube_debug_level, 0, "Print cube debug info.", this->params()), BOOL_MEMBER(lstm_use_matrix, 1,
"Use ratings matrix/beam search with lstm", this->params()),
STRING_MEMBER(outlines_odd, "%| ", "Non standard number of outlines", STRING_MEMBER(outlines_odd, "%| ", "Non standard number of outlines",
this->params()), this->params()),
STRING_MEMBER(outlines_2, "ij!?%\":;", "Non standard number of outlines", STRING_MEMBER(outlines_2, "ij!?%\":;", "Non standard number of outlines",
@ -265,7 +267,7 @@ Tesseract::Tesseract()
this->params()), this->params()),
BOOL_MEMBER(tessedit_debug_quality_metrics, false, BOOL_MEMBER(tessedit_debug_quality_metrics, false,
"Output data to debug file", this->params()), "Output data to debug file", this->params()),
BOOL_MEMBER(bland_unrej, false, "unrej potential with no chekcs", BOOL_MEMBER(bland_unrej, false, "unrej potential with no checks",
this->params()), this->params()),
double_MEMBER(quality_rowrej_pc, 1.1, double_MEMBER(quality_rowrej_pc, 1.1,
"good_quality_doc gte good char limit", this->params()), "good_quality_doc gte good char limit", this->params()),
@ -389,6 +391,9 @@ Tesseract::Tesseract()
this->params()), this->params()),
BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file", BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file",
this->params()), this->params()),
BOOL_MEMBER(textonly_pdf, false,
"Create PDF with only one invisible text layer",
this->params()),
STRING_MEMBER(unrecognised_char, "|", STRING_MEMBER(unrecognised_char, "|",
"Output char for unidentified blobs", this->params()), "Output char for unidentified blobs", this->params()),
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()), INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
@ -398,8 +403,8 @@ Tesseract::Tesseract()
"Don't suspect dict wds longer than this", this->params()), "Don't suspect dict wds longer than this", this->params()),
BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected", BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
this->params()), this->params()),
double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit", double_MEMBER(suspect_rating_per_ch, 999.9,
this->params()), "Don't touch bad rating limit", this->params()),
double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit", double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
this->params()), this->params()),
BOOL_MEMBER(tessedit_minimal_rejection, false, BOOL_MEMBER(tessedit_minimal_rejection, false,
@ -452,7 +457,7 @@ Tesseract::Tesseract()
this->params()), this->params()),
INT_MEMBER(tessedit_page_number, -1, INT_MEMBER(tessedit_page_number, -1,
"-1 -> All pages" "-1 -> All pages"
" , else specifc page to process", " , else specific page to process",
this->params()), this->params()),
BOOL_MEMBER(tessedit_write_images, false, BOOL_MEMBER(tessedit_write_images, false,
"Capture the image from the IPE", this->params()), "Capture the image from the IPE", this->params()),
@ -461,10 +466,6 @@ Tesseract::Tesseract()
STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()), STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()),
BOOL_MEMBER(tessedit_override_permuter, true, "According to dict_word", BOOL_MEMBER(tessedit_override_permuter, true, "According to dict_word",
this->params()), this->params()),
INT_MEMBER(tessdata_manager_debug_level, 0,
"Debug level for"
" TessdataManager functions.",
this->params()),
STRING_MEMBER(tessedit_load_sublangs, "", STRING_MEMBER(tessedit_load_sublangs, "",
"List of languages to load with this one", this->params()), "List of languages to load with this one", this->params()),
BOOL_MEMBER(tessedit_use_primary_params_model, false, BOOL_MEMBER(tessedit_use_primary_params_model, false,
@ -512,7 +513,6 @@ Tesseract::Tesseract()
"Page separator (default is form feed control character)", "Page separator (default is form feed control character)",
this->params()), this->params()),
// The following parameters were deprecated and removed from their // The following parameters were deprecated and removed from their
// original // original
// locations. The parameters are temporarily kept here to give Tesseract // locations. The parameters are temporarily kept here to give Tesseract
@ -604,8 +604,8 @@ Tesseract::Tesseract()
backup_config_file_(NULL), backup_config_file_(NULL),
pix_binary_(NULL), pix_binary_(NULL),
cube_binary_(NULL),
pix_grey_(NULL), pix_grey_(NULL),
pix_original_(NULL),
pix_thresholds_(NULL), pix_thresholds_(NULL),
source_resolution_(0), source_resolution_(0),
textord_(this), textord_(this),
@ -616,33 +616,28 @@ Tesseract::Tesseract()
reskew_(1.0f, 0.0f), reskew_(1.0f, 0.0f),
most_recently_used_(this), most_recently_used_(this),
font_table_size_(0), font_table_size_(0),
#ifndef NO_CUBE_BUILD equ_detect_(NULL),
cube_cntxt_(NULL), #ifndef ANDROID_BUILD
tess_cube_combiner_(NULL), lstm_recognizer_(NULL),
#endif #endif
equ_detect_(NULL) { train_line_page_num_(0) {
} }
Tesseract::~Tesseract() { Tesseract::~Tesseract() {
Clear(); Clear();
pixDestroy(&pix_original_);
end_tesseract(); end_tesseract();
sub_langs_.delete_data_pointers(); sub_langs_.delete_data_pointers();
#ifndef NO_CUBE_BUILD #ifndef ANDROID_BUILD
// Delete cube objects. delete lstm_recognizer_;
if (cube_cntxt_ != NULL) { lstm_recognizer_ = NULL;
delete cube_cntxt_;
cube_cntxt_ = NULL;
}
if (tess_cube_combiner_ != NULL) {
delete tess_cube_combiner_;
tess_cube_combiner_ = NULL;
}
#endif #endif
} }
void Tesseract::Clear() { void Tesseract::Clear() {
STRING debug_name = imagebasename + "_debug.pdf";
pixa_debug_.WritePDF(debug_name.string());
pixDestroy(&pix_binary_); pixDestroy(&pix_binary_);
pixDestroy(&cube_binary_);
pixDestroy(&pix_grey_); pixDestroy(&pix_grey_);
pixDestroy(&pix_thresholds_); pixDestroy(&pix_thresholds_);
pixDestroy(&scaled_color_); pixDestroy(&scaled_color_);
@ -692,8 +687,6 @@ void Tesseract::SetBlackAndWhitelist() {
// page segmentation. // page segmentation.
void Tesseract::PrepareForPageseg() { void Tesseract::PrepareForPageseg() {
textord_.set_use_cjk_fp_model(textord_use_cjk_fp_model); textord_.set_use_cjk_fp_model(textord_use_cjk_fp_model);
pixDestroy(&cube_binary_);
cube_binary_ = pixClone(pix_binary());
// Find the max splitter strategy over all langs. // Find the max splitter strategy over all langs.
ShiroRekhaSplitter::SplitStrategy max_pageseg_strategy = ShiroRekhaSplitter::SplitStrategy max_pageseg_strategy =
static_cast<ShiroRekhaSplitter::SplitStrategy>( static_cast<ShiroRekhaSplitter::SplitStrategy>(
@ -704,9 +697,6 @@ void Tesseract::PrepareForPageseg() {
static_cast<inT32>(sub_langs_[i]->pageseg_devanagari_split_strategy)); static_cast<inT32>(sub_langs_[i]->pageseg_devanagari_split_strategy));
if (pageseg_strategy > max_pageseg_strategy) if (pageseg_strategy > max_pageseg_strategy)
max_pageseg_strategy = pageseg_strategy; max_pageseg_strategy = pageseg_strategy;
// Clone the cube image to all the sub langs too.
pixDestroy(&sub_langs_[i]->cube_binary_);
sub_langs_[i]->cube_binary_ = pixClone(pix_binary());
pixDestroy(&sub_langs_[i]->pix_binary_); pixDestroy(&sub_langs_[i]->pix_binary_);
sub_langs_[i]->pix_binary_ = pixClone(pix_binary()); sub_langs_[i]->pix_binary_ = pixClone(pix_binary());
} }
@ -714,7 +704,7 @@ void Tesseract::PrepareForPageseg() {
// the newly splitted image. // the newly splitted image.
splitter_.set_orig_pix(pix_binary()); splitter_.set_orig_pix(pix_binary());
splitter_.set_pageseg_split_strategy(max_pageseg_strategy); splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
if (splitter_.Split(true)) { if (splitter_.Split(true, &pixa_debug_)) {
ASSERT_HOST(splitter_.splitted_image()); ASSERT_HOST(splitter_.splitted_image());
pixDestroy(&pix_binary_); pixDestroy(&pix_binary_);
pix_binary_ = pixClone(splitter_.splitted_image()); pix_binary_ = pixClone(splitter_.splitted_image());
@ -743,7 +733,7 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list,
splitter_.set_segmentation_block_list(block_list); splitter_.set_segmentation_block_list(block_list);
splitter_.set_ocr_split_strategy(max_ocr_strategy); splitter_.set_ocr_split_strategy(max_ocr_strategy);
// Run the splitter for OCR // Run the splitter for OCR
bool split_for_ocr = splitter_.Split(false); bool split_for_ocr = splitter_.Split(false, &pixa_debug_);
// Restore pix_binary to the binarized original pix for future reference. // Restore pix_binary to the binarized original pix for future reference.
ASSERT_HOST(splitter_.orig_pix()); ASSERT_HOST(splitter_.orig_pix());
pixDestroy(&pix_binary_); pixDestroy(&pix_binary_);

View File

@ -23,22 +23,22 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__ #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H_
#define TESSERACT_CCMAIN_TESSERACTCLASS_H__ #define TESSERACT_CCMAIN_TESSERACTCLASS_H_
#include "allheaders.h" #include "allheaders.h"
#include "control.h" #include "control.h"
#include "docqual.h" #include "debugpixa.h"
#include "devanagari_processing.h" #include "devanagari_processing.h"
#include "docqual.h"
#include "genericvector.h" #include "genericvector.h"
#include "params.h"
#include "ocrclass.h" #include "ocrclass.h"
#include "params.h"
#include "textord.h" #include "textord.h"
#include "wordrec.h" #include "wordrec.h"
class BLOB_CHOICE_LIST_CLIST; class BLOB_CHOICE_LIST_CLIST;
class BLOCK_LIST; class BLOCK_LIST;
class CharSamp;
struct OSResults; struct OSResults;
class PAGE_RES; class PAGE_RES;
class PAGE_RES_IT; class PAGE_RES_IT;
@ -77,8 +77,7 @@ class WERD_RES;
// WordRec (wordrec/wordrec.h) // WordRec (wordrec/wordrec.h)
// ^ Members include: WERD*, DENORM* // ^ Members include: WERD*, DENORM*
// Tesseract (ccmain/tesseractclass.h) // Tesseract (ccmain/tesseractclass.h)
// Members include: Pix*, CubeRecoContext*, // Members include: Pix*
// TesseractCubeCombiner*
// //
// Other important classes: // Other important classes:
// //
@ -97,16 +96,11 @@ class WERD_RES;
namespace tesseract { namespace tesseract {
class ColumnFinder; class ColumnFinder;
#ifndef NO_CUBE_BUILD class DocumentData;
class CubeLineObject;
class CubeObject;
class CubeRecoContext;
#endif
class EquationDetect; class EquationDetect;
class ImageData;
class LSTMRecognizer;
class Tesseract; class Tesseract;
#ifndef NO_CUBE_BUILD
class TesseractCubeCombiner;
#endif
// A collection of various variables for statistics and debugging. // A collection of various variables for statistics and debugging.
struct TesseractStats { struct TesseractStats {
@ -189,7 +183,7 @@ class Tesseract : public Wordrec {
} }
// Destroy any existing pix and return a pointer to the pointer. // Destroy any existing pix and return a pointer to the pointer.
Pix** mutable_pix_binary() { Pix** mutable_pix_binary() {
Clear(); pixDestroy(&pix_binary_);
return &pix_binary_; return &pix_binary_;
} }
Pix* pix_binary() const { Pix* pix_binary() const {
@ -202,16 +196,24 @@ class Tesseract : public Wordrec {
pixDestroy(&pix_grey_); pixDestroy(&pix_grey_);
pix_grey_ = grey_pix; pix_grey_ = grey_pix;
} }
// Returns a pointer to a Pix representing the best available image of the Pix* pix_original() const { return pix_original_; }
// page. The image will be 8-bit grey if the input was grey or color. Note // Takes ownership of the given original_pix.
// that in grey 0 is black and 255 is white. If the input was binary, then void set_pix_original(Pix* original_pix) {
// the returned Pix will be binary. Note that here black is 1 and white is 0. pixDestroy(&pix_original_);
// To tell the difference pixGetDepth() will return 8 or 1. pix_original_ = original_pix;
// In either case, the return value is a borrowed Pix, and should not be // Clone to sublangs as well.
// deleted or pixDestroyed. for (int i = 0; i < sub_langs_.size(); ++i)
Pix* BestPix() const { sub_langs_[i]->set_pix_original(original_pix ? pixClone(original_pix)
return pix_grey_ != NULL ? pix_grey_ : pix_binary_; : nullptr);
} }
// Returns a pointer to a Pix representing the best available (original) image
// of the page. Can be of any bit depth, but never color-mapped, as that has
// always been dealt with. Note that in grey and color, 0 is black and 255 is
// white. If the input was binary, then black is 1 and white is 0.
// To tell the difference pixGetDepth() will return 32, 8 or 1.
// In any case, the return value is a borrowed Pix, and should not be
// deleted or pixDestroyed.
Pix* BestPix() const { return pix_original_; }
void set_pix_thresholds(Pix* thresholds) { void set_pix_thresholds(Pix* thresholds) {
pixDestroy(&pix_thresholds_); pixDestroy(&pix_thresholds_);
pix_thresholds_ = thresholds; pix_thresholds_ = thresholds;
@ -254,11 +256,19 @@ class Tesseract : public Wordrec {
Tesseract* get_sub_lang(int index) const { Tesseract* get_sub_lang(int index) const {
return sub_langs_[index]; return sub_langs_[index];
} }
// Returns true if any language uses Tesseract (as opposed to cube). // Returns true if any language uses Tesseract (as opposed to LSTM).
bool AnyTessLang() const { bool AnyTessLang() const {
if (tessedit_ocr_engine_mode != OEM_CUBE_ONLY) return true; if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
for (int i = 0; i < sub_langs_.size(); ++i) { for (int i = 0; i < sub_langs_.size(); ++i) {
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) return true;
}
return false;
}
// Returns true if any language uses the LSTM.
bool AnyLSTMLang() const {
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) return true;
for (int i = 0; i < sub_langs_.size(); ++i) {
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY)
return true; return true;
} }
return false; return false;
@ -293,6 +303,46 @@ class Tesseract : public Wordrec {
// par_control.cpp // par_control.cpp
void PrerecAllWordsPar(const GenericVector<WordData>& words); void PrerecAllWordsPar(const GenericVector<WordData>& words);
//// linerec.cpp
// Generates training data for training a line recognizer, eg LSTM.
// Breaks the page into lines, according to the boxes, and writes them to a
// serialized DocumentData based on output_basename.
void TrainLineRecognizer(const STRING& input_imagename,
const STRING& output_basename,
BLOCK_LIST *block_list);
// Generates training data for training a line recognizer, eg LSTM.
// Breaks the boxes into lines, normalizes them, converts to ImageData and
// appends them to the given training_data.
void TrainFromBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
BLOCK_LIST *block_list,
DocumentData* training_data);
// Returns an ImageData containing the image of the given textline,
// and ground truth boxes/truth text if available in the input.
// The image is not normalized in any way.
ImageData* GetLineData(const TBOX& line_box,
const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
int start_box, int end_box,
const BLOCK& block);
// Helper gets the image of a rectangle, using the block.re_rotation() if
// needed to get to the image, and rotating the result back to horizontal
// layout. (CJK characters will be on their left sides) The vertical text flag
// is set in the returned ImageData if the text was originally vertical, which
// can be used to invoke a different CJK recognition engine. The revised_box
// is also returned to enable calculation of output bounding boxes.
ImageData* GetRectImage(const TBOX& box, const BLOCK& block, int padding,
TBOX* revised_box) const;
// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
PointerVector<WERD_RES>* words);
// Apply segmentation search to the given set of words, within the constraints
// of the existing ratings matrix. If there is already a best_choice on a word
// leaves it untouched and just sets the done/accepted etc flags.
void SearchWords(PointerVector<WERD_RES>* words);
//// control.h ///////////////////////////////////////////////////////// //// control.h /////////////////////////////////////////////////////////
bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box, bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
const char* word_config, int pass); const char* word_config, int pass);
@ -324,9 +374,8 @@ class Tesseract : public Wordrec {
// Helper to recognize the word using the given (language-specific) tesseract. // Helper to recognize the word using the given (language-specific) tesseract.
// Returns positive if this recognizer found more new best words than the // Returns positive if this recognizer found more new best words than the
// number kept from best_words. // number kept from best_words.
int RetryWithLanguage(const WordData& word_data, int RetryWithLanguage(const WordData& word_data, WordRecognizer recognizer,
WordRecognizer recognizer, bool debug, WERD_RES** in_word,
WERD_RES** in_word,
PointerVector<WERD_RES>* best_words); PointerVector<WERD_RES>* best_words);
// Moves good-looking "noise"/diacritics from the reject list to the main // Moves good-looking "noise"/diacritics from the reject list to the main
// blob list on the current word. Returns true if anything was done, and // blob list on the current word. Returns true if anything was done, and
@ -428,34 +477,6 @@ class Tesseract : public Wordrec {
int *left_ok, int *left_ok,
int *right_ok) const; int *right_ok) const;
//// cube_control.cpp ///////////////////////////////////////////////////
#ifndef NO_CUBE_BUILD
bool init_cube_objects(bool load_combiner,
TessdataManager *tessdata_manager);
// Iterates through tesseract's results and calls cube on each word,
// combining the results with the existing tesseract result.
void run_cube_combiner(PAGE_RES *page_res);
// Recognizes a single word using (only) cube. Compatible with
// Tesseract's classify_word_pass1/classify_word_pass2.
void cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);
// Cube recognizer to recognize a single word as with classify_word_pass1
// but also returns the cube object in case the combiner is needed.
CubeObject* cube_recognize_word(BLOCK* block, WERD_RES* word);
// Combines the cube and tesseract results for a single word, leaving the
// result in tess_word.
void cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
WERD_RES* tess_word);
// Call cube on the current word, and write the result to word.
// Sets up a fake result and returns false if something goes wrong.
bool cube_recognize(CubeObject *cube_obj, BLOCK* block, WERD_RES *word);
void fill_werd_res(const BoxWord& cube_box_word,
const char* cube_best_str,
WERD_RES* tess_werd_res);
bool extract_cube_state(CubeObject* cube_obj, int* num_chars,
Boxa** char_boxes, CharSamp*** char_samples);
bool create_cube_box_word(Boxa *char_boxes, int num_chars,
TBOX word_box, BoxWord* box_word);
#endif
//// output.h ////////////////////////////////////////////////////////// //// output.h //////////////////////////////////////////////////////////
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box); void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box);
@ -475,20 +496,17 @@ class Tesseract : public Wordrec {
// string and recursively any additional languages required by any language // string and recursively any additional languages required by any language
// traineddata file (via tessedit_load_sublangs in its config) that is loaded. // traineddata file (via tessedit_load_sublangs in its config) that is loaded.
// See init_tesseract_internal for args. // See init_tesseract_internal for args.
int init_tesseract(const char *arg0, int init_tesseract(const char* arg0, const char* textbase,
const char *textbase, const char* language, OcrEngineMode oem, char** configs,
const char *language, int configs_size, const GenericVector<STRING>* vars_vec,
OcrEngineMode oem, const GenericVector<STRING>* vars_values,
char **configs, bool set_only_init_params, TessdataManager* mgr);
int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params);
int init_tesseract(const char *datapath, int init_tesseract(const char *datapath,
const char *language, const char *language,
OcrEngineMode oem) { OcrEngineMode oem) {
return init_tesseract(datapath, NULL, language, oem, TessdataManager mgr;
NULL, 0, NULL, NULL, false); return init_tesseract(datapath, NULL, language, oem, NULL, 0, NULL, NULL,
false, &mgr);
} }
// Common initialization for a single language. // Common initialization for a single language.
// arg0 is the datapath for the tessdata directory, which could be the // arg0 is the datapath for the tessdata directory, which could be the
@ -506,36 +524,30 @@ class Tesseract : public Wordrec {
// in vars_vec. // in vars_vec.
// If set_only_init_params is true, then only the initialization variables // If set_only_init_params is true, then only the initialization variables
// will be set. // will be set.
int init_tesseract_internal(const char *arg0, int init_tesseract_internal(const char* arg0, const char* textbase,
const char *textbase, const char* language, OcrEngineMode oem,
const char *language, char** configs, int configs_size,
OcrEngineMode oem, const GenericVector<STRING>* vars_vec,
char **configs, const GenericVector<STRING>* vars_values,
int configs_size, bool set_only_init_params, TessdataManager* mgr);
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params);
// Set the universal_id member of each font to be unique among all // Set the universal_id member of each font to be unique among all
// instances of the same font loaded. // instances of the same font loaded.
void SetupUniversalFontIds(); void SetupUniversalFontIds();
int init_tesseract_lm(const char *arg0, int init_tesseract_lm(const char* arg0, const char* textbase,
const char *textbase, const char* language, TessdataManager* mgr);
const char *language);
void recognize_page(STRING& image_name); void recognize_page(STRING& image_name);
void end_tesseract(); void end_tesseract();
bool init_tesseract_lang_data(const char *arg0, bool init_tesseract_lang_data(const char* arg0, const char* textbase,
const char *textbase, const char* language, OcrEngineMode oem,
const char *language, char** configs, int configs_size,
OcrEngineMode oem, const GenericVector<STRING>* vars_vec,
char **configs, const GenericVector<STRING>* vars_values,
int configs_size, bool set_only_init_params,
const GenericVector<STRING> *vars_vec, TessdataManager* mgr);
const GenericVector<STRING> *vars_values,
bool set_only_init_params);
void ParseLanguageString(const char* lang_str, void ParseLanguageString(const char* lang_str,
GenericVector<STRING>* to_load, GenericVector<STRING>* to_load,
@ -783,16 +795,17 @@ class Tesseract : public Wordrec {
"Generate training data from boxed chars"); "Generate training data from boxed chars");
BOOL_VAR_H(tessedit_make_boxes_from_boxes, false, BOOL_VAR_H(tessedit_make_boxes_from_boxes, false,
"Generate more boxes from boxed chars"); "Generate more boxes from boxed chars");
BOOL_VAR_H(tessedit_train_line_recognizer, false,
"Break input into lines and remap boxes if present");
BOOL_VAR_H(tessedit_dump_pageseg_images, false, BOOL_VAR_H(tessedit_dump_pageseg_images, false,
"Dump intermediate images made during page segmentation"); "Dump intermediate images made during page segmentation");
INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK, INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK,
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
" 5=line, 6=word, 7=char" " 5=line, 6=word, 7=char"
" (Values from PageSegMode enum in publictypes.h)"); " (Values from PageSegMode enum in publictypes.h)");
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY, INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults" "Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
" to loading and running only Tesseract (no Cube, no combiner)." " to loading and running the most accurate available.");
" (Values from OcrEngineMode enum in tesseractclass.h)");
STRING_VAR_H(tessedit_char_blacklist, "", STRING_VAR_H(tessedit_char_blacklist, "",
"Blacklist of chars not to recognize"); "Blacklist of chars not to recognize");
STRING_VAR_H(tessedit_char_whitelist, "", STRING_VAR_H(tessedit_char_whitelist, "",
@ -886,11 +899,12 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(test_pt, false, "Test for point"); BOOL_VAR_H(test_pt, false, "Test for point");
double_VAR_H(test_pt_x, 99999.99, "xcoord"); double_VAR_H(test_pt_x, 99999.99, "xcoord");
double_VAR_H(test_pt_y, 99999.99, "ycoord"); double_VAR_H(test_pt_y, 99999.99, "ycoord");
INT_VAR_H(multilang_debug_level, 0, "Print multilang debug info.");
INT_VAR_H(paragraph_debug_level, 0, "Print paragraph debug info."); INT_VAR_H(paragraph_debug_level, 0, "Print paragraph debug info.");
BOOL_VAR_H(paragraph_text_based, true, BOOL_VAR_H(paragraph_text_based, true,
"Run paragraph detection on the post-text-recognition " "Run paragraph detection on the post-text-recognition "
"(more accurate)"); "(more accurate)");
INT_VAR_H(cube_debug_level, 1, "Print cube debug info."); BOOL_VAR_H(lstm_use_matrix, 1, "Use ratings matrix/beam search with lstm");
STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines"); STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines");
STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines"); STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines");
BOOL_VAR_H(docqual_excuse_outline_errs, false, BOOL_VAR_H(docqual_excuse_outline_errs, false,
@ -926,7 +940,7 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats"); BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats");
BOOL_VAR_H(tessedit_debug_quality_metrics, false, BOOL_VAR_H(tessedit_debug_quality_metrics, false,
"Output data to debug file"); "Output data to debug file");
BOOL_VAR_H(bland_unrej, false, "unrej potential with no chekcs"); BOOL_VAR_H(bland_unrej, false, "unrej potential with no checks");
double_VAR_H(quality_rowrej_pc, 1.1, double_VAR_H(quality_rowrej_pc, 1.1,
"good_quality_doc gte good char limit"); "good_quality_doc gte good char limit");
BOOL_VAR_H(unlv_tilde_crunching, true, BOOL_VAR_H(unlv_tilde_crunching, true,
@ -1005,13 +1019,14 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file"); BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
BOOL_VAR_H(tessedit_create_tsv, false, "Write .tsv output file"); BOOL_VAR_H(tessedit_create_tsv, false, "Write .tsv output file");
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file"); BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
BOOL_VAR_H(textonly_pdf, false,
"Create PDF with only one invisible text layer");
STRING_VAR_H(unrecognised_char, "|", STRING_VAR_H(unrecognised_char, "|",
"Output char for unidentified blobs"); "Output char for unidentified blobs");
INT_VAR_H(suspect_level, 99, "Suspect marker level"); INT_VAR_H(suspect_level, 99, "Suspect marker level");
INT_VAR_H(suspect_space_level, 100, INT_VAR_H(suspect_space_level, 100,
"Min suspect level for rejecting spaces"); "Min suspect level for rejecting spaces");
INT_VAR_H(suspect_short_words, 2, INT_VAR_H(suspect_short_words, 2, "Don't Suspect dict wds longer than this");
"Don't Suspect dict wds longer than this");
BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit"); double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit");
double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit"); double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
@ -1045,13 +1060,11 @@ class Tesseract : public Wordrec {
INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this"); INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this");
BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes"); BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes");
INT_VAR_H(tessedit_page_number, -1, INT_VAR_H(tessedit_page_number, -1,
"-1 -> All pages, else specifc page to process"); "-1 -> All pages, else specific page to process");
BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE"); BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE");
BOOL_VAR_H(interactive_display_mode, false, "Run interactively?"); BOOL_VAR_H(interactive_display_mode, false, "Run interactively?");
STRING_VAR_H(file_type, ".tif", "Filename extension"); STRING_VAR_H(file_type, ".tif", "Filename extension");
BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word"); BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word");
INT_VAR_H(tessdata_manager_debug_level, 0,
"Debug level for TessdataManager functions.");
STRING_VAR_H(tessedit_load_sublangs, "", STRING_VAR_H(tessedit_load_sublangs, "",
"List of languages to load with this one"); "List of languages to load with this one");
BOOL_VAR_H(tessedit_use_primary_params_model, false, BOOL_VAR_H(tessedit_use_primary_params_model, false,
@ -1157,10 +1170,6 @@ class Tesseract : public Wordrec {
PAGE_RES_IT* pr_it, PAGE_RES_IT* pr_it,
FILE *output_file); FILE *output_file);
#ifndef NO_CUBE_BUILD
inline CubeRecoContext *GetCubeRecoContext() { return cube_cntxt_; }
#endif
private: private:
// The filename of a backup config file. If not null, then we currently // The filename of a backup config file. If not null, then we currently
// have a temporary debug config file loaded, and backup_config_file_ // have a temporary debug config file loaded, and backup_config_file_
@ -1171,12 +1180,14 @@ class Tesseract : public Wordrec {
// Image used for input to layout analysis and tesseract recognition. // Image used for input to layout analysis and tesseract recognition.
// May be modified by the ShiroRekhaSplitter to eliminate the top-line. // May be modified by the ShiroRekhaSplitter to eliminate the top-line.
Pix* pix_binary_; Pix* pix_binary_;
// Unmodified image used for input to cube. Always valid.
Pix* cube_binary_;
// Grey-level input image if the input was not binary, otherwise NULL. // Grey-level input image if the input was not binary, otherwise NULL.
Pix* pix_grey_; Pix* pix_grey_;
// Original input image. Color if the input was color.
Pix* pix_original_;
// Thresholds that were used to generate the thresholded image from grey. // Thresholds that were used to generate the thresholded image from grey.
Pix* pix_thresholds_; Pix* pix_thresholds_;
// Debug images. If non-empty, will be written on destruction.
DebugPixa pixa_debug_;
// Input image resolution after any scaling. The resolution is not well // Input image resolution after any scaling. The resolution is not well
// transmitted by operations on Pix, so we keep an independent record here. // transmitted by operations on Pix, so we keep an independent record here.
int source_resolution_; int source_resolution_;
@ -1199,16 +1210,14 @@ class Tesseract : public Wordrec {
Tesseract* most_recently_used_; Tesseract* most_recently_used_;
// The size of the font table, ie max possible font id + 1. // The size of the font table, ie max possible font id + 1.
int font_table_size_; int font_table_size_;
#ifndef NO_CUBE_BUILD
// Cube objects.
CubeRecoContext* cube_cntxt_;
TesseractCubeCombiner *tess_cube_combiner_;
#endif
// Equation detector. Note: this pointer is NOT owned by the class. // Equation detector. Note: this pointer is NOT owned by the class.
EquationDetect* equ_detect_; EquationDetect* equ_detect_;
// LSTM recognizer, if available.
LSTMRecognizer* lstm_recognizer_;
// Output "page" number (actually line number) using TrainLineRecognizer.
int train_line_page_num_;
}; };
} // namespace tesseract } // namespace tesseract
#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H_
#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__

View File

@ -152,19 +152,27 @@ void ImageThresholder::SetImage(const Pix* pix) {
int depth; int depth;
pixGetDimensions(src, &image_width_, &image_height_, &depth); pixGetDimensions(src, &image_width_, &image_height_, &depth);
// Convert the image as necessary so it is one of binary, plain RGB, or // Convert the image as necessary so it is one of binary, plain RGB, or
// 8 bit with no colormap. // 8 bit with no colormap. Guarantee that we always end up with our own copy,
if (depth > 1 && depth < 8) { // not just a clone of the input.
if (pixGetColormap(src)) {
Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
depth = pixGetDepth(tmp);
if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(tmp, false);
pixDestroy(&tmp);
} else {
pix_ = tmp;
}
} else if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(src, false); pix_ = pixConvertTo8(src, false);
} else if (pixGetColormap(src)) {
pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
} else { } else {
pix_ = pixClone(src); pix_ = pixCopy(NULL, src);
} }
depth = pixGetDepth(pix_); depth = pixGetDepth(pix_);
pix_channels_ = depth / 8; pix_channels_ = depth / 8;
pix_wpl_ = pixGetWpl(pix_); pix_wpl_ = pixGetWpl(pix_);
scale_ = 1; scale_ = 1;
estimated_res_ = yres_ = pixGetYRes(src); estimated_res_ = yres_ = pixGetYRes(pix_);
Init(); Init();
} }
@ -173,8 +181,11 @@ void ImageThresholder::SetImage(const Pix* pix) {
// Caller must use pixDestroy to free the created Pix. // Caller must use pixDestroy to free the created Pix.
void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) { void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
if (pix_channels_ == 0) { if (pix_channels_ == 0) {
// We have a binary image, so it just has to be cloned. // We have a binary image, but it still has to be copied, as this API
*pix = GetPixRect(); // allows the caller to modify the output.
Pix* original = GetPixRect();
*pix = pixCopy(nullptr, original);
pixDestroy(&original);
} else { } else {
OtsuThresholdRectToPix(pix_, pix); OtsuThresholdRectToPix(pix_, pix);
} }
@ -257,10 +268,10 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
OpenclDevice od; OpenclDevice od;
if ((num_channels == 4 || num_channels == 1) && if ((num_channels == 4 || num_channels == 1) &&
od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) { od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) {
od.ThresholdRectToPixOCL((const unsigned char*)pixGetData(src_pix), od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
num_channels, pixGetWpl(src_pix) * 4, pixGetWpl(src_pix) * 4, thresholds, hi_values,
thresholds, hi_values, out_pix /*pix_OCL*/, out_pix /*pix_OCL*/, rect_height_, rect_width_,
rect_height_, rect_width_, rect_top_, rect_left_); rect_top_, rect_left_);
} else { } else {
#endif #endif
ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);

View File

@ -17,8 +17,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__ #ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
#define TESSERACT_CCMAIN_THRESHOLDER_H__ #define TESSERACT_CCMAIN_THRESHOLDER_H_
#include "platform.h" #include "platform.h"
#include "publictypes.h" #include "publictypes.h"
@ -186,4 +186,4 @@ class TESS_API ImageThresholder {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCMAIN_THRESHOLDER_H__ #endif // TESSERACT_CCMAIN_THRESHOLDER_H_

View File

@ -12,7 +12,7 @@ endif
include_HEADERS = publictypes.h include_HEADERS = publictypes.h
noinst_HEADERS = \ noinst_HEADERS = \
blamer.h blckerr.h blobbox.h blobs.h blread.h boxread.h boxword.h ccstruct.h coutln.h crakedge.h \ blamer.h blckerr.h blobbox.h blobs.h blread.h boxread.h boxword.h ccstruct.h coutln.h crakedge.h \
detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \ debugpixa.h detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \
imagedata.h \ imagedata.h \
ipoints.h \ ipoints.h \
linlsq.h matrix.h mod128.h normalis.h \ linlsq.h matrix.h mod128.h normalis.h \

View File

@ -317,7 +317,7 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
int num_blobs = word->chopped_word->blobs.size(); int num_blobs = word->chopped_word->blobs.size();
int box_index = 0; int box_index = 0;
int blob_index = 0; int blob_index = 0;
inT16 truth_x; inT16 truth_x = -1;
while (box_index < truth_word_.length() && blob_index < num_blobs) { while (box_index < truth_word_.length() && blob_index < num_blobs) {
truth_x = norm_truth_word_.BlobBox(box_index).right(); truth_x = norm_truth_word_.BlobBox(box_index).right();
TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; TBLOB * curr_blob = word->chopped_word->blobs[blob_index];

View File

@ -1,8 +1,8 @@
/********************************************************************** /**********************************************************************
* File: blobbox.cpp (Formerly blobnbox.c) * File: blobbox.cpp (Formerly blobnbox.c)
* Description: Code for the textord blob class. * Description: Code for the textord blob class.
* Author: Ray Smith * Author: Ray Smith
* Created: Thu Jul 30 09:08:51 BST 1992 * Created: Thu Jul 30 09:08:51 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -31,7 +31,9 @@
#define PROJECTION_MARGIN 10 //arbitrary #define PROJECTION_MARGIN 10 //arbitrary
#define EXTERN #define EXTERN
ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK) ELISTIZE(BLOBNBOX)
ELIST2IZE(TO_ROW)
ELISTIZE(TO_BLOCK)
// Up to 30 degrees is allowed for rotations of diacritic blobs. // Up to 30 degrees is allowed for rotations of diacritic blobs.
const double kCosSmallAngle = 0.866; const double kCosSmallAngle = 0.866;
@ -176,7 +178,7 @@ void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
gaps[dir] = MAX_INT16; gaps[dir] = MAX_INT16;
BLOBNBOX* neighbour = neighbours_[dir]; BLOBNBOX* neighbour = neighbours_[dir];
if (neighbour != NULL) { if (neighbour != NULL) {
TBOX n_box = neighbour->bounding_box(); const TBOX& n_box = neighbour->bounding_box();
if (dir == BND_LEFT || dir == BND_RIGHT) { if (dir == BND_LEFT || dir == BND_RIGHT) {
gaps[dir] = box.x_gap(n_box); gaps[dir] = box.x_gap(n_box);
} else { } else {

View File

@ -815,12 +815,10 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
float input_y_offset = 0.0f; float input_y_offset = 0.0f;
float final_y_offset = static_cast<float>(kBlnBaselineOffset); float final_y_offset = static_cast<float>(kBlnBaselineOffset);
float scale = kBlnXHeight / x_height; float scale = kBlnXHeight / x_height;
if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) { if (row == NULL) {
word_middle = word_box.left(); word_middle = word_box.left();
input_y_offset = word_box.bottom(); input_y_offset = word_box.bottom();
final_y_offset = 0.0f; final_y_offset = 0.0f;
if (hint == tesseract::OEM_CUBE_ONLY)
scale = 1.0f;
} else { } else {
input_y_offset = row->base_line(word_middle) + baseline_shift; input_y_offset = row->base_line(word_middle) + baseline_shift;
} }
@ -834,7 +832,7 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
baseline = blob_box.bottom(); baseline = blob_box.bottom();
blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
scale, scale * 1.5f); scale, scale * 1.5f);
} else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) { } else if (row != NULL) {
baseline = row->base_line(mid_x) + baseline_shift; baseline = row->base_line(mid_x) + baseline_shift;
} }
// The image will be 8-bit grey if the input was grey or color. Note that in // The image will be 8-bit grey if the input was grey or color. Note that in

View File

@ -34,8 +34,7 @@ FILE* OpenBoxFile(const STRING& fname) {
STRING filename = BoxFileName(fname); STRING filename = BoxFileName(fname);
FILE* box_file = NULL; FILE* box_file = NULL;
if (!(box_file = fopen(filename.string(), "rb"))) { if (!(box_file = fopen(filename.string(), "rb"))) {
CANTOPENFILE.error("read_next_box", TESSEXIT, CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s",
"Can't open box file %s",
filename.string()); filename.string());
} }
return box_file; return box_file;
@ -56,6 +55,8 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
GenericVector<char> box_data; GenericVector<char> box_data;
if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data)) if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data))
return false; return false;
// Convert the array of bytes to a string, so it can be used by the parser.
box_data.push_back('\0');
return ReadMemBoxes(target_page, skip_blanks, &box_data[0], boxes, texts, return ReadMemBoxes(target_page, skip_blanks, &box_data[0], boxes, texts,
box_texts, pages); box_texts, pages);
} }

View File

@ -17,8 +17,8 @@
* *
**********************************************************************/ **********************************************************************/
#ifndef TESSERACT_CCUTIL_BOXREAD_H__ #ifndef TESSERACT_CCUTIL_BOXREAD_H_
#define TESSERACT_CCUTIL_BOXREAD_H__ #define TESSERACT_CCUTIL_BOXREAD_H_
#include <stdio.h> #include <stdio.h>
#include "genericvector.h" #include "genericvector.h"
@ -82,4 +82,4 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
STRING* box_str); STRING* box_str);
#endif // TESSERACT_CCUTIL_BOXREAD_H__ #endif // TESSERACT_CCUTIL_BOXREAD_H_

View File

@ -17,8 +17,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CSTRUCT_BOXWORD_H__ #ifndef TESSERACT_CSTRUCT_BOXWORD_H_
#define TESSERACT_CSTRUCT_BOXWORD_H__ #define TESSERACT_CSTRUCT_BOXWORD_H_
#include "genericvector.h" #include "genericvector.h"
#include "rect.h" #include "rect.h"
@ -82,9 +82,7 @@ class BoxWord {
const TBOX& bounding_box() const { const TBOX& bounding_box() const {
return bbox_; return bbox_;
} }
int length() const { int length() const { return length_; }
return length_;
}
const TBOX& BlobBox(int index) const { const TBOX& BlobBox(int index) const {
return boxes_[index]; return boxes_[index];
} }
@ -99,5 +97,4 @@ class BoxWord {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CSTRUCT_BOXWORD_H_
#endif // TESSERACT_CSTRUCT_BOXWORD_H__

View File

@ -16,8 +16,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H__ #ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H_
#define TESSERACT_CCSTRUCT_CCSTRUCT_H__ #define TESSERACT_CCSTRUCT_CCSTRUCT_H_
#include "cutil.h" #include "cutil.h"
@ -40,5 +40,4 @@ class CCStruct : public CUtil {
class Tesseract; class Tesseract;
} // namespace tesseract } // namespace tesseract
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H_
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H__

View File

@ -48,9 +48,9 @@ ICOORD C_OUTLINE::step_coords[4] = {
* @param length length of loop * @param length length of loop
*/ */
C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left, C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right,
ICOORD top_right, inT16 length) inT16 length)
: box (bot_left, top_right), start (startpt->pos), offsets(NULL) { : box(bot_left, top_right), start(startpt->pos), offsets(NULL) {
inT16 stepindex; //index to step inT16 stepindex; //index to step
CRACKEDGE *edgept; //current point CRACKEDGE *edgept; //current point
@ -71,7 +71,6 @@ C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left,
} }
} }
/** /**
* @name C_OUTLINE::C_OUTLINE * @name C_OUTLINE::C_OUTLINE
* *
@ -139,7 +138,7 @@ inT16 length //length of loop
* @param rotation rotate to coord * @param rotation rotate to coord
*/ */
C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(NULL) { C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(NULL) {
TBOX new_box; //easy bounding TBOX new_box; //easy bounding
inT16 stepindex; //index to step inT16 stepindex; //index to step
inT16 dirdiff; //direction change inT16 dirdiff; //direction change
@ -300,7 +299,6 @@ inT32 C_OUTLINE::perimeter() const {
return total_steps; return total_steps;
} }
/** /**
* @name C_OUTLINE::outer_area * @name C_OUTLINE::outer_area
* *
@ -332,7 +330,6 @@ inT32 C_OUTLINE::outer_area() const {
return total; return total;
} }
/** /**
* @name C_OUTLINE::count_transitions * @name C_OUTLINE::count_transitions
* *
@ -459,7 +456,6 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) {
return total; return total;
} }
/** /**
* @name C_OUTLINE::operator< * @name C_OUTLINE::operator<
* *
@ -468,8 +464,7 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) {
*/ */
BOOL8 BOOL8
C_OUTLINE::operator< (const C_OUTLINE & other) const C_OUTLINE::operator<(const C_OUTLINE& other) const {
{
inT16 count = 0; //winding count inT16 count = 0; //winding count
ICOORD pos; //position of point ICOORD pos; //position of point
inT32 stepindex; //index to cstep inT32 stepindex; //index to cstep
@ -495,7 +490,6 @@ C_OUTLINE::operator< (const C_OUTLINE & other) const
return count != 0; return count != 0;
} }
/** /**
* @name C_OUTLINE::winding_number * @name C_OUTLINE::winding_number
* *
@ -534,7 +528,6 @@ inT16 C_OUTLINE::winding_number(ICOORD point) const {
return count; //winding number return count; //winding number
} }
/** /**
* C_OUTLINE::turn_direction * C_OUTLINE::turn_direction
* *
@ -563,7 +556,6 @@ inT16 C_OUTLINE::turn_direction() const { //winding number
return count; //winding number return count; //winding number
} }
/** /**
* @name C_OUTLINE::reverse * @name C_OUTLINE::reverse
* *
@ -586,7 +578,6 @@ void C_OUTLINE::reverse() { //reverse drection
} }
} }
/** /**
* @name C_OUTLINE::move * @name C_OUTLINE::move
* *
@ -661,14 +652,27 @@ static void ComputeGradient(const l_uint32* data, int wpl,
int x, int y, int width, int height, int x, int y, int width, int height,
ICOORD* gradient) { ICOORD* gradient) {
const l_uint32* line = data + y * wpl; const l_uint32* line = data + y * wpl;
int pix_x_y = x < width && y < height ? int pix_x_y =
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x) : 255; x < width && y < height
int pix_x_prevy = x < width && y > 0 ? ? GET_DATA_BYTE(
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line - wpl)), x) : 255; const_cast<void*>(reinterpret_cast<const void*>(line)), x)
int pix_prevx_prevy = x > 0 && y > 0 ? : 255;
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<void const*>(line - wpl)), x - 1) : 255; int pix_x_prevy =
int pix_prevx_y = x > 0 && y < height ? x < width && y > 0
GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x - 1) : 255; ? GET_DATA_BYTE(
const_cast<void*>(reinterpret_cast<const void*>(line - wpl)), x)
: 255;
int pix_prevx_prevy =
x > 0 && y > 0
? GET_DATA_BYTE(
const_cast<void*>(reinterpret_cast<void const*>(line - wpl)),
x - 1)
: 255;
int pix_prevx_y =
x > 0 && y < height
? GET_DATA_BYTE(
const_cast<void*>(reinterpret_cast<const void*>(line)), x - 1)
: 255;
gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy)); gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy));
gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y)); gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y));
} }
@ -684,8 +688,10 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign,
if (y <= 0 || y >= height) if (y <= 0 || y >= height)
return false; return false;
const l_uint32* line = data + y * wpl; const l_uint32* line = data + y * wpl;
int pixel1 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line - wpl)), x); int pixel1 = GET_DATA_BYTE(
int pixel2 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x); const_cast<void*>(reinterpret_cast<const void*>(line - wpl)), x);
int pixel2 =
GET_DATA_BYTE(const_cast<void*>(reinterpret_cast<const void*>(line)), x);
int diff = (pixel2 - pixel1) * diff_sign; int diff = (pixel2 - pixel1) * diff_sign;
if (diff > *best_diff) { if (diff > *best_diff) {
*best_diff = diff; *best_diff = diff;
@ -705,8 +711,10 @@ static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign,
int* best_diff, int* best_sum, int* best_x) { int* best_diff, int* best_sum, int* best_x) {
if (x <= 0 || x >= width) if (x <= 0 || x >= width)
return false; return false;
int pixel1 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x - 1); int pixel1 = GET_DATA_BYTE(
int pixel2 = GET_DATA_BYTE(const_cast<void*> (reinterpret_cast<const void *>(line)), x); const_cast<void*>(reinterpret_cast<const void*>(line)), x - 1);
int pixel2 =
GET_DATA_BYTE(const_cast<void*>(reinterpret_cast<const void*>(line)), x);
int diff = (pixel2 - pixel1) * diff_sign; int diff = (pixel2 - pixel1) * diff_sign;
if (diff > *best_diff) { if (diff > *best_diff) {
*best_diff = diff; *best_diff = diff;
@ -954,8 +962,7 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
*/ */
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
void C_OUTLINE::plot(ScrollView* window, void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const {
ScrollView::Color colour) const {
inT16 stepindex; // index to cstep inT16 stepindex; // index to cstep
ICOORD pos; // current position ICOORD pos; // current position
DIR128 stepdir; // direction of step DIR128 stepdir; // direction of step
@ -1016,7 +1023,6 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
} }
#endif #endif
/** /**
* @name C_OUTLINE::operator= * @name C_OUTLINE::operator=
* *
@ -1024,7 +1030,7 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
* @param source assign from this * @param source assign from this
*/ */
C_OUTLINE & C_OUTLINE::operator= (const C_OUTLINE & source) { C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) {
box = source.box; box = source.box;
start = source.start; start = source.start;
if (steps != NULL) if (steps != NULL)

52
ccstruct/debugpixa.h Normal file
View File

@ -0,0 +1,52 @@
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#include "allheaders.h"
namespace tesseract {
// Class to hold a Pixa collection of debug images with captions and save them
// to a PDF file.
class DebugPixa {
public:
// TODO(rays) add another constructor with size control.
DebugPixa() {
pixa_ = pixaCreate(0);
fonts_ = bmfCreate(nullptr, 14);
}
// If the filename_ has been set and there are any debug images, they are
// written to the set filename_.
~DebugPixa() {
pixaDestroy(&pixa_);
bmfDestroy(&fonts_);
}
// Adds the given pix to the set of pages in the PDF file, with the given
// caption added to the top.
void AddPix(const Pix* pix, const char* caption) {
int depth = pixGetDepth(const_cast<Pix*>(pix));
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
Pix* pix_debug = pixAddSingleTextblock(
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
pixaAddPix(pixa_, pix_debug, L_INSERT);
}
// Sets the destination filename and enables images to be written to a PDF
// on destruction.
void WritePDF(const char* filename) {
if (pixaGetCount(pixa_) > 0) {
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
pixaClear(pixa_);
}
}
private:
// The collection of images to put in the PDF.
Pixa* pixa_;
// The fonts used to draw text captions.
L_Bmf* fonts_;
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_

View File

@ -17,8 +17,8 @@
* *
**********************************************************************/ **********************************************************************/
#ifndef TESSERACT_CCSTRUCT_DPPOINT_H__ #ifndef TESSERACT_CCSTRUCT_DPPOINT_H_
#define TESSERACT_CCSTRUCT_DPPOINT_H__ #define TESSERACT_CCSTRUCT_DPPOINT_H_
#include "host.h" #include "host.h"
@ -98,5 +98,4 @@ class DPPoint {
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DPPOINT_H__ #endif // TESSERACT_CCSTRUCT_DPPOINT_H_

View File

@ -31,7 +31,7 @@ bool FontInfo::Serialize(FILE* fp) const {
} }
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool FontInfo::DeSerialize(bool swap, FILE* fp) { bool FontInfo::DeSerialize(bool swap, TFile* fp) {
if (!read_info(fp, this, swap)) return false; if (!read_info(fp, this, swap)) return false;
if (!read_spacing_info(fp, this, swap)) return false; if (!read_spacing_info(fp, this, swap)) return false;
return true; return true;
@ -51,7 +51,7 @@ bool FontInfoTable::Serialize(FILE* fp) const {
} }
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool FontInfoTable::DeSerialize(bool swap, FILE* fp) { bool FontInfoTable::DeSerialize(bool swap, TFile* fp) {
truncate(0); truncate(0);
return this->DeSerializeClasses(swap, fp); return this->DeSerializeClasses(swap, fp);
} }
@ -149,19 +149,15 @@ void FontSetDeleteCallback(FontSet fs) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(FILE* f, FontInfo* fi, bool swap) { bool read_info(TFile* f, FontInfo* fi, bool swap) {
inT32 size; inT32 size;
if (fread(&size, sizeof(size), 1, f) != 1) return false; if (f->FReadEndian(&size, sizeof(size), 1, swap) != 1) return false;
if (swap)
Reverse32(&size);
char* font_name = new char[size + 1]; char* font_name = new char[size + 1];
fi->name = font_name; fi->name = font_name;
if (static_cast<int>(fread(font_name, sizeof(*font_name), size, f)) != size) if (f->FRead(font_name, sizeof(*font_name), size) != size) return false;
return false;
font_name[size] = '\0'; font_name[size] = '\0';
if (fread(&fi->properties, sizeof(fi->properties), 1, f) != 1) return false; if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1, swap) != 1)
if (swap) return false;
Reverse32(&fi->properties);
return true; return true;
} }
@ -174,26 +170,22 @@ bool write_info(FILE* f, const FontInfo& fi) {
return true; return true;
} }
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) { bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) {
inT32 vec_size, kern_size; inT32 vec_size, kern_size;
if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false; if (f->FReadEndian(&vec_size, sizeof(vec_size), 1, swap) != 1) return false;
if (swap) Reverse32(&vec_size);
ASSERT_HOST(vec_size >= 0); ASSERT_HOST(vec_size >= 0);
if (vec_size == 0) return true; if (vec_size == 0) return true;
fi->init_spacing(vec_size); fi->init_spacing(vec_size);
for (int i = 0; i < vec_size; ++i) { for (int i = 0; i < vec_size; ++i) {
FontSpacingInfo *fs = new FontSpacingInfo(); FontSpacingInfo *fs = new FontSpacingInfo();
if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 || if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, swap) !=
fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 || 1 ||
fread(&kern_size, sizeof(kern_size), 1, f) != 1) { f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, swap) !=
1 ||
f->FReadEndian(&kern_size, sizeof(kern_size), 1, swap) != 1) {
delete fs; delete fs;
return false; return false;
} }
if (swap) {
ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before));
ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after));
Reverse32(&kern_size);
}
if (kern_size < 0) { // indication of a NULL entry in fi->spacing_vec if (kern_size < 0) { // indication of a NULL entry in fi->spacing_vec
delete fs; delete fs;
continue; continue;
@ -237,16 +229,12 @@ bool write_spacing_info(FILE* f, const FontInfo& fi) {
return true; return true;
} }
bool read_set(FILE* f, FontSet* fs, bool swap) { bool read_set(TFile* f, FontSet* fs, bool swap) {
if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false; if (f->FReadEndian(&fs->size, sizeof(fs->size), 1, swap) != 1) return false;
if (swap)
Reverse32(&fs->size);
fs->configs = new int[fs->size]; fs->configs = new int[fs->size];
for (int i = 0; i < fs->size; ++i) { if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size, swap) !=
if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false; fs->size)
if (swap) return false;
Reverse32(&fs->configs[i]);
}
return true; return true;
} }

View File

@ -67,7 +67,7 @@ struct FontInfo {
bool Serialize(FILE* fp) const; bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp); bool DeSerialize(bool swap, TFile* fp);
// Reserves unicharset_size spots in spacing_vec. // Reserves unicharset_size spots in spacing_vec.
void init_spacing(int unicharset_size) { void init_spacing(int unicharset_size) {
@ -152,7 +152,7 @@ class FontInfoTable : public GenericVector<FontInfo> {
bool Serialize(FILE* fp) const; bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp); bool DeSerialize(bool swap, TFile* fp);
// Returns true if the given set of fonts includes one with the same // Returns true if the given set of fonts includes one with the same
// properties as font_id. // properties as font_id.
@ -177,11 +177,11 @@ void FontInfoDeleteCallback(FontInfo f);
void FontSetDeleteCallback(FontSet fs); void FontSetDeleteCallback(FontSet fs);
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(FILE* f, FontInfo* fi, bool swap); bool read_info(TFile* f, FontInfo* fi, bool swap);
bool write_info(FILE* f, const FontInfo& fi); bool write_info(FILE* f, const FontInfo& fi);
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap); bool read_spacing_info(TFile* f, FontInfo* fi, bool swap);
bool write_spacing_info(FILE* f, const FontInfo& fi); bool write_spacing_info(FILE* f, const FontInfo& fi);
bool read_set(FILE* f, FontSet* fs, bool swap); bool read_set(TFile* f, FontSet* fs, bool swap);
bool write_set(FILE* f, const FontSet& fs); bool write_set(FILE* f, const FontSet& fs);
} // namespace tesseract. } // namespace tesseract.

View File

@ -1,3 +1,12 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef HPDSIZES_H #ifndef HPDSIZES_H
#define HPDSIZES_H #define HPDSIZES_H

View File

@ -24,12 +24,22 @@
#include "imagedata.h" #include "imagedata.h"
#if defined(__MINGW32__)
#include <unistd.h>
#else
#include <thread>
#endif
#include "allheaders.h" #include "allheaders.h"
#include "boxread.h" #include "boxread.h"
#include "callcpp.h" #include "callcpp.h"
#include "helpers.h" #include "helpers.h"
#include "tprintf.h" #include "tprintf.h"
// Number of documents to read ahead while training. Doesn't need to be very
// large.
const int kMaxReadAhead = 8;
namespace tesseract { namespace tesseract {
WordFeature::WordFeature() : x_(0), y_(0), dir_(0) { WordFeature::WordFeature() : x_(0), y_(0), dir_(0) {
@ -182,6 +192,19 @@ bool ImageData::DeSerialize(bool swap, TFile* fp) {
return true; return true;
} }
// As DeSerialize, but only seeks past the data - hence a static method.
bool ImageData::SkipDeSerialize(bool swap, TFile* fp) {
if (!STRING::SkipDeSerialize(swap, fp)) return false;
inT32 page_number;
if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false;
if (!GenericVector<char>::SkipDeSerialize(swap, fp)) return false;
if (!STRING::SkipDeSerialize(swap, fp)) return false;
if (!GenericVector<TBOX>::SkipDeSerialize(swap, fp)) return false;
if (!GenericVector<STRING>::SkipDeSerializeClasses(swap, fp)) return false;
inT8 vertical = 0;
return fp->FRead(&vertical, sizeof(vertical), 1) == 1;
}
// Saves the given Pix as a PNG-encoded string and destroys it. // Saves the given Pix as a PNG-encoded string and destroys it.
void ImageData::SetPix(Pix* pix) { void ImageData::SetPix(Pix* pix) {
SetPixInternal(pix, &image_data_); SetPixInternal(pix, &image_data_);
@ -195,37 +218,34 @@ Pix* ImageData::GetPix() const {
// Gets anything and everything with a non-NULL pointer, prescaled to a // Gets anything and everything with a non-NULL pointer, prescaled to a
// given target_height (if 0, then the original image height), and aligned. // given target_height (if 0, then the original image height), and aligned.
// Also returns (if not NULL) the width and height of the scaled image. // Also returns (if not NULL) the width and height of the scaled image.
// The return value is the scale factor that was applied to the image to // The return value is the scaled Pix, which must be pixDestroyed after use,
// achieve the target_height. // and scale_factor (if not NULL) is set to the scale factor that was applied
float ImageData::PreScale(int target_height, Pix** pix, // to the image to achieve the target_height.
int* scaled_width, int* scaled_height, Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor,
GenericVector<TBOX>* boxes) const { int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const {
int input_width = 0; int input_width = 0;
int input_height = 0; int input_height = 0;
Pix* src_pix = GetPix(); Pix* src_pix = GetPix();
ASSERT_HOST(src_pix != NULL); ASSERT_HOST(src_pix != NULL);
input_width = pixGetWidth(src_pix); input_width = pixGetWidth(src_pix);
input_height = pixGetHeight(src_pix); input_height = pixGetHeight(src_pix);
if (target_height == 0) if (target_height == 0) {
target_height = input_height; target_height = MIN(input_height, max_height);
}
float im_factor = static_cast<float>(target_height) / input_height; float im_factor = static_cast<float>(target_height) / input_height;
if (scaled_width != NULL) if (scaled_width != NULL)
*scaled_width = IntCastRounded(im_factor * input_width); *scaled_width = IntCastRounded(im_factor * input_width);
if (scaled_height != NULL) if (scaled_height != NULL)
*scaled_height = target_height; *scaled_height = target_height;
if (pix != NULL) { // Get the scaled image.
// Get the scaled image. Pix* pix = pixScale(src_pix, im_factor, im_factor);
pixDestroy(pix); if (pix == NULL) {
*pix = pixScale(src_pix, im_factor, im_factor); tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
if (*pix == NULL) { input_width, input_height, im_factor);
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
input_width, input_height, im_factor);
}
if (scaled_width != NULL)
*scaled_width = pixGetWidth(*pix);
if (scaled_height != NULL)
*scaled_height = pixGetHeight(*pix);
} }
if (scaled_width != NULL) *scaled_width = pixGetWidth(pix);
if (scaled_height != NULL) *scaled_height = pixGetHeight(pix);
pixDestroy(&src_pix); pixDestroy(&src_pix);
if (boxes != NULL) { if (boxes != NULL) {
// Get the boxes. // Get the boxes.
@ -241,7 +261,8 @@ float ImageData::PreScale(int target_height, Pix** pix,
boxes->push_back(box); boxes->push_back(box);
} }
} }
return im_factor; if (scale_factor != NULL) *scale_factor = im_factor;
return pix;
} }
int ImageData::MemoryUsed() const { int ImageData::MemoryUsed() const {
@ -266,19 +287,20 @@ void ImageData::Display() const {
// Draw the boxes. // Draw the boxes.
win->Pen(ScrollView::RED); win->Pen(ScrollView::RED);
win->Brush(ScrollView::NONE); win->Brush(ScrollView::NONE);
win->TextAttributes("Arial", kTextSize, false, false, false); int text_size = kTextSize;
for (int b = 0; b < boxes_.size(); ++b) { if (!boxes_.empty() && boxes_[0].height() * 2 < text_size)
boxes_[b].plot(win); text_size = boxes_[0].height() * 2;
win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string()); win->TextAttributes("Arial", text_size, false, false, false);
TBOX scaled(boxes_[b]); if (!boxes_.empty()) {
scaled.scale(256.0 / height); for (int b = 0; b < boxes_.size(); ++b) {
scaled.plot(win); boxes_[b].plot(win);
win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string());
}
} else {
// The full transcription.
win->Pen(ScrollView::CYAN);
win->Text(0, height + kTextSize * 2, transcription_.string());
} }
// The full transcription.
win->Pen(ScrollView::CYAN);
win->Text(0, height + kTextSize * 2, transcription_.string());
// Add the features.
win->Pen(ScrollView::GREEN);
win->Update(); win->Update();
window_wait(win); window_wait(win);
#endif #endif
@ -340,27 +362,51 @@ bool ImageData::AddBoxes(const char* box_text) {
return false; return false;
} }
DocumentData::DocumentData(const STRING& name) // Thread function to call ReCachePages.
: document_name_(name), pages_offset_(0), total_pages_(0), void* ReCachePagesFunc(void* data) {
memory_used_(0), max_memory_(0), reader_(NULL) {} DocumentData* document_data = reinterpret_cast<DocumentData*>(data);
document_data->ReCachePages();
return NULL;
}
DocumentData::~DocumentData() {} DocumentData::DocumentData(const STRING& name)
: document_name_(name),
pages_offset_(-1),
total_pages_(-1),
memory_used_(0),
max_memory_(0),
reader_(NULL) {}
DocumentData::~DocumentData() {
SVAutoLock lock_p(&pages_mutex_);
SVAutoLock lock_g(&general_mutex_);
}
// Reads all the pages in the given lstmf filename to the cache. The reader // Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file. // is used to read the file.
bool DocumentData::LoadDocument(const char* filename, const char* lang, bool DocumentData::LoadDocument(const char* filename, const char* lang,
int start_page, inT64 max_memory, int start_page, inT64 max_memory,
FileReader reader) { FileReader reader) {
SetDocument(filename, lang, max_memory, reader);
pages_offset_ = start_page;
return ReCachePages();
}
// Sets up the document, without actually loading it.
void DocumentData::SetDocument(const char* filename, const char* lang,
inT64 max_memory, FileReader reader) {
SVAutoLock lock_p(&pages_mutex_);
SVAutoLock lock(&general_mutex_);
document_name_ = filename; document_name_ = filename;
lang_ = lang; lang_ = lang;
pages_offset_ = start_page; pages_offset_ = -1;
max_memory_ = max_memory; max_memory_ = max_memory;
reader_ = reader; reader_ = reader;
return ReCachePages();
} }
// Writes all the pages to the given filename. Returns false on error. // Writes all the pages to the given filename. Returns false on error.
bool DocumentData::SaveDocument(const char* filename, FileWriter writer) { bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
SVAutoLock lock(&pages_mutex_);
TFile fp; TFile fp;
fp.OpenWrite(NULL); fp.OpenWrite(NULL);
if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) { if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
@ -370,112 +416,184 @@ bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
return true; return true;
} }
bool DocumentData::SaveToBuffer(GenericVector<char>* buffer) { bool DocumentData::SaveToBuffer(GenericVector<char>* buffer) {
SVAutoLock lock(&pages_mutex_);
TFile fp; TFile fp;
fp.OpenWrite(buffer); fp.OpenWrite(buffer);
return pages_.Serialize(&fp); return pages_.Serialize(&fp);
} }
// Returns a pointer to the page with the given index, modulo the total // Adds the given page data to this document, counting up memory.
// number of pages, recaching if needed. void DocumentData::AddPageToDocument(ImageData* page) {
const ImageData* DocumentData::GetPage(int index) { SVAutoLock lock(&pages_mutex_);
index = Modulo(index, total_pages_); pages_.push_back(page);
if (index < pages_offset_ || index >= pages_offset_ + pages_.size()) { set_memory_used(memory_used() + page->MemoryUsed());
pages_offset_ = index;
if (!ReCachePages()) return NULL;
}
return pages_[index - pages_offset_];
} }
// Loads as many pages can fit in max_memory_ starting at index pages_offset_. // If the given index is not currently loaded, loads it using a separate
// thread.
void DocumentData::LoadPageInBackground(int index) {
ImageData* page = NULL;
if (IsPageAvailable(index, &page)) return;
SVAutoLock lock(&pages_mutex_);
if (pages_offset_ == index) return;
pages_offset_ = index;
pages_.clear();
SVSync::StartThread(ReCachePagesFunc, this);
}
// Fetches the page with the given index (taken modulo the number of pages by
// IsPageAvailable). Does not return until IsPageAvailable reports success,
// polling once per second while a background load runs.
const ImageData* DocumentData::GetPage(int index) {
  ImageData* result = NULL;
  for (;;) {
    if (IsPageAvailable(index, &result)) break;
    // Request a background load unless one for this index is already pending.
    pages_mutex_.Lock();
    const bool load_pending = (pages_offset_ == index);
    pages_mutex_.Unlock();
    if (!load_pending) LoadPageInBackground(index);
    // Loading directly here is not an option: the background load would delete
    // the page while the caller still uses it. Wait and poll again instead.
#if defined(__MINGW32__)
    sleep(1);
#else
    std::this_thread::sleep_for(std::chrono::seconds(1));
#endif
  }
  return result;
}
// Returns true if the requested page is available, and provides a pointer,
// which may be NULL if the document is empty. May block, even though it
// doesn't guarantee to return true.
bool DocumentData::IsPageAvailable(int index, ImageData** page) {
SVAutoLock lock(&pages_mutex_);
int num_pages = NumPages();
if (num_pages == 0 || index < 0) {
*page = NULL; // Empty Document.
return true;
}
if (num_pages > 0) {
index = Modulo(index, num_pages);
if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
*page = pages_[index - pages_offset_]; // Page is available already.
return true;
}
}
return false;
}
// Removes all pages from memory and frees the memory, but does not forget
// the document metadata. Resets pages_offset_ and the total page count to -1
// and the memory counter to 0. Returns the value memory_used() reported
// before the pages were cleared.
inT64 DocumentData::UnCache() {
  SVAutoLock lock(&pages_mutex_);
  inT64 memory_saved = memory_used();
  pages_.clear();
  pages_offset_ = -1;
  set_total_pages(-1);
  set_memory_used(0);
  // memory_saved is an inT64, which does not match a %d conversion; print it
  // with %lld (via an explicit long long cast) so the argument width and the
  // format specifier agree.
  tprintf("Unloaded document %s, saving %lld memory\n",
          document_name_.string(), static_cast<long long>(memory_saved));
  return memory_saved;
}
// Shuffles all the pages in the document.
void DocumentData::Shuffle() {
TRand random;
// Different documents get shuffled differently, but the same for the same
// name.
random.set_seed(document_name_.string());
int num_pages = pages_.size();
// Execute one random swap for each page in the document.
for (int i = 0; i < num_pages; ++i) {
int src = random.IntRand() % num_pages;
int dest = random.IntRand() % num_pages;
std::swap(pages_[src], pages_[dest]);
}
}
// Locks the pages_mutex_ and loads as many pages as can fit in max_memory_,
// starting at index pages_offset_.
bool DocumentData::ReCachePages() { bool DocumentData::ReCachePages() {
SVAutoLock lock(&pages_mutex_);
// Read the file. // Read the file.
set_total_pages(0);
set_memory_used(0);
int loaded_pages = 0;
pages_.truncate(0);
TFile fp; TFile fp;
if (!fp.Open(document_name_, reader_)) return false; if (!fp.Open(document_name_, reader_) ||
memory_used_ = 0; !PointerVector<ImageData>::DeSerializeSize(false, &fp, &loaded_pages) ||
if (!pages_.DeSerialize(false, &fp)) { loaded_pages <= 0) {
tprintf("Deserialize failed: %s\n", document_name_.string()); tprintf("Deserialize header failed: %s\n", document_name_.string());
pages_.truncate(0);
return false; return false;
} }
total_pages_ = pages_.size(); pages_offset_ %= loaded_pages;
pages_offset_ %= total_pages_; // Skip pages before the first one we want, and load the rest until max
// Delete pages before the first one we want, and relocate the rest. // memory and skip the rest after that.
int page; int page;
for (page = 0; page < pages_.size(); ++page) { for (page = 0; page < loaded_pages; ++page) {
if (page < pages_offset_) { if (page < pages_offset_ ||
delete pages_[page]; (max_memory_ > 0 && memory_used() > max_memory_)) {
pages_[page] = NULL; if (!PointerVector<ImageData>::DeSerializeSkip(false, &fp)) break;
} else { } else {
ImageData* image_data = pages_[page]; if (!pages_.DeSerializeElement(false, &fp)) break;
if (max_memory_ > 0 && page > pages_offset_ && ImageData* image_data = pages_.back();
memory_used_ + image_data->MemoryUsed() > max_memory_)
break; // Don't go over memory quota unless the first image.
if (image_data->imagefilename().length() == 0) { if (image_data->imagefilename().length() == 0) {
image_data->set_imagefilename(document_name_); image_data->set_imagefilename(document_name_);
image_data->set_page_number(page); image_data->set_page_number(page);
} }
image_data->set_language(lang_); image_data->set_language(lang_);
memory_used_ += image_data->MemoryUsed(); set_memory_used(memory_used() + image_data->MemoryUsed());
if (pages_offset_ != 0) {
pages_[page - pages_offset_] = image_data;
pages_[page] = NULL;
}
} }
} }
pages_.truncate(page - pages_offset_); if (page < loaded_pages) {
tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", tprintf("Deserialize failed: %s read %d/%d pages\n",
pages_.size(), total_pages_, pages_offset_, document_name_.string(), page, loaded_pages);
pages_offset_ + pages_.size(), document_name_.string()); pages_.truncate(0);
} else {
tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", pages_.size(),
loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(),
document_name_.string());
}
set_total_pages(loaded_pages);
return !pages_.empty(); return !pages_.empty();
} }
// Adds the given page data to this document, counting up memory.
void DocumentData::AddPageToDocument(ImageData* page) {
pages_.push_back(page);
memory_used_ += page->MemoryUsed();
}
// A collection of DocumentData that knows roughly how much memory it is using. // A collection of DocumentData that knows roughly how much memory it is using.
DocumentCache::DocumentCache(inT64 max_memory) DocumentCache::DocumentCache(inT64 max_memory)
: total_pages_(0), memory_used_(0), max_memory_(max_memory) {} : num_pages_per_doc_(0), max_memory_(max_memory) {}
DocumentCache::~DocumentCache() {} DocumentCache::~DocumentCache() {}
// Adds all the documents in the list of filenames, counting memory. // Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files. // The reader is used to read the files.
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames, bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
const char* lang, FileReader reader) { const char* lang,
inT64 fair_share_memory = max_memory_ / filenames.size(); CachingStrategy cache_strategy,
FileReader reader) {
cache_strategy_ = cache_strategy;
inT64 fair_share_memory = 0;
// In the round-robin case, each DocumentData handles restricting its content
// to its fair share of memory. In the sequential case, DocumentCache
// determines which DocumentDatas are held entirely in memory.
if (cache_strategy_ == CS_ROUND_ROBIN)
fair_share_memory = max_memory_ / filenames.size();
for (int arg = 0; arg < filenames.size(); ++arg) { for (int arg = 0; arg < filenames.size(); ++arg) {
STRING filename = filenames[arg]; STRING filename = filenames[arg];
DocumentData* document = new DocumentData(filename); DocumentData* document = new DocumentData(filename);
if (document->LoadDocument(filename.string(), lang, 0, document->SetDocument(filename.string(), lang, fair_share_memory, reader);
fair_share_memory, reader)) { AddToCache(document);
AddToCache(document);
} else {
tprintf("Failed to load image %s!\n", filename.string());
delete document;
}
} }
tprintf("Loaded %d pages, total %gMB\n", if (!documents_.empty()) {
total_pages_, memory_used_ / 1048576.0); // Try to get the first page now to verify the list of filenames.
return total_pages_ > 0; if (GetPageBySerial(0) != NULL) return true;
tprintf("Load of page 0 failed!\n");
}
return false;
} }
// Adds document to the cache, throwing out other documents if needed. // Adds document to the cache.
bool DocumentCache::AddToCache(DocumentData* data) { bool DocumentCache::AddToCache(DocumentData* data) {
inT64 new_memory = data->memory_used();
memory_used_ += new_memory;
documents_.push_back(data); documents_.push_back(data);
total_pages_ += data->NumPages();
// Delete the first item in the array, and other pages of the same name
// while memory is full.
while (memory_used_ >= max_memory_ && max_memory_ > 0) {
tprintf("Memory used=%lld vs max=%lld, discarding doc of size %lld\n",
memory_used_ , max_memory_, documents_[0]->memory_used());
memory_used_ -= documents_[0]->memory_used();
total_pages_ -= documents_[0]->NumPages();
documents_.remove(0);
}
return true; return true;
} }
@ -488,11 +606,104 @@ DocumentData* DocumentCache::FindDocument(const STRING& document_name) const {
return NULL; return NULL;
} }
// Returns the number of pages that make up one epoch. Under CS_SEQUENTIAL
// this is the per-document page count multiplied by the number of documents.
// Otherwise every document is asked for page 0 and the individual page counts
// are summed, which could take a long time.
int DocumentCache::TotalPages() {
  if (cache_strategy_ != CS_SEQUENTIAL) {
    const int doc_count = documents_.size();
    int page_sum = 0;
    for (int d = 0; d < doc_count; ++d) {
      // A page has to be loaded before NumPages() is valid.
      documents_[d]->GetPage(0);
      page_sum += documents_[d]->NumPages();
    }
    return page_sum;
  }
  // Sequential mode assumes every document has the same number of pages,
  // whether or not that is true.
  if (num_pages_per_doc_ == 0) GetPageSequential(0);
  return num_pages_per_doc_ * documents_.size();
}
// Returns a page by serial number, selecting them in a round-robin fashion // Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents. // from all the documents. Highly disk-intensive, but doesn't need samples
const ImageData* DocumentCache::GetPageBySerial(int serial) { // to be shuffled between files to begin with.
int document_index = serial % documents_.size(); const ImageData* DocumentCache::GetPageRoundRobin(int serial) {
return documents_[document_index]->GetPage(serial / documents_.size()); int num_docs = documents_.size();
int doc_index = serial % num_docs;
const ImageData* doc = documents_[doc_index]->GetPage(serial / num_docs);
for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
doc_index = (serial + offset) % num_docs;
int page = (serial + offset) / num_docs;
documents_[doc_index]->LoadPageInBackground(page);
}
return doc;
}
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
// The document is chosen as (serial / num_pages_per_doc_) % num_docs and the
// page within it as serial % num_pages_per_doc_, where num_pages_per_doc_ is
// taken from the first document the first time it is needed. After the page
// has been fetched, documents are un-cached while the summed memory_used() is
// at or above max_memory_, and a background load of the following document is
// started if it is not cached and the total is below max_memory_.
const ImageData* DocumentCache::GetPageSequential(int serial) {
int num_docs = documents_.size();
ASSERT_HOST(num_docs > 0);
if (num_pages_per_doc_ == 0) {
// Use the pages in the first doc as the number of pages in each doc.
// GetPage(0) is called only so that NumPages() below has a value to report.
documents_[0]->GetPage(0);
num_pages_per_doc_ = documents_[0]->NumPages();
if (num_pages_per_doc_ == 0) {
tprintf("First document cannot be empty!!\n");
ASSERT_HOST(num_pages_per_doc_ > 0);
}
// Get rid of zero now if we don't need it.
// (That is: un-cache document 0 when serial maps to a different document.)
if (serial / num_pages_per_doc_ % num_docs > 0) documents_[0]->UnCache();
}
// Map the serial number to a document and to a page within that document.
int doc_index = serial / num_pages_per_doc_ % num_docs;
const ImageData* doc =
documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
// Count up total memory. Background loading makes it more complicated to
// keep a running count.
inT64 total_memory = 0;
for (int d = 0; d < num_docs; ++d) {
total_memory += documents_[d]->memory_used();
}
if (total_memory >= max_memory_) {
// Find something to un-cache.
// If there are more than 3 in front, then serial is from the back reader
// of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then
// we create a hole between them and then un-caching the backmost occupied
// will work for both.
int num_in_front = CountNeighbourDocs(doc_index, 1);
// Walks offsets num_in_front - 2 down to 2, stopping early once the total
// drops below max_memory_.
for (int offset = num_in_front - 2;
offset > 1 && total_memory >= max_memory_; --offset) {
int next_index = (doc_index + offset) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}
// If that didn't work, the best solution is to un-cache from the back. If
// we take away the document that a 2nd reader is using, it will put it
// back and make a hole between.
// With dir == -1 the helper returns a value <= 0, or num_docs when every
// neighbour is cached; in the latter case the loop below does not execute.
int num_behind = CountNeighbourDocs(doc_index, -1);
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_;
++offset) {
// Adding num_docs keeps the left operand of % non-negative.
int next_index = (doc_index + offset + num_docs) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}
}
// Read ahead: start loading the following document if there is room for it.
int next_index = (doc_index + 1) % num_docs;
if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
documents_[next_index]->LoadPageInBackground(0);
}
return doc;
}
// Helper counts the number of adjacent cached neighbours of index looking in
// direction dir, ie index+dir, index+2*dir etc.
int DocumentCache::CountNeighbourDocs(int index, int dir) {
int num_docs = documents_.size();
for (int offset = dir; abs(offset) < num_docs; offset += dir) {
int offset_index = (index + offset + num_docs) % num_docs;
if (!documents_[offset_index]->IsCached()) return offset - dir;
}
return num_docs;
} }
} // namespace tesseract. } // namespace tesseract.

View File

@ -25,6 +25,7 @@
#include "normalis.h" #include "normalis.h"
#include "rect.h" #include "rect.h"
#include "strngs.h" #include "strngs.h"
#include "svutil.h"
struct Pix; struct Pix;
@ -34,8 +35,22 @@ namespace tesseract {
const int kFeaturePadding = 2; const int kFeaturePadding = 2;
// Number of pixels to pad around text boxes. // Number of pixels to pad around text boxes.
const int kImagePadding = 4; const int kImagePadding = 4;
// Number of training images to combine into a mini-batch for training.
const int kNumPagesPerMiniBatch = 100; // Enum to determine the caching and data sequencing strategy.
enum CachingStrategy {
// Reads all of one file before moving on to the next. Requires samples to be
// shuffled across files. Uses the count of samples in the first file as
// the count in all the files to achieve high-speed random access. As a
// consequence, if subsequent files are smaller, they get entries used more
// than once, and if subsequent files are larger, some entries are not used.
// Best for larger data sets that don't fit in memory.
CS_SEQUENTIAL,
// Reads one sample from each file in rotation. Does not require shuffled
// samples, but is extremely disk-intensive. Samples in smaller files also
// get used more often than samples in larger files.
// Best for smaller data sets that mostly fit in memory.
CS_ROUND_ROBIN,
};
class WordFeature { class WordFeature {
public: public:
@ -103,6 +118,8 @@ class ImageData {
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp); bool DeSerialize(bool swap, TFile* fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
// Other accessors. // Other accessors.
const STRING& imagefilename() const { const STRING& imagefilename() const {
@ -145,11 +162,12 @@ class ImageData {
// Gets anything and everything with a non-NULL pointer, prescaled to a // Gets anything and everything with a non-NULL pointer, prescaled to a
// given target_height (if 0, then the original image height), and aligned. // given target_height (if 0, then the original image height), and aligned.
// Also returns (if not NULL) the width and height of the scaled image. // Also returns (if not NULL) the width and height of the scaled image.
// The return value is the scale factor that was applied to the image to // The return value is the scaled Pix, which must be pixDestroyed after use,
// achieve the target_height. // and scale_factor (if not NULL) is set to the scale factor that was applied
float PreScale(int target_height, Pix** pix, // to the image to achieve the target_height.
int* scaled_width, int* scaled_height, Pix* PreScale(int target_height, int max_height, float* scale_factor,
GenericVector<TBOX>* boxes) const; int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const;
int MemoryUsed() const; int MemoryUsed() const;
@ -184,6 +202,8 @@ class ImageData {
// A collection of ImageData that knows roughly how much memory it is using. // A collection of ImageData that knows roughly how much memory it is using.
class DocumentData { class DocumentData {
friend void* ReCachePagesFunc(void* data);
public: public:
explicit DocumentData(const STRING& name); explicit DocumentData(const STRING& name);
~DocumentData(); ~DocumentData();
@ -192,6 +212,9 @@ class DocumentData {
// is used to read the file. // is used to read the file.
bool LoadDocument(const char* filename, const char* lang, int start_page, bool LoadDocument(const char* filename, const char* lang, int start_page,
inT64 max_memory, FileReader reader); inT64 max_memory, FileReader reader);
// Sets up the document, without actually loading it.
void SetDocument(const char* filename, const char* lang, inT64 max_memory,
FileReader reader);
// Writes all the pages to the given filename. Returns false on error. // Writes all the pages to the given filename. Returns false on error.
bool SaveDocument(const char* filename, FileWriter writer); bool SaveDocument(const char* filename, FileWriter writer);
bool SaveToBuffer(GenericVector<char>* buffer); bool SaveToBuffer(GenericVector<char>* buffer);
@ -200,26 +223,64 @@ class DocumentData {
void AddPageToDocument(ImageData* page); void AddPageToDocument(ImageData* page);
const STRING& document_name() const { const STRING& document_name() const {
SVAutoLock lock(&general_mutex_);
return document_name_; return document_name_;
} }
int NumPages() const { int NumPages() const {
SVAutoLock lock(&general_mutex_);
return total_pages_; return total_pages_;
} }
inT64 memory_used() const { inT64 memory_used() const {
SVAutoLock lock(&general_mutex_);
return memory_used_; return memory_used_;
} }
// If the given index is not currently loaded, loads it using a separate
// thread. Note: there are 4 cases:
// Document uncached: IsCached() returns false, total_pages_ < 0.
// Required page is available: IsPageAvailable returns true. In this case,
// total_pages_ > 0 and
// pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size()
// Pages are loaded, but the required one is not.
// The requested page is being loaded by LoadPageInBackground. In this case,
// index == pages_offset_. Once the loading starts, the pages lock is held
// until it completes, at which point IsPageAvailable will unblock and return
// true.
void LoadPageInBackground(int index);
// Returns a pointer to the page with the given index, modulo the total // Returns a pointer to the page with the given index, modulo the total
// number of pages, recaching if needed. // number of pages. Blocks until the background load is completed.
const ImageData* GetPage(int index); const ImageData* GetPage(int index);
// Returns true if the requested page is available, and provides a pointer,
// which may be NULL if the document is empty. May block, even though it
// doesn't guarantee to return true.
bool IsPageAvailable(int index, ImageData** page);
// Takes ownership of the given page index. The page is made NULL in *this. // Takes ownership of the given page index. The page is made NULL in *this.
ImageData* TakePage(int index) { ImageData* TakePage(int index) {
SVAutoLock lock(&pages_mutex_);
ImageData* page = pages_[index]; ImageData* page = pages_[index];
pages_[index] = NULL; pages_[index] = NULL;
return page; return page;
} }
// Returns true if the document is currently loaded or in the process of
// loading.
bool IsCached() const { return NumPages() >= 0; }
// Removes all pages from memory and frees the memory, but does not forget
// the document metadata. Returns the memory saved.
inT64 UnCache();
// Shuffles all the pages in the document.
void Shuffle();
private: private:
// Loads as many pages can fit in max_memory_ starting at index pages_offset_. // Sets the value of total_pages_ behind a mutex.
void set_total_pages(int total) {
  // general_mutex_ is held for the duration of the write.
  SVAutoLock guard(&general_mutex_);
  total_pages_ = total;
}
// Sets the value of memory_used_ while holding general_mutex_.
void set_memory_used(inT64 memory_used) {
  SVAutoLock guard(&general_mutex_);
  memory_used_ = memory_used;
}
// Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
// starting at index pages_offset_.
bool ReCachePages(); bool ReCachePages();
private: private:
@ -239,43 +300,77 @@ class DocumentData {
inT64 max_memory_; inT64 max_memory_;
// Saved reader from LoadDocument to allow re-caching. // Saved reader from LoadDocument to allow re-caching.
FileReader reader_; FileReader reader_;
// Mutex that protects pages_ and pages_offset_ against multiple parallel
// loads, and provides a wait for page.
SVMutex pages_mutex_;
// Mutex that protects other data members that callers want to access without
// waiting for a load operation.
mutable SVMutex general_mutex_;
}; };
// A collection of DocumentData that knows roughly how much memory it is using. // A collection of DocumentData that knows roughly how much memory it is using.
// Note that while it supports background read-ahead, it assumes that a single
// thread is accessing documents, ie it is not safe for multiple threads to
// access different documents in parallel, as one may de-cache the other's
// content.
class DocumentCache { class DocumentCache {
public: public:
explicit DocumentCache(inT64 max_memory); explicit DocumentCache(inT64 max_memory);
~DocumentCache(); ~DocumentCache();
// Deletes all existing documents from the cache.
void Clear() {
// Empty the document list first, then reset the cached per-document page
// count (the divisor used by GetPageSequential) so it is not left stale.
documents_.clear();
num_pages_per_doc_ = 0;
}
// Adds all the documents in the list of filenames, counting memory. // Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files. // The reader is used to read the files.
bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang, bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
FileReader reader); CachingStrategy cache_strategy, FileReader reader);
// Adds document to the cache, throwing out other documents if needed. // Adds document to the cache.
bool AddToCache(DocumentData* data); bool AddToCache(DocumentData* data);
// Finds and returns a document by name. // Finds and returns a document by name.
DocumentData* FindDocument(const STRING& document_name) const; DocumentData* FindDocument(const STRING& document_name) const;
// Returns a page by serial number, selecting them in a round-robin fashion // Returns a page by serial number using the current cache_strategy_ to
// from all the documents. // determine the mapping from serial number to page.
const ImageData* GetPageBySerial(int serial); const ImageData* GetPageBySerial(int serial) {
if (cache_strategy_ == CS_SEQUENTIAL)
return GetPageSequential(serial);
else
return GetPageRoundRobin(serial);
}
const PointerVector<DocumentData>& documents() const { const PointerVector<DocumentData>& documents() const {
return documents_; return documents_;
} }
int total_pages() const { // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
return total_pages_; // strategy, could take a long time.
} int TotalPages();
private: private:
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents. Highly disk-intensive, but doesn't need samples
// to be shuffled between files to begin with.
const ImageData* GetPageRoundRobin(int serial);
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
const ImageData* GetPageSequential(int serial);
// Helper counts the number of adjacent cached neighbour documents_ of index
// looking in direction dir, ie index+dir, index+2*dir etc.
int CountNeighbourDocs(int index, int dir);
// A group of pages that corresponds in some loose way to a document. // A group of pages that corresponds in some loose way to a document.
PointerVector<DocumentData> documents_; PointerVector<DocumentData> documents_;
// Total of all pages. // Strategy to use for caching and serializing data samples.
int total_pages_; CachingStrategy cache_strategy_;
// Total of all memory used by the cache. // Number of pages in the first document, used as a divisor in
inT64 memory_used_; // GetPageSequential to determine the document index.
int num_pages_per_doc_;
// Max memory allowed in this cache. // Max memory allowed in this cache.
inT64 max_memory_; inT64 max_memory_;
}; };

View File

@ -1,8 +1,12 @@
/* -*-C-*- /* -*-C-*-
****************************************************************************** ******************************************************************************
* File: matrix.h (Formerly matrix.h)
* Description: Generic 2-d array/matrix and banded triangular matrix class.
* Author: Ray Smith
* TODO(rays) Separate from ratings matrix, which it also contains:
* *
* File: matrix.h (Formerly matrix.h) * Description: Ratings matrix class (specialization of banded matrix).
* Description: Ratings matrix code. (Used by associator) * Segmentation search matrix of lists of BLOB_CHOICE.
* Author: Mark Seaman, OCR Technology * Author: Mark Seaman, OCR Technology
* Created: Wed May 16 13:22:06 1990 * Created: Wed May 16 13:22:06 1990
* Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt * Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
@ -22,12 +26,16 @@
** limitations under the License. ** limitations under the License.
* *
*********************************************************************************/ *********************************************************************************/
#ifndef TESSERACT_CCSTRUCT_MATRIX_H__ #ifndef TESSERACT_CCSTRUCT_MATRIX_H_
#define TESSERACT_CCSTRUCT_MATRIX_H__ #define TESSERACT_CCSTRUCT_MATRIX_H_
#include <math.h>
#include "kdpair.h" #include "kdpair.h"
#include "points.h"
#include "serialis.h"
#include "unicharset.h" #include "unicharset.h"
class BLOB_CHOICE;
class BLOB_CHOICE_LIST; class BLOB_CHOICE_LIST;
#define NOT_CLASSIFIED reinterpret_cast<BLOB_CHOICE_LIST*>(0) #define NOT_CLASSIFIED reinterpret_cast<BLOB_CHOICE_LIST*>(0)
@ -44,34 +52,60 @@ class GENERIC_2D_ARRAY {
// either pass the memory in, or allocate after by calling Resize(). // either pass the memory in, or allocate after by calling Resize().
GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array) GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array)
: empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) { : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) {
size_allocated_ = dim1 * dim2;
} }
// Original constructor for a full rectangular matrix DOES allocate memory // Original constructor for a full rectangular matrix DOES allocate memory
// and initialize it to empty. // and initialize it to empty.
GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty) GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty)
: empty_(empty), dim1_(dim1), dim2_(dim2) { : empty_(empty), dim1_(dim1), dim2_(dim2) {
array_ = new T[dim1_ * dim2_]; int new_size = dim1 * dim2;
for (int x = 0; x < dim1_; x++) array_ = new T[new_size];
for (int y = 0; y < dim2_; y++) size_allocated_ = new_size;
this->put(x, y, empty_); for (int i = 0; i < size_allocated_; ++i)
array_[i] = empty_;
}
// Default constructor for array allocation. Use Resize to set the size.
// Starts with no storage: array_ is NULL, both dimensions and the allocated
// size are 0, and empty_ is static_cast<T>(0).
GENERIC_2D_ARRAY()
: array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
size_allocated_(0) {
}
// Copy constructor. Initializes to the same empty state as the default
// constructor and then delegates the copy to operator=.
// NOTE(review): empty_ is set to static_cast<T>(0) here and operator= does
// not assign it, so the copy does not inherit src.empty_ - confirm that this
// is intended.
GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY<T>& src)
: array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
size_allocated_(0) {
*this = src;
}
virtual ~GENERIC_2D_ARRAY() { delete[] array_; } virtual ~GENERIC_2D_ARRAY() { delete[] array_; }
// Copies the dimensions and the elements of src into *this, reusing the
// existing allocation when it is already large enough (see ResizeNoInit).
// The elements are copied bitwise with memcpy, so this is only suitable for
// types that can be copied that way. empty_ is left unchanged.
void operator=(const GENERIC_2D_ARRAY<T>& src) {
  // Self-assignment would memcpy a buffer onto itself, which is undefined
  // behaviour, and there is nothing to copy anyway.
  if (this == &src) return;
  ResizeNoInit(src.dim1(), src.dim2());
  int size = num_elements();
  // A default-constructed or zero-sized array can have a NULL array_, and
  // passing NULL to memcpy is undefined even when the byte count is 0.
  if (size > 0) {
    memcpy(array_, src.array_, size * sizeof(array_[0]));
  }
}
// Reallocate the array to the given size. Does not keep old data, but does
// not initialize the array either.
void ResizeNoInit(int size1, int size2) {
  const int required = size1 * size2;
  // Only grow the buffer; a request that fits in the current allocation
  // keeps the old memory and merely changes the logical dimensions.
  if (size_allocated_ < required) {
    delete [] array_;
    array_ = new T[required];
    size_allocated_ = required;
  }
  dim1_ = size1;
  dim2_ = size2;
}
// Reallocate the array to the given size. Does not keep old data. // Reallocate the array to the given size. Does not keep old data.
void Resize(int size1, int size2, const T& empty) { void Resize(int size1, int size2, const T& empty) {
empty_ = empty; empty_ = empty;
if (size1 != dim1_ || size2 != dim2_) { ResizeNoInit(size1, size2);
dim1_ = size1;
dim2_ = size2;
delete [] array_;
array_ = new T[dim1_ * dim2_];
}
Clear(); Clear();
} }
// Reallocate the array to the given size, keeping old data. // Reallocate the array to the given size, keeping old data.
void ResizeWithCopy(int size1, int size2) { void ResizeWithCopy(int size1, int size2) {
if (size1 != dim1_ || size2 != dim2_) { if (size1 != dim1_ || size2 != dim2_) {
T* new_array = new T[size1 * size2]; int new_size = size1 * size2;
T* new_array = new T[new_size];
for (int col = 0; col < size1; ++col) { for (int col = 0; col < size1; ++col) {
for (int row = 0; row < size2; ++row) { for (int row = 0; row < size2; ++row) {
int old_index = col * dim2() + row; int old_index = col * dim2() + row;
@ -87,6 +121,7 @@ class GENERIC_2D_ARRAY {
array_ = new_array; array_ = new_array;
dim1_ = size1; dim1_ = size1;
dim2_ = size2; dim2_ = size2;
size_allocated_ = new_size;
} }
} }
@ -106,9 +141,16 @@ class GENERIC_2D_ARRAY {
if (fwrite(array_, sizeof(*array_), size, fp) != size) return false; if (fwrite(array_, sizeof(*array_), size, fp) != size) return false;
return true; return true;
} }
// TFile variant: writes the two dimensions, then empty_, then the raw element
// data. Returns false as soon as any write comes up short.
bool Serialize(tesseract::TFile* fp) const {
  if (!SerializeSize(fp)) return false;
  if (fp->FWrite(&empty_, sizeof(empty_), 1) != 1) return false;
  const int count = num_elements();
  return fp->FWrite(array_, sizeof(*array_), count) == count;
}
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// Only works with bitwise-serializeable typ // Only works with bitwise-serializeable types!
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp) { bool DeSerialize(bool swap, FILE* fp) {
if (!DeSerializeSize(swap, fp)) return false; if (!DeSerializeSize(swap, fp)) return false;
@ -122,6 +164,18 @@ class GENERIC_2D_ARRAY {
} }
return true; return true;
} }
// TFile variant: reads the dimensions (resizing the array), then empty_, then
// the raw element data, byte-reversing each value when swap is true.
bool DeSerialize(bool swap, tesseract::TFile* fp) {
  if (!DeSerializeSize(swap, fp)) return false;
  if (fp->FRead(&empty_, sizeof(empty_), 1) != 1) return false;
  if (swap) ReverseN(&empty_, sizeof(empty_));
  // The element count is only valid after DeSerializeSize has resized.
  const int count = num_elements();
  if (fp->FRead(array_, sizeof(*array_), count) != count) return false;
  for (int i = 0; swap && i < count; ++i) {
    ReverseN(array_ + i, sizeof(array_[i]));
  }
  return true;
}
// Writes to the given file. Returns false in case of error. // Writes to the given file. Returns false in case of error.
// Assumes a T::Serialize(FILE*) const function. // Assumes a T::Serialize(FILE*) const function.
@ -163,11 +217,17 @@ class GENERIC_2D_ARRAY {
} }
// Put a list element into the matrix at a specific location. // Put a list element into the matrix at a specific location.
// ICOORD overload: pos.x() is used as the column and pos.y() as the row.
void put(ICOORD pos, const T& thing) {
  const int offset = this->index(pos.x(), pos.y());
  array_[offset] = thing;
}
void put(int column, int row, const T& thing) { void put(int column, int row, const T& thing) {
array_[this->index(column, row)] = thing; array_[this->index(column, row)] = thing;
} }
// Get the item at a specified location from the matrix. // Get the item at a specified location from the matrix.
// ICOORD overload: pos.x() is used as the column and pos.y() as the row.
T get(ICOORD pos) const {
  const int offset = this->index(pos.x(), pos.y());
  return array_[offset];
}
T get(int column, int row) const { T get(int column, int row) const {
return array_[this->index(column, row)]; return array_[this->index(column, row)];
} }
@ -187,6 +247,207 @@ class GENERIC_2D_ARRAY {
return &array_[this->index(column, 0)]; return &array_[this->index(column, 0)];
} }
// Adds addend to *this, element-by-element.
void operator+=(const GENERIC_2D_ARRAY<T>& addend) {
  if (dim2_ != addend.dim2_) {
    // Different minor dimension: the flat layouts do not line up, so address
    // every cell of *this through the 2-d accessors.
    for (int col = 0; col < dim1_; col++) {
      for (int row = 0; row < dim2_; row++) {
        (*this)(col, row) += addend(col, row);
      }
    }
    return;
  }
  // Same minor dimension: walk the flat storage over the common prefix.
  int count = MIN(num_elements(), addend.num_elements());
  for (int i = 0; i < count; ++i) {
    array_[i] += addend.array_[i];
  }
}
// Subtracts the argument from *this, element-by-element. (The parameter is
// named minuend, although strictly speaking it is the subtrahend.)
void operator-=(const GENERIC_2D_ARRAY<T>& minuend) {
  if (dim2_ != minuend.dim2_) {
    // Different minor dimension: the flat layouts do not line up, so address
    // every cell of *this through the 2-d accessors.
    for (int col = 0; col < dim1_; col++) {
      for (int row = 0; row < dim2_; row++) {
        (*this)(col, row) -= minuend(col, row);
      }
    }
    return;
  }
  // Same minor dimension: walk the flat storage over the common prefix.
  int count = MIN(num_elements(), minuend.num_elements());
  for (int i = 0; i < count; ++i) {
    array_[i] -= minuend.array_[i];
  }
}
// Adds addend to all elements.
void operator+=(const T& addend) {
  // Applies the same scalar offset to every stored element.
  const int count = num_elements();
  for (int i = 0; i < count; ++i) array_[i] += addend;
}
// Multiplies *this by factor, element-by-element.
void operator*=(const T& factor) {
  // Scales every stored element by the same scalar factor.
  const int count = num_elements();
  for (int i = 0; i < count; ++i) array_[i] *= factor;
}
// Clips *this to the given range.
void Clip(const T& rangemin, const T& rangemax) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] = ClipToRange(array_[i], rangemin, rangemax);
}
}
// Returns true if all elements of *this are within the given range.
// Only uses operator<
bool WithinBounds(const T& rangemin, const T& rangemax) const {
int size = num_elements();
for (int i = 0; i < size; ++i) {
const T& value = array_[i];
if (value < rangemin || rangemax < value)
return false;
}
return true;
}
// Normalize the whole array.
// Shifts the elements to zero mean and, if the standard deviation is
// positive, scales them to unit standard deviation. Returns the standard
// deviation measured before scaling, or 0.0 for an empty array.
double Normalize() {
  const int count = num_elements();
  if (count <= 0) return 0.0;
  // Pass 1: mean of all elements.
  double mean = 0.0;
  for (int i = 0; i < count; ++i) mean += array_[i];
  mean /= count;
  // Pass 2: centre each element and accumulate the squared deviations.
  double sum_sq = 0.0;
  for (int i = 0; i < count; ++i) {
    double centred = array_[i] - mean;
    array_[i] = centred;
    sum_sq += centred * centred;
  }
  const double sd = sqrt(sum_sq / count);
  // A zero (or NaN) deviation means there is nothing sensible to divide by.
  if (!(sd > 0.0)) return sd;
  // Pass 3: scale to unit standard deviation.
  for (int i = 0; i < count; ++i) array_[i] /= sd;
  return sd;
}
// Returns the maximum value of the array.
T Max() const {
  const int count = num_elements();
  // An empty array has no maximum; empty_ is returned instead.
  if (count <= 0) return empty_;
  T best = array_[0];
  for (int i = 1; i < count; ++i) {
    if (array_[i] > best) best = array_[i];
  }
  return best;
}
// Returns the maximum absolute value of the array.
T MaxAbs() const {
  const int count = num_elements();
  // An empty array has no maximum; empty_ is returned instead.
  if (count <= 0) return empty_;
  // Magnitudes are never negative, so zero is a safe starting value.
  T largest = static_cast<T>(0);
  for (int i = 0; i < count; ++i) {
    const T magnitude = static_cast<T>(fabs(array_[i]));
    if (magnitude > largest) largest = magnitude;
  }
  return largest;
}
// Accumulates the element-wise sums of squares of src into *this.
void SumSquares(const GENERIC_2D_ARRAY<T>& src) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] += src.array_[i] * src.array_[i];
}
}
// Scales each element using the ada-grad algorithm, ie array_[i] by
// sqrt(num_samples/max(1,sqsum[i])).
void AdaGradScaling(const GENERIC_2D_ARRAY<T>& sqsum, int num_samples) {
  const int count = num_elements();
  int i = 0;
  while (i < count) {
    // The divisor is floored at 1.0 before the ratio and square root.
    array_[i] *= sqrt(num_samples / MAX(1.0, sqsum.array_[i]));
    ++i;
  }
}
// Checks every element with ASSERT_HOST(isfinite(...)).
void AssertFinite() const {
  const int count = num_elements();
  int i = 0;
  while (i < count) {
    ASSERT_HOST(isfinite(array_[i]));
    ++i;
  }
}
// REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a
// num_dims-dimensional array/tensor with dimensions given by dims, (ordered
// from most significant to least significant, the same as standard C arrays)
// and moves src_dim to dest_dim, with the initial dest_dim and any dimensions
// in between shifted towards the hole left by src_dim. Example:
// Current data content: array_=[0, 1, 2, ....119]
// perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]...
// but the current dimensions are irrelevant.
// num_dims = 4, dims=[5, 4, 3, 2]
// src_dim=3, dest_dim=1
// tensor=[[[[0, 1][2, 3][4, 5]]
// [[6, 7][8, 9][10, 11]]
// [[12, 13][14, 15][16, 17]]
// [[18, 19][20, 21][22, 23]]]
// [[[24, 25]...
// output dims =[5, 2, 4, 3]
// output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
// [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
// [[[24, 26, 28]...
// which is stored in the array_ as:
// [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...]
// NOTE: the 2 stored matrix dimensions are simply copied from *this. To
// change the dimensions after the transpose, use ResizeNoInit.
// Higher dimensions above 2 are strictly the responsibility of the caller.
void RotatingTranspose(const int* dims, int num_dims, int src_dim,
int dest_dim, GENERIC_2D_ARRAY<T>* result) const {
int max_d = MAX(src_dim, dest_dim);
int min_d = MIN(src_dim, dest_dim);
// In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the
// ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1]
// being contiguous blocks of data that will move together, and
// [d0, min_d -1] being replicas of the transpose operation.
// num_replicas represents the large dimensions unchanged by the operation.
// move_size represents the small dimensions unchanged by the operation.
// src_step represents the stride in the src between each adjacent group
// in the destination.
int num_replicas = 1, move_size = 1, src_step = 1;
for (int d = 0; d < min_d; ++d) num_replicas *= dims[d];
for (int d = max_d + 1; d < num_dims; ++d) move_size *= dims[d];
for (int d = src_dim + 1; d < num_dims; ++d) src_step *= dims[d];
if (src_dim > dest_dim) src_step *= dims[src_dim];
// wrap_size is the size of a single replica, being the amount that is
// handled num_replicas times.
int wrap_size = move_size;
for (int d = min_d; d <= max_d; ++d) wrap_size *= dims[d];
// The result takes the 2-d shape and empty_ value of *this; only the order
// of the elements in the flat storage differs.
// NOTE(review): result presumably must not be this object, since the memcpy
// below would then read from the buffer it is overwriting - confirm.
result->ResizeNoInit(dim1_, dim2_);
result->empty_ = empty_;
const T* src = array_;
T* dest = result->array_;
// dest is written strictly sequentially, move_size elements at a time, while
// src is read at offsets start, start + src_step, ... within the current
// replica. src then advances by wrap_size to the next replica.
for (int replica = 0; replica < num_replicas; ++replica) {
for (int start = 0; start < src_step; start += move_size) {
for (int pos = start; pos < wrap_size; pos += src_step) {
memcpy(dest, src + pos, sizeof(*dest) * move_size);
dest += move_size;
}
}
src += wrap_size;
}
}
// Delete objects pointed to by array_[i]. // Delete objects pointed to by array_[i].
void delete_matrix_pointers() { void delete_matrix_pointers() {
int size = num_elements(); int size = num_elements();
@ -206,6 +467,13 @@ class GENERIC_2D_ARRAY {
if (fwrite(&size, sizeof(size), 1, fp) != 1) return false; if (fwrite(&size, sizeof(size), 1, fp) != 1) return false;
return true; return true;
} }
// TFile variant: writes dim1_ and then dim2_, each as an inT32.
bool SerializeSize(tesseract::TFile* fp) const {
  const inT32 first = dim1_;
  if (fp->FWrite(&first, sizeof(first), 1) != 1) return false;
  const inT32 second = dim2_;
  return fp->FWrite(&second, sizeof(second), 1) == 1;
}
// Factored helper to deserialize the size. // Factored helper to deserialize the size.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeSize(bool swap, FILE* fp) { bool DeSerializeSize(bool swap, FILE* fp) {
@ -219,11 +487,26 @@ class GENERIC_2D_ARRAY {
Resize(size1, size2, empty_); Resize(size1, size2, empty_);
return true; return true;
} }
// TFile variant of the size reader: reads the two dimensions as inT32 values,
// byte-reversing them if swap is true, and resizes the array to match.
// Returns false on a short read or if the stored dimensions are unusable.
bool DeSerializeSize(bool swap, tesseract::TFile* fp) {
  inT32 size1, size2;
  if (fp->FRead(&size1, sizeof(size1), 1) != 1) return false;
  if (fp->FRead(&size2, sizeof(size2), 1) != 1) return false;
  if (swap) {
    ReverseN(&size1, sizeof(size1));
    ReverseN(&size2, sizeof(size2));
  }
  // The dimensions come straight from the file, so do not trust them: a
  // negative value, or a pair whose product overflows a 32 bit int, would
  // make Resize compute a bogus allocation size.
  if (size1 < 0 || size2 < 0) return false;
  if (size2 > 0 && size1 > 0x7fffffff / size2) return false;
  Resize(size1, size2, empty_);
  return true;
}
T* array_; T* array_;
T empty_; // The unused cell. T empty_; // The unused cell.
int dim1_; // Size of the 1st dimension in indexing functions. int dim1_; // Size of the 1st dimension in indexing functions.
int dim2_; // Size of the 2nd dimension in indexing functions. int dim2_; // Size of the 2nd dimension in indexing functions.
// The total size to which the array can be expanded before a realloc is
// needed. If Resize is used, memory is retained so it can be re-expanded
// without a further alloc, and this stores the allocated size.
int size_allocated_;
}; };
// A generic class to store a banded triangular matrix with entries of type T. // A generic class to store a banded triangular matrix with entries of type T.
@ -349,4 +632,4 @@ struct MATRIX_COORD {
// The MatrixCoordPair contains a MATRIX_COORD and its priority. // The MatrixCoordPair contains a MATRIX_COORD and its priority.
typedef tesseract::KDPairInc<float, MATRIX_COORD> MatrixCoordPair; typedef tesseract::KDPairInc<float, MATRIX_COORD> MatrixCoordPair;
#endif // TESSERACT_CCSTRUCT_MATRIX_H__ #endif // TESSERACT_CCSTRUCT_MATRIX_H_

View File

@ -1,8 +1,8 @@
/********************************************************************** /**********************************************************************
* File: mod128.c (Formerly dir128.c) * File: mod128.c (Formerly dir128.c)
* Description: Code to convert a DIR128 to an ICOORD. * Description: Code to convert a DIR128 to an ICOORD.
* Author: Ray Smith * Author: Ray Smith
* Created: Tue Oct 22 11:56:09 BST 1991 * Created: Tue Oct 22 11:56:09 BST 1991
* *
* (C) Copyright 1991, Hewlett-Packard Ltd. * (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -86,16 +86,3 @@ DIR128::DIR128( //from fcoord
while (high - low > 1); while (high - low > 1);
dir = low; dir = low;
} }
/**********************************************************************
* dir_to_gradient
*
* Convert a direction to a vector.
**********************************************************************/
#if 0 // code is buggy for negative dir and unused
ICOORD DIR128::vector() const { //convert to vector
return dirtab[dir]; //easy really
}
#endif

View File

@ -1,8 +1,8 @@
/********************************************************************** /**********************************************************************
* File: mod128.h (Formerly dir128.h) * File: mod128.h (Formerly dir128.h)
* Description: Header for class which implements modulo arithmetic. * Description: Header for class which implements modulo arithmetic.
* Author: Ray Smith * Author: Ray Smith
* Created: Tue Mar 26 17:48:13 GMT 1991 * Created: Tue Mar 26 17:48:13 GMT 1991
* *
* (C) Copyright 1991, Hewlett-Packard Ltd. * (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -77,7 +77,6 @@ class DLLSYM DIR128
inT8 get_dir() const { //access function inT8 get_dir() const { //access function
return dir; return dir;
} }
ICOORD vector() const; //turn to vector
private: private:
inT8 dir; //a direction inT8 dir; //a direction

View File

@ -51,23 +51,16 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height,
// only use opencl if compiled w/ OpenCL and selected device is opencl // only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL #ifdef USE_OPENCL
// all of channel 0 then all of channel 1... // all of channel 0 then all of channel 1...
int *histogramAllChannels = new int[kHistogramSize * num_channels]; int* histogramAllChannels = new int[kHistogramSize * num_channels];
// Calculate Histogram on GPU // Calculate Histogram on GPU
OpenclDevice od; OpenclDevice od;
if (od.selectedDeviceIsOpenCL() && if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) &&
(num_channels == 1 || num_channels == 4) && top == 0 && left == 0 ) { top == 0 && left == 0) {
od.HistogramRectOCL( od.HistogramRectOCL((unsigned char*)pixGetData(src_pix), num_channels,
(const unsigned char*)pixGetData(src_pix), pixGetWpl(src_pix) * 4, left, top, width, height,
num_channels, kHistogramSize, histogramAllChannels);
pixGetWpl(src_pix) * 4,
left,
top,
width,
height,
kHistogramSize,
histogramAllChannels);
// Calculate Threshold from Histogram on cpu // Calculate Threshold from Histogram on cpu
for (int ch = 0; ch < num_channels; ++ch) { for (int ch = 0; ch < num_channels; ++ch) {
@ -143,7 +136,6 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height,
delete[] histogramAllChannels; delete[] histogramAllChannels;
#endif // USE_OPENCL #endif // USE_OPENCL
if (!any_good_hivalue) { if (!any_good_hivalue) {
// Use the best of the ones that were not good enough. // Use the best of the ones that were not good enough.
(*hi_values)[best_hi_index] = best_hi_value; (*hi_values)[best_hi_index] = best_hi_value;

View File

@ -17,8 +17,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_OTSUTHR_H__ #ifndef TESSERACT_CCMAIN_OTSUTHR_H_
#define TESSERACT_CCMAIN_OTSUTHR_H__ #define TESSERACT_CCMAIN_OTSUTHR_H_
struct Pix; struct Pix;
@ -53,4 +53,4 @@ int OtsuStats(const int* histogram, int* H_out, int* omega0_out);
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCMAIN_OTSUTHR_H__ #endif // TESSERACT_CCMAIN_OTSUTHR_H_

View File

@ -303,8 +303,9 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in,
static_cast<tesseract::OcrEngineMode>(norm_mode); static_cast<tesseract::OcrEngineMode>(norm_mode);
tesseract = tess; tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL; POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY && if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) { word->cblob_list()->empty()) ||
(pb != NULL && !pb->IsText())) {
// Empty words occur when all the blobs have been moved to the rej_blobs // Empty words occur when all the blobs have been moved to the rej_blobs
// list, which seems to occur frequently in junk. // list, which seems to occur frequently in junk.
SetupFake(unicharset_in); SetupFake(unicharset_in);
@ -528,13 +529,12 @@ void WERD_RES::FilterWordChoices(int debug_level) {
if (choice->unichar_id(i) != best_choice->unichar_id(j) && if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
choice->certainty(i) - best_choice->certainty(j) < threshold) { choice->certainty(i) - best_choice->certainty(j) < threshold) {
if (debug_level >= 2) { if (debug_level >= 2) {
STRING label; choice->print("WorstCertaintyDiffWorseThan");
label.add_str_int("\nDiscarding bad choice #", index); tprintf(
choice->print(label.string()); "i %d j %d Choice->Blob[i].Certainty %.4g"
tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g" " WorstOtherChoiceCertainty %g Threshold %g\n",
" BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n", i, j, choice->certainty(i), best_choice->certainty(j), threshold);
i, j, chunk, choice->certainty(i), tprintf("Discarding bad choice #%d\n", index);
best_choice->certainty(j), threshold);
} }
delete it.extract(); delete it.extract();
break; break;
@ -882,17 +882,18 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
choice_it.add_after_then_move(choices[c]); choice_it.add_after_then_move(choices[c]);
ratings->put(c, c, choice_list); ratings->put(c, c, choice_list);
} }
FakeWordFromRatings(); FakeWordFromRatings(TOP_CHOICE_PERM);
reject_map.initialise(blob_count); reject_map.initialise(blob_count);
best_state.init_to_size(blob_count, 1);
done = true; done = true;
} }
// Creates a WERD_CHOICE for the word using the top choices from the leading // Creates a WERD_CHOICE for the word using the top choices from the leading
// diagonal of the ratings matrix. // diagonal of the ratings matrix.
void WERD_RES::FakeWordFromRatings() { void WERD_RES::FakeWordFromRatings(PermuterType permuter) {
int num_blobs = ratings->dimension(); int num_blobs = ratings->dimension();
WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs); WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
word_choice->set_permuter(TOP_CHOICE_PERM); word_choice->set_permuter(permuter);
for (int b = 0; b < num_blobs; ++b) { for (int b = 0; b < num_blobs; ++b) {
UNICHAR_ID unichar_id = UNICHAR_SPACE; UNICHAR_ID unichar_id = UNICHAR_SPACE;
float rating = MAX_INT32; float rating = MAX_INT32;
@ -1105,6 +1106,7 @@ void WERD_RES::InitNonPointers() {
x_height = 0.0; x_height = 0.0;
caps_height = 0.0; caps_height = 0.0;
baseline_shift = 0.0f; baseline_shift = 0.0f;
space_certainty = 0.0f;
guessed_x_ht = TRUE; guessed_x_ht = TRUE;
guessed_caps_ht = TRUE; guessed_caps_ht = TRUE;
combination = FALSE; combination = FALSE;

View File

@ -1,7 +1,7 @@
/********************************************************************** /**********************************************************************
* File: pageres.h (Formerly page_res.h) * File: pageres.h (Formerly page_res.h)
* Description: Results classes used by control.c * Description: Results classes used by control.c
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Tue Sep 22 08:42:49 BST 1992 * Created: Tue Sep 22 08:42:49 BST 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
@ -295,6 +295,9 @@ class WERD_RES : public ELIST_LINK {
float x_height; // post match estimate float x_height; // post match estimate
float caps_height; // post match estimate float caps_height; // post match estimate
float baseline_shift; // post match estimate. float baseline_shift; // post match estimate.
// Certainty score for the spaces either side of this word (LSTM mode).
// MIN this value with the actual word certainty.
float space_certainty;
/* /*
To deal with fuzzy spaces we need to be able to combine "words" to form To deal with fuzzy spaces we need to be able to combine "words" to form
@ -327,7 +330,7 @@ class WERD_RES : public ELIST_LINK {
} }
// Deep copies everything except the ratings MATRIX. // Deep copies everything except the ratings MATRIX.
// To get that use deep_copy below. // To get that use deep_copy below.
WERD_RES(const WERD_RES &source) : ELIST_LINK(source) { WERD_RES(const WERD_RES& source) : ELIST_LINK(source) {
InitPointers(); InitPointers();
*this = source; // see operator= *this = source; // see operator=
} }
@ -590,7 +593,7 @@ class WERD_RES : public ELIST_LINK {
// Creates a WERD_CHOICE for the word using the top choices from the leading // Creates a WERD_CHOICE for the word using the top choices from the leading
// diagonal of the ratings matrix. // diagonal of the ratings matrix.
void FakeWordFromRatings(); void FakeWordFromRatings(PermuterType permuter);
// Copies the best_choice strings to the correct_text for adaption/training. // Copies the best_choice strings to the correct_text for adaption/training.
void BestChoiceToCorrectText(); void BestChoiceToCorrectText();
@ -630,7 +633,7 @@ class WERD_RES : public ELIST_LINK {
static WERD_RES* deep_copy(const WERD_RES* src) { static WERD_RES* deep_copy(const WERD_RES* src) {
WERD_RES* result = new WERD_RES(*src); WERD_RES* result = new WERD_RES(*src);
// That didn't copy the ratings, but we want a copy if there is one to // That didn't copy the ratings, but we want a copy if there is one to
// begin width. // begin with.
if (src->ratings != NULL) if (src->ratings != NULL)
result->ratings = src->ratings->DeepCopy(); result->ratings = src->ratings->DeepCopy();
return result; return result;

View File

@ -126,7 +126,7 @@ typedef GenericVector<ParamsTrainingHypothesis> ParamsTrainingHypothesisList;
// explored on PASS1, PASS2, fix xheight pass, etc). // explored on PASS1, PASS2, fix xheight pass, etc).
class ParamsTrainingBundle { class ParamsTrainingBundle {
public: public:
ParamsTrainingBundle() {}; ParamsTrainingBundle() {}
// Starts a new hypothesis list. // Starts a new hypothesis list.
// Should be called at the beginning of a new run of the segmentation search. // Should be called at the beginning of a new run of the segmentation search.
void StartHypothesisList() { void StartHypothesisList() {

View File

@ -29,90 +29,74 @@ struct Pix;
CLISTIZEH (PDBLK) CLISTIZEH (PDBLK)
///page block ///page block
class PDBLK class PDBLK {
{
friend class BLOCK_RECT_IT; //< block iterator friend class BLOCK_RECT_IT; //< block iterator
public: public:
///empty constructor /// empty constructor
PDBLK() { PDBLK() {
hand_poly = NULL; hand_poly = NULL;
index_ = 0; index_ = 0;
} }
///simple constructor /// simple constructor
PDBLK(inT16 xmin, //< bottom left PDBLK(inT16 xmin, //< bottom left
inT16 ymin, inT16 ymin,
inT16 xmax, //< top right inT16 xmax, //< top right
inT16 ymax); inT16 ymax);
///set vertex lists /// set vertex lists
///@param left list of left vertices ///@param left list of left vertices
///@param right list of right vertices ///@param right list of right vertices
void set_sides(ICOORDELT_LIST *left, void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right);
ICOORDELT_LIST *right);
///destructor /// destructor
~PDBLK () { ~PDBLK() { delete hand_poly; }
if (hand_poly) delete hand_poly;
}
POLY_BLOCK *poly_block() const { POLY_BLOCK *poly_block() const { return hand_poly; }
return hand_poly; /// set the poly block
} void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; }
///set the poly block /// get box
void set_poly_block(POLY_BLOCK *blk) { void bounding_box(ICOORD &bottom_left, // bottom left
hand_poly = blk; ICOORD &top_right) const { // topright
} bottom_left = box.botleft();
///get box top_right = box.topright();
void bounding_box(ICOORD &bottom_left, //bottom left }
ICOORD &top_right) const { //topright /// get real box
bottom_left = box.botleft (); const TBOX &bounding_box() const { return box; }
top_right = box.topright ();
}
///get real box
const TBOX &bounding_box() const {
return box;
}
int index() const { int index() const { return index_; }
return index_; void set_index(int value) { index_ = value; }
}
void set_index(int value) {
index_ = value;
}
///is pt inside block /// is pt inside block
BOOL8 contains(ICOORD pt); BOOL8 contains(ICOORD pt);
/// reposition block /// reposition block
void move(const ICOORD vec); // by vector void move(const ICOORD vec); // by vector
// Returns a binary Pix mask with a 1 pixel for every pixel within the // Returns a binary Pix mask with a 1 pixel for every pixel within the
// block. Rotates the coordinate system by rerotation prior to rendering. // block. Rotates the coordinate system by rerotation prior to rendering.
// If not NULL, mask_box is filled with the position box of the returned // If not NULL, mask_box is filled with the position box of the returned
// mask image. // mask image.
Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box);
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
///draw histogram /// draw histogram
///@param window window to draw in ///@param window window to draw in
///@param serial serial number ///@param serial serial number
///@param colour colour to draw in ///@param colour colour to draw in
void plot(ScrollView* window, void plot(ScrollView *window, inT32 serial, ScrollView::Color colour);
inT32 serial, #endif // GRAPHICS_DISABLED
ScrollView::Color colour);
#endif // GRAPHICS_DISABLED
///assignment /// assignment
///@param source from this ///@param source from this
PDBLK & operator= (const PDBLK & source); PDBLK &operator=(const PDBLK &source);
protected: protected:
POLY_BLOCK *hand_poly; //< weird as well POLY_BLOCK *hand_poly; //< weird as well
ICOORDELT_LIST leftside; //< left side vertices ICOORDELT_LIST leftside; //< left side vertices
ICOORDELT_LIST rightside; //< right side vertices ICOORDELT_LIST rightside; //< right side vertices
TBOX box; //< bounding box TBOX box; //< bounding box
int index_; //< Serial number of this block. int index_; //< Serial number of this block.
}; };
class DLLSYM BLOCK_RECT_IT //rectangle iterator class DLLSYM BLOCK_RECT_IT //rectangle iterator

View File

@ -214,7 +214,7 @@ EDGEPT edgepts[] //output is array
void fix2( //polygonal approx void fix2( //polygonal approx
EDGEPT *start, /*loop to approimate */ EDGEPT *start, /*loop to approimate */
int area) { int area) {
EDGEPT *edgept; /*current point */ EDGEPT *edgept; /*current point */
EDGEPT *edgept1; EDGEPT *edgept1;
EDGEPT *loopstart; /*modified start of loop */ EDGEPT *loopstart; /*modified start of loop */
EDGEPT *linestart; /*start of line segment */ EDGEPT *linestart; /*start of line segment */

View File

@ -1,7 +1,7 @@
/********************************************************************** /**********************************************************************
* File: polyblk.c (Formerly poly_block.c) * File: polyblk.c (Formerly poly_block.c)
* Description: Polygonal blocks * Description: Polygonal blocks
* Author: Sheelagh Lloyd? * Author: Sheelagh Lloyd?
* Created: * Created:
* *
* (C) Copyright 1993, Hewlett-Packard Ltd. * (C) Copyright 1993, Hewlett-Packard Ltd.
@ -294,6 +294,8 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) {
} }
} }
} }
delete lines;
} }
#endif #endif

View File

@ -17,8 +17,8 @@
// //
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H__ #ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H__ #define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
// This file contains types that are used both by the API and internally // This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic // to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
@ -213,7 +213,7 @@ enum PageIteratorLevel {
}; };
/** /**
* JUSTIFICATION_UNKNONW * JUSTIFICATION_UNKNOWN
* The alignment is not clearly one of the other options. This could happen * The alignment is not clearly one of the other options. This could happen
* for example if there are only one or two lines of text or the text looks * for example if there are only one or two lines of text or the text looks
* like source code or poetry. * like source code or poetry.
@ -235,7 +235,7 @@ enum PageIteratorLevel {
* *
* JUSTIFICATION_RIGHT * JUSTIFICATION_RIGHT
* Each line, except possibly the first, is flush to the same right tab stop. * Each line, except possibly the first, is flush to the same right tab stop.
*/ */
enum ParagraphJustification { enum ParagraphJustification {
JUSTIFICATION_UNKNOWN, JUSTIFICATION_UNKNOWN,
JUSTIFICATION_LEFT, JUSTIFICATION_LEFT,
@ -255,17 +255,20 @@ enum ParagraphJustification {
*/ */
enum OcrEngineMode { enum OcrEngineMode {
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest OEM_TESSERACT_ONLY, // Run Tesseract only - fastest
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
OEM_DEFAULT // Specify this mode when calling init_*(), // to Tesseract when things get difficult.
OEM_DEFAULT, // Specify this mode when calling init_*(),
// to indicate that any of the above modes // to indicate that any of the above modes
// should be automatically inferred from the // should be automatically inferred from the
// variables in the language-specific config, // variables in the language-specific config,
// command-line configs, or if not specified // command-line configs, or if not specified
// in any of the above should be set to the // in any of the above should be set to the
// default OEM_TESSERACT_ONLY. // default OEM_TESSERACT_ONLY.
OEM_CUBE_ONLY, // Run Cube only - better accuracy, but slower
OEM_TESSERACT_CUBE_COMBINED, // Run both and combine results - best accuracy
}; };
} // namespace tesseract. } // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H__ #endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,8 +1,8 @@
/********************************************************************** /**********************************************************************
* File: quspline.cpp (Formerly qspline.c) * File: quspline.cpp (Formerly qspline.c)
* Description: Code for the QSPLINE class. * Description: Code for the QSPLINE class.
* Author: Ray Smith * Author: Ray Smith
* Created: Tue Oct 08 17:16:12 BST 1991 * Created: Tue Oct 08 17:16:12 BST 1991
* *
* (C) Copyright 1991, Hewlett-Packard Ltd. * (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -288,7 +288,8 @@ class WERD_CHOICE : public ELIST_LINK {
src_certainty, src_permuter); src_certainty, src_permuter);
} }
WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset); WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) { WERD_CHOICE(const WERD_CHOICE &word)
: ELIST_LINK(word), unicharset_(word.unicharset_) {
this->init(word.length()); this->init(word.length());
this->operator=(word); this->operator=(word);
} }
@ -507,6 +508,20 @@ class WERD_CHOICE : public ELIST_LINK {
} }
return word_str; return word_str;
} }
// Returns true if any unichar_id in the word is a non-space-delimited char.
bool ContainsAnyNonSpaceDelimited() const {
for (int i = 0; i < length_; ++i) {
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) return true;
}
return false;
}
// Returns true if the word is all spaces.
bool IsAllSpaces() const {
for (int i = 0; i < length_; ++i) {
if (unichar_ids_[i] != UNICHAR_SPACE) return false;
}
return true;
}
// Call this to override the default (strict left to right graphemes) // Call this to override the default (strict left to right graphemes)
// with the fact that some engine produces a "reading order" set of // with the fact that some engine produces a "reading order" set of

View File

@ -1,8 +1,8 @@
/********************************************************************** /**********************************************************************
* File: rect.c (Formerly box.c) * File: rect.c (Formerly box.c)
* Description: Bounding box class definition. * Description: Bounding box class definition.
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Wed Oct 16 15:18:45 BST 1991 * Created: Wed Oct 16 15:18:45 BST 1991
* *
* (C) Copyright 1991, Hewlett-Packard Ltd. * (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -29,10 +29,10 @@
* *
**********************************************************************/ **********************************************************************/
TBOX::TBOX( //constructor TBOX::TBOX( // constructor
const ICOORD pt1, //one corner const ICOORD pt1, // one corner
const ICOORD pt2 //the other corner const ICOORD pt2 // the other corner
) { ) {
if (pt1.x () <= pt2.x ()) { if (pt1.x () <= pt2.x ()) {
if (pt1.y () <= pt2.y ()) { if (pt1.y () <= pt2.y ()) {
bot_left = pt1; bot_left = pt1;

View File

@ -1,8 +1,8 @@
/********************************************************************** /**********************************************************************
* File: rect.h (Formerly box.h) * File: rect.h (Formerly box.h)
* Description: Bounding box class definition. * Description: Bounding box class definition.
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Wed Oct 16 15:18:45 BST 1991 * Created: Wed Oct 16 15:18:45 BST 1991
* *
* (C) Copyright 1991, Hewlett-Packard Ltd. * (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -307,9 +307,9 @@ class DLLSYM TBOX { // bounding box
* *
**********************************************************************/ **********************************************************************/
inline TBOX::TBOX( // constructor inline TBOX::TBOX( // constructor
const FCOORD pt // floating centre const FCOORD pt // floating centre
) { ) {
bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ())); bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ()));
top_right = ICOORD ((inT16) ceil (pt.x ()), (inT16) ceil (pt.y ())); top_right = ICOORD ((inT16) ceil (pt.x ()), (inT16) ceil (pt.y ()));
} }

View File

@ -267,10 +267,10 @@ void REJ::full_print(FILE *fp) {
//The REJMAP class has been hacked to use alloc_struct instead of new []. //The REJMAP class has been hacked to use alloc_struct instead of new [].
//This is to reduce memory fragmentation only as it is rather kludgy. //This is to reduce memory fragmentation only as it is rather kludgy.
//alloc_struct by-passes the call to the contsructor of REJ on each // alloc_struct by-passes the call to the constructor of REJ on each
//array element. Although the constructor is empty, the BITS16 members // array element. Although the constructor is empty, the BITS16 members
//do have a constructor which sets all the flags to 0. The memset // do have a constructor which sets all the flags to 0. The memset
//replaces this functionality. // replaces this functionality.
REJMAP::REJMAP( //classwise copy REJMAP::REJMAP( //classwise copy
const REJMAP &source) { const REJMAP &source) {

Some files were not shown because too many files have changed in this diff Show More