commit 6418da35ef
Jim O'Regan 2015-05-18 15:08:33 +01:00
226 changed files with 13827 additions and 9412 deletions

.gitignore

@ -0,0 +1,65 @@
# Windows
*.user
*.log
*.tlog
*.cache
*.obj
*.sdf
*.opensdf
*.lastbuildstate
*.unsuccessfulbuild
*.suo
*.res
*.ipch
*.manifest
vs2010/DLL_Debug/*
vs2010/DLL_Release/*
vs2010/LIB_Debug/*
vs2010/LIB_Release/*
vs2010/LIB_OpenCL_Release/*
vs2010/LIB_OpenCL_Debug/*
# Linux
# ignore local configuration
config.*
config/*
Makefile
Makefile.in
*.m4
# ignore help scripts/files
configure
libtool
stamp-h1
tesseract.pc
config_auto.h
doc/html/*
api/tesseract
training/ambiguous_words
training/classifier_tester
training/cntraining
training/combine_tessdata
training/dawg2wordlist
training/mftraining
training/set_unicharset_properties
training/shapeclustering
training/text2image
training/unicharset_extractor
training/wordlist2dawg
*.patch
# ignore compilation files
*/.deps/*
*/.libs/*
*.lo
*.la
*.o
*.Plo
*.a
# tessdata
*.cube.*
*.tesseract_cube.*
*.traineddata


@ -1,33 +1,30 @@
autotools (LINUX/UNIX...)
======================
If you have checked out Tesseract from Subversion, you must generate the
configure script.
If you have checked out Tesseract from Subversion, you must generate
the configure script.
If you have a Tesseract 3.0x installation on your system, please remove it
before the new build.
Known dependencies for training tools (excluding leptonica):
* compiler with C++ support
* pango-devel
* cairo-devel
* icu-devel
So, the steps for making Tesseract are:
$ ./autogen.sh
$ ./configure
$ make
$ sudo make install
$ sudo make install-langs
$ make training
$ sudo make training-install
'sudo make install-langs' or 'sudo make install LANGS=' will install all
available language data files in the tessdata directory.
If you want to install just a few of them, then run:
$ sudo make install-langs LANGS="eng ara deu"
This will install only the English, Arabic and German language data files (if
they are present in the tessdata directory).
$ sudo make install LANGS="eng ara deu"
will install the tesseract library, the tesseract programs, and the English,
Arabic and German language data files.
You need to install at least the English language data file to the
TESSDATA_PREFIX directory. All language data files can be retrieved from the
git repository:
$ git clone https://code.google.com/p/tesseract-ocr.tessdata/ tessdata
To compile ScrollView.jar you need to download piccolo2d-core-3.0.jar
and piccolo2d-extras-3.0.jar[1] and place them in tesseract/java.


@ -18,7 +18,7 @@ endif
SUBDIRS = ccutil viewer cutil opencl ccstruct dict classify wordrec neural_networks/runtime textord cube ccmain api . tessdata doc
EXTRA_DIST = eurotext.tif phototest.tif ReleaseNotes \
EXTRA_DIST = ReleaseNotes \
aclocal.m4 config configure.ac autogen.sh contrib \
tesseract.pc.in $(TRAINING_SUBDIR) java doc testing
@ -34,31 +34,12 @@ dist-hook:
rm -rf `find $(distdir) -name .svn`
rm -rf `find $(distdir) -name .git`
rm -rf `find $(distdir) -name .deps`
# 'make install' will install only libraries and programs (no language
# data files)
# 'make install LANGS=' will install libraries, programs and all
# language datafiles in tessdata/
# 'make install LANGS="eng ara deu"' will install libraries, programs
# English, Arabic and German language datafiles if they are present
# in tessdata/
# 'make install-langs LANGS="eng ara deu"' will install only English,
# Arabic and German language datafiles if they are present
# in tessdata/
install-data-hook:
@if test $${LANGS+defined}; then \
if test "$${LANGS}" == ""; then \
echo ____All language files will be installed; \
else \
echo ___Folowing language files will be installed: "$$LANGS"; \
fi; \
cd "$(top_builddir)/tessdata" && $(MAKE) install-langs LANG="${LANGS}"; \
else \
echo No language file is installed.; \
fi;
install-langs:
@cd "$(top_builddir)/tessdata" && $(MAKE) $@
rm -rf `find $(distdir) -name .libs`
rm -rf `find $(distdir) -name *.o`
rm -rf `find $(distdir) -name *.lo`
rm -rf `find $(distdir) -name *.la`
rm -rf `find $(distdir)/training -executable -type f`
rm -rf $(distdir)/doc/html/*
ScrollView.jar:
@cd "$(top_builddir)/java" && $(MAKE) $@


@ -1,6 +1,7 @@
Note that this is a text-only and possibly out-of-date version of the
wiki ReadMe, which is located at:
http://code.google.com/p/tesseract-ocr/wiki/ReadMe
https://github.com/tesseract-ocr/tesseract/blob/master/README
Introduction
============
@ -10,15 +11,15 @@ Originally developed at Hewlett Packard Laboratories Bristol and
at Hewlett Packard Co, Greeley Colorado, all the code
in this distribution is now licensed under the Apache License:
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
Dependencies and Licenses
@ -56,7 +57,7 @@ those that want to do their own training. Most users should NOT download
these files.
Instructions for using the training tools are documented separately at
Tesseract wiki http://code.google.com/p/tesseract-ocr/w/list
Tesseract wiki https://github.com/tesseract-ocr/tesseract/wiki
Windows
@ -64,6 +65,9 @@ Windows
Please use the installer (for 3.00 and above). Tesseract is a library with a
command-line interface. If you need a GUI, please check the AddOns wiki page
TODO-UPDATE-WIKI-LINKS
http://code.google.com/p/tesseract-ocr/wiki/AddOns#GUI
If you are building from the sources, the recommended build platform is
@ -82,6 +86,9 @@ tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...]
If you need an interface to other applications, please check the wrapper
section on the AddOns wiki page:
TODO-UPDATE-WIKI-LINKS
http://code.google.com/p/tesseract-ocr/wiki/AddOns#Tesseract_3.0x
@ -112,6 +119,10 @@ If you are linking to the libraries, as Ocropus does, please link to
libtesseract_api.
If you get `leptonica not found` and you've installed it with e.g. homebrew, you
can run `CPPFLAGS="-I/usr/local/include" LDFLAGS="-L/usr/local/lib" ./configure`
instead of `./configure` above.
History
=======


@ -0,0 +1,4 @@
<!--
This file is needed by the android_native_library rule to determine the
project directory for ndk-build.
-->

android/Makefile.am

@ -0,0 +1 @@
EXTRA_DIST = AndroidManifest.xml jni/Android.mk jni/Application.mk

android/jni/Android.mk

@ -0,0 +1,57 @@
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE := tesseract-$(APP_ABI)
LOCAL_STATIC_LIBRARIES := \
mobile_base \
leptonica-$(APP_ABI)
LOCAL_C_INCLUDES := $(APP_C_INCLUDES)
LOCAL_C_INCLUDES += \
$(LOCAL_PATH)/../../api \
$(LOCAL_PATH)/../../ccmain\
$(LOCAL_PATH)/../../ccstruct\
$(LOCAL_PATH)/../../ccutil\
$(LOCAL_PATH)/../../classify\
$(LOCAL_PATH)/../../cutil\
$(LOCAL_PATH)/../../dict\
$(LOCAL_PATH)/../../image\
$(LOCAL_PATH)/../../textord\
$(LOCAL_PATH)/../../third_party\
$(LOCAL_PATH)/../../wordrec\
$(LOCAL_PATH)/../../opencl\
$(LOCAL_PATH)/../../viewer\
$(LOCAL_PATH)/../../../leptonica/include
$(info local c includes=$(LOCAL_C_INCLUDES))
$(info local path=$(LOCAL_PATH))
LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/../../api/*.cpp $(LOCAL_PATH)/../../ccmain/*.cpp $(LOCAL_PATH)/../../ccstruct/*.cpp $(LOCAL_PATH)/../../ccutil/*.cpp $(LOCAL_PATH)/../../classify/*.cpp $(LOCAL_PATH)/../../cutil/*.cpp $(LOCAL_PATH)/../../dict/*.cpp $(LOCAL_PATH)/../../image/*.cpp $(LOCAL_PATH)/../../textord/*.cpp $(LOCAL_PATH)/../../viewer/*.cpp $(LOCAL_PATH)/../../wordrec/*.cpp)
EXPLICIT_SRC_EXCLUDES := \
$(LOCAL_PATH)/../../ccmain/cubeclassifier.cpp \
$(LOCAL_PATH)/../../ccmain/cubeclassifier.h \
$(LOCAL_PATH)/../../ccmain/cube_control.cpp \
$(LOCAL_PATH)/../../ccmain/cube_reco_context.cpp \
$(LOCAL_PATH)/../../ccmain/cube_reco_context.h \
$(LOCAL_PATH)/../../ccmain/tesseract_cube_combiner.cpp \
$(LOCAL_PATH)/../../ccmain/tesseract_cube_combiner.h \
$(LOCAL_PATH)/../../api/pdfrenderer.cpp \
$(LOCAL_PATH)/../../api/tesseractmain.cpp \
LOCAL_SRC_FILES := $(filter-out $(EXPLICIT_SRC_EXCLUDES), $(LOCAL_SRC_FILES))
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES:$(LOCAL_PATH)/%=%)
$(info local src files = $(LOCAL_SRC_FILES))
LOCAL_LDLIBS := -ldl -llog -ljnigraphics
LOCAL_CFLAGS := -DANDROID_BUILD -DGRAPHICS_DISABLED
include $(BUILD_SHARED_LIBRARY)
$(call import-module,mobile/base)
$(call import-module,mobile/base)
$(call import-module,mobile/util/hash)
$(call import-module,third_party/leptonica/android/jni)


@ -0,0 +1,13 @@
# Include common.mk for building google3 native code.
DEPOT_PATH := $(firstword $(subst /google3, ,$(abspath $(call my-dir))))
ifneq ($(wildcard $(DEPOT_PATH)/google3/mobile/build/common.mk),)
include $(DEPOT_PATH)/google3/mobile/build/common.mk
else
include $(DEPOT_PATH)/READONLY/google3/mobile/build/common.mk
endif
# Specify the hash namespace that we're using, based on the APP_STL we're using.
APP_CFLAGS += -Werror -DHASH_NAMESPACE=__gnu_cxx -Wno-error=deprecated-register
APP_PLATFORM := android-16
APP_STL := gnustl_static
NDK_TOOLCHAIN_VERSION := clang


@ -66,7 +66,7 @@ libtesseract_la_LIBADD = \
libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION)
bin_PROGRAMS = tesseract
tesseract_SOURCES = $(top_srcdir)/api/tesseractmain.cpp
tesseract_SOURCES = tesseractmain.cpp
tesseract_CPPFLAGS = $(AM_CPPFLAGS)
if VISIBILITY
tesseract_CPPFLAGS += -DTESS_IMPORTS
@ -78,7 +78,7 @@ if USE_OPENCL
tesseract_LDADD += $(OPENCL_LIB)
endif
if MINGW
if T_WIN
tesseract_LDADD += -lws2_32
libtesseract_la_LDFLAGS += -no-undefined -Wl,--as-needed -lws2_32
endif


@ -28,6 +28,7 @@
#if defined(_WIN32)
#ifdef _MSC_VER
#include "vcsversion.h"
#include "mathfix.h"
#elif MINGW
// workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
@ -51,6 +52,7 @@
#include "allheaders.h"
#include "baseapi.h"
#include "blobclass.h"
#include "resultiterator.h"
#include "mutableiterator.h"
#include "thresholder.h"
@ -138,7 +140,11 @@ TessBaseAPI::~TessBaseAPI() {
* Returns the version identifier as a static string. Do not delete.
*/
const char* TessBaseAPI::Version() {
#if defined(GIT_REV) && (defined(DEBUG) || defined(_DEBUG))
return GIT_REV;
#else
return TESSERACT_VERSION_STR;
#endif
}
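For illustration, a minimal sketch of querying the version at runtime; the Version() call is from this file, the surrounding main() is an assumption:

#include "baseapi.h"
#include <cstdio>

int main() {
  // Prints "3.04.00" in a release build; with GIT_REV defined in a debug
  // build it prints the git revision instead.
  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
  return 0;
}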
/**
@ -741,6 +747,7 @@ void TessBaseAPI::DumpPGM(const char* filename) {
fclose(fp);
}
#ifndef ANDROID_BUILD
/**
* Placeholder for call to Cube and test that the input data is correct.
* reskew is the direction of baselines in the skewed image in
@ -785,11 +792,16 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
ASSERT_HOST(pr_word == word_count);
return 0;
}
#endif
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns NULL on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
@ -797,11 +809,11 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator* TessBaseAPI::AnalyseLayout() {
PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
if (FindLines() == 0) {
if (block_list_->empty())
return NULL; // The page was empty.
page_res_ = new PAGE_RES(block_list_, NULL);
page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
DetectParagraphs(false);
return new PageIterator(
page_res_, tesseract_, thresholder_->GetScaleFactor(),
@ -823,18 +835,22 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
if (page_res_ != NULL)
delete page_res_;
if (block_list_->empty()) {
page_res_ = new PAGE_RES(block_list_, &tesseract_->prev_word_best_choice_);
page_res_ = new PAGE_RES(false, block_list_,
&tesseract_->prev_word_best_choice_);
return 0; // Empty page.
}
tesseract_->SetBlackAndWhitelist();
recognition_done_ = true;
if (tesseract_->tessedit_resegment_from_line_boxes)
if (tesseract_->tessedit_resegment_from_line_boxes) {
page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
else if (tesseract_->tessedit_resegment_from_boxes)
} else if (tesseract_->tessedit_resegment_from_boxes) {
page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
else
page_res_ = new PAGE_RES(block_list_, &tesseract_->prev_word_best_choice_);
} else {
// TODO(rays) LSTM here.
page_res_ = new PAGE_RES(false,
block_list_, &tesseract_->prev_word_best_choice_);
}
if (tesseract_->tessedit_make_boxes_from_boxes) {
tesseract_->CorrectClassifyWords(page_res_);
return 0;
@ -862,7 +878,9 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
page_res_ = NULL;
return -1;
} else if (tesseract_->tessedit_train_from_boxes) {
tesseract_->ApplyBoxTraining(*output_file_, page_res_);
STRING fontname;
ExtractFontName(*output_file_, &fontname);
tesseract_->ApplyBoxTraining(fontname, page_res_);
} else if (tesseract_->tessedit_ambigs_training) {
FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
// OCR the page segmented into words by tesseract.
@ -900,7 +918,8 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
recognition_done_ = true;
page_res_ = new PAGE_RES(block_list_, &(tesseract_->prev_word_best_choice_));
page_res_ = new PAGE_RES(false, block_list_,
&(tesseract_->prev_word_best_choice_));
PAGE_RES_IT page_res_it(page_res_);
@ -1010,6 +1029,7 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
int timeout_millisec,
TessResultRenderer* renderer,
int tessedit_page_number) {
#ifndef ANDROID_BUILD
Pix *pix = NULL;
#ifdef USE_OPENCL
OpenclDevice od;
@ -1040,6 +1060,26 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
if (tessedit_page_number >= 0) break;
}
return true;
#else
return false;
#endif
}
// Master ProcessPages calls ProcessPagesInternal and then does any post-
// processing required due to being in a training mode.
bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer) {
bool result =
ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
if (result) {
if (tesseract_->tessedit_train_from_boxes &&
!tesseract_->WriteTRFile(*output_file_)) {
tprintf("Write of TR file failed: %s\n", output_file_->string());
return false;
}
}
return result;
}
// In the ideal scenario, Tesseract will start working on data as soon
@ -1054,9 +1094,11 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
// identify the scenario that really matters: filelists on
// stdin. We'll still do our best if the user likes pipes. That means
// piling up any data coming into stdin into a memory buffer.
bool TessBaseAPI::ProcessPages(const char* filename,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer) {
bool TessBaseAPI::ProcessPagesInternal(const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer) {
#ifndef ANDROID_BUILD
PERF_COUNT_START("ProcessPages")
bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
if (stdInput) {
@ -1144,6 +1186,9 @@ bool TessBaseAPI::ProcessPages(const char* filename,
}
PERF_COUNT_END
return true;
#else
return false;
#endif
}
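As a hedged sketch of the stdin handling described below (the renderer and output base are placeholders):

tesseract::TessTextRenderer renderer("outputbase");
// Passing "stdin" or "-" makes ProcessPages read the image (or a filelist)
// from standard input, buffering piped data in memory first.
if (!api.ProcessPages("-", NULL, 0, &renderer)) {
  fprintf(stderr, "Error during processing.\n");
}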
bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
@ -1177,8 +1222,10 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
failed = Recognize(NULL) < 0;
}
if (tesseract_->tessedit_write_images) {
#ifndef ANDROID_BUILD
Pix* page_pix = GetThresholdedImage();
pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
#endif
}
if (failed && retry_config != NULL && retry_config[0] != '\0') {
// Save current config variables before switching modes.
@ -1384,7 +1431,7 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
hocr_str.add_str_int(" <div class='ocr_page' id='page_", page_id);
hocr_str += "' title='image \"";
if (input_file_) {
HOcrEscape(input_file_->string(), hocr_str);
hocr_str += HOcrEscape(input_file_->string());
} else {
hocr_str += "unknown";
}
@ -1444,7 +1491,7 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
if (font_info) {
hocr_str += "; x_font ";
HOcrEscape(font_name, hocr_str);
hocr_str += HOcrEscape(font_name);
hocr_str.add_str_int("; x_fsize ", pointsize);
}
hocr_str += "'";
@ -1468,11 +1515,7 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
if (grapheme && grapheme[0] != 0) {
if (grapheme[1] == 0) {
HOcrEscape(grapheme, hocr_str);
} else {
hocr_str += grapheme;
}
hocr_str += HOcrEscape(grapheme);
}
delete []grapheme;
res_it->Next(RIL_SYMBOL);
@ -1883,6 +1926,10 @@ void TessBaseAPI::ClearPersistentCache() {
int TessBaseAPI::IsValidWord(const char *word) {
return tesseract_->getDict().valid_word(word);
}
// Returns true if utf8_character is defined in the UniCharset.
bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
return tesseract_->unicharset.contains_unichar(utf8_character);
}
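A hedged usage sketch of the new call (the Init arguments are placeholders):

tesseract::TessBaseAPI api;
api.Init(NULL, "eng");
// True only if the UTF-8 character exists in the loaded unicharset.
bool known = api.IsValidCharacter("ä");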
// TODO(rays) Obsolete this function and replace with a more aptly named
@ -1931,6 +1978,10 @@ void TessBaseAPI::SetDictFunc(DictFunc f) {
/**
* Sets Dict::probability_in_context_ function to point to the given
* function.
*
* @param f A single function that returns the probability of the current
* "character" (in general a utf-8 string), given the context of a previous
* utf-8 string.
*/
void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
if (tesseract_ != NULL) {
@ -1977,7 +2028,10 @@ void TessBaseAPI::Threshold(Pix** pix) {
// than over-estimate resolution.
thresholder_->SetSourceYResolution(kMinCredibleResolution);
}
thresholder_->ThresholdToPix(pix);
PageSegMode pageseg_mode =
static_cast<PageSegMode>(
static_cast<int>(tesseract_->tessedit_pageseg_mode));
thresholder_->ThresholdToPix(pageseg_mode, pix);
thresholder_->GetImageSizes(&rect_left_, &rect_top_,
&rect_width_, &rect_height_,
&image_width_, &image_height_);
@ -2332,7 +2386,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
PAGE_RES *page_res = new PAGE_RES(block_list,
PAGE_RES *page_res = new PAGE_RES(false, block_list,
&(tesseract_->prev_word_best_choice_));
tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1);
return page_res;
@ -2341,7 +2395,7 @@ PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
PAGE_RES* pass1_result) {
if (!pass1_result)
pass1_result = new PAGE_RES(block_list,
pass1_result = new PAGE_RES(false, block_list,
&(tesseract_->prev_word_best_choice_));
tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2);
return pass1_result;
@ -2576,14 +2630,16 @@ int TessBaseAPI::NumDawgs() const {
return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
}
#ifndef ANDROID_BUILD
/** Return a pointer to underlying CubeRecoContext object if present. */
CubeRecoContext *TessBaseAPI::GetCubeRecoContext() const {
return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
}
#endif
/** Escape a char string - remove <>&"' with HTML codes. */
void HOcrEscape(const char* text, STRING& ret) {
STRING HOcrEscape(const char* text) {
STRING ret;
const char *ptr;
for (ptr = text; *ptr; ptr++) {
switch (*ptr) {
@ -2595,5 +2651,7 @@ void HOcrEscape(const char* text, STRING& ret) {
default: ret += *ptr;
}
}
return ret;
}
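A brief sketch of the new value-returning form (the previous overload filled a STRING& out-parameter instead):

// <, >, &, " and ' are replaced with their HTML entities.
STRING escaped = tesseract::HOcrEscape("Fish & Chips <deluxe>");
// escaped now holds "Fish &amp; Chips &lt;deluxe&gt;"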
} // namespace tesseract.


@ -20,8 +20,8 @@
#ifndef TESSERACT_API_BASEAPI_H__
#define TESSERACT_API_BASEAPI_H__
#define TESSERACT_VERSION_STR "3.03.00"
#define TESSERACT_VERSION 0x030300
#define TESSERACT_VERSION_STR "3.04.00"
#define TESSERACT_VERSION 0x030400
#define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
(patch))
@ -484,14 +484,21 @@ class TESS_API TessBaseAPI {
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* Returns NULL on error.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns NULL on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator* AnalyseLayout();
PageIterator* AnalyseLayout() {
return AnalyseLayout(false);
}
PageIterator* AnalyseLayout(bool merge_similar_words);
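For illustration, a hedged sketch of using the new overload to hand textline boxes to an external line recognizer (everything around the AnalyseLayout call is an assumption):

api.SetImage(pix);
tesseract::PageIterator* line_it = api.AnalyseLayout(true);
if (line_it != NULL) {
  do {
    int left, top, right, bottom;
    // With merge_similar_words the word boxes approximate whole textlines.
    line_it->BoundingBox(tesseract::RIL_TEXTLINE, &left, &top, &right, &bottom);
  } while (line_it->Next(tesseract::RIL_TEXTLINE));
  delete line_it;
}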
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
@ -531,9 +538,11 @@ class TESS_API TessBaseAPI {
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char* filename,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer);
bool ProcessPages(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
/**
* Turn a single image into symbolic text.
@ -649,6 +658,9 @@ class TESS_API TessBaseAPI {
* in a separate API at some future time.
*/
int IsValidWord(const char *word);
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character);
bool GetTextDirection(int* out_offset, float* out_slope);
@ -870,7 +882,7 @@ class TESS_API TessBaseAPI {
}; // class TessBaseAPI.
/** Escape a char string - remove &<>"' with HTML codes. */
void HOcrEscape(const char* text, STRING& ret);
STRING HOcrEscape(const char* text);
} // namespace tesseract.
#endif // TESSERACT_API_BASEAPI_H__


@ -667,6 +667,18 @@ TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, Te
handle->Orientation(orientation, writing_direction, textline_order, deskew_angle);
}
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent)
{
bool bool_is_list_item, bool_is_crown;
handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown, first_line_indent);
if (is_list_item)
*is_list_item = bool_is_list_item ? TRUE : FALSE;
if (is_crown)
*is_crown = bool_is_crown ? TRUE : FALSE;
}
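A short hedged sketch of calling the new C entry point; the page iterator handle is assumed to come from TessBaseAPIAnalyseLayout or a result iterator:

TessParagraphJustification just;
BOOL is_list_item, is_crown;
int first_line_indent;
TessPageIteratorParagraphInfo(page_it, &just, &is_list_item, &is_crown,
                              &first_line_indent);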
TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle)
{
delete handle;
@ -687,7 +699,7 @@ TESS_API const TessPageIterator* TESS_CALL TessResultIteratorGetPageIteratorCons
return handle;
}
TESS_API const TessChoiceIterator* TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle)
TESS_API TessChoiceIterator* TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle)
{
return new TessChoiceIterator(*handle);
}


@ -53,6 +53,7 @@ typedef tesseract::Dawg TessDawg;
typedef tesseract::TruthCallback TessTruthCallback;
typedef tesseract::CubeRecoContext TessCubeRecoContext;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef PolyBlockType TessPolyBlockType;
@ -77,6 +78,7 @@ typedef enum TessPolyBlockType { PT_UNKNOWN, PT_FLOWING_TEXT, PT_HEADING_TEX
PT_TABLE, PT_VERTICAL_TEXT, PT_CAPTION_TEXT, PT_FLOWING_IMAGE, PT_HEADING_IMAGE,
PT_PULLOUT_IMAGE, PT_HORZ_LINE, PT_VERT_LINE, PT_NOISE, PT_COUNT } TessPolyBlockType;
typedef enum TessOrientation { ORIENTATION_PAGE_UP, ORIENTATION_PAGE_RIGHT, ORIENTATION_PAGE_DOWN, ORIENTATION_PAGE_LEFT } TessOrientation;
typedef enum TessParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT } TessParagraphJustification;
typedef enum TessWritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT, WRITING_DIRECTION_RIGHT_TO_LEFT, WRITING_DIRECTION_TOP_TO_BOTTOM } TessWritingDirection;
typedef enum TessTextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT, TEXTLINE_ORDER_RIGHT_TO_LEFT, TEXTLINE_ORDER_TOP_TO_BOTTOM } TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
@ -299,7 +301,7 @@ TESS_API TessCubeRecoContext*
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing);
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, BOOL** vertical_writing);
TESS_API BLOCK_LIST*
TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
@ -335,6 +337,9 @@ TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, T
TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
float* deskew_angle);
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle);
@ -344,7 +349,7 @@ TESS_API TessPageIterator*
TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle);
TESS_API const TessPageIterator*
TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
TESS_API const TessChoiceIterator*
TESS_API TessChoiceIterator*
TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level);

File diff suppressed because it is too large.


@ -114,6 +114,13 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
AppendString(utf8);
delete[] utf8;
bool pageBreak = false;
api->GetBoolVariable("include_page_breaks", &pageBreak);
const char* pageSeparator = api->GetStringVariable("page_separator");
if (pageBreak) {
AppendString(pageSeparator);
}
return true;
}
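As a hedged example, the two variables read above could be set before processing; the values shown are illustrative, not defaults:

api.SetVariable("include_page_breaks", "1");  // ask the text renderer to add breaks
api.SetVariable("page_separator", "\f");      // string appended after each page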


@ -192,14 +192,9 @@ private:
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
static char* GetPDFTextObjects(TessBaseAPI* api,
double width, double height,
int page_number);
// Attempt to create a PDF object from an image without transcoding.
static bool fileToPDFObj(char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size);
// Turn a Pix into the very best PDF object that we can.
static bool pixToPDFObj(tesseract::TessBaseAPI *api, Pix *pix,
long int objnum,
double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size);
};


@ -287,33 +287,36 @@ int main(int argc, char **argv) {
exit(ret_val);
}
tesseract::TessResultRenderer* renderer = NULL;
bool b;
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
api.GetBoolVariable("tessedit_create_hocr", &b);
if (b) {
bool font_info;
api.GetBoolVariable("hocr_font_info", &font_info);
renderer = new tesseract::TessHOcrRenderer(outputbase, font_info);
renderers.push_back(new tesseract::TessHOcrRenderer(outputbase, font_info));
}
api.GetBoolVariable("tessedit_create_pdf", &b);
if (b && renderer == NULL)
renderer = new tesseract::TessPDFRenderer(outputbase, api.GetDatapath());
if (b) {
renderers.push_back(new tesseract::TessPDFRenderer(outputbase,
api.GetDatapath()));
}
api.GetBoolVariable("tessedit_write_unlv", &b);
if (b && renderer == NULL)
renderer = new tesseract::TessUnlvRenderer(outputbase);
if (b) renderers.push_back(new tesseract::TessUnlvRenderer(outputbase));
api.GetBoolVariable("tessedit_create_boxfile", &b);
if (b && renderer == NULL)
renderer = new tesseract::TessBoxTextRenderer(outputbase);
if (renderer == NULL)
renderer = new tesseract::TessTextRenderer(outputbase);
if (!api.ProcessPages(image, NULL, 0, renderer)) {
fprintf(stderr, "Error during processing.\n");
exit(1);
if (b) renderers.push_back(new tesseract::TessBoxTextRenderer(outputbase));
api.GetBoolVariable("tessedit_create_txt", &b);
if (b) renderers.push_back(new tesseract::TessTextRenderer(outputbase));
if (!renderers.empty()) {
// Since the PointerVector auto-deletes, null-out the renderers that are
// added to the root, and leave the root in the vector.
for (int r = 1; r < renderers.size(); ++r) {
renderers[0]->insert(renderers[r]);
renderers[r] = NULL;
}
if (!api.ProcessPages(image, NULL, 0, renderers[0])) {
fprintf(stderr, "Error during processing.\n");
exit(1);
}
}
PERF_COUNT_END
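A minimal sketch of the chaining idea above, without the PointerVector bookkeeping (output base and file name are placeholders):

tesseract::TessHOcrRenderer hocr("out", false);  // root renderer
tesseract::TessTextRenderer text("out");
hocr.insert(&text);                              // chained: both receive every page
if (!api.ProcessPages("page.tif", NULL, 0, &hocr)) {
  fprintf(stderr, "Error during processing.\n");
}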


@ -110,30 +110,20 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
bool find_segmentation,
BLOCK_LIST *block_list) {
int box_count = 0;
int box_failures = 0;
FILE* box_file = OpenBoxFile(fname);
TBOX box;
GenericVector<TBOX> boxes;
GenericVector<STRING> texts, full_texts;
bool found_box = true;
while (found_box) {
int line_number = 0; // Line number of the box file.
STRING text, full_text;
found_box = ReadNextBox(applybox_page, &line_number, box_file, &text, &box);
if (found_box) {
++box_count;
MakeBoxFileStr(text.string(), box, applybox_page, &full_text);
} else {
full_text = "";
}
boxes.push_back(box);
texts.push_back(text);
full_texts.push_back(full_text);
if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts,
NULL)) {
return NULL; // Can't do it.
}
int box_count = boxes.size();
int box_failures = 0;
// Add an empty everything to the end.
boxes.push_back(TBOX());
texts.push_back(STRING());
full_texts.push_back(STRING());
// In word mode, we use the boxes to make a word for each box, but
// in blob mode we use the existing words and maximally chop them first.
PAGE_RES* page_res = find_segmentation ?
@ -239,7 +229,7 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
}
}
}
PAGE_RES* page_res = new PAGE_RES(block_list, NULL);
PAGE_RES* page_res = new PAGE_RES(false, block_list, NULL);
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
while ((word_res = pr_it.word()) != NULL) {
@ -282,7 +272,7 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
// limited by the ability of the chopper to find suitable chop points,
// and not by the value of the certainties.
BLOB_CHOICE* choice =
new BLOB_CHOICE(0, rating, -rating, -1, -1, 0, 0, 0, 0, BCC_FAKE);
new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE);
blob_choices.push_back(choice);
rating -= 0.125f;
}
@ -301,8 +291,8 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
left_choice->set_certainty(-rating);
// combine confidence w/ serial #
BLOB_CHOICE* right_choice = new BLOB_CHOICE(++right_chop_index,
rating - 0.125f, -rating,
-1, -1, 0, 0, 0, 0, BCC_FAKE);
rating - 0.125f, -rating, -1,
0.0f, 0.0f, 0.0f, BCC_FAKE);
blob_choices.insert(right_choice, blob_number + 1);
}
}
@ -592,7 +582,7 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
int blob_count = 1;
for (int s = 0; s < word_res->seam_array.size(); ++s) {
SEAM* seam = word_res->seam_array[s];
if (seam->split1 == NULL) {
if (!seam->HasAnySplits()) {
word_res->best_state.push_back(blob_count);
blob_count = 1;
} else {
@ -785,13 +775,13 @@ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
}
// Calls LearnWord to extract features for labelled blobs within each word.
// Features are written to the given filename.
void Tesseract::ApplyBoxTraining(const STRING& filename, PAGE_RES* page_res) {
// Features are stored in an internal buffer.
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res);
int word_count = 0;
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
word_res = pr_it.forward()) {
LearnWord(filename.string(), word_res);
LearnWord(fontname.string(), word_res);
++word_count;
}
tprintf("Generated training data for %d words\n", word_count);

File diff suppressed because it is too large.


@ -383,8 +383,8 @@ bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
for (int i = 0; i < num_chars; ++i) {
UNICHAR_ID uch_id =
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
choices[i] = new BLOB_CHOICE(uch_id, 0.0, cube_certainty, -1, -1,
0, 0, 0, 0, BCC_STATIC_CLASSIFIER);
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
-1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
}
word->FakeClassifyWord(num_chars, choices);
// within a word, cube recognizes the word in reading order.


@ -205,7 +205,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
if ((!word->part_of_combo) && (word->box_word == NULL)) {
WordData word_data(block, row, word);
SetupWordPassN(2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
classify_word_and_language(2, NULL, &word_data);
}
prev_word_best_choice_ = word->best_choice;
}


@ -35,6 +35,8 @@ namespace tesseract {
// guessed that the blob tops are caps and will have placed the xheight too low.
// 3. Noise/logos beside words, or changes in font size on a line. Such
// things can blow the statistics and cause an incorrect estimate.
// 4. Incorrect baseline. Can happen when 2 columns are incorrectly merged.
// In this case the x-height is often still correct.
//
// Algorithm.
// Compare the vertical position (top only) of alphanumerics in a word with
@ -54,6 +56,10 @@ namespace tesseract {
// even if the x-height is incorrect. This is not a terrible assumption, but
// it is not great. An improvement would be to use a classifier that does
// not care about vertical position or scaling at all.
// Separately collect stats on shifted baselines and apply the same logic to
// computing a best-fit shift to fix the error. If the baseline needs to be
// shifted, but the x-height is OK, returns the original x-height along with
// the baseline shift to indicate that recognition needs to re-run.
// If the max-min top of a unicharset char is bigger than kMaxCharTopRange
// then the char top cannot be used to judge misfits or suggest a new top.
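To make the proportionality step concrete, a small hedged illustration (the numbers are invented; kBlnBaselineOffset = 64 and kBlnXHeight = 128 are assumed values for the constants used below):

// A digit's blob top sits at 180 after normalization; the unicharset
// expects tops for that digit in [190, 200].
int height  = 180 - kBlnBaselineOffset;                                   // 116
int min_xht = DivRounded(height * kBlnXHeight, 200 - kBlnBaselineOffset); // ~109
int max_xht = DivRounded(height * kBlnXHeight, 190 - kBlnBaselineOffset); // ~118
// Every value in [min_xht, max_xht] gets a vote weighted by misfit_dist.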
@ -92,65 +98,108 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
// Returns a new x-height maximally compatible with the result in word_res.
// See comment above for overall algorithm.
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res) {
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
float* baseline_shift) {
STATS top_stats(0, MAX_UINT8);
STATS shift_stats(-MAX_UINT8, MAX_UINT8);
int bottom_shift = 0;
int num_blobs = word_res->rebuild_word->NumBlobs();
for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top();
// Clip the top to the limit of normalized feature space.
if (top >= INT_FEAT_RANGE)
top = INT_FEAT_RANGE - 1;
int bottom = blob->bounding_box().bottom();
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
&min_top, &max_top);
// Chars with a wild top range would mess up the result so ignore them.
if (max_top - min_top > kMaxCharTopRange)
continue;
int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top,
top - (max_top + x_ht_acceptance_tolerance));
int height = top - kBlnBaselineOffset;
if (debug_x_ht_level >= 20) {
tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d : ",
unicharset.id_to_unichar(class_id),
height, min_bottom, max_bottom, min_top, max_top,
bottom, top);
}
// Use only chars that fit in the expected bottom range, and where
// the range of tops is sensibly near the xheight.
if (min_bottom <= bottom + x_ht_acceptance_tolerance &&
bottom - x_ht_acceptance_tolerance <= max_bottom &&
min_top > kBlnBaselineOffset &&
max_top - kBlnBaselineOffset >= kBlnXHeight &&
misfit_dist > 0) {
// Compute the x-height position using proportionality between the
// actual height and expected height.
int min_xht = DivRounded(height * kBlnXHeight,
max_top - kBlnBaselineOffset);
int max_xht = DivRounded(height * kBlnXHeight,
min_top - kBlnBaselineOffset);
if (debug_x_ht_level >= 20) {
tprintf(" xht range min=%d, max=%d\n",
min_xht, max_xht);
do {
top_stats.clear();
shift_stats.clear();
for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
if (unicharset.get_isalpha(class_id) ||
unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top() + bottom_shift;
// Clip the top to the limit of normalized feature space.
if (top >= INT_FEAT_RANGE)
top = INT_FEAT_RANGE - 1;
int bottom = blob->bounding_box().bottom() + bottom_shift;
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
&min_top, &max_top);
// Chars with a wild top range would mess up the result so ignore them.
if (max_top - min_top > kMaxCharTopRange)
continue;
int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top,
top - (max_top + x_ht_acceptance_tolerance));
int height = top - kBlnBaselineOffset;
if (debug_x_ht_level >= 2) {
tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ",
unicharset.id_to_unichar(class_id),
height, min_bottom, max_bottom, min_top, max_top,
bottom, top);
}
// Use only chars that fit in the expected bottom range, and where
// the range of tops is sensibly near the xheight.
if (min_bottom <= bottom + x_ht_acceptance_tolerance &&
bottom - x_ht_acceptance_tolerance <= max_bottom &&
min_top > kBlnBaselineOffset &&
max_top - kBlnBaselineOffset >= kBlnXHeight &&
misfit_dist > 0) {
// Compute the x-height position using proportionality between the
// actual height and expected height.
int min_xht = DivRounded(height * kBlnXHeight,
max_top - kBlnBaselineOffset);
int max_xht = DivRounded(height * kBlnXHeight,
min_top - kBlnBaselineOffset);
if (debug_x_ht_level >= 2) {
tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht);
}
// The range of expected heights gets a vote equal to the distance
// of the actual top from the expected top.
for (int y = min_xht; y <= max_xht; ++y)
top_stats.add(y, misfit_dist);
} else if ((min_bottom > bottom + x_ht_acceptance_tolerance ||
bottom - x_ht_acceptance_tolerance > max_bottom) &&
bottom_shift == 0) {
// Get the range of required bottom shift.
int min_shift = min_bottom - bottom;
int max_shift = max_bottom - bottom;
if (debug_x_ht_level >= 2) {
tprintf(" bottom shift min=%d, max=%d\n", min_shift, max_shift);
}
// The range of expected shifts gets a vote equal to the min distance
// of the actual bottom from the expected bottom, spread over the
// range of its acceptance.
int misfit_weight = abs(min_shift);
if (max_shift > min_shift)
misfit_weight /= max_shift - min_shift;
for (int y = min_shift; y <= max_shift; ++y)
shift_stats.add(y, misfit_weight);
} else {
if (bottom_shift == 0) {
// Things with bottoms that are already ok need to say so, on the
// 1st iteration only.
shift_stats.add(0, kBlnBaselineOffset);
}
if (debug_x_ht_level >= 2) {
tprintf(" already OK\n");
}
}
// The range of expected heights gets a vote equal to the distance
// of the actual top from the expected top.
for (int y = min_xht; y <= max_xht; ++y)
top_stats.add(y, misfit_dist);
} else if (debug_x_ht_level >= 20) {
tprintf(" already OK\n");
}
}
if (shift_stats.get_total() > top_stats.get_total()) {
bottom_shift = IntCastRounded(shift_stats.median());
if (debug_x_ht_level >= 2) {
tprintf("Applying bottom shift=%d\n", bottom_shift);
}
}
} while (bottom_shift != 0 &&
top_stats.get_total() < shift_stats.get_total());
// Baseline shift is opposite sign to the bottom shift.
*baseline_shift = -bottom_shift / word_res->denorm.y_scale();
if (debug_x_ht_level >= 2) {
tprintf("baseline shift=%g\n", *baseline_shift);
}
if (top_stats.get_total() == 0)
return 0.0f;
return bottom_shift != 0 ? word_res->x_height : 0.0f;
// The new xheight is just the median vote, which is then scaled out
// of BLN space back to pixel space to get the x-height in pixel space.
float new_xht = top_stats.median();
if (debug_x_ht_level >= 20) {
if (debug_x_ht_level >= 2) {
tprintf("Median xht=%f\n", new_xht);
tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n",
new_xht, new_xht / word_res->denorm.y_scale());
@ -159,7 +208,7 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res) {
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
return new_xht / word_res->denorm.y_scale();
else
return 0.0f;
return bottom_shift != 0 ? word_res->x_height : 0.0f;
}
} // namespace tesseract


@ -544,10 +544,10 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
osr_->scripts_na[i][japanese_id_] += 1.0;
if (prev_id == hangul_id_)
osr_->scripts_na[i][korean_id_] += 1.0;
if (prev_id == han_id_)
if (prev_id == han_id_) {
osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
if (prev_id == han_id_)
osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
}
}
} // iterate over each orientation
}


@ -26,15 +26,23 @@
namespace tesseract {
PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height)
: page_res_(page_res), tesseract_(tesseract),
word_(NULL), word_length_(0), blob_index_(0), cblob_it_(NULL),
scale_(scale), scaled_yres_(scaled_yres),
rect_left_(rect_left), rect_top_(rect_top),
rect_width_(rect_width), rect_height_(rect_height) {
: page_res_(page_res),
tesseract_(tesseract),
word_(NULL),
word_length_(0),
blob_index_(0),
cblob_it_(NULL),
include_upper_dots_(false),
include_lower_dots_(false),
scale_(scale),
scaled_yres_(scaled_yres),
rect_left_(rect_left),
rect_top_(rect_top),
rect_width_(rect_width),
rect_height_(rect_height) {
it_ = new PAGE_RES_IT(page_res);
PageIterator::Begin();
}
@ -50,12 +58,20 @@ PageIterator::~PageIterator() {
* objects at a higher level.
*/
PageIterator::PageIterator(const PageIterator& src)
: page_res_(src.page_res_), tesseract_(src.tesseract_),
word_(NULL), word_length_(src.word_length_),
blob_index_(src.blob_index_), cblob_it_(NULL),
scale_(src.scale_), scaled_yres_(src.scaled_yres_),
rect_left_(src.rect_left_), rect_top_(src.rect_top_),
rect_width_(src.rect_width_), rect_height_(src.rect_height_) {
: page_res_(src.page_res_),
tesseract_(src.tesseract_),
word_(NULL),
word_length_(src.word_length_),
blob_index_(src.blob_index_),
cblob_it_(NULL),
include_upper_dots_(src.include_upper_dots_),
include_lower_dots_(src.include_lower_dots_),
scale_(src.scale_),
scaled_yres_(src.scaled_yres_),
rect_left_(src.rect_left_),
rect_top_(src.rect_top_),
rect_width_(src.rect_width_),
rect_height_(src.rect_height_) {
it_ = new PAGE_RES_IT(*src.it_);
BeginWord(src.blob_index_);
}
@ -63,6 +79,8 @@ PageIterator::PageIterator(const PageIterator& src)
const PageIterator& PageIterator::operator=(const PageIterator& src) {
page_res_ = src.page_res_;
tesseract_ = src.tesseract_;
include_upper_dots_ = src.include_upper_dots_;
include_lower_dots_ = src.include_lower_dots_;
scale_ = src.scale_;
scaled_yres_ = src.scaled_yres_;
rect_left_ = src.rect_left_;
@ -252,16 +270,19 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
PARA *para = NULL;
switch (level) {
case RIL_BLOCK:
box = it_->block()->block->bounding_box();
box = it_->block()->block->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
break;
case RIL_PARA:
para = it_->row()->row->para();
// explicit fall-through.
case RIL_TEXTLINE:
box = it_->row()->row->bounding_box();
box = it_->row()->row->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
break;
case RIL_WORD:
box = it_->word()->word->bounding_box();
box = it_->word()->word->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
break;
case RIL_SYMBOL:
if (cblob_it_ == NULL)
@ -387,39 +408,23 @@ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
int left, top, right, bottom;
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
return NULL;
Pix* pix = NULL;
switch (level) {
case RIL_BLOCK:
case RIL_PARA:
int bleft, btop, bright, bbottom;
BoundingBoxInternal(RIL_BLOCK, &bleft, &btop, &bright, &bbottom);
pix = it_->block()->block->render_mask();
// AND the mask and the image.
pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix),
PIX_SRC & PIX_DST, tesseract_->pix_binary(),
bleft, btop);
if (level == RIL_PARA) {
// RIL_PARA needs further attention:
// clip the paragraph from the block mask.
Box* box = boxCreate(left - bleft, top - btop,
right - left, bottom - top);
Pix* pix2 = pixClipRectangle(pix, box, NULL);
boxDestroy(&box);
pixDestroy(&pix);
pix = pix2;
}
break;
case RIL_TEXTLINE:
case RIL_WORD:
case RIL_SYMBOL:
if (level == RIL_SYMBOL && cblob_it_ != NULL &&
cblob_it_->data()->area() != 0)
return cblob_it_->data()->render();
// Just clip from the bounding box.
Box* box = boxCreate(left, top, right - left, bottom - top);
pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
boxDestroy(&box);
break;
if (level == RIL_SYMBOL && cblob_it_ != NULL &&
cblob_it_->data()->area() != 0)
return cblob_it_->data()->render();
Box* box = boxCreate(left, top, right - left, bottom - top);
Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix* mask = it_->block()->block->render_mask(&mask_box);
int mask_x = left - mask_box.left();
int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
// AND the mask and pix, putting the result in pix.
pixRasterop(pix, MAX(0, -mask_x), MAX(0, -mask_y), pixGetWidth(pix),
pixGetHeight(pix), PIX_SRC & PIX_DST, mask, MAX(0, mask_x),
MAX(0, mask_y));
pixDestroy(&mask);
}
return pix;
}
@ -452,17 +457,24 @@ Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
Pix* grey_pix = pixClipRectangle(original_img, box, NULL);
boxDestroy(&box);
if (level == RIL_BLOCK) {
Pix* mask = it_->block()->block->render_mask();
Pix* expanded_mask = pixCreate(right - *left, bottom - *top, 1);
pixRasterop(expanded_mask, padding, padding,
pixGetWidth(mask), pixGetHeight(mask),
PIX_SRC, mask, 0, 0);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix* mask = it_->block()->block->render_mask(&mask_box);
// Copy the mask registered correctly into an image the size of grey_pix.
int mask_x = *left - mask_box.left();
int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
int width = pixGetWidth(grey_pix);
int height = pixGetHeight(grey_pix);
Pix* resized_mask = pixCreate(width, height, 1);
pixRasterop(resized_mask, MAX(0, -mask_x), MAX(0, -mask_y), width, height,
PIX_SRC, mask, MAX(0, mask_x), MAX(0, mask_y));
pixDestroy(&mask);
pixDilateBrick(expanded_mask, expanded_mask, 2*padding + 1, 2*padding + 1);
pixInvert(expanded_mask, expanded_mask);
pixSetMasked(grey_pix, expanded_mask, MAX_UINT32);
pixDestroy(&expanded_mask);
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
2 * padding + 1);
pixInvert(resized_mask, resized_mask);
pixSetMasked(grey_pix, resized_mask, MAX_UINT32);
pixDestroy(&resized_mask);
}
return grey_pix;
}


@ -179,6 +179,21 @@ class TESS_API PageIterator {
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
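A hedged usage sketch: before recognition, include the dots above and below the word so diacritics are not cropped out of the box (the iterator is assumed to come from AnalyseLayout):

it->SetBoundingBoxComponents(true, true);  // keep upper and lower diacritics
int left, top, right, bottom;
it->BoundingBox(tesseract::RIL_WORD, &left, &top, &right, &bottom);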
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
@ -332,6 +347,9 @@ class TESS_API PageIterator {
* Owned by this ResultIterator.
*/
C_BLOB_IT* cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;


@ -134,12 +134,20 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
// UNLV file present. Use PSM_SINGLE_BLOCK.
pageseg_mode = PSM_SINGLE_BLOCK;
}
// The diacritic_blobs holds noise blobs that may be diacritics. They
// are separated out on areas of the image that seem noisy and short-circuit
// the layout process, going straight from the initial partition creation
// right through to after word segmentation, where they are added to the
// rej_cblobs list of the most appropriate word. From there classification
// will determine whether they are used.
BLOBNBOX_LIST diacritic_blobs;
int auto_page_seg_ret_val = 0;
TO_BLOCK_LIST to_blocks;
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
PSM_SPARSE(pageseg_mode)) {
auto_page_seg_ret_val =
AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr);
auto_page_seg_ret_val = AutoPageSeg(
pageseg_mode, blocks, &to_blocks,
enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr);
if (pageseg_mode == PSM_OSD_ONLY)
return auto_page_seg_ret_val;
// To create blobs from the image region bounds uncomment this line:
@ -171,7 +179,7 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
pix_thresholds_, pix_grey_, splitting || cjk_mode,
blocks, &to_blocks);
&diacritic_blobs, blocks, &to_blocks);
return auto_page_seg_ret_val;
}
@ -197,7 +205,6 @@ static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
pixDestroy(&grey_pix);
}
/**
* Auto page segmentation. Divide the page image into blocks of uniform
* text linespacing and images.
@ -207,9 +214,14 @@ static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
* The output goes in the blocks list with corresponding TO_BLOCKs in the
* to_blocks list.
*
* If single_column is true, then no attempt is made to divide the image
* into columns, but multiple blocks are still made if the text is of
* non-uniform linespacing.
* If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide
* the image into columns, but multiple blocks are still made if the text is
* of non-uniform linespacing.
*
* If diacritic_blobs is non-null, then diacritics/noise blobs, that would
* confuse layout analysis by causing textline overlap, are placed there,
* with the expectation that they will be reassigned to words later and
* noise/diacriticness determined via classification.
*
* If osd (orientation and script detection) is true then that is performed
* as well. If only_osd is true, then only orientation and script detection is
@ -217,9 +229,10 @@ static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
* another Tesseract that was initialized especially for osd, and the results
* will be output into osr (orientation and script result).
*/
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode,
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks,
Tesseract* osd_tess, OSResults* osr) {
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
TO_BLOCK_LIST* to_blocks,
BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
OSResults* osr) {
if (textord_debug_images) {
WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
}
@ -247,10 +260,9 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode,
if (equ_detect_) {
finder->SetEquationDetect(equ_detect_);
}
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
to_block, photomask_pix,
pix_thresholds_, pix_grey_,
&found_blocks, to_blocks);
result = finder->FindBlocks(
pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix,
pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks);
if (result >= 0)
finder->GetDeskewVectors(&deskew_, &reskew_);
delete finder;
@ -340,6 +352,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
blkbox.botleft(), blkbox.topright(),
source_resolution_, textord_use_cjk_fp_model,
textord_tabfind_aligned_gap_fraction,
&v_lines, &h_lines, vertical_x, vertical_y);
finder->SetupAndFilterNoise(*photo_mask_pix, to_block);
@ -354,7 +367,12 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
// We want the text lines horizontal, (vertical text indicates vertical
// textlines) which may conflict (eg vertically written CJK).
int osd_orientation = 0;
bool vertical_text = finder->IsVerticallyAlignedText(to_block, &osd_blobs);
bool vertical_text = textord_tabfind_force_vertical_text;
if (!vertical_text && textord_tabfind_vertical_text) {
vertical_text =
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
to_block, &osd_blobs);
}
if (osd && osd_tess != NULL && osr != NULL) {
GenericVector<int> osd_scripts;
if (osd_tess != this) {


@ -30,15 +30,12 @@ namespace tesseract {
void Tesseract::process_selected_words(
PAGE_RES* page_res, // blocks to check
TBOX & selection_box,
BOOL8(tesseract::Tesseract::*word_processor)( // function to call
BLOCK* block, ROW* row, WERD_RES* word_res)) {
BOOL8(tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it)) {
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != NULL;
page_res_it.forward()) {
WERD* word = page_res_it.word()->word;
if (word->bounding_box().overlap(selection_box)) {
if (!((this->*word_processor)(page_res_it.block()->block,
page_res_it.row()->row,
page_res_it.word())))
if (!(this->*word_processor)(&page_res_it))
return;
}
}


@ -39,13 +39,11 @@ void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
for (int w = 0; w < words.size(); ++w) {
if (words[w].word->ratings != NULL &&
words[w].word->ratings->get(0, 0) == NULL) {
for (int b = 0; b < words[w].word->chopped_word->NumBlobs(); ++b) {
blobs.push_back(BlobData(b, this, *words[w].word));
}
for (int s = 0; s < words[w].lang_words.size(); ++s) {
const WERD_RES& word = words[w].lang_words[s];
Tesseract* sub = s < sub_langs_.size() ? sub_langs_[s] : this;
const WERD_RES& word = *words[w].lang_words[s];
for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) {
blobs.push_back(BlobData(b, sub_langs_[s], word));
blobs.push_back(BlobData(b, sub, word));
}
}
}


@ -24,7 +24,9 @@
#define VARABLED_H
#include "elst.h"
#ifndef ANDROID_BUILD
#include "scrollview.h"
#endif
#include "params.h"
#include "tesseractclass.h"


@ -306,10 +306,7 @@ SVMenuNode *Tesseract::build_menu_new() {
* Redisplay page
*/
void Tesseract::do_re_display(
BOOL8 (tesseract::Tesseract::*word_painter)(BLOCK* block,
ROW* row,
WERD_RES* word_res)) {
PAGE_RES_IT pr_it(current_page_res);
BOOL8 (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it)) {
int block_count = 1;
image_win->Clear();
@ -317,8 +314,9 @@ void Tesseract::do_re_display(
image_win->Image(pix_binary_, 0, 0);
}
PAGE_RES_IT pr_it(current_page_res);
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
(this->*word_painter)(pr_it.block()->block, pr_it.row()->row, word);
(this->*word_painter)(&pr_it);
if (display_baselines && pr_it.row() != pr_it.prev_row())
pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN);
if (display_blocks && pr_it.block() != pr_it.prev_block())
@ -657,7 +655,8 @@ void show_point(PAGE_RES* page_res, float x, float y) {
FCOORD pt(x, y);
PAGE_RES_IT pr_it(page_res);
char msg[160];
const int kBufsize = 512;
char msg[kBufsize];
char *msg_ptr = msg;
msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y);
@ -714,11 +713,10 @@ void show_point(PAGE_RES* page_res, float x, float y) {
#endif // GRAPHICS_DISABLED
namespace tesseract {
#ifndef GRAPHICS_DISABLED
BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
WERD_RES* word_res) {
word_res->word->bounding_box().plot(image_win, ScrollView::BLACK,
ScrollView::BLACK);
return word_set_display(block, row, word_res);
BOOL8 Tesseract:: word_blank_and_set_display(PAGE_RES_IT* pr_it) {
pr_it->word()->word->bounding_box().plot(image_win, ScrollView::BLACK,
ScrollView::BLACK);
return word_set_display(pr_it);
}
@ -727,7 +725,8 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
*
* Normalize word and display in word window
*/
BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
BOOL8 Tesseract::word_bln_display(PAGE_RES_IT* pr_it) {
WERD_RES* word_res = pr_it->word();
if (word_res->chopped_word == NULL) {
// Setup word normalization parameters.
word_res->SetupForRecognition(unicharset, this, BestPix(),
@ -735,7 +734,7 @@ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
pr_it->row()->row, pr_it->block()->block);
}
bln_word_window_handle()->Clear();
display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
@ -758,7 +757,8 @@ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
*
* Display a word according to its display modes
*/
BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
BOOL8 Tesseract::word_display(PAGE_RES_IT* pr_it) {
WERD_RES* word_res = pr_it->word();
WERD* word = word_res->word;
TBOX word_bb; // word bounding box
int word_height; // ht of word BB
@ -918,14 +918,15 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
*
* Dump members to the debug window
*/
BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) {
if (block != NULL) {
BOOL8 Tesseract::word_dumper(PAGE_RES_IT* pr_it) {
if (pr_it->block()->block != NULL) {
tprintf("\nBlock data...\n");
block->print(NULL, FALSE);
pr_it->block()->block->print(NULL, FALSE);
}
tprintf("\nRow data...\n");
row->print(NULL);
pr_it->row()->row->print(NULL);
tprintf("\nWord data...\n");
WERD_RES* word_res = pr_it->word();
word_res->word->print();
if (word_res->blamer_bundle != NULL && wordrec_debug_blamer &&
word_res->blamer_bundle->incorrect_result_reason() != IRR_CORRECT) {
@ -941,8 +942,8 @@ BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) {
*
* Display word according to current display mode settings
*/
BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
WERD* word = word_res->word;
BOOL8 Tesseract::word_set_display(PAGE_RES_IT* pr_it) {
WERD* word = pr_it->word()->word;
word->set_display_flag(DF_BOX, word_display_mode.bit(DF_BOX));
word->set_display_flag(DF_TEXT, word_display_mode.bit(DF_TEXT));
word->set_display_flag(DF_POLYGONAL, word_display_mode.bit(DF_POLYGONAL));
@ -950,26 +951,24 @@ BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
word->set_display_flag(DF_BN_POLYGONAL,
word_display_mode.bit(DF_BN_POLYGONAL));
word->set_display_flag(DF_BLAMER, word_display_mode.bit(DF_BLAMER));
return word_display(block, row, word_res);
return word_display(pr_it);
}
// page_res is non-const because the iterator doesn't know if you are going
// to change the items it points to! Really a const here though.
void Tesseract::blob_feature_display(PAGE_RES* page_res,
const TBOX& selection_box) {
ROW* row; // row of word
BLOCK* block; // block of word
WERD* word = make_pseudo_word(page_res, selection_box, block, row);
if (word != NULL) {
WERD_RES word_res(word);
word_res.x_height = row->x_height();
word_res.SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
TWERD* bln_word = word_res.chopped_word;
PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box);
if (it != NULL) {
WERD_RES* word_res = it->word();
word_res->x_height = it->row()->row->x_height();
word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
it->row()->row, it->block()->block);
TWERD* bln_word = word_res->chopped_word;
TBLOB* bln_blob = bln_word->blobs[0];
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
@ -989,7 +988,8 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res,
RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN);
cn_win->Update();
delete word;
it->DeleteCurrentWord();
delete it;
}
}


@ -51,15 +51,11 @@ FILE *Tesseract::init_recog_training(const STRING &fname) {
// Copies the bounding box from page_res_it->word() to the given TBOX.
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
while (page_res_it->block() != NULL) {
if (page_res_it->word() != NULL)
break;
while (page_res_it->block() != NULL && page_res_it->word() == NULL)
page_res_it->forward();
}
if (page_res_it->word() != NULL) {
*tbox = page_res_it->word()->word->bounding_box();
page_res_it->forward();
// If tbox->left() is negative, the training image has vertical text and
// all the coordinates of bounding boxes of page_res are rotated by 90
@ -109,26 +105,34 @@ void Tesseract::recog_training_segmented(const STRING &fname,
// Align bottom left points of the TBOXes.
while (keep_going &&
!NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
keep_going = (bbox.bottom() < tbox.bottom()) ?
read_t(&page_res_it, &tbox) :
ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
if (bbox.bottom() < tbox.bottom()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
while (keep_going &&
!NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
keep_going = (bbox.left() > tbox.left()) ? read_t(&page_res_it, &tbox) :
ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
if (bbox.left() > tbox.left()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
// OCR the word if top right points of the TBOXes are similar.
if (keep_going &&
NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) &&
NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) {
ambigs_classify_and_output(page_res_it.prev_word(),
page_res_it.prev_row(),
page_res_it.prev_block(),
label.string(), output_file);
ambigs_classify_and_output(label.string(), &page_res_it, output_file);
examined_words++;
}
page_res_it.forward();
} while (keep_going);
fclose(box_file);
// Set up scripts on all of the words that did not get sent to
// ambigs_classify_and_output. They all should have, but if all the
@ -196,16 +200,15 @@ static void PrintMatrixPaths(int col, int dim,
// raw choice as a result of the classification. For words labeled with a
// single unichar also outputs all alternatives from blob_choices of the
// best choice.
void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res,
ROW_RES *row_res,
BLOCK_RES *block_res,
const char *label,
void Tesseract::ambigs_classify_and_output(const char *label,
PAGE_RES_IT* pr_it,
FILE *output_file) {
// Classify word.
fflush(stdout);
WordData word_data(block_res->block, row_res->row, werd_res);
WordData word_data(*pr_it);
SetupWordPassN(1, &word_data);
classify_word_pass1(&word_data, werd_res);
classify_word_and_language(1, pr_it, &word_data);
WERD_RES* werd_res = word_data.word;
WERD_CHOICE *best_choice = werd_res->best_choice;
ASSERT_HOST(best_choice != NULL);


@ -34,6 +34,13 @@ ResultIterator::ResultIterator(const LTRResultIterator &resit)
: LTRResultIterator(resit) {
in_minor_direction_ = false;
at_beginning_of_minor_run_ = false;
preserve_interword_spaces_ = false;
BoolParam *p = ParamUtils::FindParam<BoolParam>(
"preserve_interword_spaces", GlobalParams()->bool_params,
tesseract_->params()->bool_params);
if (p != NULL) preserve_interword_spaces_ = (bool)(*p);
current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
MoveToLogicalStartOfTextline();
}
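// Illustrative sketch (not part of the diff): the parameter looked up above
// can be set through the public API before extracting text; the helper
// function and the "api" pointer are assumptions for this sketch.
static void KeepInterwordSpaces(tesseract::TessBaseAPI* api) {
  api->SetVariable("preserve_interword_spaces", "1");
  // The ResultIterator will then emit word->space() spaces between words
  // instead of collapsing every gap to a single space.
}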
@ -629,14 +636,17 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) {
int words_appended = 0;
do {
int numSpaces = preserve_interword_spaces_ ? it_->word()->word->space()
: (words_appended > 0);
for (int i = 0; i < numSpaces; ++i) {
*text += " ";
}
AppendUTF8WordText(text);
words_appended++;
*text += " ";
} while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE));
if (BidiDebug(1)) {
tprintf("%d words printed\n", words_appended);
}
text->truncate_at(text->length() - 1);
*text += line_separator_;
// If we just finished a paragraph, add an extra newline.
if (it_->block() == NULL || IsAtBeginningOf(RIL_PARA))


@ -46,8 +46,8 @@ class TESS_API ResultIterator : public LTRResultIterator {
virtual ~ResultIterator() {}
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
virtual void Begin();
@ -181,7 +181,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
@ -231,6 +231,12 @@ class TESS_API ResultIterator : public LTRResultIterator {
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.


@ -194,7 +194,11 @@ bool Tesseract::init_tesseract_lang_data(
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
}
// Load Cube objects if necessary.
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as cube and LSTM, are optional.
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
if (tessdata_manager_debug_level)
@ -204,7 +208,7 @@ bool Tesseract::init_tesseract_lang_data(
if (tessdata_manager_debug_level)
tprintf("Loaded Cube with combiner\n");
}
#endif
// Init ParamsModel.
// Load pass1 and pass2 weights (for now these two sets are the same, but in
// the future separate sets of weights can be generated).
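// Illustrative sketch (not part of the diff): the OcrEngineMode passed to
// TessBaseAPI::Init (declared in baseapi.h) chooses which of the data files
// above are loaded; OEM_TESSERACT_CUBE_COMBINED loads both the base tesseract
// data and the cube data. The helper function is an assumption for this
// sketch.
static bool InitCombinedEngine(tesseract::TessBaseAPI* api) {
  // NULL datapath falls back to TESSDATA_PREFIX; "eng" selects the language.
  return api->Init(NULL, "eng", tesseract::OEM_TESSERACT_CUBE_COMBINED) == 0;
}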
@ -475,5 +479,4 @@ enum CMD_EVENTS
RECOG_PSEUDO,
ACTION_2_CMD_EVENT
};
} // namespace tesseract

File diff suppressed because it is too large.


@ -1,7 +1,12 @@
///////////////////////////////////////////////////////////////////////
// File: tesseractclass.h
// Description: An instance of Tesseract. For thread safety, *every*
// Description: The Tesseract class. It holds/owns everything needed
// to run Tesseract on a single language, and also a set of
// sub-Tesseracts to run sub-languages. For thread safety, *every*
// global variable goes in here, directly, or indirectly.
// This makes it safe to run multiple Tesseracts in different
// threads in parallel, and keeps the different language
// instances separate.
// Author: Ray Smith
// Created: Fri Mar 07 08:17:01 PST 2008
//
@ -31,20 +36,20 @@
#include "textord.h"
#include "wordrec.h"
class PAGE_RES;
class PAGE_RES_IT;
class BLOB_CHOICE_LIST_CLIST;
class BLOCK_LIST;
class CharSamp;
class TO_BLOCK_LIST;
class WERD_RES;
class ROW;
class TBOX;
class SVMenuNode;
struct Pix;
class WERD_CHOICE;
class WERD;
class BLOB_CHOICE_LIST_CLIST;
struct OSResults;
class PAGE_RES;
class PAGE_RES_IT;
struct Pix;
class ROW;
class SVMenuNode;
class TBOX;
class TO_BLOCK_LIST;
class WERD;
class WERD_CHOICE;
class WERD_RES;
// Top-level class for all tesseract global instance data.
@ -92,12 +97,16 @@ struct OSResults;
namespace tesseract {
class ColumnFinder;
#ifndef ANDROID_BUILD
class CubeLineObject;
class CubeObject;
class CubeRecoContext;
#endif
class EquationDetect;
class Tesseract;
#ifndef ANDROID_BUILD
class TesseractCubeCombiner;
#endif
// A collection of various variables for statistics and debugging.
struct TesseractStats {
@ -144,10 +153,19 @@ struct WordData {
ROW* row;
BLOCK* block;
WordData* prev_word;
GenericVector<WERD_RES> lang_words;
PointerVector<WERD_RES> lang_words;
};
typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word);
// Definition of a Tesseract WordRecognizer. The WordData provides the context
// of row/block, in_word holds an initialized, possibly pre-classified word,
// that the recognizer may or may not consume (but if so it sets *in_word=NULL)
// and produces one or more output words in out_words, which may be the
// consumed in_word, or may be generated independently.
// This API supports both a conventional tesseract classifier and a
// line-level classifier that generates multiple words from a merged input.
typedef void (Tesseract::*WordRecognizer)(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words);
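// Illustrative sketch (not part of the diff): a minimal recognizer honoring
// the contract above. It consumes in_word, so it transfers ownership into
// out_words and sets *in_word to NULL; a line-level recognizer could instead
// leave *in_word alone and push independently created words. SketchRecognizer
// is a hypothetical member used only for illustration; the real examples are
// classify_word_pass1 and classify_word_pass2.
void Tesseract::SketchRecognizer(const WordData& word_data,
                                 WERD_RES** in_word,
                                 PointerVector<WERD_RES>* out_words) {
  // ... classify *in_word using the word_data.row / word_data.block context ...
  out_words->push_back(*in_word);  // transfer ownership of the consumed word
  *in_word = NULL;                 // signal that the input word was consumed
}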
class Tesseract : public Wordrec {
public:
@ -236,6 +254,15 @@ class Tesseract : public Wordrec {
Tesseract* get_sub_lang(int index) const {
return sub_langs_[index];
}
// Returns true if any language uses Tesseract (as opposed to cube).
bool AnyTessLang() const {
if (tessedit_ocr_engine_mode != OEM_CUBE_ONLY) return true;
for (int i = 0; i < sub_langs_.size(); ++i) {
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_CUBE_ONLY)
return true;
}
return false;
}
void SetBlackAndWhitelist();
@ -256,8 +283,8 @@ class Tesseract : public Wordrec {
int SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr);
void SetupWordScripts(BLOCK_LIST* blocks);
int AutoPageSeg(PageSegMode pageseg_mode,
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks,
int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs,
Tesseract* osd_tess, OSResults* osr);
ColumnFinder* SetupPageSegAndDetectOrientation(
bool single_column, bool osd, bool only_osd,
@ -279,6 +306,7 @@ class Tesseract : public Wordrec {
void SetupWordPassN(int pass_n, WordData* word);
// Runs word recognition on all the words.
bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
PAGE_RES_IT* pr_it,
GenericVector<WordData>* words);
bool recog_all_words(PAGE_RES* page_res,
ETEXT_DESC* monitor,
@ -294,32 +322,83 @@ class Tesseract : public Wordrec {
// Sets script positions and detects smallcaps on all output words.
void script_pos_pass(PAGE_RES* page_res);
// Helper to recognize the word using the given (language-specific) tesseract.
// Returns true if the result was better than previously.
bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data,
WERD_RES* word, WordRecognizer recognizer);
void classify_word_and_language(WordRecognizer recognizer,
// Returns positive if this recognizer found more new best words than the
// number kept from best_words.
int RetryWithLanguage(const WordData& word_data,
WordRecognizer recognizer,
WERD_RES** in_word,
PointerVector<WERD_RES>* best_words);
// Moves good-looking "noise"/diacritics from the reject list to the main
// blob list on the current word. Returns true if anything was done, and
// sets make_next_word_fuzzy if blob(s) were added to the end of the word.
bool ReassignDiacritics(int pass, PAGE_RES_IT* pr_it,
bool* make_next_word_fuzzy);
// Attempts to put noise/diacritic outlines into the blobs that they overlap.
// Input: a set of noisy outlines that probably belong to the real_word.
// Output: outlines that overlapped blobs are set to NULL and put back into
// the word, either in the blobs or in the reject list.
void AssignDiacriticsToOverlappingBlobs(
const GenericVector<C_OUTLINE*>& outlines, int pass, WERD* real_word,
PAGE_RES_IT* pr_it, GenericVector<bool>* word_wanted,
GenericVector<bool>* overlapped_any_blob,
GenericVector<C_BLOB*>* target_blobs);
// Attempts to assign non-overlapping outlines to their nearest blobs or
// make new blobs out of them.
void AssignDiacriticsToNewBlobs(const GenericVector<C_OUTLINE*>& outlines,
int pass, WERD* real_word, PAGE_RES_IT* pr_it,
GenericVector<bool>* word_wanted,
GenericVector<C_BLOB*>* target_blobs);
// Starting with ok_outlines set to indicate which outlines overlap the blob,
// chooses the optimal set (approximately) and returns true if any outlines
// are desired, in which case ok_outlines indicates which ones.
bool SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
PAGE_RES_IT* pr_it, C_BLOB* blob,
const GenericVector<C_OUTLINE*>& outlines,
int num_outlines,
GenericVector<bool>* ok_outlines);
// Classifies the given blob plus the outlines flagged by ok_outlines, undoes
// the inclusion of the outlines, and returns the certainty of the raw choice.
float ClassifyBlobPlusOutlines(const GenericVector<bool>& ok_outlines,
const GenericVector<C_OUTLINE*>& outlines,
int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob,
STRING* best_str);
// Classifies the given blob (part of word_data->word->word) as an individual
// word, using languages, chopper etc, returning only the certainty of the
// best raw choice, and undoing all the work done to fake out the word.
float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT* pr_it, C_BLOB* blob,
STRING* best_str, float* c2);
void classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
WordData* word_data);
void classify_word_pass1(WordData* word_data, WERD_RES* word);
void classify_word_pass1(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words);
void recog_pseudo_word(PAGE_RES* page_res, // blocks to check
TBOX &selection_box);
void fix_rep_char(PAGE_RES_IT* page_res_it);
void ExplodeRepeatedWord(BLOB_CHOICE* best_choice, PAGE_RES_IT* page_res_it);
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET& char_set,
const char *s,
const char *lengths);
void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
void classify_word_pass2(WordData* word_data, WERD_RES* word);
void classify_word_pass2(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words);
void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
WERD_RES* word, WERD_RES* new_word);
bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
bool TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row);
BOOL8 recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res);
// Runs recognition with the test baseline shift and x-height and returns true
// if there was an improvement in recognition result.
bool TestNewNormalization(int original_misfits, float baseline_shift,
float new_x_ht, WERD_RES *word, BLOCK* block,
ROW *row);
BOOL8 recog_interactive(PAGE_RES_IT* pr_it);
// Set fonts of this word.
void set_word_fonts(WERD_RES *word);
void font_recognition_pass(PAGE_RES* page_res);
void dictionary_correction_pass(PAGE_RES* page_res);
BOOL8 check_debug_pt(WERD_RES *word, int location);
//// superscript.cpp ////////////////////////////////////////////////////
@ -350,6 +429,7 @@ class Tesseract : public Wordrec {
int *right_ok) const;
//// cube_control.cpp ///////////////////////////////////////////////////
#ifndef ANDROID_BUILD
bool init_cube_objects(bool load_combiner,
TessdataManager *tessdata_manager);
// Iterates through tesseract's results and calls cube on each word,
@ -375,6 +455,7 @@ class Tesseract : public Wordrec {
Boxa** char_boxes, CharSamp*** char_samples);
bool create_cube_box_word(Boxa *char_boxes, int num_chars,
TBOX word_box, BoxWord* box_word);
#endif
//// output.h //////////////////////////////////////////////////////////
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box);
@ -473,15 +554,13 @@ class Tesseract : public Wordrec {
);
void debug_word(PAGE_RES* page_res, const TBOX &selection_box);
void do_re_display(
BOOL8 (tesseract::Tesseract::*word_painter)(BLOCK* block,
ROW* row,
WERD_RES* word_res));
BOOL8 word_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_blank_and_set_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it));
BOOL8 word_display(PAGE_RES_IT* pr_it);
BOOL8 word_bln_display(PAGE_RES_IT* pr_it);
BOOL8 word_blank_and_set_display(PAGE_RES_IT* pr_its);
BOOL8 word_set_display(PAGE_RES_IT* pr_it);
// #ifndef GRAPHICS_DISABLED
BOOL8 word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_dumper(PAGE_RES_IT* pr_it);
// #endif // GRAPHICS_DISABLED
void blob_feature_display(PAGE_RES* page_res, const TBOX& selection_box);
//// reject.h //////////////////////////////////////////////////////////
@ -537,10 +616,7 @@ class Tesseract : public Wordrec {
void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block);
inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
void fix_fuzzy_space_list( //space explorer
WERD_RES_LIST &best_perm,
ROW *row,
BLOCK* block);
void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block);
void fix_fuzzy_spaces( //find fuzzy words
ETEXT_DESC *monitor, //progress monitor
@ -583,9 +659,7 @@ class Tesseract : public Wordrec {
PAGE_RES* page_res, // blocks to check
//function to call
TBOX & selection_box,
BOOL8 (tesseract::Tesseract::*word_processor) (BLOCK* block,
ROW* row,
WERD_RES* word_res));
BOOL8 (tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it));
//// tessbox.cpp ///////////////////////////////////////////////////////
void tess_add_doc_word( //test acceptability
WERD_CHOICE *word_choice //after context
@ -688,8 +762,8 @@ class Tesseract : public Wordrec {
// Creates a fake best_choice entry in each WERD_RES with the correct text.
void CorrectClassifyWords(PAGE_RES* page_res);
// Call LearnWord to extract features for labelled blobs within each word.
// Features are written to the given filename.
void ApplyBoxTraining(const STRING& filename, PAGE_RES* page_res);
// Features are stored in an internal buffer.
void ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res);
//// fixxht.cpp ///////////////////////////////////////////////////////
// Returns the number of misfit blob tops in this word.
@ -698,7 +772,7 @@ class Tesseract : public Wordrec {
// maximally compatible with the result in word_res.
// Returns 0.0f if no x-height is found that is better than the current
// estimate.
float ComputeCompatibleXheight(WERD_RES *word_res);
float ComputeCompatibleXheight(WERD_RES *word_res, float* baseline_shift);
//// Data members ///////////////////////////////////////////////////////
// TODO(ocr-team): Find and remove obsolete parameters.
BOOL_VAR_H(tessedit_resegment_from_boxes, false,
@ -723,6 +797,8 @@ class Tesseract : public Wordrec {
"Blacklist of chars not to recognize");
STRING_VAR_H(tessedit_char_whitelist, "",
"Whitelist of chars to recognize");
STRING_VAR_H(tessedit_char_unblacklist, "",
"List of chars to override tessedit_char_blacklist");
BOOL_VAR_H(tessedit_ambigs_training, false,
"Perform training for ambiguities");
INT_VAR_H(pageseg_devanagari_split_strategy,
@ -752,7 +828,6 @@ class Tesseract : public Wordrec {
"Each bounding box is assumed to contain ngrams. Only"
" learn the ngrams whose outlines overlap horizontally.");
BOOL_VAR_H(tessedit_display_outwords, false, "Draw output words");
BOOL_VAR_H(tessedit_training_tess, false, "Call Tess to learn blobs");
BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices");
BOOL_VAR_H(tessedit_timing_debug, false, "Print timing stats");
BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,
@ -767,8 +842,28 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_debug_block_rejection, false, "Block and Row stats");
BOOL_VAR_H(tessedit_enable_bigram_correction, true,
"Enable correction based on the word bigram dictionary.");
BOOL_VAR_H(tessedit_enable_dict_correction, false,
"Enable single word correction based on the dictionary.");
INT_VAR_H(tessedit_bigram_debug, 0, "Amount of debug output for bigram "
"correction.");
BOOL_VAR_H(enable_noise_removal, true,
"Remove and conditionally reassign small outlines when they"
" confuse layout analysis, determining diacritics vs noise");
INT_VAR_H(debug_noise_removal, 0, "Debug reassignment of small outlines");
// Worst (min) certainty, for which a diacritic is allowed to make the base
// character worse and still be included.
double_VAR_H(noise_cert_basechar, -8.0, "Hingepoint for base char certainty");
// Worst (min) certainty, for which a non-overlapping diacritic is allowed to
// make the base character worse and still be included.
double_VAR_H(noise_cert_disjoint, -2.5, "Hingepoint for disjoint certainty");
// Worst (min) certainty, for which a diacritic is allowed to make a new
// stand-alone blob.
double_VAR_H(noise_cert_punc, -2.5, "Threshold for new punc char certainty");
// Factor of certainty margin for adding diacritics to not count as worse.
double_VAR_H(noise_cert_factor, 0.375,
"Scaling on certainty diff from Hingepoint");
INT_VAR_H(noise_maxperblob, 8, "Max diacritics to apply to a blob");
INT_VAR_H(noise_maxperword, 16, "Max diacritics to apply to a word");
INT_VAR_H(debug_x_ht_level, 0, "Reestimate debug");
BOOL_VAR_H(debug_acceptable_wds, false, "Dump word pass/fail chk");
STRING_VAR_H(chs_leading_punct, "('`\"", "Leading punctuation");
@ -906,15 +1001,9 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_write_rep_codes, false,
"Write repetition char code");
BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
BOOL_VAR_H(tessedit_create_txt, true, "Write .txt output file");
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
INT_VAR_H(tessedit_pdf_compression, 0, "Type of image encoding in pdf output:"
"0 - autoselection (default); "
"1 - jpeg; "
"2 - G4; "
"3 - flate");
INT_VAR_H(tessedit_pdf_jpg_quality, 85, "Quality level of jpeg image "
"compression in pdf output");
STRING_VAR_H(unrecognised_char, "|",
"Output char for unidentified blobs");
INT_VAR_H(suspect_level, 99, "Suspect marker level");
@ -978,7 +1067,22 @@ class Tesseract : public Wordrec {
"Only initialize with the config file. Useful if the instance is "
"not going to be used for OCR but say only for layout analysis.");
BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
BOOL_VAR_H(textord_tabfind_vertical_text, true, "Enable vertical detection");
BOOL_VAR_H(textord_tabfind_force_vertical_text, false,
"Force using vertical text page mode");
double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5,
"Fraction of textlines deemed vertical to use vertical page "
"mode");
double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75,
"Fraction of height used as a minimum gap for aligned blobs.");
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
BOOL_VAR_H(preserve_interword_spaces, false,
"Preserve multiple interword spaces");
BOOL_VAR_H(include_page_breaks, false,
"Include page separator string in output text after each "
"image/page.");
STRING_VAR_H(page_separator, "\f",
"Page separator (default is form feed control character)");
// The following parameters were deprecated and removed from their original
// locations. The parameters are temporarily kept here to give Tesseract
@ -988,6 +1092,8 @@ class Tesseract : public Wordrec {
// reasonably sure that Tesseract users have updated their data files.
//
// BEGIN DEPRECATED PARAMETERS
BOOL_VAR_H(textord_tabfind_vertical_horizontal_mix, true,
"find horizontal lines such as headers in vertical page mode");
INT_VAR_H(tessedit_ok_mode, 5, "Acceptance decision algorithm");
BOOL_VAR_H(load_fixed_length_dawgs, true, "Load fixed length"
" dawgs (e.g. for non-space delimited languages)");
@ -1046,13 +1152,13 @@ class Tesseract : public Wordrec {
PAGE_RES *page_res,
volatile ETEXT_DESC *monitor,
FILE *output_file);
void ambigs_classify_and_output(WERD_RES *werd_res,
ROW_RES *row_res,
BLOCK_RES *block_res,
const char *label,
void ambigs_classify_and_output(const char *label,
PAGE_RES_IT* pr_it,
FILE *output_file);
#ifndef ANDROID_BUILD
inline CubeRecoContext *GetCubeRecoContext() { return cube_cntxt_; }
#endif
private:
// The filename of a backup config file. If not null, then we currently
@ -1092,9 +1198,11 @@ class Tesseract : public Wordrec {
Tesseract* most_recently_used_;
// The size of the font table, ie max possible font id + 1.
int font_table_size_;
#ifndef ANDROID_BUILD
// Cube objects.
CubeRecoContext* cube_cntxt_;
TesseractCubeCombiner *tess_cube_combiner_;
#endif
// Equation detector. Note: this pointer is NOT owned by the class.
EquationDetect* equ_detect_;
};


@ -254,7 +254,7 @@ void Tesseract::join_words(WERD_RES *word,
// Move the word2 seams onto the end of the word1 seam_array.
// Since the seam list is one element short, an empty seam marking the
// end of the last blob in the first word is needed first.
word->seam_array.push_back(new SEAM(0.0f, split_pt, NULL, NULL, NULL));
word->seam_array.push_back(new SEAM(0.0f, split_pt));
word->seam_array += word2->seam_array;
word2->seam_array.truncate(0);
// Fix widths and gaps.


@ -171,7 +171,7 @@ void ImageThresholder::SetImage(const Pix* pix) {
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
void ImageThresholder::ThresholdToPix(Pix** pix) {
void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
if (pix_channels_ == 0) {
// We have a binary image, so it just has to be cloned.
*pix = GetPixRect();


@ -20,7 +20,8 @@
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
#define TESSERACT_CCMAIN_THRESHOLDER_H__
#include "platform.h"
#include "platform.h"
#include "publictypes.h"
struct Pix;
@ -116,7 +117,7 @@ class TESS_API ImageThresholder {
/// Threshold the source image as efficiently as possible to the output Pix.
/// Creates a Pix and sets pix to point to the resulting pointer.
/// Caller must use pixDestroy to free the created Pix.
virtual void ThresholdToPix(Pix** pix);
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that


@ -23,17 +23,15 @@
* make_pseudo_word
*
* Make all the blobs inside a selection into a single word.
* The word is always a copy and needs to be deleted.
* The returned PAGE_RES_IT* it points to the new word. After use, call
* it->DeleteCurrentWord() to delete the fake word, and then
* delete it to get rid of the iterator itself.
**********************************************************************/
WERD *make_pseudo_word(PAGE_RES* page_res, // Blocks to check.
const TBOX &selection_box,
BLOCK *&pseudo_block,
ROW *&pseudo_row) { // Row of selection.
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) {
PAGE_RES_IT pr_it(page_res);
C_BLOB_LIST new_blobs; // list of gathered blobs
C_BLOB_IT new_blob_it = &new_blobs; // iterator
WERD *pseudo_word; // fabricated word
for (WERD_RES* word_res = pr_it.word(); word_res != NULL;
word_res = pr_it.forward()) {
@ -45,15 +43,17 @@ WERD *make_pseudo_word(PAGE_RES* page_res, // Blocks to check.
C_BLOB* blob = blob_it.data();
if (blob->bounding_box().overlap(selection_box)) {
new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob));
pseudo_row = pr_it.row()->row;
pseudo_block = pr_it.block()->block;
}
}
if (!new_blobs.empty()) {
WERD* pseudo_word = new WERD(&new_blobs, 1, NULL);
word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
PAGE_RES_IT* it = new PAGE_RES_IT(page_res);
while (it->word() != word_res && it->word() != NULL) it->forward();
ASSERT_HOST(it->word() == word_res);
return it;
}
}
}
if (!new_blobs.empty())
pseudo_word = new WERD(&new_blobs, 1, NULL);
else
pseudo_word = NULL;
return pseudo_word;
return NULL;
}
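// Illustrative usage sketch (not part of the diff) of the ownership contract
// described above: the caller removes the fake word and deletes the iterator.
// The helper function and its arguments are assumptions for this sketch.
static void UseSelection(PAGE_RES* page_res, const TBOX& selection_box) {
  PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box);
  if (it == NULL) return;            // nothing overlapped the selection box
  WERD_RES* word_res = it->word();   // the freshly inserted fake word
  // ... inspect or classify word_res ...
  it->DeleteCurrentWord();           // remove the fake word from page_res
  delete it;                         // then dispose of the iterator itself
}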


@ -22,9 +22,6 @@
#include "pageres.h"
WERD *make_pseudo_word(PAGE_RES* page_res, // blocks to check
const TBOX &selection_box,
BLOCK *&pseudo_block,
ROW *&pseudo_row);
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
#endif


@ -137,6 +137,9 @@ class BLOBNBOX:public ELIST_LINK
cblob_ptr = srcblob;
area = static_cast<int>(srcblob->area());
}
~BLOBNBOX() {
if (owns_cblob_) delete cblob_ptr;
}
static BLOBNBOX* RealBlob(C_OUTLINE* outline) {
C_BLOB* blob = new C_BLOB(outline);
return new BLOBNBOX(blob);
@ -387,6 +390,7 @@ class BLOBNBOX:public ELIST_LINK
void set_base_char_blob(BLOBNBOX* blob) {
base_char_blob_ = blob;
}
void set_owns_cblob(bool value) { owns_cblob_ = value; }
bool UniquelyVertical() const {
return vert_possible_ && !horz_possible_;
@ -450,6 +454,7 @@ class BLOBNBOX:public ELIST_LINK
// construction time.
void ConstructionInit() {
cblob_ptr = NULL;
owns_cblob_ = false;
area = 0;
area_stroke_width_ = 0.0f;
horz_stroke_width_ = 0.0f;
@ -525,6 +530,10 @@ class BLOBNBOX:public ELIST_LINK
bool vert_possible_; // Could be part of vertical flow.
bool leader_on_left_; // There is a leader to the left.
bool leader_on_right_; // There is a leader to the right.
// Iff true, then the destructor should delete the cblob_ptr.
// TODO(rays) migrate all uses to correctly setting this flag instead of
// deleting the C_BLOB before deleting the BLOBNBOX.
bool owns_cblob_;
};
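// Illustrative sketch (not part of the diff): wrapping an outline and handing
// C_BLOB ownership to the BLOBNBOX so the new destructor releases it. The
// helper function name is an assumption for this sketch.
static BLOBNBOX* MakeOwnedBlob(C_OUTLINE* outline) {
  BLOBNBOX* nbox = BLOBNBOX::RealBlob(outline);  // wraps outline in a C_BLOB
  nbox->set_owns_cblob(true);  // ~BLOBNBOX will now delete that C_BLOB
  return nbox;
}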
class TO_ROW: public ELIST2_LINK


@ -64,6 +64,42 @@ const TPOINT kDivisibleVerticalItalic(1, 5);
CLISTIZE(EDGEPT);
// Returns true when the two line segments cross each other.
// (Moved from outlines.cpp).
// Finds where the projected lines would cross and then checks to see if the
// point of intersection lies on both of the line segments. If it does
// then these two segments cross.
/* static */
bool TPOINT::IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
const TPOINT& b1) {
int b0a1xb0b1, b0b1xb0a0;
int a1b1xa1a0, a1a0xa1b0;
TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;
b0a1.x = a1.x - b0.x;
b0a0.x = a0.x - b0.x;
a1b1.x = b1.x - a1.x;
b0b1.x = b1.x - b0.x;
a1a0.x = a0.x - a1.x;
b0a1.y = a1.y - b0.y;
b0a0.y = a0.y - b0.y;
a1b1.y = b1.y - a1.y;
b0b1.y = b1.y - b0.y;
a1a0.y = a0.y - a1.y;
b0a1xb0b1 = CROSS(b0a1, b0b1);
b0b1xb0a0 = CROSS(b0b1, b0a0);
a1b1xa1a0 = CROSS(a1b1, a1a0);
// For clarity, we want CROSS(a1a0,a1b0) here but we have b0a1 instead of a1b0
// so use -CROSS(a1a0,b0a1) instead, which is the same.
a1a0xa1b0 = -CROSS(a1a0, b0a1);
return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) ||
(b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) &&
((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0));
}
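// Illustrative usage sketch (not part of the diff): the segment (0,0)-(10,10)
// crosses (0,10)-(10,0) at (5,5), so IsCrossed returns true; disjoint or
// merely touching segments return false. Names and values are assumptions.
static bool ExampleCross() {
  TPOINT a0, a1, b0, b1;
  a0.x = 0;  a0.y = 0;   a1.x = 10; a1.y = 10;
  b0.x = 0;  b0.y = 10;  b1.x = 10; b1.y = 0;
  return TPOINT::IsCrossed(a0, a1, b0, b1);  // true for these endpoints
}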
// Consume the circular list of EDGEPTs to make a TESSLINE.
TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) {
TESSLINE* result = new TESSLINE;
@ -454,6 +490,36 @@ TBOX TBLOB::bounding_box() const {
return box;
}
// Finds and deletes any duplicate outlines in this blob, without deleting
// their EDGEPTs.
void TBLOB::EliminateDuplicateOutlines() {
for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
TESSLINE* last_outline = outline;
for (TESSLINE* other_outline = outline->next; other_outline != NULL;
last_outline = other_outline, other_outline = other_outline->next) {
if (outline->SameBox(*other_outline)) {
last_outline->next = other_outline->next;
// This doesn't leak - the outlines share the EDGEPTs.
other_outline->loop = NULL;
delete other_outline;
other_outline = last_outline;
// If it is part of a cut, then it can't be a hole any more.
outline->is_hole = false;
}
}
}
}
// Swaps the outlines of *this and next if needed to keep the centers in
// increasing x.
void TBLOB::CorrectBlobOrder(TBLOB* next) {
TBOX box = bounding_box();
TBOX next_box = next->bounding_box();
if (box.x_middle() > next_box.x_middle()) {
Swap(&outlines, &next->outlines);
}
}
#ifndef GRAPHICS_DISABLED
void TBLOB::plot(ScrollView* window, ScrollView::Color color,
ScrollView::Color child_color) {
@ -739,8 +805,8 @@ TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) {
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
bool inverse, float x_height, bool numeric_mode,
tesseract::OcrEngineMode hint,
bool inverse, float x_height, float baseline_shift,
bool numeric_mode, tesseract::OcrEngineMode hint,
const TBOX* norm_box,
DENORM* word_denorm) {
TBOX word_box = bounding_box();
@ -756,7 +822,7 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
if (hint == tesseract::OEM_CUBE_ONLY)
scale = 1.0f;
} else {
input_y_offset = row->base_line(word_middle);
input_y_offset = row->base_line(word_middle) + baseline_shift;
}
for (int b = 0; b < blobs.size(); ++b) {
TBLOB* blob = blobs[b];
@ -769,7 +835,7 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
scale, scale * 1.5f);
} else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) {
baseline = row->base_line(mid_x);
baseline = row->base_line(mid_x) + baseline_shift;
}
// The image will be 8-bit grey if the input was grey or color. Note that in
// a grey image 0 is black and 255 is white. If the input was binary, then
@ -858,18 +924,6 @@ void TWERD::plot(ScrollView* window) {
}
#endif // GRAPHICS_DISABLED
/**********************************************************************
* blob_origin
*
* Compute the origin of a compound blob, define to be the centre
* of the bounding box.
**********************************************************************/
void blob_origin(TBLOB *blob, /*blob to compute on */
TPOINT *origin) { /*return value */
TBOX bbox = blob->bounding_box();
*origin = (bbox.topleft() + bbox.botright()) / 2;
}
/**********************************************************************
* divisible_blob
*


@ -60,6 +60,13 @@ struct TPOINT {
x /= divisor;
y /= divisor;
}
bool operator==(const TPOINT& other) const {
return x == other.x && y == other.y;
}
// Returns true when the two line segments cross each other.
// (Moved from outlines.cpp).
static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
const TPOINT& b1);
inT16 x; // absolute x coord.
inT16 y; // absolute y coord.
@ -87,6 +94,55 @@ struct EDGEPT {
start_step = src.start_step;
step_count = src.step_count;
}
// Returns the squared distance between the points, with the x-component
// weighted by x_factor.
int WeightedDistance(const EDGEPT& other, int x_factor) const {
int x_dist = pos.x - other.pos.x;
int y_dist = pos.y - other.pos.y;
return x_dist * x_dist * x_factor + y_dist * y_dist;
}
// Returns true if the positions are equal.
bool EqualPos(const EDGEPT& other) const { return pos == other.pos; }
// Returns the bounding box of the outline segment from *this to *end.
// Ignores hidden edge flags.
TBOX SegmentBox(const EDGEPT* end) const {
TBOX box(pos.x, pos.y, pos.x, pos.y);
const EDGEPT* pt = this;
do {
pt = pt->next;
if (pt->pos.x < box.left()) box.set_left(pt->pos.x);
if (pt->pos.x > box.right()) box.set_right(pt->pos.x);
if (pt->pos.y < box.bottom()) box.set_bottom(pt->pos.y);
if (pt->pos.y > box.top()) box.set_top(pt->pos.y);
} while (pt != end && pt != this);
return box;
}
// Returns the area of the outline segment from *this to *end.
// Ignores hidden edge flags.
int SegmentArea(const EDGEPT* end) const {
int area = 0;
const EDGEPT* pt = this->next;
do {
TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y);
area += CROSS(origin_vec, pt->vec);
pt = pt->next;
} while (pt != end && pt != this);
return area;
}
// Returns true if the number of points in the outline segment from *this to
// *end is less than min_points and false if we get back to *this first.
// Ignores hidden edge flags.
bool ShortNonCircularSegment(int min_points, const EDGEPT* end) const {
int count = 0;
const EDGEPT* pt = this;
do {
if (pt == end) return true;
pt = pt->next;
++count;
} while (pt != this && count <= min_points);
return false;
}
// Accessors to hide or reveal a cut edge from feature extractors.
void Hide() {
flags[0] = true;
@ -100,9 +156,6 @@ struct EDGEPT {
void MarkChop() {
flags[2] = true;
}
void UnmarkChop() {
flags[2] = false;
}
bool IsChopPt() const {
return flags[2] != 0;
}
@ -162,8 +215,23 @@ struct TESSLINE {
void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const;
TBOX bounding_box() const;
// Returns true if *this and other have equal bounding boxes.
bool SameBox(const TESSLINE& other) const {
return topleft == other.topleft && botright == other.botright;
}
// Returns true if the given line segment crosses any outline of this blob.
bool SegmentCrosses(const TPOINT& pt1, const TPOINT& pt2) const {
if (Contains(pt1) && Contains(pt2)) {
EDGEPT* pt = loop;
do {
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) return true;
pt = pt->next;
} while (pt != loop);
}
return false;
}
// Returns true if the point is contained within the outline box.
bool Contains(const TPOINT& pt) {
bool Contains(const TPOINT& pt) const {
return topleft.x <= pt.x && pt.x <= botright.x &&
botright.y <= pt.y && pt.y <= topleft.y;
}
@ -244,6 +312,31 @@ struct TBLOB {
TBOX bounding_box() const;
// Returns true if the given line segment crosses any outline of this blob.
bool SegmentCrossesOutline(const TPOINT& pt1, const TPOINT& pt2) const {
for (const TESSLINE* outline = outlines; outline != NULL;
outline = outline->next) {
if (outline->SegmentCrosses(pt1, pt2)) return true;
}
return false;
}
// Returns true if the point is contained within any of the outline boxes.
bool Contains(const TPOINT& pt) const {
for (const TESSLINE* outline = outlines; outline != NULL;
outline = outline->next) {
if (outline->Contains(pt)) return true;
}
return false;
}
// Finds and deletes any duplicate outlines in this blob, without deleting
// their EDGEPTs.
void EliminateDuplicateOutlines();
// Swaps the outlines of *this and next if needed to keep the centers in
// increasing x.
void CorrectBlobOrder(TBLOB* next);
const DENORM& denorm() const {
return denorm_;
}
@ -317,7 +410,7 @@ struct TWERD {
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse,
float x_height, bool numeric_mode,
float x_height, float baseline_shift, bool numeric_mode,
tesseract::OcrEngineMode hint,
const TBOX* norm_box,
DENORM* word_denorm);
@ -358,12 +451,7 @@ if (w) memfree (w)
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
// TODO(rays) This will become a member of TBLOB when TBLOB's definition
// moves to blobs.h
// Returns the center of blob's bounding box in origin.
void blob_origin(TBLOB *blob, TPOINT *origin);
// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB.
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location);
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,


@ -78,7 +78,7 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
if (!ParseBoxFileStr(lines[i].string(), &page, &utf8_str, &box)) {
continue;
}
if (skip_blanks && utf8_str == " ") continue;
if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) continue;
if (target_page >= 0 && page != target_page) continue;
if (boxes != NULL) boxes->push_back(box);
if (texts != NULL) texts->push_back(utf8_str);


@ -157,6 +157,13 @@ void BoxWord::InsertBox(int index, const TBOX& box) {
ComputeBoundingBox();
}
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void BoxWord::ChangeBox(int index, const TBOX& box) {
boxes_[index] = box;
ComputeBoundingBox();
}
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void BoxWord::DeleteBox(int index) {


@ -63,6 +63,10 @@ class BoxWord {
// Recomputes the bounding box.
void InsertBox(int index, const TBOX& box);
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void ChangeBox(int index, const TBOX& box);
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void DeleteBox(int index);


@ -59,10 +59,10 @@ bool FontInfoTable::DeSerialize(bool swap, FILE* fp) {
// Returns true if the given set of fonts includes one with the same
// properties as font_id.
bool FontInfoTable::SetContainsFontProperties(
int font_id, const GenericVector<int>& font_set) const {
int font_id, const GenericVector<ScoredFont>& font_set) const {
uinT32 properties = get(font_id).properties;
for (int f = 0; f < font_set.size(); ++f) {
if (get(font_set[f]).properties == properties)
if (get(font_set[f].fontinfo_id).properties == properties)
return true;
}
return false;
@ -70,12 +70,12 @@ bool FontInfoTable::SetContainsFontProperties(
// Returns true if the given set of fonts includes multiple properties.
bool FontInfoTable::SetContainsMultipleFontProperties(
const GenericVector<int>& font_set) const {
const GenericVector<ScoredFont>& font_set) const {
if (font_set.empty()) return false;
int first_font = font_set[0];
int first_font = font_set[0].fontinfo_id;
uinT32 properties = get(first_font).properties;
for (int f = 1; f < font_set.size(); ++f) {
if (get(font_set[f]).properties != properties)
if (get(font_set[f].fontinfo_id).properties != properties)
return true;
}
return false;


@ -31,6 +31,22 @@ namespace tesseract {
class BitVector;
// Simple struct to hold a font and a score. The scores come from the low-level
// integer matcher, so they are in the uinT16 range. Fonts are an index to
// fontinfo_table.
// These get copied around a lot, so best to keep them small.
struct ScoredFont {
ScoredFont() : fontinfo_id(-1), score(0) {}
ScoredFont(int font_id, uinT16 classifier_score)
: fontinfo_id(font_id), score(classifier_score) {}
// Index into fontinfo table, but inside the classifier, may be a shapetable
// index.
inT32 fontinfo_id;
// Raw score from the low-level classifier.
uinT16 score;
};
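// Illustrative sketch (not part of the diff): recording classifier output as
// ScoredFonts and querying the font table with the scored set. The table
// argument, font ids and scores are assumptions for illustration only.
static bool SharesPropertiesWithFont3(const tesseract::FontInfoTable& table) {
  GenericVector<tesseract::ScoredFont> fonts;
  fonts.push_back(tesseract::ScoredFont(3, 12000));  // font id 3, raw score
  fonts.push_back(tesseract::ScoredFont(7, 9500));   // font id 7, raw score
  // True if some entry in fonts has the same property bits as font id 3.
  return table.SetContainsFontProperties(3, fonts);
}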
// Struct for information about spacing between characters in a particular font.
struct FontSpacingInfo {
inT16 x_gap_before;
@ -140,11 +156,11 @@ class FontInfoTable : public GenericVector<FontInfo> {
// Returns true if the given set of fonts includes one with the same
// properties as font_id.
bool SetContainsFontProperties(int font_id,
const GenericVector<int>& font_set) const;
bool SetContainsFontProperties(
int font_id, const GenericVector<ScoredFont>& font_set) const;
// Returns true if the given set of fonts includes multiple properties.
bool SetContainsMultipleFontProperties(
const GenericVector<int>& font_set) const;
const GenericVector<ScoredFont>& font_set) const;
// Moves any non-empty FontSpacingInfo entries from other to this.
void MoveSpacingInfoFrom(FontInfoTable* other);


@ -51,6 +51,7 @@ void WordFeature::ComputeSize(const GenericVector<WordFeature>& features,
// Draws the features in the given window.
void WordFeature::Draw(const GenericVector<WordFeature>& features,
ScrollView* window) {
#ifndef GRAPHICS_DISABLED
for (int f = 0; f < features.size(); ++f) {
FCOORD pos(features[f].x_, features[f].y_);
FCOORD dir;
@ -61,6 +62,7 @@ void WordFeature::Draw(const GenericVector<WordFeature>& features,
window->DrawTo(IntCastRounded(pos.x() + dir.x()),
IntCastRounded(pos.y() + dir.y()));
}
#endif
}
// Writes to the given file. Returns false in case of error.
@ -103,20 +105,13 @@ int FloatWordFeature::SortByXBucket(const void* v1, const void* v2) {
return x_diff;
}
ImageData::ImageData() : page_number_(-1), partial_boxes_(false) {
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {
}
// Takes ownership of the pix and destroys it.
ImageData::ImageData(Pix* pix) : page_number_(0), partial_boxes_(false) {
ImageData::ImageData(bool vertical, Pix* pix)
: page_number_(0), vertical_text_(vertical) {
SetPix(pix);
}
ImageData::ImageData(const GenericVector<WordFeature>& features,
const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts)
: page_number_(0), boxes_(boxes), box_texts_(texts), features_(features),
partial_boxes_(false) {
for (int b = 0; b < box_texts_.size(); ++b)
transcription_ += box_texts_[b];
}
ImageData::~ImageData() {
}
@ -140,47 +135,46 @@ ImageData* ImageData::Build(const char* name, int page_number, const char* lang,
return NULL;
}
image_data->transcription_ = truth_text;
// If we have no boxes, the transcription is in the 0th box_texts_.
image_data->box_texts_.push_back(truth_text);
// We will create a box for the whole image on PreScale, to save unpacking
// the image now.
} else if (truth_text != NULL && truth_text[0] != '\0' &&
image_data->transcription_ != truth_text) {
// Save the truth text as it is present and disagrees with the box text.
image_data->transcription_ = truth_text;
image_data->partial_boxes_ = true;
}
return image_data;
}
// Writes to the given file. Returns false in case of error.
bool ImageData::Serialize(FILE* fp) const {
bool ImageData::Serialize(TFile* fp) const {
if (!imagefilename_.Serialize(fp)) return false;
if (fwrite(&page_number_, sizeof(page_number_), 1, fp) != 1) return false;
if (fp->FWrite(&page_number_, sizeof(page_number_), 1) != 1) return false;
if (!image_data_.Serialize(fp)) return false;
if (!transcription_.Serialize(fp)) return false;
// WARNING: Will not work across different endian machines.
if (!boxes_.Serialize(fp)) return false;
if (!box_texts_.SerializeClasses(fp)) return false;
if (!features_.SerializeClasses(fp)) return false;
if (!side_data_.Serialize(fp)) return false;
inT8 vertical = vertical_text_;
if (fp->FWrite(&vertical, sizeof(vertical), 1) != 1) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool ImageData::DeSerialize(bool swap, FILE* fp) {
bool ImageData::DeSerialize(bool swap, TFile* fp) {
if (!imagefilename_.DeSerialize(swap, fp)) return false;
if (fread(&page_number_, sizeof(page_number_), 1, fp) != 1) return false;
if (fp->FRead(&page_number_, sizeof(page_number_), 1) != 1) return false;
if (swap) ReverseN(&page_number_, sizeof(page_number_));
if (!image_data_.DeSerialize(swap, fp)) return false;
if (!transcription_.DeSerialize(swap, fp)) return false;
// WARNING: Will not work across different endian machines.
if (!boxes_.DeSerialize(swap, fp)) return false;
if (!box_texts_.DeSerializeClasses(swap, fp)) return false;
if (!features_.DeSerializeClasses(swap, fp)) return false;
if (!side_data_.DeSerialize(swap, fp)) return false;
STRING box_str;
for (int i = 0; i < box_texts_.size(); ++i) {
box_str += box_texts_[i];
}
partial_boxes_ = !box_texts_.empty() && transcription_ != box_str;
inT8 vertical = 0;
if (fp->FRead(&vertical, sizeof(vertical), 1) != 1) return false;
vertical_text_ = vertical != 0;
return true;
}
@ -194,28 +188,14 @@ Pix* ImageData::GetPix() const {
return GetPixInternal(image_data_);
}
// Saves the given Pix as a PNG-encoded string and destroys it.
void ImageData::SetPix2(Pix* pix) {
SetPixInternal(pix, &side_data_);
}
// Saves the given PNG-encoded string as the secondary image data.
void ImageData::SetPix2Data(const char* data, int size) {
side_data_.init_to_size(size, 0);
memcpy(&side_data_[0], data, size);
}
// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix* ImageData::GetPix2() const {
return GetPixInternal(side_data_);
}
// Gets anything and everything with a non-NULL pointer, prescaled to a
// given target_height (if 0, then the original image height), and aligned.
// Also returns (if not NULL) the width and height of the scaled image.
void ImageData::PreScale(int target_height, Pix** pix,
int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const {
// The return value is the scale factor that was applied to the image to
// achieve the target_height.
float ImageData::PreScale(int target_height, Pix** pix,
int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const {
int input_width = 0;
int input_height = 0;
Pix* src_pix = GetPix();
@ -233,6 +213,10 @@ void ImageData::PreScale(int target_height, Pix** pix,
// Get the scaled image.
pixDestroy(pix);
*pix = pixScale(src_pix, im_factor, im_factor);
if (*pix == NULL) {
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
input_width, input_height, im_factor);
}
if (scaled_width != NULL)
*scaled_width = pixGetWidth(*pix);
if (scaled_height != NULL)
@ -247,51 +231,67 @@ void ImageData::PreScale(int target_height, Pix** pix,
box.scale(im_factor);
boxes->push_back(box);
}
if (boxes->empty()) {
// Make a single box for the whole image.
TBOX box(0, 0, im_factor * input_width, target_height);
boxes->push_back(box);
}
}
return im_factor;
}
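// Illustrative sketch (not part of the diff): using the returned scale factor.
// With a 300px-high source image and a target_height of 600, im_factor comes
// back as 2.0 and every box in *boxes is scaled by the same amount, so scaled
// coordinates map back to the original image by dividing by the factor.
// The helper function and its arguments are assumptions for this sketch.
static float RescaleTo600(const tesseract::ImageData& image_data, Pix** pix,
                          GenericVector<TBOX>* boxes) {
  int scaled_width, scaled_height;
  float factor =
      image_data.PreScale(600, pix, &scaled_width, &scaled_height, boxes);
  return factor;  // e.g. 2.0f for a 300px-high source image
}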
int ImageData::MemoryUsed() const {
return image_data_.size() + side_data_.size() +
features_.size() * sizeof(WordFeature);
return image_data_.size();
}
// Draws the data in a new window.
void ImageData::Display() const {
#ifndef GRAPHICS_DISABLED
const int kTextSize = 64;
int x_max, y_max;
WordFeature::ComputeSize(features_, &x_max, &y_max);
ScrollView* win = new ScrollView("Imagedata", 100, 100,
2 * (x_max + 2 * kTextSize),
2 * (y_max + 4 * kTextSize),
x_max + 10, y_max + 3 * kTextSize, true);
// Draw the image.
Pix* pix = GetPix();
int height = 256;
if (pix != NULL) {
height = pixGetHeight(pix);
win->Image(pix, 0, height - 1);
pixDestroy(&pix);
}
if (pix == NULL) return;
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
ScrollView* win = new ScrollView("Imagedata", 100, 100,
2 * (width + 2 * kTextSize),
2 * (height + 4 * kTextSize),
width + 10, height + 3 * kTextSize, true);
win->Image(pix, 0, height - 1);
pixDestroy(&pix);
// Draw the boxes.
win->Pen(ScrollView::RED);
win->Brush(ScrollView::NONE);
win->TextAttributes("Arial", kTextSize, false, false, false);
for (int b = 0; b < boxes_.size(); ++b) {
boxes_[b].plot(win);
win->Text(boxes_[b].left(), y_max + kTextSize, box_texts_[b].string());
win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string());
TBOX scaled(boxes_[b]);
scaled.scale(256.0 / height);
scaled.plot(win);
}
// The full transcription.
win->Pen(ScrollView::CYAN);
win->Text(0, y_max + kTextSize * 2, transcription_.string());
win->Text(0, height + kTextSize * 2, transcription_.string());
// Add the features.
win->Pen(ScrollView::GREEN);
WordFeature::Draw(features_, win);
win->Update();
window_wait(win);
#endif
}
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void ImageData::AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages) {
// Copy the boxes and make the transcription.
for (int i = 0; i < box_pages.size(); ++i) {
if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
transcription_ += texts[i];
boxes_.push_back(boxes[i]);
box_texts_.push_back(texts[i]);
}
}
// Saves the given Pix as a PNG-encoded string and destroys it.
@ -336,53 +336,92 @@ bool ImageData::AddBoxes(const char* box_text) {
return false;
}
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void ImageData::AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages) {
// Copy the boxes and make the transcription.
for (int i = 0; i < box_pages.size(); ++i) {
if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
transcription_ += texts[i];
boxes_.push_back(boxes[i]);
box_texts_.push_back(texts[i]);
}
}
DocumentData::DocumentData(const STRING& name)
: document_name_(name), memory_used_(0) {}
: document_name_(name), pages_offset_(0), total_pages_(0),
memory_used_(0), max_memory_(0), reader_(NULL) {}
DocumentData::~DocumentData() {}
// Adds all the pages in the given lstmf filename to the cache. The reader
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
bool DocumentData::LoadDocument(const char* filename, const char* lang,
int start_page, inT64 max_memory,
FileReader reader) {
// Read the file.
GenericVector<char> file_data;
if (!(*reader)(filename, &file_data)) {
document_name_ = filename;
lang_ = lang;
pages_offset_ = start_page;
max_memory_ = max_memory;
reader_ = reader;
return ReCachePages();
}
// Writes all the pages to the given filename. Returns false on error.
bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
TFile fp;
fp.OpenWrite(NULL);
if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
tprintf("Serialize failed: %s\n", filename);
return false;
}
FILE* fp = fmemopen(&file_data[0], file_data.size(), "rb");
document_name_ = filename;
return true;
}
bool DocumentData::SaveToBuffer(GenericVector<char>* buffer) {
TFile fp;
fp.OpenWrite(buffer);
return pages_.Serialize(&fp);
}
// Returns a pointer to the page with the given index, modulo the total
// number of pages, recaching if needed.
const ImageData* DocumentData::GetPage(int index) {
index = Modulo(index, total_pages_);
if (index < pages_offset_ || index >= pages_offset_ + pages_.size()) {
pages_offset_ = index;
if (!ReCachePages()) return NULL;
}
return pages_[index - pages_offset_];
}
// Loads as many pages as can fit in max_memory_ starting at index pages_offset_.
bool DocumentData::ReCachePages() {
// Read the file.
TFile fp;
if (!fp.Open(document_name_, reader_)) return false;
memory_used_ = 0;
if (!pages_.DeSerialize(false, fp)) {
tprintf("Deserialize failed: %s\n", filename);
fclose(fp);
if (!pages_.DeSerialize(false, &fp)) {
tprintf("Deserialize failed: %s\n", document_name_.string());
pages_.truncate(0);
return false;
}
// For each element in file_content, count memory and add additional data.
for (int i = 0; i < pages_.size(); ++i) {
ImageData* image_data = pages_[i];
if (image_data->imagefilename().length() == 0) {
image_data->set_imagefilename(filename);
image_data->set_page_number(i);
total_pages_ = pages_.size();
pages_offset_ %= total_pages_;
// Delete pages before the first one we want, and relocate the rest.
int page;
for (page = 0; page < pages_.size(); ++page) {
if (page < pages_offset_) {
delete pages_[page];
pages_[page] = NULL;
} else {
ImageData* image_data = pages_[page];
if (max_memory_ > 0 && page > pages_offset_ &&
memory_used_ + image_data->MemoryUsed() > max_memory_)
break; // Don't go over memory quota unless the first image.
if (image_data->imagefilename().length() == 0) {
image_data->set_imagefilename(document_name_);
image_data->set_page_number(page);
}
image_data->set_language(lang_);
memory_used_ += image_data->MemoryUsed();
if (pages_offset_ != 0) {
pages_[page - pages_offset_] = image_data;
pages_[page] = NULL;
}
}
image_data->set_language(lang);
memory_used_ += image_data->MemoryUsed();
}
pages_.truncate(page - pages_offset_);
tprintf("Loaded %d/%d pages (%d-%d) of document %s\n",
pages_.size(), total_pages_, pages_offset_,
pages_offset_ + pages_.size(), document_name_.string());
return !pages_.empty();
}
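
A minimal standalone sketch of the paging decision made by GetPage()/ReCachePages() above (illustrative names and numbers; the real ReCachePages also limits the window by max_memory_):

#include <cstdio>

// Returns the offset a cache window would have to start at to serve 'index',
// given 'total_pages' in the document and 'cached' pages currently held
// starting at 'offset'.
static int WindowStartFor(int index, int total_pages, int offset, int cached) {
  index %= total_pages;                      // wrap like Modulo(index, total)
  if (index < offset || index >= offset + cached)
    return index;                            // would trigger a recache here
  return offset;                             // already cached
}

int main() {
  // 100-page document with pages 40..59 currently cached.
  printf("%d\n", WindowStartFor(45, 100, 40, 20));   // 40: cache hit
  printf("%d\n", WindowStartFor(75, 100, 40, 20));   // 75: recache from 75
  printf("%d\n", WindowStartFor(103, 100, 40, 20));  // 3: wraps, recache at 3
  return 0;
}
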
@ -397,16 +436,17 @@ DocumentCache::DocumentCache(inT64 max_memory)
: total_pages_(0), memory_used_(0), max_memory_(max_memory) {}
DocumentCache::~DocumentCache() {}
// Adds all the documents in the list of filenames, couting memory.
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
const char* lang, FileReader reader) {
inT64 fair_share_memory = max_memory_ / filenames.size();
for (int arg = 0; arg < filenames.size(); ++arg) {
STRING filename = filenames[arg] + ".lstmf";
STRING filename = filenames[arg];
DocumentData* document = new DocumentData(filename);
if (document->LoadDocument(filename.string(), lang, reader)) {
if (document->LoadDocument(filename.string(), lang, 0,
fair_share_memory, reader)) {
AddToCache(document);
tprintf("File %d, count=%d\n", arg, document->pages().size());
} else {
tprintf("Failed to load image %s!\n", filename.string());
delete document;
@ -422,14 +462,14 @@ bool DocumentCache::AddToCache(DocumentData* data) {
inT64 new_memory = data->memory_used();
memory_used_ += new_memory;
documents_.push_back(data);
total_pages_ += data->pages().size();
total_pages_ += data->NumPages();
// Delete the first item in the array, and other pages of the same name
// while memory is full.
while (memory_used_ >= max_memory_ && max_memory_ > 0) {
tprintf("Memory used=%lld vs max=%lld, discarding doc of size %lld\n",
memory_used_ , max_memory_, documents_[0]->memory_used());
memory_used_ -= documents_[0]->memory_used();
total_pages_ -= documents_[0]->pages().size();
total_pages_ -= documents_[0]->NumPages();
documents_.remove(0);
}
return true;
@ -446,11 +486,9 @@ DocumentData* DocumentCache::FindDocument(const STRING& document_name) const {
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents.
const ImageData* DocumentCache::GetPageBySerial(int serial) const {
const ImageData* DocumentCache::GetPageBySerial(int serial) {
int document_index = serial % documents_.size();
const DocumentData& doc = *documents_[document_index];
int page_index = serial % doc.pages().size();
return doc.pages()[page_index];
return documents_[document_index]->GetPage(serial / documents_.size());
}
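
The round-robin selection above reduces to simple arithmetic; a standalone sketch (illustrative only):

#include <cstdio>

// Maps a serial number onto (document, page) the way GetPageBySerial() does:
// documents are visited round-robin, and the page index advances by one per
// full sweep of the documents (it is further wrapped by the document itself).
static void SerialToDocPage(int serial, int num_docs, int* doc, int* page) {
  *doc = serial % num_docs;
  *page = serial / num_docs;
}

int main() {
  for (int serial = 0; serial < 6; ++serial) {
    int doc, page;
    SerialToDocPage(serial, 3, &doc, &page);
    printf("serial %d -> doc %d, page %d\n", serial, doc, page);
  }
  return 0;
}
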
} // namespace tesseract.

View File

@ -89,10 +89,7 @@ class ImageData {
public:
ImageData();
// Takes ownership of the pix.
explicit ImageData(Pix* pix);
ImageData(const GenericVector<WordFeature>& features,
const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts);
ImageData(bool vertical, Pix* pix);
~ImageData();
// Builds and returns an ImageData from the basic data. Note that imagedata,
@ -102,10 +99,10 @@ class ImageData {
const char* truth_text, const char* box_text);
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
bool DeSerialize(bool swap, TFile* fp);
// Other accessors.
const STRING& imagefilename() const {
@ -135,37 +132,36 @@ class ImageData {
const GenericVector<TBOX>& boxes() const {
return boxes_;
}
const GenericVector<STRING>& box_texts() const {
return box_texts_;
}
const STRING& box_text(int index) const {
return box_texts_[index];
}
const GenericVector<WordFeature>& features() const {
return features_;
}
bool partial_boxes() const {
return partial_boxes_;
}
// Saves the given Pix as a PNG-encoded string and destroys it.
void SetPix(Pix* pix);
// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix* GetPix() const;
// Saves the given Pix as a PNG-encoded string and destroys it.
void SetPix2(Pix* pix);
// Saves the given PNG-encoded string as the secondary image data.
void SetPix2Data(const char* data, int size);
// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix* GetPix2() const;
// Gets anything and everything with a non-NULL pointer, prescaled to a
// given target_height (if 0, then the original image height), and aligned.
// Also returns (if not NULL) the width and height of the scaled image.
void PreScale(int target_height, Pix** pix,
int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const;
// The return value is the scale factor that was applied to the image to
// achieve the target_height.
float PreScale(int target_height, Pix** pix,
int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const;
int MemoryUsed() const;
// Draws the data in a new window.
void Display() const;
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages);
private:
// Saves the given Pix as a PNG-encoded string and destroys it.
static void SetPixInternal(Pix* pix, GenericVector<char>* image_data);
@ -174,11 +170,6 @@ class ImageData {
// Parses the text string as a box file and adds any discovered boxes that
// match the page number. Returns false on error.
bool AddBoxes(const char* box_text);
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages);
private:
STRING imagefilename_; // File to read image from.
@ -188,9 +179,7 @@ class ImageData {
STRING transcription_; // UTF-8 ground truth of image.
GenericVector<TBOX> boxes_; // If non-empty boxes of the image.
GenericVector<STRING> box_texts_; // String for text in each box.
GenericVector<WordFeature> features_;
GenericVector<char> side_data_; // PNG file data.
bool partial_boxes_; // Box text disagrees with transcription.
bool vertical_text_; // Image has been rotated from vertical.
};
// A collection of ImageData that knows roughly how much memory it is using.
@ -199,9 +188,13 @@ class DocumentData {
explicit DocumentData(const STRING& name);
~DocumentData();
// Adds all the pages in the given lstmf filename to the cache. The reader
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
bool LoadDocument(const char* filename, const char* lang, FileReader reader);
bool LoadDocument(const char* filename, const char* lang, int start_page,
inT64 max_memory, FileReader reader);
// Writes all the pages to the given filename. Returns false on error.
bool SaveDocument(const char* filename, FileWriter writer);
bool SaveToBuffer(GenericVector<char>* buffer);
// Adds the given page data to this document, counting up memory.
void AddPageToDocument(ImageData* page);
@ -209,12 +202,15 @@ class DocumentData {
const STRING& document_name() const {
return document_name_;
}
const PointerVector<ImageData>& pages() const {
return pages_;
int NumPages() const {
return total_pages_;
}
inT64 memory_used() const {
return memory_used_;
}
// Returns a pointer to the page with the given index, modulo the total
// number of pages, recaching if needed.
const ImageData* GetPage(int index);
// Takes ownership of the given page index. The page is made NULL in *this.
ImageData* TakePage(int index) {
ImageData* page = pages_[index];
@ -222,13 +218,27 @@ class DocumentData {
return page;
}
private:
// Loads as many pages as can fit in max_memory_ starting at index pages_offset_.
bool ReCachePages();
private:
// A name for this document.
STRING document_name_;
// The language of this document.
STRING lang_;
// A group of pages that corresponds in some loose way to a document.
PointerVector<ImageData> pages_;
// Page number of the first index in pages_.
int pages_offset_;
// Total number of pages in document (may exceed size of pages_.)
int total_pages_;
// Total of all pix sizes in the document.
inT64 memory_used_;
// Max memory to use at any time.
inT64 max_memory_;
// Saved reader from LoadDocument to allow re-caching.
FileReader reader_;
};
// A collection of DocumentData that knows roughly how much memory it is using.
@ -237,7 +247,7 @@ class DocumentCache {
explicit DocumentCache(inT64 max_memory);
~DocumentCache();
// Adds all the documents in the list of filenames, couting memory.
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
FileReader reader);
@ -250,7 +260,7 @@ class DocumentCache {
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents.
const ImageData* GetPageBySerial(int serial) const;
const ImageData* GetPageBySerial(int serial);
const PointerVector<DocumentData>& documents() const {
return documents_;

View File

@ -451,8 +451,8 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
&min_top, &max_top);
// Calculate the scale factor we'll use to get to image y-pixels
double midx = (bbox.left() + bbox.right()) / 2;
double ydiff = (bbox.top() - bbox.bottom()) + 2;
double midx = (bbox.left() + bbox.right()) / 2.0;
double ydiff = (bbox.top() - bbox.bottom()) + 2.0;
FCOORD mid_bot(midx, bbox.bottom()), tmid_bot;
FCOORD mid_high(midx, bbox.bottom() + ydiff), tmid_high;
DenormTransform(NULL, mid_bot, &tmid_bot);
@ -487,7 +487,7 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
top > kBlnCellHeight - kBlnBaselineOffset / 2)
max_top += kBlnBaselineOffset;
top -= bln_yshift;
int height = top - kBlnBaselineOffset - bottom_shift;
int height = top - kBlnBaselineOffset;
double min_height = min_top - kBlnBaselineOffset - tolerance;
double max_height = max_top - kBlnBaselineOffset + tolerance;
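
The change from / 2 to / 2.0 above avoids integer truncation before the result is stored in a double; a tiny standalone illustration:

#include <cstdio>

int main() {
  int left = 10, right = 15;
  double truncated = (left + right) / 2;   // integer division first: 12.0
  double exact = (left + right) / 2.0;     // floating-point division: 12.5
  printf("%g vs %g\n", truncated, exact);
  return 0;
}
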

View File

@ -86,6 +86,18 @@ void BLOCK::rotate(const FCOORD& rotation) {
box = *poly_block()->bounding_box();
}
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX BLOCK::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
TBOX box;
// This is a read-only iteration of the rows in the block.
ROW_IT it(const_cast<ROW_LIST*>(&rows));
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
box += it.data()->restricted_bounding_box(upper_dots, lower_dots);
}
return box;
}
/**
* BLOCK::reflect_polygon_in_y_axis
*

View File

@ -161,10 +161,14 @@ class BLOCK:public ELIST_LINK, public PDBLK
median_size_.set_y(y);
}
Pix* render_mask() {
return PDBLK::render_mask(re_rotation_);
Pix* render_mask(TBOX* mask_box) {
return PDBLK::render_mask(re_rotation_, mask_box);
}
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
// Reflects the polygon in the y-axis and recomputes the bounding_box.
// Does nothing to any contained rows/words/blobs etc.
void reflect_polygon_in_y_axis();

View File

@ -80,6 +80,17 @@ ROW::ROW( //constructor
rmargin_ = 0;
}
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX ROW::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
TBOX box;
// This is a read-only iteration of the words in the row.
WERD_IT it(const_cast<WERD_LIST *>(&words));
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
box += it.data()->restricted_bounding_box(upper_dots, lower_dots);
}
return box;
}
/**********************************************************************
* ROW::recalc_bounding_box

View File

@ -85,6 +85,9 @@ class ROW:public ELIST_LINK
TBOX bounding_box() const { //return bounding box
return bound_box;
}
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
void set_lmargin(inT16 lmargin) {
lmargin_ = lmargin;

View File

@ -34,6 +34,13 @@ static const double kStopperAmbiguityThresholdGain = 8.0;
static const double kStopperAmbiguityThresholdOffset = 1.5;
// Max number of broken pieces to associate.
const int kWordrecMaxNumJoinChunks = 4;
// Max ratio of word box height to line size to allow it to be processed as
// a line with other words.
const double kMaxWordSizeRatio = 1.25;
// Max ratio of line box height to line size to allow a new word to be added.
const double kMaxLineSizeRatio = 1.25;
// Max ratio of word gap to line size to allow a new word to be added.
const double kMaxWordGapRatio = 2.0;
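
A small standalone sketch of how these ratios gate the word merging in ROW_RES below (numbers are illustrative, not from the source):

#include <cstdio>

int main() {
  const double kMaxWordSizeRatio = 1.25;
  const double kMaxWordGapRatio = 2.0;
  double line_height = 40.0;   // x_height + ascenders - descenders
  double word_height = 55.0;
  double gap = 90.0;
  // A word taller than 1.25x the line size is flagged odd_size and not merged;
  // a gap wider than 2x the line size stops the next word from being added.
  bool size_ok = word_height <= line_height * kMaxWordSizeRatio;   // 55 > 50
  bool gap_ok = gap <= line_height * kMaxWordGapRatio;             // 90 > 80
  printf("size_ok=%d gap_ok=%d\n", size_ok, gap_ok);               // 0 0
  return 0;
}
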
// Computes and returns a threshold of certainty difference used to determine
// which words to keep, based on the adjustment factors of the two words.
@ -49,6 +56,7 @@ static double StopperAmbigThreshold(double f1, double f2) {
* Constructor for page results
*************************************************************************/
PAGE_RES::PAGE_RES(
bool merge_similar_words,
BLOCK_LIST *the_block_list,
WERD_CHOICE **prev_word_best_choice_ptr) {
Init();
@ -56,7 +64,8 @@ PAGE_RES::PAGE_RES(
BLOCK_RES_IT block_res_it(&block_res_list);
for (block_it.mark_cycle_pt();
!block_it.cycled_list(); block_it.forward()) {
block_res_it.add_to_end(new BLOCK_RES(block_it.data()));
block_res_it.add_to_end(new BLOCK_RES(merge_similar_words,
block_it.data()));
}
prev_word_best_choice = prev_word_best_choice_ptr;
}
@ -67,7 +76,7 @@ PAGE_RES::PAGE_RES(
* Constructor for BLOCK results
*************************************************************************/
BLOCK_RES::BLOCK_RES(BLOCK *the_block) {
BLOCK_RES::BLOCK_RES(bool merge_similar_words, BLOCK *the_block) {
ROW_IT row_it (the_block->row_list ());
ROW_RES_IT row_res_it(&row_res_list);
@ -83,22 +92,20 @@ BLOCK_RES::BLOCK_RES(BLOCK *the_block) {
block = the_block;
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
row_res_it.add_to_end(new ROW_RES(row_it.data()));
row_res_it.add_to_end(new ROW_RES(merge_similar_words, row_it.data()));
}
}
/*************************************************************************
* ROW_RES::ROW_RES
*
* Constructor for ROW results
*************************************************************************/
ROW_RES::ROW_RES(ROW *the_row) {
ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) {
WERD_IT word_it(the_row->word_list());
WERD_RES_IT word_res_it(&word_res_list);
WERD_RES *combo = NULL; // current combination of fuzzies
WERD_RES *word_res; // current word
WERD *copy_word;
char_count = 0;
@ -106,20 +113,49 @@ ROW_RES::ROW_RES(ROW *the_row) {
whole_word_rej_count = 0;
row = the_row;
bool add_next_word = false;
TBOX union_box;
float line_height = the_row->x_height() + the_row->ascenders() -
the_row->descenders();
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word_res = new WERD_RES(word_it.data());
WERD_RES* word_res = new WERD_RES(word_it.data());
word_res->x_height = the_row->x_height();
if (word_res->word->flag(W_FUZZY_NON)) {
if (add_next_word) {
ASSERT_HOST(combo != NULL);
// We are adding this word to the combination.
word_res->part_of_combo = TRUE;
combo->copy_on(word_res);
} else if (merge_similar_words) {
union_box = word_res->word->bounding_box();
add_next_word = !word_res->word->flag(W_REP_CHAR) &&
union_box.height() <= line_height * kMaxWordSizeRatio;
word_res->odd_size = !add_next_word;
}
if (word_it.data_relative(1)->flag(W_FUZZY_NON)) {
WERD* next_word = word_it.data_relative(1);
if (merge_similar_words) {
if (add_next_word && !next_word->flag(W_REP_CHAR)) {
// Next word will be added on if all of the following are true:
// Not a rep char.
// Box height small enough.
// Union box height small enough.
// Horizontal gap small enough.
TBOX next_box = next_word->bounding_box();
int prev_right = union_box.right();
union_box += next_box;
if (next_box.height() > line_height * kMaxWordSizeRatio ||
union_box.height() > line_height * kMaxLineSizeRatio ||
next_box.left() > prev_right + line_height * kMaxWordGapRatio) {
add_next_word = false;
}
}
next_word->set_flag(W_FUZZY_NON, add_next_word);
} else {
add_next_word = next_word->flag(W_FUZZY_NON);
}
if (add_next_word) {
if (combo == NULL) {
copy_word = new WERD;
//deep copy
*copy_word = *(word_it.data());
*copy_word = *(word_it.data()); // deep copy
combo = new WERD_RES(copy_word);
combo->x_height = the_row->x_height();
combo->combination = TRUE;
@ -171,12 +207,8 @@ WERD_RES& WERD_RES::operator=(const WERD_RES & source) {
if (!wc_dest_it.empty()) {
wc_dest_it.move_to_first();
best_choice = wc_dest_it.data();
best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
} else {
best_choice = NULL;
if (!best_choice_fontinfo_ids.empty()) {
best_choice_fontinfo_ids.clear();
}
}
if (source.raw_choice != NULL) {
@ -208,6 +240,7 @@ void WERD_RES::CopySimpleFields(const WERD_RES& source) {
done = source.done;
unlv_crunch_mode = source.unlv_crunch_mode;
small_caps = source.small_caps;
odd_size = source.odd_size;
italic = source.italic;
bold = source.bold;
fontinfo = source.fontinfo;
@ -216,6 +249,7 @@ void WERD_RES::CopySimpleFields(const WERD_RES& source) {
fontinfo_id2_count = source.fontinfo_id2_count;
x_height = source.x_height;
caps_height = source.caps_height;
baseline_shift = source.baseline_shift;
guessed_x_ht = source.guessed_x_ht;
guessed_caps_ht = source.guessed_caps_ht;
reject_spaces = source.reject_spaces;
@ -278,8 +312,8 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in,
float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
? row->body_size() : x_height;
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
word_xheight, numeric_mode, norm_mode_hint,
norm_box, &denorm);
word_xheight, baseline_shift, numeric_mode,
norm_mode_hint, norm_box, &denorm);
blob_row = row;
SetupBasicsFromChoppedWord(unicharset_in);
SetupBlamerBundle();
@ -318,8 +352,7 @@ void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
TBOX box = b_it.data()->bounding_box();
box_word->InsertBox(box_word->length(), box);
fake_choices[blob_id++] = new BLOB_CHOICE(0, 10.0f, -1.0f,
-1, -1, -1, 0, 0, 0, BCC_FAKE);
fake_choices[blob_id++] = new BLOB_CHOICE;
}
FakeClassifyWord(blob_count, fake_choices);
delete [] fake_choices;
@ -331,6 +364,7 @@ void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {
LogNewCookedChoice(1, false, word);
}
tess_failed = true;
done = true;
}
void WERD_RES::SetupWordScript(const UNICHARSET& uch) {
@ -369,7 +403,8 @@ void WERD_RES::SetupBlobWidthsAndGaps() {
// as the blob widths and gaps.
void WERD_RES::InsertSeam(int blob_number, SEAM* seam) {
// Insert the seam into the SEAMS array.
insert_seam(chopped_word, blob_number, seam, &seam_array);
seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
seam_array.insert(seam, blob_number);
if (ratings != NULL) {
// Expand the ratings matrix.
ratings = ratings->ConsumeAndMakeBigger(blob_number);
@ -446,6 +481,16 @@ void WERD_RES::DebugWordChoices(bool debug, const char* word_to_debug) {
}
}
// Prints the top choice along with the accepted/done flags.
void WERD_RES::DebugTopChoice(const char* msg) const {
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
tess_accepted, tess_would_adapt, done);
if (best_choice == NULL)
tprintf("<Null choice>\n");
else
best_choice->print(msg);
}
// Removes from best_choices all choices which are not within a reasonable
// range of the best choice.
// TODO(rays) incorporate the information used here into the params training
@ -759,12 +804,16 @@ void WERD_RES::RebuildBestState() {
for (int i = 0; i < best_choice->length(); ++i) {
int length = best_choice->state(i);
best_state.push_back(length);
if (length > 1)
join_pieces(seam_array, start, start + length - 1, chopped_word);
if (length > 1) {
SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
TBLOB* blob = chopped_word->blobs[start];
rebuild_word->blobs.push_back(new TBLOB(*blob));
if (length > 1)
break_pieces(seam_array, start, start + length - 1, chopped_word);
if (length > 1) {
SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
start += length;
}
}
@ -830,6 +879,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
}
FakeWordFromRatings();
reject_map.initialise(blob_count);
done = true;
}
// Creates a WERD_CHOICE for the word using the top choices from the leading
@ -1019,8 +1069,7 @@ bool WERD_RES::PiecesAllNatural(int start, int count) const {
for (int index = start; index < start + count - 1; ++index) {
if (index >= 0 && index < seam_array.size()) {
SEAM* seam = seam_array[index];
if (seam != NULL && seam->split1 != NULL)
return false;
if (seam != NULL && seam->HasAnySplits()) return false;
}
}
return true;
@ -1038,6 +1087,7 @@ void WERD_RES::InitNonPointers() {
done = FALSE;
unlv_crunch_mode = CR_NONE;
small_caps = false;
odd_size = false;
italic = FALSE;
bold = FALSE;
// The fontinfos and tesseract count as non-pointers as they point to
@ -1049,6 +1099,7 @@ void WERD_RES::InitNonPointers() {
fontinfo_id2_count = 0;
x_height = 0.0;
caps_height = 0.0;
baseline_shift = 0.0f;
guessed_x_ht = TRUE;
guessed_caps_ht = TRUE;
combination = FALSE;
@ -1205,23 +1256,16 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
return 0;
}
// Inserts the new_word and a corresponding WERD_RES before the current
// position. The simple fields of the WERD_RES are copied from clone_res and
// the resulting WERD_RES is returned for further setup with best_choice etc.
// Inserts the new_word as a combination owned by a corresponding WERD_RES
// before the current position. The simple fields of the WERD_RES are copied
// from clone_res and the resulting WERD_RES is returned for further setup
// with best_choice etc.
WERD_RES* PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES& clone_res,
WERD* new_word) {
// Insert new_word into the ROW.
WERD_IT w_it(row()->row->word_list());
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD* word = w_it.data();
if (word == word_res->word)
break;
}
ASSERT_HOST(!w_it.cycled_list());
w_it.add_before_then_move(new_word);
// Make a WERD_RES for the new_word.
WERD_RES* new_res = new WERD_RES(new_word);
new_res->CopySimpleFields(clone_res);
new_res->combination = true;
// Insert into the appropriate place in the ROW_RES.
WERD_RES_IT wr_it(&row()->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
@ -1239,6 +1283,163 @@ WERD_RES* PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES& clone_res,
return new_res;
}
// Helper computes the boundaries between blobs in the word. The blob bounds
// are likely very poor, if they come from LSTM, where it only outputs the
// character at one pixel within it, so we find the midpoints between them.
static void ComputeBlobEnds(const WERD_RES& word, C_BLOB_LIST* next_word_blobs,
GenericVector<int>* blob_ends) {
C_BLOB_IT blob_it(word.word->cblob_list());
for (int i = 0; i < word.best_state.size(); ++i) {
int length = word.best_state[i];
// Get the bounding box of the fake blobs
TBOX blob_box = blob_it.data()->bounding_box();
blob_it.forward();
for (int b = 1; b < length; ++b) {
blob_box += blob_it.data()->bounding_box();
blob_it.forward();
}
// This blob_box is crap, so for now we are only looking for the
// boundaries between them.
int blob_end = MAX_INT32;
if (!blob_it.at_first() || next_word_blobs != NULL) {
if (blob_it.at_first())
blob_it.set_to_list(next_word_blobs);
blob_end = (blob_box.right() + blob_it.data()->bounding_box().left()) / 2;
}
blob_ends->push_back(blob_end);
}
}
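
A standalone sketch of the midpoint computation that ComputeBlobEnds performs between adjacent (possibly single-pixel) fake blob boxes (illustrative data):

#include <cstdio>
#include <vector>

// The boundary between characters is taken as the midpoint between the right
// edge of one blob box and the left edge of the next; the last blob has no
// right neighbour, so its end stays open (MAX_INT32 in the real code).
int main() {
  std::vector<int> rights = {20, 52, 88};    // right edges of blob i
  std::vector<int> next_lefts = {30, 60};    // left edges of blob i+1
  for (size_t i = 0; i < rights.size(); ++i) {
    if (i < next_lefts.size())
      printf("blob %zu ends at %d\n", i, (rights[i] + next_lefts[i]) / 2);
    else
      printf("blob %zu end is open\n", i);
  }
  return 0;
}
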
// Replaces the current WERD/WERD_RES with the given words. The given words
// contain fake blobs that indicate the position of the characters. These are
// replaced with real blobs from the current word as much as possible.
void PAGE_RES_IT::ReplaceCurrentWord(
tesseract::PointerVector<WERD_RES>* words) {
if (words->empty()) {
DeleteCurrentWord();
return;
}
WERD_RES* input_word = word();
// Set the BOL/EOL flags on the words from the input word.
if (input_word->word->flag(W_BOL)) {
(*words)[0]->word->set_flag(W_BOL, true);
} else {
(*words)[0]->word->set_blanks(1);
}
words->back()->word->set_flag(W_EOL, input_word->word->flag(W_EOL));
// Move the blobs from the input word to the new set of words.
// If the input word_res is a combination, then the replacements will also be
// combinations, and will own their own words. If the input word_res is not a
// combination, then the final replacements will not be either, (although it
// is allowed for the input words to be combinations) and their words
// will get put on the row list. This maintains the ownership rules.
WERD_IT w_it(row()->row->word_list());
if (!input_word->combination) {
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD* word = w_it.data();
if (word == input_word->word)
break;
}
// w_it is now set to the input_word's word.
ASSERT_HOST(!w_it.cycled_list());
}
// Insert into the appropriate place in the ROW_RES.
WERD_RES_IT wr_it(&row()->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
WERD_RES* word = wr_it.data();
if (word == input_word)
break;
}
ASSERT_HOST(!wr_it.cycled_list());
// Since we only have an estimate of the bounds between blobs, use the blob
// x-middle as the determiner of where to put the blobs
C_BLOB_IT src_b_it(input_word->word->cblob_list());
src_b_it.sort(&C_BLOB::SortByXMiddle);
C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list());
rej_b_it.sort(&C_BLOB::SortByXMiddle);
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word_w = (*words)[w];
// Compute blob boundaries.
GenericVector<int> blob_ends;
C_BLOB_LIST* next_word_blobs =
w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : NULL;
ComputeBlobEnds(*word_w, next_word_blobs, &blob_ends);
// Delete the fake blobs on the current word.
word_w->word->cblob_list()->clear();
C_BLOB_IT dest_it(word_w->word->cblob_list());
// Build the box word as we move the blobs.
tesseract::BoxWord* box_word = new tesseract::BoxWord;
for (int i = 0; i < blob_ends.size(); ++i) {
int end_x = blob_ends[i];
TBOX blob_box;
// Add the blobs up to end_x.
while (!src_b_it.empty() &&
src_b_it.data()->bounding_box().x_middle() < end_x) {
blob_box += src_b_it.data()->bounding_box();
dest_it.add_after_then_move(src_b_it.extract());
src_b_it.forward();
}
while (!rej_b_it.empty() &&
rej_b_it.data()->bounding_box().x_middle() < end_x) {
blob_box += rej_b_it.data()->bounding_box();
dest_it.add_after_then_move(rej_b_it.extract());
rej_b_it.forward();
}
// Clip to the previously computed bounds. Although imperfectly accurate,
// it is good enough, and much more complicated to determine where else
// to clip.
if (i > 0 && blob_box.left() < blob_ends[i - 1])
blob_box.set_left(blob_ends[i - 1]);
if (blob_box.right() > end_x)
blob_box.set_right(end_x);
box_word->InsertBox(i, blob_box);
}
// Fix empty boxes. If a very joined blob sits over multiple characters,
// then we will have some empty boxes from using the middle, so look for
// overlaps.
for (int i = 0; i < box_word->length(); ++i) {
TBOX box = box_word->BlobBox(i);
if (box.null_box()) {
// Nothing has its middle in the bounds of this blob, so use anything
// that overlaps.
for (dest_it.mark_cycle_pt(); !dest_it.cycled_list();
dest_it.forward()) {
TBOX blob_box = dest_it.data()->bounding_box();
if (blob_box.left() < blob_ends[i] &&
(i == 0 || blob_box.right() >= blob_ends[i - 1])) {
if (i > 0 && blob_box.left() < blob_ends[i - 1])
blob_box.set_left(blob_ends[i - 1]);
if (blob_box.right() > blob_ends[i])
blob_box.set_right(blob_ends[i]);
box_word->ChangeBox(i, blob_box);
break;
}
}
}
}
delete word_w->box_word;
word_w->box_word = box_word;
if (!input_word->combination) {
// Insert word_w->word into the ROW. It doesn't own its word, so the
// ROW needs to own it.
w_it.add_before_stay_put(word_w->word);
word_w->combination = false;
}
(*words)[w] = NULL; // We are taking ownership.
wr_it.add_before_stay_put(word_w);
}
// We have taken ownership of the words.
words->clear();
// Delete the current word, which has been replaced. We could just call
// DeleteCurrentWord, but that would iterate both lists again, and we know
// we are already in the right place.
if (!input_word->combination)
delete w_it.extract();
delete wr_it.extract();
ResetWordIterator();
}
// Deletes the current WERD_RES and its underlying WERD.
void PAGE_RES_IT::DeleteCurrentWord() {
// Check that this word is as we expect. part_of_combos are NEVER iterated
@ -1271,6 +1472,33 @@ void PAGE_RES_IT::DeleteCurrentWord() {
ResetWordIterator();
}
// Makes the current word a fuzzy space if not already fuzzy. Updates
// corresponding part of combo if required.
void PAGE_RES_IT::MakeCurrentWordFuzzy() {
WERD* real_word = word_res->word;
if (!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)) {
real_word->set_flag(W_FUZZY_SP, true);
tprintf("Made word fuzzy at:");
real_word->bounding_box().print();
if (word_res->combination) {
// The next word should be the corresponding part of combo, but we have
// already stepped past it, so find it by search.
WERD_RES_IT wr_it(&row()->word_res_list);
for (wr_it.mark_cycle_pt();
!wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
}
wr_it.forward();
ASSERT_HOST(wr_it.data()->part_of_combo);
real_word = wr_it.data()->word;
ASSERT_HOST(!real_word->flag(W_FUZZY_SP) &&
!real_word->flag(W_FUZZY_NON));
real_word->set_flag(W_FUZZY_SP, true);
tprintf("Made part of combo word fuzzy at:");
real_word->bounding_box().print();
}
}
}
/*************************************************************************
* PAGE_RES_IT::restart_page
*
@ -1298,18 +1526,32 @@ WERD_RES *PAGE_RES_IT::start_page(bool empty_ok) {
// Resets the word_res_it so that it is one past the next_word_res, as
// it should be after internal_forward. If next_row_res != row_res,
// then the next_word_res is in the next row, so there is no need to do
// anything, since operations on the current word will not have disturbed
// the word_res_it.
// anything to word_res_it, but it is still a good idea to reset the pointers
// word_res and prev_word_res, which are still in the current row.
void PAGE_RES_IT::ResetWordIterator() {
if (row_res == next_row_res) {
// Reset the member iterator so it can move forward and detect the
// cycled_list state correctly.
word_res_it.move_to_first();
word_res_it.mark_cycle_pt();
while (!word_res_it.cycled_list() && word_res_it.data() != next_word_res)
word_res_it.forward();
for (word_res_it.mark_cycle_pt();
!word_res_it.cycled_list() && word_res_it.data() != next_word_res;
word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
if (prev_row_res == row_res) prev_word_res = word_res;
word_res = word_res_it.data();
}
}
ASSERT_HOST(!word_res_it.cycled_list());
word_res_it.forward();
} else {
// word_res_it is OK, but reset word_res and prev_word_res if needed.
WERD_RES_IT wr_it(&row_res->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
if (!wr_it.data()->part_of_combo) {
if (prev_row_res == row_res) prev_word_res = word_res;
word_res = wr_it.data();
}
}
}
}

View File

@ -82,7 +82,8 @@ class PAGE_RES { // page result
PAGE_RES() { Init(); } // empty constructor
PAGE_RES(BLOCK_LIST *block_list, // real blocks
PAGE_RES(bool merge_similar_words,
BLOCK_LIST *block_list, // real blocks
WERD_CHOICE **prev_word_best_choice_ptr);
~PAGE_RES () { // destructor
@ -111,7 +112,7 @@ class BLOCK_RES:public ELIST_LINK {
BLOCK_RES() {
} // empty constructor
BLOCK_RES(BLOCK *the_block); // real block
BLOCK_RES(bool merge_similar_words, BLOCK *the_block); // real block
~BLOCK_RES () { // destructor
}
@ -132,7 +133,7 @@ class ROW_RES:public ELIST_LINK {
ROW_RES() {
} // empty constructor
ROW_RES(ROW *the_row); // real row
ROW_RES(bool merge_similar_words, ROW *the_row); // real row
~ROW_RES() { // destructor
}
@ -279,7 +280,8 @@ class WERD_RES : public ELIST_LINK {
BOOL8 tess_accepted; // Tess thinks its ok?
BOOL8 tess_would_adapt; // Tess would adapt?
BOOL8 done; // ready for output?
bool small_caps; // word appears to be small caps
bool small_caps; // word appears to be small caps
bool odd_size; // word is bigger than line or leader dots.
inT8 italic;
inT8 bold;
// The fontinfos are pointers to data owned by the classifier.
@ -292,6 +294,7 @@ class WERD_RES : public ELIST_LINK {
CRUNCH_MODE unlv_crunch_mode;
float x_height; // post match estimate
float caps_height; // post match estimate
float baseline_shift; // post match estimate.
/*
To deal with fuzzy spaces we need to be able to combine "words" to form
@ -312,8 +315,6 @@ class WERD_RES : public ELIST_LINK {
BOOL8 combination; //of two fuzzy gap wds
BOOL8 part_of_combo; //part of a combo
BOOL8 reject_spaces; //Reject spacing?
// FontInfo ids for each unichar in best_choice.
GenericVector<inT8> best_choice_fontinfo_ids;
WERD_RES() {
InitNonPointers();
@ -486,6 +487,9 @@ class WERD_RES : public ELIST_LINK {
// the word_to_debug.
void DebugWordChoices(bool debug, const char* word_to_debug);
// Prints the top choice along with the accepted/done flags.
void DebugTopChoice(const char* msg) const;
// Removes from best_choices all choices which are not within a reasonable
// range of the best choice.
void FilterWordChoices(int debug_level);
@ -694,9 +698,18 @@ class PAGE_RES_IT {
// the resulting WERD_RES is returned for further setup with best_choice etc.
WERD_RES* InsertSimpleCloneWord(const WERD_RES& clone_res, WERD* new_word);
// Replaces the current WERD/WERD_RES with the given words. The given words
// contain fake blobs that indicate the position of the characters. These are
// replaced with real blobs from the current word as much as possible.
void ReplaceCurrentWord(tesseract::PointerVector<WERD_RES>* words);
// Deletes the current WERD_RES and its underlying WERD.
void DeleteCurrentWord();
// Makes the current word a fuzzy space if not already fuzzy. Updates
// corresponding part of combo if required.
void MakeCurrentWordFuzzy();
WERD_RES *forward() { // Get next word.
return internal_forward(false, false);
}
@ -736,9 +749,9 @@ class PAGE_RES_IT {
return next_block_res;
}
void rej_stat_word(); // for page/block/row
void ResetWordIterator();
private:
void ResetWordIterator();
WERD_RES *internal_forward(bool new_block, bool empty_ok);
WERD_RES * prev_word_res; // previous word

View File

@ -77,7 +77,6 @@ void PDBLK::set_sides( //set vertex lists
right_it.add_list_before (right);
}
/**********************************************************************
* PDBLK::contains
*
@ -126,7 +125,7 @@ void PDBLK::move( // reposition block
// Returns a binary Pix mask with a 1 pixel for every pixel within the
// block. Rotates the coordinate system by rerotation prior to rendering.
Pix* PDBLK::render_mask(const FCOORD& rerotation) {
Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) {
TBOX rotated_box(box);
rotated_box.rotate(rerotation);
Pix* pix = pixCreate(rotated_box.width(), rotated_box.height(), 1);
@ -163,6 +162,7 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation) {
pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(),
PIX_SET, NULL, 0, 0);
}
if (mask_box != NULL) *mask_box = rotated_box;
return pix;
}

View File

@ -89,7 +89,9 @@ class PDBLK
// Returns a binary Pix mask with a 1 pixel for every pixel within the
// block. Rotates the coordinate system by rerotation prior to rendering.
Pix* render_mask(const FCOORD& rerotation);
// If not NULL, mask_box is filled with the position box of the returned
// mask image.
Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box);
#ifndef GRAPHICS_DISABLED
///draw histogram

View File

@ -58,7 +58,6 @@ const int par2 = 6750 / (approx_dist * approx_dist);
TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) {
EDGEPT *edgept; // converted steps
TBOX loop_box; // bounding box
inT32 area; // loop area
EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path
@ -73,9 +72,9 @@ TESSLINE* ApproximateOutline(bool allow_detailed_fx, C_OUTLINE* c_outline) {
if (!poly_wide_objects_better && loop_box.width() > area)
area = loop_box.width();
area *= area;
edgept = edgesteps_to_edgepts(c_outline, edgepts);
edgesteps_to_edgepts(c_outline, edgepts);
fix2(edgepts, area);
edgept = poly2 (edgepts, area); // 2nd approximation.
EDGEPT* edgept = poly2(edgepts, area); // 2nd approximation.
EDGEPT* startpt = edgept;
EDGEPT* result = NULL;
EDGEPT* prev_result = NULL;

View File

@ -164,28 +164,37 @@ enum PageSegMode {
PSM_SINGLE_CHAR, ///< Treat the image as a single character.
PSM_SPARSE_TEXT, ///< Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD, ///< Sparse text with orientation and script det.
PSM_RAW_LINE, ///< Treat the image as a single text line, bypassing
///< hacks that are Tesseract-specific.
PSM_COUNT ///< Number of enum entries.
};
/**
* Macros that act on a PageSegMode to determine whether components of
* Inline functions that act on a PageSegMode to determine whether components of
* layout analysis are enabled.
* *Depend critically on the order of elements of PageSegMode.*
* NOTE that arg is an int for compatibility with INT_PARAM.
*/
#define PSM_OSD_ENABLED(pageseg_mode) ((pageseg_mode) <= PSM_AUTO_OSD || \
(pageseg_mode) == PSM_SPARSE_TEXT_OSD)
#define PSM_COL_FIND_ENABLED(pageseg_mode) \
((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_AUTO)
#define PSM_SPARSE(pageseg_mode) \
((pageseg_mode) == PSM_SPARSE_TEXT || (pageseg_mode) == PSM_SPARSE_TEXT_OSD)
#define PSM_BLOCK_FIND_ENABLED(pageseg_mode) \
((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_COLUMN)
#define PSM_LINE_FIND_ENABLED(pageseg_mode) \
((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_BLOCK)
#define PSM_WORD_FIND_ENABLED(pageseg_mode) \
(((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_LINE) || \
(pageseg_mode) == PSM_SPARSE_TEXT || (pageseg_mode) == PSM_SPARSE_TEXT_OSD)
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}
inline bool PSM_SPARSE(int pageseg_mode) {
return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
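
A minimal usage sketch of the inline checks above (it assumes the PageSegMode enum and the functions are in scope, e.g. via Tesseract's public headers; the helper name is made up):

// The argument is a plain int, matching how an INT_PARAM stores the mode.
static bool WantsOsdButNotColumns(int pageseg_mode) {
  return PSM_OSD_ENABLED(pageseg_mode) && !PSM_COL_FIND_ENABLED(pageseg_mode);
}
// e.g. WantsOsdButNotColumns(PSM_SPARSE_TEXT_OSD) is true: sparse OSD mode
// runs orientation/script detection but skips column finding.
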
/**
* enum of the elements of the page hierarchy, used in ResultIterator

View File

@ -90,8 +90,6 @@ static const char * const kPermuterTypeNames[] = {
BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
float src_rating, // rating
float src_cert, // certainty
inT16 src_fontinfo_id, // font
inT16 src_fontinfo_id2, // 2nd choice font
int src_script_id, // script
float min_xheight, // min xheight allowed
float max_xheight, // max xheight by this char
@ -100,8 +98,8 @@ BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
unichar_id_ = src_unichar_id;
rating_ = src_rating;
certainty_ = src_cert;
fontinfo_id_ = src_fontinfo_id;
fontinfo_id2_ = src_fontinfo_id2;
fontinfo_id_ = -1;
fontinfo_id2_ = -1;
script_id_ = src_script_id;
min_xheight_ = min_xheight;
max_xheight_ = max_xheight;
@ -126,6 +124,7 @@ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
max_xheight_ = other.max_xheight_;
yshift_ = other.yshift();
classifier_ = other.classifier_;
fonts_ = other.fonts_;
}
// Returns true if *this and other agree on the baseline and x-height

View File

@ -24,6 +24,7 @@
#include "clst.h"
#include "elst.h"
#include "fontinfo.h"
#include "genericvector.h"
#include "matrix.h"
#include "unichar.h"
@ -48,11 +49,11 @@ class BLOB_CHOICE: public ELIST_LINK
{
public:
BLOB_CHOICE() {
unichar_id_ = INVALID_UNICHAR_ID;
unichar_id_ = UNICHAR_SPACE;
fontinfo_id_ = -1;
fontinfo_id2_ = -1;
rating_ = MAX_FLOAT32;
certainty_ = -MAX_FLOAT32;
rating_ = 10.0;
certainty_ = -1.0;
script_id_ = -1;
xgap_before_ = 0;
xgap_after_ = 0;
@ -64,8 +65,6 @@ class BLOB_CHOICE: public ELIST_LINK
BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
float src_rating, // rating
float src_cert, // certainty
inT16 src_fontinfo_id, // font
inT16 src_fontinfo_id2, // 2nd choice font
int script_id, // script
float min_xheight, // min xheight in image pixel units
float max_xheight, // max xheight allowed by this char
@ -89,6 +88,26 @@ class BLOB_CHOICE: public ELIST_LINK
inT16 fontinfo_id2() const {
return fontinfo_id2_;
}
const GenericVector<tesseract::ScoredFont>& fonts() const {
return fonts_;
}
void set_fonts(const GenericVector<tesseract::ScoredFont>& fonts) {
fonts_ = fonts;
int score1 = 0, score2 = 0;
fontinfo_id_ = -1;
fontinfo_id2_ = -1;
for (int f = 0; f < fonts_.size(); ++f) {
if (fonts_[f].score > score1) {
score2 = score1;
fontinfo_id2_ = fontinfo_id_;
score1 = fonts_[f].score;
fontinfo_id_ = fonts_[f].fontinfo_id;
} else if (fonts_[f].score > score2) {
score2 = fonts_[f].score;
fontinfo_id2_ = fonts_[f].fontinfo_id;
}
}
}
int script_id() const {
return script_id_;
}
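
A standalone sketch of the top-two selection that set_fonts() performs above (ScoredFont here is a stand-in struct, not the real tesseract::ScoredFont):

#include <cstdio>
#include <vector>

struct ScoredFont { int fontinfo_id; int score; };  // stand-in type

// Single pass keeping the best and second-best scoring fonts, as set_fonts
// does when it fills fontinfo_id_ and fontinfo_id2_.
static void TopTwo(const std::vector<ScoredFont>& fonts, int* id1, int* id2) {
  int score1 = 0, score2 = 0;
  *id1 = -1;
  *id2 = -1;
  for (size_t f = 0; f < fonts.size(); ++f) {
    if (fonts[f].score > score1) {
      score2 = score1;
      *id2 = *id1;
      score1 = fonts[f].score;
      *id1 = fonts[f].fontinfo_id;
    } else if (fonts[f].score > score2) {
      score2 = fonts[f].score;
      *id2 = fonts[f].fontinfo_id;
    }
  }
}

int main() {
  std::vector<ScoredFont> fonts = {{7, 120}, {3, 200}, {11, 150}};
  int id1, id2;
  TopTwo(fonts, &id1, &id2);
  printf("best=%d second=%d\n", id1, id2);  // best=3 second=11
  return 0;
}
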
@ -131,12 +150,6 @@ class BLOB_CHOICE: public ELIST_LINK
void set_certainty(float newrat) {
certainty_ = newrat;
}
void set_fontinfo_id(inT16 newfont) {
fontinfo_id_ = newfont;
}
void set_fontinfo_id2(inT16 newfont) {
fontinfo_id2_ = newfont;
}
void set_script(int newscript_id) {
script_id_ = newscript_id;
}
@ -186,6 +199,8 @@ class BLOB_CHOICE: public ELIST_LINK
private:
UNICHAR_ID unichar_id_; // unichar id
// Fonts and scores. Allowed to be empty.
GenericVector<tesseract::ScoredFont> fonts_;
inT16 fontinfo_id_; // char font information
inT16 fontinfo_id2_; // 2nd choice font information
// Rating is the classifier distance weighted by the length of the outline

View File

@ -78,6 +78,12 @@ class DLLSYM TBOX { // bounding box
void set_right(int x) {
top_right.set_x(x);
}
int x_middle() const {
return (bot_left.x() + top_right.x()) / 2;
}
int y_middle() const {
return (bot_left.y() + top_right.y()) / 2;
}
const ICOORD &botleft() const { // access function
return bot_left;

View File

@ -27,114 +27,236 @@
----------------------------------------------------------------------*/
#include "seam.h"
#include "blobs.h"
#include "freelist.h"
#include "tprintf.h"
#ifdef __UNIX__
#include <assert.h>
#endif
/*----------------------------------------------------------------------
V a r i a b l e s
----------------------------------------------------------------------*/
#define NUM_STARTING_SEAMS 20
/*----------------------------------------------------------------------
Public Function Code
----------------------------------------------------------------------*/
/**
* @name point_in_split
*
* Check to see if either of these points are present in the current
* split.
* @returns TRUE if one of them is split.
*/
bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2) {
return ((split) ? ((exact_point (split->point1, point1) ||
exact_point (split->point1, point2) ||
exact_point (split->point2, point1) ||
exact_point (split->point2, point2)) ? TRUE : FALSE)
: FALSE);
}
/**
* @name point_in_seam
*
* Check to see if either of these points are present in the current
* seam.
* @returns TRUE if one of them is.
*/
bool point_in_seam(const SEAM *seam, SPLIT *split) {
return (point_in_split(seam->split1, split->point1, split->point2) ||
point_in_split(seam->split2, split->point1, split->point2) ||
point_in_split(seam->split3, split->point1, split->point2));
}
/**
* @name point_used_by_split
*
* Return whether this particular EDGEPT * is used in a given split.
* @returns TRUE if the edgept is used by the split.
*/
bool point_used_by_split(SPLIT *split, EDGEPT *point) {
if (split == NULL) return false;
return point == split->point1 || point == split->point2;
}
/**
* @name point_used_by_seam
*
* Return whether this particular EDGEPT * is used in a given seam.
* @returns TRUE if the edgept is used by the seam.
*/
bool point_used_by_seam(SEAM *seam, EDGEPT *point) {
if (seam == NULL) return false;
return point_used_by_split(seam->split1, point) ||
point_used_by_split(seam->split2, point) ||
point_used_by_split(seam->split3, point);
}
/**
* @name combine_seam
*
* Combine two seam records into a single seam. Move the split
* references from the second seam to the first one. The argument
* convention is patterned after strcpy.
*/
void combine_seams(SEAM *dest_seam, SEAM *source_seam) {
dest_seam->priority += source_seam->priority;
dest_seam->location += source_seam->location;
dest_seam->location /= 2;
if (source_seam->split1) {
if (!dest_seam->split1)
dest_seam->split1 = source_seam->split1;
else if (!dest_seam->split2)
dest_seam->split2 = source_seam->split1;
else if (!dest_seam->split3)
dest_seam->split3 = source_seam->split1;
else
delete source_seam->split1; // Wouldn't have fitted.
source_seam->split1 = NULL;
// Returns the bounding box of all the points in the seam.
TBOX SEAM::bounding_box() const {
TBOX box(location_.x, location_.y, location_.x, location_.y);
for (int s = 0; s < num_splits_; ++s) {
box += splits_[s].bounding_box();
}
if (source_seam->split2) {
if (!dest_seam->split2)
dest_seam->split2 = source_seam->split2;
else if (!dest_seam->split3)
dest_seam->split3 = source_seam->split2;
else
delete source_seam->split2; // Wouldn't have fitted.
source_seam->split2 = NULL;
return box;
}
// Returns true if other can be combined into *this.
bool SEAM::CombineableWith(const SEAM& other, int max_x_dist,
float max_total_priority) const {
int dist = location_.x - other.location_.x;
if (-max_x_dist < dist && dist < max_x_dist &&
num_splits_ + other.num_splits_ <= kMaxNumSplits &&
priority_ + other.priority_ < max_total_priority &&
!OverlappingSplits(other) && !SharesPosition(other)) {
return true;
} else {
return false;
}
if (source_seam->split3) {
if (!dest_seam->split3)
dest_seam->split3 = source_seam->split3;
else
delete source_seam->split3; // Wouldn't have fitted.
source_seam->split3 = NULL;
}
// Combines other into *this. Only works if CombinableWith returned true.
void SEAM::CombineWith(const SEAM& other) {
priority_ += other.priority_;
location_ += other.location_;
location_ /= 2;
for (int s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
splits_[num_splits_++] = other.splits_[s];
}
// Returns true if the splits in *this SEAM appear OK in the sense that they
// do not cross any outlines and do not chop off any ridiculously small
// pieces.
bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
// TODO(rays) Try testing all the splits. Duplicating original code for now,
// which tested only the first.
return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
}
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
// seam, which is about to be inserted at insert_index. Returns false if
// any of the computations fails, as this indicates an invalid chop.
// widthn_/widthp_ are only changed if modify is true.
bool SEAM::PrepareToInsertSeam(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs,
int insert_index, bool modify) {
for (int s = 0; s < insert_index; ++s) {
if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false;
}
delete source_seam;
if (!FindBlobWidth(blobs, insert_index, modify)) return false;
for (int s = insert_index; s < seams.size(); ++s) {
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false;
}
return true;
}
// Computes the widthp_/widthn_ range. Returns false if not all the splits
// are accounted for. widthn_/widthp_ are only changed if modify is true.
bool SEAM::FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
bool modify) {
int num_found = 0;
if (modify) {
widthp_ = 0;
widthn_ = 0;
}
for (int s = 0; s < num_splits_; ++s) {
const SPLIT& split = splits_[s];
bool found_split = split.ContainedByBlob(*blobs[index]);
// Look right.
for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
found_split = split.ContainedByBlob(*blobs[b]);
if (found_split && b - index > widthp_ && modify) widthp_ = b - index;
}
// Look left.
for (int b = index - 1; !found_split && b >= 0; --b) {
found_split = split.ContainedByBlob(*blobs[b]);
if (found_split && index - b > widthn_ && modify) widthn_ = index - b;
}
if (found_split) ++num_found;
}
return num_found == num_splits_;
}
// Splits this blob into two blobs by applying the splits included in
// *this SEAM
void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].SplitOutlineList(blob->outlines);
}
blob->ComputeBoundingBoxes();
divide_blobs(blob, other_blob, italic_blob, location_);
blob->EliminateDuplicateOutlines();
other_blob->EliminateDuplicateOutlines();
blob->CorrectBlobOrder(other_blob);
}
// Undoes ApplySeam by removing the seam between these two blobs.
// Produces one blob as a result, and deletes other_blob.
void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const {
if (blob->outlines == NULL) {
blob->outlines = other_blob->outlines;
other_blob->outlines = NULL;
}
TESSLINE* outline = blob->outlines;
while (outline->next) outline = outline->next;
outline->next = other_blob->outlines;
other_blob->outlines = NULL;
delete other_blob;
for (int s = 0; s < num_splits_; ++s) {
splits_[s].UnsplitOutlineList(blob);
}
blob->ComputeBoundingBoxes();
blob->EliminateDuplicateOutlines();
}
// Prints everything in *this SEAM.
void SEAM::Print(const char* label) const {
tprintf(label);
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y,
widthp_, widthn_);
for (int s = 0; s < num_splits_; ++s) {
splits_[s].Print();
if (s + 1 < num_splits_) tprintf(", ");
}
tprintf("\n");
}
// Prints a collection of SEAMs.
/* static */
void SEAM::PrintSeams(const char* label, const GenericVector<SEAM*>& seams) {
if (!seams.empty()) {
tprintf("%s\n", label);
for (int x = 0; x < seams.size(); ++x) {
tprintf("%2d: ", x);
seams[x]->Print("");
}
tprintf("\n");
}
}
#ifndef GRAPHICS_DISABLED
// Draws the seam in the given window.
void SEAM::Mark(ScrollView* window) const {
for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window);
}
#endif
// Break up the blobs in this chain so that they are all independent.
// This operation should undo the effect of join_pieces.
/* static */
void SEAM::BreakPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first,
int last) {
for (int x = first; x < last; ++x) seams[x]->Reveal();
TESSLINE* outline = blobs[first]->outlines;
int next_blob = first + 1;
while (outline != NULL && next_blob <= last) {
if (outline->next == blobs[next_blob]->outlines) {
outline->next = NULL;
outline = blobs[next_blob]->outlines;
++next_blob;
} else {
outline = outline->next;
}
}
}
// Join a group of base level pieces into a single blob that can then
// be classified.
/* static */
void SEAM::JoinPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first, int last) {
TESSLINE* outline = blobs[first]->outlines;
if (!outline)
return;
for (int x = first; x < last; ++x) {
SEAM *seam = seams[x];
if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide();
while (outline->next) outline = outline->next;
outline->next = blobs[x + 1]->outlines;
}
}
// Hides the seam so the outlines appear not to be cut by it.
void SEAM::Hide() const {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].Hide();
}
}
// Undoes hide, so the outlines are cut by the seam.
void SEAM::Reveal() const {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].Reveal();
}
}
// Computes and returns, but does not set, the full priority of *this SEAM.
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const {
if (num_splits_ == 0) return 0.0f;
for (int s = 1; s < num_splits_; ++s) {
splits_[s].SplitOutline();
}
float full_priority =
priority_ +
splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth,
center_knob, width_change_knob);
for (int s = num_splits_ - 1; s >= 1; --s) {
splits_[s].UnsplitOutlines();
}
return full_priority;
}
/**
@ -144,7 +266,7 @@ void combine_seams(SEAM *dest_seam, SEAM *source_seam) {
* present in the starting segmentation. Each of the seams created
* by this routine have location information only.
*/
void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) {
void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array) {
seam_array->truncate(0);
TPOINT location;
@ -153,381 +275,6 @@ void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) {
TBOX nbox = word->blobs[b]->bounding_box();
location.x = (bbox.right() + nbox.left()) / 2;
location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL));
}
}
/**
* @name test_insert_seam
*
* @returns true if insert_seam will succeed.
*/
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
TWERD *word, int index) {
SEAM *test_seam;
int list_length = seam_array.size();
for (int test_index = 0; test_index < index; ++test_index) {
test_seam = seam_array[test_index];
if (test_index + test_seam->widthp < index &&
test_seam->widthp + test_index == index - 1 &&
account_splits(test_seam, word, test_index + 1, 1) < 0)
return false;
}
for (int test_index = index; test_index < list_length; test_index++) {
test_seam = seam_array[test_index];
if (test_index - test_seam->widthn >= index &&
test_index - test_seam->widthn == index &&
account_splits(test_seam, word, test_index + 1, -1) < 0)
return false;
}
return true;
}
/**
* @name insert_seam
*
* Add another seam to a collection of seams at a particular location
* in the seam array.
*/
void insert_seam(const TWERD* word, int index, SEAM *seam,
GenericVector<SEAM*>* seam_array) {
SEAM *test_seam;
int list_length = seam_array->size();
for (int test_index = 0; test_index < index; ++test_index) {
test_seam = seam_array->get(test_index);
if (test_index + test_seam->widthp >= index) {
test_seam->widthp++; /*got in the way */
} else if (test_seam->widthp + test_index == index - 1) {
test_seam->widthp = account_splits(test_seam, word, test_index + 1, 1);
if (test_seam->widthp < 0) {
tprintf("Failed to find any right blob for a split!\n");
print_seam("New dud seam", seam);
print_seam("Failed seam", test_seam);
}
}
}
for (int test_index = index; test_index < list_length; test_index++) {
test_seam = seam_array->get(test_index);
if (test_index - test_seam->widthn < index) {
test_seam->widthn++; /*got in the way */
} else if (test_index - test_seam->widthn == index) {
test_seam->widthn = account_splits(test_seam, word, test_index + 1, -1);
if (test_seam->widthn < 0) {
tprintf("Failed to find any left blob for a split!\n");
print_seam("New dud seam", seam);
print_seam("Failed seam", test_seam);
}
}
}
seam_array->insert(seam, index);
}
/**
* @name account_splits
*
* Account for all the splits by looking to the right (blob_direction == 1),
* or to the left (blob_direction == -1) in the word.
*/
int account_splits(const SEAM *seam, const TWERD *word, int blob_index,
int blob_direction) {
inT8 found_em[3];
inT8 width;
found_em[0] = seam->split1 == NULL;
found_em[1] = seam->split2 == NULL;
found_em[2] = seam->split3 == NULL;
if (found_em[0] && found_em[1] && found_em[2])
return 0;
width = 0;
do {
TBLOB* blob = word->blobs[blob_index];
if (!found_em[0])
found_em[0] = find_split_in_blob(seam->split1, blob);
if (!found_em[1])
found_em[1] = find_split_in_blob(seam->split2, blob);
if (!found_em[2])
found_em[2] = find_split_in_blob(seam->split3, blob);
if (found_em[0] && found_em[1] && found_em[2]) {
return width;
}
width++;
blob_index += blob_direction;
} while (0 <= blob_index && blob_index < word->NumBlobs());
return -1;
}
/**
* @name find_split_in_blob
*
* @returns TRUE if the split is somewhere in this blob.
*/
bool find_split_in_blob(SPLIT *split, TBLOB *blob) {
TESSLINE *outline;
for (outline = blob->outlines; outline != NULL; outline = outline->next)
if (outline->Contains(split->point1->pos))
break;
if (outline == NULL)
return FALSE;
for (outline = blob->outlines; outline != NULL; outline = outline->next)
if (outline->Contains(split->point2->pos))
return TRUE;
return FALSE;
}
/**
* @name join_two_seams
*
* Merge these two seams into a new seam. Duplicate the split records
* in both of the input seams. Return the resultant seam.
*/
SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2) {
SEAM *result = NULL;
SEAM *temp;
assert(seam1 && seam2);
if (((seam1->split3 == NULL && seam2->split2 == NULL) ||
(seam1->split2 == NULL && seam2->split3 == NULL) ||
seam1->split1 == NULL || seam2->split1 == NULL) &&
(!shared_split_points(seam1, seam2))) {
result = new SEAM(*seam1);
temp = new SEAM(*seam2);
combine_seams(result, temp);
}
return (result);
}
/**
* @name print_seam
*
* Print a list of splits. Show the coordinates of both points in
* each split.
*/
void print_seam(const char *label, SEAM *seam) {
if (seam) {
tprintf(label);
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ",
seam->priority, seam->location.x, seam->location.y,
seam->widthp, seam->widthn);
print_split(seam->split1);
if (seam->split2) {
tprintf(", ");
print_split (seam->split2);
if (seam->split3) {
tprintf(", ");
print_split (seam->split3);
}
}
tprintf("\n");
}
}
/**
* @name print_seams
*
* Print a list of splits. Show the coordinates of both points in
* each split.
*/
void print_seams(const char *label, const GenericVector<SEAM*>& seams) {
char number[CHARS_PER_LINE];
if (!seams.empty()) {
tprintf("%s\n", label);
for (int x = 0; x < seams.size(); ++x) {
sprintf(number, "%2d: ", x);
print_seam(number, seams[x]);
}
tprintf("\n");
}
}
/**
* @name shared_split_points
*
* Check these two seams to make sure that neither of them have two
* points in common. Return TRUE if any of the same points are present
* in any of the splits of both seams.
*/
int shared_split_points(const SEAM *seam1, const SEAM *seam2) {
if (seam1 == NULL || seam2 == NULL)
return (FALSE);
if (seam2->split1 == NULL)
return (FALSE);
if (point_in_seam(seam1, seam2->split1))
return (TRUE);
if (seam2->split2 == NULL)
return (FALSE);
if (point_in_seam(seam1, seam2->split2))
return (TRUE);
if (seam2->split3 == NULL)
return (FALSE);
if (point_in_seam(seam1, seam2->split3))
return (TRUE);
return (FALSE);
}
/**********************************************************************
* break_pieces
*
* Break up the blobs in this chain so that they are all independent.
* This operation should undo the effect of join_pieces.
**********************************************************************/
void break_pieces(const GenericVector<SEAM*>& seams, int first, int last,
TWERD *word) {
for (int x = first; x < last; ++x)
reveal_seam(seams[x]);
TESSLINE *outline = word->blobs[first]->outlines;
int next_blob = first + 1;
while (outline != NULL && next_blob <= last) {
if (outline->next == word->blobs[next_blob]->outlines) {
outline->next = NULL;
outline = word->blobs[next_blob]->outlines;
++next_blob;
} else {
outline = outline->next;
}
}
}
/**********************************************************************
* join_pieces
*
* Join a group of base level pieces into a single blob that can then
* be classified.
**********************************************************************/
void join_pieces(const GenericVector<SEAM*>& seams, int first, int last,
TWERD *word) {
TESSLINE *outline = word->blobs[first]->outlines;
if (!outline)
return;
for (int x = first; x < last; ++x) {
SEAM *seam = seams[x];
if (x - seam->widthn >= first && x + seam->widthp < last)
hide_seam(seam);
while (outline->next)
outline = outline->next;
outline->next = word->blobs[x + 1]->outlines;
}
}
/**********************************************************************
* hide_seam
*
* Change the edge points that are referenced by this seam to make
* them hidden edges.
**********************************************************************/
void hide_seam(SEAM *seam) {
if (seam == NULL || seam->split1 == NULL)
return;
hide_edge_pair (seam->split1->point1, seam->split1->point2);
if (seam->split2 == NULL)
return;
hide_edge_pair (seam->split2->point1, seam->split2->point2);
if (seam->split3 == NULL)
return;
hide_edge_pair (seam->split3->point1, seam->split3->point2);
}
/**********************************************************************
* hide_edge_pair
*
* Change the edge points that are referenced by this seam to make
* them hidden edges.
**********************************************************************/
void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
EDGEPT *edgept;
edgept = pt1;
do {
edgept->Hide();
edgept = edgept->next;
}
while (!exact_point (edgept, pt2) && edgept != pt1);
if (edgept == pt1) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
}
edgept = pt2;
do {
edgept->Hide();
edgept = edgept->next;
}
while (!exact_point (edgept, pt1) && edgept != pt2);
if (edgept == pt2) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
}
}
/**********************************************************************
* reveal_seam
*
* Change the edge points that are referenced by this seam to make
* them hidden edges.
**********************************************************************/
void reveal_seam(SEAM *seam) {
if (seam == NULL || seam->split1 == NULL)
return;
reveal_edge_pair (seam->split1->point1, seam->split1->point2);
if (seam->split2 == NULL)
return;
reveal_edge_pair (seam->split2->point1, seam->split2->point2);
if (seam->split3 == NULL)
return;
reveal_edge_pair (seam->split3->point1, seam->split3->point2);
}
/**********************************************************************
* reveal_edge_pair
*
* Change the edge points that are referenced by this seam to make
* them hidden edges.
**********************************************************************/
void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
EDGEPT *edgept;
edgept = pt1;
do {
edgept->Reveal();
edgept = edgept->next;
}
while (!exact_point (edgept, pt2) && edgept != pt1);
if (edgept == pt1) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
}
edgept = pt2;
do {
edgept->Reveal();
edgept = edgept->next;
}
while (!exact_point (edgept, pt1) && edgept != pt2);
if (edgept == pt2) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
seam_array->push_back(new SEAM(0.0f, location));
}
}

View File

@ -36,95 +36,163 @@
----------------------------------------------------------------------*/
typedef float PRIORITY; /* PRIORITY */
struct SEAM {
// Constructor that was formerly new_seam.
SEAM(PRIORITY priority0, const TPOINT& location0,
SPLIT *splita, SPLIT *splitb, SPLIT *splitc)
: priority(priority0), widthp(0), widthn(0), location(location0),
split1(splita), split2(splitb), split3(splitc) {}
// Copy constructor that was formerly clone_seam.
SEAM(const SEAM& src)
: priority(src.priority), widthp(src.widthp), widthn(src.widthn),
location(src.location) {
clone_split(split1, src.split1);
clone_split(split2, src.split2);
clone_split(split3, src.split3);
class SEAM {
public:
// A seam with no splits
SEAM(float priority, const TPOINT& location)
: priority_(priority),
location_(location),
widthp_(0),
widthn_(0),
num_splits_(0) {}
// A seam with a single split point.
SEAM(float priority, const TPOINT& location, const SPLIT& split)
: priority_(priority),
location_(location),
widthp_(0),
widthn_(0),
num_splits_(1) {
splits_[0] = split;
}
// Destructor was delete_seam.
~SEAM() {
if (split1)
delete_split(split1);
if (split2)
delete_split(split2);
if (split3)
delete_split(split3);
// Default copy constructor, operator= and destructor are OK!
// Accessors.
float priority() const { return priority_; }
void set_priority(float priority) { priority_ = priority; }
bool HasAnySplits() const { return num_splits_ > 0; }
// Returns the bounding box of all the points in the seam.
TBOX bounding_box() const;
// Returns true if other can be combined into *this.
bool CombineableWith(const SEAM& other, int max_x_dist,
float max_total_priority) const;
// Combines other into *this. Only works if CombinableWith returned true.
void CombineWith(const SEAM& other);
// Returns true if the given blob contains all splits of *this SEAM.
bool ContainedByBlob(const TBLOB& blob) const {
for (int s = 0; s < num_splits_; ++s) {
if (!splits_[s].ContainedByBlob(blob)) return false;
}
return true;
}
PRIORITY priority;
inT8 widthp;
inT8 widthn;
TPOINT location;
SPLIT *split1;
SPLIT *split2;
SPLIT *split3;
// Returns true if the given EDGEPT is used by this SEAM, checking only
// the EDGEPT pointer, not the coordinates.
bool UsesPoint(const EDGEPT* point) const {
for (int s = 0; s < num_splits_; ++s) {
if (splits_[s].UsesPoint(point)) return true;
}
return false;
}
// Returns true if *this and other share any common point, by coordinates.
bool SharesPosition(const SEAM& other) const {
for (int s = 0; s < num_splits_; ++s) {
for (int t = 0; t < other.num_splits_; ++t)
if (splits_[s].SharesPosition(other.splits_[t])) return true;
}
return false;
}
// Returns true if *this and other have any vertically overlapping splits.
bool OverlappingSplits(const SEAM& other) const {
for (int s = 0; s < num_splits_; ++s) {
TBOX split1_box = splits_[s].bounding_box();
for (int t = 0; t < other.num_splits_; ++t) {
TBOX split2_box = other.splits_[t].bounding_box();
if (split1_box.y_overlap(split2_box)) return true;
}
}
return false;
}
// Marks the edgepts used by the seam so the segments made by the cut
// never get split further by another seam in the future.
void Finalize() {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].point1->MarkChop();
splits_[s].point2->MarkChop();
}
}
// Returns true if the splits in *this SEAM appear OK in the sense that they
// do not cross any outlines and do not chop off any ridiculously small
// pieces.
bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const;
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
// seam, which is about to be inserted at insert_index. Returns false if
// any of the computations fails, as this indicates an invalid chop.
// widthn_/widthp_ are only changed if modify is true.
bool PrepareToInsertSeam(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int insert_index,
bool modify);
// Computes the widthp_/widthn_ range. Returns false if not all the splits
// are accounted for. widthn_/widthp_ are only changed if modify is true.
bool FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
bool modify);
// Splits this blob into two blobs by applying the splits included in
// *this SEAM
void ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const;
// Undoes ApplySeam by removing the seam between these two blobs.
// Produces one blob as a result, and deletes other_blob.
void UndoSeam(TBLOB* blob, TBLOB* other_blob) const;
// Prints everything in *this SEAM.
void Print(const char* label) const;
// Prints a collection of SEAMs.
static void PrintSeams(const char* label, const GenericVector<SEAM*>& seams);
#ifndef GRAPHICS_DISABLED
// Draws the seam in the given window.
void Mark(ScrollView* window) const;
#endif
// Break up the blobs in this chain so that they are all independent.
// This operation should undo the effect of join_pieces.
static void BreakPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first,
int last);
// Join a group of base level pieces into a single blob that can then
// be classified.
static void JoinPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first,
int last);
// Hides the seam so the outlines appear not to be cut by it.
void Hide() const;
// Undoes hide, so the outlines are cut by the seam.
void Reveal() const;
// Computes and returns, but does not set, the full priority of *this SEAM.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
float FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const;
private:
// Maximum number of splits that a SEAM can hold.
static const int kMaxNumSplits = 3;
// Priority of this split. Lower is better.
float priority_;
// Position of the middle of the seam.
TPOINT location_;
// A range such that all splits in *this SEAM are contained within blobs in
// the range [index - widthn_,index + widthp_] where index is the index of
// this SEAM in the seams vector.
inT8 widthp_;
inT8 widthn_;
// Number of splits_ that are used.
inT8 num_splits_;
// Set of pairs of points that are the ends of each split in the SEAM.
SPLIT splits_[kMaxNumSplits];
};
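A hypothetical sketch of recording a new chop with this reworked class; pt1/pt2, priority, seams, blobs and insert_index are assumed to be supplied by the chopper.
SPLIT split(pt1, pt2);
TPOINT location;
location.x = (pt1->pos.x + pt2->pos.x) / 2;
location.y = (pt1->pos.y + pt2->pos.y) / 2;
SEAM* seam = new SEAM(priority, location, split);
if (seam->PrepareToInsertSeam(seams, blobs, insert_index, true)) {
  seams.insert(seam, insert_index);  // widths were updated by the call above
} else {
  delete seam;  // the chop is invalid at this position
}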
/**
* exact_point
*
 * Return TRUE if the point positions are exactly the same. The
* parameters must be of type (EDGEPT*).
*/
#define exact_point(p1,p2) \
(! ((p1->pos.x - p2->pos.x) || (p1->pos.y - p2->pos.y)))
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2);
bool point_in_seam(const SEAM *seam, SPLIT *split);
bool point_used_by_split(SPLIT *split, EDGEPT *point);
bool point_used_by_seam(SEAM *seam, EDGEPT *point);
void combine_seams(SEAM *dest_seam, SEAM *source_seam);
void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array);
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
TWERD *word, int index);
void insert_seam(const TWERD *word, int index, SEAM *seam,
GenericVector<SEAM*>* seam_array);
int account_splits(const SEAM *seam, const TWERD *word, int blob_index,
int blob_direction);
bool find_split_in_blob(SPLIT *split, TBLOB *blob);
SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2);
void print_seam(const char *label, SEAM *seam);
void print_seams(const char *label, const GenericVector<SEAM*>& seams);
int shared_split_points(const SEAM *seam1, const SEAM *seam2);
void break_pieces(const GenericVector<SEAM*>& seams,
int first, int last, TWERD *word);
void join_pieces(const GenericVector<SEAM*>& seams,
int first, int last, TWERD *word);
void hide_seam(SEAM *seam);
void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2);
void reveal_seam(SEAM *seam);
void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2);
void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array);
#endif

View File

@ -36,23 +36,103 @@
/*----------------------------------------------------------------------
V a r i a b l e s
----------------------------------------------------------------------*/
// Limit on the amount of penalty for the chop being off-center.
const int kCenterGradeCap = 25;
// Ridiculously large priority for splits that are no use.
const double kBadPriority = 999.0;
BOOL_VAR(wordrec_display_splits, 0, "Display splits");
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
/**********************************************************************
* delete_split
*
* Remove this split from existence.
**********************************************************************/
void delete_split(SPLIT *split) {
if (split) {
delete split;
}
// Returns the bounding box of all the points in the split.
TBOX SPLIT::bounding_box() const {
return TBOX(
MIN(point1->pos.x, point2->pos.x), MIN(point1->pos.y, point2->pos.y),
MAX(point1->pos.x, point2->pos.x), MAX(point1->pos.y, point2->pos.y));
}
// Hides the SPLIT so the outlines appear not to be cut by it.
void SPLIT::Hide() const {
EDGEPT* edgept = point1;
do {
edgept->Hide();
edgept = edgept->next;
} while (!edgept->EqualPos(*point2) && edgept != point1);
edgept = point2;
do {
edgept->Hide();
edgept = edgept->next;
} while (!edgept->EqualPos(*point1) && edgept != point2);
}
// Undoes hide, so the outlines are cut by the SPLIT.
void SPLIT::Reveal() const {
EDGEPT* edgept = point1;
do {
edgept->Reveal();
edgept = edgept->next;
} while (!edgept->EqualPos(*point2) && edgept != point1);
edgept = point2;
do {
edgept->Reveal();
edgept = edgept->next;
} while (!edgept->EqualPos(*point1) && edgept != point2);
}
// Compute a split priority based on the bounding boxes of the parts.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const {
TBOX box1 = Box12();
TBOX box2 = Box21();
int min_left = MIN(box1.left(), box2.left());
int max_right = MAX(box1.right(), box2.right());
if (xmin < min_left && xmax > max_right) return kBadPriority;
float grade = 0.0f;
// grade_overlap.
int width1 = box1.width();
int width2 = box2.width();
int min_width = MIN(width1, width2);
int overlap = -box1.x_gap(box2);
if (overlap == min_width) {
grade += 100.0f; // Total overlap.
} else {
if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
if (overlap > 0) grade += overlap_knob * overlap;
}
// grade_center_of_blob.
if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
grade += MIN(kCenterGradeCap, center_knob * abs(width1 - width2));
}
// grade_width_change.
float width_change_grade = 20 - (max_right - min_left - MAX(width1, width2));
if (width_change_grade > 0.0f)
grade += width_change_grade * width_change_knob;
return grade;
}
// Returns true if *this SPLIT appears OK in the sense that it does not cross
// any outlines and does not chop off any ridiculously small pieces.
bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
return !IsLittleChunk(min_points, min_area) &&
!blob.SegmentCrossesOutline(point1->pos, point2->pos);
}
// Returns true if the split generates a small chunk in terms of either area
// or number of points.
bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
if (point1->ShortNonCircularSegment(min_points, point2) &&
point1->SegmentArea(point2) < min_area) {
return true;
}
if (point2->ShortNonCircularSegment(min_points, point1) &&
point2->SegmentArea(point1) < min_area) {
return true;
}
return false;
}
/**********************************************************************
* make_edgept
@ -135,102 +215,113 @@ void remove_edgept(EDGEPT *point) {
}
/**********************************************************************
* new_split
* Print
*
* Create a new split record and initialize it. Put it on the display
* list.
* Shows the coordinates of both points in a split.
**********************************************************************/
SPLIT *new_split(EDGEPT *point1, EDGEPT *point2) {
SPLIT *s = new SPLIT;
s->point1 = point1;
s->point2 = point2;
return (s);
}
/**********************************************************************
* print_split
*
* Print a list of splits. Show the coordinates of both points in
* each split.
**********************************************************************/
void print_split(SPLIT *split) {
if (split) {
tprintf("(%d,%d)--(%d,%d)",
split->point1->pos.x, split->point1->pos.y,
split->point2->pos.x, split->point2->pos.y);
void SPLIT::Print() const {
if (this != NULL) {
tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
point2->pos.y);
}
}
#ifndef GRAPHICS_DISABLED
// Draws the split in the given window.
void SPLIT::Mark(ScrollView* window) const {
window->Pen(ScrollView::GREEN);
window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
window->UpdateWindow();
}
#endif
/**********************************************************************
* split_outline
*
* Split between these two edge points.
**********************************************************************/
void split_outline(EDGEPT *join_point1, EDGEPT *join_point2) {
assert(join_point1 != join_point2);
// Creates two outlines out of one by splitting the original one in half.
// Inserts the resulting outlines into the given list.
void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
SplitOutline();
while (outlines->next != NULL) outlines = outlines->next;
EDGEPT* temp2 = join_point2->next;
EDGEPT* temp1 = join_point1->next;
/* Create two new points */
EDGEPT* new_point1 = make_edgept(join_point1->pos.x, join_point1->pos.y,
temp1, join_point2);
EDGEPT* new_point2 = make_edgept(join_point2->pos.x, join_point2->pos.y,
temp2, join_point1);
// Join_point1 and 2 are now cross-over points, so they must have NULL
// src_outlines and give their src_outline information their new
// replacements.
new_point1->src_outline = join_point1->src_outline;
new_point1->start_step = join_point1->start_step;
new_point1->step_count = join_point1->step_count;
new_point2->src_outline = join_point2->src_outline;
new_point2->start_step = join_point2->start_step;
new_point2->step_count = join_point2->step_count;
join_point1->src_outline = NULL;
join_point1->start_step = 0;
join_point1->step_count = 0;
join_point2->src_outline = NULL;
join_point2->start_step = 0;
join_point2->step_count = 0;
join_point1->MarkChop();
join_point2->MarkChop();
outlines->next = new TESSLINE;
outlines->next->loop = point1;
outlines->next->ComputeBoundingBox();
outlines = outlines->next;
outlines->next = new TESSLINE;
outlines->next->loop = point2;
outlines->next->ComputeBoundingBox();
outlines->next->next = NULL;
}
// Makes a split between these two edge points, but does not affect the
// outlines to which they belong.
void SPLIT::SplitOutline() const {
EDGEPT* temp2 = point2->next;
EDGEPT* temp1 = point1->next;
/* Create two new points */
EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
// point1 and 2 are now cross-over points, so they must have NULL
// src_outlines and give their src_outline information their new
// replacements.
new_point1->src_outline = point1->src_outline;
new_point1->start_step = point1->start_step;
new_point1->step_count = point1->step_count;
new_point2->src_outline = point2->src_outline;
new_point2->start_step = point2->start_step;
new_point2->step_count = point2->step_count;
point1->src_outline = NULL;
point1->start_step = 0;
point1->step_count = 0;
point2->src_outline = NULL;
point2->start_step = 0;
point2->step_count = 0;
}
/**********************************************************************
* unsplit_outlines
*
* Remove the split that was put between these two points.
**********************************************************************/
void unsplit_outlines(EDGEPT *p1, EDGEPT *p2) {
EDGEPT *tmp1 = p1->next;
EDGEPT *tmp2 = p2->next;
// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
// the split, but possibly leaving some duplicate outlines.
void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
/* Modify edge points */
UnsplitOutlines();
assert (p1 != p2);
TESSLINE* outline1 = new TESSLINE;
outline1->next = blob->outlines;
blob->outlines = outline1;
outline1->loop = point1;
tmp1->next->prev = p2;
tmp2->next->prev = p1;
TESSLINE* outline2 = new TESSLINE;
outline2->next = blob->outlines;
blob->outlines = outline2;
outline2->loop = point2;
}
// tmp2 is coincident with p1. p1 takes tmp2's place as tmp2 is deleted.
p1->next = tmp2->next;
p1->src_outline = tmp2->src_outline;
p1->start_step = tmp2->start_step;
p1->step_count = tmp2->step_count;
// Likewise p2 takes tmp1's place.
p2->next = tmp1->next;
p2->src_outline = tmp1->src_outline;
p2->start_step = tmp1->start_step;
p2->step_count = tmp1->step_count;
p1->UnmarkChop();
p2->UnmarkChop();
// Removes the split that was put between these two points.
void SPLIT::UnsplitOutlines() const {
EDGEPT* tmp1 = point1->next;
EDGEPT* tmp2 = point2->next;
tmp1->next->prev = point2;
tmp2->next->prev = point1;
// tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
// deleted.
point1->next = tmp2->next;
point1->src_outline = tmp2->src_outline;
point1->start_step = tmp2->start_step;
point1->step_count = tmp2->step_count;
// Likewise point2 takes tmp1's place.
point2->next = tmp1->next;
point2->src_outline = tmp1->src_outline;
point2->start_step = tmp1->start_step;
point2->step_count = tmp1->step_count;
delete tmp1;
delete tmp2;
p1->vec.x = p1->next->pos.x - p1->pos.x;
p1->vec.y = p1->next->pos.y - p1->pos.y;
point1->vec.x = point1->next->pos.x - point1->pos.x;
point1->vec.y = point1->next->pos.y - point1->pos.y;
p2->vec.x = p2->next->pos.x - p2->pos.x;
p2->vec.y = p2->next->pos.y - p2->pos.y;
point2->vec.x = point2->next->pos.x - point2->pos.x;
point2->vec.y = point2->next->pos.y - point2->pos.y;
}

View File

@ -29,18 +29,80 @@
I n c l u d e s
----------------------------------------------------------------------*/
#include "blobs.h"
#include "oldlist.h"
#include "scrollview.h"
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
typedef struct split_record
{ /* SPLIT */
struct SPLIT {
SPLIT() : point1(NULL), point2(NULL) {}
SPLIT(EDGEPT* pt1, EDGEPT* pt2) : point1(pt1), point2(pt2) {}
// Returns the bounding box of all the points in the split.
TBOX bounding_box() const;
// Returns the bounding box of the outline from point1 to point2.
TBOX Box12() const { return point1->SegmentBox(point2); }
// Returns the bounding box of the outline from point2 to point1.
TBOX Box21() const { return point2->SegmentBox(point1); }
// Hides the SPLIT so the outlines appear not to be cut by it.
void Hide() const;
// Undoes hide, so the outlines are cut by the SPLIT.
void Reveal() const;
// Returns true if the given EDGEPT is used by this SPLIT, checking only
// the EDGEPT pointer, not the coordinates.
bool UsesPoint(const EDGEPT* point) const {
return point1 == point || point2 == point;
}
// Returns true if the other SPLIT has any position shared with *this.
bool SharesPosition(const SPLIT& other) const {
return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) ||
point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2);
}
// Returns true if both points are contained within the blob.
bool ContainedByBlob(const TBLOB& blob) const {
return blob.Contains(point1->pos) && blob.Contains(point2->pos);
}
// Returns true if both points are contained within the outline.
bool ContainedByOutline(const TESSLINE& outline) const {
return outline.Contains(point1->pos) && outline.Contains(point2->pos);
}
// Compute a split priority based on the bounding boxes of the parts.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
float FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const;
// Returns true if *this SPLIT appears OK in the sense that it does not cross
// any outlines and does not chop off any ridiculously small pieces.
bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const;
// Returns true if the split generates a small chunk in terms of either area
// or number of points.
bool IsLittleChunk(int min_points, int min_area) const;
void Print() const;
#ifndef GRAPHICS_DISABLED
// Draws the split in the given window.
void Mark(ScrollView* window) const;
#endif
// Creates two outlines out of one by splitting the original one in half.
// Inserts the resulting outlines into the given list.
void SplitOutlineList(TESSLINE* outlines) const;
// Makes a split between these two edge points, but does not affect the
// outlines to which they belong.
void SplitOutline() const;
// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
// the split, but possibly leaving some duplicate outlines.
void UnsplitOutlineList(TBLOB* blob) const;
// Removes the split that was put between these two points.
void UnsplitOutlines() const;
EDGEPT *point1;
EDGEPT *point2;
} SPLIT;
typedef LIST SPLITS; /* SPLITS */
};
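An illustrative sketch of how a candidate split might be vetted and scored; min_points, min_area and the knob arguments stand in for the Wordrec chop_* configuration values.
SPLIT candidate(pt1, pt2);
if (candidate.IsHealthy(*blob, min_points, min_area)) {
  float grade = candidate.FullPriority(xmin, xmax, overlap_knob,
                                       centered_maxwidth, center_knob,
                                       width_change_knob);
  // Lower grades are better; kBadPriority marks a split that is no use.
}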
/*----------------------------------------------------------------------
V a r i a b l e s
@ -48,38 +110,11 @@ typedef LIST SPLITS; /* SPLITS */
extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits");
/*----------------------------------------------------------------------
M a c r o s
----------------------------------------------------------------------*/
/**********************************************************************
* clone_split
*
* Create a new split record and set the contents equal to the contents
* of this record.
**********************************************************************/
#define clone_split(dest,source) \
if (source) \
(dest) = new_split ((source)->point1, (source)->point2); \
else \
(dest) = (SPLIT*) NULL \
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
void delete_split(SPLIT *split);
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev);
void remove_edgept(EDGEPT *point);
SPLIT *new_split(EDGEPT *point1, EDGEPT *point2);
void print_split(SPLIT *split);
void split_outline(EDGEPT *join_point1, EDGEPT *join_point2);
void unsplit_outlines(EDGEPT *p1, EDGEPT *p2);
#endif

View File

@ -247,10 +247,11 @@ C_BLOB* C_BLOB::FakeBlob(const TBOX& box) {
* Return the bounding box of the blob.
**********************************************************************/
TBOX C_BLOB::bounding_box() { //bounding box
C_OUTLINE *outline; //current outline
C_OUTLINE_IT it = &outlines; //outlines of blob
TBOX box; //bounding box
TBOX C_BLOB::bounding_box() const { // bounding box
C_OUTLINE *outline; // current outline
// This is a read-only iteration of the outlines.
C_OUTLINE_IT it = const_cast<C_OUTLINE_LIST*>(&outlines);
TBOX box; // bounding box
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
outline = it.data ();

View File

@ -65,7 +65,7 @@ class C_BLOB:public ELIST_LINK
return &outlines;
}
TBOX bounding_box(); //compute bounding box
TBOX bounding_box() const; // compute bounding box
inT32 area(); //compute area
inT32 perimeter(); // Total perimeter of outlines and 1st level children.
inT32 outer_area(); //compute area
@ -116,6 +116,14 @@ class C_BLOB:public ELIST_LINK
return blob;
}
static int SortByXMiddle(const void *v1, const void *v2) {
const C_BLOB* blob1 = *reinterpret_cast<const C_BLOB* const *>(v1);
const C_BLOB* blob2 = *reinterpret_cast<const C_BLOB* const *>(v2);
return blob1->bounding_box().x_middle() -
blob2->bounding_box().x_middle();
}
private:
C_OUTLINE_LIST outlines; //master elements
};
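A hypothetical use of the new comparator: sorting an array of blob pointers left to right by the x-middle of their bounding boxes.
const int kNumBlobs = 4;        // size chosen for illustration
C_BLOB* blob_array[kNumBlobs];  // assumed to be filled elsewhere
qsort(blob_array, kNumBlobs, sizeof(blob_array[0]), C_BLOB::SortByXMiddle);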

View File

@ -30,6 +30,7 @@
I n c l u d e s
----------------------------------------------------------------------*/
#include "vecfuncs.h"
#include "blobs.h"
/*----------------------------------------------------------------------
F u n c t i o n s

View File

@ -26,7 +26,6 @@
#define VECFUNCS_H
#include <math.h>
#include "blobs.h"
struct EDGEPT;

View File

@ -160,23 +160,37 @@ WERD* WERD::ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob) {
* row being marked as FUZZY space.
*/
TBOX WERD::bounding_box() {
TBOX box; // box being built
C_BLOB_IT rej_cblob_it = &rej_cblobs; // rejected blobs
TBOX WERD::bounding_box() const { return restricted_bounding_box(true, true); }
for (rej_cblob_it.mark_cycle_pt(); !rej_cblob_it.cycled_list();
rej_cblob_it.forward()) {
box += rej_cblob_it.data()->bounding_box();
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX WERD::restricted_bounding_box(bool upper_dots, bool lower_dots) const {
TBOX box = true_bounding_box();
int bottom = box.bottom();
int top = box.top();
// This is a read-only iteration of the rejected blobs.
C_BLOB_IT it(const_cast<C_BLOB_LIST*>(&rej_cblobs));
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
TBOX dot_box = it.data()->bounding_box();
if ((upper_dots || dot_box.bottom() <= top) &&
(lower_dots || dot_box.top() >= bottom)) {
box += dot_box;
}
}
return box;
}
C_BLOB_IT it = &cblobs; // blobs of WERD
// Returns the bounding box of only the good blobs.
TBOX WERD::true_bounding_box() const {
TBOX box; // box being built
// This is a read-only iteration of the good blobs.
C_BLOB_IT it(const_cast<C_BLOB_LIST*>(&cblobs));
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
box += it.data()->bounding_box();
}
return box;
}
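A minimal sketch of the three box variants; which dots to include is an assumed use case, not taken from this commit.
TBOX full_box = word->bounding_box();                        // all noise/diacritic dots
TBOX no_lower = word->restricted_bounding_box(true, false);  // keep upper dots only
TBOX core_box = word->true_bounding_box();                   // good blobs only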
/**
* WERD::move
*
@ -489,3 +503,101 @@ WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
}
return new_werd;
}
// Removes noise from the word by moving small outlines to the rej_cblobs
// list, based on the size_threshold.
void WERD::CleanNoise(float size_threshold) {
C_BLOB_IT blob_it(&cblobs);
C_BLOB_IT rej_it(&rej_cblobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB* blob = blob_it.data();
C_OUTLINE_IT ol_it(blob->out_list());
for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
C_OUTLINE* outline = ol_it.data();
TBOX ol_box = outline->bounding_box();
int ol_size =
ol_box.width() > ol_box.height() ? ol_box.width() : ol_box.height();
if (ol_size < size_threshold) {
// This outline is too small. Move it to a separate blob in the
// reject blobs list.
C_BLOB* rej_blob = new C_BLOB(ol_it.extract());
rej_it.add_after_then_move(rej_blob);
}
}
if (blob->out_list()->empty()) delete blob_it.extract();
}
}
// Extracts all the noise outlines and stuffs the pointers into the given
// vector of outlines. Afterwards, the outlines vector owns the pointers.
void WERD::GetNoiseOutlines(GenericVector<C_OUTLINE*>* outlines) {
C_BLOB_IT rej_it(&rej_cblobs);
for (rej_it.mark_cycle_pt(); !rej_it.empty(); rej_it.forward()) {
C_BLOB* blob = rej_it.extract();
C_OUTLINE_IT ol_it(blob->out_list());
outlines->push_back(ol_it.extract());
delete blob;
}
}
// Adds the selected outlines to the indicated real blobs, and puts the rest
// back in rej_cblobs where they came from. Where the target_blobs entry is
// NULL, a run of wanted outlines is put into a single new blob.
// Ownership of the outlines is transferred back to the word. (Hence
// GenericVector and not PointerVector.)
// Returns true if any new blob was added to the start of the word, which
// suggests that it might need joining to the word before it, and likewise
// sets make_next_word_fuzzy true if any new blob was added to the end.
bool WERD::AddSelectedOutlines(const GenericVector<bool>& wanted,
const GenericVector<C_BLOB*>& target_blobs,
const GenericVector<C_OUTLINE*>& outlines,
bool* make_next_word_fuzzy) {
bool outline_added_to_start = false;
if (make_next_word_fuzzy != NULL) *make_next_word_fuzzy = false;
C_BLOB_IT rej_it(&rej_cblobs);
for (int i = 0; i < outlines.size(); ++i) {
C_OUTLINE* outline = outlines[i];
if (outline == NULL) continue; // Already used it.
if (wanted[i]) {
C_BLOB* target_blob = target_blobs[i];
TBOX noise_box = outline->bounding_box();
if (target_blob == NULL) {
target_blob = new C_BLOB(outline);
// Need to find the insertion point.
C_BLOB_IT blob_it(&cblobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
blob_it.forward()) {
C_BLOB* blob = blob_it.data();
TBOX blob_box = blob->bounding_box();
if (blob_box.left() > noise_box.left()) {
if (blob_it.at_first() && !flag(W_FUZZY_SP) && !flag(W_FUZZY_NON)) {
// We might want to join this word to its predecessor.
outline_added_to_start = true;
}
blob_it.add_before_stay_put(target_blob);
break;
}
}
if (blob_it.cycled_list()) {
blob_it.add_to_end(target_blob);
if (make_next_word_fuzzy != NULL) *make_next_word_fuzzy = true;
}
// Add all consecutive wanted, but null-blob outlines to same blob.
C_OUTLINE_IT ol_it(target_blob->out_list());
while (i + 1 < outlines.size() && wanted[i + 1] &&
target_blobs[i + 1] == NULL) {
++i;
ol_it.add_to_end(outlines[i]);
}
} else {
// Insert outline into this blob.
C_OUTLINE_IT ol_it(target_blob->out_list());
ol_it.add_to_end(outline);
}
} else {
// Put back on noise list.
rej_it.add_to_end(new C_BLOB(outline));
}
}
return outline_added_to_start;
}
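A sketch of the intended round trip, under assumed names: strip small outlines, decide which ones are genuine diacritics, then re-attach the survivors.
word->CleanNoise(noise_size_threshold);
GenericVector<C_OUTLINE*> outlines;
word->GetNoiseOutlines(&outlines);
GenericVector<bool> wanted;
GenericVector<C_BLOB*> targets;
wanted.init_to_size(outlines.size(), false);
targets.init_to_size(outlines.size(), NULL);
// ... set wanted[i] = true for the outlines judged to be diacritics ...
bool next_word_fuzzy;
if (word->AddSelectedOutlines(wanted, targets, outlines, &next_word_fuzzy)) {
  // A blob was added at the start; the word may need joining to its predecessor.
}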

View File

@ -114,7 +114,13 @@ class WERD : public ELIST2_LINK {
script_id_ = id;
}
TBOX bounding_box(); // compute bounding box
// Returns the (default) bounding box including all the dots.
TBOX bounding_box() const; // compute bounding box
// Returns the bounding box including the desired combination of upper and
// lower noise/diacritic elements.
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
// Returns the bounding box of only the good blobs.
TBOX true_bounding_box() const;
const char *text() const { return correct.string(); }
void set_text(const char *new_text) { correct = new_text; }
@ -155,6 +161,26 @@ class WERD : public ELIST2_LINK {
void plot_rej_blobs(ScrollView *window);
#endif // GRAPHICS_DISABLED
// Removes noise from the word by moving small outlines to the rej_cblobs
// list, based on the size_threshold.
void CleanNoise(float size_threshold);
// Extracts all the noise outlines and stuffs the pointers into the given
// vector of outlines. Afterwards, the outlines vector owns the pointers.
void GetNoiseOutlines(GenericVector<C_OUTLINE *> *outlines);
// Adds the selected outlines to the indicated real blobs, and puts the rest
// back in rej_cblobs where they came from. Where the target_blobs entry is
// NULL, a run of wanted outlines is put into a single new blob.
// Ownership of the outlines is transferred back to the word. (Hence
// GenericVector and not PointerVector.)
// Returns true if any new blob was added to the start of the word, which
// suggests that it might need joining to the word before it, and likewise
// sets make_next_word_fuzzy true if any new blob was added to the end.
bool AddSelectedOutlines(const GenericVector<bool> &wanted,
const GenericVector<C_BLOB *> &target_blobs,
const GenericVector<C_OUTLINE *> &outlines,
bool *make_next_word_fuzzy);
private:
uinT8 blanks; // no of blanks
uinT8 dummy; // padding

View File

@ -1,3 +1,4 @@
AUTOMAKE_OPTIONS = subdir-objects
SUBDIRS =
AM_CXXFLAGS =
@ -40,8 +41,7 @@ libtesseract_ccutil_la_SOURCES = \
unichar.cpp unicharmap.cpp unicharset.cpp unicodes.cpp \
params.cpp universalambigs.cpp
if MINGW
if T_WIN
AM_CPPFLAGS += -I$(top_srcdir)/vs2008/port -DWINDLLNAME=\"lib@GENERIC_LIBRARY_NAME@\"
noinst_HEADERS += ../vs2010/port/strtok_r.h
libtesseract_ccutil_la_SOURCES += ../vs2010/port/strtok_r.cpp

View File

@ -24,13 +24,13 @@
#include "helpers.h"
#include "universalambigs.h"
#ifdef _WIN32
#if defined _WIN32 || defined(__CYGWIN__)
#ifndef __GNUC__
#define strtok_r strtok_s
#else
#include "strtok_r.h"
#endif /* __GNUC__ */
#endif /* _WIN32 */
#endif /* _WIN32 __CYGWIN__*/
namespace tesseract {

View File

@ -28,6 +28,7 @@
#include "errcode.h"
#include "helpers.h"
#include "ndminx.h"
#include "serialis.h"
#include "strngs.h"
// Use PointerVector<T> below in preference to GenericVector<T*>, as that
@ -61,6 +62,11 @@ class GenericVector {
// Resizes to size and sets all values to t.
void init_to_size(int size, T t);
// Resizes to size without any initialization.
void resize_no_init(int size) {
reserve(size);
size_used_ = size;
}
// Return the size used.
int size() const {
@ -159,22 +165,27 @@ class GenericVector {
bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
// Writes a vector of simple types to the given file. Assumes that bitwise
// read/write of T will work. Returns false in case of error.
// TODO(rays) Change all callers to use TFile and remove deprecated methods.
bool Serialize(FILE* fp) const;
bool Serialize(tesseract::TFile* fp) const;
// Reads a vector of simple types from the given file. Assumes that bitwise
// read/write will work with ReverseN according to sizeof(T).
// Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
bool DeSerialize(bool swap, tesseract::TFile* fp);
// Writes a vector of classes to the given file. Assumes the existence of
// bool T::Serialize(FILE* fp) const that returns false in case of error.
// Returns false in case of error.
bool SerializeClasses(FILE* fp) const;
bool SerializeClasses(tesseract::TFile* fp) const;
// Reads a vector of classes from the given file. Assumes the existence of
// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
// error. Also needs T::T() and T::T(constT&), as init_to_size is used in
// this function. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeClasses(bool swap, FILE* fp);
bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
// Allocates a new array of double the current_size, copies over the
// information from data to the new location, deletes data and returns
@ -188,6 +199,12 @@ class GenericVector {
return data_new;
}
// Reverses the elements of the vector.
void reverse() {
for (int i = 0; i < size_used_ / 2; ++i)
Swap(&data_[i], &data_[size_used_ - 1 - i]);
}
// Sorts the members of this vector using the less than comparator (cmp_lt),
// which compares the values. Useful for GenericVectors to primitive types.
// Will not work so great for pointers (unless you just want to sort some
@ -296,6 +313,15 @@ class GenericVector {
data_[index2] = tmp;
}
}
// Returns true if all elements of *this are within the given range.
// Only uses operator<
bool WithinBounds(const T& rangemin, const T& rangemax) const {
for (int i = 0; i < size_used_; ++i) {
if (data_[i] < rangemin || rangemax < data_[i])
return false;
}
return true;
}
protected:
// Internal recursive version of choose_nth_item.
@ -343,7 +369,7 @@ inline bool LoadDataFromFile(const STRING& filename,
// The default FileWriter writes the vector of char to the filename file,
// returning false on error.
inline bool SaveDataToFile(const GenericVector<char>& data,
const STRING& filename) {
const STRING& filename) {
FILE* fp = fopen(filename.string(), "wb");
if (fp == NULL) return false;
bool result =
@ -419,8 +445,10 @@ class PointerVector : public GenericVector<T*> {
}
PointerVector<T>& operator=(const PointerVector& other) {
this->truncate(0);
this->operator+=(other);
if (&other != this) {
this->truncate(0);
this->operator+=(other);
}
return *this;
}
@ -470,8 +498,11 @@ class PointerVector : public GenericVector<T*> {
GenericVector<T*>::clear();
}
// Writes a vector of simple types to the given file. Assumes that bitwise
// read/write of T will work. Returns false in case of error.
// Writes a vector of (pointers to) classes to the given file. Assumes the
// existence of bool T::Serialize(FILE*) const that returns false in case of
// error. There is no Serialize for simple types, as you would have a
// normal GenericVector of those.
// Returns false in case of error.
bool Serialize(FILE* fp) const {
inT32 used = GenericVector<T*>::size_used_;
if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
@ -482,16 +513,29 @@ class PointerVector : public GenericVector<T*> {
}
return true;
}
// Reads a vector of simple types from the given file. Assumes that bitwise
// read/write will work with ReverseN according to sizeof(T).
bool Serialize(TFile* fp) const {
inT32 used = GenericVector<T*>::size_used_;
if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
for (int i = 0; i < used; ++i) {
inT8 non_null = GenericVector<T*>::data_[i] != NULL;
if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
}
return true;
}
// Reads a vector of (pointers to) classes from the given file. Assumes the
// existence of bool T::DeSerialize(bool, TFile*) const that returns false in
// case of error. There is no Serialize for simple types, as you would have a
// normal GenericVector of those.
// If swap is true, assumes a big/little-endian swap is needed.
// Also needs T::T(), as new T is used in this function.
// Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp) {
inT32 reserved;
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
if (swap) Reverse32(&reserved);
GenericVector<T*>::reserve(reserved);
truncate(0);
for (int i = 0; i < reserved; ++i) {
inT8 non_null;
if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
@ -510,6 +554,30 @@ class PointerVector : public GenericVector<T*> {
}
return true;
}
bool DeSerialize(bool swap, TFile* fp) {
inT32 reserved;
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
if (swap) Reverse32(&reserved);
GenericVector<T*>::reserve(reserved);
truncate(0);
for (int i = 0; i < reserved; ++i) {
inT8 non_null;
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
T* item = NULL;
if (non_null) {
item = new T;
if (!item->DeSerialize(swap, fp)) {
delete item;
return false;
}
this->push_back(item);
} else {
// Null elements should keep their place in the vector.
this->push_back(NULL);
}
}
return true;
}
// Sorts the items pointed to by the members of this vector using
// t::operator<().
@ -711,8 +779,10 @@ GenericVector<T> &GenericVector<T>::operator+=(const GenericVector& other) {
template <typename T>
GenericVector<T> &GenericVector<T>::operator=(const GenericVector& other) {
this->truncate(0);
this->operator+=(other);
if (&other != this) {
this->truncate(0);
this->operator+=(other);
}
return *this;
}
@ -817,6 +887,12 @@ bool GenericVector<T>::Serialize(FILE* fp) const {
if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false;
return true;
}
template <typename T>
bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
return true;
}
// Reads a vector of simple types from the given file. Assumes that bitwise
// read/write will work with ReverseN according to sizeof(T).
@ -836,6 +912,20 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
}
return true;
}
template <typename T>
bool GenericVector<T>::DeSerialize(bool swap, tesseract::TFile* fp) {
inT32 reserved;
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
if (swap) Reverse32(&reserved);
reserve(reserved);
size_used_ = reserved;
if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false;
if (swap) {
for (int i = 0; i < size_used_; ++i)
ReverseN(&data_[i], sizeof(data_[i]));
}
return true;
}
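A hedged example of the new TFile-based round trip for a vector of simple types, kept entirely in memory.
GenericVector<char> buffer;
tesseract::TFile writer;
writer.OpenWrite(&buffer);
GenericVector<inT32> values;
values.push_back(42);
values.Serialize(&writer);  // appends the size and raw data to buffer

tesseract::TFile reader;
reader.Open(&buffer[0], buffer.size());
GenericVector<inT32> copy;
copy.DeSerialize(false, &reader);  // false: no big/little-endian swap needed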
// Writes a vector of classes to the given file. Assumes the existence of
// bool T::Serialize(FILE* fp) const that returns false in case of error.
@ -848,6 +938,14 @@ bool GenericVector<T>::SerializeClasses(FILE* fp) const {
}
return true;
}
template <typename T>
bool GenericVector<T>::SerializeClasses(tesseract::TFile* fp) const {
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
for (int i = 0; i < size_used_; ++i) {
if (!data_[i].Serialize(fp)) return false;
}
return true;
}
// Reads a vector of classes from the given file. Assumes the existence of
// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
@ -866,6 +964,18 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
}
return true;
}
template <typename T>
bool GenericVector<T>::DeSerializeClasses(bool swap, tesseract::TFile* fp) {
uinT32 reserved;
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
if (swap) Reverse32(&reserved);
T empty;
init_to_size(reserved, empty);
for (int i = 0; i < reserved; ++i) {
if (!data_[i].DeSerialize(swap, fp)) return false;
}
return true;
}
// This method clear the current object, then, does a shallow copy of
// its argument, and finally invalidates its argument.

View File

@ -69,13 +69,11 @@ bool ParamUtils::ReadParamsFromFp(FILE *fp, inT64 end_offset,
char line[MAX_PATH]; // input line
bool anyerr = false; // true if any error
bool foundit; // found parameter
inT16 length; // length of line
char *valptr; // value field
while ((end_offset < 0 || ftell(fp) < end_offset) &&
fgets(line, MAX_PATH, fp)) {
if (line[0] != '\n' && line[0] != '#') {
length = strlen (line);
chomp_string(line); // remove newline
for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t';
valptr++);
@ -185,20 +183,20 @@ void ParamUtils::PrintParams(FILE *fp, const ParamsVectors *member_params) {
for (v = 0; v < num_iterations; ++v) {
const ParamsVectors *vec = (v == 0) ? GlobalParams() : member_params;
for (i = 0; i < vec->int_params.size(); ++i) {
fprintf(fp, "%s\t%d\n", vec->int_params[i]->name_str(),
(inT32)(*vec->int_params[i]));
fprintf(fp, "%s\t%d\t%s\n", vec->int_params[i]->name_str(),
(inT32)(*vec->int_params[i]), vec->int_params[i]->info_str());
}
for (i = 0; i < vec->bool_params.size(); ++i) {
fprintf(fp, "%s\t%d\n", vec->bool_params[i]->name_str(),
(BOOL8)(*vec->bool_params[i]));
fprintf(fp, "%s\t%d\t%s\n", vec->bool_params[i]->name_str(),
(BOOL8)(*vec->bool_params[i]), vec->bool_params[i]->info_str());
}
for (int i = 0; i < vec->string_params.size(); ++i) {
fprintf(fp, "%s\t%s\n", vec->string_params[i]->name_str(),
vec->string_params[i]->string());
fprintf(fp, "%s\t%s\t%s\n", vec->string_params[i]->name_str(),
vec->string_params[i]->string(), vec->string_params[i]->info_str());
}
for (int i = 0; i < vec->double_params.size(); ++i) {
fprintf(fp, "%s\t%g\n", vec->double_params[i]->name_str(),
(double)(*vec->double_params[i]));
fprintf(fp, "%s\t%g\t%s\n", vec->double_params[i]->name_str(),
(double)(*vec->double_params[i]), vec->double_params[i]->info_str());
}
}
}
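With this change each printed line carries the parameter description as a third tab-separated field, e.g. (a made-up parameter and description):
some_hypothetical_param	1	One-line description of the parameter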

View File

@ -28,10 +28,12 @@
#define ultoa _ultoa
#endif /* __GNUC__ */
#define SIGNED
#if defined(_MSC_VER)
#define snprintf _snprintf
#if (_MSC_VER <= 1400)
#define vsnprintf _vsnprintf
#endif /* _WIN32 */
#endif /* (_MSC_VER <= 1400) */
#endif /* defined(_MSC_VER) */
#else
#define __UNIX__
#include <limits.h>

View File

@ -34,7 +34,7 @@
#include "tprintf.h"
// workaround for "'off_t' was not declared in this scope" with -std=c++11
#ifndef off_t
#if !defined(off_t) && !defined(__APPLE__) && !defined(__CYGWIN__)
typedef long off_t;
#endif // off_t

View File

@ -19,24 +19,41 @@
#include "serialis.h"
#include <stdio.h>
#include "genericvector.h"
namespace tesseract {
TFile::TFile() : offset_(0) {
TFile::TFile()
: offset_(0), data_(NULL), data_is_owned_(false), is_writing_(false) {
}
TFile::~TFile() {
if (data_is_owned_)
delete data_;
}
bool TFile::Open(const STRING& filename, FileReader reader) {
if (!data_is_owned_) {
data_ = new GenericVector<char>;
data_is_owned_ = true;
}
offset_ = 0;
is_writing_ = false;
if (reader == NULL)
return LoadDataFromFile(filename, &data_);
return LoadDataFromFile(filename, data_);
else
return (*reader)(filename, &data_);
return (*reader)(filename, data_);
}
bool TFile::Open(const char* data, int size) {
offset_ = 0;
data_.init_to_size(size, 0);
memcpy(&data_[0], data, size);
if (!data_is_owned_) {
data_ = new GenericVector<char>;
data_is_owned_ = true;
}
is_writing_ = false;
data_->init_to_size(size, 0);
memcpy(&(*data_)[0], data, size);
return true;
}
@ -44,35 +61,85 @@ bool TFile::Open(FILE* fp, inT64 end_offset) {
offset_ = 0;
inT64 current_pos = ftell(fp);
if (end_offset < 0) {
fseek(fp, 0, SEEK_END);
if (fseek(fp, 0, SEEK_END))
return false;
end_offset = ftell(fp);
fseek(fp, current_pos, SEEK_SET);
if (fseek(fp, current_pos, SEEK_SET))
return false;
}
int size = end_offset - current_pos;
data_.init_to_size(size, 0);
return static_cast<int>(fread(&data_[0], 1, size, fp)) == size;
is_writing_ = false;
if (!data_is_owned_) {
data_ = new GenericVector<char>;
data_is_owned_ = true;
}
data_->init_to_size(size, 0);
return static_cast<int>(fread(&(*data_)[0], 1, size, fp)) == size;
}
char* TFile::FGets(char* buffer, int buffer_size) {
ASSERT_HOST(!is_writing_);
int size = 0;
while (size + 1 < buffer_size && offset_ < data_.size()) {
buffer[size++] = data_[offset_++];
if (data_[offset_ - 1] == '\n') break;
while (size + 1 < buffer_size && offset_ < data_->size()) {
buffer[size++] = (*data_)[offset_++];
if ((*data_)[offset_ - 1] == '\n') break;
}
if (size < buffer_size) buffer[size] = '\0';
return size > 0 ? buffer : NULL;
}
int TFile::FRead(void* buffer, int size, int count) {
char* char_buffer = reinterpret_cast<char*>(buffer);
ASSERT_HOST(!is_writing_);
int required_size = size * count;
if (data_.size() - offset_ < required_size)
required_size = data_.size() - offset_;
memcpy(char_buffer, &data_[offset_], required_size);
if (required_size <= 0) return 0;
char* char_buffer = reinterpret_cast<char*>(buffer);
if (data_->size() - offset_ < required_size)
required_size = data_->size() - offset_;
if (required_size > 0)
memcpy(char_buffer, &(*data_)[offset_], required_size);
offset_ += required_size;
return required_size / size;
}
void TFile::Rewind() {
ASSERT_HOST(!is_writing_);
offset_ = 0;
}
void TFile::OpenWrite(GenericVector<char>* data) {
offset_ = 0;
if (data != NULL) {
if (data_is_owned_) delete data_;
data_ = data;
data_is_owned_ = false;
} else if (!data_is_owned_) {
data_ = new GenericVector<char>;
data_is_owned_ = true;
}
is_writing_ = true;
data_->truncate(0);
}
bool TFile::CloseWrite(const STRING& filename, FileWriter writer) {
ASSERT_HOST(is_writing_);
if (writer == NULL)
return SaveDataToFile(*data_, filename);
else
return (*writer)(*data_, filename);
}
int TFile::FWrite(const void* buffer, int size, int count) {
ASSERT_HOST(is_writing_);
int total = size * count;
if (total <= 0) return 0;
const char* buf = reinterpret_cast<const char*>(buffer);
// This isn't very efficient, but memory is so fast compared to disk
// that it is relatively unimportant, and very simple.
for (int i = 0; i < total; ++i)
data_->push_back(buf[i]);
return count;
}
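An illustrative sketch of the write path: buffer the output in memory, then flush it to disk with the default writer (the filename is a placeholder).
tesseract::TFile fp;
fp.OpenWrite(NULL);  // TFile owns the in-memory buffer
inT32 magic = 0x53534554;
fp.FWrite(&magic, sizeof(magic), 1);
if (!fp.CloseWrite("output.bin", NULL))
  tprintf("Failed to write output.bin\n");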
} // namespace tesseract.

View File

@ -20,11 +20,13 @@
#ifndef SERIALIS_H
#define SERIALIS_H
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "host.h"
#include "genericvector.h"
template <typename T> class GenericVector;
class STRING;
/***********************************************************************
QUOTE_IT MACRO DEFINITION
@ -36,14 +38,24 @@ Replace <parm> with "<parm>". <parm> may be an arbitrary number of tokens
namespace tesseract {
// Simple file class. Only does input for now.
// Allows for portable file input from memory.
// Function to read a GenericVector<char> from a whole file.
// Returns false on failure.
typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
// Function to write a GenericVector<char> to a whole file.
// Returns false on failure.
typedef bool (*FileWriter)(const GenericVector<char>& data,
const STRING& filename);
// Simple file class.
// Allows for portable file input from memory and from foreign file systems.
class TFile {
public:
TFile();
~TFile();
// All the Open methods load the whole file into memory.
// All the Open methods load the whole file into memory for reading.
// Opens a file with a supplied reader, or NULL to use the default.
// Note that mixed read/write is not supported.
bool Open(const STRING& filename, FileReader reader);
// From an existing memory buffer.
bool Open(const char* data, int size);
@ -53,21 +65,33 @@ class TFile {
// Reads a line like fgets. Returns NULL on EOF, otherwise buffer.
// Reads at most buffer_size bytes, including '\0' terminator, even if
// the line is longer. Does nothing if buffer_size <= 0.
// To use fscanf use FGets and sscanf.
char* FGets(char* buffer, int buffer_size);
// Replicates fread, returning the number of items read.
int FRead(void* buffer, int size, int count);
// To use fscanf use FGets and sscanf.
// Resets the TFile as if it has been Opened, but nothing read.
void Rewind() {
offset_ = 0;
}
// Only allowed while reading!
void Rewind();
// Open for writing. Either supply a non-NULL data with OpenWrite before
// calling FWrite, (no close required), or supply a NULL data to OpenWrite
// and call CloseWrite to write to a file after the FWrites.
void OpenWrite(GenericVector<char>* data);
bool CloseWrite(const STRING& filename, FileWriter writer);
// Replicates fwrite, returning the number of items written.
// To use fprintf, use snprintf and FWrite.
int FWrite(const void* buffer, int size, int count);
private:
// The number of bytes used so far.
int offset_;
// The buffered data from the file.
GenericVector<char> data_;
GenericVector<char>* data_;
// True if the data_ pointer is owned by *this.
bool data_is_owned_;
// True if the TFile is open for writing.
bool is_writing_;
};
} // namespace tesseract.
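A sketch, not in the commit, of the file-backed path described by the comments above: OpenWrite(NULL) buffers internally, CloseWrite flushes through the default writer, and Open(filename, NULL) reads back through the default reader. The path and function name are illustrative.

#include "serialis.h"
#include "strngs.h"

// Sketch: buffer writes internally, flush to disk, then reload and read a line.
static bool TFileToDisk(const STRING& path) {
  tesseract::TFile out;
  out.OpenWrite(NULL);                             // NULL: TFile owns the buffer
  const char kMsg[] = "hello\n";
  out.FWrite(kMsg, sizeof(kMsg) - 1, 1);
  if (!out.CloseWrite(path, NULL)) return false;   // default writer

  tesseract::TFile in;
  if (!in.Open(path, NULL)) return false;          // default reader
  char line[32];
  return in.FGets(line, sizeof(line)) != NULL;
}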

View File
@ -17,12 +17,17 @@
*
**********************************************************************/
#include "helpers.h"
#include "tprintf.h"
#include "strngs.h"
#include "genericvector.h"
#include "strngs.h"
#include <assert.h>
#include "genericvector.h"
#include "helpers.h"
#include "serialis.h"
#include "tprintf.h"
using tesseract::TFile;
// Size of buffer needed to host the decimal representation of the maximum
// possible length of an int (in 64 bits), being -<20 digits>.
const int kMaxIntSize = 22;
@ -123,10 +128,22 @@ STRING::STRING(const char* cstr) {
assert(InvariantOk());
}
STRING::STRING(const char *data, int length) {
if (data == NULL) {
// Empty STRINGs contain just the "\0".
memcpy(AllocData(1, kMinCapacity), "", 1);
} else {
char* this_cstr = AllocData(length + 1, length + 1);
memcpy(this_cstr, data, length);
this_cstr[length] = '\0';
}
}
STRING::~STRING() {
DiscardData();
}
// TODO(rays) Change all callers to use TFile and remove the old functions.
// Writes to the given file. Returns false in case of error.
bool STRING::Serialize(FILE* fp) const {
inT32 len = length();
@ -134,6 +151,13 @@ bool STRING::Serialize(FILE* fp) const {
if (static_cast<int>(fwrite(GetCStr(), 1, len, fp)) != len) return false;
return true;
}
// Writes to the given file. Returns false in case of error.
bool STRING::Serialize(TFile* fp) const {
inT32 len = length();
if (fp->FWrite(&len, sizeof(len), 1) != 1) return false;
if (fp->FWrite(GetCStr(), 1, len) != len) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool STRING::DeSerialize(bool swap, FILE* fp) {
@ -145,6 +169,17 @@ bool STRING::DeSerialize(bool swap, FILE* fp) {
if (static_cast<int>(fread(GetCStr(), 1, len, fp)) != len) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool STRING::DeSerialize(bool swap, TFile* fp) {
inT32 len;
if (fp->FRead(&len, sizeof(len), 1) != 1) return false;
if (swap)
ReverseN(&len, sizeof(len));
truncate_at(len);
if (fp->FRead(GetCStr(), 1, len) != len) return false;
return true;
}
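A sketch, not part of the diff, pairing the new STRING overloads with an in-memory TFile; the helper name is illustrative and the headers already included by strngs.cpp are assumed.

// Sketch: round-trip a STRING through the new TFile overloads.
static bool StringRoundTrip(const STRING& src) {
  GenericVector<char> buf;
  tesseract::TFile out;
  out.OpenWrite(&buf);
  if (!src.Serialize(&out)) return false;

  tesseract::TFile in;
  if (!in.Open(&buf[0], buf.size())) return false;
  STRING copy;
  return copy.DeSerialize(false, &in) &&           // false: no byte swap
         strcmp(copy.string(), src.string()) == 0;
}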
BOOL8 STRING::contains(const char c) const {
return (c != '\0') && (strchr (GetCStr(), c) != NULL);
@ -245,21 +280,20 @@ char& STRING::operator[](inT32 index) const {
void STRING::split(const char c, GenericVector<STRING> *splited) {
int start_index = 0;
for (int i = 0; i < length(); i++) {
int len = length();
for (int i = 0; i < len; i++) {
if ((*this)[i] == c) {
if (i != start_index) {
(*this)[i] = '\0';
STRING tmp = GetCStr() + start_index;
splited->push_back(tmp);
splited->push_back(STRING(GetCStr() + start_index, i - start_index));
(*this)[i] = c;
}
start_index = i + 1;
}
}
if (length() != start_index) {
STRING tmp = GetCStr() + start_index;
splited->push_back(tmp);
if (len != start_index) {
splited->push_back(STRING(GetCStr() + start_index, len - start_index));
}
}

View File
@ -25,6 +25,10 @@
#include "platform.h"
#include "memry.h"
namespace tesseract {
class TFile;
} // namespace tesseract.
// STRING_IS_PROTECTED means that string[index] = X is invalid
// because you have to go through strings interface to modify it.
// This allows the string to ensure internal integrity and maintain
@ -43,6 +47,7 @@ class TESS_API STRING
STRING();
STRING(const STRING &string);
STRING(const char *string);
STRING(const char *data, int length);
~STRING ();
// Writes to the given file. Returns false in case of error.
@ -50,6 +55,11 @@ class TESS_API STRING
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
// Writes to the given file. Returns false in case of error.
bool Serialize(tesseract::TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, tesseract::TFile* fp);
BOOL8 contains(const char c) const;
inT32 length() const;

View File
@ -95,21 +95,30 @@ void TessdataManager::CopyFile(FILE *input_file, FILE *output_file,
delete[] chunk;
}
void TessdataManager::WriteMetadata(inT64 *offset_table,
bool TessdataManager::WriteMetadata(inT64 *offset_table,
const char * language_data_path_prefix,
FILE *output_file) {
fseek(output_file, 0, SEEK_SET);
inT32 num_entries = TESSDATA_NUM_ENTRIES;
fwrite(&num_entries, sizeof(inT32), 1, output_file);
fwrite(offset_table, sizeof(inT64), TESSDATA_NUM_ENTRIES, output_file);
fclose(output_file);
tprintf("TessdataManager combined tesseract data files.\n");
for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
tprintf("Offset for type %2d (%s%-22s) is %lld\n", i,
language_data_path_prefix, kTessdataFileSuffixes[i],
offset_table[i]);
bool result = true;
if (fseek(output_file, 0, SEEK_SET) != 0 ||
fwrite(&num_entries, sizeof(inT32), 1, output_file) != 1 ||
fwrite(offset_table, sizeof(inT64), TESSDATA_NUM_ENTRIES,
output_file) != TESSDATA_NUM_ENTRIES) {
fclose(output_file);
result = false;
tprintf("WriteMetadata failed in TessdataManager!\n");
} else if (fclose(output_file)) {
result = false;
tprintf("WriteMetadata failed to close file!\n");
} else {
tprintf("TessdataManager combined tesseract data files.\n");
for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
tprintf("Offset for type %2d (%s%-22s) is %lld\n", i,
language_data_path_prefix, kTessdataFileSuffixes[i],
offset_table[i]);
}
}
return result;
}
bool TessdataManager::CombineDataFiles(
@ -124,8 +133,11 @@ bool TessdataManager::CombineDataFiles(
return false;
}
// Leave some space for recording the offset_table.
fseek(output_file,
sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET);
if (fseek(output_file,
sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET)) {
tprintf("Error seeking %s\n", output_filename);
return false;
}
TessdataType type = TESSDATA_NUM_ENTRIES;
bool text_file = false;
@ -161,8 +173,7 @@ bool TessdataManager::CombineDataFiles(
return false;
}
WriteMetadata(offset_table, language_data_path_prefix, output_file);
return true;
return WriteMetadata(offset_table, language_data_path_prefix, output_file);
}
bool TessdataManager::OverwriteComponents(
@ -185,8 +196,12 @@ bool TessdataManager::OverwriteComponents(
}
// Leave some space for recording the offset_table.
fseek(output_file,
sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET);
if (fseek(output_file,
sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET)) {
fclose(output_file);
tprintf("Error seeking %s\n", new_traineddata_filename);
return false;
}
// Open the files with the new components.
for (i = 0; i < num_new_components; ++i) {
@ -212,8 +227,7 @@ bool TessdataManager::OverwriteComponents(
}
}
const char *language_data_path_prefix = strchr(new_traineddata_filename, '.');
WriteMetadata(offset_table, language_data_path_prefix, output_file);
return true;
return WriteMetadata(offset_table, language_data_path_prefix, output_file);
}
bool TessdataManager::TessdataTypeFromFileSuffix(

View File
@ -199,8 +199,10 @@ class TessdataManager {
return swap_;
}
/** Writes the number of entries and the given offset table to output_file. */
static void WriteMetadata(inT64 *offset_table,
/** Writes the number of entries and the given offset table to output_file.
* Returns false on error.
*/
static bool WriteMetadata(inT64 *offset_table,
const char *language_data_path_prefix,
FILE *output_file);

View File
@ -19,6 +19,7 @@
#include "unichar.h"
#include "errcode.h"
#include "genericvector.h"
#include "tprintf.h"
#define UNI_MAX_LEGAL_UTF32 0x0010FFFF
@ -203,3 +204,22 @@ UNICHAR::const_iterator UNICHAR::begin(const char* utf8_str, const int len) {
UNICHAR::const_iterator UNICHAR::end(const char* utf8_str, const int len) {
return UNICHAR::const_iterator(utf8_str + len);
}
// Converts a utf-8 string to a vector of unicodes.
// Returns false if the input contains invalid UTF-8, and replaces
// the rest of the string with a single space.
bool UNICHAR::UTF8ToUnicode(const char* utf8_str,
GenericVector<int>* unicodes) {
const int utf8_length = strlen(utf8_str);
const_iterator end_it(end(utf8_str, utf8_length));
for (const_iterator it(begin(utf8_str, utf8_length)); it != end_it; ++it) {
if (it.is_legal()) {
unicodes->push_back(*it);
} else {
unicodes->push_back(' ');
return false;
}
}
return true;
}
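A sketch of the new helper, not in the commit; the literal and expected code points are illustrative, and the headers already included by unichar.cpp are assumed.

// Sketch: decode a UTF-8 string into Unicode code points.
static void DecodeExample() {
  GenericVector<int> unicodes;
  if (UNICHAR::UTF8ToUnicode("Grüße", &unicodes)) {
    // Success: one code point per character, {0x47, 0x72, 0xFC, 0xDF, 0x65}.
  } else {
    // Invalid UTF-8: decoding stops and a single ' ' is appended instead.
  }
}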

View File
@ -23,6 +23,8 @@
#include <memory.h>
#include <string.h>
template <typename T> class GenericVector;
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
#define UNICHAR_LEN 30
@ -148,6 +150,11 @@ class UNICHAR {
static const_iterator begin(const char* utf8_str, const int byte_length);
static const_iterator end(const char* utf8_str, const int byte_length);
// Converts a utf-8 string to a vector of unicodes.
// Returns false if the input contains invalid UTF-8, and replaces
// the rest of the string with a single space.
static bool UTF8ToUnicode(const char* utf8_str, GenericVector<int>* unicodes);
private:
// A UTF-8 representation of 1 or more Unicode characters.
// The last element (chars[UNICHAR_LEN - 1]) is a length if

View File
@ -17,15 +17,17 @@
//
///////////////////////////////////////////////////////////////////////
#include "unicharset.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "params.h"
#include "serialis.h"
#include "tesscallback.h"
#include "tprintf.h"
#include "unichar.h"
#include "unicharset.h"
#include "params.h"
// Special character used in representing character fragments.
static const char kSeparator = '|';
@ -448,11 +450,19 @@ void UNICHARSET::ExpandRangesFromOther(const UNICHARSET& src) {
}
}
// Makes this a copy of src. Clears this completely first, so the automattic
// ids will not be present in this if not in src.
// Makes this a copy of src. Clears this completely first, so the automatic
// ids will not be present in this if not in src. Does NOT reorder the set!
void UNICHARSET::CopyFrom(const UNICHARSET& src) {
clear();
AppendOtherUnicharset(src);
for (int ch = 0; ch < src.size_used; ++ch) {
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
const char* utf8 = src.id_to_unichar(ch);
unichar_insert(utf8);
unichars[ch].properties.ExpandRangesFrom(src_props);
}
// Set properties, including mirror and other_case, WITHOUT reordering
// the unicharset.
PartialSetPropertiesFromOther(0, src);
}
// For each id in src, if it does not occur in this, add it, as in
@ -689,8 +699,11 @@ bool UNICHARSET::eq(UNICHAR_ID unichar_id,
return strcmp(this->id_to_unichar(unichar_id), unichar_repr) == 0;
}
bool UNICHARSET::save_to_file(FILE *file) const {
fprintf(file, "%d\n", this->size());
bool UNICHARSET::save_to_string(STRING *str) const {
const int kFileBufSize = 1024;
char buffer[kFileBufSize + 1];
snprintf(buffer, kFileBufSize, "%d\n", this->size());
*str = buffer;
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
int min_bottom, max_bottom, min_top, max_top;
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
@ -702,11 +715,11 @@ bool UNICHARSET::save_to_file(FILE *file) const {
get_advance_range(id, &min_advance, &max_advance);
unsigned int properties = this->get_properties(id);
if (strcmp(this->id_to_unichar(id), " ") == 0) {
fprintf(file, "%s %x %s %d\n", "NULL", properties,
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
this->get_script_from_script_id(this->get_script(id)),
this->get_other_case(id));
} else {
fprintf(file,
snprintf(buffer, kFileBufSize,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n",
this->id_to_unichar(id), properties,
min_bottom, max_bottom, min_top, max_top, min_width, max_width,
@ -716,10 +729,12 @@ bool UNICHARSET::save_to_file(FILE *file) const {
this->get_mirror(id), this->get_normed_unichar(id),
this->debug_str(id).string());
}
*str += buffer;
}
return true;
}
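A sketch, not in the commit, showing how save_to_string feeds the save_to_file wrappers added to unicharset.h further below; the helper name is illustrative.

// Sketch: serialize a UNICHARSET into memory and hand it to a TFile.
static bool SaveUnicharsetToMemory(const UNICHARSET& unicharset,
                                   GenericVector<char>* buffer) {
  STRING str;
  if (!unicharset.save_to_string(&str)) return false;
  tesseract::TFile fp;
  fp.OpenWrite(buffer);
  return fp.FWrite(&str[0], str.length(), 1) == 1;  // the write save_to_file(&fp) performs
}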
// TODO(rays) Replace with TFile everywhere.
class InMemoryFilePointer {
public:
InMemoryFilePointer(const char *memory, int mem_size)
@ -776,6 +791,14 @@ bool UNICHARSET::load_from_file(FILE *file, bool skip_fragments) {
return success;
}
bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) {
TessResultCallback2<char *, char *, int> *fgets_cb =
NewPermanentTessCallback(file, &tesseract::TFile::FGets);
bool success = load_via_fgets(fgets_cb, skip_fragments);
delete fgets_cb;
return success;
}
bool UNICHARSET::load_via_fgets(
TessResultCallback2<char *, char *, int> *fgets_cb,
bool skip_fragments) {
@ -844,8 +867,11 @@ bool UNICHARSET::load_via_fgets(
// Skip fragments if needed.
CHAR_FRAGMENT *frag = NULL;
if (skip_fragments && (frag = CHAR_FRAGMENT::parse_from_string(unichar))) {
int num_pieces = frag->get_total();
delete frag;
continue;
// Skip multi-element fragments, but keep singles like UNICHAR_BROKEN in.
if (num_pieces > 1)
continue;
}
// Insert unichar into unicharset and set its properties.
if (strcmp(unichar, "NULL") == 0)
@ -959,8 +985,10 @@ bool UNICHARSET::major_right_to_left() const {
// Set a whitelist and/or blacklist of characters to recognize.
// An empty or NULL whitelist enables everything (minus any blacklist).
// An empty or NULL blacklist disables nothing.
// An empty or NULL unblacklist has no effect.
void UNICHARSET::set_black_and_whitelist(const char* blacklist,
const char* whitelist) {
const char* whitelist,
const char* unblacklist) {
bool def_enabled = whitelist == NULL || whitelist[0] == '\0';
// Set everything to default
for (int ch = 0; ch < size_used; ++ch)
@ -983,6 +1011,15 @@ void UNICHARSET::set_black_and_whitelist(const char* blacklist,
unichars[encoding[i]].properties.enabled = false;
}
}
if (unblacklist != NULL && unblacklist[0] != '\0') {
// Re-enable the unblacklist.
GenericVector<UNICHAR_ID> encoding;
encode_string(unblacklist, false, &encoding, NULL, NULL);
for (int i = 0; i < encoding.size(); ++i) {
if (encoding[i] != INVALID_UNICHAR_ID)
unichars[encoding[i]].properties.enabled = true;
}
}
}
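A sketch of the new third argument, not in the commit; the character choices are illustrative.

// Sketch: blacklist '0' and 'O', then re-enable 'O'. The unblacklist overrides
// the blacklist, which in turn overrides the whitelist.
static void DisableZeroButKeepOh(UNICHARSET* unicharset) {
  unicharset->set_black_and_whitelist("0O", NULL, "O");
}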
int UNICHARSET::add_script(const char* script) {

View File
@ -23,6 +23,7 @@
#include "errcode.h"
#include "genericvector.h"
#include "helpers.h"
#include "serialis.h"
#include "strngs.h"
#include "tesscallback.h"
#include "unichar.h"
@ -317,7 +318,22 @@ class UNICHARSET {
// Saves the content of the UNICHARSET to the given file.
// Returns true if the operation is successful.
bool save_to_file(FILE *file) const;
bool save_to_file(FILE *file) const {
STRING str;
if (!save_to_string(&str)) return false;
if (fwrite(&str[0], str.length(), 1, file) != 1) return false;
return true;
}
bool save_to_file(tesseract::TFile *file) const {
STRING str;
if (!save_to_string(&str)) return false;
if (file->FWrite(&str[0], str.length(), 1) != 1) return false;
return true;
}
// Saves the content of the UNICHARSET to the given STRING.
// Returns true if the operation is successful.
bool save_to_string(STRING *str) const;
// Load a unicharset from a unicharset file that has been loaded into
// the given memory buffer.
@ -348,6 +364,8 @@ class UNICHARSET {
// Returns true if the operation is successful.
bool load_from_file(FILE *file, bool skip_fragments);
bool load_from_file(FILE *file) { return load_from_file(file, false); }
bool load_from_file(tesseract::TFile *file, bool skip_fragments);
// Sets up internal data after loading the file, based on the char
// properties. Called from load_from_file, but also needs to be run
@ -363,11 +381,14 @@ class UNICHARSET {
// Set a whitelist and/or blacklist of characters to recognize.
// An empty or NULL whitelist enables everything (minus any blacklist).
// An empty or NULL blacklist disables nothing.
// An empty or NULL unblacklist has no effect.
// The blacklist overrides the whitelist.
// The unblacklist overrides the blacklist.
// Each list is a string of utf8 character strings. Boundaries between
// unicharset units are worked out automatically, and characters not in
// the unicharset are silently ignored.
void set_black_and_whitelist(const char* blacklist, const char* whitelist);
void set_black_and_whitelist(const char* blacklist, const char* whitelist,
const char* unblacklist);
// Set the isalpha property of the given unichar to the given value.
void set_isalpha(UNICHAR_ID unichar_id, bool value) {
@ -596,6 +617,10 @@ class UNICHARSET {
unichars[unichar_id].properties.max_advance =
static_cast<inT16>(ClipToRange(max_advance, 0, MAX_INT16));
}
// Returns true if the font metrics properties are empty.
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const {
return unichars[unichar_id].properties.AnyRangeEmpty();
}
// Return the script name of the given unichar.
// The returned pointer will always be the same for the same script, it's

View File
@ -11,15 +11,15 @@ endif
noinst_HEADERS = \
adaptive.h blobclass.h \
classify.h cluster.h clusttool.h cutoffs.h \
errorcounter.h extern.h extract.h \
featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h \
errorcounter.h \
featdefs.h float2int.h fpoint.h \
intfeaturedist.h intfeaturemap.h intfeaturespace.h \
intfx.h intmatcher.h intproto.h kdtree.h \
mastertrainer.h mf.h mfdefs.h mfoutline.h mfx.h \
normfeat.h normmatch.h \
ocrfeatures.h outfeat.h picofeat.h protos.h \
sampleiterator.h shapeclassifier.h shapetable.h \
tessclassifier.h trainingsample.h trainingsampleset.h xform2d.h
tessclassifier.h trainingsample.h trainingsampleset.h
if !USING_MULTIPLELIBS
noinst_LTLIBRARIES = libtesseract_classify.la
@ -37,14 +37,14 @@ endif
libtesseract_classify_la_SOURCES = \
adaptive.cpp adaptmatch.cpp blobclass.cpp \
classify.cpp cluster.cpp clusttool.cpp cutoffs.cpp \
errorcounter.cpp extract.cpp \
featdefs.cpp flexfx.cpp float2int.cpp fpoint.cpp fxdefs.cpp \
errorcounter.cpp \
featdefs.cpp float2int.cpp fpoint.cpp \
intfeaturedist.cpp intfeaturemap.cpp intfeaturespace.cpp \
intfx.cpp intmatcher.cpp intproto.cpp kdtree.cpp \
mastertrainer.cpp mf.cpp mfdefs.cpp mfoutline.cpp mfx.cpp \
normfeat.cpp normmatch.cpp \
ocrfeatures.cpp outfeat.cpp picofeat.cpp protos.cpp \
sampleiterator.cpp shapeclassifier.cpp shapetable.cpp \
tessclassifier.cpp trainingsample.cpp trainingsampleset.cpp xform2d.cpp
tessclassifier.cpp trainingsample.cpp trainingsampleset.cpp

View File
@ -24,6 +24,7 @@
#endif
#include <ctype.h>
#include "shapeclassifier.h"
#include "ambigs.h"
#include "blobclass.h"
#include "blobs.h"
@ -73,37 +74,39 @@
#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
#define WORST_POSSIBLE_RATING (1.0)
#define WORST_POSSIBLE_RATING (0.0f)
struct ScoredClass {
CLASS_ID unichar_id;
int shape_id;
FLOAT32 rating;
bool adapted;
inT16 config;
inT16 fontinfo_id;
inT16 fontinfo_id2;
};
using tesseract::UnicharRating;
using tesseract::ScoredFont;
struct ADAPT_RESULTS {
inT32 BlobLength;
bool HasNonfragment;
GenericVector<ScoredClass> match;
ScoredClass best_match;
UNICHAR_ID best_unichar_id;
int best_match_index;
FLOAT32 best_rating;
GenericVector<UnicharRating> match;
GenericVector<CP_RESULT_STRUCT> CPResults;
/// Initializes data members to the default values. Sets the initial
/// rating of each class to be the worst possible rating (1.0).
inline void Initialize() {
BlobLength = MAX_INT32;
HasNonfragment = false;
best_match.unichar_id = NO_CLASS;
best_match.shape_id = -1;
best_match.rating = WORST_POSSIBLE_RATING;
best_match.adapted = false;
best_match.config = 0;
best_match.fontinfo_id = kBlankFontinfoId;
best_match.fontinfo_id2 = kBlankFontinfoId;
BlobLength = MAX_INT32;
HasNonfragment = false;
ComputeBest();
}
// Computes best_unichar_id, best_match_index and best_rating.
void ComputeBest() {
best_unichar_id = INVALID_UNICHAR_ID;
best_match_index = -1;
best_rating = WORST_POSSIBLE_RATING;
for (int i = 0; i < match.size(); ++i) {
if (match[i].rating > best_rating) {
best_rating = match[i].rating;
best_unichar_id = match[i].unichar_id;
best_match_index = i;
}
}
}
};
@ -116,17 +119,30 @@ struct PROTO_KEY {
/*-----------------------------------------------------------------------------
Private Macros
-----------------------------------------------------------------------------*/
#define MarginalMatch(Rating) \
((Rating) > matcher_great_threshold)
inline bool MarginalMatch(float confidence, float matcher_great_threshold) {
return (1.0f - confidence) > matcher_great_threshold;
}
/*-----------------------------------------------------------------------------
Private Function Prototypes
-----------------------------------------------------------------------------*/
int CompareByRating(const void *arg1, const void *arg2);
// Returns the index of the given id in results, if present, or the size of the
// vector (index it will go at) if not present.
static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) {
for (int i = 0; i < results.match.size(); i++) {
if (results.match[i].unichar_id == id)
return i;
}
return results.match.size();
}
ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id);
ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id);
// Returns the current rating for a unichar id if we have rated it, defaulting
// to WORST_POSSIBLE_RATING.
static float ScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) {
int index = FindScoredUnichar(id, results);
if (index >= results.match.size()) return WORST_POSSIBLE_RATING;
return results.match[index].rating;
}
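The surrounding changes flip ADAPT_RESULTS from distance-style ratings (0.0 = perfect) to UnicharRating confidences (1.0 = perfect, with WORST_POSSIBLE_RATING now 0.0f). A brief sketch of the convention, not part of the commit; names and the threshold are illustrative.

// Sketch: store a match as a confidence and test it the way MarginalMatch does.
static bool IsMarginal(int unichar_id, float old_style_distance,
                       float great_threshold) {
  UnicharRating result(unichar_id, 1.0f - old_style_distance);
  return MarginalMatch(result.rating, great_threshold);  // (1 - rating) > threshold
}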
void InitMatcherRatings(register FLOAT32 *Rating);
@ -176,19 +192,21 @@ void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) {
DoAdaptiveMatch(Blob, Results);
RemoveBadMatches(Results);
Results->match.sort(CompareByRating);
Results->match.sort(&UnicharRating::SortDescendingRating);
RemoveExtraPuncs(Results);
Results->ComputeBest();
ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
Choices);
if (matcher_debug_level >= 1) {
cprintf ("AD Matches = ");
PrintAdaptiveMatchResults(stdout, Results);
}
// TODO(rays) Move to before ConvertMatchesToChoices!
if (LargeSpeckle(*Blob) || Choices->length() == 0)
AddLargeSpeckleTo(Results->BlobLength, Choices);
if (matcher_debug_level >= 1) {
tprintf("AD Matches = ");
PrintAdaptiveMatchResults(*Results);
}
#ifndef GRAPHICS_DISABLED
if (classify_enable_adaptive_debugger)
DebugAdaptiveClassifier(Blob, Results);
@ -220,17 +238,15 @@ void Classify::RefreshDebugWindow(ScrollView **win, const char *msg,
// Learns the given word using its chopped_word, seam_array, denorm,
// box_word, best_state, and correct_text to learn both correctly and
// incorrectly segmented blobs. If filename is not NULL, then LearnBlob
// is called and the data will be written to a file for static training.
// incorrectly segmented blobs. If fontname is not NULL, then LearnBlob
// is called and the data will be saved in an internal buffer.
// Otherwise AdaptToBlob is called for adaption within a document.
// If rejmap is not NULL, then only chars with a rejmap entry of '1' will
// be learned, otherwise all chars with good correct_text are learned.
void Classify::LearnWord(const char* filename, WERD_RES *word) {
void Classify::LearnWord(const char* fontname, WERD_RES* word) {
int word_len = word->correct_text.size();
if (word_len == 0) return;
float* thresholds = NULL;
if (filename == NULL) {
if (fontname == NULL) {
// Adaption mode.
if (!EnableLearning || word->best_choice == NULL)
return; // Can't or won't adapt.
@ -267,8 +283,8 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
if (word->correct_text[ch].length() > 0) {
float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
LearnPieces(filename, start_blob, word->best_state[ch],
threshold, CST_WHOLE, word->correct_text[ch].string(), word);
LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
CST_WHOLE, word->correct_text[ch].string(), word);
if (word->best_state[ch] > 1 && !disable_character_fragments) {
// Check that the character breaks into meaningful fragments
@ -301,8 +317,8 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
if (i != tokens.size() - 1)
full_string += ' ';
}
LearnPieces(filename, start_blob + frag, 1,
threshold, CST_FRAGMENT, full_string.string(), word);
LearnPieces(fontname, start_blob + frag, 1, threshold,
CST_FRAGMENT, full_string.string(), word);
}
}
}
@ -314,13 +330,13 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
if (word->best_state[ch] > 1) {
// If the next blob is good, make junk with the rightmost fragment.
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
LearnPieces(filename, start_blob + word->best_state[ch] - 1,
LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
word->best_state[ch + 1] + 1,
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
}
// If the previous blob is good, make junk with the leftmost fragment.
if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
LearnPieces(filename, start_blob - word->best_state[ch - 1],
LearnPieces(fontname, start_blob - word->best_state[ch - 1],
word->best_state[ch - 1] + 1,
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
}
@ -329,7 +345,7 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
STRING joined_text = word->correct_text[ch];
joined_text += word->correct_text[ch + 1];
LearnPieces(filename, start_blob,
LearnPieces(fontname, start_blob,
word->best_state[ch] + word->best_state[ch + 1],
threshold, CST_NGRAM, joined_text.string(), word);
}
@ -342,16 +358,16 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
// Builds a blob of length fragments, from the word, starting at start,
// and then learns it, as having the given correct_text.
// If filename is not NULL, then LearnBlob
// is called and the data will be written to a file for static training.
// If fontname is not NULL, then LearnBlob is called and the data will be
// saved in an internal buffer for static training.
// Otherwise AdaptToBlob is called for adaption within a document.
// threshold is a magic number required by AdaptToChar and generated by
// ComputeAdaptionThresholds.
// Although it can be partly inferred from the string, segmentation is
// provided to explicitly clarify the character segmentation.
void Classify::LearnPieces(const char* filename, int start, int length,
void Classify::LearnPieces(const char* fontname, int start, int length,
float threshold, CharSegmentationType segmentation,
const char* correct_text, WERD_RES *word) {
const char* correct_text, WERD_RES* word) {
// TODO(daria) Remove/modify this if/when we want
// to train and/or adapt to n-grams.
if (segmentation != CST_WHOLE &&
@ -359,8 +375,8 @@ void Classify::LearnPieces(const char* filename, int start, int length,
return;
if (length > 1) {
join_pieces(word->seam_array, start, start + length - 1,
word->chopped_word);
SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
start + length - 1);
}
TBLOB* blob = word->chopped_word->blobs[start];
// Rotate the blob if needed for classification.
@ -385,7 +401,7 @@ void Classify::LearnPieces(const char* filename, int start, int length,
}
#endif // GRAPHICS_DISABLED
if (filename != NULL) {
if (fontname != NULL) {
classify_norm_method.set_value(character); // force char norm spc 30/11/93
tess_bn_matching.set_value(false); // turn it off
tess_cn_matching.set_value(false);
@ -393,8 +409,7 @@ void Classify::LearnPieces(const char* filename, int start, int length,
INT_FX_RESULT_STRUCT fx_info;
SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm,
&bl_denorm, &cn_denorm, &fx_info);
LearnBlob(feature_defs_, filename, rotated_blob, bl_denorm, cn_denorm,
fx_info, correct_text);
LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
} else if (unicharset.contains_unichar(correct_text)) {
UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
int font_id = word->fontinfo != NULL
@ -413,7 +428,8 @@ void Classify::LearnPieces(const char* filename, int start, int length,
delete rotated_blob;
}
break_pieces(word->seam_array, start, start + length - 1, word->chopped_word);
SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
start + length - 1);
} // LearnPieces.
/*---------------------------------------------------------------------------*/
@ -726,8 +742,8 @@ void Classify::InitAdaptedClass(TBLOB *Blob,
ConvertConfig (AllProtosOn, 0, IClass);
if (classify_learning_debug_level >= 1) {
cprintf ("Added new class '%s' with class id %d and %d protos.\n",
unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
tprintf("Added new class '%s' with class id %d and %d protos.\n",
unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
if (classify_learning_debug_level > 1)
DisplayAdaptedChar(Blob, IClass);
}
@ -839,7 +855,7 @@ void Classify::AdaptToChar(TBLOB *Blob,
FLOAT32 Threshold) {
int NumFeatures;
INT_FEATURE_ARRAY IntFeatures;
INT_RESULT_STRUCT IntResult;
UnicharRating int_result;
INT_CLASS IClass;
ADAPT_CLASS Class;
TEMP_CONFIG TempConfig;
@ -849,13 +865,13 @@ void Classify::AdaptToChar(TBLOB *Blob,
if (!LegalClassId (ClassId))
return;
int_result.unichar_id = ClassId;
Class = AdaptedTemplates->Class[ClassId];
assert(Class != NULL);
if (IsEmptyAdaptedClass(Class)) {
InitAdaptedClass(Blob, ClassId, FontinfoId, Class, AdaptedTemplates);
}
else {
IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
} else {
IClass = ClassForClassId(AdaptedTemplates->Templates, ClassId);
NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
if (NumFeatures <= 0)
@ -872,39 +888,38 @@ void Classify::AdaptToChar(TBLOB *Blob,
}
im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
NumFeatures, IntFeatures,
&IntResult, classify_adapt_feature_threshold,
&int_result, classify_adapt_feature_threshold,
NO_DEBUG, matcher_debug_separate_windows);
FreeBitVector(MatchingFontConfigs);
SetAdaptiveThreshold(Threshold);
if (IntResult.Rating <= Threshold) {
if (ConfigIsPermanent (Class, IntResult.Config)) {
if (1.0f - int_result.rating <= Threshold) {
if (ConfigIsPermanent(Class, int_result.config)) {
if (classify_learning_debug_level >= 1)
cprintf ("Found good match to perm config %d = %4.1f%%.\n",
IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
tprintf("Found good match to perm config %d = %4.1f%%.\n",
int_result.config, int_result.rating * 100.0);
FreeFeatureSet(FloatFeatures);
return;
}
TempConfig = TempConfigFor (Class, IntResult.Config);
TempConfig = TempConfigFor(Class, int_result.config);
IncreaseConfidence(TempConfig);
if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
}
if (classify_learning_debug_level >= 1)
cprintf ("Increasing reliability of temp config %d to %d.\n",
IntResult.Config, TempConfig->NumTimesSeen);
tprintf("Increasing reliability of temp config %d to %d.\n",
int_result.config, TempConfig->NumTimesSeen);
if (TempConfigReliable(ClassId, TempConfig)) {
MakePermanent(AdaptedTemplates, ClassId, IntResult.Config, Blob);
MakePermanent(AdaptedTemplates, ClassId, int_result.config, Blob);
UpdateAmbigsGroup(ClassId, Blob);
}
}
else {
} else {
if (classify_learning_debug_level >= 1) {
cprintf ("Found poor match to temp config %d = %4.1f%%.\n",
IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
tprintf("Found poor match to temp config %d = %4.1f%%.\n",
int_result.config, int_result.rating * 100.0);
if (classify_learning_debug_level > 2)
DisplayAdaptedChar(Blob, IClass);
}
@ -939,20 +954,20 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
&bl_features);
if (sample == NULL) return;
INT_RESULT_STRUCT IntResult;
UnicharRating int_result;
im_.Match(int_class, AllProtosOn, AllConfigsOn,
bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold,
&int_result, classify_adapt_feature_threshold,
NO_DEBUG, matcher_debug_separate_windows);
cprintf ("Best match to temp config %d = %4.1f%%.\n",
IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
tprintf("Best match to temp config %d = %4.1f%%.\n",
int_result.config, int_result.rating * 100.0);
if (classify_learning_debug_level >= 2) {
uinT32 ConfigMask;
ConfigMask = 1 << IntResult.Config;
ConfigMask = 1 << int_result.config;
ShowMatchDisplay();
im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold,
&int_result, classify_adapt_feature_threshold,
6 | 0x19, matcher_debug_separate_windows);
UpdateMatchDisplay();
}
@ -988,44 +1003,34 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
* @note Exceptions: none
* @note History: Tue Mar 12 18:19:29 1991, DSJ, Created.
*/
void Classify::AddNewResult(ADAPT_RESULTS *results,
CLASS_ID class_id,
int shape_id,
FLOAT32 rating,
bool adapted,
int config,
int fontinfo_id,
int fontinfo_id2) {
ScoredClass *old_match = FindScoredUnichar(results, class_id);
ScoredClass match =
{ class_id,
shape_id,
rating,
adapted,
static_cast<inT16>(config),
static_cast<inT16>(fontinfo_id),
static_cast<inT16>(fontinfo_id2) };
void Classify::AddNewResult(const UnicharRating& new_result,
ADAPT_RESULTS *results) {
int old_match = FindScoredUnichar(new_result.unichar_id, *results);
if (rating > results->best_match.rating + matcher_bad_match_pad ||
(old_match && rating >= old_match->rating))
return;
if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
(old_match < results->match.size() &&
new_result.rating <= results->match[old_match].rating))
return; // New one not good enough.
if (!unicharset.get_fragment(class_id))
if (!unicharset.get_fragment(new_result.unichar_id))
results->HasNonfragment = true;
if (old_match)
old_match->rating = rating;
else
results->match.push_back(match);
if (old_match < results->match.size()) {
results->match[old_match].rating = new_result.rating;
} else {
results->match.push_back(new_result);
}
if (rating < results->best_match.rating &&
if (new_result.rating > results->best_rating &&
// Ensure that fragments do not affect best rating, class and config.
// This is needed so that at least one non-fragmented character is
// always present in the results.
// TODO(daria): verify that this helps accuracy and does not
// hurt performance.
!unicharset.get_fragment(class_id)) {
results->best_match = match;
!unicharset.get_fragment(new_result.unichar_id)) {
results->best_match_index = old_match;
results->best_rating = new_result.rating;
results->best_unichar_id = new_result.unichar_id;
}
} /* AddNewResult */
@ -1060,7 +1065,7 @@ void Classify::AmbigClassifier(
ADAPT_RESULTS *results) {
if (int_features.empty()) return;
uinT8* CharNormArray = new uinT8[unicharset.size()];
INT_RESULT_STRUCT IntResult;
UnicharRating int_result;
results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
CharNormArray);
@ -1073,17 +1078,18 @@ void Classify::AmbigClassifier(
while (*ambiguities >= 0) {
CLASS_ID class_id = *ambiguities;
int_result.unichar_id = class_id;
im_.Match(ClassForClassId(templates, class_id),
AllProtosOn, AllConfigsOn,
int_features.size(), &int_features[0],
&IntResult,
&int_result,
classify_adapt_feature_threshold, NO_DEBUG,
matcher_debug_separate_windows);
ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
results->BlobLength,
classify_integer_matcher_multiplier,
CharNormArray, IntResult, results);
CharNormArray, &int_result, results);
ambiguities++;
}
delete [] CharNormArray;
@ -1104,14 +1110,15 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
ADAPT_RESULTS* final_results) {
int top = blob_box.top();
int bottom = blob_box.bottom();
UnicharRating int_result;
for (int c = 0; c < results.size(); c++) {
CLASS_ID class_id = results[c].Class;
INT_RESULT_STRUCT& int_result = results[c].IMResult;
BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
: AllProtosOn;
BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs
: AllConfigsOn;
int_result.unichar_id = class_id;
im_.Match(ClassForClassId(templates, class_id),
protos, configs,
num_features, features,
@ -1122,7 +1129,7 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
results[c].Rating,
final_results->BlobLength,
matcher_multiplier, norm_factors,
int_result, final_results);
&int_result, final_results);
}
}
@ -1135,65 +1142,76 @@ void Classify::ExpandShapesAndApplyCorrections(
ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
float cp_rating, int blob_length, int matcher_multiplier,
const uinT8* cn_factors,
INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) {
// Compute the fontinfo_ids.
int fontinfo_id = kBlankFontinfoId;
int fontinfo_id2 = kBlankFontinfoId;
UnicharRating* int_result, ADAPT_RESULTS* final_results) {
if (classes != NULL) {
// Adapted result.
fontinfo_id = GetFontinfoId(classes[class_id], int_result.Config);
fontinfo_id2 = GetFontinfoId(classes[class_id], int_result.Config2);
// Adapted result. Convert configs to fontinfo_ids.
int_result->adapted = true;
for (int f = 0; f < int_result->fonts.size(); ++f) {
int_result->fonts[f].fontinfo_id =
GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
}
} else {
// Pre-trained result.
fontinfo_id = ClassAndConfigIDToFontOrShapeID(class_id, int_result.Config);
fontinfo_id2 = ClassAndConfigIDToFontOrShapeID(class_id,
int_result.Config2);
// Pre-trained result. Map fonts using font_sets_.
int_result->adapted = false;
for (int f = 0; f < int_result->fonts.size(); ++f) {
int_result->fonts[f].fontinfo_id =
ClassAndConfigIDToFontOrShapeID(class_id,
int_result->fonts[f].fontinfo_id);
}
if (shape_table_ != NULL) {
// Actually fontinfo_id is an index into the shape_table_ and it
// contains a list of unchar_id/font_id pairs.
int shape_id = fontinfo_id;
const Shape& shape = shape_table_->GetShape(fontinfo_id);
double min_rating = 0.0;
for (int c = 0; c < shape.size(); ++c) {
int unichar_id = shape[c].unichar_id;
fontinfo_id = shape[c].font_ids[0];
if (shape[c].font_ids.size() > 1)
fontinfo_id2 = shape[c].font_ids[1];
else if (fontinfo_id2 != kBlankFontinfoId)
fontinfo_id2 = shape_table_->GetShape(fontinfo_id2)[0].font_ids[0];
double rating = ComputeCorrectedRating(debug, unichar_id, cp_rating,
int_result.Rating,
int_result.FeatureMisses,
bottom, top, blob_length,
matcher_multiplier, cn_factors);
if (c == 0 || rating < min_rating)
min_rating = rating;
if (unicharset.get_enabled(unichar_id)) {
AddNewResult(final_results, unichar_id, shape_id, rating,
classes != NULL, int_result.Config,
fontinfo_id, fontinfo_id2);
// Two possible cases:
// 1. Flat shapetable. All unichar-ids of the shapes referenced by
// int_result->fonts are the same. In this case build a new vector of
// mapped fonts and replace the fonts in int_result.
// 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
// by int_result. In this case, build a vector of UnicharRating to
// gather together different font-ids for each unichar. Also covers case1.
GenericVector<UnicharRating> mapped_results;
for (int f = 0; f < int_result->fonts.size(); ++f) {
int shape_id = int_result->fonts[f].fontinfo_id;
const Shape& shape = shape_table_->GetShape(shape_id);
for (int c = 0; c < shape.size(); ++c) {
int unichar_id = shape[c].unichar_id;
if (!unicharset.get_enabled(unichar_id)) continue;
// Find the mapped_result for unichar_id.
int r = 0;
for (r = 0; r < mapped_results.size() &&
mapped_results[r].unichar_id != unichar_id; ++r) {}
if (r == mapped_results.size()) {
mapped_results.push_back(*int_result);
mapped_results[r].unichar_id = unichar_id;
mapped_results[r].fonts.truncate(0);
}
for (int i = 0; i < shape[c].font_ids.size(); ++i) {
mapped_results[r].fonts.push_back(
ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
}
}
}
int_result.Rating = min_rating;
for (int m = 0; m < mapped_results.size(); ++m) {
mapped_results[m].rating =
ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
cp_rating, int_result->rating,
int_result->feature_misses, bottom, top,
blob_length, matcher_multiplier, cn_factors);
AddNewResult(mapped_results[m], final_results);
}
return;
}
}
double rating = ComputeCorrectedRating(debug, class_id, cp_rating,
int_result.Rating,
int_result.FeatureMisses,
bottom, top, blob_length,
matcher_multiplier, cn_factors);
if (unicharset.get_enabled(class_id)) {
AddNewResult(final_results, class_id, -1, rating,
classes != NULL, int_result.Config,
fontinfo_id, fontinfo_id2);
int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
int_result->rating,
int_result->feature_misses,
bottom, top, blob_length,
matcher_multiplier, cn_factors);
AddNewResult(*int_result, final_results);
}
int_result.Rating = rating;
}
// Applies a set of corrections to the distance im_rating,
// Applies a set of corrections to the confidence im_rating,
// including the cn_correction, miss penalty and additional penalty
// for non-alnums being vertical misfits. Returns the corrected distance.
// for non-alnums being vertical misfits. Returns the corrected confidence.
double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
double cp_rating, double im_rating,
int feature_misses,
@ -1201,7 +1219,7 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
int blob_length, int matcher_multiplier,
const uinT8* cn_factors) {
// Compute class feature corrections.
double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length,
double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
cn_factors[unichar_id],
matcher_multiplier);
double miss_penalty = tessedit_class_miss_scale * feature_misses;
@ -1222,16 +1240,16 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
vertical_penalty = classify_misfit_junk_penalty;
}
}
double result =cn_corrected + miss_penalty + vertical_penalty;
if (result > WORST_POSSIBLE_RATING)
double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
if (result < WORST_POSSIBLE_RATING)
result = WORST_POSSIBLE_RATING;
if (debug) {
tprintf("%s: %2.1f(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
unicharset.id_to_unichar(unichar_id),
result * 100.0,
cp_rating * 100.0,
im_rating * 100.0,
(cn_corrected - im_rating) * 100.0,
(1.0 - im_rating) * 100.0,
(cn_corrected - (1.0 - im_rating)) * 100.0,
cn_factors[unichar_id],
miss_penalty * 100.0,
vertical_penalty * 100.0);
@ -1266,11 +1284,11 @@ UNICHAR_ID *Classify::BaselineClassifier(
ClearCharNormArray(CharNormArray);
Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
PruneClasses(Templates->Templates, int_features.size(), &int_features[0],
PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
CharNormArray, BaselineCutoffs, &Results->CPResults);
if (matcher_debug_level >= 2 || classify_debug_level > 1)
cprintf ("BL Matches = ");
tprintf("BL Matches = ");
MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
CharNormArray,
@ -1278,13 +1296,12 @@ UNICHAR_ID *Classify::BaselineClassifier(
Blob->bounding_box(), Results->CPResults, Results);
delete [] CharNormArray;
CLASS_ID ClassId = Results->best_match.unichar_id;
if (ClassId == NO_CLASS)
return (NULL);
/* this is a bug - maybe should return "" */
CLASS_ID ClassId = Results->best_unichar_id;
if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
return NULL;
return Templates->Class[ClassId]->
Config[Results->best_match.config].Perm->Ambigs;
Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
} /* BaselineClassifier */
@ -1318,14 +1335,7 @@ int Classify::CharNormClassifier(TBLOB *blob,
-1, &unichar_results);
// Convert results to the format used internally by AdaptiveClassifier.
for (int r = 0; r < unichar_results.size(); ++r) {
int unichar_id = unichar_results[r].unichar_id;
// Fonts are listed in order of preference.
int font1 = unichar_results[r].fonts.size() >= 1
? unichar_results[r].fonts[0] : kBlankFontinfoId;
int font2 = unichar_results[r].fonts.size() >= 2
? unichar_results[r].fonts[1] : kBlankFontinfoId;
float rating = 1.0f - unichar_results[r].rating;
AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2);
AddNewResult(unichar_results[r], adapt_results);
}
return sample.num_features();
} /* CharNormClassifier */
@ -1356,7 +1366,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
pruner_norm_array);
PruneClasses(PreTrainedTemplates, num_features, sample.features(),
PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
pruner_norm_array,
shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
&adapt_results->CPResults);
@ -1380,14 +1390,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
blob_box, adapt_results->CPResults, adapt_results);
// Convert master matcher results to output format.
for (int i = 0; i < adapt_results->match.size(); i++) {
ScoredClass next = adapt_results->match[i];
UnicharRating rating(next.unichar_id, 1.0f - next.rating);
if (next.fontinfo_id >= 0) {
rating.fonts.push_back(next.fontinfo_id);
if (next.fontinfo_id2 >= 0)
rating.fonts.push_back(next.fontinfo_id2);
}
results->push_back(rating);
results->push_back(adapt_results->match[i]);
}
results->sort(&UnicharRating::SortDescendingRating);
}
@ -1412,60 +1415,14 @@ int Classify::CharNormTrainingSample(bool pruner_only,
* @note Exceptions: none
* @note History: Tue Mar 12 18:36:52 1991, DSJ, Created.
*/
void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) {
register FLOAT32 Rating;
void Classify::ClassifyAsNoise(ADAPT_RESULTS *results) {
float rating = results->BlobLength / matcher_avg_noise_size;
rating *= rating;
rating /= 1.0 + rating;
Rating = Results->BlobLength / matcher_avg_noise_size;
Rating *= Rating;
Rating /= 1.0 + Rating;
AddNewResult(Results, NO_CLASS, -1, Rating, false, -1,
kBlankFontinfoId, kBlankFontinfoId);
AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
} /* ClassifyAsNoise */
} // namespace tesseract
/*---------------------------------------------------------------------------*/
// Return a pointer to the scored unichar in results, or NULL if not present.
ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) {
for (int i = 0; i < results->match.size(); i++) {
if (results->match[i].unichar_id == id)
return &results->match[i];
}
return NULL;
}
// Retrieve the current rating for a unichar id if we have rated it, defaulting
// to WORST_POSSIBLE_RATING.
ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) {
ScoredClass poor_result =
{id, -1, WORST_POSSIBLE_RATING, false, -1,
kBlankFontinfoId, kBlankFontinfoId};
ScoredClass *entry = FindScoredUnichar(results, id);
return (entry == NULL) ? poor_result : *entry;
}
// Compare character classes by rating as for qsort(3).
// For repeatability, use character class id as a tie-breaker.
int CompareByRating(const void *arg1, // ScoredClass *class1
const void *arg2) { // ScoredClass *class2
const ScoredClass *class1 = (const ScoredClass *)arg1;
const ScoredClass *class2 = (const ScoredClass *)arg2;
if (class1->rating < class2->rating)
return -1;
else if (class1->rating > class2->rating)
return 1;
if (class1->unichar_id < class2->unichar_id)
return -1;
else if (class1->unichar_id > class2->unichar_id)
return 1;
return 0;
}
/*---------------------------------------------------------------------------*/
namespace tesseract {
/// The function converts the given match ratings to the list of blob
/// choices with ratings and certainties (used by the context checkers).
/// If character fragments are present in the results, this function also makes
@ -1496,11 +1453,9 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
float best_certainty = -MAX_FLOAT32;
for (int i = 0; i < Results->match.size(); i++) {
ScoredClass next = Results->match[i];
int fontinfo_id = next.fontinfo_id;
int fontinfo_id2 = next.fontinfo_id2;
bool adapted = next.adapted;
bool current_is_frag = (unicharset.get_fragment(next.unichar_id) != NULL);
const UnicharRating& result = Results->match[i];
bool adapted = result.adapted;
bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != NULL);
if (temp_it.length()+1 == max_matches &&
!contains_nonfrag && current_is_frag) {
continue; // look for a non-fragmented character to fill the
@ -1514,7 +1469,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
Certainty = -20;
Rating = 100; // should be -certainty * real_blob_length
} else {
Rating = Certainty = next.rating;
Rating = Certainty = (1.0f - result.rating);
Rating *= rating_scale * Results->BlobLength;
Certainty *= -(getDict().certainty_scale);
}
@ -1531,14 +1486,16 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
}
float min_xheight, max_xheight, yshift;
denorm.XHeightRange(next.unichar_id, unicharset, box,
denorm.XHeightRange(result.unichar_id, unicharset, box,
&min_xheight, &max_xheight, &yshift);
temp_it.add_to_end(new BLOB_CHOICE(next.unichar_id, Rating, Certainty,
fontinfo_id, fontinfo_id2,
unicharset.get_script(next.unichar_id),
min_xheight, max_xheight, yshift,
adapted ? BCC_ADAPTED_CLASSIFIER
: BCC_STATIC_CLASSIFIER));
BLOB_CHOICE* choice =
new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
unicharset.get_script(result.unichar_id),
min_xheight, max_xheight, yshift,
adapted ? BCC_ADAPTED_CLASSIFIER
: BCC_STATIC_CLASSIFIER);
choice->set_fonts(result.fonts);
temp_it.add_to_end(choice);
contains_nonfrag |= !current_is_frag; // update contains_nonfrag
choices_length++;
if (choices_length >= max_matches) break;
@ -1562,17 +1519,13 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
void Classify::DebugAdaptiveClassifier(TBLOB *blob,
ADAPT_RESULTS *Results) {
if (static_classifier_ == NULL) return;
for (int i = 0; i < Results->match.size(); i++) {
if (i == 0 || Results->match[i].rating < Results->best_match.rating)
Results->best_match = Results->match[i];
}
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
if (sample == NULL) return;
static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
Results->best_match.unichar_id);
Results->best_unichar_id);
} /* DebugAdaptiveClassifier */
#endif
@ -1615,7 +1568,8 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
} else {
Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
AdaptedTemplates, Results);
if ((!Results->match.empty() && MarginalMatch(Results->best_match.rating) &&
if ((!Results->match.empty() &&
MarginalMatch(Results->best_rating, matcher_great_threshold) &&
!tess_bn_matching) ||
Results->match.empty()) {
CharNormClassifier(Blob, *sample, Results);
@ -1674,7 +1628,7 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
CharNormClassifier(Blob, *sample, Results);
delete sample;
RemoveBadMatches(Results);
Results->match.sort(CompareByRating);
Results->match.sort(&UnicharRating::SortDescendingRating);
/* copy the class id's into an string of ambiguities - don't copy if
the correct class is the only class id matched */
@ -2094,14 +2048,11 @@ namespace tesseract {
* @note Exceptions: none
* @note History: Mon Mar 18 09:24:53 1991, DSJ, Created.
*/
void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
for (int i = 0; i < Results->match.size(); ++i) {
tprintf("%s(%d), shape %d, %.2f ",
unicharset.debug_str(Results->match[i].unichar_id).string(),
Results->match[i].unichar_id, Results->match[i].shape_id,
Results->match[i].rating * 100.0);
void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) {
for (int i = 0; i < results.match.size(); ++i) {
tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string());
results.match[i].Print();
}
tprintf("\n");
} /* PrintAdaptiveMatchResults */
/*---------------------------------------------------------------------------*/
@ -2124,40 +2075,49 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
int Next, NextGood;
FLOAT32 BadMatchThreshold;
static const char* romans = "i v x I V X";
BadMatchThreshold = Results->best_match.rating + matcher_bad_match_pad;
BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
if (classify_bln_numeric_mode) {
UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
unicharset.unichar_to_id("1") : -1;
UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
unicharset.unichar_to_id("0") : -1;
ScoredClass scored_one = ScoredUnichar(Results, unichar_id_one);
ScoredClass scored_zero = ScoredUnichar(Results, unichar_id_zero);
float scored_one = ScoredUnichar(unichar_id_one, *Results);
float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
if (Results->match[Next].rating <= BadMatchThreshold) {
ScoredClass match = Results->match[Next];
const UnicharRating& match = Results->match[Next];
if (match.rating >= BadMatchThreshold) {
if (!unicharset.get_isalpha(match.unichar_id) ||
strstr(romans,
unicharset.id_to_unichar(match.unichar_id)) != NULL) {
Results->match[NextGood++] = Results->match[Next];
} else if (unicharset.eq(match.unichar_id, "l") &&
scored_one.rating >= BadMatchThreshold) {
Results->match[NextGood] = scored_one;
Results->match[NextGood].rating = match.rating;
NextGood++;
scored_one < BadMatchThreshold) {
Results->match[Next].unichar_id = unichar_id_one;
} else if (unicharset.eq(match.unichar_id, "O") &&
scored_zero.rating >= BadMatchThreshold) {
Results->match[NextGood] = scored_zero;
Results->match[NextGood].rating = match.rating;
NextGood++;
scored_zero < BadMatchThreshold) {
Results->match[Next].unichar_id = unichar_id_zero;
} else {
Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
}
if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
if (NextGood == Next) {
++NextGood;
} else {
Results->match[NextGood++] = Results->match[Next];
}
}
}
}
} else {
for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
if (Results->match[Next].rating <= BadMatchThreshold)
Results->match[NextGood++] = Results->match[Next];
if (Results->match[Next].rating >= BadMatchThreshold) {
if (NextGood == Next) {
++NextGood;
} else {
Results->match[NextGood++] = Results->match[Next];
}
}
}
}
Results->match.truncate(NextGood);
@@ -2184,18 +2144,24 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
punc_count = 0;
digit_count = 0;
for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
ScoredClass match = Results->match[Next];
const UnicharRating& match = Results->match[Next];
bool keep = true;
if (strstr(punc_chars,
unicharset.id_to_unichar(match.unichar_id)) != NULL) {
if (punc_count < 2)
Results->match[NextGood++] = match;
if (punc_count >= 2)
keep = false;
punc_count++;
} else {
if (strstr(digit_chars,
unicharset.id_to_unichar(match.unichar_id)) != NULL) {
if (digit_count < 1)
Results->match[NextGood++] = match;
if (digit_count >= 1)
keep = false;
digit_count++;
}
}
if (keep) {
if (NextGood == Next) {
++NextGood;
} else {
Results->match[NextGood++] = match;
}
@@ -2252,7 +2218,7 @@ void Classify::ShowBestMatchFor(int shape_id,
tprintf("Illegal blob (char norm features)!\n");
return;
}
INT_RESULT_STRUCT cn_result;
UnicharRating cn_result;
classify_norm_method.set_value(character);
im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
AllProtosOn, AllConfigsOn,
@@ -2260,7 +2226,7 @@ void Classify::ShowBestMatchFor(int shape_id,
classify_adapt_feature_threshold, NO_DEBUG,
matcher_debug_separate_windows);
tprintf("\n");
config_mask = 1 << cn_result.Config;
config_mask = 1 << cn_result.config;
tprintf("Static Shape ID: %d\n", shape_id);
ShowMatchDisplay();

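The rewritten RemoveBadMatches/RemoveExtraPuncs above also change the sense of the threshold test: ratings are now similarities (higher is better), so the cutoff becomes best_rating - matcher_bad_match_pad and survivors need rating >= threshold. Below is a minimal standalone sketch of the in-place keep/compact idiom those loops use, with a hypothetical Match struct and made-up ratings rather than the real UnicharRating type.

#include <cstdio>
#include <vector>

struct Match {            // hypothetical stand-in for tesseract::UnicharRating
  int unichar_id;
  float rating;           // similarity: higher is better
};

// Keeps only matches whose rating is within `pad` of the best rating,
// compacting the vector in place and copying an entry down only when a
// gap has already opened (the NextGood/Next idiom above).
void KeepGoodMatches(std::vector<Match>* matches, float best_rating, float pad) {
  const float bad_match_threshold = best_rating - pad;
  size_t next_good = 0;
  for (size_t next = 0; next < matches->size(); ++next) {
    if ((*matches)[next].rating >= bad_match_threshold) {
      if (next_good == next) {
        ++next_good;                          // already in place, no copy
      } else {
        (*matches)[next_good++] = (*matches)[next];
      }
    }
  }
  matches->resize(next_good);                 // same role as truncate(NextGood)
}

int main() {
  std::vector<Match> matches = {{'a', 0.90f}, {'b', 0.40f}, {'c', 0.80f}};
  KeepGoodMatches(&matches, 0.90f, 0.15f);
  for (const Match& m : matches)
    std::printf("%c %.2f\n", m.unichar_id, m.rating);  // 'a' and 'c' survive
  return 0;
}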
View File

@@ -20,63 +20,32 @@
Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "blobclass.h"
#include "extract.h"
#include <stdio.h>
#include "classify.h"
#include "efio.h"
#include "featdefs.h"
#include "callcpp.h"
#include <math.h>
#include <stdio.h>
#include <signal.h>
#define MAXFILENAME 80
#define MAXMATCHES 10
#include "mf.h"
#include "normfeat.h"
static const char kUnknownFontName[] = "UnknownFont";
STRING_VAR(classify_font_name, kUnknownFontName,
"Default font name to be used in training");
/**----------------------------------------------------------------------------
Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
/* name of current image file being processed */
extern char imagefile[];
namespace tesseract {
/**----------------------------------------------------------------------------
Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
// As all TBLOBs, Blob is in baseline normalized coords.
// See SetupBLCNDenorms in intfx.cpp for other args.
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info, const char* BlobText) {
/*
** Parameters:
** Blob blob whose micro-features are to be learned
** Row row of text that blob came from
** BlobText text that corresponds to blob
** TextLength number of characters in blob
** Globals:
** imagefile base filename of the page being learned
** classify_font_name
** name of font currently being trained on
** Operation:
** Extract micro-features from the specified blob and append
** them to the appropriate file.
** Return: none
** Exceptions: none
** History: 7/28/89, DSJ, Created.
*/
#define TRAIN_SUFFIX ".tr"
static FILE *FeatureFile = NULL;
STRING Filename(filename);
// If no fontname was set, try to extract it from the filename
STRING CurrFontName = classify_font_name;
if (CurrFontName == kUnknownFontName) {
// Finds the name of the training font and returns it in fontname, by cutting
// it out based on the expectation that the filename is of the form:
// /path/to/dir/[lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
// If the global parameter classify_font_name is set, its value is used instead.
void ExtractFontName(const STRING& filename, STRING* fontname) {
*fontname = classify_font_name;
if (*fontname == kUnknownFontName) {
// filename is expected to be of the form [lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
const char *basename = strrchr(filename.string(), '/');
@@ -84,47 +53,56 @@ void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
const char *lastdot = strrchr(filename.string(), '.');
if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) {
++firstdot;
CurrFontName = firstdot;
CurrFontName[lastdot - firstdot] = '\0';
*fontname = firstdot;
fontname->truncate_at(lastdot - firstdot);
}
}
}
// if a feature file is not yet open, open it
// the name of the file is the name of the image plus TRAIN_SUFFIX
if (FeatureFile == NULL) {
Filename += TRAIN_SUFFIX;
FeatureFile = Efopen(Filename.string(), "wb");
cprintf("TRAINING ... Font name = %s\n", CurrFontName.string());
}
/*---------------------------------------------------------------------------*/
// Extracts features from the given blob and saves them in the tr_file_data_
// member variable.
// fontname: Name of font that this blob was printed in.
// cn_denorm: Character normalization transformation to apply to the blob.
// fx_info: Character normalization parameters computed with cn_denorm.
// blob_text: Ground truth text for the blob.
void Classify::LearnBlob(const STRING& fontname, TBLOB* blob,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
const char* blob_text) {
CHAR_DESC CharDesc = NewCharDescription(feature_defs_);
CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
LearnBlob(FeatureDefs, FeatureFile, Blob, bl_denorm, cn_denorm, fx_info,
BlobText, CurrFontName.string());
} // LearnBlob
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* FeatureFile,
TBLOB* Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
const char* BlobText, const char* FontName) {
CHAR_DESC CharDesc;
ASSERT_HOST(FeatureFile != NULL);
CharDesc = ExtractBlobFeatures(FeatureDefs, bl_denorm, cn_denorm, fx_info,
Blob);
if (CharDesc == NULL) {
cprintf("LearnBLob: CharDesc was NULL. Aborting.\n");
return;
}
if (ValidCharDescription(FeatureDefs, CharDesc)) {
// label the features with a class name and font name
fprintf(FeatureFile, "\n%s %s\n", FontName, BlobText);
if (ValidCharDescription(feature_defs_, CharDesc)) {
// Label the features with a class name and font name.
tr_file_data_ += "\n";
tr_file_data_ += fontname;
tr_file_data_ += " ";
tr_file_data_ += blob_text;
tr_file_data_ += "\n";
// write micro-features to file and clean up
WriteCharDescription(FeatureDefs, FeatureFile, CharDesc);
WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
} else {
tprintf("Blob learned was invalid!\n");
}
FreeCharDescription(CharDesc);
} // LearnBlob
// Writes stored training data to a .tr file based on the given filename.
// Returns false on error.
bool Classify::WriteTRFile(const STRING& filename) {
STRING tr_filename = filename + ".tr";
FILE* fp = Efopen(tr_filename.string(), "wb");
int len = tr_file_data_.length();
bool result =
fwrite(&tr_file_data_[0], sizeof(tr_file_data_[0]), len, fp) == len;
fclose(fp);
tr_file_data_.truncate_at(0);
return result;
}
} // namespace tesseract.

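The new ExtractFontName() above derives the font name from the training file name. Here is a self-contained sketch of that parsing convention, using std::string instead of Tesseract's STRING class and made-up file names for the demonstration.

#include <cstdio>
#include <string>

// Sketch of the naming convention ExtractFontName() relies on: the training
// file is expected to be /path/to/dir/[lang].[fontname].exp[num], and the font
// name is whatever sits between the first and last '.' of the basename.
// Returns an empty string if the name does not match the pattern.
std::string FontNameFromFilename(const std::string& filename) {
  const size_t slash = filename.find_last_of('/');
  const size_t start = (slash == std::string::npos) ? 0 : slash + 1;
  const size_t firstdot = filename.find('.', start);
  const size_t lastdot = filename.find_last_of('.');
  if (firstdot == std::string::npos || lastdot == std::string::npos ||
      firstdot == lastdot)
    return "";
  return filename.substr(firstdot + 1, lastdot - firstdot - 1);
}

int main() {
  // Hypothetical training file names, just to exercise the parser.
  std::printf("%s\n", FontNameFromFilename("/tmp/train/eng.Arial.exp0").c_str());  // Arial
  std::printf("%s\n", FontNameFromFilename("deu.Courier_New.exp3").c_str());       // Courier_New
  std::printf("%s\n", FontNameFromFilename("no_pattern_here").c_str());            // (empty)
  return 0;
}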
View File

@@ -21,9 +21,7 @@
/**----------------------------------------------------------------------------
Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "featdefs.h"
#include "oldlist.h"
#include "blobs.h"
#include "strngs.h"
/*---------------------------------------------------------------------------
Macros
@@ -39,18 +37,14 @@
/**----------------------------------------------------------------------------
Public Function Prototypes
----------------------------------------------------------------------------**/
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
const char* BlobText);
namespace tesseract {
// Finds the name of the training font and returns it in fontname, by cutting
// it out based on the expectation that the filename is of the form:
// /path/to/dir/[lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
// If the global parameter classify_font_name is set, its value is used instead.
void ExtractFontName(const STRING& filename, STRING* fontname);
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* File, TBLOB* Blob,
const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
const char* BlobText, const char* FontName);
} // namespace tesseract.
/**----------------------------------------------------------------------------
Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
/*parameter used to turn on/off output of recognized chars to the screen */
#endif

View File

@@ -217,7 +217,7 @@ void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) {
(rating_scale * blob_length);
}
BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
-1, -1, 0, 0, MAX_FLOAT32, 0,
-1, 0.0f, MAX_FLOAT32, 0,
BCC_SPECKLE_CLASSIFIER);
bc_it.add_to_end(blob_choice);
}

View File

@@ -25,6 +25,7 @@
#include "dict.h"
#include "featdefs.h"
#include "fontinfo.h"
#include "imagedata.h"
#include "intfx.h"
#include "intmatcher.h"
#include "normalis.h"
@@ -97,9 +98,8 @@ class Classify : public CCStruct {
// results (output) Sorted Array of pruned classes.
// Array must be sized to take the maximum possible
// number of outputs : int_templates->NumClasses.
int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
int num_features,
const INT_FEATURE_STRUCT* features,
int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, int num_features,
int keep_this, const INT_FEATURE_STRUCT* features,
const uinT8* normalization_factors,
const uinT16* expected_num_features,
GenericVector<CP_RESULT_STRUCT>* results);
@@ -119,25 +119,25 @@ class Classify : public CCStruct {
const UNICHARSET& target_unicharset);
/* adaptmatch.cpp ***********************************************************/
// Learn the given word using its chopped_word, seam_array, denorm,
// Learns the given word using its chopped_word, seam_array, denorm,
// box_word, best_state, and correct_text to learn both correctly and
// incorrectly segmented blobs. If filename is not NULL, then LearnBlob
// is called and the data will be written to a file for static training.
// incorrectly segmented blobs. If fontname is not NULL, then LearnBlob
// is called and the data will be saved in an internal buffer.
// Otherwise AdaptToBlob is called for adaption within a document.
void LearnWord(const char* filename, WERD_RES *word);
void LearnWord(const char* fontname, WERD_RES* word);
// Builds a blob of length fragments, from the word, starting at start,
// and then learn it, as having the given correct_text.
// If filename is not NULL, then LearnBlob
// is called and the data will be written to a file for static training.
// and then learns it, as having the given correct_text.
// If fontname is not NULL, then LearnBlob is called and the data will be
// saved in an internal buffer for static training.
// Otherwise AdaptToBlob is called for adaption within a document.
// threshold is a magic number required by AdaptToChar and generated by
// GetAdaptThresholds.
// ComputeAdaptionThresholds.
// Although it can be partly inferred from the string, segmentation is
// provided to explicitly clarify the character segmentation.
void LearnPieces(const char* filename, int start, int length,
float threshold, CharSegmentationType segmentation,
const char* correct_text, WERD_RES *word);
void LearnPieces(const char* fontname, int start, int length, float threshold,
CharSegmentationType segmentation, const char* correct_text,
WERD_RES* word);
void InitAdaptiveClassifier(bool load_pre_trained_templates);
void InitAdaptedClass(TBLOB *Blob,
CLASS_ID ClassId,
@@ -174,7 +174,7 @@ class Classify : public CCStruct {
int blob_length,
int matcher_multiplier,
const uinT8* cn_factors,
INT_RESULT_STRUCT& int_result,
UnicharRating* int_result,
ADAPT_RESULTS* final_results);
// Applies a set of corrections to the distance im_rating,
// including the cn_correction, miss penalty and additional penalty
@@ -187,14 +187,7 @@ class Classify : public CCStruct {
void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
ADAPT_RESULTS *Results,
BLOB_CHOICE_LIST *Choices);
void AddNewResult(ADAPT_RESULTS *results,
CLASS_ID class_id,
int shape_id,
FLOAT32 rating,
bool adapted,
int config,
int fontinfo_id,
int fontinfo_id2);
void AddNewResult(const UnicharRating& new_result, ADAPT_RESULTS *results);
int GetAdaptiveFeatures(TBLOB *Blob,
INT_FEATURE_ARRAY IntFeatures,
FEATURE_SET *FloatFeatures);
@@ -219,7 +212,7 @@ class Classify : public CCStruct {
CLASS_ID ClassId,
int ConfigId,
TBLOB *Blob);
void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
void PrintAdaptiveMatchResults(const ADAPT_RESULTS& results);
void RemoveExtraPuncs(ADAPT_RESULTS *Results);
void RemoveBadMatches(ADAPT_RESULTS *Results);
void SetAdaptiveThreshold(FLOAT32 Threshold);
@@ -361,7 +354,22 @@ class Classify : public CCStruct {
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob);
/* picofeat.cpp ***********************************************************/
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob);
FEATURE_SET ExtractIntCNFeatures(const TBLOB& blob,
const INT_FX_RESULT_STRUCT& fx_info);
FEATURE_SET ExtractIntGeoFeatures(const TBLOB& blob,
const INT_FX_RESULT_STRUCT& fx_info);
/* blobclass.cpp ***********************************************************/
// Extracts features from the given blob and saves them in the tr_file_data_
// member variable.
// fontname: Name of font that this blob was printed in.
// cn_denorm: Character normalization transformation to apply to the blob.
// fx_info: Character normalization parameters computed with cn_denorm.
// blob_text: Ground truth text for the blob.
void LearnBlob(const STRING& fontname, TBLOB* Blob, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text);
// Writes stored training data to a .tr file based on the given filename.
// Returns false on error.
bool WriteTRFile(const STRING& filename);
// Member variables.
@@ -498,6 +506,9 @@ class Classify : public CCStruct {
/* variables used to hold performance statistics */
int NumAdaptationsFailed;
// Training data gathered here for all the images in a document.
STRING tr_file_data_;
// Expected number of features in the class pruner, used to penalize
// unknowns that have too few features (like a c being classified as e) so
// it doesn't recognize everything as '@' or '#'.

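The classify.h changes above replace per-file feature output with an in-memory buffer (tr_file_data_) that LearnBlob() appends to and WriteTRFile() flushes once per page. The following toy, self-contained sketch mirrors that buffering pattern; the class name, the fake feature payload and the file names are illustrative only and are not the real Classify API.

#include <cstdio>
#include <string>

// Toy stand-in for the tr_file_data_ buffering in Classify: each learned blob
// appends "\n<font> <text>\n" plus its (here fake) feature description to an
// in-memory buffer, and WriteTRFile() flushes the buffer to <name>.tr.
class TrainingBuffer {
 public:
  void LearnBlob(const std::string& fontname, const std::string& blob_text,
                 const std::string& char_description) {
    buffer_ += "\n" + fontname + " " + blob_text + "\n";
    buffer_ += char_description;            // WriteCharDescription() analogue
  }

  // Returns false on error; clears the buffer on success, just as the real
  // WriteTRFile() truncates tr_file_data_ after writing.
  bool WriteTRFile(const std::string& filename) {
    const std::string tr_filename = filename + ".tr";
    FILE* fp = std::fopen(tr_filename.c_str(), "wb");
    if (fp == nullptr) return false;
    const size_t len = buffer_.length();
    const bool ok = std::fwrite(buffer_.data(), 1, len, fp) == len;
    std::fclose(fp);
    if (ok) buffer_.clear();
    return ok;
  }

 private:
  std::string buffer_;
};

int main() {
  TrainingBuffer trainer;
  trainer.LearnBlob("Arial", "a", "mf 3 ...\n");   // fake feature lines
  trainer.LearnBlob("Arial", "b", "mf 4 ...\n");
  return trainer.WriteTRFile("eng.Arial.exp0") ? 0 : 1;
}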
View File

@@ -1,32 +0,0 @@
#ifndef EXTERN_H
#define EXTERN_H
/* -*-C-*-
********************************************************************************
*
* File: extern.h (Formerly extern.h)
* Description: External definitions for C or C++
* Author: Mark Seaman, OCR Technology
* Created: Tue Mar 20 14:01:22 1990
* Modified: Tue Mar 20 14:02:09 1990 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
*
* (c) Copyright 1990, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
********************************************************************************
*/
#define EXTERN extern
#endif

View File

@@ -1,74 +0,0 @@
/******************************************************************************
** Filename: extract.c
** Purpose: Generic high level feature extractor routines.
** Author: Dan Johnson
** History: Sun Jan 21 09:44:08 1990, DSJ, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
/*-----------------------------------------------------------------------------
Include Files and Type Defines
-----------------------------------------------------------------------------*/
#include "extract.h"
#include "flexfx.h"
#include "danerror.h"
typedef CHAR_FEATURES (*CF_FUNC) ();
/*-----------------------------------------------------------------------------
Private Function Prototypes
-----------------------------------------------------------------------------*/
void ExtractorStub();
/*-----------------------------------------------------------------------------
Public Code
-----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/**
* Extract features from Blob by calling the feature
* extractor which is currently being used. This routine
* simply provides a high level interface to feature
* extraction. The caller can extract any type of features
* from a blob without understanding any lower level details.
*
* @param FeatureDefs definitions of feature types/extractors
* @param denorm Normalize/denormalize to access original image
* @param Blob blob to extract features from
*
* @return The character features extracted from Blob.
* @note Exceptions: none
* @note History: Sun Jan 21 10:07:28 1990, DSJ, Created.
*/
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
TBLOB *Blob) {
return ExtractFlexFeatures(FeatureDefs, Blob, bl_denorm, cn_denorm, fx_info);
} /* ExtractBlobFeatures */
/*-----------------------------------------------------------------------------
Private Code
-----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
void
ExtractorStub ()
/**
* This routine is used to stub out feature extractors
* that are no longer used. It simply calls DoError.
*
* @note Exceptions: none
* @note History: Wed Jan 2 14:16:49 1991, DSJ, Created.
*/
#define DUMMY_ERROR 1
{
DoError (DUMMY_ERROR, "Selected feature extractor has been stubbed out!");
} /* ExtractorStub */

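The deleted extract.cpp above provided a thin facade (ExtractBlobFeatures) so callers could request features without knowing the individual extractors; the new code in blobclass.cpp fills the CHAR_DESC feature sets by direct calls instead. For illustration only, here is a generic sketch of the dispatch-through-a-table pattern that facade represented, with hypothetical Blob/FeatureSet stand-ins rather than the Tesseract types.

#include <cstdio>
#include <vector>

// Hypothetical stand-ins: the caller asks for "all features" and each
// registered extractor is invoked in turn, without the caller knowing the
// individual extractors.
struct Blob { int id; };
using FeatureSet = std::vector<float>;
using Extractor = FeatureSet (*)(const Blob&);

FeatureSet ExtractMicroFeatures(const Blob& b) { return {1.0f * b.id}; }
FeatureSet ExtractCharNormFeatures(const Blob& b) { return {2.0f * b.id}; }

FeatureSet ExtractAll(const Blob& blob, const std::vector<Extractor>& table) {
  FeatureSet all;
  for (Extractor fx : table) {
    FeatureSet part = fx(blob);
    all.insert(all.end(), part.begin(), part.end());
  }
  return all;
}

int main() {
  Blob blob{3};
  FeatureSet features =
      ExtractAll(blob, {ExtractMicroFeatures, ExtractCharNormFeatures});
  for (float f : features) std::printf("%.1f ", f);
  std::printf("\n");   // prints "3.0 6.0"
  return 0;
}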
View File

@@ -1,40 +0,0 @@
/******************************************************************************
** Filename: extract.h
** Purpose: Interface to high level generic feature extraction.
** Author: Dan Johnson
** History: 1/21/90, DSJ, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
#ifndef EXTRACT_H
#define EXTRACT_H
#include "featdefs.h"
#include <stdio.h>
class DENORM;
/*-----------------------------------------------------------------------------
Public Function Prototypes
-----------------------------------------------------------------------------*/
// Deprecated! Will be deleted soon!
// In the meantime, as all TBLOBs, Blob is in baseline normalized coords.
// See SetupBLCNDenorms in intfx.cpp for other args.
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info, TBLOB *Blob);
/*---------------------------------------------------------------------------
Private Function Prototypes
----------------------------------------------------------------------------*/
void ExtractorStub();
#endif

Some files were not shown because too many files have changed in this diff.