Added simultaneous multi-language capability, Refactored top-level word recognition module, Blamer module added for error analysis, Tidied up constraints on control parameters, Added UNICHARSET to WERD_CHOICE to make multi-language handling easier, Added word bigram correction

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@655 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2012-02-02 03:06:39 +00:00
parent 73adf693d5
commit 9206e92b0d
45 changed files with 3069 additions and 407 deletions

View File

@ -4,11 +4,12 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/image -I$(top_srcdir)/viewer
include_HEADERS = \
blckerr.h blobbox.h blobs.h blread.h boxword.h ccstruct.h coutln.h crakedge.h \
detlinefit.h dppoint.h genblob.h hpddef.h hpdsizes.h ipoints.h \
blckerr.h blobbox.h blobs.h blread.h boxread.h boxword.h ccstruct.h coutln.h crakedge.h \
detlinefit.h dppoint.h fontinfo.h genblob.h hpddef.h hpdsizes.h ipoints.h \
linlsq.h matrix.h mod128.h normalis.h \
ocrblock.h ocrrow.h otsuthr.h \
pageres.h pdblock.h points.h polyaprx.h polyblk.h \
ocrblock.h ocrpara.h ocrrow.h otsuthr.h \
pageres.h params_training_featdef.h \
pdblock.h points.h polyaprx.h polyblk.h \
publictypes.h \
quadlsq.h quadratc.h quspline.h ratngs.h rect.h rejctmap.h \
seam.h split.h statistc.h stepblob.h vecfuncs.h werd.h
@ -26,10 +27,10 @@ libtesseract_ccstruct_la_LIBADD = \
endif
libtesseract_ccstruct_la_SOURCES = \
blobbox.cpp blobs.cpp blread.cpp boxword.cpp ccstruct.cpp coutln.cpp \
detlinefit.cpp dppoint.cpp genblob.cpp \
blobbox.cpp blobs.cpp blread.cpp boxread.cpp boxword.cpp ccstruct.cpp coutln.cpp \
detlinefit.cpp dppoint.cpp fontinfo.cpp genblob.cpp \
linlsq.cpp matrix.cpp mod128.cpp normalis.cpp \
ocrblock.cpp ocrrow.cpp otsuthr.cpp \
ocrblock.cpp ocrpara.cpp ocrrow.cpp otsuthr.cpp \
pageres.cpp pdblock.cpp points.cpp polyaprx.cpp polyblk.cpp \
publictypes.cpp \
quadlsq.cpp quadratc.cpp quspline.cpp ratngs.cpp rect.cpp rejctmap.cpp \

View File

@ -32,14 +32,26 @@ const double kCosSmallAngle = 0.866;
const double kDefiniteAspectRatio = 2.0;
// Multiple of short length in perimeter to make a joined word.
const double kComplexShapePerimeterRatio = 1.5;
// Min multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMinMediumSizeRatio = 0.25;
// Max multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMaxMediumSizeRatio = 4.0;
// Rotates the box and the underlying blob.
// The outline held in cblob_ptr is rotated first, then rotate_box() is
// applied to the box, and finally compute_bounding_box() is called.
// NOTE(review): cblob_ptr is dereferenced without a NULL check, so this
// presumably requires a blob to be attached -- confirm against callers.
void BLOBNBOX::rotate(FCOORD rotation) {
cblob_ptr->rotate(rotation);
rotate_box(rotation);
compute_bounding_box();
}
// Rotate the box by the angle given by rotation.
// Mirrors the box about the y-axis (x -> -x): the negated old right edge
// becomes the new left edge and the negated old left edge becomes the new
// right edge. Only the box is modified; the underlying blob is untouched.
void BLOBNBOX::reflect_box_in_y_axis() {
  // Read both edges before writing either of them.
  const int mirrored_left = -box.right();
  const int mirrored_right = -box.left();
  box.set_right(mirrored_right);
  box.set_left(mirrored_left);
}
// Rotates the box by the angle given by rotation.
// If the blob is a diacritic, then only small rotations for skew
// correction can be applied.
void BLOBNBOX::rotate_box(FCOORD rotation) {
@ -57,6 +69,7 @@ void BLOBNBOX::rotate_box(FCOORD rotation) {
set_diacritic_box(box);
}
}
/**********************************************************************
* BLOBNBOX::merge
*
@ -183,6 +196,17 @@ void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max,
if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min;
}
// NULLs out any neighbours that are DeletableNoise to remove references.
void BLOBNBOX::CleanNeighbours() {
for (int dir = 0; dir < BND_COUNT; ++dir) {
BLOBNBOX* neighbour = neighbours_[dir];
if (neighbour != NULL && neighbour->DeletableNoise()) {
neighbours_[dir] = NULL;
good_stroke_neighbours_[dir] = false;
}
}
}
// Returns positive if there is at least one side neighbour that has a similar
// stroke width and is not on the other side of a rule line.
int BLOBNBOX::GoodTextBlob() const {
@ -195,6 +219,18 @@ int BLOBNBOX::GoodTextBlob() const {
return score;
}
// Returns the number of side neighbours that are of type BRT_NOISE.
int BLOBNBOX::NoisyNeighbours() const {
int count = 0;
for (int dir = 0; dir < BND_COUNT; ++dir) {
BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
BLOBNBOX* blob = neighbour(bnd);
if (blob != NULL && blob->region_type() == BRT_NOISE)
++count;
}
return count;
}
// Returns true, and sets vert_possible/horz_possible if the blob has some
// feature that makes it individually appear to flow one way.
// eg if it has a high aspect ratio, yet has a complex shape, such as a
@ -281,7 +317,8 @@ bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other,
// given horizontal range.
TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
FCOORD no_rotation(1.0f, 0.0f);
float top, bottom;
float top = box.top();
float bottom = box.bottom();
if (cblob_ptr != NULL) {
find_cblob_limits(cblob_ptr, static_cast<float>(left),
static_cast<float>(right), no_rotation,
@ -300,7 +337,54 @@ TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
return shrunken_box;
}
// Static helper: walks the whole list once and calls the per-blob
// CleanNeighbours() on every element.
void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) {
  BLOBNBOX_IT it(blobs);
  it.mark_cycle_pt();
  while (!it.cycled_list()) {
    BLOBNBOX* current = it.data();
    current->CleanNeighbours();
    it.forward();
  }
}
// Static helper: removes every DeletableNoise blob from the list.
// For each such blob its cblob() is deleted first, then the element is
// extracted from the list and deleted. Other blobs are left in place.
void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) {
  BLOBNBOX_IT it(blobs);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* candidate = it.data();
    if (!candidate->DeletableNoise())
      continue;  // Keep anything that is owned or is not noise.
    delete candidate->cblob();
    delete it.extract();
  }
}
#ifndef GRAPHICS_DISABLED
// Static helper: plots every blob on the list into win, passing body_colour
// and child_colour through to each blob's plot().
void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list,
                         ScrollView::Color body_colour,
                         ScrollView::Color child_colour,
                         ScrollView* win) {
  BLOBNBOX_IT blob_it(list);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    BLOBNBOX* current = blob_it.data();
    current->plot(win, body_colour, child_colour);
    blob_it.forward();
  }
}
// Static helper: like PlotBlobs, but restricted to the blobs for which
// DeletableNoise() is true (no owner and region type BRT_NOISE). Those blobs
// are plotted into win with the given body_colour and child_colour.
void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list,
                              ScrollView::Color body_colour,
                              ScrollView::Color child_colour,
                              ScrollView* win) {
  BLOBNBOX_IT blob_it(list);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* candidate = blob_it.data();
    if (!candidate->DeletableNoise())
      continue;  // Only unowned noise is drawn.
    candidate->plot(win, body_colour, child_colour);
  }
}
ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type,
BlobTextFlowType flow_type) {
switch (region_type) {
@ -329,6 +413,8 @@ ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type,
return ScrollView::MEDIUM_BLUE;
if (flow_type == BTFT_LEADER)
return ScrollView::WHEAT;
if (flow_type == BTFT_NONTEXT)
return ScrollView::PINK;
return ScrollView::MAGENTA;
default:
return ScrollView::GREY;
@ -678,6 +764,7 @@ void TO_ROW::clear() {
spacing = 0.0;
xheight = 0.0;
xheight_evidence = 0;
body_size = 0.0;
ascrise = 0.0;
descdrop = 0.0;
min_space = 0;
@ -813,15 +900,97 @@ TO_BLOCK::~TO_BLOCK() {
clear_blobnboxes(&large_blobs);
}
// Helper function to divide the input blobs over noise, small, medium
// and large lists. Blobs small in height and (small in width or large in width)
// go in the noise list. Dash (-) candidates go in the small list, and
// medium and large are by height.
// SIDE-EFFECT: reset all blobs to initial state by calling Init().
static void SizeFilterBlobs(int min_height, int max_height,
BLOBNBOX_LIST* src_list,
BLOBNBOX_LIST* noise_list,
BLOBNBOX_LIST* small_list,
BLOBNBOX_LIST* medium_list,
BLOBNBOX_LIST* large_list) {
BLOBNBOX_IT noise_it(noise_list);
BLOBNBOX_IT small_it(small_list);
BLOBNBOX_IT medium_it(medium_list);
BLOBNBOX_IT large_it(large_list);
for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
BLOBNBOX* blob = src_it.extract();
blob->ReInit();
int width = blob->bounding_box().width();
int height = blob->bounding_box().height();
if (height < min_height &&
(width < min_height || width > max_height))
noise_it.add_after_then_move(blob);
else if (height > max_height)
large_it.add_after_then_move(blob);
else if (height < min_height)
small_it.add_after_then_move(blob);
else
medium_it.add_after_then_move(blob);
}
}
// Reorganize the blob lists with a different definition of small, medium
// and large, compared to the original definition.
// Height is still the primary filter key, but medium width blobs of small
// height become small, and very wide blobs of small height stay noise, along
// with small dot-shaped blobs.
void TO_BLOCK::ReSetAndReFilterBlobs() {
int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
BLOBNBOX_LIST noise_list;
BLOBNBOX_LIST small_list;
BLOBNBOX_LIST medium_list;
BLOBNBOX_LIST large_list;
SizeFilterBlobs(min_height, max_height, &blobs,
&noise_list, &small_list, &medium_list, &large_list);
SizeFilterBlobs(min_height, max_height, &large_blobs,
&noise_list, &small_list, &medium_list, &large_list);
SizeFilterBlobs(min_height, max_height, &small_blobs,
&noise_list, &small_list, &medium_list, &large_list);
SizeFilterBlobs(min_height, max_height, &noise_blobs,
&noise_list, &small_list, &medium_list, &large_list);
BLOBNBOX_IT blob_it(&blobs);
blob_it.add_list_after(&medium_list);
blob_it.set_to_list(&large_blobs);
blob_it.add_list_after(&large_list);
blob_it.set_to_list(&small_blobs);
blob_it.add_list_after(&small_list);
blob_it.set_to_list(&noise_blobs);
blob_it.add_list_after(&noise_list);
}
// Deletes noise blobs from all lists where not owned by a ColPartition.
void TO_BLOCK::DeleteUnownedNoise() {
BLOBNBOX::CleanNeighbours(&blobs);
BLOBNBOX::CleanNeighbours(&small_blobs);
BLOBNBOX::CleanNeighbours(&noise_blobs);
BLOBNBOX::CleanNeighbours(&large_blobs);
BLOBNBOX::DeleteNoiseBlobs(&blobs);
BLOBNBOX::DeleteNoiseBlobs(&small_blobs);
BLOBNBOX::DeleteNoiseBlobs(&noise_blobs);
BLOBNBOX::DeleteNoiseBlobs(&large_blobs);
}
#ifndef GRAPHICS_DISABLED
// Plots the DeletableNoise blobs of all four lists into win, using red for
// both the body and the child outlines.
void TO_BLOCK::plot_noise_blobs(ScrollView* win) {
  const ScrollView::Color noise_colour = ScrollView::RED;
  BLOBNBOX_LIST* const lists[] = {
    &noise_blobs, &small_blobs, &large_blobs, &blobs
  };
  const int num_lists = sizeof(lists) / sizeof(lists[0]);
  for (int i = 0; i < num_lists; ++i) {
    BLOBNBOX::PlotNoiseBlobs(lists[i], noise_colour, noise_colour, win);
  }
}
// Draw the blobs on the various lists in the block in different colors.
void TO_BLOCK::plot_graded_blobs(ScrollView* to_win) {
plot_blob_list(to_win, &noise_blobs, ScrollView::CORAL, ScrollView::BLUE);
plot_blob_list(to_win, &small_blobs,
ScrollView::GOLDENROD, ScrollView::YELLOW);
plot_blob_list(to_win, &large_blobs,
ScrollView::DARK_GREEN, ScrollView::YELLOW);
plot_blob_list(to_win, &blobs, ScrollView::WHITE, ScrollView::BROWN);
void TO_BLOCK::plot_graded_blobs(ScrollView* win) {
BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win);
BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW,
win);
BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW,
win);
BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win);
}
/**********************************************************************

View File

@ -28,9 +28,9 @@
enum PITCH_TYPE
{
PITCH_DUNNO, //insufficient data
PITCH_DEF_FIXED, //definitely fixed
PITCH_MAYBE_FIXED, //could be
PITCH_DUNNO, // insufficient data
PITCH_DEF_FIXED, // definitely fixed
PITCH_MAYBE_FIXED, // could be
PITCH_DEF_PROP,
PITCH_MAYBE_PROP,
PITCH_CORR_FIXED,
@ -38,13 +38,16 @@ enum PITCH_TYPE
};
// The possible tab-stop types of each side of a BLOBNBOX.
// The ordering is important, as it is used for deleting dead-ends in the
// search. ALIGNED, CONFIRMED and VLINE should remain greater than the
// non-aligned, unset, or deleted members.
enum TabType {
TT_NONE, // Not a tab.
TT_DELETED, // Not a tab after detailed analysis.
TT_UNCONFIRMED, // Initial designation of a tab-stop candidate.
TT_FAKE, // Added by interpolation.
TT_CONFIRMED, // Aligned with neighbours.
TT_VLINE // Detected as a vertical line.
TT_NONE, // Not a tab.
TT_DELETED, // Not a tab after detailed analysis.
TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate.
TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
TT_CONFIRMED, // Aligned with neighbours.
TT_VLINE // Detected as a vertical line.
};
// The possible region types of a BLOBNBOX.
@ -65,6 +68,7 @@ enum BlobRegionType {
};
// enum for elements of arrays that refer to neighbours.
// NOTE: keep in this order, so ^2 can be used to flip direction.
enum BlobNeighbourDir {
BND_LEFT,
BND_BELOW,
@ -73,6 +77,21 @@ enum BlobNeighbourDir {
BND_COUNT
};
// enum for special types of text characters, such as math symbols or italics.
enum BlobSpecialTextType {
BSTT_NONE, // No special.
BSTT_ITALIC, // Italic style.
BSTT_DIGIT, // Digit symbols.
BSTT_MATH, // Mathematical symbols (not including digits).
BSTT_UNCLEAR, // Characters with low recognition rate.
BSTT_SKIP, // Characters that we skip labeling (usually too small).
BSTT_COUNT // Number of types above; keep as the last entry.
};
// Returns the direction opposite to dir. This works by toggling bit 1 of the
// enum value (dir ^ 2), which is why the order of BlobNeighbourDir matters.
inline BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir) {
  const int flipped = static_cast<int>(dir) ^ 2;
  return static_cast<BlobNeighbourDir>(flipped);
}
// BlobTextFlowType indicates the quality of neighbouring information
// related to a chain of connected components, either horizontally or
// vertically. Also used by ColPartition for the collection of blobs
@ -89,14 +108,10 @@ enum BlobTextFlowType {
};
// Returns true if type1 dominates type2 in a merge. Mostly determined by the
// ordering of the enum, but NONTEXT dominates everything else, and LEADER
// dominates nothing.
// ordering of the enum, LEADER is weak and dominates nothing.
// The function is anti-symmetric (t1 > t2) === !(t2 > t1), except that
// this cannot be true if t1 == t2, so the result is undefined.
inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
// NONTEXT dominates everything.
if (type1 == BTFT_NONTEXT) return true;
if (type2 == BTFT_NONTEXT) return false;
// LEADER always loses.
if (type1 == BTFT_LEADER) return false;
if (type2 == BTFT_LEADER) return true;
@ -127,8 +142,17 @@ class BLOBNBOX:public ELIST_LINK
return new BLOBNBOX(blob);
}
void rotate_box(FCOORD rotation);
// Rotates the box and the underlying blob.
void rotate(FCOORD rotation);
// Methods that act on the box without touching the underlying blob.
// Reflect the box in the y-axis, leaving the underlying blob untouched.
void reflect_box_in_y_axis();
// Rotates the box by the angle given by rotation.
// If the blob is a diacritic, then only small rotations for skew
// correction can be applied.
void rotate_box(FCOORD rotation);
// Moves just the box by the given vector.
void translate_box(ICOORD v) {
if (IsDiacritic()) {
box.move(v);
@ -150,7 +174,17 @@ class BLOBNBOX:public ELIST_LINK
void NeighbourGaps(int gaps[BND_COUNT]) const;
void MinMaxGapsClipped(int* h_min, int* h_max,
int* v_min, int* v_max) const;
void CleanNeighbours();
// Returns positive if there is at least one side neighbour that has a
// similar stroke width and is not on the other side of a rule line.
int GoodTextBlob() const;
// Returns the number of side neighbours that are of type BRT_NOISE.
int NoisyNeighbours() const;
// Returns true only for blobs that have no owner and whose region type is
// BRT_NOISE; any blob with an owner is never deletable noise.
bool DeletableNoise() const {
  if (owner() != NULL)
    return false;
  return region_type() == BRT_NOISE;
}
// Returns true, and sets vert_possible/horz_possible if the blob has some
// feature that makes it individually appear to flow one way.
@ -229,6 +263,12 @@ class BLOBNBOX:public ELIST_LINK
void set_region_type(BlobRegionType new_type) {
region_type_ = new_type;
}
// Returns the special text type currently recorded for this blob.
BlobSpecialTextType special_text_type() const {
return spt_type_;
}
// Records new_type as the special text type of this blob.
void set_special_text_type(BlobSpecialTextType new_type) {
spt_type_ = new_type;
}
BlobTextFlowType flow() const {
return flow_;
}
@ -323,10 +363,23 @@ class BLOBNBOX:public ELIST_LINK
int base_char_bottom() const {
return base_char_bottom_;
}
// Returns the stored number of line intersections touched by this blob.
int line_crossings() const {
return line_crossings_;
}
// Stores value as the number of line intersections touched by this blob.
void set_line_crossings(int value) {
line_crossings_ = value;
}
void set_diacritic_box(const TBOX& diacritic_box) {
base_char_top_ = diacritic_box.top();
base_char_bottom_ = diacritic_box.bottom();
}
// Returns the blob recorded as the base character of this blob, or NULL if
// none has been set.
BLOBNBOX* base_char_blob() const {
return base_char_blob_;
}
// Records blob as the base character of this blob. Only the pointer is
// stored.
void set_base_char_blob(BLOBNBOX* blob) {
base_char_blob_ = blob;
}
bool UniquelyVertical() const {
return vert_possible_ && !horz_possible_;
}
@ -350,11 +403,29 @@ class BLOBNBOX:public ELIST_LINK
static bool UnMergeableType(BlobRegionType type) {
return IsLineType(type) || IsImageType(type);
}
// Helper to call CleanNeighbours on all blobs on the list.
static void CleanNeighbours(BLOBNBOX_LIST* blobs);
// Helper to delete all the deletable blobs on the list.
static void DeleteNoiseBlobs(BLOBNBOX_LIST* blobs);
#ifndef GRAPHICS_DISABLED
// Helper to draw all the blobs on the list in the given body_colour,
// with child outlines in the child_colour.
static void PlotBlobs(BLOBNBOX_LIST* list,
ScrollView::Color body_colour,
ScrollView::Color child_colour,
ScrollView* win);
// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
// given list in the given body_colour, with child outlines in the
// child_colour.
static void PlotNoiseBlobs(BLOBNBOX_LIST* list,
ScrollView::Color body_colour,
ScrollView::Color child_colour,
ScrollView* win);
static ScrollView::Color TextlineColor(BlobRegionType region_type,
BlobTextFlowType flow_type);
#ifndef GRAPHICS_DISABLED
// Keep in sync with BlobRegionType.
ScrollView::Color BoxColor() const;
@ -386,6 +457,7 @@ class BLOBNBOX:public ELIST_LINK
right_tab_type_ = TT_NONE;
region_type_ = BRT_UNKNOWN;
flow_ = BTFT_NONE;
spt_type_ = BSTT_SKIP;
left_rule_ = 0;
right_rule_ = 0;
left_crossing_rule_ = 0;
@ -395,6 +467,8 @@ class BLOBNBOX:public ELIST_LINK
owner_ = NULL;
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();
line_crossings_ = 0;
base_char_blob_ = NULL;
horz_possible_ = false;
vert_possible_ = false;
leader_on_left_ = false;
@ -427,10 +501,13 @@ class BLOBNBOX:public ELIST_LINK
inT16 right_crossing_rule_; // x-coord of nearest or crossing rule line
inT16 base_char_top_; // y-coord of top/bottom of diacritic base,
inT16 base_char_bottom_; // if it exists else top/bottom of this blob.
int line_crossings_; // Number of line intersections touched.
BLOBNBOX* base_char_blob_; // The blob that was the base char.
float horz_stroke_width_; // Median horizontal stroke width
float vert_stroke_width_; // Median vertical stroke width
float area_stroke_width_; // Stroke width from area/perimeter ratio.
tesseract::ColPartition* owner_; // Who will delete me when I am not needed
BlobSpecialTextType spt_type_; // Special text type.
BLOBNBOX* neighbours_[BND_COUNT];
bool good_stroke_neighbours_[BND_COUNT];
bool horz_possible_; // Could be part of horizontal flow.
@ -556,6 +633,8 @@ class TO_ROW: public ELIST2_LINK
int xheight_evidence; // number of blobs of height xheight
float ascrise; // ascenders
float descdrop; // descenders
float body_size; // of CJK characters. Assumed to be
// xheight+ascrise for non-CJK text.
inT32 min_space; // min size for real space
inT32 max_nonspace; // max size of non-space
inT32 space_threshold; // space vs nonspace
@ -640,8 +719,19 @@ class TO_BLOCK:public ELIST_LINK
}
}
// Draw the blobs on on the various lists in the block in different colors.
// Reorganizes the blob lists with a different definition of small, medium
// and large, compared to the original definition.
// Height is still the primary filter key, but medium width blobs of small
// height become small, and very wide blobs of small height stay noise, along
// with small dot-shaped blobs.
void ReSetAndReFilterBlobs();
// Deletes noise blobs from all lists where not owned by a ColPartition.
void DeleteUnownedNoise();
#ifndef GRAPHICS_DISABLED
// Draw the noise blobs from all lists in red.
void plot_noise_blobs(ScrollView* to_win);
// Draw the blobs on the various lists in the block in different colors.
void plot_graded_blobs(ScrollView* to_win);
#endif

View File

@ -29,6 +29,7 @@
#include "mfcpch.h"
#include "blobs.h"
#include "ccstruct.h"
#include "clst.h"
#include "cutil.h"
#include "emalloc.h"
#include "helpers.h"
@ -46,15 +47,18 @@ using tesseract::CCStruct;
// A Vector representing the "vertical" direction when measuring the
// divisibility of blobs into multiple blobs just by separating outlines.
// See divisible_blob below for the use.
const TPOINT kDivisibleVerticalUpright = {0, 1};
const TPOINT kDivisibleVerticalUpright(0, 1);
// A vector representing the "vertical" direction for italic text for use
// when separating outlines. Using it actually deteriorates final accuracy,
// so it is only used for ApplyBoxes chopping to get a better segmentation.
const TPOINT kDivisibleVerticalItalic = {1, 5};
const TPOINT kDivisibleVerticalItalic(1, 5);
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
CLISTIZE(EDGEPT);
// Consume the circular list of EDGEPTs to make a TESSLINE.
TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) {
TESSLINE* result = new TESSLINE;
@ -262,6 +266,36 @@ TBLOB* TBLOB::PolygonalCopy(C_BLOB* src) {
return tblob;
}
// Normalizes the blob for classification only if needed.
// (Normally this means a non-zero classify rotation.)
// If no Normalization is needed, then NULL is returned, and the denorm is
// unchanged. Otherwise a new TBLOB is returned and the denorm points to
// a new DENORM. In this case, both the TBLOB and DENORM must be deleted.
// *denorm is dereferenced unconditionally, so it must point to a valid DENORM
// on entry.
TBLOB* TBLOB::ClassifyNormalizeIfNeeded(const DENORM** denorm) const {
TBLOB* rotated_blob = NULL;
// If necessary, copy the blob and rotate it. The rotation is always
// +/- 90 degrees, as 180 was already taken care of.
// "Necessary" means the denorm has a block whose classify_rotation has a
// non-zero y component.
if ((*denorm)->block() != NULL &&
(*denorm)->block()->classify_rotation().y() != 0.0) {
TBOX box = bounding_box();
int x_middle = (box.left() + box.right()) / 2;
int y_middle = (box.top() + box.bottom()) / 2;
rotated_blob = new TBLOB(*this);
const FCOORD& rotation = (*denorm)->block()->classify_rotation();
DENORM* norm = new DENORM;
// Move the rotated blob back to the same y-position so that we
// can still distinguish similar glyphs with different y-position.
float target_y = kBlnBaselineOffset +
(rotation.y() > 0 ? x_middle - box.left() : box.right() - x_middle);
// The incoming *denorm is passed in as the fourth argument, so the new
// DENORM is set up with reference to it.
norm->SetupNormalization(NULL, NULL, &rotation, *denorm, NULL, 0,
x_middle, y_middle, 1.0f, 1.0f, 0.0f, target_y);
// NOTE(review): the line below looks like a leftover of an earlier argument
// list that used y_middle as the final argument -- confirm and remove.
// x_middle, y_middle, 1.0f, 1.0f, 0.0f, y_middle);
rotated_blob->Normalize(*norm);
// Hand the new DENORM back to the caller, who now owns it.
*denorm = norm;
}
return rotated_blob;
}
// Copies the data and the outline, but leaves next untouched.
void TBLOB::CopyFrom(const TBLOB& src) {
Clear();
@ -289,7 +323,7 @@ void TBLOB::Clear() {
void TBLOB::Normalize(const DENORM& denorm) {
// TODO(rays) outline->Normalize is more accurate, but breaks tests due
// the changes it makes. Reinstate this code with a retraining.
#if 0
#if 1
for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
outline->Normalize(denorm);
}
@ -334,11 +368,20 @@ int TBLOB::NumOutlines() const {
return result;
}
/**********************************************************************
* TBLOB::bounding_box()
*
* Compute the bounding_box of a compound blob, defined to be the
* bounding box of the union of all top-level outlines in the blob.
**********************************************************************/
TBOX TBLOB::bounding_box() const {
TPOINT topleft;
TPOINT botright;
blob_bounding_box(this, &topleft, &botright);
TBOX box(topleft.x, botright.y, botright.x, topleft.y);
if (outlines == NULL)
return TBOX(0, 0, 0, 0);
TESSLINE *outline = outlines;
TBOX box = outline->bounding_box();
for (outline = outline->next; outline != NULL; outline = outline->next) {
box += outline->bounding_box();
}
return box;
}
@ -482,91 +525,10 @@ void TWERD::plot(ScrollView* window) {
**********************************************************************/
void blob_origin(TBLOB *blob, /*blob to compute on */
TPOINT *origin) { /*return value */
TPOINT topleft; /*bounding box */
TPOINT botright;
/*find bounding box */
blob_bounding_box(blob, &topleft, &botright);
/*centre of box */
origin->x = (topleft.x + botright.x) / 2;
origin->y = (topleft.y + botright.y) / 2;
TBOX bbox = blob->bounding_box();
*origin = (bbox.topleft() + bbox.botright()) / 2;
}
/**********************************************************************
* blob_bounding_box
*
* Compute the bounding_box of a compound blob, define to be the
* max coordinate value of the bounding boxes of all the top-level
* outlines in the box.
**********************************************************************/
void blob_bounding_box(const TBLOB *blob, // blob to compute on.
TPOINT *topleft, // bounding box.
TPOINT *botright) {
register TESSLINE *outline; // Current outline.
if (blob == NULL || blob->outlines == NULL) {
topleft->x = topleft->y = 0;
*botright = *topleft; // Default value.
} else {
outline = blob->outlines;
*topleft = outline->topleft;
*botright = outline->botright;
for (outline = outline->next; outline != NULL; outline = outline->next) {
UpdateRange(outline->topleft.x, outline->botright.x,
&topleft->x, &botright->x);
UpdateRange(outline->botright.y, outline->topleft.y,
&botright->y, &topleft->y);
}
}
}
/**********************************************************************
* blobs_bounding_box
*
* Return the smallest extreme point that contain this word.
**********************************************************************/
void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright) {
TPOINT tl;
TPOINT br;
/* Start with first blob */
blob_bounding_box(blobs, topleft, botright);
for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
blob_bounding_box(blob, &tl, &br);
if (tl.x < topleft->x)
topleft->x = tl.x;
if (tl.y > topleft->y)
topleft->y = tl.y;
if (br.x > botright->x)
botright->x = br.x;
if (br.y < botright->y)
botright->y = br.y;
}
}
/**********************************************************************
* blobs_origin
*
* Compute the origin of a compound blob, define to be the centre
* of the bounding box.
**********************************************************************/
void blobs_origin(TBLOB *blobs, /*blob to compute on */
TPOINT *origin) { /*return value */
TPOINT topleft; /*bounding box */
TPOINT botright;
/*find bounding box */
blobs_bounding_box(blobs, &topleft, &botright);
/*center of box */
origin->x = (topleft.x + botright.x) / 2;
origin->y = (topleft.y + botright.y) / 2;
}
/**********************************************************************
* blobs_widths
*
@ -585,18 +547,18 @@ WIDTH_RECORD *blobs_widths(TBLOB *blobs) { /*blob to compute on */
width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
width_record->num_chars = num_blobs;
blob_bounding_box(blobs, &topleft, &botright);
width_record->widths[i++] = botright.x - topleft.x;
TBOX bbox = blobs->bounding_box();
width_record->widths[i++] = bbox.width();
/* First width */
blob_end = botright.x;
blob_end = bbox.right();
for (TBLOB* blob = blobs->next; blob != NULL; blob = blob->next) {
blob_bounding_box(blob, &topleft, &botright);
width_record->widths[i++] = topleft.x - blob_end;
width_record->widths[i++] = botright.x - topleft.x;
blob_end = botright.x;
TBOX curbox = blob->bounding_box();
width_record->widths[i++] = curbox.left() - blob_end;
width_record->widths[i++] = curbox.width();
blob_end = curbox.right();
}
return (width_record);
return width_record;
}
@ -630,8 +592,9 @@ bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) {
outline1 = outline1->next) {
if (outline1->is_hole)
continue; // Holes do not count as separable.
TPOINT mid_pt1 = {(outline1->topleft.x + outline1->botright.x) / 2,
(outline1->topleft.y + outline1->botright.y) / 2};
TPOINT mid_pt1(
static_cast<inT16>((outline1->topleft.x + outline1->botright.x) / 2),
static_cast<inT16>((outline1->topleft.y + outline1->botright.y) / 2));
int mid_prod1 = CROSS(mid_pt1, vertical);
int min_prod1, max_prod1;
outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
@ -639,15 +602,16 @@ bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location) {
outline2 = outline2->next) {
if (outline2->is_hole)
continue; // Holes do not count as separable.
TPOINT mid_pt2 = { (outline2->topleft.x + outline2->botright.x) / 2,
(outline2->topleft.y + outline2->botright.y) / 2};
TPOINT mid_pt2(
static_cast<inT16>((outline2->topleft.x + outline2->botright.x) / 2),
static_cast<inT16>((outline2->topleft.y + outline2->botright.y) / 2));
int mid_prod2 = CROSS(mid_pt2, vertical);
int min_prod2, max_prod2;
outline2->MinMaxCrossProduct(vertical, &min_prod2, &max_prod2);
int mid_gap = abs(mid_prod2 - mid_prod1);
int overlap = MIN(max_prod1, max_prod2) - MAX(min_prod1, min_prod2);
if (mid_gap - overlap / 2 > max_gap) {
max_gap = mid_gap - overlap / 2;
if (mid_gap - overlap / 4 > max_gap) {
max_gap = mid_gap - overlap / 4;
*location = mid_pt1;
*location += mid_pt2;
*location /= 2;
@ -679,8 +643,9 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
int location_prod = CROSS(location, vertical);
while (outline != NULL) {
TPOINT mid_pt = {(outline->topleft.x + outline->botright.x) / 2,
(outline->topleft.y + outline->botright.y) / 2};
TPOINT mid_pt(
static_cast<inT16>((outline->topleft.x + outline->botright.x) / 2),
static_cast<inT16>((outline->topleft.y + outline->botright.y) / 2));
int mid_prod = CROSS(mid_pt, vertical);
if (mid_prod < location_prod) {
// Outline is in left blob.
@ -705,4 +670,3 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
if (outline2)
outline2->next = NULL;
}

View File

@ -29,6 +29,7 @@
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "clst.h"
#include "rect.h"
#include "vecfuncs.h"
@ -50,6 +51,10 @@ typedef struct
} WIDTH_RECORD;
struct TPOINT {
TPOINT(): x(0), y(0) {}
TPOINT(inT16 vx, inT16 vy) : x(vx), y(vy) {}
TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {}
void operator+=(const TPOINT& other) {
x += other.x;
y += other.y;
@ -102,6 +107,9 @@ struct EDGEPT {
EDGEPT* prev; // clockwise element
};
// For use in chop and findseam to keep a list of which EDGEPTs were inserted.
CLISTIZEH(EDGEPT);
struct TESSLINE {
TESSLINE() : is_hole(false), loop(NULL), next(NULL) {}
TESSLINE(const TESSLINE& src) : loop(NULL), next(NULL) {
@ -176,6 +184,12 @@ struct TBLOB {
// Factory to build a TBLOB from a C_BLOB with polygonal
// approximation along the way.
static TBLOB* PolygonalCopy(C_BLOB* src);
// Normalizes the blob for classification only if needed.
// (Normally this means a non-zero classify rotation.)
// If no Normalization is needed, then NULL is returned, and the denorm is
// unchanged. Otherwise a new TBLOB is returned and the denorm points to
// a new DENORM. In this case, both the TBLOB and DENORM must be deleted.
TBLOB* ClassifyNormalizeIfNeeded(const DENORM** denorm) const;
// Copies the data and the outlines, but leaves next untouched.
void CopyFrom(const TBLOB& src);
// Deletes owned data.
@ -274,23 +288,12 @@ if (w) memfree (w)
----------------------------------------------------------------------*/
// TODO(rays) This will become a member of TBLOB when TBLOB's definition
// moves to blobs.h
TBOX TBLOB_bounding_box(const TBLOB* blob);
void blob_origin(TBLOB *blob, /*blob to compute on */
TPOINT *origin); /*return value */
// Returns the center of blob's bounding box in origin.
void blob_origin(TBLOB *blob, TPOINT *origin);
/*blob to compute on */
void blob_bounding_box(const TBLOB *blob,
TPOINT *topleft, // Bounding box.
TPOINT *botright);
void blobs_bounding_box(TBLOB *blobs, TPOINT *topleft, TPOINT *botright);
void blobs_origin(TBLOB *blobs, /*blob to compute on */
TPOINT *origin); /*return value */
/*blob to compute on */
WIDTH_RECORD *blobs_widths(TBLOB *blobs);
WIDTH_RECORD *blobs_widths(TBLOB *blobs);
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location);

164
ccstruct/boxread.cpp Normal file
View File

@ -0,0 +1,164 @@
/**********************************************************************
* File: boxread.cpp
* Description: Read data from a box file.
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "mfcpch.h"
#include "boxread.h"
#include <string.h>
#include "rect.h"
#include "strngs.h"
#include "tprintf.h"
#include "unichar.h"
// Special char code used to identify multi-blob labels.
static const char* kMultiBlobLabelCode = "WordStr";
// Open the boxfile based on the given image filename.
FILE* OpenBoxFile(const STRING& fname) {
STRING filename = fname;
const char *lastdot = strrchr(filename.string(), '.');
if (lastdot != NULL)
filename[lastdot - filename.string()] = '\0';
filename += ".box";
FILE* box_file = NULL;
if (!(box_file = fopen(filename.string(), "rb"))) {
CANTOPENFILE.error("read_next_box", TESSEXIT,
"Cant open box file %s",
filename.string());
}
return box_file;
}
// Box files are used ONLY DURING TRAINING, but by both processes of
// creating tr files with tesseract, and unicharset_extractor.
// ReadNextBox factors out the code that interprets a line of a box file so
// that applybox and unicharset_extractor interpret it in the same way.
// Reads the next valid box, setting utf8_str to its unichar string and
// bounding_box to its box, and returns true. Returns false on eof (and
// closes the file).
// The utf8 file signature ByteOrderMark (U+FEFF=EF BB BF) is ignored, the
// string is checked for valid utf-8, and fields may be separated by space
// or tab.
// This overload accepts boxes from every page in the file.
bool ReadNextBox(int *line_number, FILE* box_file,
                 STRING* utf8_str, TBOX* bounding_box) {
  // A target page of -1 means "read any page number".
  const int kAnyPage = -1;
  return ReadNextBox(kAnyPage, line_number, box_file, utf8_str, bounding_box);
}
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool ReadNextBox(int target_page, int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box) {
int page = 0;
char buff[kBoxReadBufSize]; // boxfile read buffer
char *buffptr = buff;
while (fgets(buff, sizeof(buff) - 1, box_file)) {
(*line_number)++;
buffptr = buff;
const unsigned char *ubuf = reinterpret_cast<const unsigned char*>(buffptr);
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
buffptr += 3; // Skip unicode file designation.
// Check for blank lines in box file
while (*buffptr == ' ' || *buffptr == '\t')
buffptr++;
if (*buffptr != '\0') {
if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) {
tprintf("Box file format error on line %i; ignored\n", *line_number);
continue;
}
if (target_page >= 0 && target_page != page)
continue; // Not on the appropriate page.
return true; // Successfully read a box.
}
}
fclose(box_file);
return false; // EOF
}
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
// The box file is assumed to contain box definitions, one per line, of the
// following format for blob-level boxes:
//   <UTF8 str> <left> <bottom> <right> <top> <page id>
// and for word/line-level boxes:
//   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
// The page id is optional; *page_number is 0 when it is absent.
// On failure utf8_str is left empty and bounding_box is left as a
// default-constructed TBOX.
// See applybox.cpp for more information.
bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
                     STRING* utf8_str, TBOX* bounding_box) {
  *bounding_box = TBOX();       // Initialize it to empty.
  *utf8_str = "";
  char uch[kBoxReadBufSize];
  const char *buffptr = boxfile_str;
  // Read the unichar without messing up on Tibetan.
  // According to issue 253 the utf-8 surrogates 85 and A0 are treated
  // as whitespace by sscanf, so it is more reliable to just find
  // ascii space and tab.
  int uch_len = 0;
  while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
         uch_len < kBoxReadBufSize - 1) {
    uch[uch_len++] = *buffptr++;
  }
  uch[uch_len] = '\0';
  if (*buffptr != '\0') ++buffptr;  // Step over the field separator.
  int x_min, y_min, x_max, y_max;
  *page_number = 0;
  int count = sscanf(buffptr, "%d %d %d %d %d",
                     &x_min, &y_min, &x_max, &y_max, page_number);
  // Four fields (no page id) or five fields (with page id) are acceptable.
  if (count != 5 && count != 4) {
    tprintf("Bad box coordinates in boxfile string!\n");
    return false;
  }
  // Test for long space-delimited string label.
  if (strcmp(uch, kMultiBlobLabelCode) == 0 &&
      (buffptr = strchr(buffptr, '#')) != NULL) {
    // strncpy does not terminate the destination when the source fills it,
    // so copy one byte less and terminate explicitly to prevent the
    // following calls from running off the end of uch.
    strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
    uch[kBoxReadBufSize - 1] = '\0';
    chomp_string(uch);
    uch_len = strlen(uch);
  }
  // Validate UTF8 by making unichars with it.
  int used = 0;
  while (used < uch_len) {
    UNICHAR ch(uch + used, uch_len - used);
    int new_used = ch.utf8_len();
    if (new_used == 0) {
      // Print the offending byte as an unsigned value; a plain char with the
      // top bit set would otherwise be sign-extended in the output.
      tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n",
              uch + used, static_cast<unsigned char>(uch[used]), used + 1);
      return false;
    }
    used += new_used;
  }
  *utf8_str = uch;
  bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
  return true;  // Successfully read a box.
}
// Creates a box file string from a unichar string, TBOX and page number.
// The result written to *box_str has the form
//   unichar_str left bottom right top page_num
// with a single space before each number.
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
                    STRING* box_str) {
  *box_str = unichar_str;
  // The numeric fields, in the order they appear on a box file line.
  const int fields[] = {
    box.left(), box.bottom(), box.right(), box.top(), page_num
  };
  const int num_fields = sizeof(fields) / sizeof(fields[0]);
  for (int f = 0; f < num_fields; ++f) {
    box_str->add_str_int(" ", fields[f]);
  }
}

60
ccstruct/boxread.h Normal file
View File

@ -0,0 +1,60 @@
/**********************************************************************
* File: boxread.h
* Description: Read data from a box file.
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCUTIL_BOXREAD_H__
#define TESSERACT_CCUTIL_BOXREAD_H__
#include <stdio.h>
#include "strngs.h"
class STRING;
class TBOX;
// Size of buffer used to read a line from a box file.
const int kBoxReadBufSize = 1024;
// Open the boxfile based on the given image filename.
FILE* OpenBoxFile(const STRING& fname);
// ReadNextBox factors out the code to interpret a line of a box
// file so that applybox and unicharset_extractor interpret the same way.
// This function returns the next valid box file utf8 string and coords
// and returns true, or false on eof (and closes the file).
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
bool ReadNextBox(int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box);
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool ReadNextBox(int target_page, int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box);
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
STRING* utf8_str, TBOX* bounding_box);
// Creates a box file string from a unichar string, TBOX and page number.
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
STRING* box_str);
#endif // TESSERACT_CCUTIL_BOXREAD_H__

View File

@ -209,6 +209,13 @@ void BoxWord::DeleteBox(int index) {
ComputeBoundingBox();
}
// Deletes all the boxes stored in BoxWord, leaving a word of length 0.
void BoxWord::DeleteAllBoxes() {
  boxes_.clear();
  length_ = 0;
  // With no boxes left, the word's bounding box reverts to a default TBOX.
  bbox_ = TBOX();
}
// Computes the bounding box of the word.
void BoxWord::ComputeBoundingBox() {
bbox_ = TBOX();

View File

@ -85,6 +85,9 @@ class BoxWord {
// Recomputes the bounding box.
void DeleteBox(int index);
// Deletes all the boxes stored in BoxWord.
void DeleteAllBoxes();
// This and other putatively are the same, so call the (permanent) callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.

View File

@ -623,7 +623,7 @@ void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
// Renders the outline to the given pix, with left and top being
// the coords of the upper-left corner of the pix.
void C_OUTLINE::render(int left, int top, Pix* pix) {
void C_OUTLINE::render(int left, int top, Pix* pix) const {
ICOORD pos = start;
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
ICOORD next_step = step(stepindex);
@ -638,6 +638,25 @@ void C_OUTLINE::render(int left, int top, Pix* pix) {
}
}
// Renders just the outline to the given pix (no fill), with left and top
// being the coords of the upper-left corner of the pix.
// One pixel is set per non-zero step of the outline; which of the pixels
// around the current position is chosen depends on the step direction.
void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
  ICOORD pos = start;
  for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
    ICOORD next_step = step(stepindex);
    const int dx = next_step.x();
    const int dy = next_step.y();
    if (dx != 0 || dy != 0) {
      // Vertical steps take priority over horizontal ones. Upward and
      // leftward steps select the pixel column to the left of pos; upward
      // and rightward steps select the pixel row one further down the pix.
      const bool shift_x = dy > 0 || (dy == 0 && dx < 0);
      const bool shift_y = dy > 0 || (dy == 0 && dx > 0);
      pixSetPixel(pix,
                  pos.x() - left - (shift_x ? 1 : 0),
                  top - pos.y() - (shift_y ? 1 : 0),
                  1);
    }
    pos += next_step;
  }
}
/**********************************************************************
* C_OUTLINE::plot
*

View File

@ -152,7 +152,11 @@ class DLLSYM C_OUTLINE:public ELIST_LINK
// Renders the outline to the given pix, with left and top being
// the coords of the upper-left corner of the pix.
void render(int left, int top, Pix* pix);
void render(int left, int top, Pix* pix) const;
// Renders just the outline to the given pix (no fill), with left and top
// being the coords of the upper-left corner of the pix.
void render_outline(int left, int top, Pix* pix) const;
void plot( //draw one
ScrollView* window, //window to draw in

162
ccstruct/fontinfo.cpp Normal file
View File

@ -0,0 +1,162 @@
///////////////////////////////////////////////////////////////////////
// File: fontinfo.cpp
// Description: Font information classes abstracted from intproto.h/cpp.
// Author: rays@google.com (Ray Smith)
// Created: Wed May 18 10:39:01 PDT 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "fontinfo.h"
namespace tesseract {
// Compare FontInfo structures. Returns true if the two have the same name.
bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2) {
  // Two fonts with the same name are required to have the same properties,
  // so the name alone identifies a font. This also means that a table of
  // FontInfo can be queried with just a font name to retrieve the
  // properties.
  const int name_order = strcmp(fi1.name, fi2.name);
  return name_order == 0;
}
// Compare FontSet structures. Returns true if both sets have the same size
// and hold the same config ids in the same order.
bool CompareFontSet(const FontSet& fs1, const FontSet& fs2) {
  bool same = fs1.size == fs2.size;
  // Stop at the first differing entry.
  for (int i = 0; same && i < fs1.size; ++i) {
    same = fs1.configs[i] == fs2.configs[i];
  }
  return same;
}
// Callbacks for GenericVector.
// Frees the data owned by the given FontInfo: every FontSpacingInfo held in
// its spacing vector, the vector itself, and the name string.
void FontInfoDeleteCallback(FontInfo f) {
  GenericVector<FontSpacingInfo *>* spacing = f.spacing_vec;
  if (spacing != NULL) {
    spacing->delete_data_pointers();
    delete spacing;
  }
  delete[] f.name;
}
// Frees the configs array owned by the given FontSet.
void FontSetDeleteCallback(FontSet fs) {
  int* owned_configs = fs.configs;
  delete[] owned_configs;
}
/*---------------------------------------------------------------------------*/
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
// Reads the name and properties of a FontInfo from f, in the format written
// by write_info: a 32 bit name length, the name bytes (no terminator), then
// the 32 bit properties. If swap is true the integers are byte-reversed
// after reading.
// Returns false on a read error or a negative name length. Once the name
// buffer has been allocated it is stored in fi->name (and is always
// null-terminated) even if a later read fails, so the owner of fi stays
// responsible for deleting it.
bool read_info(FILE* f, FontInfo* fi, bool swap) {
  inT32 size;
  if (fread(&size, sizeof(size), 1, f) != 1) return false;
  if (swap)
    Reverse32(&size);
  // The length comes straight from the file. A negative value can only mean
  // corrupt data and must not be used as an allocation size.
  if (size < 0) return false;
  const size_t name_length = static_cast<size_t>(size);
  // Zero-fill the buffer so the name is a valid C string even when the read
  // below comes up short.
  char* font_name = new char[name_length + 1]();
  fi->name = font_name;
  if (fread(font_name, sizeof(*font_name), name_length, f) != name_length)
    return false;
  font_name[name_length] = '\0';
  if (fread(&fi->properties, sizeof(fi->properties), 1, f) != 1) return false;
  if (swap)
    Reverse32(&fi->properties);
  return true;
}
bool write_info(FILE* f, const FontInfo& fi) {
inT32 size = strlen(fi.name);
if (fwrite(&size, sizeof(size), 1, f) != 1) return false;
if (fwrite(fi.name, sizeof(*fi.name), size, f) != size) return false;
if (fwrite(&fi.properties, sizeof(fi.properties), 1, f) != 1) return false;
return true;
}
// Reads the spacing information of a FontInfo from f, in the format written
// by write_spacing_info. If swap is true the values are byte-reversed after
// reading.
// A stored vector size of 0 leaves fi untouched. Otherwise fi->init_spacing
// is called and one entry is read per slot; an entry whose kern size is
// negative marks a NULL slot and is not added.
// Returns false on any read error.
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) {
  inT32 vec_size, kern_size;
  if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
  if (swap) Reverse32(&vec_size);
  ASSERT_HOST(vec_size >= 0);
  if (vec_size == 0) return true;
  fi->init_spacing(vec_size);
  for (int i = 0; i < vec_size; ++i) {
    FontSpacingInfo *fs = new FontSpacingInfo();
    if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 ||
        fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 ||
        fread(&kern_size, sizeof(kern_size), 1, f) != 1) {
      delete fs;  // Not yet owned by fi, so it must be freed here.
      return false;
    }
    if (swap) {
      ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before));
      ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after));
      Reverse32(&kern_size);
    }
    if (kern_size < 0) {  // indication of a NULL entry in fi->spacing_vec
      delete fs;
      continue;
    }
    if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) ||
                          !fs->kerned_x_gaps.DeSerialize(swap, f))) {
      delete fs;  // Not yet owned by fi, so it must be freed here.
      return false;
    }
    // fi takes ownership of fs from here on.
    fi->add_spacing(i, fs);
  }
  return true;
}
bool write_spacing_info(FILE* f, const FontInfo& fi) {
inT32 vec_size = (fi.spacing_vec == NULL) ? 0 : fi.spacing_vec->size();
if (fwrite(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
inT16 x_gap_invalid = -1;
for (int i = 0; i < vec_size; ++i) {
FontSpacingInfo *fs = fi.spacing_vec->get(i);
inT32 kern_size = (fs == NULL) ? -1 : fs->kerned_x_gaps.size();
if (fs == NULL) {
if (fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
return false;
}
} else {
if (fwrite(&(fs->x_gap_before), sizeof(fs->x_gap_before), 1, f) != 1 ||
fwrite(&(fs->x_gap_after), sizeof(fs->x_gap_after), 1, f) != 1 ||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
return false;
}
}
if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) ||
!fs->kerned_x_gaps.Serialize(f))) {
return false;
}
}
return true;
}
// Reads a FontSet from f, in the format written by write_set: a 32 bit
// count followed by that many 32 bit config ids. If swap is true the values
// are byte-reversed after reading.
// Returns false on a read error or a negative count. After a negative count
// fs is left empty (size 0, configs NULL). After a failed read of a config
// id, fs->configs is still allocated and the owner of fs stays responsible
// for deleting it.
bool read_set(FILE* f, FontSet* fs, bool swap) {
  if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
  if (swap)
    Reverse32(&fs->size);
  // The count comes straight from the file. A negative value can only mean
  // corrupt data and must not be used as an allocation size.
  if (fs->size < 0) {
    fs->size = 0;
    fs->configs = NULL;
    return false;
  }
  fs->configs = new int[fs->size];
  for (int i = 0; i < fs->size; ++i) {
    if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false;
    if (swap)
      Reverse32(&fs->configs[i]);
  }
  return true;
}
// Writes fs to f as its size followed by each of its config ids.
// Returns false as soon as any write fails.
bool write_set(FILE* f, const FontSet& fs) {
  bool ok = fwrite(&fs.size, sizeof(fs.size), 1, f) == 1;
  // Nothing further is written once a write has failed.
  for (int i = 0; ok && i < fs.size; ++i) {
    ok = fwrite(&fs.configs[i], sizeof(fs.configs[i]), 1, f) == 1;
  }
  return ok;
}
} // namespace tesseract.

133
ccstruct/fontinfo.h Normal file
View File

@ -0,0 +1,133 @@
///////////////////////////////////////////////////////////////////////
// File: fontinfo.h
// Description: Font information classes abstracted from intproto.h/cpp.
// Author: rays@google.com (Ray Smith)
// Created: Tue May 17 17:08:01 PDT 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_FONTINFO_H_
#define TESSERACT_CCSTRUCT_FONTINFO_H_
#include "genericvector.h"
#include "host.h"
#include "unichar.h"
namespace tesseract {
// Struct for information about spacing between characters in a particular font.
// One of these describes a single character; FontInfo keeps them in a vector
// indexed by UNICHAR_ID.
struct FontSpacingInfo {
// Gap contributed by this character when it follows another one.
// FontInfo::get_spacing adds it to the previous character's x_gap_after.
inT16 x_gap_before;
// Gap contributed by this character when another one follows it.
inT16 x_gap_after;
// Characters that have a specific gap when they follow this one.
// FontInfo::get_spacing reads kerned_unichar_ids[i] and kerned_x_gaps[i] as
// a pair, so the two vectors are expected to have matching entries.
GenericVector<UNICHAR_ID> kerned_unichar_ids;
// Gap to use in place of x_gap_after + x_gap_before for the character at the
// same index in kerned_unichar_ids.
GenericVector<inT16> kerned_x_gaps;
};
/*
* font_properties contains properties about boldness, italicness, fixed pitch,
* serif, fraktur
*/
struct FontInfo {
FontInfo() : name(NULL), spacing_vec(NULL) {}
~FontInfo() {}
// Reserves unicharset_size spots in spacing_vec.
void init_spacing(int unicharset_size) {
spacing_vec = new GenericVector<FontSpacingInfo *>();
spacing_vec->init_to_size(unicharset_size, NULL);
}
// Adds the given pointer to FontSpacingInfo to spacing_vec member
// (FontInfo class takes ownership of the pointer).
// Note: init_spacing should be called before calling this function.
void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
ASSERT_HOST(spacing_vec != NULL && spacing_vec->size() > uch_id);
(*spacing_vec)[uch_id] = spacing_info;
}
// Returns the pointer to FontSpacingInfo for the given UNICHAR_ID.
const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
return (spacing_vec == NULL || spacing_vec->size() <= uch_id) ?
NULL : (*spacing_vec)[uch_id];
}
// Fills spacing with the value of the x gap expected between the two given
// UNICHAR_IDs. Returns true on success.
bool get_spacing(UNICHAR_ID prev_uch_id,
UNICHAR_ID uch_id,
int *spacing) const {
const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
const FontSpacingInfo *fsi = this->get_spacing(uch_id);
if (prev_fsi == NULL || fsi == NULL) return false;
int i = 0;
for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
if (prev_fsi->kerned_unichar_ids[i] == uch_id) break;
}
if (i < prev_fsi->kerned_unichar_ids.size()) {
*spacing = prev_fsi->kerned_x_gaps[i];
} else {
*spacing = prev_fsi->x_gap_after + fsi->x_gap_before;
}
return true;
}
bool is_italic() const { return properties & 1; }
bool is_bold() const { return (properties & 2) != 0; }
bool is_fixed_pitch() const { return (properties & 4) != 0; }
bool is_serif() const { return (properties & 8) != 0; }
bool is_fraktur() const { return (properties & 16) != 0; }
char* name;
uinT32 properties;
// The universal_id is a field reserved for the initialization process
// to assign a unique id number to all fonts loaded for the current
// combination of languages. This id will then be returned by
// ResultIterator::WordFontAttributes.
inT32 universal_id;
// Horizontal spacing between characters (indexed by UNICHAR_ID).
GenericVector<FontSpacingInfo *> *spacing_vec;
};
// Every class (character) owns a FontSet that represents all the fonts that can
// render this character.
// Since almost all the characters from the same script share the same set of
// fonts, the sets are shared over multiple classes (see
// Classify::fontset_table_). Thus, a class only stores an id to a set.
// Because some fonts cannot render just one character of a set, there are a
// lot of FontSets that differ only by one font. Rather than storing the
// FontInfo directly in the FontSet structure, it's better to share FontInfos
// among FontSets (Classify::fontinfo_table_).
struct FontSet {
// Number of entries in configs.
int size;
// Array of size entries. FontSetDeleteCallback releases it with delete[].
int* configs; // FontInfo ids
};
// Compare FontInfo structures.
bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2);
// Compare FontSet structures.
bool CompareFontSet(const FontSet& fs1, const FontSet& fs2);
// Deletion callbacks for GenericVector.
void FontInfoDeleteCallback(FontInfo f);
void FontSetDeleteCallback(FontSet fs);
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(FILE* f, FontInfo* fi, bool swap);
bool write_info(FILE* f, const FontInfo& fi);
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap);
bool write_spacing_info(FILE* f, const FontInfo& fi);
bool read_set(FILE* f, FontSet* fs, bool swap);
bool write_set(FILE* f, const FontSet& fs);
} // namespace tesseract.
#endif /* TESSERACT_CCSTRUCT_FONTINFO_H_ */

View File

@ -33,7 +33,7 @@
#include "unicharset.h"
// Print the best guesses out of the match rating matrix.
void MATRIX::print(const UNICHARSET &unicharset) {
void MATRIX::print(const UNICHARSET &unicharset) const {
tprintf("Ratings Matrix (top choices)\n");
int row, col;
for (col = 0; col < this->dimension(); ++col) tprintf("\t%d", col);

View File

@ -101,9 +101,11 @@ class GENERIC_2D_ARRAY {
// Returns dim1_. Presumably the extent of the first (column) index - TODO
// confirm against the constructor.
int dim1() const { return dim1_; }
// Returns dim2_. Presumably the extent of the second (row) index - TODO
// confirm against the constructor.
int dim2() const { return dim2_; }
// Expression to select a specific location in the matrix.
// Expression to select a specific location in the matrix. The matrix is
// stored COLUMN-major, so the left-most index is the most significant.
// This allows [][] access to use indices in the same order as (,).
int index(int column, int row) const {
return (row * dim1_ + column);
return (column * dim2_ + row);
}
// Put a list element into the matrix at a specific location.
@ -122,6 +124,11 @@ class GENERIC_2D_ARRAY {
// Returns a reference to the element at (column, row), located through
// index().
T& operator()(int column, int row) {
  const int offset = this->index(column, row);
  return array_[offset];
}
// Allow access using array[column][row]. NOTE that the indices are
// in the same left-to-right order as the () indexing.
// Returns a pointer to the element at (column, 0), which the caller then
// indexes by row.
T* operator[](int column) {
  T& column_start = array_[this->index(column, 0)];
  return &column_start;
}
// Delete objects pointed to by array_[i].
void delete_matrix_pointers() {
@ -188,7 +195,7 @@ class MATRIX : public GENERIC_MATRIX<BLOB_CHOICE_LIST *> {
MATRIX(int dimension) : GENERIC_MATRIX<BLOB_CHOICE_LIST *>(dimension,
NOT_CLASSIFIED) {}
// Print a shortened version of the contents of the matrix.
void print(const UNICHARSET &unicharset);
void print(const UNICHARSET &unicharset) const;
};
struct MATRIX_COORD {

View File

@ -24,7 +24,9 @@
#include "allheaders.h"
#include "blobs.h"
#include "helpers.h"
#include "ocrblock.h"
#include "unicharset.h"
#include "werd.h"
@ -254,7 +256,9 @@ void DENORM::LocalNormBlob(TBLOB* blob) const {
blob->Move(translation);
// Note that the old way of scaling only allowed for a single
// scale factor.
blob->Scale(YScaleAtOrigX(x_center));
float scale = YScaleAtOrigX(x_center);
if (scale != 1.0f)
blob->Scale(scale);
if (rotation_ != NULL)
blob->Rotate(*rotation_);
translation.set_x(IntCastRounded(final_xshift_));
@ -262,6 +266,54 @@ void DENORM::LocalNormBlob(TBLOB* blob) const {
blob->Move(translation);
}
// Fills in the x-height range accepted by the given unichar_id, given its
// bounding box in the usual baseline-normalized coordinates, with some
// initial crude x-height estimate (such as word size) and this denoting the
// transformation that was used.
// Returns false and an empty [0, 0] range if the bottom of the box is a
// mis-fit for the character. Returns true otherwise, in which case the range
// may still be empty [0, 0] if the bottom fits but the top is impossible.
bool DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
                          const TBOX& bbox,
                          inT16* min_xht, inT16* max_xht) const {
  // Start from the mis-fit answer; it is only widened further down.
  *min_xht = 0;
  *max_xht = 0;

  // Clip the top and bottom to the limit of normalized feature space.
  const int clipped_top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
  const int clipped_bottom =
      ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
  // A tolerance of yscale corresponds to 1 pixel in the image.
  const double tolerance = y_scale();
  int min_bottom, max_bottom, min_top, max_top;
  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
                            &min_top, &max_top);

  // Chars with a misfitting bottom might be sub/superscript/dropcap, or might
  // just be wrongly classified. Return an empty range so they have to be
  // good to be considered.
  const bool bottom_misfit = clipped_bottom < min_bottom - tolerance ||
                             clipped_bottom > max_bottom + tolerance;
  if (bottom_misfit) {
    return false;
  }

  // To help very high cap/xheight ratio fonts accept the correct x-height,
  // and to allow the large caps in small caps to accept the xheight of the
  // small caps, add kBlnBaselineOffset to chars with a maximum max.
  if (max_top == kBlnCellHeight - 1)
    max_top += kBlnBaselineOffset;

  // All heights from here on are measured up from the baseline.
  const int height = clipped_top - kBlnBaselineOffset;
  const double min_height = min_top - kBlnBaselineOffset - tolerance;
  const double max_height = max_top - kBlnBaselineOffset + tolerance;
  const bool above_baseline = height > 0;

  // Upper limit of the x-height: set by the shortest acceptable top.
  if (min_height <= 0.0) {
    if (!above_baseline || max_height > 0)
      *max_xht = MAX_INT16;  // Anything will do.
  } else if (above_baseline) {
    int upper = IntCastRounded(height * kBlnXHeight / y_scale() / min_height);
    *max_xht = static_cast<inT16>(ClipToRange(upper, 0, MAX_INT16));
  }
  // Lower limit of the x-height: set by the tallest acceptable top.
  if (max_height > 0.0 && above_baseline) {
    int lower = IntCastRounded(height * kBlnXHeight / y_scale() / max_height);
    *min_xht = static_cast<inT16>(ClipToRange(lower, 0, MAX_INT16));
  }
  return true;
}
// ============== Private Code ======================
// Free allocated memory and clear pointers.

View File

@ -31,9 +31,21 @@ struct Pix;
class ROW; // Forward decl
class BLOCK;
class FCOORD;
struct TBLOB;
class TBLOB;
class TBOX;
struct TPOINT;
class TPOINT;
class UNICHARSET;
namespace tesseract {
// Possible normalization methods. Use NEGATIVE values as these also
// double up as markers for the last sub-classifier.
// NOTE(review): because the values are negative they presumably cannot
// collide with ordinary non-negative indices - confirm against the users of
// this enum before changing any value.
enum NormalizationMode {
NM_BASELINE = -3, // The original BL normalization mode.
NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic.
NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.
};
} // namespace tesseract.
class DENORM_SEG {
public:
@ -219,6 +231,15 @@ class DENORM {
// more accurately copies the old way.
void LocalNormBlob(TBLOB* blob) const;
// Fills in the x-height range accepted by the given unichar_id, given its
// bounding box in the usual baseline-normalized coordinates, with some
// initial crude x-height estimate (such as word size) and this denoting the
// transformation that was used. Returns false, and an empty range if the
// bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom
// fits, but the top is impossible.
bool XHeightRange(int unichar_id, const UNICHARSET& unicharset,
const TBOX& bbox, inT16* min_xht, inT16* max_xht) const;
// Returns the pix_ pointer held by this DENORM. No copy is made here.
Pix* pix() const {
return pix_;
}
@ -236,6 +257,9 @@ class DENORM {
return predecessor_->RootDenorm();
return this;
}
// Returns the predecessor_ pointer of this DENORM.
// NOTE(review): presumably NULL when there is no predecessor - confirm
// against the constructor.
const DENORM* predecessor() const {
return predecessor_;
}
// Accessors - perhaps should not be needed.
float x_scale() const {
return x_scale_;

View File

@ -18,10 +18,11 @@
**********************************************************************/
#include "mfcpch.h"
#include <stdlib.h>
#include "blckerr.h"
#include "ocrblock.h"
#include "tprintf.h"
#include <stdlib.h>
#include "blckerr.h"
#include "ocrblock.h"
#include "stepblob.h"
#include "tprintf.h"
#define BLOCK_LABEL_HEIGHT 150 //char height of block id
@ -86,6 +87,17 @@ void BLOCK::rotate(const FCOORD& rotation) {
box = *poly_block()->bounding_box();
}
/**
* BLOCK::reflect_polygon_in_y_axis
*
* Reflects the polygon in the y-axis and recompute the bounding_box.
* Does nothing to any contained rows/words/blobs etc.
*/
void BLOCK::reflect_polygon_in_y_axis() {
poly_block()->reflect_in_y_axis();
box = *poly_block()->bounding_box();
}
/**
* BLOCK::sort_rows
*
@ -219,6 +231,166 @@ const BLOCK & source //from this
return *this;
}
// Finds the approximate (horizontal) distance from the x-coordinate of the
// left edge of a symbol to the left edge of the text block which contains
// it. We are passed:
//   segments - output of PB_LINE_IT::get_line() which contains x-coordinate
//              intervals for the scan line going through the symbol's
//              y-coordinate. Each element of segments is of the form
//              (x()=start_x, y()=length).
//   x        - the x coordinate of the symbol we're interested in.
//   margin   - return value, the distance from x,y to the left margin of the
//              block containing it: the smallest non-negative distance from
//              x back to the start of a segment.
// If all segments were to the right of x, we return false and 0.
bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
  *margin = 0;
  if (segments->empty())
    return false;
  bool found = false;
  ICOORDELT_IT seg_it(segments);
  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
    const int distance = x - seg_it.data()->x();
    if (distance < 0)
      continue;  // This segment starts to the right of x.
    // Keep the nearest segment start seen so far.
    if (!found || distance < *margin)
      *margin = distance;
    found = true;
  }
  return found;
}
// Finds the approximate (horizontal) distance from the x-coordinate of the
// right edge of a symbol to the right edge of the text block which contains
// it. We are passed:
//   segments - output of PB_LINE_IT::get_line() which contains x-coordinate
//              intervals for the scan line going through the symbol's
//              y-coordinate. Each element of segments is of the form
//              (x()=start_x, y()=length).
//   x        - the x coordinate of the symbol we're interested in.
//   margin   - return value, the distance from x,y to the right margin of the
//              block containing it: the smallest non-negative distance from
//              x forward to the end (start_x + length) of a segment.
// If all segments were to the left of x, we return false and 0.
bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
  *margin = 0;
  if (segments->empty())
    return false;
  bool found = false;
  ICOORDELT_IT seg_it(segments);
  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
    ICOORDELT* segment = seg_it.data();
    const int distance = segment->x() + segment->y() - x;
    if (distance < 0)
      continue;  // This segment ends to the left of x.
    // Keep the nearest segment end seen so far.
    if (!found || distance < *margin)
      *margin = distance;
    found = true;
  }
  return found;
}
// Compute the distance from the left and right ends of each row to the
// left and right edges of the block's polyblock. Illustration:
// ____________________________ _______________________
// | Howdy neighbor! | |rectangular blocks look|
// | This text is written to| |more like stacked pizza|
// |illustrate how useful poly- |boxes. |
// |blobs are in ----------- ------ The polyblob|
// |dealing with| _________ |for a BLOCK rec-|
// |harder layout| /===========\ |ords the possibly|
// |issues. | | _ _ | |skewed pseudo-|
// | You see this| | |_| \|_| | |rectangular |
// |text is flowed| | } | |boundary that|
// |around a mid-| \ ____ | |forms the ideal-|
// |column portrait._____ \ / __|ized text margin|
// | Polyblobs exist| \ / |from which we should|
// |to account for insets| | | |measure paragraph|
// |which make otherwise| ----- |indentation. |
// ----------------------- ----------------------
//
// If we identify a drop-cap, we measure the left margin for the lines
// below the first line relative to one space past the drop cap. The
// first line's margin and those past the drop cap area are measured
// relative to the enclosing polyblock.
//
// TODO(rays): Before this will work well, we'll need to adjust the
// polyblob tighter around the text near images, as in:
// UNLV_AUTO:mag.3G0 page 2
// UNLV_AUTO:mag.3G4 page 16
void BLOCK::compute_row_margins() {
if (row_list()->empty() || row_list()->singleton()) {
return;
}
// If Layout analysis was not called, default to this.
POLY_BLOCK rect_block(bounding_box(), PT_FLOWING_TEXT);
POLY_BLOCK *pblock = &rect_block;
if (poly_block() != NULL) {
pblock = poly_block();
}
// Step One: Determine if there is a drop-cap.
// TODO(eger): Fix up drop cap code for RTL languages.
ROW_IT r_it(row_list());
ROW *first_row = r_it.data();
ROW *second_row = r_it.data_relative(1);
// initialize the bottom of a fictitious drop cap far above the first line.
int drop_cap_bottom = first_row->bounding_box().top() +
first_row->bounding_box().height();
int drop_cap_right = first_row->bounding_box().left();
int mid_second_line = second_row->bounding_box().top() -
second_row->bounding_box().height() / 2;
WERD_IT werd_it(r_it.data()->word_list()); // words of line one
if (!werd_it.empty()) {
C_BLOB_IT cblob_it(werd_it.data()->cblob_list());
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list();
cblob_it.forward()) {
TBOX bbox = cblob_it.data()->bounding_box();
if (bbox.bottom() <= mid_second_line) {
// we found a real drop cap
first_row->set_has_drop_cap(true);
if (drop_cap_bottom > bbox.bottom())
drop_cap_bottom = bbox.bottom();
if (drop_cap_right < bbox.right())
drop_cap_right = bbox.right();
}
}
}
// Step Two: Calculate the margin from the text of each row to the block
// (or drop-cap) boundaries.
PB_LINE_IT lines(pblock);
r_it.set_to_list(row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW *row = r_it.data();
TBOX row_box = row->bounding_box();
int left_y = row->base_line(row_box.left()) + row->x_height();
int left_margin;
ICOORDELT_LIST *segments = lines.get_line(left_y);
LeftMargin(segments, row_box.left(), &left_margin);
delete segments;
if (row_box.top() >= drop_cap_bottom) {
int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
if (drop_cap_distance < 0)
drop_cap_distance = 0;
if (drop_cap_distance < left_margin)
left_margin = drop_cap_distance;
}
int right_y = row->base_line(row_box.right()) + row->x_height();
int right_margin;
segments = lines.get_line(right_y);
RightMargin(segments, row_box.right(), &right_margin);
delete segments;
row->set_lmargin(left_margin);
row->set_rmargin(right_margin);
}
}
/**********************************************************************
* PrintSegmentationStats
*

View File

@ -21,6 +21,7 @@
#define OCRBLOCK_H
#include "img.h"
#include "ocrpara.h"
#include "ocrrow.h"
#include "pdblock.h"
@ -120,6 +121,14 @@ class BLOCK:public ELIST_LINK, public PDBLK
// Returns a pointer to this block's list of rows (the `rows` member).
// The list remains owned by the block.
ROW_LIST *row_list() {
return &rows;
}
// Compute the margins between the edges of each row and this block's
// polyblock, and store the results in the rows.
void compute_row_margins();
// Returns a pointer to this block's list of paragraphs (the `paras_`
// member). The list remains owned by the block.
PARA_LIST *para_list() {
return &paras_;
}
/// get blobs
C_BLOB_LIST *blob_list() {
return &c_blobs;
@ -157,6 +166,10 @@ class BLOCK:public ELIST_LINK, public PDBLK
return PDBLK::render_mask(re_rotation_);
}
// Reflects the polygon in the y-axis and recomputes the bounding_box.
// Does nothing to any contained rows/words/blobs etc.
void reflect_polygon_in_y_axis();
void rotate(const FCOORD& rotation);
/// decreasing y order
@ -187,6 +200,7 @@ class BLOCK:public ELIST_LINK, public PDBLK
float cell_over_xheight_; //< Ratio of cell height to xheight.
STRING filename; //< name of block
ROW_LIST rows; //< rows in block
PARA_LIST paras_; //< paragraphs of block
C_BLOB_LIST c_blobs; //< before textord
C_BLOB_LIST rej_blobs; //< duff stuff
FCOORD re_rotation_; //< How to transform coords back to image.

100
ccstruct/ocrpara.cpp Normal file
View File

@ -0,0 +1,100 @@
/////////////////////////////////////////////////////////////////////
// File: ocrpara.cpp
// Description: OCR Paragraph Output Type
// Author: David Eger
// Created: 2010-11-15
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include "ocrpara.h"
#include "host.h" // For NearlyEqual()
ELISTIZE(PARA)
using tesseract::JUSTIFICATION_LEFT;
using tesseract::JUSTIFICATION_RIGHT;
using tesseract::JUSTIFICATION_CENTER;
using tesseract::JUSTIFICATION_UNKNOWN;
// Maps a paragraph justification value to its upper-case display name.
// Any value other than LEFT, RIGHT or CENTER is reported as "UNKNOWN".
static STRING ParagraphJustificationToString(
    tesseract::ParagraphJustification justification) {
  if (justification == JUSTIFICATION_LEFT)
    return "LEFT";
  if (justification == JUSTIFICATION_RIGHT)
    return "RIGHT";
  if (justification == JUSTIFICATION_CENTER)
    return "CENTER";
  return "UNKNOWN";
}
// Returns true if a text line with the given whitespace measurements could
// be the first line of a paragraph that follows this model.
//   LEFT:   lmargin + lindent must be within tolerance_ of
//           margin_ + first_indent_.
//   RIGHT:  rmargin + rindent must be within tolerance_ of
//           margin_ + first_indent_.
//   CENTER: lindent and rindent must be within 2 * tolerance_ of each other.
// Any other justification never matches.
bool ParagraphModel::ValidFirstLine(int lmargin, int lindent,
                                    int rindent, int rmargin) const {
  if (justification_ == JUSTIFICATION_LEFT) {
    return NearlyEqual(lmargin + lindent, margin_ + first_indent_,
                       tolerance_);
  }
  if (justification_ == JUSTIFICATION_RIGHT) {
    return NearlyEqual(rmargin + rindent, margin_ + first_indent_,
                       tolerance_);
  }
  if (justification_ == JUSTIFICATION_CENTER) {
    return NearlyEqual(lindent, rindent, tolerance_ * 2);
  }
  // Unknown justification: shouldn't happen.
  return false;
}
// Returns true if a text line with the given whitespace measurements could
// be a body (non-first) line of a paragraph that follows this model.
//   LEFT:   lmargin + lindent must be within tolerance_ of
//           margin_ + body_indent_.
//   RIGHT:  rmargin + rindent must be within tolerance_ of
//           margin_ + body_indent_.
//   CENTER: lindent and rindent must be within 2 * tolerance_ of each other.
// Any other justification never matches.
bool ParagraphModel::ValidBodyLine(int lmargin, int lindent,
                                   int rindent, int rmargin) const {
  if (justification_ == JUSTIFICATION_LEFT) {
    return NearlyEqual(lmargin + lindent, margin_ + body_indent_,
                       tolerance_);
  }
  if (justification_ == JUSTIFICATION_RIGHT) {
    return NearlyEqual(rmargin + rindent, margin_ + body_indent_,
                       tolerance_);
  }
  if (justification_ == JUSTIFICATION_CENTER) {
    return NearlyEqual(lindent, rindent, tolerance_ * 2);
  }
  // Unknown justification: shouldn't happen.
  return false;
}
bool ParagraphModel::Comparable(const ParagraphModel &other) const {
if (justification_ != other.justification_)
return false;
if (justification_ == JUSTIFICATION_CENTER ||
justification_ == JUSTIFICATION_UNKNOWN)
return true;
int tolerance = (tolerance_ + other.tolerance_) / 4;
return NearlyEqual(margin_ + first_indent_,
other.margin_ + other.first_indent_, tolerance) &&
NearlyEqual(margin_ + body_indent_,
other.margin_ + other.body_indent_, tolerance);
}
// Formats the model as a single human-readable line listing the margin,
// both indents and the alignment name. The text is built in a fixed
// 200-byte buffer, so snprintf truncates anything longer.
STRING ParagraphModel::ToString() const {
  char text[200];
  snprintf(text, sizeof(text),
           "margin: %d, first_indent: %d, body_indent: %d, alignment: %s",
           margin_, first_indent_, body_indent_,
           ParagraphJustificationToString(justification_).string());
  return STRING(text);
}

191
ccstruct/ocrpara.h Normal file
View File

@ -0,0 +1,191 @@
/////////////////////////////////////////////////////////////////////
// File: ocrpara.h
// Description: OCR Paragraph Output Type
// Author: David Eger
// Created: 2010-11-15
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_OCRPARA_H_
#define TESSERACT_CCSTRUCT_OCRPARA_H_
#include "publictypes.h"
#include "elst.h"
#include "strngs.h"
class ParagraphModel;
// One paragraph of a block, stored as an ELIST element.
struct PARA : public ELIST_LINK {
  // A new paragraph has no model and all of its flags cleared.
  PARA()
      : model(NULL),
        is_list_item(false),
        is_very_first_or_continuation(false),
        has_drop_cap(false) {}

  // Not owned, only referenced. May be NULL if there is no good model for
  // this paragraph.
  const ParagraphModel *model;

  bool is_list_item;

  // The first paragraph on a page often lacks a first line indent, but
  // should still be modeled by the same model as the other body text
  // paragraphs on the page.
  bool is_very_first_or_continuation;

  // True if this paragraph begins with a drop cap.
  bool has_drop_cap;
};
ELISTIZEH(PARA)
// A geometric model of paragraph indentation and alignment.
//
// Measurements are in pixels. The meaning of the integer arguments changes
// depending upon the value of justification. Distances less than or equal
// to tolerance apart we take as "equivalent" for the purpose of model
// matching, and in the examples below, we assume tolerance is zero.
//
// justification = LEFT:
// margin the "ignored" margin to the left block edge.
// first_indent indent from the left margin to a typical first text line.
// body_indent indent from the left margin of a typical body text line.
//
// justification = RIGHT:
// margin the "ignored" margin to the right block edge.
// first_indent indent from the right margin to a typical first text line.
// body_indent indent from the right margin of a typical body text line.
//
// justification = CENTER:
// margin ignored
// first_indent ignored
// body_indent ignored
//
// ====== Extended example, assuming each letter is ten pixels wide: =======
//
// +--------------------------------+
// | Awesome | ParagraphModel(CENTER, 0, 0, 0)
// | Centered Title |
// | Paragraph Detection |
// | OCR TEAM |
// | 10 November 2010 |
// | |
// | Look here, I have a paragraph.| ParagraphModel(LEFT, 0, 20, 0)
// |This paragraph starts at the top|
// |of the page and takes 3 lines. |
// | Here I have a second paragraph| ParagraphModel(LEFT, 0, 20, 0)
// |which indicates that the first |
// |paragraph is not a continuation |
// |from a previous page, as it is |
// |indented just like this second |
// |paragraph. |
// | Here is a block quote. It | ParagraphModel(LEFT, 30, 0, 0)
// | looks like the prior text |
// | but it is indented more |
// | and is fully justified. |
// | So how does one deal with | ParagraphModel(LEFT, 0, 20, 0)
// |centered text, block quotes, |
// |normal paragraphs, and lists |
// |like what follows? |
// |1. Make a plan. | ParagraphModel(LEFT, 0, 0, 30)
// |2. Use a heuristic, for example,| ParagraphModel(LEFT, 0, 0, 30)
// | looking for lines where the |
// | first word of the next line |
// | would fit on the previous |
// | line. |
// |8. Try to implement the plan in | ParagraphModel(LEFT, 0, 0, 30)
// | Python and try it out. |
// |4. Determine how to fix the | ParagraphModel(LEFT, 0, 0, 30)
// | mistakes. |
// |5. Repeat. | ParagraphModel(LEFT, 0, 0, 30)
// | For extra painful penalty work| ParagraphModel(LEFT, 0, 20, 0)
// |you can try to identify source |
// |code. Ouch! |
// +--------------------------------+
class ParagraphModel {
 public:
  // Builds a model from measurements in pixels. The indents are normalized
  // so that the smaller of first_indent and body_indent becomes 0: that
  // shared amount is moved into the margin.
  ParagraphModel(tesseract::ParagraphJustification justification,
                 int margin,
                 int first_indent,
                 int body_indent,
                 int tolerance)
      : justification_(justification),
        margin_(margin),
        first_indent_(first_indent),
        body_indent_(body_indent),
        tolerance_(tolerance) {
    // Make sure one of {first_indent_, body_indent_} is 0.
    const int shared_indent =
        (body_indent < first_indent) ? body_indent : first_indent;
    margin_ += shared_indent;
    first_indent_ -= shared_indent;
    body_indent_ -= shared_indent;
  }

  // Builds an empty model: unknown justification, all measurements zero.
  ParagraphModel()
      : justification_(tesseract::JUSTIFICATION_UNKNOWN),
        margin_(0),
        first_indent_(0),
        body_indent_(0),
        tolerance_(0) { }

  // ValidFirstLine() and ValidBodyLine() take arguments describing a text
  // line in a block of text which we are trying to model:
  //   lmargin, lindent: these add up to the distance from the leftmost ink
  //                     in the text line to the surrounding text block's
  //                     left edge.
  //   rmargin, rindent: these add up to the distance from the rightmost ink
  //                     in the text line to the surrounding text block's
  //                     right edge.
  // The caller determines the division between "margin" and "indent", which
  // only actually affects whether we think the line may be centered.
  //
  // If the amount of whitespace matches the amount of whitespace expected on
  // the relevant side of the line (within tolerance_) we say it matches.

  // Returns whether a given text line could be a first paragraph line
  // according to this paragraph model.
  bool ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const;

  // Returns whether a given text line could be a body (non-first) paragraph
  // line according to this paragraph model.
  bool ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const;

  // Read-only access to the (normalized) model parameters.
  tesseract::ParagraphJustification justification() const {
    return justification_;
  }
  int margin() const { return margin_; }
  int first_indent() const { return first_indent_; }
  int body_indent() const { return body_indent_; }
  int tolerance() const { return tolerance_; }

  // True for a LEFT or RIGHT justified model whose first-line and body
  // indents differ by no more than the tolerance.
  bool is_flush() const {
    if (justification_ != tesseract::JUSTIFICATION_LEFT &&
        justification_ != tesseract::JUSTIFICATION_RIGHT) {
      return false;
    }
    return abs(first_indent_ - body_indent_) <= tolerance_;
  }

  // Returns whether this model is likely to agree with the other model on
  // most of the paragraphs they mark.
  bool Comparable(const ParagraphModel &other) const;

  // Returns a printable description of the model.
  STRING ToString() const;

 private:
  tesseract::ParagraphJustification justification_;
  int margin_;
  int first_indent_;
  int body_indent_;
  int tolerance_;
};
#endif // TESSERACT_CCSTRUCT_OCRPARA_H_

View File

@ -42,13 +42,18 @@ float ascenders, //ascender size
float descenders, //descender drop
inT16 kern, //char gap
inT16 space //word gap
):
baseline(spline_size, xstarts, coeffs) {
)
: baseline(spline_size, xstarts, coeffs),
para_(NULL) {
kerning = kern; //just store stuff
spacing = space;
xheight = x_height;
ascrise = ascenders;
bodysize = 0.0f;
descdrop = descenders;
has_drop_cap_ = false;
lmargin_ = 0;
rmargin_ = 0;
}
@ -63,13 +68,17 @@ ROW::ROW( //constructor
TO_ROW *to_row, //source row
inT16 kern, //char gap
inT16 space //word gap
) {
) : para_(NULL) {
kerning = kern; //just store stuff
spacing = space;
xheight = to_row->xheight;
bodysize = to_row->body_size;
ascrise = to_row->ascrise;
descdrop = to_row->descdrop;
baseline = to_row->baseline;
has_drop_cap_ = false;
lmargin_ = 0;
rmargin_ = 0;
}
@ -148,12 +157,14 @@ void ROW::move( // reposition row
void ROW::print( //print
FILE *fp //file to print on
) {
tprintf ("Kerning= %d\n", kerning);
tprintf ("Spacing= %d\n", spacing);
bound_box.print ();
tprintf ("Xheight= %f\n", xheight);
tprintf ("Ascrise= %f\n", ascrise);
tprintf ("Descdrop= %f\n", descdrop);
tprintf("Kerning= %d\n", kerning);
tprintf("Spacing= %d\n", spacing);
bound_box.print();
tprintf("Xheight= %f\n", xheight);
tprintf("Ascrise= %f\n", ascrise);
tprintf("Descdrop= %f\n", descdrop);
tprintf("has_drop_cap= %d\n", has_drop_cap_);
tprintf("lmargin= %d, rmargin= %d\n", lmargin_, rmargin_);
}
@ -204,18 +215,21 @@ void ROW::plot( //draw it
* Assign rows by duplicating the row structure but NOT the WERDLIST
**********************************************************************/
ROW & ROW::operator= ( //assignment
const ROW & source //from this
) {
ROW & ROW::operator= (const ROW & source) {
this->ELIST_LINK::operator= (source);
kerning = source.kerning;
spacing = source.spacing;
xheight = source.xheight;
bodysize = source.bodysize;
ascrise = source.ascrise;
descdrop = source.descdrop;
if (!words.empty ())
words.clear ();
baseline = source.baseline; //QSPLINES must do =
bound_box = source.bound_box;
has_drop_cap_ = source.has_drop_cap_;
lmargin_ = source.lmargin_;
rmargin_ = source.rmargin_;
para_ = source.para_;
return *this;
}

View File

@ -1,8 +1,8 @@
/**********************************************************************
* File: ocrrow.h (Formerly row.h)
* Description: Code for the ROW class.
* Author: Ray Smith
* Created: Tue Oct 08 15:58:04 BST 1991
* Author: Ray Smith
* Created: Tue Oct 08 15:58:04 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
@ -20,12 +20,15 @@
#ifndef OCRROW_H
#define OCRROW_H
#include <stdio.h>
#include "quspline.h"
#include "werd.h"
#include <stdio.h>
#include "quspline.h"
#include "werd.h"
class TO_ROW;
class PARA;
class ROW:public ELIST_LINK
{
friend void tweak_row_baseline(ROW *, double, double);
@ -64,6 +67,12 @@ class ROW:public ELIST_LINK
// Returns the stored inter-character gap for this row.
inT32 kern() const { //return kerning
return kerning;
}
// Returns the stored body size (described on the member as the CJK
// character size).
float body_size() const { //return body size
return bodysize;
}
// Overwrites the stored body size with new_size; no validation is done.
void set_body_size(float new_size) { // set body size
bodysize = new_size;
}
// Returns the stored inter-word gap for this row.
inT32 space() const { //return spacing
return spacing;
}
@ -77,6 +86,33 @@ class ROW:public ELIST_LINK
return bound_box;
}
// Stores the distance from this row to the left polyblock margin.
// NOTE(review): the value is held as inT16, so wider ints passed by callers
// are narrowed.
void set_lmargin(inT16 lmargin) {
lmargin_ = lmargin;
}
// Stores the distance from this row to the right polyblock margin.
// NOTE(review): the value is held as inT16, so wider ints passed by callers
// are narrowed.
void set_rmargin(inT16 rmargin) {
rmargin_ = rmargin;
}
// Returns the stored distance to the left polyblock margin (0 until set).
inT16 lmargin() const {
return lmargin_;
}
// Returns the stored distance to the right polyblock margin (0 until set).
inT16 rmargin() const {
return rmargin_;
}
// Records whether this row was found to contain a drop cap.
void set_has_drop_cap(bool has) {
has_drop_cap_ = has;
}
// Returns the drop-cap flag (false until set_has_drop_cap() is called).
bool has_drop_cap() const {
return has_drop_cap_;
}
// Associates this row with the paragraph it is part of. Only the pointer
// is stored; nothing is copied or deleted here.
void set_para(PARA *p) {
para_ = p;
}
// Returns the paragraph this row is part of, or NULL if none has been set.
PARA *para() const {
return para_;
}
void recalc_bounding_box(); //recalculate BB
void move( // reposition row
@ -104,12 +140,22 @@ class ROW:public ELIST_LINK
private:
inT32 kerning; //inter char gap
inT32 spacing; //inter word gap
TBOX bound_box; //bounding box
TBOX bound_box; //bounding box
float xheight; //height of line
float ascrise; //size of ascenders
float descdrop; //-size of descenders
float bodysize; //CJK character size. (equals to
//xheight+ascrise by default)
WERD_LIST words; //words
QSPLINE baseline; //baseline spline
// These get set after blocks have been determined.
bool has_drop_cap_;
inT16 lmargin_; // Distance to left polyblock margin.
inT16 rmargin_; // Distance to right polyblock margin.
// This gets set during paragraph analysis.
PARA *para_; // Paragraph of which this row is part.
};
ELISTIZEH (ROW)

View File

@ -24,6 +24,63 @@
#include "pageres.h"
#include "blobs.h"
// Short text labels for the reasons a recognition result can be blamed.
const char kBlameCorrect[] = "corr";
const char kBlameClassifier[] = "cl";
const char kBlameChopper[] = "chop";
const char kBlameClassLMTradeoff[] = "cl/LM";
const char kBlamePageLayout[] = "pglt";
const char kBlameSegsearchHeur[] = "ss_heur";
const char kBlameSegsearchPP[] = "ss_pp";
const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
const char kBlameAdaption[] = "adapt";
const char kBlameNoTruthSplit[] = "no_tr_spl";
const char kBlameNoTruth[] = "no_tr";
const char kBlameUnknown[] = "unkn";
// Lookup table of the labels above. It is indexed directly by an
// IncorrectResultReason value, so the entry order presumably has to match
// the order of that enum's declaration - verify when adding a reason.
const char * const kIncorrectResultReasonNames[] = {
kBlameCorrect,
kBlameClassifier,
kBlameChopper,
kBlameClassLMTradeoff,
kBlamePageLayout,
kBlameSegsearchHeur,
kBlameSegsearchPP,
kBlameClassOldLMTradeoff,
kBlameAdaption,
kBlameNoTruthSplit,
kBlameNoTruth,
kBlameUnknown
};
// Returns the short label for the given reason by indexing
// kIncorrectResultReasonNames. No range check is made on irr.
const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) {
return kIncorrectResultReasonNames[irr];
}
// Returns the short label for this bundle's own incorrect_result_reason.
// No range check is made on the stored value.
const char *BlamerBundle::IncorrectReason() const {
return kIncorrectResultReasonNames[incorrect_result_reason];
}
void BlamerBundle::FillDebugString(const STRING &msg,
const WERD_CHOICE *choice,
STRING *debug) {
(*debug) += "Truth ";
for (int i = 0; i < this->truth_text.length(); ++i) {
(*debug) += this->truth_text[i];
}
if (!this->truth_has_char_boxes) (*debug) += " (no char boxes)";
if (choice != NULL) {
(*debug) += " Choice ";
STRING choice_str;
choice->string_and_lengths(&choice_str, NULL);
(*debug) += choice_str;
}
if (msg.length() > 0) {
(*debug) += "\n";
(*debug) += msg;
}
(*debug) += "\n";
}
ELISTIZE (BLOCK_RES)
CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES)
/*************************************************************************
@ -34,22 +91,16 @@ CLISTIZE (BLOCK_RES) ELISTIZE (ROW_RES) ELISTIZE (WERD_RES)
PAGE_RES::PAGE_RES(
BLOCK_LIST *the_block_list,
WERD_CHOICE **prev_word_best_choice_ptr) {
Init();
BLOCK_IT block_it(the_block_list);
BLOCK_RES_IT block_res_it(&block_res_list);
char_count = 0;
rej_count = 0;
rejected = FALSE;
for (block_it.mark_cycle_pt();
!block_it.cycled_list(); block_it.forward()) {
block_res_it.add_to_end(new BLOCK_RES(block_it.data()));
}
prev_word_best_choice = prev_word_best_choice_ptr;
}
/*************************************************************************
* BLOCK_RES::BLOCK_RES
*
@ -72,8 +123,7 @@ BLOCK_RES::BLOCK_RES(BLOCK *the_block) {
block = the_block;
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
row_res_it.add_to_end(new ROW_RES(the_block->right_to_left(),
row_it.data()));
row_res_it.add_to_end(new ROW_RES(row_it.data()));
}
}
@ -84,8 +134,7 @@ BLOCK_RES::BLOCK_RES(BLOCK *the_block) {
* Constructor for ROW results
*************************************************************************/
ROW_RES::ROW_RES(bool right_to_left,
ROW *the_row) {
ROW_RES::ROW_RES(ROW *the_row) {
WERD_IT word_it(the_row->word_list());
WERD_RES_IT word_res_it(&word_res_list);
WERD_RES *combo = NULL; // current combination of fuzzies
@ -97,13 +146,17 @@ ROW_RES::ROW_RES(bool right_to_left,
whole_word_rej_count = 0;
row = the_row;
if (right_to_left) {
word_it.move_to_last();
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.backward()) {
word_res = new WERD_RES(word_it.data());
word_res->x_height = the_row->x_height();
// A FUZZY_NON marks the beginning of a combo if we are not in one.
if (combo == NULL && word_res->word->flag(W_FUZZY_NON)) {
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word_res = new WERD_RES(word_it.data());
word_res->x_height = the_row->x_height();
if (word_res->word->flag(W_FUZZY_NON)) {
ASSERT_HOST(combo != NULL);
word_res->part_of_combo = TRUE;
combo->copy_on(word_res);
}
if (word_it.data_relative(1)->flag(W_FUZZY_NON)) {
if (combo == NULL) {
copy_word = new WERD;
//deep copy
*copy_word = *(word_it.data());
@ -111,42 +164,12 @@ ROW_RES::ROW_RES(bool right_to_left,
combo->x_height = the_row->x_height();
combo->combination = TRUE;
word_res_it.add_to_end(combo);
word_res->part_of_combo = TRUE;
} else if (combo != NULL) {
word_res->part_of_combo = TRUE;
combo->copy_on(word_res);
// The first non FUZZY_NON is the last word in the combo.
if (!word_res->word->flag(W_FUZZY_NON))
combo = NULL;
}
word_res_it.add_to_end(word_res);
}
} else {
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word_res = new WERD_RES(word_it.data());
word_res->x_height = the_row->x_height();
if (word_res->word->flag(W_FUZZY_NON)) {
ASSERT_HOST(combo != NULL);
word_res->part_of_combo = TRUE;
combo->copy_on(word_res);
}
if (word_it.data_relative(1)->flag(W_FUZZY_NON)) {
if (combo == NULL) {
copy_word = new WERD;
//deep copy
*copy_word = *(word_it.data());
combo = new WERD_RES(copy_word);
combo->x_height = the_row->x_height();
combo->combination = TRUE;
word_res_it.add_to_end(combo);
}
word_res->part_of_combo = TRUE;
} else {
combo = NULL;
}
word_res_it.add_to_end(word_res);
word_res->part_of_combo = TRUE;
} else {
combo = NULL;
}
word_res_it.add_to_end(word_res);
}
}
@ -174,10 +197,8 @@ WERD_RES& WERD_RES::operator=(const WERD_RES & source) {
correct_text = source.correct_text;
if (source.best_choice != NULL) {
best_choice = new WERD_CHOICE;
*best_choice = *(source.best_choice);
raw_choice = new WERD_CHOICE;
*raw_choice = *(source.raw_choice);
best_choice = new WERD_CHOICE(*source.best_choice);
raw_choice = new WERD_CHOICE(*source.raw_choice);
best_choice_fontinfo_ids = source.best_choice_fontinfo_ids;
}
else {
@ -187,16 +208,24 @@ WERD_RES& WERD_RES::operator=(const WERD_RES & source) {
best_choice_fontinfo_ids.clear();
}
}
if (source.ep_choice != NULL) {
ep_choice = new WERD_CHOICE;
*ep_choice = *(source.ep_choice);
for (int i = 0; i < source.alt_choices.length(); ++i) {
const WERD_CHOICE *choice = source.alt_choices[i];
ASSERT_HOST(choice != NULL);
alt_choices.push_back(new WERD_CHOICE(*choice));
}
else
alt_states = source.alt_states;
if (source.ep_choice != NULL) {
ep_choice = new WERD_CHOICE(*source.ep_choice);
} else {
ep_choice = NULL;
}
reject_map = source.reject_map;
combination = source.combination;
part_of_combo = source.part_of_combo;
CopySimpleFields(source);
if (source.blamer_bundle != NULL) {
blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
}
return *this;
}
@ -211,54 +240,222 @@ void WERD_RES::CopySimpleFields(const WERD_RES& source) {
small_caps = source.small_caps;
italic = source.italic;
bold = source.bold;
fontinfo_id = source.fontinfo_id;
fontinfo = source.fontinfo;
fontinfo2 = source.fontinfo2;
fontinfo_id_count = source.fontinfo_id_count;
fontinfo_id2 = source.fontinfo_id2;
fontinfo_id2_count = source.fontinfo_id2_count;
x_height = source.x_height;
caps_height = source.caps_height;
guessed_x_ht = source.guessed_x_ht;
guessed_caps_ht = source.guessed_caps_ht;
reject_spaces = source.reject_spaces;
uch_set = source.uch_set;
tesseract = source.tesseract;
}
// Prepares a blank (default constructed) WERD_RES for another recognition
// attempt on a word that source has already recognized. The word pointer
// and the simple fields are taken from source; if source has a blamer
// bundle, a new bundle is allocated and only the truth is copied into it.
// Call SetupFor*Recognition afterwards to complete the setup.
void WERD_RES::InitForRetryRecognition(const WERD_RES& source) {
  word = source.word;
  CopySimpleFields(source);
  if (source.blamer_bundle == NULL)
    return;  // Nothing to blame against.
  blamer_bundle = new BlamerBundle();
  blamer_bundle->CopyTruth(*source.blamer_bundle);
}
// Sets up the members used in recognition:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset,
bool numeric_mode, ROW *row, BLOCK* block) {
ClearResults();
if (word->cblob_list()->empty()) {
bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tess, Pix* pix,
bool numeric_mode,
bool use_body_size,
ROW *row, BLOCK* block) {
tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) {
// Empty words occur when all the blobs have been moved to the rej_blobs
// list, which seems to occur frequently in junk.
chopped_word = new TWERD;
rebuild_word = new TWERD;
bln_boxes = new tesseract::BoxWord;
box_word = new tesseract::BoxWord;
best_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
TOP_CHOICE_PERM, unicharset);
raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
TOP_CHOICE_PERM, unicharset);
tess_failed = true;
SetupFake(unicharset_in);
word->set_flag(W_REP_CHAR, false);
return false;
}
ClearResults();
SetupWordScript(unicharset_in);
chopped_word = TWERD::PolygonalCopy(word);
chopped_word->SetupBLNormalize(block, row, x_height, numeric_mode, &denorm);
if (use_body_size && row->body_size() > 0.0f) {
chopped_word->SetupBLNormalize(block, row, row->body_size(),
numeric_mode, &denorm);
} else {
chopped_word->SetupBLNormalize(block, row, x_height, numeric_mode, &denorm);
}
// The image will be 8-bit grey if the input was grey or color. Note that in
// a grey image 0 is black and 255 is white. If the input was binary, then
// the pix will be binary and 0 is white, with 1 being black.
// To tell the difference pixGetDepth() will return 8 or 1.
denorm.set_pix(pix);
// The inverse flag will be true iff the word has been determined to be white
// on black, and is independent of whether the pix is 8 bit or 1 bit.
denorm.set_inverse(word->flag(W_INVERSE));
chopped_word->Normalize(denorm);
bln_boxes = tesseract::BoxWord::CopyFromNormalized(NULL, chopped_word);
seam_array = start_seam_list(chopped_word->blobs);
best_choice = new WERD_CHOICE;
best_choice = new WERD_CHOICE(&unicharset_in);
best_choice->make_bad();
raw_choice = new WERD_CHOICE;
raw_choice = new WERD_CHOICE(&unicharset_in);
raw_choice->make_bad();
SetupBlamerBundle();
return true;
}
// Sets up the members used in recognition:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForCubeRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tess,
const BLOCK* block) {
tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if (pb != NULL && !pb->IsText()) {
// Ignore words in graphic regions.
SetupFake(unicharset_in);
word->set_flag(W_REP_CHAR, false);
return false;
}
ClearResults();
SetupWordScript(unicharset_in);
TBOX word_box = word->bounding_box();
denorm.SetupNormalization(block, NULL, NULL, NULL, NULL, 0,
word_box.left(), word_box.bottom(),
1.0f, 1.0f, 0.0f, 0.0f);
SetupBlamerBundle();
return true;
}
// Sets up the members used in recognition for an empty recognition result:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {
ClearResults();
SetupWordScript(unicharset_in);
chopped_word = new TWERD;
rebuild_word = new TWERD;
bln_boxes = new tesseract::BoxWord;
box_word = new tesseract::BoxWord;
int blob_count = word->cblob_list()->length();
best_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
TOP_CHOICE_PERM, unicharset_in);
raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
TOP_CHOICE_PERM, unicharset_in);
if (blob_count > 0) {
BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
// For non-text blocks, just pass any blobs through to the box_word
// and call the word failed with a fake classification.
C_BLOB_IT b_it(word->cblob_list());
int blob_id = 0;
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
TBOX box = b_it.data()->bounding_box();
box_word->InsertBox(box_word->length(), box);
fake_choices[blob_id++] = new BLOB_CHOICE(0, 10.0f, -1.0f,
-1, -1, -1, 0, 0, false);
}
FakeClassifyWord(blob_count, fake_choices);
delete [] fake_choices;
}
tess_failed = true;
}
// Remembers uch as this result's unicharset and stamps the word with the
// unicharset's default script id, plus flags saying whether that script has
// an x-height and whether it is the Latin script.
void WERD_RES::SetupWordScript(const UNICHARSET& uch) {
  uch_set = &uch;
  int default_script = uch.default_sid();
  word->set_script_id(default_script);
  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
  bool is_latin = (default_script == uch.latin_sid());
  word->set_flag(W_SCRIPT_IS_LATIN, is_latin);
}
// Sets up the blamer_bundle if it is not null, using the initialized denorm.
void WERD_RES::SetupBlamerBundle() {
if (blamer_bundle != NULL) {
blamer_bundle->norm_box_tolerance = kBlamerBoxTolerance * denorm.x_scale();
TPOINT topleft;
TPOINT botright;
TPOINT norm_topleft;
TPOINT norm_botright;
for (int b = 0; b < blamer_bundle->truth_word.length(); ++b) {
const TBOX &box = blamer_bundle->truth_word.BlobBox(b);
topleft.x = box.left();
topleft.y = box.top();
botright.x = box.right();
botright.y = box.bottom();
denorm.NormTransform(topleft, &norm_topleft);
denorm.NormTransform(botright, &norm_botright);
TBOX norm_box(norm_topleft.x, norm_botright.y,
norm_botright.x, norm_topleft.y);
blamer_bundle->norm_truth_word.InsertBox(b, norm_box);
}
}
}
// Transfers ownership of the object *src points at into *dest.
// Whatever *dest pointed at before is deleted first, and *src is set to
// NULL afterwards so that only *dest refers to the moved object.
template<class T> static void MovePointerData(T** dest, T**src) {
  T*& target = *dest;
  T*& source = *src;
  delete target;
  target = source;
  source = NULL;
}
// Moves the results fields from word to this. This takes ownership of all
// the data, so the passed-in word can be destructed afterwards.
// NOTE: the parameter is named `word`, which hides the `word` member of
// this object for the whole function body; every `word->` below refers to
// the argument.
void WERD_RES::ConsumeWordResults(WERD_RES* word) {
denorm = word->denorm;
// Pointer members: delete ours, take theirs, null theirs.
MovePointerData(&chopped_word, &word->chopped_word);
MovePointerData(&rebuild_word, &word->rebuild_word);
MovePointerData(&box_word, &word->box_word);
// seam_array is released with free_seam_list rather than delete, so it is
// moved by hand instead of through MovePointerData.
if (seam_array != NULL)
free_seam_list(seam_array);
seam_array = word->seam_array;
word->seam_array = NULL;
best_state.move(&word->best_state);
correct_text.move(&word->correct_text);
MovePointerData(&best_choice, &word->best_choice);
MovePointerData(&raw_choice, &word->raw_choice);
// Delete the choices we currently hold before taking over the argument's.
alt_choices.delete_data_pointers();
alt_choices.move(&word->alt_choices);
alt_states.move(&word->alt_states);
reject_map = word->reject_map;
// The blamer bundle is not moved: results are copied into our own bundle,
// which must already exist if the argument has one.
// NOTE(review): assert() is compiled out under NDEBUG, in which case a NULL
// blamer_bundle would be dereferenced here.
if (word->blamer_bundle != NULL) {
assert(blamer_bundle != NULL);
blamer_bundle->CopyResults(*(word->blamer_bundle));
}
CopySimpleFields(*word);
}
// Replaces best_choice with a copy of choice and best_state with
// segmentation_state, then rebuilds everything derived from them: the
// rebuild word, the box word, a fresh reject map sized to the segmentation,
// and the script positions. The word is marked done, accepted and
// adaptable.
void WERD_RES::ReplaceBestChoice(
    const WERD_CHOICE& choice,
    const GenericVector<int>& segmentation_state) {
  // The old choice is deleted before choice is copied, exactly as before.
  delete best_choice;
  best_choice = new WERD_CHOICE(choice);
  best_state = segmentation_state;
  RebuildBestState();
  SetupBoxWord();
  // Make up a fake reject map of the right length to keep the
  // rejection pass happy.
  reject_map.initialise(segmentation_state.length());
  tess_would_adapt = true;
  tess_accepted = true;
  done = true;
  SetScriptPositions();
}
// Builds the rebuild_word from the chopped_word and the best_state.
void WERD_RES::RebuildBestState() {
if (rebuild_word != NULL)
delete rebuild_word;
rebuild_word = new TWERD;
if (seam_array == NULL) {
seam_array = start_seam_list(chopped_word->blobs);
}
TBLOB* prev_blob = NULL;
int start = 0;
for (int i = 0; i < best_state.size(); ++i) {
@ -305,18 +502,51 @@ void WERD_RES::SetupBoxWord() {
// Sets up the script positions in the output boxword using the best_choice
// to get the unichars, and the unicharset to get the target positions.
void WERD_RES::SetScriptPositions(const UNICHARSET& unicharset) {
box_word->SetScriptPositions(unicharset, small_caps, rebuild_word,
void WERD_RES::SetScriptPositions() {
box_word->SetScriptPositions(*uch_set, small_caps, rebuild_word,
best_choice);
}
// Computes the half-open index range [*pstart, *pend) of best_choice that is
// left after trimming digits positioned as superscripts, first from the end
// of the word and then from its start.
// Requires best_choice, uch_set and box_word to be set up.
void WERD_RES::WithoutFootnoteSpan(int *pstart, int *pend) const {
  // Walk backwards over trailing superscript digits.
  int last = best_choice->length();
  for (; last > 0; --last) {
    if (!uch_set->get_isdigit(best_choice->unichar_ids()[last - 1]))
      break;
    if (!(box_word->BlobPosition(last - 1) == tesseract::SP_SUPERSCRIPT))
      break;
  }
  // Walk forwards over leading superscript digits, never passing last.
  int first = 0;
  for (; first < last; ++first) {
    if (!uch_set->get_isdigit(best_choice->unichar_ids()[first]))
      break;
    if (!(box_word->BlobPosition(first) == tesseract::SP_SUPERSCRIPT))
      break;
  }
  *pstart = first;
  *pend = last;
}
void WERD_RES::WithoutFootnoteSpan(
const WERD_CHOICE &word, const GenericVector<int> &state,
int *pstart, int *pend) const {
int len = word.length();
*pstart = 0;
*pend = len;
if (len < 2) return;
if (!word.unicharset()->get_isdigit(word.unichar_ids()[len - 1]) &&
!word.unicharset()->get_isdigit(word.unichar_ids()[0])) return;
// ok, now that we know the word ends in digits, do the expensive bit of
// figuring out if they're superscript.
WERD_RES copy(*this);
copy.ReplaceBestChoice(word, state);
copy.WithoutFootnoteSpan(pstart, pend);
}
// Classifies the word with some already-calculated BLOB_CHOICEs.
// The choices are an array of blob_count pointers to BLOB_CHOICE,
// providing a single classifier result for each blob.
// The BLOB_CHOICEs are consumed and the word takes ownership.
// The number of blobs in the outword must match blob_count.
void WERD_RES::FakeClassifyWord(const UNICHARSET& unicharset, int blob_count,
BLOB_CHOICE** choices) {
void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
// Setup the WERD_RES.
ASSERT_HOST(box_word != NULL);
ASSERT_HOST(blob_count == box_word->length());
@ -333,19 +563,19 @@ void WERD_RES::FakeClassifyWord(const UNICHARSET& unicharset, int blob_count,
bc_it.add_after_then_move(choice_list);
}
best_choice->set_blob_choices(word_choices);
best_choice->populate_unichars(unicharset);
best_choice->populate_unichars();
delete raw_choice;
raw_choice = new WERD_CHOICE(*best_choice);
reject_map.initialise(blob_count);
}
// Copies the best_choice strings to the correct_text for adaption/training.
void WERD_RES::BestChoiceToCorrectText(const UNICHARSET& unicharset) {
void WERD_RES::BestChoiceToCorrectText() {
correct_text.clear();
ASSERT_HOST(best_choice != NULL);
for (int i = 0; i < best_choice->length(); ++i) {
UNICHAR_ID choice_id = best_choice->unichar_id(i);
const char* blob_choice = unicharset.id_to_unichar(choice_id);
const char* blob_choice = uch_set->id_to_unichar(choice_id);
correct_text.push_back(STRING(blob_choice));
}
}
@ -356,7 +586,6 @@ void WERD_RES::BestChoiceToCorrectText(const UNICHARSET& unicharset) {
// result to the class returned from class_cb.
// Returns true if anything was merged.
bool WERD_RES::ConditionalBlobMerge(
const UNICHARSET& unicharset,
TessResultCallback2<UNICHAR_ID, UNICHAR_ID, UNICHAR_ID>* class_cb,
TessResultCallback2<bool, const TBOX&, const TBOX&>* box_cb,
@ -405,20 +634,153 @@ bool WERD_RES::ConditionalBlobMerge(
delete class_cb;
delete box_cb;
if (modified) {
best_choice->populate_unichars(unicharset);
raw_choice->populate_unichars(unicharset);
best_choice->populate_unichars();
raw_choice->populate_unichars();
}
return modified;
}
// TODO(tkielbus) Decide between keeping this behavior here or modifying the
// training data.
// Utility function for fix_quotes.
// Returns non-zero if the UTF-8 character at signed_str, which occupies
// length bytes, is a single quote: ASCII ' or `, or one of the curved single
// quotes U+2018 / U+2019 (bytes e2 80 98 / e2 80 99).
static int is_simple_quote(const char* signed_str, int length) {
  // Compare as unsigned bytes so that values >= 0x80 match on platforms
  // where char is signed.
  const unsigned char* bytes =
      reinterpret_cast<const unsigned char*>(signed_str);
  if (length == 1) {
    // Standard 1 byte quotes.
    return bytes[0] == '\'' || bytes[0] == '`';
  }
  if (length == 3) {
    // UTF-8 3 byte curved quotes share their first two bytes.
    return bytes[0] == 0xe2 && bytes[1] == 0x80 &&
           (bytes[2] == 0x98 || bytes[2] == 0x99);
  }
  return false;
}
// Callback helper for fix_quotes.
// Returns the id of the double quote character if both id1 and id2 are
// simple (single) quotes, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  if (!is_simple_quote(first, strlen(first)))
    return INVALID_UNICHAR_ID;
  if (!is_simple_quote(second, strlen(second)))
    return INVALID_UNICHAR_ID;
  return uch_set->unichar_to_id("\"");
}
// Changes pairs of adjacent single quotes to a double quote, provided the
// unicharset contains the double quote and has it enabled.
void WERD_RES::fix_quotes(BLOB_CHOICE_LIST_CLIST* blob_choices) {
  const char* const double_quote = "\"";
  // Don't create it if it is disallowed.
  if (!uch_set->contains_unichar(double_quote))
    return;
  if (!uch_set->get_enabled(uch_set->unichar_to_id(double_quote)))
    return;
  // No box callback: the merge is decided on the classes alone.
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothQuotes),
      NULL,
      blob_choices);
}
// Callback helper for fix_hyphens.
// Returns the UNICHAR_ID of "-" if id1 and id2 are each a single-byte
// hyphen-like character ('-' or '~'), otherwise INVALID_UNICHAR_ID.
UNICHAR_ID WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) {
  const char *first = uch_set->id_to_unichar(id1);
  const char *second = uch_set->id_to_unichar(id2);
  if (strlen(first) != 1 || strlen(second) != 1)
    return INVALID_UNICHAR_ID;
  const bool first_is_dash = (*first == '-' || *first == '~');
  const bool second_is_dash = (*second == '-' || *second == '~');
  if (first_is_dash && second_is_dash)
    return uch_set->unichar_to_id("-");
  return INVALID_UNICHAR_ID;
}
// Callback helper for fix_hyphens.
// Returns true if box2 starts at or before the right edge of box1, i.e. the
// boxes touch or overlap horizontally. Assumes both boxes are on the same
// textline, are given in order, and belong to a chopped em dash.
bool WERD_RES::HyphenBoxesOverlap(const TBOX& box1, const TBOX& box2) {
  return box2.left() <= box1.right();
}
// Changes pairs of hyphens to a single hyphen if their bounding boxes touch.
// Typically this is a long dash that has been segmented in two.
// Does nothing unless the unicharset contains "-" and has it enabled.
void WERD_RES::fix_hyphens(BLOB_CHOICE_LIST_CLIST *blob_choices) {
  const char* const hyphen = "-";
  // Don't create it if it is disallowed.
  if (!uch_set->contains_unichar(hyphen))
    return;
  if (!uch_set->get_enabled(uch_set->unichar_to_id(hyphen)))
    return;
  ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothHyphens),
      NewPermanentTessCallback(this, &WERD_RES::HyphenBoxesOverlap),
      blob_choices);
}
// Callback helper for merge_tess_fails.
// Returns the space id if both arguments are the space character, otherwise
// INVALID_UNICHAR_ID.
UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) {
  if (id1 != id2)
    return INVALID_UNICHAR_ID;
  return id1 == uch_set->unichar_to_id(" ") ? id1 : INVALID_UNICHAR_ID;
}
// Changes pairs of tess failures (adjacent spaces in best_choice) to a
// single one, then checks that reject_map and box_word still agree in length
// with best_choice.
void WERD_RES::merge_tess_fails() {
  const bool merged = ConditionalBlobMerge(
      NewPermanentTessCallback(this, &WERD_RES::BothSpaces), NULL,
      best_choice->blob_choices());
  if (!merged)
    return;
  int len = best_choice->length();
  ASSERT_HOST(reject_map.length() == len);
  ASSERT_HOST(box_word->length() == len);
}
// Returns true if the collection of count pieces, starting at start, are all
// natural connected components, i.e. none of the count - 1 seams between
// them carries a split. Seam indices outside seam_array are ignored.
bool WERD_RES::PiecesAllNatural(int start, int count) const {
  const int seam_limit = start + count - 1;
  for (int seam_index = start; seam_index < seam_limit; ++seam_index) {
    if (seam_index < 0 || seam_index >= array_count(seam_array))
      continue;
    SEAM* seam = reinterpret_cast<SEAM *>(array_value(seam_array, seam_index));
    if (seam == NULL)
      continue;
    // A seam with a first split is a real chop.
    if (seam->split1 != NULL)
      return false;
  }
  return true;
}
// Destructor: all cleanup is delegated to Clear().
WERD_RES::~WERD_RES () {
Clear();
}
// Resets every non-owning field to its default: flags cleared, font votes
// zeroed, heights zero but marked as guessed.
void WERD_RES::InitNonPointers() {
  // Recognition status flags.
  tess_failed = tess_accepted = tess_would_adapt = done = FALSE;
  unlv_crunch_mode = CR_NONE;

  // Word style.
  small_caps = false;
  italic = bold = FALSE;

  // The fontinfos and tesseract count as non-pointers as they point to
  // data owned elsewhere.
  fontinfo = fontinfo2 = NULL;
  tesseract = NULL;
  fontinfo_id_count = fontinfo_id2_count = 0;

  // Height estimates: nothing measured yet, so both count as guessed.
  x_height = caps_height = 0.0;
  guessed_x_ht = guessed_caps_ht = TRUE;

  // Combination / fuzzy space bookkeeping.
  combination = part_of_combo = reject_spaces = FALSE;
}
void WERD_RES::InitPointers() {
word = NULL;
bln_boxes = NULL;
uch_set = NULL;
chopped_word = NULL;
rebuild_word = NULL;
box_word = NULL;
@ -426,17 +788,25 @@ void WERD_RES::InitPointers() {
best_choice = NULL;
raw_choice = NULL;
ep_choice = NULL;
blamer_bundle = NULL;
}
void WERD_RES::Clear() {
if (word != NULL && combination)
if (word != NULL && combination) {
delete word;
}
word = NULL;
delete blamer_bundle;
blamer_bundle = NULL;
ClearResults();
}
void WERD_RES::ClearResults() {
done = false;
fontinfo = NULL;
fontinfo2 = NULL;
fontinfo_id_count = 0;
fontinfo_id2_count = 0;
if (bln_boxes != NULL) {
delete bln_boxes;
bln_boxes = NULL;
@ -465,18 +835,93 @@ void WERD_RES::ClearResults() {
best_choice = NULL;
raw_choice = NULL;
}
if (!alt_choices.empty()) {
alt_choices.delete_data_pointers();
alt_choices.clear();
}
alt_states.clear();
if (ep_choice != NULL) {
delete ep_choice;
ep_choice = NULL;
}
if (blamer_bundle != NULL) blamer_bundle->ClearResults();
}
// Two iterators are equal when they point at the same word, row and block.
bool PAGE_RES_IT::operator ==(const PAGE_RES_IT &other) const {
  if (word_res != other.word_res)
    return false;
  if (row_res != other.row_res)
    return false;
  return block_res == other.block_res;
}
// Three-way comparison of the positions of two iterators over the same
// PAGE_RES (asserted). Returns 0 if both are at the same position, -1 if this
// comes before other and 1 if this comes after other.
// A NULL block_res means the iterator is at the end of the page, which sorts
// after every real position.
// Order within a list is decided by walking it from its start and seeing
// which of the two elements is met first.
int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
ASSERT_HOST(page_res == other.page_res);
if (other.block_res == NULL) {
// other points to the end of the page.
if (block_res == NULL)
return 0;
return -1;
}
if (block_res == NULL) {
return 1; // we point to the end of the page.
}
if (block_res == other.block_res) {
if (other.row_res == NULL || row_res == NULL) {
// this should only happen if we hit an image block.
return 0;
}
if (row_res == other.row_res) {
// we point to the same block and row.
ASSERT_HOST(other.word_res != NULL && word_res != NULL);
if (word_res == other.word_res) {
// we point to the same word!
return 0;
}
// Same row, different words: whichever word is met first is the earlier.
WERD_RES_IT word_res_it(&row_res->word_res_list);
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
if (word_res_it.data() == word_res) {
return -1;
} else if (word_res_it.data() == other.word_res) {
return 1;
}
}
// Neither word was found in the row. A string literal never equals NULL,
// so this assertion always fails when reached.
ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == NULL);
}
// we both point to the same block, but different rows.
ROW_RES_IT row_res_it(&block_res->row_res_list);
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
row_res_it.forward()) {
if (row_res_it.data() == row_res) {
return -1;
} else if (row_res_it.data() == other.row_res) {
return 1;
}
}
// Neither row was found in the block: always-failing assertion, as above.
ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == NULL);
}
// We point to different blocks.
BLOCK_RES_IT block_res_it(&page_res->block_res_list);
for (block_res_it.mark_cycle_pt();
!block_res_it.cycled_list(); block_res_it.forward()) {
if (block_res_it.data() == block_res) {
return -1;
} else if (block_res_it.data() == other.block_res) {
return 1;
}
}
// Shouldn't happen...
ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == NULL);
// NOTE(review): presumably only reached if ASSERT_HOST does not abort.
return 0;
}
// Inserts the new_word and a corresponding WERD_RES before the current
// position. The simple fields of the WERD_RES are copied from clone_res and
// the resulting WERD_RES is returned for further setup with best_choice etc.
WERD_RES* PAGE_RES_IT::InsertCloneWord(const WERD_RES& clone_res,
WERD* new_word) {
WERD_RES* PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES& clone_res,
WERD* new_word) {
// Insert new_word into the ROW.
WERD_IT w_it(row()->row->word_list());
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
@ -652,6 +1097,34 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) {
return word_res;
}
/*************************************************************************
 * PAGE_RES_IT::restart_row()
 *
 * Move to the beginning (leftmost word) of the current row.
 * Returns NULL without moving if there is no current row. Otherwise the
 * iterator is rewound to the start of the page and stepped forward until
 * it is back on the row it started on.
 *************************************************************************/
WERD_RES *PAGE_RES_IT::restart_row() {
  ROW_RES *target_row = this->row();
  if (!target_row)
    return NULL;
  restart_page();
  while (this->row() != target_row) {
    forward();
  }
  return word();
}
/*************************************************************************
 * PAGE_RES_IT::forward_paragraph
 *
 * Move to the beginning of the next paragraph, allowing empty blocks.
 * Steps forward for as long as the next position is in the same block and
 * its row belongs to the same paragraph as the current row, then takes one
 * more step to leave the current paragraph.
 *************************************************************************/
WERD_RES *PAGE_RES_IT::forward_paragraph() {
  for (;;) {
    if (!(block_res == next_block_res))
      break;
    if (next_row_res == NULL || next_row_res->row == NULL)
      break;
    if (!(row_res->row->para() == next_row_res->row->para()))
      break;
    internal_forward(false, true);
  }
  return internal_forward(false, true);
}
/*************************************************************************
* PAGE_RES_IT::forward_block
@ -666,7 +1139,6 @@ WERD_RES *PAGE_RES_IT::forward_block() {
return internal_forward(false, true);
}
void PAGE_RES_IT::rej_stat_word() {
inT16 chars_in_word;
inT16 rejects_in_word = 0;

View File

@ -26,11 +26,179 @@
#include "normalis.h"
#include "ocrblock.h"
#include "ocrrow.h"
#include "params_training_featdef.h"
#include "ratngs.h"
#include "rejctmap.h"
#include "seam.h"
#include "werd.h"
namespace tesseract {
struct FontInfo;
class Tesseract;
}
using tesseract::FontInfo;
static const inT16 kBlamerBoxTolerance = 5;
// Enum for expressing the source of error.
// Note: Please update kIncorrectResultReasonNames when modifying this enum.
enum IncorrectResultReason {
// The text recorded in best choice == truth text.
IRR_CORRECT,
// Either: Top choice is incorrect and is a dictionary word (language model
// is unlikely to help correct such errors, so blame the classifier).
// Or: the correct unichar was not included in the shortlist produced by the
// classifier at all.
IRR_CLASSIFIER,
// The chopper did not find one or more splits that correspond to the correct
// character bounding boxes recorded in BlamerBundle::truth_word.
IRR_CHOPPER,
// The classifier did include correct unichars for each blob in the correct
// segmentation, however its rating could have been too bad to allow the
// language model to pull out the correct choice. On the other hand the
// strength of the language model might have been too weak to favor the
// correct answer, thus we call this case a classifier-language model
// tradeoff error.
IRR_CLASS_LM_TRADEOFF,
// Page layout failed to produce the correct bounding box. Blame page layout
// if the truth was not found for the word, which implies that the bounding
// box of the word was incorrect (no truth word had a similar bounding box).
IRR_PAGE_LAYOUT,
// SegSearch heuristic prevented one or more blobs from the correct
// segmentation state from being classified (e.g. the blob was too wide).
IRR_SEGSEARCH_HEUR,
// The correct segmentation state was not explored because of poor SegSearch
// pain point prioritization. We blame SegSearch pain point prioritization
// if the best rating of a choice constructed from correct segmentation is
// better than that of the best choice (i.e. if we got to explore the correct
// segmentation state, language model would have picked the correct choice).
IRR_SEGSEARCH_PP,
// Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
// and thus use the old language model (permuters).
// TODO(antonova): integrate the new language model with chopper
IRR_CLASS_OLD_LM_TRADEOFF,
// If there is an incorrect adaptive template match with a better score than
// a correct one (either pre-trained or adapted), mark this as adaption error.
IRR_ADAPTION,
// split_and_recog_word() failed to find a suitable split in truth.
IRR_NO_TRUTH_SPLIT,
// Truth is not available for this word (e.g. when words in the corrected
// content file are turned into ~~~~ because an appropriate alignment was not
// found).
IRR_NO_TRUTH,
// The text recorded in best choice != truth text, but none of the above
// reasons are set.
IRR_UNKNOWN,
// Number of reasons above; not itself a reason.
IRR_NUM_REASONS
};
// Blamer-related information to determine the source of errors.
// The bundle has two groups of data:
//   - truth: truth_has_char_boxes, truth_word, truth_text (see CopyTruth);
//   - results: everything reset by ClearResults / copied by CopyResults.
// lattice_data is a new[]-allocated buffer owned by the bundle.
struct BlamerBundle {
  // Returns the printable name of the given reason.
  static const char *IncorrectReasonName(IncorrectResultReason irr);

  BlamerBundle() : truth_has_char_boxes(false),
                   incorrect_result_reason(IRR_CORRECT),
                   lattice_data(NULL) { ClearResults(); }
  ~BlamerBundle() { delete[] lattice_data; }

  // Resets the results fields. incorrect_result_reason is kept when it
  // records that there is no truth (see NoTruth()), otherwise it goes back
  // to IRR_CORRECT. The truth fields are left alone.
  void ClearResults() {
    norm_truth_word.DeleteAllBoxes();
    norm_box_tolerance = 0;
    if (!NoTruth()) incorrect_result_reason = IRR_CORRECT;
    debug = "";
    segsearch_is_looking_for_blame = false;
    best_correctly_segmented_rating = WERD_CHOICE::kBadRating;
    correct_segmentation_cols.clear();
    correct_segmentation_rows.clear();
    best_choice_is_dict_and_top_choice = false;
    delete[] lattice_data;
    lattice_data = NULL;
    lattice_size = 0;
  }

  // Copies the truth fields from other. The reason is carried over only if
  // other has no truth; otherwise it is reset to IRR_CORRECT.
  void CopyTruth(const BlamerBundle &other) {
    truth_has_char_boxes = other.truth_has_char_boxes;
    truth_word = other.truth_word;
    truth_text = other.truth_text;
    incorrect_result_reason =
        (other.NoTruth() ? other.incorrect_result_reason : IRR_CORRECT);
  }

  // Copies the results fields from other, including a deep copy of the
  // lattice. Any lattice already held by this bundle is released first.
  // The debug strings and params_training_bundle are not copied.
  void CopyResults(const BlamerBundle &other) {
    // Self-copy would otherwise free the buffer we are about to read.
    if (this == &other) return;
    norm_truth_word = other.norm_truth_word;
    norm_box_tolerance = other.norm_box_tolerance;
    incorrect_result_reason = other.incorrect_result_reason;
    segsearch_is_looking_for_blame = other.segsearch_is_looking_for_blame;
    best_correctly_segmented_rating = other.best_correctly_segmented_rating;
    correct_segmentation_cols = other.correct_segmentation_cols;
    correct_segmentation_rows = other.correct_segmentation_rows;
    best_choice_is_dict_and_top_choice =
        other.best_choice_is_dict_and_top_choice;
    // Release the old buffer so that copying into a populated bundle does
    // not leak it.
    delete[] lattice_data;
    if (other.lattice_data != NULL) {
      lattice_data = new char[other.lattice_size];
      memcpy(lattice_data, other.lattice_data, other.lattice_size);
      lattice_size = other.lattice_size;
    } else {
      lattice_data = NULL;
      // Keep the size consistent with the (absent) buffer.
      lattice_size = 0;
    }
  }

  // Copy constructor: truth plus results. lattice_data must be initialized
  // before CopyResults runs, because CopyResults deletes the old buffer.
  BlamerBundle(const BlamerBundle &other)
      : lattice_data(NULL), lattice_size(0) {
    this->CopyTruth(other);
    this->CopyResults(other);
  }

  // Returns the printable name of the current incorrect_result_reason.
  const char *IncorrectReason() const;

  // Returns true if the current reason says no usable truth exists for the
  // word (IRR_NO_TRUTH or IRR_PAGE_LAYOUT).
  bool NoTruth() const {
    return (incorrect_result_reason == IRR_NO_TRUTH ||
            incorrect_result_reason == IRR_PAGE_LAYOUT);
  }

  // Records irr as the reason and rebuilds the debug string from the reason
  // name, msg and choice. Prints the string if the debug argument is true.
  // Note that the bool parameter shadows the debug member, hence this->.
  void SetBlame(IncorrectResultReason irr,
                const STRING &msg, const WERD_CHOICE *choice, bool debug) {
    this->incorrect_result_reason = irr;
    this->debug = this->IncorrectReason();
    this->debug += " to blame: ";
    this->FillDebugString(msg, choice, &(this->debug));
    if (debug) tprintf("SetBlame(): %s", this->debug.string());
  }

  // Appends choice and truth details to the given debug string.
  void FillDebugString(const STRING &msg, const WERD_CHOICE *choice,
                       STRING *debug);

  // Set to true when bounding boxes for individual unichars are recorded.
  bool truth_has_char_boxes;
  // The truth_word (in the original image coordinate space) contains ground
  // truth bounding boxes for this WERD_RES.
  tesseract::BoxWord truth_word;
  // Same as above, but in normalized coordinates
  // (filled in by WERD_RES::SetupForRecognition()).
  tesseract::BoxWord norm_truth_word;
  // Tolerance for bounding box comparisons in normalized space.
  int norm_box_tolerance;
  // Contains ground truth unichar for each of the bounding boxes in truth_word.
  GenericVector<STRING> truth_text;
  // The reason for incorrect OCR result.
  IncorrectResultReason incorrect_result_reason;
  // Debug text associated with the blame.
  STRING debug;
  // Misadaption debug information (filled in if this word was misadapted to).
  STRING misadaption_debug;
  // Variables used by the segmentation search when looking for the blame.
  // Set to true while segmentation search is continued after the usual
  // termination condition in order to look for the blame.
  bool segsearch_is_looking_for_blame;
  // Best rating for correctly segmented path
  // (set and used by SegSearch when looking for blame).
  float best_correctly_segmented_rating;
  // Vectors populated by SegSearch to indicate column and row indices that
  // correspond to blobs with correct bounding boxes.
  GenericVector<int> correct_segmentation_cols;
  GenericVector<int> correct_segmentation_rows;
  // Set to true if best choice is a dictionary word and
  // classifier's top choice.
  bool best_choice_is_dict_and_top_choice;
  // Serialized segmentation search lattice (owned, allocated with new[]).
  char *lattice_data;
  int lattice_size;  // size of lattice_data in bytes
  // Information about hypotheses (paths) explored by the segmentation search.
  tesseract::ParamsTrainingBundle params_training_bundle;
};
/* Forward declarations */
class BLOCK_RES;
@ -56,9 +224,23 @@ class PAGE_RES { // page result
// Updated every time PAGE_RES_IT iterating on this PAGE_RES moves to
// the next word. This pointer is not owned by PAGE_RES class.
WERD_CHOICE **prev_word_best_choice;
// Sums of blame reasons computed by the blamer.
GenericVector<int> blame_reasons;
// Debug information about all the misadaptions on this page.
// Each BlamerBundle contains an index into this vector, so that words that
// caused misadaption could be marked. However, since words could be
// deleted/split/merged, the log is stored on the PAGE_RES level.
GenericVector<STRING> misadaption_log;
PAGE_RES() {
} // empty constructor
// Puts the page result into its empty state: zero counts, not rejected, no
// previous best choice, and one zeroed blame counter per
// IncorrectResultReason.
inline void Init() {
  char_count = rej_count = 0;
  rejected = FALSE;
  prev_word_best_choice = NULL;
  blame_reasons.init_to_size(IRR_NUM_REASONS, 0);
}
PAGE_RES() { Init(); } // empty constructor
PAGE_RES(BLOCK_LIST *block_list, // real blocks
WERD_CHOICE **prev_word_best_choice_ptr);
@ -110,7 +292,7 @@ class ROW_RES:public ELIST_LINK {
ROW_RES() {
} // empty constructor
ROW_RES(bool right_to_left, ROW *the_row); // real row
ROW_RES(ROW *the_row); // real row
~ROW_RES() { // destructor
}
@ -142,29 +324,61 @@ class WERD_RES : public ELIST_LINK {
// In any case a rotation by denorm.block()->re_rotation() will take them
// back to the original image.
// The other differences between words all represent different stages of
// processing.
//
// processing during recognition.
// ---------------------------INPUT-------------------------------------
// The word is the input C_BLOBs in the rotated pixel space.
// word is NOT owned by the WERD_RES unless combination is true.
// All the other word pointers ARE owned by the WERD_RES.
WERD* word; // Input C_BLOB word.
// -------------SETUP BY SetupFor*Recognition---READONLY-INPUT------------
// The bln_boxes contains the bounding boxes (only) of the input word, in the
// BLN space. The lengths of word and bln_boxes
// match as they are both before any chopping.
// TODO(rays) determine if docqual does anything useful and delete bln_boxes
// if it doesn't.
tesseract::BoxWord* bln_boxes; // BLN input bounding boxes.
// The denorm provides the transformation to get back to the rotated image
// coords from the chopped_word/rebuild_word BLN coords.
DENORM denorm; // For use on chopped_word.
// Unicharset used by the classifier output in best_choice and raw_choice.
const UNICHARSET* uch_set; // For converting back to utf8.
// ----Initialized by SetupFor*Recognition---BUT OUTPUT FROM RECOGNITION----
// ----Setup to a (different!) state expected by the various classifiers----
// TODO(rays) Tidy and make more consistent.
// The chopped_word is also in BLN space, and represents the fully chopped
// character fragments that make up the word.
// The length of chopped_word matches length of seam_array + 1 (if set).
TWERD* chopped_word; // BLN chopped fragments output.
SEAMS seam_array; // Seams matching chopped_word.
WERD_CHOICE *best_choice; // tess output
WERD_CHOICE *raw_choice; // top choice permuter
// Alternative paths found during chopping/segmentation search stages
// (the first entry being a slim copy of best_choice).
GenericVector<WERD_CHOICE *> alt_choices;
GenericVector<GenericVector<int> > alt_states;
// Truth bounding boxes, text and incorrect choice reason.
BlamerBundle *blamer_bundle;
// --------------OUTPUT FROM RECOGNITION-------------------------------
// --------------Not all fields are necessarily set.-------------------
// ---best_choice, raw_choice *must* end up set, with a box_word-------
// ---In complete output, the number of blobs in rebuild_word matches---
// ---the number of boxes in box_word, the number of unichar_ids in---
// ---best_choice, the number of ints in best_state, and the number---
// ---of strings in correct_text--------------------------------------
// ---SetupFake Sets everything to appropriate values if the word is---
// ---known to be bad before recognition.------------------------------
// The rebuild_word is also in BLN space, but represents the final best
// segmentation of the word. Its length is therefore the same as box_word.
TWERD* rebuild_word; // BLN best segmented word.
// The denorm provides the transformation to get back to the rotated image
// coords from the chopped_word/rebuild_word BLN coords.
DENORM denorm; // For use on chopped_word.
// The box_word is in the original image coordinate space. It is the
// bounding boxes of the rebuild_word, after denormalization.
// The length of box_word matches rebuild_word, best_state (if set) and
@ -180,16 +394,16 @@ class WERD_RES : public ELIST_LINK {
// text to the training system without the need for a unicharset. There
// is one entry in the vector for each blob in rebuild_word and box_word.
GenericVector<STRING> correct_text;
// The truth_* fields below are used by the blamer to determine the source
// of errors.
// The truth_word (in the original image coordinate space) contains ground
// truth bounding boxes for this WERD_RES.
tesseract::BoxWord* truth_word;
// The truth_text contains ground truth unichar for each
// of the bounding boxes in truth_word.
GenericVector<STRING> truth_text;
WERD_CHOICE *best_choice; // tess output
WERD_CHOICE *raw_choice; // top choice permuter
// The Tesseract that was used to recognize this word. Just a borrowed
// pointer. Note: Tesseract's class definition is in a higher-level library.
// We avoid introducing a cyclic dependency by not using the Tesseract
// within WERD_RES. We are just storing it to provide access to it
// for the top-level multi-language controller, and maybe for output of
// the recognized language.
tesseract::Tesseract* tesseract;
// Less-well documented members.
// TODO(rays) Add more documentation here.
WERD_CHOICE *ep_choice; // ep text TODO(rays) delete this.
REJMAP reject_map; // best_choice rejects
BOOL8 tess_failed;
@ -206,15 +420,17 @@ class WERD_RES : public ELIST_LINK {
bool small_caps; // word appears to be small caps
inT8 italic;
inT8 bold;
inT16 fontinfo_id; // primary font id (should be at least inT16)
// The fontinfos are pointers to data owned by the classifier.
const FontInfo* fontinfo;
const FontInfo* fontinfo2;
inT8 fontinfo_id_count; // number of votes
inT16 fontinfo_id2; // secondary font id (should be at least inT16)
inT8 fontinfo_id2_count; // number of votes
BOOL8 guessed_x_ht;
BOOL8 guessed_caps_ht;
CRUNCH_MODE unlv_crunch_mode;
float x_height; // post match estimate
float caps_height; // post match estimate
BOOL8 guessed_x_ht;
BOOL8 guessed_caps_ht;
/*
To deal with fuzzy spaces we need to be able to combine "words" to form
combinations when we suspect that the gap is a non-space. The (new) text
@ -238,31 +454,13 @@ class WERD_RES : public ELIST_LINK {
GenericVector<inT8> best_choice_fontinfo_ids;
WERD_RES() {
InitNonPointers();
InitPointers();
}
WERD_RES( //simple constructor
WERD *the_word) { //real word
WERD_RES(WERD *the_word) {
InitNonPointers();
InitPointers();
word = the_word;
tess_failed = FALSE;
tess_accepted = FALSE;
tess_would_adapt = FALSE;
done = FALSE;
unlv_crunch_mode = CR_NONE;
small_caps = false;
italic = FALSE;
bold = FALSE;
fontinfo_id = -1;
fontinfo_id_count = 0;
fontinfo_id2 = -1;
fontinfo_id2_count = 0;
x_height = 0.0;
caps_height = 0.0;
guessed_x_ht = TRUE;
guessed_caps_ht = TRUE;
combination = FALSE;
part_of_combo = FALSE;
reject_spaces = FALSE;
}
WERD_RES(const WERD_RES &source) {
InitPointers();
@ -270,6 +468,80 @@ class WERD_RES : public ELIST_LINK {
}
~WERD_RES();
// Returns the UTF-8 string for the given blob index in the best_choice word,
// given that we know whether we are in a right-to-left reading context.
// This matters for mirrorable characters such as parentheses. We recognize
// characters purely based on their shape on the page, and by default produce
// the corresponding unicode for a left-to-right context.
// Returns NULL if there is no best_choice or unicharset yet, if blob_index
// is out of range, or if the unichar id at that index is not valid.
const char* const BestUTF8(int blob_index, bool in_rtl_context) const {
  // Both pointers are NULL until recognition has been set up.
  if (best_choice == NULL || uch_set == NULL)
    return NULL;
  if (blob_index < 0 || blob_index >= best_choice->length())
    return NULL;
  UNICHAR_ID id = best_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  // In an RTL context, substitute the mirrored character when one exists.
  UNICHAR_ID mirrored = uch_set->get_mirror(id);
  if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
    id = mirrored;
  return uch_set->id_to_unichar_ext(id);
}
// Returns the UTF-8 string for the given blob index in the raw_choice word.
// Returns NULL if there is no raw_choice or unicharset yet, if blob_index is
// out of range, or if the unichar id at that index is not valid.
const char* const RawUTF8(int blob_index) const {
  // Both pointers are NULL until recognition has been set up.
  if (raw_choice == NULL || uch_set == NULL)
    return NULL;
  if (blob_index < 0 || blob_index >= raw_choice->length())
    return NULL;
  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
    return NULL;
  return uch_set->id_to_unichar(id);
}
// Returns the direction of the unichar at blob_index in best_choice, or
// U_OTHER_NEUTRAL if there is no best_choice or blob_index is out of range.
UNICHARSET::Direction SymbolDirection(int blob_index) const {
  const bool index_usable = best_choice != NULL &&
                            blob_index >= 0 &&
                            blob_index < best_choice->length();
  if (index_usable)
    return uch_set->get_direction(best_choice->unichar_id(blob_index));
  return UNICHARSET::U_OTHER_NEUTRAL;
}
// Returns true if any unichar of best_choice has a right-to-left direction
// (U_RIGHT_TO_LEFT, U_RIGHT_TO_LEFT_ARABIC or U_ARABIC_NUMBER).
// Returns false if there is no unicharset or best_choice, or it is empty.
bool AnyRtlCharsInWord() const {
  if (uch_set == NULL || best_choice == NULL)
    return false;
  const int num_unichars = best_choice->length();
  for (int pos = 0; pos < num_unichars; ++pos) {
    const int unichar_id = best_choice->unichar_id(pos);
    if (unichar_id < 0 || unichar_id >= uch_set->size())
      continue;  // Ignore illegal chars.
    switch (uch_set->get_direction(unichar_id)) {
      case UNICHARSET::U_RIGHT_TO_LEFT:
      case UNICHARSET::U_RIGHT_TO_LEFT_ARABIC:
      case UNICHARSET::U_ARABIC_NUMBER:
        return true;
      default:
        break;
    }
  }
  return false;
}
}
bool AnyLtrCharsInWord() const {
if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
return false;
for (int id = 0; id < best_choice->length(); id++) {
int unichar_id = best_choice->unichar_id(id);
if (unichar_id < 0 || unichar_id >= uch_set->size())
continue; // Ignore illegal chars.
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
return true;
}
return false;
}
// Return whether the blobs in this WERD_RES 0, 1,... come from an engine
// that gave us the unichars in reading order (as opposed to strict left
// to right).
// Simply forwards to best_choice->unichars_in_script_order(); best_choice is
// dereferenced unchecked, so it must be non-NULL.
bool UnicharsInReadingOrder() const {
return best_choice->unichars_in_script_order();
}
void InitNonPointers();
void InitPointers();
void Clear();
void ClearResults();
@ -278,11 +550,55 @@ class WERD_RES : public ELIST_LINK {
void CopySimpleFields(const WERD_RES& source);
// Initializes a blank (default constructed) WERD_RES from one that has
// already been recognized.
// Use SetupFor*Recognition afterwards to complete the setup and make
// it ready for a retry recognition.
void InitForRetryRecognition(const WERD_RES& source);
// Sets up the members used in recognition: bln_boxes, chopped_word,
// seam_array, denorm, best_choice, raw_choice. Returns false if
// the word is empty and sets up fake results. If use_body_size is
// true and row->body_size is set, then body_size will be used for
// blob normalization instead of xheight + ascrise. This flag is for
// those languages that are using CJK pitch model and thus it has to
// be true if and only if tesseract->textord_use_cjk_fp_model is
// true.
bool SetupForTessRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract, Pix* pix,
bool numeric_mode, bool use_body_size,
ROW *row, BLOCK* block);
// Sets up the members used in recognition:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
// bln_boxes, chopped_word, seam_array, denorm.
// Returns false if the word is empty and sets up fake results.
bool SetupForRecognition(const UNICHARSET& unicharset,
bool numeric_mode, ROW *row, BLOCK* block);
bool SetupForCubeRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract,
const BLOCK* block);
// Sets up the members used in recognition for an empty recognition result:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
void SetupFake(const UNICHARSET& uch);
// Set the word as having the script of the input unicharset.
void SetupWordScript(const UNICHARSET& unicharset_in);
// Sets up the blamer_bundle if it is not null, using the initialized denorm.
void SetupBlamerBundle();
// Moves the results fields from word to this. This takes ownership of all
// the data, so src can be destructed.
// word1.ConsumeWordResult(word);
// delete word;
// is simpler and faster than:
// word1 = *word;
// delete word;
// as it doesn't need to copy and reallocate anything.
void ConsumeWordResults(WERD_RES* word);
// Replace the best choice and rebuild box word.
void ReplaceBestChoice(const WERD_CHOICE& choice,
const GenericVector<int> &segmentation_state);
// Builds the rebuild_word from the chopped_word and the best_state.
void RebuildBestState();
@ -296,18 +612,30 @@ class WERD_RES : public ELIST_LINK {
// Sets up the script positions in the output boxword using the best_choice
// to get the unichars, and the unicharset to get the target positions.
void SetScriptPositions(const UNICHARSET& unicharset);
void SetScriptPositions();
// Returns the indices [start, end) containing the core of the word, stripped
// of any superscript digits on either side.
// (i.e., the non-footnote part of the word).
// Assumes that BoxWord is all set up for best_choice.
void WithoutFootnoteSpan(int *start, int *end) const;
// Given an alternate word choice and segmentation state, yield the indices
// [start, end) containing the core of the word, stripped of any superscript
// digits on either side. (i.e. stripping off the footnote parts).
void WithoutFootnoteSpan(
const WERD_CHOICE &choice, const GenericVector<int> &state,
int *start, int *end) const;
// Classifies the word with some already-calculated BLOB_CHOICEs.
// The choices are an array of blob_count pointers to BLOB_CHOICE,
// providing a single classifier result for each blob.
// The BLOB_CHOICEs are consumed and the word takes ownership.
// The number of blobs in the outword must match blob_count.
void FakeClassifyWord(const UNICHARSET& unicharset, int blob_count,
BLOB_CHOICE** choices);
void FakeClassifyWord(int blob_count, BLOB_CHOICE** choices);
// Copies the best_choice strings to the correct_text for adaption/training.
void BestChoiceToCorrectText(const UNICHARSET& unicharset);
void BestChoiceToCorrectText();
// Merges 2 adjacent blobs in the result if the permanent callback
// class_cb returns other than INVALID_UNICHAR_ID, AND the permanent
@ -315,11 +643,28 @@ class WERD_RES : public ELIST_LINK {
// result to the class returned from class_cb.
// Returns true if anything was merged.
bool ConditionalBlobMerge(
const UNICHARSET& unicharset,
TessResultCallback2<UNICHAR_ID, UNICHAR_ID, UNICHAR_ID>* class_cb,
TessResultCallback2<bool, const TBOX&, const TBOX&>* box_cb,
BLOB_CHOICE_LIST_CLIST *blob_choices);
// Callback helper for fix_quotes returns a double quote if both
// arguments are quote, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2);
void fix_quotes(BLOB_CHOICE_LIST_CLIST *blob_choices);
// Callback helper for fix_hyphens returns UNICHAR_ID of - if both
// arguments are hyphen, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2);
// Callback helper for fix_hyphens returns true if box1 and box2 overlap
// (assuming both on the same textline, are in order and a chopped em dash.)
bool HyphenBoxesOverlap(const TBOX& box1, const TBOX& box2);
void fix_hyphens(BLOB_CHOICE_LIST_CLIST *blob_choices);
// Callback helper for merge_tess_fails returns a space if both
// arguments are space, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2);
void merge_tess_fails();
static WERD_RES* deep_copy(const WERD_RES* src) {
return new WERD_RES(*src);
}
@ -331,6 +676,10 @@ class WERD_RES : public ELIST_LINK {
word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
word->copy_on(word_res->word);
}
// Returns true if the collection of count pieces, starting at start, are all
// natural connected components, ie there are no real chops involved.
bool PiecesAllNatural(int start, int count) const;
};
/*************************************************************************
@ -349,6 +698,18 @@ class PAGE_RES_IT {
restart_page(); // ready to scan
}
// Do two PAGE_RES_ITs point at the same word?
// This is much cheaper than cmp().
bool operator ==(const PAGE_RES_IT &other) const;
bool operator !=(const PAGE_RES_IT &other) const {return !(*this == other); }
// Given another PAGE_RES_IT to the same page,
// this before other: -1
// this equal to other: 0
// this later than other: 1
int cmp(const PAGE_RES_IT &other) const;
WERD_RES *restart_page() {
return start_page(false); // Skip empty blocks.
}
@ -357,6 +718,8 @@ class PAGE_RES_IT {
}
WERD_RES *start_page(bool empty_ok);
WERD_RES *restart_row();
// ============ Methods that mutate the underlying structures ===========
// Note that these methods will potentially invalidate other PAGE_RES_ITs
// and are intended to be used only while a single PAGE_RES_IT is active.
@ -366,7 +729,7 @@ class PAGE_RES_IT {
// Inserts the new_word and a corresponding WERD_RES before the current
// position. The simple fields of the WERD_RES are copied from clone_res and
// the resulting WERD_RES is returned for further setup with best_choice etc.
WERD_RES* InsertCloneWord(const WERD_RES& clone_res, WERD* new_word);
WERD_RES* InsertSimpleCloneWord(const WERD_RES& clone_res, WERD* new_word);
// Deletes the current WERD_RES and its underlying WERD.
void DeleteCurrentWord();
@ -379,8 +742,9 @@ class PAGE_RES_IT {
return internal_forward(false, true);
}
WERD_RES *forward_block(); // get first word in
// next non-empty block
WERD_RES *forward_paragraph(); // get first word in next non-empty paragraph
WERD_RES *forward_block(); // get first word in next non-empty block
WERD_RES *prev_word() const { // previous word
return prev_word_res;
}

View File

@ -59,7 +59,7 @@ class PDBLK
if (hand_poly) delete hand_poly;
}
POLY_BLOCK *poly_block() {
POLY_BLOCK *poly_block() const {
return hand_poly;
}
///set the poly block

View File

@ -19,6 +19,7 @@
#include "mfcpch.h" //precompiled headers
#include <stdlib.h>
#include "helpers.h"
#include "ndminx.h"
#include "serialis.h"
#include "points.h"
@ -55,6 +56,24 @@ static int sign(int x) {
return x > 0 ? 1 : 0;
}
// Writes the raw bytes of xcoord followed by ycoord to fp.
// Returns true only if both writes succeed; ycoord is not written if
// writing xcoord fails.
bool ICOORD::Serialize(FILE* fp) const {
  return fwrite(&xcoord, sizeof(xcoord), 1, fp) == 1 &&
         fwrite(&ycoord, sizeof(ycoord), 1, fp) == 1;
}
// Reads xcoord followed by ycoord from fp as raw bytes.
// Returns false if either read fails, in which case no byte swapping is
// performed. If swap is true, the byte order of both coordinates is
// reversed after reading (for big/little-endian conversion).
bool ICOORD::DeSerialize(bool swap, FILE* fp) {
  const bool both_read = fread(&xcoord, sizeof(xcoord), 1, fp) == 1 &&
                         fread(&ycoord, sizeof(ycoord), 1, fp) == 1;
  if (!both_read) {
    return false;
  }
  if (swap) {
    ReverseN(&xcoord, sizeof(xcoord));
    ReverseN(&ycoord, sizeof(ycoord));
  }
  return true;
}
// Setup for iterating over the pixels in a vector by the well-known
// Bresenham rendering algorithm.
// Starting with major/2 in the accumulator, on each step add major_step,

View File

@ -99,11 +99,11 @@ class ICOORD
}
///test equality
BOOL8 operator== (const ICOORD & other) {
BOOL8 operator== (const ICOORD & other) const {
return xcoord == other.xcoord && ycoord == other.ycoord;
}
///test inequality
BOOL8 operator!= (const ICOORD & other) {
BOOL8 operator!= (const ICOORD & other) const {
return xcoord != other.xcoord || ycoord != other.ycoord;
}
///rotate 90 deg anti
@ -147,6 +147,12 @@ class ICOORD
void setup_render(ICOORD* major_step, ICOORD* minor_step,
int* major, int* minor) const;
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
protected:
inT16 xcoord; //< x value
inT16 ycoord; //< y value

View File

@ -52,10 +52,9 @@ POLY_BLOCK::POLY_BLOCK(const TBOX& box, PolyBlockType t) {
ICOORDELT_IT v = &vertices;
v.move_to_first();
v.add_to_end(new ICOORDELT(box.left(), box.top()));
v.add_to_end(new ICOORDELT(box.left(), box.top() + box.height()));
v.add_to_end(new ICOORDELT(box.left() + box.width(),
box.top() + box.height()));
v.add_to_end(new ICOORDELT(box.left(), box.top() + box.height()));
v.add_to_end(new ICOORDELT(box.left(), box.bottom()));
v.add_to_end(new ICOORDELT(box.right(), box.bottom()));
v.add_to_end(new ICOORDELT(box.right(), box.top()));
compute_bb();
type = t;
}
@ -204,6 +203,25 @@ void POLY_BLOCK::rotate(FCOORD rotation) {
compute_bb();
}
/**
* @name POLY_BLOCK::reflect_in_y_axis
*
* Reflect the coords of the polygon in the y-axis. (Flip the sign of x.)
*/
void POLY_BLOCK::reflect_in_y_axis() {
ICOORDELT *pt; // current point
ICOORDELT_IT pts = &vertices; // Iterator.
do {
pt = pts.data();
pt->set_x(-pt->x());
pts.forward();
}
while (!pts.at_first());
compute_bb();
}
/**
* POLY_BLOCK::move
@ -384,6 +402,8 @@ ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) {
ScrollView::BLUE, // Text that lives inside a column.
ScrollView::CYAN, // Text that spans more than one column.
ScrollView::MEDIUM_BLUE, // Text that is in a cross-column pull-out region.
ScrollView::AQUAMARINE, // Partition belonging to an equation region.
ScrollView::SKY_BLUE, // Partition belonging to an inline equation region.
ScrollView::MAGENTA, // Partition belonging to a table region.
ScrollView::GREEN, // Text-line runs vertically.
ScrollView::LIGHT_BLUE, // Text that belongs to an image.

View File

@ -58,6 +58,8 @@ class DLLSYM POLY_BLOCK {
// Rotate about the origin by the given rotation. (Analogous to
// multiplying by a complex number.)
void rotate(FCOORD rotation);
// Reflect the coords of the polygon in the y-axis. (Flip the sign of x.)
void reflect_in_y_axis();
// Move by adding shift to all coordinates.
void move(ICOORD shift);

View File

@ -25,6 +25,8 @@ const char* kPolyBlockNames[] = {
"Flowing Text",
"Heading Text",
"Pullout Text",
"Equation",
"Inline Equation",
"Table",
"Vertical Text",
"Caption Text",

View File

@ -41,6 +41,8 @@ enum PolyBlockType {
PT_FLOWING_TEXT, // Text that lives inside a column.
PT_HEADING_TEXT, // Text that spans more than one column.
PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
PT_EQUATION, // Partition belonging to an equation region.
PT_INLINE_EQUATION, // Partition has inline equation.
PT_TABLE, // Partition belonging to a table region.
PT_VERTICAL_TEXT, // Text-line runs vertically.
PT_CAPTION_TEXT, // Text that belongs to an image.
@ -66,7 +68,8 @@ inline bool PTIsImageType(PolyBlockType type) {
inline bool PTIsTextType(PolyBlockType type) {
return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
type == PT_PULLOUT_TEXT || type == PT_TABLE ||
type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT;
type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
type == PT_INLINE_EQUATION;
}
// String name for each block type. Keep in sync with PolyBlockType.
@ -165,9 +168,6 @@ enum PageSegMode {
// enum of the elements of the page hierarchy, used in ResultIterator
// to provide functions that operate on each level without having to
// have 5x as many functions.
// NOTE: At present RIL_PARA and RIL_BLOCK are equivalent as there is
// no paragraph internally yet.
// TODO(rays) Add paragraph detection.
enum PageIteratorLevel {
RIL_BLOCK, // Block of text/image/separator line.
RIL_PARA, // Paragraph within a block.
@ -176,6 +176,35 @@ enum PageIteratorLevel {
RIL_SYMBOL // Symbol/character within a word.
};
// JUSTIFICATION_UNKNOWN
// The alignment is not clearly one of the other options. This could happen
// for example if there are only one or two lines of text or the text looks
// like source code or poetry.
//
// NOTA BENE: Fully justified paragraphs (text aligned to both left and right
// margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
// is written with a left-to-right script and with JUSTIFICATION_RIGHT if
// their text is written in a right-to-left script.
//
// Interpretation for text read in vertical lines:
// "Left" is wherever the starting reading position is.
//
// JUSTIFICATION_LEFT
// Each line, except possibly the first, is flush to the same left tab stop.
//
// JUSTIFICATION_CENTER
// The text lines of the paragraph are centered about a line going
// down through the middle of the text lines.
//
// JUSTIFICATION_RIGHT
// Each line, except possibly the first, is flush to the same right tab stop.
enum ParagraphJustification {
JUSTIFICATION_UNKNOWN,
JUSTIFICATION_LEFT,
JUSTIFICATION_CENTER,
JUSTIFICATION_RIGHT,
};
// When Tesseract/Cube is initialized we can choose to instantiate/load/run
// only the Tesseract part, only the Cube part or both along with the combiner.
// The preference of which engine to use is stored in tessedit_ocr_engine_mode.

View File

@ -28,6 +28,36 @@ ELISTIZE (BLOB_CHOICE) CLISTIZE (BLOB_CHOICE_LIST) CLISTIZE (WERD_CHOICE)
const float WERD_CHOICE::kBadRating = 100000.0;
static const char kPermuterTypeNoPerm[] = "None";
static const char kPermuterTypePuncPerm[] = "Punctuation";
static const char kPermuterTypeTopPerm[] = "Top Choice";
static const char kPermuterTypeLowerPerm[] = "Top Lower Case";
static const char kPermuterTypeUpperPerm[] = "Top Upper Case";
static const char kPermuterTypeNgramPerm[] = "Ngram";
static const char kPermuterTypeNumberPerm[] = "Number";
static const char kPermuterTypeUserPatPerm[] = "User Pattern";
static const char kPermuterTypeSysDawgPerm[] = "System Dictionary";
static const char kPermuterTypeDocDawgPerm[] = "Document Dictionary";
static const char kPermuterTypeUserDawgPerm[] = "User Dictionary";
static const char kPermuterTypeFreqDawgPerm[] = "Frequent Words Dictionary";
static const char kPermuterTypeCompoundPerm[] = "Compound";
static const char * const kPermuterTypeNames[] = {
kPermuterTypeNoPerm, // 0
kPermuterTypePuncPerm, // 1
kPermuterTypeTopPerm, // 2
kPermuterTypeLowerPerm, // 3
kPermuterTypeUpperPerm, // 4
kPermuterTypeNgramPerm, // 5
kPermuterTypeNumberPerm, // 6
kPermuterTypeUserPatPerm, // 7
kPermuterTypeSysDawgPerm, // 8
kPermuterTypeDocDawgPerm, // 9
kPermuterTypeUserDawgPerm, // 10
kPermuterTypeFreqDawgPerm, // 11
kPermuterTypeCompoundPerm // 12
};
/**
* BLOB_CHOICE::BLOB_CHOICE
*
@ -38,7 +68,10 @@ BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
float src_cert, // certainty
inT16 src_fontinfo_id, // font
inT16 src_fontinfo_id2, // 2nd choice font
int src_script_id // script
int src_script_id, // script
inT16 min_xheight, // min xheight allowed
inT16 max_xheight, // max xheight by this char
bool adapted // adapted match or not
) {
unichar_id_ = src_unichar_id;
rating_ = src_rating;
@ -47,6 +80,9 @@ BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
fontinfo_id2_ = src_fontinfo_id2;
script_id_ = src_script_id;
language_model_state_ = NULL;
min_xheight_ = min_xheight;
max_xheight_ = max_xheight;
adapted_ = adapted;
}
/**
@ -62,6 +98,9 @@ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
fontinfo_id2_ = other.fontinfo_id2();
script_id_ = other.script_id();
language_model_state_ = NULL;
min_xheight_ = other.min_xheight_;
max_xheight_ = other.max_xheight_;
adapted_ = other.adapted_;
}
/**
@ -71,7 +110,8 @@ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
* The function assumes that src_string is not NULL.
*/
WERD_CHOICE::WERD_CHOICE(const char *src_string,
const UNICHARSET &unicharset) {
const UNICHARSET &unicharset)
: unicharset_(&unicharset){
STRING src_lengths;
const char *ptr = src_string;
const char *end = src_string + strlen(src_string);
@ -80,7 +120,7 @@ WERD_CHOICE::WERD_CHOICE(const char *src_string,
step = unicharset.step(ptr), src_lengths += step, ptr += step);
if (step != 0 && ptr == end) {
this->init(src_string, src_lengths.string(),
0.0, 0.0, NO_PERM, unicharset);
0.0, 0.0, NO_PERM);
} else { // there must have been an invalid unichar in the string
this->init(8);
this->make_bad();
@ -101,8 +141,7 @@ void WERD_CHOICE::init(const char *src_string,
const char *src_lengths,
float src_rating,
float src_certainty,
uinT8 src_permuter,
const UNICHARSET &unicharset) {
uinT8 src_permuter) {
int src_string_len = strlen(src_string);
if (src_string_len == 0) {
this->init(8);
@ -113,7 +152,7 @@ void WERD_CHOICE::init(const char *src_string,
for (int i = 0; i < length_; ++i) {
int unichar_length = src_lengths ? src_lengths[i] : 1;
unichar_ids_[i] =
unicharset.unichar_to_id(src_string+offset, unichar_length);
unicharset_->unichar_to_id(src_string+offset, unichar_length);
fragment_lengths_[i] = 1;
offset += unichar_length;
}
@ -132,6 +171,9 @@ WERD_CHOICE::~WERD_CHOICE() {
delete_blob_choices();
}
// Returns the human-readable name of this word's permuter, looked up in
// kPermuterTypeNames. A permuter_ value outside the table yields "Unknown"
// rather than reading past the end of the array.
const char *WERD_CHOICE::permuter_name() const {
  const int num_names = static_cast<int>(sizeof(kPermuterTypeNames) /
                                         sizeof(kPermuterTypeNames[0]));
  if (permuter_ >= num_names) {
    return "Unknown";
  }
  return kPermuterTypeNames[permuter_];
}
/**
* WERD_CHOICE::set_blob_choices
@ -177,20 +219,86 @@ void WERD_CHOICE::remove_unichar_ids(int start, int num) {
length_ -= num;
}
/**
* reverse_and_mirror_unichar_ids
*
* Reverses and mirrors unichars in unichar_ids.
* Note: this function does not change unichar_string_, it only modifies
* unichar_ids array.
*/
void WERD_CHOICE::reverse_and_mirror_unichar_ids() {
for (int i = 0; i < length_/2; ++i) {
UNICHAR_ID tmp_id = unichar_ids_[i];
unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]);
unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id);
}
if (length_ % 2 != 0) {
unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]);
}
}
/**
 * punct_stripped
 *
 * Computes the half-open interval of unichar_id indices [*start, *end)
 * that remains after skipping leading and trailing punctuation.
 * Note: for a word consisting only of punctuation the two scans cross,
 * giving *start == length() and *end == 0, so callers must cope with
 * *end < *start.
 */
void WERD_CHOICE::punct_stripped(int *start, int *end) const {
  const int word_length = length();

  // Scan forward past leading punctuation.
  int first_core = 0;
  while (first_core < word_length &&
         unicharset()->get_ispunctuation(unichar_id(first_core))) {
    ++first_core;
  }

  // Scan backward past trailing punctuation.
  int last_core = word_length - 1;
  while (last_core > -1 &&
         unicharset()->get_ispunctuation(unichar_id(last_core))) {
    --last_core;
  }

  *start = first_core;
  *end = last_core + 1;  // Convert the inclusive index to an exclusive bound.
}
// Returns a new WERD_CHOICE, sharing this word's unicharset, that holds
// the unichar ids and fragment lengths at indices [start, end).
// Each entry is appended with 0.0f for both trailing arguments.
// Both bounds must lie in [0, length_]; if end < start the result is empty.
WERD_CHOICE WERD_CHOICE::shallow_copy(int start, int end) const {
  ASSERT_HOST(start >= 0 && start <= length_);
  ASSERT_HOST(end >= 0 && end <= length_);
  const int stop = (end < start) ? start : end;
  WERD_CHOICE slice(unicharset_, stop - start);
  for (int pos = start; pos < stop; ++pos) {
    slice.append_unichar_id_space_allocated(
        unichar_ids_[pos], fragment_lengths_[pos], 0.0f, 0.0f);
  }
  return slice;
}
/**
* has_rtl_unichar_id
*
* Returns true if unichar_ids contain at least one "strongly" RTL unichar.
*/
bool WERD_CHOICE::has_rtl_unichar_id() const {
int i;
for (i = 0; i < length_; ++i) {
UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]);
if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
return true;
}
}
return false;
}
/**
* string_and_lengths
*
* Populates the given word_str with unichars from unichar_ids and
* word_lengths_str with the corresponding unichar lengths.
* Uses current_unicharset to make unichar id -> unichar conversions.
*/
void WERD_CHOICE::string_and_lengths(const UNICHARSET &current_unicharset,
STRING *word_str,
void WERD_CHOICE::string_and_lengths(STRING *word_str,
STRING *word_lengths_str) const {
*word_str = "";
if (word_lengths_str != NULL) *word_lengths_str = "";
for (int i = 0; i < length_; ++i) {
const char *ch = current_unicharset.id_to_unichar(unichar_ids_[i]);
const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
*word_str += ch;
if (word_lengths_str != NULL) {
*word_lengths_str += strlen(ch);
@ -230,6 +338,7 @@ WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) {
// word_lengths = NULL;
// delete_blob_choices();
// } else {
ASSERT_HOST(unicharset_ == second.unicharset_);
while (reserved_ < length_ + second.length()) {
this->double_the_size();
}
@ -291,6 +400,7 @@ WERD_CHOICE& WERD_CHOICE::operator=(const WERD_CHOICE& source) {
this->double_the_size();
}
unicharset_ = source.unicharset_;
const UNICHAR_ID *other_unichar_ids = source.unichar_ids();
const char *other_fragment_lengths = source.fragment_lengths();
for (int i = 0; i < source.length(); ++i) {
@ -376,6 +486,24 @@ const void WERD_CHOICE::print(const char *msg) const {
fflush(stdout);
}
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
const WERD_CHOICE &word2) {
const UNICHARSET *uchset = word1.unicharset();
if (word2.unicharset() != uchset) return false;
int w1start, w1end;
word1.punct_stripped(&w1start, &w1end);
int w2start, w2end;
word2.punct_stripped(&w2start, &w2end);
if (w1end - w1start != w2end - w2start) return false;
for (int i = 0; i < w1end - w1start; i++) {
if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
uchset->to_lower(word2.unichar_id(w2start + i))) {
return false;
}
}
return true;
}
/**
* print_ratings_list
*
@ -499,3 +627,27 @@ void print_char_choices_list(const char *msg,
print_ratings_list("", char_choices.get(x), current_unicharset);
}
}
/**
 * print_word_alternates_list
 *
 * Prints, via tprintf, the unichar string of word followed by the
 * quoted, comma-separated unichar strings of all its alternates.
 * Does nothing if word or alternates is null.
 * If needs_populate_unichars is true, populate_unichars() is first called
 * on word and on every alternate so their unichar strings are available.
 */
void print_word_alternates_list(
    WERD_CHOICE *word,
    GenericVector<WERD_CHOICE *> *alternates,
    bool needs_populate_unichars) {
  if (!(word && alternates)) {
    return;
  }
  const int num_alternates = alternates->size();
  if (needs_populate_unichars) {
    word->populate_unichars();
    for (int k = 0; k < num_alternates; ++k) {
      alternates->get(k)->populate_unichars();
    }
  }
  // Join the alternates with `", "`; the format string below supplies the
  // outermost pair of quotes.
  STRING joined;
  for (int k = 0; k < num_alternates; ++k) {
    if (k != 0) {
      joined += "\", \"";
    }
    joined += alternates->get(k)->unichar_string();
  }
  tprintf("Alternates for \"%s\": {\"%s\"}\n",
          word->unichar_string().string(), joined.string());
}

View File

@ -40,13 +40,19 @@ class BLOB_CHOICE: public ELIST_LINK
certainty_ = -MAX_FLOAT32;
script_id_ = -1;
language_model_state_ = NULL;
min_xheight_ = 0;
max_xheight_ = 0;
adapted_ = false;
}
BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
float src_rating, // rating
float src_cert, // certainty
inT16 src_fontinfo_id, // font
inT16 src_fontinfo_id2, // 2nd choice font
int script_id); // script
int script_id, // script
inT16 min_xheight, // min xheight in image pixel units
inT16 max_xheight, // max xheight allowed by this char
bool adapted); // adapted match or not
BLOB_CHOICE(const BLOB_CHOICE &other);
~BLOB_CHOICE() {}
@ -71,12 +77,21 @@ class BLOB_CHOICE: public ELIST_LINK
void *language_model_state() {
return language_model_state_;
}
inT16 xgap_before() {
inT16 xgap_before() const {
return xgap_before_;
}
inT16 xgap_after() {
inT16 xgap_after() const {
return xgap_after_;
}
inT16 min_xheight() const {
return min_xheight_;
}
inT16 max_xheight() const {
return max_xheight_;
}
bool adapted() const {
return adapted_;
}
void set_unichar_id(UNICHAR_ID newunichar_id) {
unichar_id_ = newunichar_id;
@ -105,6 +120,9 @@ class BLOB_CHOICE: public ELIST_LINK
void set_xgap_after(inT16 gap) {
xgap_after_ = gap;
}
void set_adapted(bool adapted) {
adapted_ = adapted;
}
static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
BLOB_CHOICE* choice = new BLOB_CHOICE;
*choice = *src;
@ -130,6 +148,10 @@ class BLOB_CHOICE: public ELIST_LINK
void *language_model_state_;
inT16 xgap_before_;
inT16 xgap_after_;
// X-height range (in image pixels) that this classification supports.
inT16 min_xheight_;
inT16 max_xheight_;
bool adapted_; // true if this is a match from adapted templates
};
// Make BLOB_CHOICE listable.
@ -156,24 +178,30 @@ class WERD_CHOICE {
public:
static const float kBadRating;
WERD_CHOICE() { this->init(8); }
WERD_CHOICE(int reserved) { this->init(reserved); }
WERD_CHOICE(const UNICHARSET *unicharset)
: unicharset_(unicharset) { this->init(8); }
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
: unicharset_(unicharset) { this->init(reserved); }
WERD_CHOICE(const char *src_string,
const char *src_lengths,
float src_rating,
float src_certainty,
uinT8 src_permuter,
const UNICHARSET &unicharset) {
const UNICHARSET &unicharset)
: unicharset_(&unicharset) {
this->init(src_string, src_lengths, src_rating,
src_certainty, src_permuter, unicharset);
src_certainty, src_permuter);
}
WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset);
WERD_CHOICE(const WERD_CHOICE &word) {
WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
WERD_CHOICE(const WERD_CHOICE &word) : unicharset_(word.unicharset_) {
this->init(word.length());
this->operator=(word);
}
~WERD_CHOICE();
const UNICHARSET *unicharset() const {
return unicharset_;
}
inline int length() const {
return length_;
}
@ -200,6 +228,7 @@ class WERD_CHOICE {
inline uinT8 permuter() const {
return permuter_;
}
const char *permuter_name() const;
inline bool fragment_mark() const {
return fragment_mark_;
}
@ -237,25 +266,37 @@ class WERD_CHOICE {
/// Make more space in unichar_id_ and fragment_lengths_ arrays.
inline void double_the_size() {
unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
reserved_, unichar_ids_);
fragment_lengths_ = GenericVector<char>::double_the_size_memcpy(
reserved_, fragment_lengths_);
reserved_ *= 2;
if (reserved_ > 0) {
unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
reserved_, unichar_ids_);
fragment_lengths_ = GenericVector<char>::double_the_size_memcpy(
reserved_, fragment_lengths_);
reserved_ *= 2;
} else {
unichar_ids_ = new UNICHAR_ID[1];
fragment_lengths_ = new char[1];
reserved_ = 1;
}
}
/// Initializes WERD_CHOICE - reserves length slots in unichar_ids_ and
/// fragment_length_ arrays. Sets other values to default (blank) values.
inline void init(int reserved) {
reserved_ = reserved;
unichar_ids_ = new UNICHAR_ID[reserved];
fragment_lengths_ = new char[reserved];
if (reserved > 0) {
unichar_ids_ = new UNICHAR_ID[reserved];
fragment_lengths_ = new char[reserved];
} else {
unichar_ids_ = NULL;
fragment_lengths_ = NULL;
}
length_ = 0;
rating_ = 0.0;
certainty_ = MAX_FLOAT32;
permuter_ = NO_PERM;
fragment_mark_ = false;
blob_choices_ = NULL;
unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
unichar_string_ = "";
unichar_lengths_ = "";
}
@ -267,7 +308,7 @@ class WERD_CHOICE {
/// in src_string are assumed to all be of length 1.
void init(const char *src_string, const char *src_lengths,
float src_rating, float src_certainty,
uinT8 src_permuter, const UNICHARSET &current_unicharset);
uinT8 src_permuter);
/// Set the fields in this choice to be default (bad) values.
inline void make_bad() {
@ -308,13 +349,26 @@ class WERD_CHOICE {
bool contains_unichar_id(UNICHAR_ID unichar_id) const;
void remove_unichar_ids(int index, int num);
inline void remove_last_unichar_id() { --length_; }
inline void remove_unichar_id(int index) { this->remove_unichar_ids(index, 1); }
void string_and_lengths(const UNICHARSET &current_unicharset,
STRING *word_str, STRING *word_lengths_str) const;
const STRING debug_string(const UNICHARSET &current_unicharset) const {
inline void remove_unichar_id(int index) {
this->remove_unichar_ids(index, 1);
}
bool has_rtl_unichar_id() const;
void reverse_and_mirror_unichar_ids();
// Returns the half-open interval of unichar_id indices [start, end) which
// enclose the core portion of this word -- the part after stripping
// punctuation from the left and right.
void punct_stripped(int *start_core, int *end_core) const;
// Return a copy of this WERD_CHOICE with the choices [start, end).
// The result is useful only for checking against a dictionary.
WERD_CHOICE shallow_copy(int start, int end) const;
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const;
const STRING debug_string() const {
STRING word_str;
for (int i = 0; i < length_; ++i) {
word_str += current_unicharset.debug_str(unichar_ids_[i]);
word_str += unicharset_->debug_str(unichar_ids_[i]);
word_str += " ";
}
return word_str;
@ -322,16 +376,28 @@ class WERD_CHOICE {
/// Since this function walks over the whole word to convert unichar ids
/// to unichars, it is best to call it once, e.g. after all changes to
/// unichar_ids_ in WERD_CHOICE are finished.
void populate_unichars(const UNICHARSET &current_unicharset) {
this->string_and_lengths(current_unicharset, &unichar_string_,
&unichar_lengths_);
void populate_unichars() {
this->string_and_lengths(&unichar_string_, &unichar_lengths_);
}
/// Undoes populate_unichars, so that unichar_string_ and unichar_lengths_
/// are empty.
void depopulate_unichars() {
unichar_string_ = "";
unichar_lengths_ = "";
}
// Call this to override the default (strict left to right graphemes)
// with the fact that some engine produces a "reading order" set of
// Graphemes for each word.
bool set_unichars_in_script_order(bool in_script_order) {
return unichars_in_script_order_ = in_script_order;
}
bool unichars_in_script_order() const {
return unichars_in_script_order_;
}
/// This function should only be called if populate_unichars()
/// was called and WERD_CHOICE did not change since then.
const STRING &unichar_string() const {
@ -339,6 +405,7 @@ class WERD_CHOICE {
unichar_string_.length() >= length_); // sanity check
return unichar_string_;
}
/// This function should only be called if populate_unichars()
/// was called and WERD_CHOICE did not change since then.
const STRING &unichar_lengths() const {
@ -355,6 +422,7 @@ class WERD_CHOICE {
WERD_CHOICE& operator= (const WERD_CHOICE& source);
private:
const UNICHARSET *unicharset_;
UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word
char *fragment_lengths_; // number of fragments in each unichar
int reserved_; // size of the above arrays
@ -367,10 +435,17 @@ class WERD_CHOICE {
// contained a fragment
BLOB_CHOICE_LIST_CLIST *blob_choices_; // best choices for each blob
// Normally, the blob_choices_ represent the recognition results in order
// from left-to-right. However, some engines (say Cube) may return
// recognition results in the order of the script's major reading direction
// (for Arabic, that is right-to-left).
bool unichars_in_script_order_;
// The following variables are only populated by calling populate_unichars().
// They are not synchronized with the values in unichar_ids otherwise.
STRING unichar_string_;
STRING unichar_lengths_;
bool unichar_info_present;
private:
@ -382,6 +457,12 @@ ELISTIZEH (WERD_CHOICE)
typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR;
typedef GenericVector<WERD_CHOICE_LIST *> WERD_CHOICE_LIST_VECTOR;
// Utilities for comparing WERD_CHOICEs
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
const WERD_CHOICE &word2);
// Utilities for debug printing.
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings);
void print_ratings_list(
const char *msg, // intro message
@ -401,5 +482,9 @@ void print_char_choices_list(
const UNICHARSET &current_unicharset,
BOOL8 detailed
);
void print_word_alternates_list(
WERD_CHOICE *word,
GenericVector<WERD_CHOICE *> *alternates,
bool needs_populate_unichars);
#endif

View File

@ -172,6 +172,19 @@ void TBOX::plot( //paint box
}
#endif
// Writes bot_left followed by top_right to fp using ICOORD::Serialize.
// Returns true only if both corners are written successfully; top_right
// is not attempted if bot_left fails.
bool TBOX::Serialize(FILE* fp) const {
  return bot_left.Serialize(fp) && top_right.Serialize(fp);
}
// Reads bot_left followed by top_right from fp using ICOORD::DeSerialize,
// passing swap through so each corner is byte-swapped when requested.
// Returns true only if both corners are read successfully; top_right
// is not attempted if bot_left fails.
bool TBOX::DeSerialize(bool swap, FILE* fp) {
  return bot_left.DeSerialize(swap, fp) && top_right.DeSerialize(swap, fp);
}
/**********************************************************************
* operator+=
@ -200,15 +213,12 @@ const TBOX & op2) {
/**********************************************************************
* operator-=
* operator&=
*
* Reduce one box to intersection with the other (In place intersection)
**********************************************************************/
DLLSYM TBOX &
operator-= ( //inplace intersection
TBOX & op1, //operands
const TBOX & op2) {
TBOX& operator&=(TBOX& op1, const TBOX& op2) {
if (op1.overlap (op2)) {
if (op2.bot_left.x () > op1.bot_left.x ())
op1.bot_left.set_x (op2.bot_left.x ());
@ -230,3 +240,15 @@ const TBOX & op2) {
}
return op1;
}
// Returns true if the left and right edges of this box each differ from the
// corresponding edge of the given box by no more than tolerance.
bool TBOX::x_almost_equal(const TBOX &box, int tolerance) const {
  if (abs(left() - box.left()) > tolerance) return false;
  return abs(right() - box.right()) <= tolerance;
}
// Returns true if all four edges of this box (left, right, top, bottom) each
// differ from the corresponding edge of the given box by no more than
// tolerance.
bool TBOX::almost_equal(const TBOX &box, int tolerance) const {
  if (abs(left() - box.left()) > tolerance) return false;
  if (abs(right() - box.right()) > tolerance) return false;
  if (abs(top() - box.top()) > tolerance) return false;
  return abs(bottom() - box.bottom()) <= tolerance;
}

View File

@ -23,8 +23,8 @@
#include <math.h>
#include "points.h"
#include "ndminx.h"
#include "tprintf.h"
#include "scrollview.h"
#include "tprintf.h"
class DLLSYM TBOX { // bounding box
public:
@ -46,7 +46,7 @@ class DLLSYM TBOX { // bounding box
return ((left () >= right ()) || (top () <= bottom ()));
}
// Equality: true when both corner points (bot_left and top_right) compare
// equal to those of other.
// NOTE(review): this span comes from a diff hunk and shows two signature
// lines; the second one, which is const-qualified, appears to be the
// replacement for the first -- confirm against the real header.
bool operator==(const TBOX& other) {
bool operator==(const TBOX& other) const {
return bot_left == other.bot_left && top_right == other.top_right;
}
@ -115,6 +115,14 @@ class DLLSYM TBOX { // bounding box
return 0;
}
// Pads the box on either side by the supplied x,y pad amounts.
// NO checks for exceeding any bounds like 0 or an image size.
void pad(int xpad, int ypad) {
ICOORD pad(xpad, ypad);
bot_left -= pad;
top_right += pad;
}
void move_bottom_edge( // move one edge
const inT16 y) { // by +/- y
bot_left += ICOORD (0, y);
@ -232,6 +240,12 @@ class DLLSYM TBOX { // bounding box
// fraction of the current box's projected area covered by the other's
double y_overlap_fraction(const TBOX& box) const;
// Returns true if the boxes are almost equal on x axis.
bool x_almost_equal(const TBOX &box, int tolerance) const;
// Returns true if the boxes are almost equal
bool almost_equal(const TBOX &box, int tolerance) const;
TBOX intersection( // shared area box
const TBOX &box) const;
@ -251,6 +265,15 @@ class DLLSYM TBOX { // bounding box
left(), bottom(), right(), top());
}
// Same as print(), but the formatted bounding-box text is appended to *str
// instead of being printed to stdout.
void append_debug(STRING *str) const {
  // Fixed text plus four %d fields; 256 bytes leaves ample headroom.
  char text[256];
  sprintf(text, "Bounding box=(%d,%d)->(%d,%d)\n",
          left(), bottom(), right(), top());
  (*str) += text;
}
#ifndef GRAPHICS_DISABLED
void plot( // use current settings
ScrollView* fd) const { // where to paint
@ -263,10 +286,15 @@ class DLLSYM TBOX { // bounding box
ScrollView::Color fill_colour, // colour for inside
ScrollView::Color border_colour) const; // colour for border
#endif
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
friend DLLSYM TBOX & operator+= (TBOX &, const TBOX &);
friend TBOX& operator+=(TBOX&, const TBOX&);
// in place union
friend DLLSYM TBOX & operator-= (TBOX &, const TBOX &);
friend TBOX& operator&=(TBOX&, const TBOX&);
// in place intersection
private:

View File

@ -72,6 +72,29 @@ bool point_in_seam(SEAM *seam, SPLIT *split) {
point_in_split(seam->split3, split->point1, split->point2));
}
/**
 * @name point_used_by_split
 *
 * Test whether the given EDGEPT * is one of the two end points
 * (point1 or point2) of the given split.
 * @returns true if the edgept is used by the split; false if it is not,
 * or if split is NULL.
 */
bool point_used_by_split(SPLIT *split, EDGEPT *point) {
  return split != NULL &&
         (point == split->point1 || point == split->point2);
}
/**
 * @name point_used_by_seam
 *
 * Test whether the given EDGEPT * is used by any of the three splits
 * (split1, split2, split3) of the given seam.
 * @returns true if the edgept is used by the seam; false if it is not,
 * or if seam is NULL.
 */
bool point_used_by_seam(SEAM *seam, EDGEPT *point) {
  if (seam == NULL) return false;
  // Check the splits in order, stopping at the first one that matches.
  if (point_used_by_split(seam->split1, point)) return true;
  if (point_used_by_split(seam->split2, point)) return true;
  return point_used_by_split(seam->split3, point);
}
/**
* @name add_seam
@ -152,28 +175,20 @@ void delete_seam(void *arg) { //SEAM *seam)
// Builds a fresh seam list holding one seam per pair of adjacent blobs in the
// chain starting at blobs. Each seam is created with
// new_seam(0.0, location, NULL, NULL, NULL), where location lies between the
// two neighbouring blobs.
// NOTE(review): blobs is dereferenced (blob->next) without a NULL check, so
// callers presumably must pass a non-empty chain -- confirm.
// NOTE(review): this span comes from a diff hunk and shows both the older
// TPOINT/blob_bounding_box statements and the newer TBOX/bounding_box()
// statements; the two variants appear to compute the same location.
SEAMS start_seam_list(TBLOB *blobs) {
TBLOB *blob;
SEAMS seam_list;
TPOINT topleft;
TPOINT botright;
TPOINT location;
/* Seam slot per char */
seam_list = new_seam_list ();
// Walk every blob that has a successor; the last blob starts no seam.
for (blob = blobs; blob->next != NULL; blob = blob->next) {
// Older variant: accumulate the corner coordinates of both boxes.
blob_bounding_box(blob, &topleft, &botright);
location.x = botright.x;
location.y = botright.y + topleft.y;
blob_bounding_box (blob->next, &topleft, &botright);
location.x += topleft.x;
location.y += botright.y + topleft.y;
// x: mean of the two x values summed above; y: mean of the four y values.
location.x /= 2;
location.y /= 4;
seam_list = add_seam (seam_list,
new_seam (0.0, location, NULL, NULL, NULL));
// Newer variant: the same midpoint taken directly from the two TBOXes.
TBOX bbox = blob->bounding_box();
TBOX nbox = blob->next->bounding_box();
// x: halfway between this blob's right edge and the next blob's left edge.
location.x = (bbox.right() + nbox.left()) / 2;
// y: mean of the top and bottom edges of both boxes.
location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
seam_list = add_seam(seam_list,
new_seam(0.0, location, NULL, NULL, NULL));
}
return (seam_list);
return seam_list;
}
/**

View File

@ -94,6 +94,10 @@ bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2);
bool point_in_seam(SEAM *seam, SPLIT *split);
bool point_used_by_split(SPLIT *split, EDGEPT *point);
bool point_used_by_seam(SEAM *seam, EDGEPT *point);
SEAMS add_seam(SEAMS seam_list, SEAM *seam);
void combine_seams(SEAM *dest_seam, SEAM *source_seam);

View File

@ -62,7 +62,7 @@ void delete_split(SPLIT *split) {
*
* Create an EDGEPT and hook it into an existing list of edge points.
**********************************************************************/
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
EDGEPT *this_edgept;
/* Create point */
this_edgept = new EDGEPT;
@ -82,6 +82,20 @@ EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
return (this_edgept);
}
/**********************************************************************
 * remove_edgept
 *
 * Unlink the given EDGEPT from its list, update the vec of the
 * preceding point so that it runs to the following point, and delete
 * the removed point.
 **********************************************************************/
void remove_edgept(EDGEPT *point) {
  EDGEPT *before = point->prev;
  EDGEPT *after = point->next;
  // Bridge the two neighbours over the point being removed.
  before->next = after;
  after->prev = before;
  // The preceding point's vec must now reach its new successor.
  before->vec.x = after->pos.x - before->pos.x;
  before->vec.y = after->pos.y - before->pos.y;
  delete point;
}
/**********************************************************************
* new_split

View File

@ -72,6 +72,8 @@ void delete_split(SPLIT *split);
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev);
void remove_edgept(EDGEPT *point);
SPLIT *new_split(EDGEPT *point1, EDGEPT *point2);
void print_split(SPLIT *split);

View File

@ -340,6 +340,15 @@ static void render_outline_list(C_OUTLINE_LIST *list,
}
}
// Calls render_outline(left, top, pix) on every outline in the list, so each
// one is drawn into the same pix.
static void render_outline_list_outline(C_OUTLINE_LIST *list,
                                        int left, int top, Pix* pix) {
  C_OUTLINE_IT outline_it(list);
  for (outline_it.mark_cycle_pt(); !outline_it.cycled_list();
       outline_it.forward()) {
    outline_it.data()->render_outline(left, top, pix);
  }
}
// Returns a Pix rendering of the blob. pixDestroy after use.
Pix* C_BLOB::render() {
TBOX box = bounding_box();
@ -348,6 +357,15 @@ Pix* C_BLOB::render() {
return pix;
}
// Returns a Pix rendering of just the outline of the blob (no fill).
// The Pix is created with pixCreate(width, height, 1) from the blob's
// bounding box, and the outlines are rendered relative to the box's
// left/top. pixDestroy after use.
Pix* C_BLOB::render_outline() {
  TBOX bbox = bounding_box();
  Pix* outline_pix = pixCreate(bbox.width(), bbox.height(), 1);
  render_outline_list_outline(&outlines, bbox.left(), bbox.top(),
                              outline_pix);
  return outline_pix;
}
/**********************************************************************
* C_BLOB::plot
*

View File

@ -55,6 +55,9 @@ class C_BLOB:public ELIST_LINK
// Returns a Pix rendering of the blob. pixDestroy after use.
Pix* render();
// Returns a Pix rendering of the outline of the blob. (no fill).
// pixDestroy after use.
Pix* render_outline();
void plot( //draw one
ScrollView* window, //window to draw in

View File

@ -28,7 +28,7 @@
#include <math.h>
#include "blobs.h"
struct EDGEPT;
class EDGEPT;
/*----------------------------------------------------------------------
M a c r o s

View File

@ -452,6 +452,8 @@ WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
}
if (!found) {
not_found_it.add_after_then_move(werd_blob);
} else {
delete werd_blob;
}
}
// Iterate over all not found blobs. Some of them may be due to
@ -462,7 +464,6 @@ WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
not_found_it.forward()) {
C_BLOB* not_found = not_found_it.data();
TBOX not_found_box = not_found->bounding_box();
bool found = false;
C_BLOB_IT existing_blobs_it(new_blobs_it);
for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
existing_blobs_it.forward()) {
@ -472,8 +473,8 @@ WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
a_blob_box.major_overlap(not_found_box)) &&
not_found_box.y_overlap(a_blob_box) > 0.8) {
// Already taken care of.
found = true;
not_found_it.extract();
delete not_found_it.extract();
break;
}
}
}
@ -487,6 +488,10 @@ WERD* WERD::ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
WERD* new_werd = NULL;
if (!new_werd_blobs.empty()) {
new_werd = new WERD(&new_werd_blobs, this);
} else {
// Add the blobs back to this word so that it can be reused.
C_BLOB_IT this_list_it(cblob_list());
this_list_it.add_list_after(&not_found_blobs);
}
return new_werd;
}

View File

@ -51,7 +51,8 @@ enum DISPLAY_FLAGS
DF_TEXT, //< Correct ascii
DF_POLYGONAL, //< Polyg approx
DF_EDGE_STEP, //< Edge steps
DF_BN_POLYGONAL //< BL normalisd polyapx
DF_BN_POLYGONAL, //< BL normalisd polyapx
DF_BLAMER //< Blamer information
};
class ROW; //forward decl