mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Major refactor to improve speed on difficult images, especially when running
a heap checker. SEAM and SPLIT have been begging for a refactor for a *LONG* time. This change does most of the work of turning them into proper classes: Moved relevant code into SEAM/SPLIT/TBLOB/EDGEPT etc from global helper functions. Made the splits full data members of SEAM in an array instead of 3 separate pointers. This greatly reduces the amount of new/delete happening in the chopper, which is the main goal. Deleted redundant files: olutil.*, makechop.* Brought other code into SEAM in order to keep its data members private with only priority having accessors.
This commit is contained in:
parent
4c7c960bfd
commit
25d0968d09
@ -582,7 +582,7 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
|
||||
int blob_count = 1;
|
||||
for (int s = 0; s < word_res->seam_array.size(); ++s) {
|
||||
SEAM* seam = word_res->seam_array[s];
|
||||
if (seam->split1 == NULL) {
|
||||
if (!seam->HasAnySplits()) {
|
||||
word_res->best_state.push_back(blob_count);
|
||||
blob_count = 1;
|
||||
} else {
|
||||
|
@ -254,7 +254,7 @@ void Tesseract::join_words(WERD_RES *word,
|
||||
// Move the word2 seams onto the end of the word1 seam_array.
|
||||
// Since the seam list is one element short, an empty seam marking the
|
||||
// end of the last blob in the first word is needed first.
|
||||
word->seam_array.push_back(new SEAM(0.0f, split_pt, NULL, NULL, NULL));
|
||||
word->seam_array.push_back(new SEAM(0.0f, split_pt));
|
||||
word->seam_array += word2->seam_array;
|
||||
word2->seam_array.truncate(0);
|
||||
// Fix widths and gaps.
|
||||
|
@ -64,6 +64,42 @@ const TPOINT kDivisibleVerticalItalic(1, 5);
|
||||
|
||||
CLISTIZE(EDGEPT);
|
||||
|
||||
// Returns true when the two line segments cross each other.
|
||||
// (Moved from outlines.cpp).
|
||||
// Finds where the projected lines would cross and then checks to see if the
|
||||
// point of intersection lies on both of the line segments. If it does
|
||||
// then these two segments cross.
|
||||
/* static */
|
||||
bool TPOINT::IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
|
||||
const TPOINT& b1) {
|
||||
int b0a1xb0b1, b0b1xb0a0;
|
||||
int a1b1xa1a0, a1a0xa1b0;
|
||||
|
||||
TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;
|
||||
|
||||
b0a1.x = a1.x - b0.x;
|
||||
b0a0.x = a0.x - b0.x;
|
||||
a1b1.x = b1.x - a1.x;
|
||||
b0b1.x = b1.x - b0.x;
|
||||
a1a0.x = a0.x - a1.x;
|
||||
b0a1.y = a1.y - b0.y;
|
||||
b0a0.y = a0.y - b0.y;
|
||||
a1b1.y = b1.y - a1.y;
|
||||
b0b1.y = b1.y - b0.y;
|
||||
a1a0.y = a0.y - a1.y;
|
||||
|
||||
b0a1xb0b1 = CROSS(b0a1, b0b1);
|
||||
b0b1xb0a0 = CROSS(b0b1, b0a0);
|
||||
a1b1xa1a0 = CROSS(a1b1, a1a0);
|
||||
// For clarity, we want CROSS(a1a0,a1b0) here but we have b0a1 instead of a1b0
|
||||
// so use -CROSS(a1b0,b0a1) instead, which is the same.
|
||||
a1a0xa1b0 = -CROSS(a1a0, b0a1);
|
||||
|
||||
return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) ||
|
||||
(b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) &&
|
||||
((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0));
|
||||
}
|
||||
|
||||
// Consume the circular list of EDGEPTs to make a TESSLINE.
|
||||
TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) {
|
||||
TESSLINE* result = new TESSLINE;
|
||||
@ -454,6 +490,36 @@ TBOX TBLOB::bounding_box() const {
|
||||
return box;
|
||||
}
|
||||
|
||||
// Finds and deletes any duplicate outlines in this blob, without deleting
|
||||
// their EDGEPTs.
|
||||
void TBLOB::EliminateDuplicateOutlines() {
|
||||
for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
|
||||
TESSLINE* last_outline = outline;
|
||||
for (TESSLINE* other_outline = outline->next; other_outline != NULL;
|
||||
last_outline = other_outline, other_outline = other_outline->next) {
|
||||
if (outline->SameBox(*other_outline)) {
|
||||
last_outline->next = other_outline->next;
|
||||
// This doesn't leak - the outlines share the EDGEPTs.
|
||||
other_outline->loop = NULL;
|
||||
delete other_outline;
|
||||
other_outline = last_outline;
|
||||
// If it is part of a cut, then it can't be a hole any more.
|
||||
outline->is_hole = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Swaps the outlines of *this and next if needed to keep the centers in
|
||||
// increasing x.
|
||||
void TBLOB::CorrectBlobOrder(TBLOB* next) {
|
||||
TBOX box = bounding_box();
|
||||
TBOX next_box = next->bounding_box();
|
||||
if (box.x_middle() > next_box.x_middle()) {
|
||||
Swap(&outlines, &next->outlines);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void TBLOB::plot(ScrollView* window, ScrollView::Color color,
|
||||
ScrollView::Color child_color) {
|
||||
@ -858,18 +924,6 @@ void TWERD::plot(ScrollView* window) {
|
||||
}
|
||||
#endif // GRAPHICS_DISABLED
|
||||
|
||||
/**********************************************************************
|
||||
* blob_origin
|
||||
*
|
||||
* Compute the origin of a compound blob, define to be the centre
|
||||
* of the bounding box.
|
||||
**********************************************************************/
|
||||
void blob_origin(TBLOB *blob, /*blob to compute on */
|
||||
TPOINT *origin) { /*return value */
|
||||
TBOX bbox = blob->bounding_box();
|
||||
*origin = (bbox.topleft() + bbox.botright()) / 2;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* divisible_blob
|
||||
*
|
||||
|
108
ccstruct/blobs.h
108
ccstruct/blobs.h
@ -60,6 +60,13 @@ struct TPOINT {
|
||||
x /= divisor;
|
||||
y /= divisor;
|
||||
}
|
||||
// Two points compare equal only when both coordinates match.
bool operator==(const TPOINT& other) const {
  if (x != other.x) return false;
  return y == other.y;
}
|
||||
// Returns true when the two line segments cross each other.
|
||||
// (Moved from outlines.cpp).
|
||||
static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
|
||||
const TPOINT& b1);
|
||||
|
||||
inT16 x; // absolute x coord.
|
||||
inT16 y; // absolute y coord.
|
||||
@ -87,6 +94,55 @@ struct EDGEPT {
|
||||
start_step = src.start_step;
|
||||
step_count = src.step_count;
|
||||
}
|
||||
// Returns the squared distance between the points, with the x-component
|
||||
// weighted by x_factor.
|
||||
int WeightedDistance(const EDGEPT& other, int x_factor) const {
|
||||
int x_dist = pos.x - other.pos.x;
|
||||
int y_dist = pos.y - other.pos.y;
|
||||
return x_dist * x_dist * x_factor + y_dist * y_dist;
|
||||
}
|
||||
// Returns true if the positions are equal.
|
||||
bool EqualPos(const EDGEPT& other) const { return pos == other.pos; }
|
||||
// Returns the bounding box of the outline segment from *this to *end.
|
||||
// Ignores hidden edge flags.
|
||||
TBOX SegmentBox(const EDGEPT* end) const {
|
||||
TBOX box(pos.x, pos.y, pos.x, pos.y);
|
||||
const EDGEPT* pt = this;
|
||||
do {
|
||||
pt = pt->next;
|
||||
if (pt->pos.x < box.left()) box.set_left(pt->pos.x);
|
||||
if (pt->pos.x > box.right()) box.set_right(pt->pos.x);
|
||||
if (pt->pos.y < box.bottom()) box.set_bottom(pt->pos.y);
|
||||
if (pt->pos.y > box.top()) box.set_top(pt->pos.y);
|
||||
} while (pt != end && pt != this);
|
||||
return box;
|
||||
}
|
||||
// Returns the area of the outline segment from *this to *end.
|
||||
// Ignores hidden edge flags.
|
||||
int SegmentArea(const EDGEPT* end) const {
|
||||
int area = 0;
|
||||
const EDGEPT* pt = this->next;
|
||||
do {
|
||||
TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y);
|
||||
area += CROSS(origin_vec, pt->vec);
|
||||
pt = pt->next;
|
||||
} while (pt != end && pt != this);
|
||||
return area;
|
||||
}
|
||||
// Returns true if the number of points in the outline segment from *this to
|
||||
// *end is less that min_points and false if we get back to *this first.
|
||||
// Ignores hidden edge flags.
|
||||
bool ShortNonCircularSegment(int min_points, const EDGEPT* end) const {
|
||||
int count = 0;
|
||||
const EDGEPT* pt = this;
|
||||
do {
|
||||
if (pt == end) return true;
|
||||
pt = pt->next;
|
||||
++count;
|
||||
} while (pt != this && count <= min_points);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Accessors to hide or reveal a cut edge from feature extractors.
|
||||
void Hide() {
|
||||
flags[0] = true;
|
||||
@ -100,9 +156,6 @@ struct EDGEPT {
|
||||
void MarkChop() {
|
||||
flags[2] = true;
|
||||
}
|
||||
void UnmarkChop() {
|
||||
flags[2] = false;
|
||||
}
|
||||
bool IsChopPt() const {
|
||||
return flags[2] != 0;
|
||||
}
|
||||
@ -162,8 +215,23 @@ struct TESSLINE {
|
||||
void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const;
|
||||
|
||||
TBOX bounding_box() const;
|
||||
// Returns true if *this and other have equal bounding boxes.
|
||||
bool SameBox(const TESSLINE& other) const {
|
||||
return topleft == other.topleft && botright == other.botright;
|
||||
}
|
||||
// Returns true if the given line segment crosses any outline of this blob.
|
||||
bool SegmentCrosses(const TPOINT& pt1, const TPOINT& pt2) const {
|
||||
if (Contains(pt1) && Contains(pt2)) {
|
||||
EDGEPT* pt = loop;
|
||||
do {
|
||||
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) return true;
|
||||
pt = pt->next;
|
||||
} while (pt != loop);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if the point is contained within the outline box.
|
||||
bool Contains(const TPOINT& pt) {
|
||||
bool Contains(const TPOINT& pt) const {
|
||||
return topleft.x <= pt.x && pt.x <= botright.x &&
|
||||
botright.y <= pt.y && pt.y <= topleft.y;
|
||||
}
|
||||
@ -244,6 +312,31 @@ struct TBLOB {
|
||||
|
||||
TBOX bounding_box() const;
|
||||
|
||||
// Returns true if the given line segment crosses any outline of this blob.
|
||||
bool SegmentCrossesOutline(const TPOINT& pt1, const TPOINT& pt2) const {
|
||||
for (const TESSLINE* outline = outlines; outline != NULL;
|
||||
outline = outline->next) {
|
||||
if (outline->SegmentCrosses(pt1, pt2)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if the point is contained within any of the outline boxes.
|
||||
bool Contains(const TPOINT& pt) const {
|
||||
for (const TESSLINE* outline = outlines; outline != NULL;
|
||||
outline = outline->next) {
|
||||
if (outline->Contains(pt)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Finds and deletes any duplicate outlines in this blob, without deleting
|
||||
// their EDGEPTs.
|
||||
void EliminateDuplicateOutlines();
|
||||
|
||||
// Swaps the outlines of *this and next if needed to keep the centers in
|
||||
// increasing x.
|
||||
void CorrectBlobOrder(TBLOB* next);
|
||||
|
||||
const DENORM& denorm() const {
|
||||
return denorm_;
|
||||
}
|
||||
@ -358,12 +451,7 @@ if (w) memfree (w)
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
// TODO(rays) This will become a member of TBLOB when TBLOB's definition
|
||||
// moves to blobs.h
|
||||
|
||||
// Returns the center of blob's bounding box in origin.
|
||||
void blob_origin(TBLOB *blob, TPOINT *origin);
|
||||
|
||||
// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB.
|
||||
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location);
|
||||
|
||||
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
|
@ -404,7 +404,8 @@ void WERD_RES::SetupBlobWidthsAndGaps() {
|
||||
// as the blob widths and gaps.
|
||||
void WERD_RES::InsertSeam(int blob_number, SEAM* seam) {
|
||||
// Insert the seam into the SEAMS array.
|
||||
insert_seam(chopped_word, blob_number, seam, &seam_array);
|
||||
seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
|
||||
seam_array.insert(seam, blob_number);
|
||||
if (ratings != NULL) {
|
||||
// Expand the ratings matrix.
|
||||
ratings = ratings->ConsumeAndMakeBigger(blob_number);
|
||||
@ -804,12 +805,16 @@ void WERD_RES::RebuildBestState() {
|
||||
for (int i = 0; i < best_choice->length(); ++i) {
|
||||
int length = best_choice->state(i);
|
||||
best_state.push_back(length);
|
||||
if (length > 1)
|
||||
join_pieces(seam_array, start, start + length - 1, chopped_word);
|
||||
if (length > 1) {
|
||||
SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
|
||||
start + length - 1);
|
||||
}
|
||||
TBLOB* blob = chopped_word->blobs[start];
|
||||
rebuild_word->blobs.push_back(new TBLOB(*blob));
|
||||
if (length > 1)
|
||||
break_pieces(seam_array, start, start + length - 1, chopped_word);
|
||||
if (length > 1) {
|
||||
SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
|
||||
start + length - 1);
|
||||
}
|
||||
start += length;
|
||||
}
|
||||
}
|
||||
@ -1065,8 +1070,7 @@ bool WERD_RES::PiecesAllNatural(int start, int count) const {
|
||||
for (int index = start; index < start + count - 1; ++index) {
|
||||
if (index >= 0 && index < seam_array.size()) {
|
||||
SEAM* seam = seam_array[index];
|
||||
if (seam != NULL && seam->split1 != NULL)
|
||||
return false;
|
||||
if (seam != NULL && seam->HasAnySplits()) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -27,114 +27,236 @@
|
||||
----------------------------------------------------------------------*/
|
||||
#include "seam.h"
|
||||
#include "blobs.h"
|
||||
#include "freelist.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
V a r i a b l e s
|
||||
----------------------------------------------------------------------*/
|
||||
#define NUM_STARTING_SEAMS 20
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
Public Function Code
|
||||
----------------------------------------------------------------------*/
|
||||
/**
|
||||
* @name point_in_split
|
||||
*
|
||||
* Check to see if either of these points are present in the current
|
||||
* split.
|
||||
* @returns TRUE if one of them is split.
|
||||
*/
|
||||
bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2) {
|
||||
return ((split) ? ((exact_point (split->point1, point1) ||
|
||||
exact_point (split->point1, point2) ||
|
||||
exact_point (split->point2, point1) ||
|
||||
exact_point (split->point2, point2)) ? TRUE : FALSE)
|
||||
: FALSE);
|
||||
|
||||
// Returns the bounding box of all the points in the seam.
|
||||
TBOX SEAM::bounding_box() const {
|
||||
TBOX box(location_.x, location_.y, location_.x, location_.y);
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
box += splits_[s].bounding_box();
|
||||
}
|
||||
return box;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name point_in_seam
|
||||
*
|
||||
* Check to see if either of these points are present in the current
|
||||
* seam.
|
||||
* @returns TRUE if one of them is.
|
||||
*/
|
||||
bool point_in_seam(const SEAM *seam, SPLIT *split) {
|
||||
return (point_in_split(seam->split1, split->point1, split->point2) ||
|
||||
point_in_split(seam->split2, split->point1, split->point2) ||
|
||||
point_in_split(seam->split3, split->point1, split->point2));
|
||||
// Returns true if other can be combined into *this.
|
||||
bool SEAM::CombineableWith(const SEAM& other, int max_x_dist,
|
||||
float max_total_priority) const {
|
||||
int dist = location_.x - other.location_.x;
|
||||
if (-max_x_dist < dist && dist < max_x_dist &&
|
||||
num_splits_ + other.num_splits_ <= kMaxNumSplits &&
|
||||
priority_ + other.priority_ < max_total_priority &&
|
||||
!OverlappingSplits(other) && !SharesPosition(other)) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @name point_used_by_split
|
||||
*
|
||||
* Return whether this particular EDGEPT * is used in a given split.
|
||||
* @returns TRUE if the edgept is used by the split.
|
||||
*/
|
||||
bool point_used_by_split(SPLIT *split, EDGEPT *point) {
|
||||
if (split == NULL) return false;
|
||||
return point == split->point1 || point == split->point2;
|
||||
// Combines other into *this. Only works if CombinableWith returned true.
|
||||
void SEAM::CombineWith(const SEAM& other) {
|
||||
priority_ += other.priority_;
|
||||
location_ += other.location_;
|
||||
location_ /= 2;
|
||||
|
||||
for (int s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
|
||||
splits_[num_splits_++] = other.splits_[s];
|
||||
}
|
||||
|
||||
/**
|
||||
* @name point_used_by_seam
|
||||
*
|
||||
* Return whether this particular EDGEPT * is used in a given seam.
|
||||
* @returns TRUE if the edgept is used by the seam.
|
||||
*/
|
||||
bool point_used_by_seam(SEAM *seam, EDGEPT *point) {
|
||||
if (seam == NULL) return false;
|
||||
return point_used_by_split(seam->split1, point) ||
|
||||
point_used_by_split(seam->split2, point) ||
|
||||
point_used_by_split(seam->split3, point);
|
||||
// Returns true if the splits in *this SEAM appear OK in the sense that they
|
||||
// do not cross any outlines and do not chop off any ridiculously small
|
||||
// pieces.
|
||||
bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
|
||||
// TODO(rays) Try testing all the splits. Duplicating original code for now,
|
||||
// which tested only the first.
|
||||
return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name combine_seam
|
||||
*
|
||||
* Combine two seam records into a single seam. Move the split
|
||||
* references from the second seam to the first one. The argument
|
||||
* convention is patterned after strcpy.
|
||||
*/
|
||||
void combine_seams(SEAM *dest_seam, SEAM *source_seam) {
|
||||
dest_seam->priority += source_seam->priority;
|
||||
dest_seam->location += source_seam->location;
|
||||
dest_seam->location /= 2;
|
||||
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
|
||||
// seam, which is about to be inserted at insert_index. Returns false if
|
||||
// any of the computations fails, as this indicates an invalid chop.
|
||||
// widthn_/widthp_ are only changed if modify is true.
|
||||
bool SEAM::PrepareToInsertSeam(const GenericVector<SEAM*>& seams,
|
||||
const GenericVector<TBLOB*>& blobs,
|
||||
int insert_index, bool modify) {
|
||||
for (int s = 0; s < insert_index; ++s) {
|
||||
if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false;
|
||||
}
|
||||
if (!FindBlobWidth(blobs, insert_index, modify)) return false;
|
||||
for (int s = insert_index; s < seams.size(); ++s) {
|
||||
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (source_seam->split1) {
|
||||
if (!dest_seam->split1)
|
||||
dest_seam->split1 = source_seam->split1;
|
||||
else if (!dest_seam->split2)
|
||||
dest_seam->split2 = source_seam->split1;
|
||||
else if (!dest_seam->split3)
|
||||
dest_seam->split3 = source_seam->split1;
|
||||
else
|
||||
delete source_seam->split1; // Wouldn't have fitted.
|
||||
source_seam->split1 = NULL;
|
||||
// Computes the widthp_/widthn_ range. Returns false if not all the splits
|
||||
// are accounted for. widthn_/widthp_ are only changed if modify is true.
|
||||
bool SEAM::FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
|
||||
bool modify) {
|
||||
int num_found = 0;
|
||||
if (modify) {
|
||||
widthp_ = 0;
|
||||
widthn_ = 0;
|
||||
}
|
||||
if (source_seam->split2) {
|
||||
if (!dest_seam->split2)
|
||||
dest_seam->split2 = source_seam->split2;
|
||||
else if (!dest_seam->split3)
|
||||
dest_seam->split3 = source_seam->split2;
|
||||
else
|
||||
delete source_seam->split2; // Wouldn't have fitted.
|
||||
source_seam->split2 = NULL;
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
const SPLIT& split = splits_[s];
|
||||
bool found_split = split.ContainedByBlob(*blobs[index]);
|
||||
// Look right.
|
||||
for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
|
||||
found_split = split.ContainedByBlob(*blobs[b]);
|
||||
if (found_split && b - index > widthp_ && modify) widthp_ = b - index;
|
||||
}
|
||||
if (source_seam->split3) {
|
||||
if (!dest_seam->split3)
|
||||
dest_seam->split3 = source_seam->split3;
|
||||
else
|
||||
delete source_seam->split3; // Wouldn't have fitted.
|
||||
source_seam->split3 = NULL;
|
||||
// Look left.
|
||||
for (int b = index - 1; !found_split && b >= 0; --b) {
|
||||
found_split = split.ContainedByBlob(*blobs[b]);
|
||||
if (found_split && index - b > widthn_ && modify) widthn_ = index - b;
|
||||
}
|
||||
delete source_seam;
|
||||
if (found_split) ++num_found;
|
||||
}
|
||||
return num_found == num_splits_;
|
||||
}
|
||||
|
||||
// Splits this blob into two blobs by applying the splits included in
|
||||
// *this SEAM
|
||||
void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].SplitOutlineList(blob->outlines);
|
||||
}
|
||||
blob->ComputeBoundingBoxes();
|
||||
|
||||
divide_blobs(blob, other_blob, italic_blob, location_);
|
||||
|
||||
blob->EliminateDuplicateOutlines();
|
||||
other_blob->EliminateDuplicateOutlines();
|
||||
|
||||
blob->CorrectBlobOrder(other_blob);
|
||||
}
|
||||
|
||||
// Undoes ApplySeam by removing the seam between these two blobs.
|
||||
// Produces one blob as a result, and deletes other_blob.
|
||||
void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const {
|
||||
if (blob->outlines == NULL) {
|
||||
blob->outlines = other_blob->outlines;
|
||||
other_blob->outlines = NULL;
|
||||
}
|
||||
|
||||
TESSLINE* outline = blob->outlines;
|
||||
while (outline->next) outline = outline->next;
|
||||
outline->next = other_blob->outlines;
|
||||
other_blob->outlines = NULL;
|
||||
delete other_blob;
|
||||
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].UnsplitOutlineList(blob);
|
||||
}
|
||||
blob->ComputeBoundingBoxes();
|
||||
blob->EliminateDuplicateOutlines();
|
||||
}
|
||||
|
||||
// Prints everything in *this SEAM.
|
||||
void SEAM::Print(const char* label) const {
|
||||
tprintf(label);
|
||||
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y,
|
||||
widthp_, widthn_);
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].Print();
|
||||
if (s + 1 < num_splits_) tprintf(", ");
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
// Prints a collection of SEAMs.
|
||||
/* static */
|
||||
void SEAM::PrintSeams(const char* label, const GenericVector<SEAM*>& seams) {
|
||||
if (!seams.empty()) {
|
||||
tprintf("%s\n", label);
|
||||
for (int x = 0; x < seams.size(); ++x) {
|
||||
tprintf("%2d: ", x);
|
||||
seams[x]->Print("");
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
// Draws the seam in the given window.
|
||||
void SEAM::Mark(ScrollView* window) const {
|
||||
for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Break up the blobs in this chain so that they are all independent.
|
||||
// This operation should undo the affect of join_pieces.
|
||||
/* static */
|
||||
void SEAM::BreakPieces(const GenericVector<SEAM*>& seams,
|
||||
const GenericVector<TBLOB*>& blobs, int first,
|
||||
int last) {
|
||||
for (int x = first; x < last; ++x) seams[x]->Reveal();
|
||||
|
||||
TESSLINE* outline = blobs[first]->outlines;
|
||||
int next_blob = first + 1;
|
||||
|
||||
while (outline != NULL && next_blob <= last) {
|
||||
if (outline->next == blobs[next_blob]->outlines) {
|
||||
outline->next = NULL;
|
||||
outline = blobs[next_blob]->outlines;
|
||||
++next_blob;
|
||||
} else {
|
||||
outline = outline->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Join a group of base level pieces into a single blob that can then
|
||||
// be classified.
|
||||
/* static */
|
||||
void SEAM::JoinPieces(const GenericVector<SEAM*>& seams,
|
||||
const GenericVector<TBLOB*>& blobs, int first, int last) {
|
||||
TESSLINE* outline = blobs[first]->outlines;
|
||||
if (!outline)
|
||||
return;
|
||||
|
||||
for (int x = first; x < last; ++x) {
|
||||
SEAM *seam = seams[x];
|
||||
if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide();
|
||||
while (outline->next) outline = outline->next;
|
||||
outline->next = blobs[x + 1]->outlines;
|
||||
}
|
||||
}
|
||||
|
||||
// Hides the seam so the outlines appear not to be cut by it.
|
||||
void SEAM::Hide() const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].Hide();
|
||||
}
|
||||
}
|
||||
|
||||
// Undoes hide, so the outlines are cut by the seam.
|
||||
void SEAM::Reveal() const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].Reveal();
|
||||
}
|
||||
}
|
||||
|
||||
// Computes and returns, but does not set, the full priority of *this SEAM.
|
||||
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob,
|
||||
int centered_maxwidth, double center_knob,
|
||||
double width_change_knob) const {
|
||||
if (num_splits_ == 0) return 0.0f;
|
||||
for (int s = 1; s < num_splits_; ++s) {
|
||||
splits_[s].SplitOutline();
|
||||
}
|
||||
float full_priority =
|
||||
priority_ +
|
||||
splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth,
|
||||
center_knob, width_change_knob);
|
||||
for (int s = num_splits_ - 1; s >= 1; --s) {
|
||||
splits_[s].UnsplitOutlines();
|
||||
}
|
||||
return full_priority;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -153,381 +275,6 @@ void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) {
|
||||
TBOX nbox = word->blobs[b]->bounding_box();
|
||||
location.x = (bbox.right() + nbox.left()) / 2;
|
||||
location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
|
||||
seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name test_insert_seam
|
||||
*
|
||||
* @returns true if insert_seam will succeed.
|
||||
*/
|
||||
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
|
||||
TWERD *word, int index) {
|
||||
SEAM *test_seam;
|
||||
int list_length = seam_array.size();
|
||||
for (int test_index = 0; test_index < index; ++test_index) {
|
||||
test_seam = seam_array[test_index];
|
||||
if (test_index + test_seam->widthp < index &&
|
||||
test_seam->widthp + test_index == index - 1 &&
|
||||
account_splits(test_seam, word, test_index + 1, 1) < 0)
|
||||
return false;
|
||||
}
|
||||
for (int test_index = index; test_index < list_length; test_index++) {
|
||||
test_seam = seam_array[test_index];
|
||||
if (test_index - test_seam->widthn >= index &&
|
||||
test_index - test_seam->widthn == index &&
|
||||
account_splits(test_seam, word, test_index + 1, -1) < 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name insert_seam
|
||||
*
|
||||
* Add another seam to a collection of seams at a particular location
|
||||
* in the seam array.
|
||||
*/
|
||||
void insert_seam(const TWERD* word, int index, SEAM *seam,
|
||||
GenericVector<SEAM*>* seam_array) {
|
||||
SEAM *test_seam;
|
||||
int list_length = seam_array->size();
|
||||
for (int test_index = 0; test_index < index; ++test_index) {
|
||||
test_seam = seam_array->get(test_index);
|
||||
if (test_index + test_seam->widthp >= index) {
|
||||
test_seam->widthp++; /*got in the way */
|
||||
} else if (test_seam->widthp + test_index == index - 1) {
|
||||
test_seam->widthp = account_splits(test_seam, word, test_index + 1, 1);
|
||||
if (test_seam->widthp < 0) {
|
||||
tprintf("Failed to find any right blob for a split!\n");
|
||||
print_seam("New dud seam", seam);
|
||||
print_seam("Failed seam", test_seam);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int test_index = index; test_index < list_length; test_index++) {
|
||||
test_seam = seam_array->get(test_index);
|
||||
if (test_index - test_seam->widthn < index) {
|
||||
test_seam->widthn++; /*got in the way */
|
||||
} else if (test_index - test_seam->widthn == index) {
|
||||
test_seam->widthn = account_splits(test_seam, word, test_index + 1, -1);
|
||||
if (test_seam->widthn < 0) {
|
||||
tprintf("Failed to find any left blob for a split!\n");
|
||||
print_seam("New dud seam", seam);
|
||||
print_seam("Failed seam", test_seam);
|
||||
}
|
||||
}
|
||||
}
|
||||
seam_array->insert(seam, index);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name account_splits
|
||||
*
|
||||
* Account for all the splits by looking to the right (blob_direction == 1),
|
||||
* or to the left (blob_direction == -1) in the word.
|
||||
*/
|
||||
int account_splits(const SEAM *seam, const TWERD *word, int blob_index,
|
||||
int blob_direction) {
|
||||
inT8 found_em[3];
|
||||
inT8 width;
|
||||
|
||||
found_em[0] = seam->split1 == NULL;
|
||||
found_em[1] = seam->split2 == NULL;
|
||||
found_em[2] = seam->split3 == NULL;
|
||||
if (found_em[0] && found_em[1] && found_em[2])
|
||||
return 0;
|
||||
width = 0;
|
||||
do {
|
||||
TBLOB* blob = word->blobs[blob_index];
|
||||
if (!found_em[0])
|
||||
found_em[0] = find_split_in_blob(seam->split1, blob);
|
||||
if (!found_em[1])
|
||||
found_em[1] = find_split_in_blob(seam->split2, blob);
|
||||
if (!found_em[2])
|
||||
found_em[2] = find_split_in_blob(seam->split3, blob);
|
||||
if (found_em[0] && found_em[1] && found_em[2]) {
|
||||
return width;
|
||||
}
|
||||
width++;
|
||||
blob_index += blob_direction;
|
||||
} while (0 <= blob_index && blob_index < word->NumBlobs());
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name find_split_in_blob
|
||||
*
|
||||
* @returns TRUE if the split is somewhere in this blob.
|
||||
*/
|
||||
bool find_split_in_blob(SPLIT *split, TBLOB *blob) {
|
||||
TESSLINE *outline;
|
||||
|
||||
for (outline = blob->outlines; outline != NULL; outline = outline->next)
|
||||
if (outline->Contains(split->point1->pos))
|
||||
break;
|
||||
if (outline == NULL)
|
||||
return FALSE;
|
||||
for (outline = blob->outlines; outline != NULL; outline = outline->next)
|
||||
if (outline->Contains(split->point2->pos))
|
||||
return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name join_two_seams
|
||||
*
|
||||
* Merge these two seams into a new seam. Duplicate the split records
|
||||
* in both of the input seams. Return the resultant seam.
|
||||
*/
|
||||
SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2) {
|
||||
SEAM *result = NULL;
|
||||
SEAM *temp;
|
||||
|
||||
assert(seam1 &&seam2);
|
||||
|
||||
if (((seam1->split3 == NULL && seam2->split2 == NULL) ||
|
||||
(seam1->split2 == NULL && seam2->split3 == NULL) ||
|
||||
seam1->split1 == NULL || seam2->split1 == NULL) &&
|
||||
(!shared_split_points(seam1, seam2))) {
|
||||
result = new SEAM(*seam1);
|
||||
temp = new SEAM(*seam2);
|
||||
combine_seams(result, temp);
|
||||
}
|
||||
return (result);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name print_seam
|
||||
*
|
||||
* Print a list of splits. Show the coordinates of both points in
|
||||
* each split.
|
||||
*/
|
||||
void print_seam(const char *label, SEAM *seam) {
|
||||
if (seam) {
|
||||
tprintf(label);
|
||||
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ",
|
||||
seam->priority, seam->location.x, seam->location.y,
|
||||
seam->widthp, seam->widthn);
|
||||
print_split(seam->split1);
|
||||
|
||||
if (seam->split2) {
|
||||
tprintf(", ");
|
||||
print_split (seam->split2);
|
||||
if (seam->split3) {
|
||||
tprintf(", ");
|
||||
print_split (seam->split3);
|
||||
}
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name print_seams
|
||||
*
|
||||
* Print a list of splits. Show the coordinates of both points in
|
||||
* each split.
|
||||
*/
|
||||
void print_seams(const char *label, const GenericVector<SEAM*>& seams) {
|
||||
char number[CHARS_PER_LINE];
|
||||
|
||||
if (!seams.empty()) {
|
||||
tprintf("%s\n", label);
|
||||
for (int x = 0; x < seams.size(); ++x) {
|
||||
sprintf(number, "%2d: ", x);
|
||||
print_seam(number, seams[x]);
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name shared_split_points
|
||||
*
|
||||
* Check these two seams to make sure that neither of them have two
|
||||
* points in common. Return TRUE if any of the same points are present
|
||||
* in any of the splits of both seams.
|
||||
*/
|
||||
int shared_split_points(const SEAM *seam1, const SEAM *seam2) {
|
||||
if (seam1 == NULL || seam2 == NULL)
|
||||
return (FALSE);
|
||||
|
||||
if (seam2->split1 == NULL)
|
||||
return (FALSE);
|
||||
if (point_in_seam(seam1, seam2->split1))
|
||||
return (TRUE);
|
||||
|
||||
if (seam2->split2 == NULL)
|
||||
return (FALSE);
|
||||
if (point_in_seam(seam1, seam2->split2))
|
||||
return (TRUE);
|
||||
|
||||
if (seam2->split3 == NULL)
|
||||
return (FALSE);
|
||||
if (point_in_seam(seam1, seam2->split3))
|
||||
return (TRUE);
|
||||
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* break_pieces
|
||||
*
|
||||
* Break up the blobs in this chain so that they are all independent.
|
||||
* This operation should undo the affect of join_pieces.
|
||||
**********************************************************************/
|
||||
void break_pieces(const GenericVector<SEAM*>& seams, int first, int last,
|
||||
TWERD *word) {
|
||||
for (int x = first; x < last; ++x)
|
||||
reveal_seam(seams[x]);
|
||||
|
||||
TESSLINE *outline = word->blobs[first]->outlines;
|
||||
int next_blob = first + 1;
|
||||
|
||||
while (outline != NULL && next_blob <= last) {
|
||||
if (outline->next == word->blobs[next_blob]->outlines) {
|
||||
outline->next = NULL;
|
||||
outline = word->blobs[next_blob]->outlines;
|
||||
++next_blob;
|
||||
} else {
|
||||
outline = outline->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* join_pieces
|
||||
*
|
||||
* Join a group of base level pieces into a single blob that can then
|
||||
* be classified.
|
||||
**********************************************************************/
|
||||
void join_pieces(const GenericVector<SEAM*>& seams, int first, int last,
|
||||
TWERD *word) {
|
||||
TESSLINE *outline = word->blobs[first]->outlines;
|
||||
if (!outline)
|
||||
return;
|
||||
|
||||
for (int x = first; x < last; ++x) {
|
||||
SEAM *seam = seams[x];
|
||||
if (x - seam->widthn >= first && x + seam->widthp < last)
|
||||
hide_seam(seam);
|
||||
while (outline->next)
|
||||
outline = outline->next;
|
||||
outline->next = word->blobs[x + 1]->outlines;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* hide_seam
|
||||
*
|
||||
* Change the edge points that are referenced by this seam to make
|
||||
* them hidden edges.
|
||||
**********************************************************************/
|
||||
void hide_seam(SEAM *seam) {
|
||||
if (seam == NULL || seam->split1 == NULL)
|
||||
return;
|
||||
hide_edge_pair (seam->split1->point1, seam->split1->point2);
|
||||
|
||||
if (seam->split2 == NULL)
|
||||
return;
|
||||
hide_edge_pair (seam->split2->point1, seam->split2->point2);
|
||||
|
||||
if (seam->split3 == NULL)
|
||||
return;
|
||||
hide_edge_pair (seam->split3->point1, seam->split3->point2);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* hide_edge_pair
|
||||
*
|
||||
* Change the edge points that are referenced by this seam to make
|
||||
* them hidden edges.
|
||||
**********************************************************************/
|
||||
void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
|
||||
EDGEPT *edgept;
|
||||
|
||||
edgept = pt1;
|
||||
do {
|
||||
edgept->Hide();
|
||||
edgept = edgept->next;
|
||||
}
|
||||
while (!exact_point (edgept, pt2) && edgept != pt1);
|
||||
if (edgept == pt1) {
|
||||
/* tprintf("Hid entire outline at (%d,%d)!!\n",
|
||||
edgept->pos.x,edgept->pos.y); */
|
||||
}
|
||||
edgept = pt2;
|
||||
do {
|
||||
edgept->Hide();
|
||||
edgept = edgept->next;
|
||||
}
|
||||
while (!exact_point (edgept, pt1) && edgept != pt2);
|
||||
if (edgept == pt2) {
|
||||
/* tprintf("Hid entire outline at (%d,%d)!!\n",
|
||||
edgept->pos.x,edgept->pos.y); */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* reveal_seam
|
||||
*
|
||||
* Change the edge points that are referenced by this seam to make
|
||||
* them hidden edges.
|
||||
**********************************************************************/
|
||||
void reveal_seam(SEAM *seam) {
|
||||
if (seam == NULL || seam->split1 == NULL)
|
||||
return;
|
||||
reveal_edge_pair (seam->split1->point1, seam->split1->point2);
|
||||
|
||||
if (seam->split2 == NULL)
|
||||
return;
|
||||
reveal_edge_pair (seam->split2->point1, seam->split2->point2);
|
||||
|
||||
if (seam->split3 == NULL)
|
||||
return;
|
||||
reveal_edge_pair (seam->split3->point1, seam->split3->point2);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* reveal_edge_pair
|
||||
*
|
||||
* Calls Reveal() on the edge points of the two runs joining pt1 and pt2:
|
||||
* first walking next links from pt1 towards pt2, then from pt2 towards pt1.
|
||||
**********************************************************************/
|
||||
void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
|
||||
EDGEPT *edgept;
|
||||
|
||||
// First run: start at pt1 and stop at a point with the same position as
// pt2, or on arriving back at pt1.
edgept = pt1;
|
||||
do {
|
||||
edgept->Reveal();
|
||||
edgept = edgept->next;
|
||||
}
|
||||
while (!exact_point (edgept, pt2) && edgept != pt1);
|
||||
// The walk wrapped all the way round; only a disabled diagnostic remains.
if (edgept == pt1) {
|
||||
/* tprintf("Hid entire outline at (%d,%d)!!\n",
|
||||
edgept->pos.x,edgept->pos.y); */
|
||||
}
|
||||
// Second run: the same walk in the other direction, from pt2 towards pt1.
edgept = pt2;
|
||||
do {
|
||||
edgept->Reveal();
|
||||
edgept = edgept->next;
|
||||
}
|
||||
while (!exact_point (edgept, pt1) && edgept != pt2);
|
||||
if (edgept == pt2) {
|
||||
/* tprintf("Hid entire outline at (%d,%d)!!\n",
|
||||
edgept->pos.x,edgept->pos.y); */
|
||||
// NOTE(review): seam_array and location are not declared anywhere in this
// function; this line looks like a stray added line from the diff
// rendering rather than part of reveal_edge_pair - confirm.
seam_array->push_back(new SEAM(0.0f, location));
|
||||
}
|
||||
}
|
||||
|
228
ccstruct/seam.h
228
ccstruct/seam.h
@ -36,95 +36,163 @@
|
||||
----------------------------------------------------------------------*/
|
||||
typedef float PRIORITY; /* PRIORITY */
|
||||
|
||||
struct SEAM {
|
||||
// Constructor that was formerly new_seam.
|
||||
SEAM(PRIORITY priority0, const TPOINT& location0,
|
||||
SPLIT *splita, SPLIT *splitb, SPLIT *splitc)
|
||||
: priority(priority0), widthp(0), widthn(0), location(location0),
|
||||
split1(splita), split2(splitb), split3(splitc) {}
|
||||
// Copy constructor that was formerly clone_seam.
|
||||
SEAM(const SEAM& src)
|
||||
: priority(src.priority), widthp(src.widthp), widthn(src.widthn),
|
||||
location(src.location) {
|
||||
clone_split(split1, src.split1);
|
||||
clone_split(split2, src.split2);
|
||||
clone_split(split3, src.split3);
|
||||
class SEAM {
|
||||
public:
|
||||
// A seam with no splits
|
||||
SEAM(float priority, const TPOINT& location)
|
||||
: priority_(priority),
|
||||
location_(location),
|
||||
widthp_(0),
|
||||
widthn_(0),
|
||||
num_splits_(0) {}
|
||||
// A seam with a single split point.
|
||||
SEAM(float priority, const TPOINT& location, const SPLIT& split)
|
||||
: priority_(priority),
|
||||
location_(location),
|
||||
widthp_(0),
|
||||
widthn_(0),
|
||||
num_splits_(1) {
|
||||
splits_[0] = split;
|
||||
}
|
||||
// Destructor was delete_seam.
|
||||
~SEAM() {
|
||||
if (split1)
|
||||
delete_split(split1);
|
||||
if (split2)
|
||||
delete_split(split2);
|
||||
if (split3)
|
||||
delete_split(split3);
|
||||
// Default copy constructor, operator= and destructor are OK!
|
||||
|
||||
// Accessors.
|
||||
float priority() const { return priority_; }
|
||||
void set_priority(float priority) { priority_ = priority; }
|
||||
bool HasAnySplits() const { return num_splits_ > 0; }
|
||||
|
||||
// Returns the bounding box of all the points in the seam.
|
||||
TBOX bounding_box() const;
|
||||
|
||||
// Returns true if other can be combined into *this.
|
||||
bool CombineableWith(const SEAM& other, int max_x_dist,
|
||||
float max_total_priority) const;
|
||||
// Combines other into *this. Only works if CombineableWith returned true.
|
||||
void CombineWith(const SEAM& other);
|
||||
|
||||
// Returns true if the given blob contains all splits of *this SEAM.
|
||||
bool ContainedByBlob(const TBLOB& blob) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
if (!splits_[s].ContainedByBlob(blob)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
PRIORITY priority;
|
||||
inT8 widthp;
|
||||
inT8 widthn;
|
||||
TPOINT location;
|
||||
SPLIT *split1;
|
||||
SPLIT *split2;
|
||||
SPLIT *split3;
|
||||
// Returns true if the given EDGEPT is used by this SEAM, checking only
|
||||
// the EDGEPT pointer, not the coordinates.
|
||||
bool UsesPoint(const EDGEPT* point) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
if (splits_[s].UsesPoint(point)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if *this and other share any common point, by coordinates.
|
||||
bool SharesPosition(const SEAM& other) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
for (int t = 0; t < other.num_splits_; ++t)
|
||||
if (splits_[s].SharesPosition(other.splits_[t])) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if *this and other have any vertically overlapping splits.
|
||||
bool OverlappingSplits(const SEAM& other) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
TBOX split1_box = splits_[s].bounding_box();
|
||||
for (int t = 0; t < other.num_splits_; ++t) {
|
||||
TBOX split2_box = other.splits_[t].bounding_box();
|
||||
if (split1_box.y_overlap(split2_box)) return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Marks the edgepts used by the seam so the segments made by the cut
|
||||
// never get split further by another seam in the future.
|
||||
void Finalize() {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].point1->MarkChop();
|
||||
splits_[s].point2->MarkChop();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the splits in *this SEAM appear OK in the sense that they
|
||||
// do not cross any outlines and do not chop off any ridiculously small
|
||||
// pieces.
|
||||
bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const;
|
||||
|
||||
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
|
||||
// seam, which is about to be inserted at insert_index. Returns false if
|
||||
// any of the computations fails, as this indicates an invalid chop.
|
||||
// widthn_/widthp_ are only changed if modify is true.
|
||||
bool PrepareToInsertSeam(const GenericVector<SEAM*>& seams,
|
||||
const GenericVector<TBLOB*>& blobs, int insert_index,
|
||||
bool modify);
|
||||
// Computes the widthp_/widthn_ range. Returns false if not all the splits
|
||||
// are accounted for. widthn_/widthp_ are only changed if modify is true.
|
||||
bool FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
|
||||
bool modify);
|
||||
|
||||
// Splits this blob into two blobs by applying the splits included in
|
||||
// *this SEAM
|
||||
void ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const;
|
||||
// Undoes ApplySeam by removing the seam between these two blobs.
|
||||
// Produces one blob as a result, and deletes other_blob.
|
||||
void UndoSeam(TBLOB* blob, TBLOB* other_blob) const;
|
||||
|
||||
// Prints everything in *this SEAM.
|
||||
void Print(const char* label) const;
|
||||
// Prints a collection of SEAMs.
|
||||
static void PrintSeams(const char* label, const GenericVector<SEAM*>& seams);
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
// Draws the seam in the given window.
|
||||
void Mark(ScrollView* window) const;
|
||||
#endif
|
||||
|
||||
// Break up the blobs in this chain so that they are all independent.
|
||||
// This operation should undo the effect of JoinPieces.
|
||||
static void BreakPieces(const GenericVector<SEAM*>& seams,
|
||||
const GenericVector<TBLOB*>& blobs, int first,
|
||||
int last);
|
||||
// Join a group of base level pieces into a single blob that can then
|
||||
// be classified.
|
||||
static void JoinPieces(const GenericVector<SEAM*>& seams,
|
||||
const GenericVector<TBLOB*>& blobs, int first,
|
||||
int last);
|
||||
|
||||
// Hides the seam so the outlines appear not to be cut by it.
|
||||
void Hide() const;
|
||||
// Undoes hide, so the outlines are cut by the seam.
|
||||
void Reveal() const;
|
||||
|
||||
// Computes and returns, but does not set, the full priority of *this SEAM.
|
||||
// The arguments here are config parameters defined in Wordrec. Add chop_
|
||||
// to the beginning of the name.
|
||||
float FullPriority(int xmin, int xmax, double overlap_knob,
|
||||
int centered_maxwidth, double center_knob,
|
||||
double width_change_knob) const;
|
||||
|
||||
private:
|
||||
// Maximum number of splits that a SEAM can hold.
|
||||
static const int kMaxNumSplits = 3;
|
||||
// Priority of this split. Lower is better.
|
||||
float priority_;
|
||||
// Position of the middle of the seam.
|
||||
TPOINT location_;
|
||||
// A range such that all splits in *this SEAM are contained within blobs in
|
||||
// the range [index - widthn_,index + widthp_] where index is the index of
|
||||
// this SEAM in the seams vector.
|
||||
inT8 widthp_;
|
||||
inT8 widthn_;
|
||||
// Number of splits_ that are used.
|
||||
inT8 num_splits_;
|
||||
// Set of pairs of points that are the ends of each split in the SEAM.
|
||||
SPLIT splits_[kMaxNumSplits];
|
||||
};
|
||||
|
||||
/**
 * exact_point
 *
 * Return TRUE if the two point positions are exactly the same, i.e. both
 * the x and the y coordinates match. The parameters must be of type
 * (EDGEPT*).
 *
 * Each parameter is parenthesized in the expansion so that any pointer
 * expression (for example &point or ptr + 1) can be passed safely. Each
 * argument is still evaluated twice, so it must not have side effects.
 */

#define exact_point(p1,p2)                    \
  (! (((p1)->pos.x - (p2)->pos.x) || ((p1)->pos.y - (p2)->pos.y)))
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2);
|
||||
|
||||
bool point_in_seam(const SEAM *seam, SPLIT *split);
|
||||
|
||||
bool point_used_by_split(SPLIT *split, EDGEPT *point);
|
||||
|
||||
bool point_used_by_seam(SEAM *seam, EDGEPT *point);
|
||||
|
||||
void combine_seams(SEAM *dest_seam, SEAM *source_seam);
|
||||
|
||||
void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array);
|
||||
|
||||
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
|
||||
TWERD *word, int index);
|
||||
|
||||
void insert_seam(const TWERD *word, int index, SEAM *seam,
|
||||
GenericVector<SEAM*>* seam_array);
|
||||
|
||||
int account_splits(const SEAM *seam, const TWERD *word, int blob_index,
|
||||
int blob_direction);
|
||||
|
||||
bool find_split_in_blob(SPLIT *split, TBLOB *blob);
|
||||
|
||||
SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2);
|
||||
|
||||
void print_seam(const char *label, SEAM *seam);
|
||||
|
||||
void print_seams(const char *label, const GenericVector<SEAM*>& seams);
|
||||
|
||||
int shared_split_points(const SEAM *seam1, const SEAM *seam2);
|
||||
|
||||
void break_pieces(const GenericVector<SEAM*>& seams,
|
||||
int first, int last, TWERD *word);
|
||||
|
||||
void join_pieces(const GenericVector<SEAM*>& seams,
|
||||
int first, int last, TWERD *word);
|
||||
|
||||
void hide_seam(SEAM *seam);
|
||||
|
||||
void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2);
|
||||
|
||||
void reveal_seam(SEAM *seam);
|
||||
|
||||
void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2);
|
||||
|
||||
#endif
|
||||
|
@ -36,23 +36,103 @@
|
||||
/*----------------------------------------------------------------------
|
||||
V a r i a b l e s
|
||||
----------------------------------------------------------------------*/
|
||||
// Limit on the amount of penalty for the chop being off-center.
|
||||
const int kCenterGradeCap = 25;
|
||||
// Ridiculously large priority for splits that are no use.
|
||||
const double kBadPriority = 999.0;
|
||||
|
||||
BOOL_VAR(wordrec_display_splits, 0, "Display splits");
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
|
||||
/**********************************************************************
|
||||
* delete_split
|
||||
*
|
||||
* Remove this split from existence.
|
||||
**********************************************************************/
|
||||
void delete_split(SPLIT *split) {
|
||||
if (split) {
|
||||
delete split;
|
||||
}
|
||||
// Returns the bounding box of all the points in the split.
|
||||
TBOX SPLIT::bounding_box() const {
|
||||
return TBOX(
|
||||
MIN(point1->pos.x, point2->pos.x), MIN(point1->pos.y, point2->pos.y),
|
||||
MAX(point1->pos.x, point2->pos.x), MAX(point1->pos.y, point2->pos.y));
|
||||
}
|
||||
|
||||
// Hides the SPLIT so the outlines appear not to be cut by it.
|
||||
void SPLIT::Hide() const {
|
||||
EDGEPT* edgept = point1;
|
||||
do {
|
||||
edgept->Hide();
|
||||
edgept = edgept->next;
|
||||
} while (!edgept->EqualPos(*point2) && edgept != point1);
|
||||
edgept = point2;
|
||||
do {
|
||||
edgept->Hide();
|
||||
edgept = edgept->next;
|
||||
} while (!edgept->EqualPos(*point1) && edgept != point2);
|
||||
}
|
||||
|
||||
// Undoes hide, so the outlines are cut by the SPLIT.
|
||||
void SPLIT::Reveal() const {
|
||||
EDGEPT* edgept = point1;
|
||||
do {
|
||||
edgept->Reveal();
|
||||
edgept = edgept->next;
|
||||
} while (!edgept->EqualPos(*point2) && edgept != point1);
|
||||
edgept = point2;
|
||||
do {
|
||||
edgept->Reveal();
|
||||
edgept = edgept->next;
|
||||
} while (!edgept->EqualPos(*point1) && edgept != point2);
|
||||
}
|
||||
|
||||
// Compute a split priority based on the bounding boxes of the parts.
|
||||
// The arguments here are config parameters defined in Wordrec. Add chop_
|
||||
// to the beginning of the name.
|
||||
float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
|
||||
int centered_maxwidth, double center_knob,
|
||||
double width_change_knob) const {
|
||||
TBOX box1 = Box12();
|
||||
TBOX box2 = Box21();
|
||||
int min_left = MIN(box1.left(), box2.left());
|
||||
int max_right = MAX(box1.right(), box2.right());
|
||||
if (xmin < min_left && xmax > max_right) return kBadPriority;
|
||||
|
||||
float grade = 0.0f;
|
||||
// grade_overlap.
|
||||
int width1 = box1.width();
|
||||
int width2 = box2.width();
|
||||
int min_width = MIN(width1, width2);
|
||||
int overlap = -box1.x_gap(box2);
|
||||
if (overlap == min_width) {
|
||||
grade += 100.0f; // Total overlap.
|
||||
} else {
|
||||
if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
|
||||
if (overlap > 0) grade += overlap_knob * overlap;
|
||||
}
|
||||
// grade_center_of_blob.
|
||||
if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
|
||||
grade += MIN(kCenterGradeCap, center_knob * abs(width1 - width2));
|
||||
}
|
||||
// grade_width_change.
|
||||
float width_change_grade = 20 - (max_right - min_left - MAX(width1, width2));
|
||||
if (width_change_grade > 0.0f)
|
||||
grade += width_change_grade * width_change_knob;
|
||||
return grade;
|
||||
}
|
||||
|
||||
// Returns true if *this SPLIT appears OK in the sense that it does not cross
|
||||
// any outlines and does not chop off any ridiculously small pieces.
|
||||
bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
|
||||
return !IsLittleChunk(min_points, min_area) &&
|
||||
!blob.SegmentCrossesOutline(point1->pos, point2->pos);
|
||||
}
|
||||
|
||||
// Returns true if the split generates a small chunk in terms of either area
|
||||
// or number of points.
|
||||
bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
|
||||
if (point1->ShortNonCircularSegment(min_points, point2) &&
|
||||
point1->SegmentArea(point2) < min_area) {
|
||||
return true;
|
||||
}
|
||||
if (point2->ShortNonCircularSegment(min_points, point1) &&
|
||||
point2->SegmentArea(point1) < min_area) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* make_edgept
|
||||
@ -135,102 +215,113 @@ void remove_edgept(EDGEPT *point) {
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* new_split
|
||||
* Print
|
||||
*
|
||||
* Create a new split record and initialize it. Put it on the display
|
||||
* list.
|
||||
* Shows the coordinates of both points in a split.
|
||||
**********************************************************************/
|
||||
SPLIT *new_split(EDGEPT *point1, EDGEPT *point2) {
|
||||
SPLIT *s = new SPLIT;
|
||||
s->point1 = point1;
|
||||
s->point2 = point2;
|
||||
return (s);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* print_split
|
||||
*
|
||||
* Print a list of splits. Show the coordinates of both points in
|
||||
* each split.
|
||||
**********************************************************************/
|
||||
void print_split(SPLIT *split) {
|
||||
if (split) {
|
||||
tprintf("(%d,%d)--(%d,%d)",
|
||||
split->point1->pos.x, split->point1->pos.y,
|
||||
split->point2->pos.x, split->point2->pos.y);
|
||||
void SPLIT::Print() const {
|
||||
if (this != NULL) {
|
||||
tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
|
||||
point2->pos.y);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
// Draws the split in the given window.
|
||||
void SPLIT::Mark(ScrollView* window) const {
|
||||
window->Pen(ScrollView::GREEN);
|
||||
window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
|
||||
window->UpdateWindow();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**********************************************************************
|
||||
* split_outline
|
||||
*
|
||||
* Split between these two edge points.
|
||||
**********************************************************************/
|
||||
void split_outline(EDGEPT *join_point1, EDGEPT *join_point2) {
|
||||
assert(join_point1 != join_point2);
|
||||
// Creates two outlines out of one by splitting the original one in half.
|
||||
// Inserts the resulting outlines into the given list.
|
||||
void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
|
||||
SplitOutline();
|
||||
while (outlines->next != NULL) outlines = outlines->next;
|
||||
|
||||
EDGEPT* temp2 = join_point2->next;
|
||||
EDGEPT* temp1 = join_point1->next;
|
||||
outlines->next = new TESSLINE;
|
||||
outlines->next->loop = point1;
|
||||
outlines->next->ComputeBoundingBox();
|
||||
|
||||
outlines = outlines->next;
|
||||
|
||||
outlines->next = new TESSLINE;
|
||||
outlines->next->loop = point2;
|
||||
outlines->next->ComputeBoundingBox();
|
||||
|
||||
outlines->next->next = NULL;
|
||||
}
|
||||
|
||||
// Makes a split between these two edge points, but does not affect the
|
||||
// outlines to which they belong.
|
||||
void SPLIT::SplitOutline() const {
|
||||
EDGEPT* temp2 = point2->next;
|
||||
EDGEPT* temp1 = point1->next;
|
||||
/* Create two new points */
|
||||
EDGEPT* new_point1 = make_edgept(join_point1->pos.x, join_point1->pos.y,
|
||||
temp1, join_point2);
|
||||
EDGEPT* new_point2 = make_edgept(join_point2->pos.x, join_point2->pos.y,
|
||||
temp2, join_point1);
|
||||
// Join_point1 and 2 are now cross-over points, so they must have NULL
|
||||
EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
|
||||
EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
|
||||
// point1 and 2 are now cross-over points, so they must have NULL
|
||||
// src_outlines and give their src_outline information their new
|
||||
// replacements.
|
||||
new_point1->src_outline = join_point1->src_outline;
|
||||
new_point1->start_step = join_point1->start_step;
|
||||
new_point1->step_count = join_point1->step_count;
|
||||
new_point2->src_outline = join_point2->src_outline;
|
||||
new_point2->start_step = join_point2->start_step;
|
||||
new_point2->step_count = join_point2->step_count;
|
||||
join_point1->src_outline = NULL;
|
||||
join_point1->start_step = 0;
|
||||
join_point1->step_count = 0;
|
||||
join_point2->src_outline = NULL;
|
||||
join_point2->start_step = 0;
|
||||
join_point2->step_count = 0;
|
||||
join_point1->MarkChop();
|
||||
join_point2->MarkChop();
|
||||
new_point1->src_outline = point1->src_outline;
|
||||
new_point1->start_step = point1->start_step;
|
||||
new_point1->step_count = point1->step_count;
|
||||
new_point2->src_outline = point2->src_outline;
|
||||
new_point2->start_step = point2->start_step;
|
||||
new_point2->step_count = point2->step_count;
|
||||
point1->src_outline = NULL;
|
||||
point1->start_step = 0;
|
||||
point1->step_count = 0;
|
||||
point2->src_outline = NULL;
|
||||
point2->start_step = 0;
|
||||
point2->step_count = 0;
|
||||
}
|
||||
|
||||
// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
|
||||
// the split, but possibly leaving some duplicate outlines.
|
||||
void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
|
||||
/* Modify edge points */
|
||||
UnsplitOutlines();
|
||||
|
||||
/**********************************************************************
|
||||
* unsplit_outlines
|
||||
*
|
||||
* Remove the split that was put between these two points.
|
||||
**********************************************************************/
|
||||
void unsplit_outlines(EDGEPT *p1, EDGEPT *p2) {
|
||||
EDGEPT *tmp1 = p1->next;
|
||||
EDGEPT *tmp2 = p2->next;
|
||||
TESSLINE* outline1 = new TESSLINE;
|
||||
outline1->next = blob->outlines;
|
||||
blob->outlines = outline1;
|
||||
outline1->loop = point1;
|
||||
|
||||
assert (p1 != p2);
|
||||
TESSLINE* outline2 = new TESSLINE;
|
||||
outline2->next = blob->outlines;
|
||||
blob->outlines = outline2;
|
||||
outline2->loop = point2;
|
||||
}
|
||||
|
||||
tmp1->next->prev = p2;
|
||||
tmp2->next->prev = p1;
|
||||
// Removes the split that was put between these two points.
|
||||
void SPLIT::UnsplitOutlines() const {
|
||||
EDGEPT* tmp1 = point1->next;
|
||||
EDGEPT* tmp2 = point2->next;
|
||||
|
||||
// tmp2 is coincident with p1. p1 takes tmp2's place as tmp2 is deleted.
|
||||
p1->next = tmp2->next;
|
||||
p1->src_outline = tmp2->src_outline;
|
||||
p1->start_step = tmp2->start_step;
|
||||
p1->step_count = tmp2->step_count;
|
||||
// Likewise p2 takes tmp1's place.
|
||||
p2->next = tmp1->next;
|
||||
p2->src_outline = tmp1->src_outline;
|
||||
p2->start_step = tmp1->start_step;
|
||||
p2->step_count = tmp1->step_count;
|
||||
p1->UnmarkChop();
|
||||
p2->UnmarkChop();
|
||||
tmp1->next->prev = point2;
|
||||
tmp2->next->prev = point1;
|
||||
|
||||
// tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
|
||||
// deleted.
|
||||
point1->next = tmp2->next;
|
||||
point1->src_outline = tmp2->src_outline;
|
||||
point1->start_step = tmp2->start_step;
|
||||
point1->step_count = tmp2->step_count;
|
||||
// Likewise point2 takes tmp1's place.
|
||||
point2->next = tmp1->next;
|
||||
point2->src_outline = tmp1->src_outline;
|
||||
point2->start_step = tmp1->start_step;
|
||||
point2->step_count = tmp1->step_count;
|
||||
|
||||
delete tmp1;
|
||||
delete tmp2;
|
||||
|
||||
p1->vec.x = p1->next->pos.x - p1->pos.x;
|
||||
p1->vec.y = p1->next->pos.y - p1->pos.y;
|
||||
point1->vec.x = point1->next->pos.x - point1->pos.x;
|
||||
point1->vec.y = point1->next->pos.y - point1->pos.y;
|
||||
|
||||
p2->vec.x = p2->next->pos.x - p2->pos.x;
|
||||
p2->vec.y = p2->next->pos.y - p2->pos.y;
|
||||
point2->vec.x = point2->next->pos.x - point2->pos.x;
|
||||
point2->vec.y = point2->next->pos.y - point2->pos.y;
|
||||
}
|
||||
|
101
ccstruct/split.h
101
ccstruct/split.h
@ -29,18 +29,80 @@
|
||||
I n c l u d e s
|
||||
----------------------------------------------------------------------*/
|
||||
#include "blobs.h"
|
||||
#include "oldlist.h"
|
||||
#include "scrollview.h"
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
T y p e s
|
||||
----------------------------------------------------------------------*/
|
||||
typedef struct split_record
|
||||
{ /* SPLIT */
|
||||
struct SPLIT {
|
||||
SPLIT() : point1(NULL), point2(NULL) {}
|
||||
SPLIT(EDGEPT* pt1, EDGEPT* pt2) : point1(pt1), point2(pt2) {}
|
||||
|
||||
// Returns the bounding box of all the points in the split.
|
||||
TBOX bounding_box() const;
|
||||
// Returns the bounding box of the outline from point1 to point2.
|
||||
TBOX Box12() const { return point1->SegmentBox(point2); }
|
||||
// Returns the bounding box of the outline from point2 to point1.
|
||||
TBOX Box21() const { return point2->SegmentBox(point1); }
|
||||
// Returns the bounding box of the out
|
||||
|
||||
// Hides the SPLIT so the outlines appear not to be cut by it.
|
||||
void Hide() const;
|
||||
// Undoes hide, so the outlines are cut by the SPLIT.
|
||||
void Reveal() const;
|
||||
|
||||
// Returns true if the given EDGEPT is used by this SPLIT, checking only
|
||||
// the EDGEPT pointer, not the coordinates.
|
||||
bool UsesPoint(const EDGEPT* point) const {
|
||||
return point1 == point || point2 == point;
|
||||
}
|
||||
// Returns true if the other SPLIT has any position shared with *this.
|
||||
bool SharesPosition(const SPLIT& other) const {
|
||||
return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) ||
|
||||
point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2);
|
||||
}
|
||||
// Returns true if both points are contained within the blob.
|
||||
bool ContainedByBlob(const TBLOB& blob) const {
|
||||
return blob.Contains(point1->pos) && blob.Contains(point2->pos);
|
||||
}
|
||||
// Returns true if both points are contained within the outline.
|
||||
bool ContainedByOutline(const TESSLINE& outline) const {
|
||||
return outline.Contains(point1->pos) && outline.Contains(point2->pos);
|
||||
}
|
||||
// Compute a split priority based on the bounding boxes of the parts.
|
||||
// The arguments here are config parameters defined in Wordrec. Add chop_
|
||||
// to the beginning of the name.
|
||||
float FullPriority(int xmin, int xmax, double overlap_knob,
|
||||
int centered_maxwidth, double center_knob,
|
||||
double width_change_knob) const;
|
||||
// Returns true if *this SPLIT appears OK in the sense that it does not cross
|
||||
// any outlines and does not chop off any ridiculously small pieces.
|
||||
bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const;
|
||||
// Returns true if the split generates a small chunk in terms of either area
|
||||
// or number of points.
|
||||
bool IsLittleChunk(int min_points, int min_area) const;
|
||||
|
||||
void Print() const;
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
// Draws the split in the given window.
|
||||
void Mark(ScrollView* window) const;
|
||||
#endif
|
||||
|
||||
// Creates two outlines out of one by splitting the original one in half.
|
||||
// Inserts the resulting outlines into the given list.
|
||||
void SplitOutlineList(TESSLINE* outlines) const;
|
||||
// Makes a split between these two edge points, but does not affect the
|
||||
// outlines to which they belong.
|
||||
void SplitOutline() const;
|
||||
// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
|
||||
// the split, but possibly leaving some duplicate outlines.
|
||||
void UnsplitOutlineList(TBLOB* blob) const;
|
||||
// Removes the split that was put between these two points.
|
||||
void UnsplitOutlines() const;
|
||||
|
||||
EDGEPT *point1;
|
||||
EDGEPT *point2;
|
||||
} SPLIT;
|
||||
|
||||
typedef LIST SPLITS; /* SPLITS */
|
||||
};
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
V a r i a b l e s
|
||||
@ -48,38 +110,11 @@ typedef LIST SPLITS; /* SPLITS */
|
||||
|
||||
extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits");
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
M a c r o s
|
||||
----------------------------------------------------------------------*/
|
||||
/**********************************************************************
|
||||
* clone_split
|
||||
*
|
||||
* Create a new split record and set the contents equal to the contents
|
||||
* of this record.
|
||||
**********************************************************************/
|
||||
|
||||
#define clone_split(dest,source) \
|
||||
if (source) \
|
||||
(dest) = new_split ((source)->point1, (source)->point2); \
|
||||
else \
|
||||
(dest) = (SPLIT*) NULL \
|
||||
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
void delete_split(SPLIT *split);
|
||||
|
||||
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev);
|
||||
|
||||
void remove_edgept(EDGEPT *point);
|
||||
|
||||
SPLIT *new_split(EDGEPT *point1, EDGEPT *point2);
|
||||
|
||||
void print_split(SPLIT *split);
|
||||
|
||||
void split_outline(EDGEPT *join_point1, EDGEPT *join_point2);
|
||||
|
||||
void unsplit_outlines(EDGEPT *p1, EDGEPT *p2);
|
||||
|
||||
#endif
|
||||
|
@ -30,6 +30,7 @@
|
||||
I n c l u d e s
|
||||
----------------------------------------------------------------------*/
|
||||
#include "vecfuncs.h"
|
||||
#include "blobs.h"
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
|
@ -26,7 +26,6 @@
|
||||
#define VECFUNCS_H
|
||||
|
||||
#include <math.h>
|
||||
#include "blobs.h"
|
||||
|
||||
struct EDGEPT;
|
||||
|
||||
|
@ -359,8 +359,8 @@ void Classify::LearnPieces(const char* filename, int start, int length,
|
||||
return;
|
||||
|
||||
if (length > 1) {
|
||||
join_pieces(word->seam_array, start, start + length - 1,
|
||||
word->chopped_word);
|
||||
SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
|
||||
start + length - 1);
|
||||
}
|
||||
TBLOB* blob = word->chopped_word->blobs[start];
|
||||
// Rotate the blob if needed for classification.
|
||||
@ -413,7 +413,8 @@ void Classify::LearnPieces(const char* filename, int start, int length,
|
||||
delete rotated_blob;
|
||||
}
|
||||
|
||||
break_pieces(word->seam_array, start, start + length - 1, word->chopped_word);
|
||||
SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
|
||||
start + length - 1);
|
||||
} // LearnPieces.
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
@ -29,7 +29,6 @@
|
||||
|
||||
#include "chop.h"
|
||||
#include "outlines.h"
|
||||
#include "olutil.h"
|
||||
#include "callcpp.h"
|
||||
#include "plotedges.h"
|
||||
#include "const.h"
|
||||
@ -74,6 +73,11 @@ void Wordrec::add_point_to_list(PointHeap* point_heap, EDGEPT *point) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns true if the edgept supplied as input is an inside angle. This
|
||||
// is determined by the angular change of the vectors from point to point.
|
||||
bool Wordrec::is_inside_angle(EDGEPT *pt) {
|
||||
// The angle change is measured across the triple (pt->prev, pt, pt->next)
// and compared against the chop_inside_angle threshold.
return angle_change(pt->prev, pt, pt->next) < chop_inside_angle;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name angle_change
|
||||
@ -111,65 +115,6 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
|
||||
return (angle);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name is_little_chunk
|
||||
*
|
||||
* Return TRUE if one of the pieces resulting from this split would have
|
||||
* fewer than some number of edge points.
|
||||
*/
|
||||
int Wordrec::is_little_chunk(EDGEPT *point1, EDGEPT *point2) {
// Walks the outline forward from each endpoint of the proposed split for a
// bounded number of steps (limited by chop_min_outline_points). If the other
// endpoint is reached within that budget, the piece has few points and the
// area test (is_small_area) decides whether it counts as a little chunk.
|
||||
EDGEPT *p = point1; /* Iterator */
|
||||
int counter = 0;
|
||||
|
||||
do {
|
||||
/* Go from P1 to P2 */
|
||||
if (is_same_edgept (point2, p)) {
|
||||
// point2 was reached within the step budget: small area => little chunk.
// Otherwise stop this walk and try the opposite direction below.
if (is_small_area (point1, point2))
|
||||
return (TRUE);
|
||||
else
|
||||
break;
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
while ((p != point1) && (counter++ < chop_min_outline_points));
|
||||
/* Go from P2 to P1 */
|
||||
p = point2;
|
||||
counter = 0;
|
||||
do {
|
||||
if (is_same_edgept (point1, p)) {
|
||||
// In this direction the result of the area test is returned directly.
return (is_small_area (point2, point1));
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
while ((p != point2) && (counter++ < chop_min_outline_points));
|
||||
|
||||
// Neither endpoint was reached from the other within the step budget.
return (FALSE);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name is_small_area
|
||||
*
|
||||
* Test the area defined by a split across this outline.
|
||||
*/
|
||||
int Wordrec::is_small_area(EDGEPT *point1, EDGEPT *point2) {
// Returns non-zero when the accumulated cross-product sum over the outline
// section from point1 to point2 is below chop_min_outline_area.
|
||||
EDGEPT *p = point1->next; /* Iterator */
|
||||
int area = 0;
|
||||
TPOINT origin;
|
||||
|
||||
do {
|
||||
/* Go from P1 to P2 */
// origin is the position of p relative to point1.
origin.x = p->pos.x - point1->pos.x;
|
||||
origin.y = p->pos.y - point1->pos.y;
|
||||
// Accumulate the cross product of that offset with the edge vector at p.
// NOTE(review): presumably a shoelace-style sum proportional to the
// enclosed area; the exact scaling is not established here -- confirm.
area += CROSS (origin, p->vec);
|
||||
p = p->next;
|
||||
}
|
||||
while (!is_same_edgept (point2, p));
|
||||
|
||||
return (area < chop_min_outline_area);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name pick_close_point
|
||||
*
|
||||
|
@ -39,7 +39,6 @@
|
||||
#include "findseam.h"
|
||||
#include "freelist.h"
|
||||
#include "globals.h"
|
||||
#include "makechop.h"
|
||||
#include "render.h"
|
||||
#include "pageres.h"
|
||||
#include "seam.h"
|
||||
@ -135,18 +134,14 @@ void restore_outline_tree(TESSLINE *srcline) {
|
||||
static SEAM* CheckSeam(int debug_level, inT32 blob_number, TWERD* word,
|
||||
TBLOB* blob, TBLOB* other_blob,
|
||||
const GenericVector<SEAM*>& seams, SEAM* seam) {
|
||||
if (seam == NULL ||
|
||||
blob->outlines == NULL ||
|
||||
other_blob->outlines == NULL ||
|
||||
total_containment(blob, other_blob) ||
|
||||
check_blob(other_blob) ||
|
||||
!(check_seam_order(blob, seam) &&
|
||||
check_seam_order(other_blob, seam)) ||
|
||||
if (seam == NULL || blob->outlines == NULL || other_blob->outlines == NULL ||
|
||||
total_containment(blob, other_blob) || check_blob(other_blob) ||
|
||||
!seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
|
||||
any_shared_split_points(seams, seam) ||
|
||||
!test_insert_seam(seams, word, blob_number)) {
|
||||
!seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) {
|
||||
word->blobs.remove(blob_number + 1);
|
||||
if (seam) {
|
||||
undo_seam(blob, other_blob, seam);
|
||||
seam->UndoSeam(blob, other_blob);
|
||||
delete seam;
|
||||
seam = NULL;
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
@ -185,19 +180,19 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
|
||||
if (prioritize_division) {
|
||||
TPOINT location;
|
||||
if (divisible_blob(blob, italic_blob, &location)) {
|
||||
seam = new SEAM(0.0f, location, NULL, NULL, NULL);
|
||||
seam = new SEAM(0.0f, location);
|
||||
}
|
||||
}
|
||||
if (seam == NULL)
|
||||
seam = pick_good_seam(blob);
|
||||
if (chop_debug) {
|
||||
if (seam != NULL)
|
||||
print_seam("Good seam picked=", seam);
|
||||
seam->Print("Good seam picked=");
|
||||
else
|
||||
tprintf("\n** no seam picked *** \n");
|
||||
}
|
||||
if (seam) {
|
||||
apply_seam(blob, other_blob, italic_blob, seam);
|
||||
seam->ApplySeam(italic_blob, blob, other_blob);
|
||||
}
|
||||
|
||||
seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
|
||||
@ -211,13 +206,17 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
|
||||
if (divisible_blob(blob, italic_blob, &location)) {
|
||||
other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
|
||||
word->blobs.insert(other_blob, blob_number + 1);
|
||||
seam = new SEAM(0.0f, location, NULL, NULL, NULL);
|
||||
apply_seam(blob, other_blob, italic_blob, seam);
|
||||
seam = new SEAM(0.0f, location);
|
||||
seam->ApplySeam(italic_blob, blob, other_blob);
|
||||
seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
|
||||
seams, seam);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (seam != NULL) {
|
||||
// Make sure this seam doesn't get chopped again.
|
||||
seam->Finalize();
|
||||
}
|
||||
return seam;
|
||||
}
|
||||
|
||||
@ -286,8 +285,7 @@ int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam) {
|
||||
|
||||
length = seams.size();
|
||||
for (index = 0; index < length; index++)
|
||||
if (shared_split_points(seams[index], seam))
|
||||
return TRUE;
|
||||
if (seam->SharesPosition(*seams[index])) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
@ -384,50 +382,6 @@ SEAM* Wordrec::chop_one_blob(const GenericVector<TBOX>& boxes,
|
||||
blob_number);
|
||||
}
|
||||
}
|
||||
} // namespace tesseract
|
||||
|
||||
/**
|
||||
* @name check_seam_order
|
||||
*
|
||||
* Make sure that each of the splits in this seam match to outlines
|
||||
* in this blob. If any of the splits could not correspond to this
|
||||
* blob then there is a problem (and FALSE should be returned to the
|
||||
* caller).
|
||||
*/
|
||||
inT16 check_seam_order(TBLOB *blob, SEAM *seam) {
// Returns TRUE when every non-NULL split of the seam (split1..split3) is
// matched by is_split_outline against at least one outline of the blob.
|
||||
TESSLINE *outline;
|
||||
// found_em[i] records whether split (i+1) has been matched (or is absent).
inT8 found_em[3];
|
||||
|
||||
// A seam without a first split, or a NULL blob, is accepted without checks.
// Note that seam itself is dereferenced before blob is tested for NULL.
if (seam->split1 == NULL || blob == NULL)
|
||||
return (TRUE);
|
||||
|
||||
found_em[0] = found_em[1] = found_em[2] = FALSE;
|
||||
|
||||
for (outline = blob->outlines; outline; outline = outline->next) {
|
||||
// Each slot is satisfied by a NULL split or by the first matching outline.
if (!found_em[0] &&
|
||||
((seam->split1 == NULL) ||
|
||||
is_split_outline (outline, seam->split1))) {
|
||||
found_em[0] = TRUE;
|
||||
}
|
||||
if (!found_em[1] &&
|
||||
((seam->split2 == NULL) ||
|
||||
is_split_outline (outline, seam->split2))) {
|
||||
found_em[1] = TRUE;
|
||||
}
|
||||
if (!found_em[2] &&
|
||||
((seam->split3 == NULL) ||
|
||||
is_split_outline (outline, seam->split3))) {
|
||||
found_em[2] = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
// Any unmatched slot means the seam does not fit this blob.
if (!found_em[0] || !found_em[1] || !found_em[2])
|
||||
return (FALSE);
|
||||
else
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* @name chop_word_main
|
||||
|
@ -44,7 +44,5 @@ int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam);
|
||||
|
||||
int check_blob(TBLOB *blob);
|
||||
|
||||
inT16 check_seam_order(TBLOB *blob, SEAM *seam);
|
||||
|
||||
inT16 total_containment(TBLOB *blob1, TBLOB *blob2);
|
||||
#endif
|
||||
|
@ -27,7 +27,6 @@
|
||||
----------------------------------------------------------------------*/
|
||||
#include "findseam.h"
|
||||
#include "gradechop.h"
|
||||
#include "olutil.h"
|
||||
#include "plotedges.h"
|
||||
#include "outlines.h"
|
||||
#include "freelist.h"
|
||||
@ -67,7 +66,7 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
|
||||
if (new_seam == NULL) return;
|
||||
if (chop_debug) {
|
||||
tprintf("Pushing new seam with priority %g :", new_priority);
|
||||
print_seam("seam: ", new_seam);
|
||||
new_seam->Print("seam: ");
|
||||
}
|
||||
if (seams->size() >= MAX_NUM_SEAMS) {
|
||||
SeamPair old_pair(0, NULL);
|
||||
@ -101,12 +100,9 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
|
||||
* a split of NULL, then no further splits can be supplied by the
|
||||
* caller.
|
||||
**********************************************************************/
|
||||
void Wordrec::choose_best_seam(SeamQueue* seam_queue,
|
||||
SPLIT *split,
|
||||
PRIORITY priority,
|
||||
SEAM **seam_result,
|
||||
TBLOB *blob,
|
||||
SeamPile* seam_pile) {
|
||||
void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
|
||||
PRIORITY priority, SEAM **seam_result,
|
||||
TBLOB *blob, SeamPile *seam_pile) {
|
||||
SEAM *seam;
|
||||
char str[80];
|
||||
float my_priority;
|
||||
@ -116,9 +112,8 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue,
|
||||
TPOINT split_point = split->point1->pos;
|
||||
split_point += split->point2->pos;
|
||||
split_point /= 2;
|
||||
seam = new SEAM(my_priority, split_point, split, NULL, NULL);
|
||||
if (chop_debug > 1)
|
||||
print_seam ("Partial priority ", seam);
|
||||
seam = new SEAM(my_priority, split_point, *split);
|
||||
if (chop_debug > 1) seam->Print("Partial priority ");
|
||||
add_seam_to_queue(my_priority, seam, seam_queue);
|
||||
|
||||
if (my_priority > chop_good_split)
|
||||
@ -132,19 +127,22 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue,
|
||||
seam_queue->Pop(&seam_pair);
|
||||
seam = seam_pair.extract_data();
|
||||
/* Set full priority */
|
||||
my_priority = seam_priority(seam, bbox.left(), bbox.right());
|
||||
my_priority = seam->FullPriority(bbox.left(), bbox.right(),
|
||||
chop_overlap_knob, chop_centered_maxwidth,
|
||||
chop_center_knob, chop_width_change_knob);
|
||||
if (chop_debug) {
|
||||
sprintf (str, "Full my_priority %0.0f, ", my_priority);
|
||||
print_seam(str, seam);
|
||||
seam->Print(str);
|
||||
}
|
||||
|
||||
if ((*seam_result == NULL || (*seam_result)->priority > my_priority) &&
|
||||
if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) &&
|
||||
my_priority < chop_ok_split) {
|
||||
/* No crossing */
|
||||
if (constrained_split(seam->split1, blob)) {
|
||||
if (seam->IsHealthy(*blob, chop_min_outline_points,
|
||||
chop_min_outline_area)) {
|
||||
delete *seam_result;
|
||||
*seam_result = new SEAM(*seam);
|
||||
(*seam_result)->priority = my_priority;
|
||||
(*seam_result)->set_priority(my_priority);
|
||||
} else {
|
||||
delete seam;
|
||||
seam = NULL;
|
||||
@ -198,103 +196,16 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue,
|
||||
**********************************************************************/
|
||||
void Wordrec::combine_seam(const SeamPile& seam_pile,
|
||||
const SEAM* seam, SeamQueue* seam_queue) {
|
||||
register inT16 dist;
|
||||
inT16 bottom1, top1;
|
||||
inT16 bottom2, top2;
|
||||
|
||||
SEAM *new_one;
|
||||
const SEAM *this_one;
|
||||
|
||||
bottom1 = seam->split1->point1->pos.y;
|
||||
if (seam->split1->point2->pos.y >= bottom1)
|
||||
top1 = seam->split1->point2->pos.y;
|
||||
else {
|
||||
top1 = bottom1;
|
||||
bottom1 = seam->split1->point2->pos.y;
|
||||
}
|
||||
if (seam->split2 != NULL) {
|
||||
bottom2 = seam->split2->point1->pos.y;
|
||||
if (seam->split2->point2->pos.y >= bottom2)
|
||||
top2 = seam->split2->point2->pos.y;
|
||||
else {
|
||||
top2 = bottom2;
|
||||
bottom2 = seam->split2->point2->pos.y;
|
||||
}
|
||||
}
|
||||
else {
|
||||
bottom2 = bottom1;
|
||||
top2 = top1;
|
||||
}
|
||||
for (int x = 0; x < seam_pile.size(); ++x) {
|
||||
this_one = seam_pile.get(x).data();
|
||||
dist = seam->location.x - this_one->location.x;
|
||||
if (-SPLIT_CLOSENESS < dist &&
|
||||
dist < SPLIT_CLOSENESS &&
|
||||
seam->priority + this_one->priority < chop_ok_split) {
|
||||
inT16 split1_point1_y = this_one->split1->point1->pos.y;
|
||||
inT16 split1_point2_y = this_one->split1->point2->pos.y;
|
||||
inT16 split2_point1_y = 0;
|
||||
inT16 split2_point2_y = 0;
|
||||
if (this_one->split2) {
|
||||
split2_point1_y = this_one->split2->point1->pos.y;
|
||||
split2_point2_y = this_one->split2->point2->pos.y;
|
||||
}
|
||||
if (
|
||||
/*!tessedit_fix_sideways_chops || */
|
||||
(
|
||||
/* this_one->split1 always exists */
|
||||
(
|
||||
((split1_point1_y >= top1 && split1_point2_y >= top1) ||
|
||||
(split1_point1_y <= bottom1 && split1_point2_y <= bottom1))
|
||||
&&
|
||||
((split1_point1_y >= top2 && split1_point2_y >= top2) ||
|
||||
(split1_point1_y <= bottom2 && split1_point2_y <= bottom2))
|
||||
)
|
||||
)
|
||||
&&
|
||||
(
|
||||
this_one->split2 == NULL ||
|
||||
(
|
||||
((split2_point1_y >= top1 && split2_point2_y >= top1) ||
|
||||
(split2_point1_y <= bottom1 && split2_point2_y <= bottom1))
|
||||
&&
|
||||
((split2_point1_y >= top2 && split2_point2_y >= top2) ||
|
||||
(split2_point1_y <= bottom2 && split2_point2_y <= bottom2))
|
||||
)
|
||||
)
|
||||
) {
|
||||
new_one = join_two_seams (seam, this_one);
|
||||
if (new_one != NULL) {
|
||||
if (chop_debug > 1)
|
||||
print_seam ("Combo priority ", new_one);
|
||||
add_seam_to_queue(new_one->priority, new_one, seam_queue);
|
||||
const SEAM *this_one = seam_pile.get(x).data();
|
||||
if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
|
||||
SEAM *new_one = new SEAM(*seam);
|
||||
new_one->CombineWith(*this_one);
|
||||
if (chop_debug > 1) new_one->Print("Combo priority ");
|
||||
add_seam_to_queue(new_one->priority(), new_one, seam_queue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* constrained_split
|
||||
*
|
||||
* Constrain this split to obey certain rules. It must not cross any
|
||||
* inner outline. It must not cut off a small chunk of the outline.
|
||||
**********************************************************************/
|
||||
inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) {
// Returns FALSE if the split would cut off a little chunk or would cross
// any outline of the blob; TRUE otherwise.
|
||||
TESSLINE *outline;
|
||||
|
||||
// Reject splits that cut off a little chunk of the outline.
if (is_little_chunk (split->point1, split->point2))
|
||||
return (FALSE);
|
||||
|
||||
for (outline = blob->outlines; outline; outline = outline->next) {
|
||||
// The bounding-box overlap test runs first, so crosses_outline is only
// evaluated for outlines whose bounds overlap the split.
if (split_bounds_overlap (split, outline) &&
|
||||
crosses_outline (split->point1, split->point2, outline->loop)) {
|
||||
return (FALSE);
|
||||
}
|
||||
}
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* pick_good_seam
|
||||
@ -335,16 +246,15 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
|
||||
|
||||
if (seam == NULL) {
|
||||
choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
|
||||
}
|
||||
else if (seam->priority > chop_good_split) {
|
||||
choose_best_seam(&seam_queue, NULL, seam->priority,
|
||||
&seam, blob, &seam_pile);
|
||||
} else if (seam->priority() > chop_good_split) {
|
||||
choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob,
|
||||
&seam_pile);
|
||||
}
|
||||
|
||||
EDGEPT_C_IT it(&new_points);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
EDGEPT *inserted_point = it.data();
|
||||
if (!point_used_by_seam(seam, inserted_point)) {
|
||||
if (seam == NULL || !seam->UsesPoint(inserted_point)) {
|
||||
for (outline = blob->outlines; outline; outline = outline->next) {
|
||||
if (outline->loop == inserted_point) {
|
||||
outline->loop = outline->loop->next;
|
||||
@ -355,18 +265,13 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
|
||||
}
|
||||
|
||||
if (seam) {
|
||||
if (seam->priority > chop_ok_split) {
|
||||
if (seam->priority() > chop_ok_split) {
|
||||
delete seam;
|
||||
seam = NULL;
|
||||
}
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
else if (wordrec_display_splits) {
|
||||
if (seam->split1)
|
||||
mark_split (seam->split1);
|
||||
if (seam->split2)
|
||||
mark_split (seam->split2);
|
||||
if (seam->split3)
|
||||
mark_split (seam->split3);
|
||||
seam->Mark(edge_window);
|
||||
if (chop_debug > 2) {
|
||||
update_edge_window();
|
||||
edge_window_wait();
|
||||
@ -382,42 +287,6 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* seam_priority
|
||||
*
|
||||
* Assign a full priority value to the seam.
|
||||
**********************************************************************/
|
||||
PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) {
// Returns seam->priority plus full_split_priority of split1, evaluated while
// any additional splits (split2, split3) are temporarily applied to the
// outlines. A seam with no splits gets priority 0.
|
||||
PRIORITY priority;
|
||||
|
||||
if (seam->split1 == NULL)
|
||||
priority = 0;
|
||||
|
||||
// Single split: grade split1 directly.
else if (seam->split2 == NULL) {
|
||||
priority = (seam->priority +
|
||||
full_split_priority (seam->split1, xmin, xmax));
|
||||
}
|
||||
|
||||
// Two splits: apply split2, grade split1, then undo split2.
else if (seam->split3 == NULL) {
|
||||
split_outline (seam->split2->point1, seam->split2->point2);
|
||||
priority = (seam->priority +
|
||||
full_split_priority (seam->split1, xmin, xmax));
|
||||
unsplit_outlines (seam->split2->point1, seam->split2->point2);
|
||||
}
|
||||
|
||||
// Three splits: apply split2 and split3, grade split1, then undo them in
// reverse order (split3 first, then split2).
else {
|
||||
split_outline (seam->split2->point1, seam->split2->point2);
|
||||
split_outline (seam->split3->point1, seam->split3->point2);
|
||||
priority = (seam->priority +
|
||||
full_split_priority (seam->split1, xmin, xmax));
|
||||
unsplit_outlines (seam->split3->point1, seam->split3->point2);
|
||||
unsplit_outlines (seam->split2->point1, seam->split2->point2);
|
||||
}
|
||||
|
||||
return (priority);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* try_point_pairs
|
||||
*
|
||||
@ -433,23 +302,20 @@ void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS],
|
||||
TBLOB * blob) {
|
||||
inT16 x;
|
||||
inT16 y;
|
||||
SPLIT *split;
|
||||
PRIORITY priority;
|
||||
|
||||
for (x = 0; x < num_points; x++) {
|
||||
for (y = x + 1; y < num_points; y++) {
|
||||
|
||||
if (points[y] &&
|
||||
weighted_edgept_dist(points[x], points[y],
|
||||
chop_x_y_weight) < chop_split_length &&
|
||||
points[x] != points[y]->next &&
|
||||
points[y] != points[x]->next &&
|
||||
points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
|
||||
chop_split_length &&
|
||||
points[x] != points[y]->next && points[y] != points[x]->next &&
|
||||
!is_exterior_point(points[x], points[y]) &&
|
||||
!is_exterior_point(points[y], points[x])) {
|
||||
split = new_split (points[x], points[y]);
|
||||
priority = partial_split_priority (split);
|
||||
SPLIT split(points[x], points[y]);
|
||||
priority = partial_split_priority(&split);
|
||||
|
||||
choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile);
|
||||
choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -474,7 +340,6 @@ void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
|
||||
SEAM ** seam,
|
||||
TBLOB * blob) {
|
||||
EDGEPT *vertical_point = NULL;
|
||||
SPLIT *split;
|
||||
inT16 x;
|
||||
PRIORITY priority;
|
||||
TESSLINE *outline;
|
||||
@ -486,16 +351,13 @@ void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
|
||||
&vertical_point, new_points);
|
||||
}
|
||||
|
||||
if (vertical_point &&
|
||||
points[x] != vertical_point->next &&
|
||||
if (vertical_point && points[x] != vertical_point->next &&
|
||||
vertical_point != points[x]->next &&
|
||||
weighted_edgept_dist(points[x], vertical_point,
|
||||
chop_x_y_weight) < chop_split_length) {
|
||||
|
||||
split = new_split (points[x], vertical_point);
|
||||
priority = partial_split_priority (split);
|
||||
|
||||
choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile);
|
||||
points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) <
|
||||
chop_split_length) {
|
||||
SPLIT split(points[x], vertical_point);
|
||||
priority = partial_split_priority(&split);
|
||||
choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -27,120 +27,19 @@
|
||||
----------------------------------------------------------------------*/
|
||||
#include "gradechop.h"
|
||||
#include "wordrec.h"
|
||||
#include "olutil.h"
|
||||
#include "chop.h"
|
||||
#include "ndminx.h"
|
||||
#include <math.h>
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
T y p e s
|
||||
----------------------------------------------------------------------*/
|
||||
#define CENTER_GRADE_CAP 25.0
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
M a c r o s
|
||||
----------------------------------------------------------------------*/
|
||||
/**********************************************************************
|
||||
* find_bounds_loop
|
||||
*
|
||||
* This is a macro to be used by set_outline_bounds.
|
||||
**********************************************************************/
|
||||
|
||||
#define find_bounds_loop(point1,point2,x_min,x_max) \
|
||||
x_min = point2->pos.x; \
|
||||
x_max = point2->pos.x; \
|
||||
\
|
||||
this_point = point1; \
|
||||
do { \
|
||||
x_min = MIN (this_point->pos.x, x_min); \
|
||||
x_max = MAX (this_point->pos.x, x_max); \
|
||||
this_point = this_point->next; \
|
||||
} \
|
||||
while (this_point != point2 && this_point != point1) \
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
/**********************************************************************
|
||||
* full_split_priority
|
||||
*
|
||||
* Assign a priority to this split based on the features that it has.
|
||||
* Part of the priority has already been calculated so just return the
|
||||
* additional amount for the bounding box type information.
|
||||
**********************************************************************/
|
||||
PRIORITY Wordrec::full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) {
// rect holds the x extents of the two outline sections on either side of the
// split: rect[0]/rect[1] = min/max x from point1 to point2, and
// rect[2]/rect[3] = min/max x from point2 to point1 (see set_outline_bounds).
|
||||
BOUNDS_RECT rect;
|
||||
|
||||
set_outline_bounds (split->point1, split->point2, rect);
|
||||
|
||||
// If both sections lie strictly inside (xmin, xmax), i.e. neither reaches
// the supplied x limits, return the fixed large value 999.0.
if (xmin < MIN (rect[0], rect[2]) && xmax > MAX (rect[1], rect[3]))
|
||||
return (999.0);
|
||||
|
||||
// Otherwise the result is the sum of the three bounding-box grades.
return (grade_overlap (rect) +
|
||||
grade_center_of_blob (rect) + grade_width_change (rect));
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* grade_center_of_blob
|
||||
*
|
||||
* Return a grade for a split. Rank it on closeness to the center
|
||||
* of the original blob
|
||||
* 0 = "perfect"
|
||||
* 100 = "no way jay"
|
||||
**********************************************************************/
|
||||
PRIORITY Wordrec::grade_center_of_blob(register BOUNDS_RECT rect) {
// Grades the split by the absolute difference between the widths of the two
// x ranges in rect, scaled by chop_center_knob and clamped to the range
// [0, CENTER_GRADE_CAP].
|
||||
register PRIORITY grade;
|
||||
// Widths of the two x ranges: rect[0..1] and rect[2..3].
int width1 = rect[1] - rect[0];
|
||||
int width2 = rect[3] - rect[2];
|
||||
|
||||
// When both pieces are wider than chop_centered_maxwidth no penalty applies.
if (width1 > chop_centered_maxwidth &&
|
||||
width2 > chop_centered_maxwidth) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Absolute width difference.
grade = width1 - width2;
|
||||
if (grade < 0)
|
||||
grade = -grade;
|
||||
|
||||
grade *= chop_center_knob;
|
||||
grade = MIN (CENTER_GRADE_CAP, grade);
|
||||
return (MAX (0.0, grade));
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* grade_overlap
|
||||
*
|
||||
* Return a grade for this split for the overlap of the resultant blobs.
|
||||
* 0 = "perfect"
|
||||
* 100 = "no way jay"
|
||||
**********************************************************************/
|
||||
PRIORITY Wordrec::grade_overlap(register BOUNDS_RECT rect) {
// Grades the split by how much the two x ranges in rect (rect[0..1] and
// rect[2..3]) overlap, scaled by chop_overlap_knob and floored at 0.
|
||||
register PRIORITY grade;
|
||||
register inT16 width1;
|
||||
register inT16 width2;
|
||||
register inT16 overlap;
|
||||
|
||||
width1 = rect[3] - rect[2];
|
||||
width2 = rect[1] - rect[0];
|
||||
|
||||
// Length of the intersection of the two x ranges (negative if disjoint).
overlap = MIN (rect[1], rect[3]) - MAX (rect[0], rect[2]);
|
||||
// From here on width1 holds the narrower of the two widths.
width1 = MIN (width1, width2);
|
||||
// The narrower range is entirely covered by the other one.
if (overlap == width1)
|
||||
return (100.0); /* Total overlap */
|
||||
|
||||
// width1 is reused as the penalty term 2 * overlap - min_width, which is
// positive only when the overlap exceeds half of the narrower width.
width1 = 2 * overlap - width1; /* Extra penalty for too */
|
||||
overlap += MAX (0, width1); /* much overlap */
|
||||
|
||||
grade = overlap * chop_overlap_knob;
|
||||
|
||||
return (MAX (0.0, grade));
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* grade_split_length
|
||||
@ -153,8 +52,8 @@ PRIORITY Wordrec::grade_split_length(register SPLIT *split) {
|
||||
register PRIORITY grade;
|
||||
register float split_length;
|
||||
|
||||
split_length = weighted_edgept_dist (split->point1, split->point2,
|
||||
chop_x_y_weight);
|
||||
split_length =
|
||||
split->point1->WeightedDistance(*split->point2, chop_x_y_weight);
|
||||
|
||||
if (split_length <= 0)
|
||||
grade = 0;
|
||||
@ -188,51 +87,4 @@ PRIORITY Wordrec::grade_sharpness(register SPLIT *split) {
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* grade_width_change
|
||||
*
|
||||
* Return a grade for the change in width of the resultant blobs.
|
||||
* 0 = "perfect"
|
||||
* 100 = "no way jay"
|
||||
**********************************************************************/
|
||||
PRIORITY Wordrec::grade_width_change(register BOUNDS_RECT rect) {
// Grades the split by how little the combined x extent of the two ranges in
// rect exceeds the wider of the two: the grade starts at 20, decreases by
// that excess, is scaled by chop_width_change_knob and floored at 0.
|
||||
register PRIORITY grade;
|
||||
register inT32 width1;
|
||||
register inT32 width2;
|
||||
|
||||
width1 = rect[3] - rect[2];
|
||||
width2 = rect[1] - rect[0];
|
||||
|
||||
// (combined extent) - (wider piece) = excess of the union over the wider piece.
grade = 20 - (MAX (rect[1], rect[3])
|
||||
- MIN (rect[0], rect[2]) - MAX (width1, width2));
|
||||
|
||||
grade *= chop_width_change_knob;
|
||||
|
||||
return (MAX (0.0, grade));
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* set_outline_bounds
|
||||
*
|
||||
* Set up the limits for the x coordinate of the outline.
|
||||
**********************************************************************/
|
||||
void Wordrec::set_outline_bounds(register EDGEPT *point1,
|
||||
register EDGEPT *point2,
|
||||
BOUNDS_RECT rect) {
|
||||
// Fills rect with the x extents of the two outline sections between the
// given points: rect[0]/rect[1] for the walk from point1 to point2 and
// rect[2]/rect[3] for the walk from point2 to point1.
// this_point is the iterator variable used inside the find_bounds_loop macro.
register EDGEPT *this_point;
|
||||
register inT16 x_min;
|
||||
register inT16 x_max;
|
||||
|
||||
// Min/max x over the points from point1 forward to point2.
find_bounds_loop(point1, point2, x_min, x_max);
|
||||
|
||||
rect[0] = x_min;
|
||||
rect[1] = x_max;
|
||||
|
||||
// Min/max x over the points from point2 forward to point1.
find_bounds_loop(point2, point1, x_min, x_max);
|
||||
|
||||
rect[2] = x_min;
|
||||
rect[3] = x_max;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -32,11 +32,6 @@
|
||||
#include "seam.h"
|
||||
#include "ndminx.h"
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
T y p e s
|
||||
----------------------------------------------------------------------*/
|
||||
typedef inT16 BOUNDS_RECT[4];
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
M a c r o s
|
||||
----------------------------------------------------------------------*/
|
||||
@ -52,18 +47,4 @@ typedef inT16 BOUNDS_RECT[4];
|
||||
(grade_split_length (split) + \
|
||||
grade_sharpness (split)) \
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* split_bounds_overlap
|
||||
*
|
||||
* Check to see if this split might overlap with this outline. Return
|
||||
* TRUE if there is a positive overlap in the bounding boxes of the two.
|
||||
**********************************************************************/
|
||||
|
||||
#define split_bounds_overlap(split,outline) \
|
||||
(outline->topleft.x <= MAX (split->point1->pos.x,split->point2->pos.x) && \
|
||||
outline->botright.x >= MIN (split->point1->pos.x,split->point2->pos.x) && \
|
||||
outline->botright.y <= MAX (split->point1->pos.y,split->point2->pos.y) && \
|
||||
outline->topleft.y >= MIN (split->point1->pos.y,split->point2->pos.y))
|
||||
|
||||
#endif
|
||||
|
@ -1,226 +0,0 @@
|
||||
/* -*-C-*-
|
||||
********************************************************************************
|
||||
*
|
||||
* File: makechop.c (Formerly makechop.c)
|
||||
* Description:
|
||||
* Author: Mark Seaman, OCR Technology
|
||||
* Created: Fri Oct 16 14:37:00 1987
|
||||
* Modified: Mon Jul 29 15:50:42 1991 (Mark Seaman) marks@hpgrlt
|
||||
* Language: C
|
||||
* Package: N/A
|
||||
* Status: Reusable Software Component
|
||||
*
|
||||
* (c) Copyright 1987, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
*********************************************************************************/
|
||||
/*----------------------------------------------------------------------
|
||||
I n c l u d e s
|
||||
----------------------------------------------------------------------*/
|
||||
|
||||
#include "makechop.h"
|
||||
#include "blobs.h"
|
||||
#include "render.h"
|
||||
#include "structures.h"
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
Public Function Code
|
||||
----------------------------------------------------------------------*/
|
||||
/**********************************************************************
|
||||
* apply_seam
|
||||
*
|
||||
* Split this blob into two blobs by applying the splits included in
|
||||
* the seam description.
|
||||
**********************************************************************/
|
||||
void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam) {
|
||||
if (seam->split1 == NULL) {
|
||||
divide_blobs(blob, other_blob, italic_blob, seam->location);
|
||||
}
|
||||
else if (seam->split2 == NULL) {
|
||||
make_split_blobs(blob, other_blob, italic_blob, seam);
|
||||
}
|
||||
else if (seam->split3 == NULL) {
|
||||
make_double_split(blob, other_blob, italic_blob, seam);
|
||||
}
|
||||
else {
|
||||
make_triple_split(blob, other_blob, italic_blob, seam);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* form_two_blobs
|
||||
*
|
||||
* Group the outlines from the first blob into both of them. Do so
|
||||
* according to the information about the split.
|
||||
**********************************************************************/
|
||||
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
const TPOINT& location) {
|
||||
setup_blob_outlines(blob);
|
||||
|
||||
divide_blobs(blob, other_blob, italic_blob, location);
|
||||
|
||||
eliminate_duplicate_outlines(blob);
|
||||
eliminate_duplicate_outlines(other_blob);
|
||||
|
||||
correct_blob_order(blob, other_blob);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* make_double_split
|
||||
*
|
||||
* Create two blobs out of one by splitting the original one in half.
|
||||
* Return the resultant blobs for classification.
|
||||
**********************************************************************/
|
||||
void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
SEAM *seam) {
|
||||
make_single_split(blob->outlines, seam->split1);
|
||||
make_single_split(blob->outlines, seam->split2);
|
||||
form_two_blobs(blob, other_blob, italic_blob, seam->location);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* make_single_split
|
||||
*
|
||||
* Create two outlines out of one by splitting the original one in half.
|
||||
* Return the resultant outlines.
|
||||
**********************************************************************/
|
||||
void make_single_split(TESSLINE *outlines, SPLIT *split) {
|
||||
assert (outlines != NULL);
|
||||
|
||||
split_outline (split->point1, split->point2);
|
||||
|
||||
while (outlines->next != NULL)
|
||||
outlines = outlines->next;
|
||||
|
||||
outlines->next = new TESSLINE;
|
||||
outlines->next->loop = split->point1;
|
||||
outlines->next->ComputeBoundingBox();
|
||||
|
||||
outlines = outlines->next;
|
||||
|
||||
outlines->next = new TESSLINE;
|
||||
outlines->next->loop = split->point2;
|
||||
outlines->next->ComputeBoundingBox();
|
||||
|
||||
outlines->next->next = NULL;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* make_split_blobs
|
||||
*
|
||||
* Create two blobs out of one by splitting the original one in half.
|
||||
* Return the resultant blobs for classification.
|
||||
**********************************************************************/
|
||||
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
SEAM *seam) {
|
||||
make_single_split(blob->outlines, seam->split1);
|
||||
|
||||
form_two_blobs (blob, other_blob, italic_blob, seam->location);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* make_triple_split
|
||||
*
|
||||
* Create two blobs out of one by splitting the original one in half.
|
||||
* This splitting is accomplished by applying three separate splits on
|
||||
* the outlines. Three of the starting outlines will produce two ending
|
||||
* outlines. Return the resultant blobs for classification.
|
||||
**********************************************************************/
|
||||
void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
SEAM *seam) {
|
||||
make_single_split(blob->outlines, seam->split1);
|
||||
make_single_split(blob->outlines, seam->split2);
|
||||
make_single_split(blob->outlines, seam->split3);
|
||||
|
||||
form_two_blobs(blob, other_blob, italic_blob, seam->location);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* undo_seam
|
||||
*
|
||||
* Remove the seam between these two blobs. Produce one blob as a
|
||||
* result. The seam may consist of one, two, or three splits. Each
|
||||
* of these split must be removed from the outlines.
|
||||
**********************************************************************/
|
||||
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
|
||||
TESSLINE *outline;
|
||||
|
||||
if (!seam)
|
||||
return; /* Append other blob outlines */
|
||||
if (blob->outlines == NULL) {
|
||||
blob->outlines = other_blob->outlines;
|
||||
other_blob->outlines = NULL;
|
||||
}
|
||||
|
||||
outline = blob->outlines;
|
||||
while (outline->next)
|
||||
outline = outline->next;
|
||||
outline->next = other_blob->outlines;
|
||||
other_blob->outlines = NULL;
|
||||
delete other_blob;
|
||||
|
||||
if (seam->split1 == NULL) {
|
||||
}
|
||||
else if (seam->split2 == NULL) {
|
||||
undo_single_split (blob, seam->split1);
|
||||
}
|
||||
else if (seam->split3 == NULL) {
|
||||
undo_single_split (blob, seam->split1);
|
||||
undo_single_split (blob, seam->split2);
|
||||
}
|
||||
else {
|
||||
undo_single_split (blob, seam->split3);
|
||||
undo_single_split (blob, seam->split2);
|
||||
undo_single_split (blob, seam->split1);
|
||||
}
|
||||
|
||||
setup_blob_outlines(blob);
|
||||
eliminate_duplicate_outlines(blob);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* undo_single_split
|
||||
*
|
||||
* Undo a seam that is made by a single split. Perform the correct
|
||||
* magic to reconstruct the appropriate set of outline data structures.
|
||||
**********************************************************************/
|
||||
void undo_single_split(TBLOB *blob, SPLIT *split) {
|
||||
TESSLINE *outline1;
|
||||
TESSLINE *outline2;
|
||||
/* Modify edge points */
|
||||
unsplit_outlines (split->point1, split->point2);
|
||||
|
||||
outline1 = new TESSLINE;
|
||||
outline1->next = blob->outlines;
|
||||
blob->outlines = outline1;
|
||||
outline1->loop = split->point1;
|
||||
|
||||
outline2 = new TESSLINE;
|
||||
outline2->next = blob->outlines;
|
||||
blob->outlines = outline2;
|
||||
outline2->loop = split->point2;
|
||||
}
|
@ -1,71 +0,0 @@
|
||||
/* -*-C-*-
|
||||
********************************************************************************
|
||||
*
|
||||
* File: makechop.h (Formerly makechop.h)
|
||||
* Description:
|
||||
* Author: Mark Seaman, SW Productivity
|
||||
* Created: Fri Oct 16 14:37:00 1987
|
||||
* Modified: Mon Jul 29 13:33:23 1991 (Mark Seaman) marks@hpgrlt
|
||||
* Language: C
|
||||
* Package: N/A
|
||||
* Status: Reusable Software Component
|
||||
*
|
||||
* (c) Copyright 1987, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
*********************************************************************************/
|
||||
#ifndef MAKECHOP_H
|
||||
#define MAKECHOP_H
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
I n c l u d e s
|
||||
----------------------------------------------------------------------*/
|
||||
#include "chop.h"
|
||||
#include "olutil.h"
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
M a c r o s
|
||||
---------------------------------------------------------------------*/
|
||||
/**********************************************************************
 * is_split_outline
 *
 * Check to see if both sides of the split fall within this outline,
 * i.e. outline->Contains() accepts the positions of both end points of
 * the split. Both arguments are pointers; each use is parenthesized so
 * that pointer expressions can be passed safely.
 **********************************************************************/

#define is_split_outline(outline, split)           \
  ((outline)->Contains((split)->point1->pos) &&    \
   (outline)->Contains((split)->point2->pos))
|
||||
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------*/
|
||||
void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam);
|
||||
|
||||
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
const TPOINT& location);
|
||||
|
||||
void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
SEAM *seam);
|
||||
|
||||
void make_single_split(TESSLINE *outlines, SPLIT *split);
|
||||
|
||||
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
SEAM *seam);
|
||||
|
||||
void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
SEAM *seam);
|
||||
|
||||
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam);
|
||||
|
||||
void undo_single_split(TBLOB *blob, SPLIT *split);
|
||||
#endif
|
@ -1,102 +0,0 @@
|
||||
/* -*-C-*-
|
||||
********************************************************************************
|
||||
*
|
||||
* File: olutil.c (Formerly olutil.c)
|
||||
* Description:
|
||||
* Author: Mark Seaman, OCR Technology
|
||||
* Created: Fri Oct 16 14:37:00 1987
|
||||
* Modified: Fri May 17 13:11:24 1991 (Mark Seaman) marks@hpgrlt
|
||||
* Language: C
|
||||
* Package: N/A
|
||||
* Status: Reusable Software Component
|
||||
*
|
||||
* (c) Copyright 1987, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
*********************************************************************************/
|
||||
/*----------------------------------------------------------------------
|
||||
I n c l u d e s
|
||||
----------------------------------------------------------------------*/
|
||||
#include "olutil.h"
|
||||
#include "structures.h"
|
||||
#include "blobs.h"
|
||||
#include "const.h"
|
||||
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
/**********************************************************************
|
||||
* correct_blob_order
|
||||
*
|
||||
* Check to see if the blobs are in the correct order. If they are not
|
||||
* then swap which outlines are attached to which blobs.
|
||||
**********************************************************************/
|
||||
void correct_blob_order(TBLOB *blob1, TBLOB *blob2) {
|
||||
TPOINT origin1;
|
||||
TPOINT origin2;
|
||||
TESSLINE *temp;
|
||||
|
||||
blob_origin(blob1, &origin1);
|
||||
blob_origin(blob2, &origin2);
|
||||
|
||||
if (origin1.x > origin2.x) {
|
||||
temp = blob2->outlines;
|
||||
blob2->outlines = blob1->outlines;
|
||||
blob1->outlines = temp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* eliminate_duplicate_outlines
|
||||
*
|
||||
* Find and delete any duplicate outline records in this blob.
|
||||
**********************************************************************/
|
||||
void eliminate_duplicate_outlines(TBLOB *blob) {
|
||||
TESSLINE *outline;
|
||||
TESSLINE *other_outline;
|
||||
TESSLINE *last_outline;
|
||||
|
||||
for (outline = blob->outlines; outline; outline = outline->next) {
|
||||
|
||||
for (last_outline = outline, other_outline = outline->next;
|
||||
other_outline;
|
||||
last_outline = other_outline, other_outline = other_outline->next) {
|
||||
|
||||
if (same_outline_bounds (outline, other_outline)) {
|
||||
last_outline->next = other_outline->next;
|
||||
// This doesn't leak - the outlines share the EDGEPTs.
|
||||
other_outline->loop = NULL;
|
||||
delete other_outline;
|
||||
other_outline = last_outline;
|
||||
// If it is part of a cut, then it can't be a hole any more.
|
||||
outline->is_hole = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* setup_blob_outlines
|
||||
*
|
||||
* Set up each of the outlines in this blob.
|
||||
**********************************************************************/
|
||||
void setup_blob_outlines(TBLOB *blob) {
|
||||
TESSLINE *outline;
|
||||
|
||||
for (outline = blob->outlines; outline; outline = outline->next) {
|
||||
outline->ComputeBoundingBox();
|
||||
}
|
||||
}
|
@ -1,82 +0,0 @@
|
||||
/* -*-C-*-
|
||||
********************************************************************************
|
||||
*
|
||||
* File: olutil.h (Formerly olutil.h)
|
||||
* Description:
|
||||
* Author: Mark Seaman, SW Productivity
|
||||
* Created: Fri Oct 16 14:37:00 1987
|
||||
* Modified: Wed Jul 10 14:21:55 1991 (Mark Seaman) marks@hpgrlt
|
||||
* Language: C
|
||||
* Package: N/A
|
||||
* Status: Reusable Software Component
|
||||
*
|
||||
* (c) Copyright 1987, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
*********************************************************************************/
|
||||
#ifndef OLUTIL_H
|
||||
#define OLUTIL_H
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
I n c l u d e s
|
||||
----------------------------------------------------------------------*/
|
||||
#include "blobs.h"
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
M a c r o s
|
||||
----------------------------------------------------------------------*/
|
||||
/**********************************************************************
 * is_inside_angle
 *
 * True if the supplied edge point is an inside angle: the angle change
 * across (pt->prev, pt, pt->next), as given by angle_change(), is
 * below chop_inside_angle.
 **********************************************************************/

#define is_inside_angle(pt) \
  (angle_change((pt)->prev, (pt), (pt)->next) < chop_inside_angle)
|
||||
|
||||
/**********************************************************************
 * same_outline_bounds
 *
 * Return TRUE if these two outlines have the same bounds, i.e. their
 * topleft and botright corners are equal in both x and y. Both
 * arguments are pointers; each use is parenthesized so that pointer
 * expressions can be passed safely.
 **********************************************************************/

#define same_outline_bounds(outline, other_outline)          \
  ((outline)->topleft.x == (other_outline)->topleft.x &&     \
   (outline)->topleft.y == (other_outline)->topleft.y &&     \
   (outline)->botright.x == (other_outline)->botright.x &&   \
   (outline)->botright.y == (other_outline)->botright.y)
|
||||
|
||||
|
||||
/**********************************************************************
 * weighted_edgept_dist
 *
 * Return the weighted squared distance between the two edge points:
 * the squared x difference is multiplied by chop_x_y_weight and added
 * to the squared y difference. The weight is parenthesized so that an
 * expression such as "a + b" is applied as a whole.
 **********************************************************************/

#define weighted_edgept_dist(p1, p2, chop_x_y_weight)   \
  (((p1)->pos.x - (p2)->pos.x) *                        \
   ((p1)->pos.x - (p2)->pos.x) * (chop_x_y_weight) +    \
   ((p1)->pos.y - (p2)->pos.y) *                        \
   ((p1)->pos.y - (p2)->pos.y))
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
void correct_blob_order(TBLOB *blob1, TBLOB *blob2);
|
||||
|
||||
void eliminate_duplicate_outlines(TBLOB *blob);
|
||||
|
||||
void setup_blob_outlines(TBLOB *blob);
|
||||
|
||||
#endif
|
@ -39,73 +39,6 @@ namespace tesseract {
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
/**********************************************************************
|
||||
* crosses_outline
|
||||
*
|
||||
* Check to see if this line crosses over this outline. If it does
|
||||
* return TRUE.
|
||||
**********************************************************************/
|
||||
int Wordrec::crosses_outline(EDGEPT *p0, /* Start of line */
|
||||
EDGEPT *p1, /* End of line */
|
||||
EDGEPT *outline) { /* Outline to check */
|
||||
EDGEPT *pt = outline;
|
||||
do {
|
||||
if (is_crossed (p0->pos, p1->pos, pt->pos, pt->next->pos))
|
||||
return (TRUE);
|
||||
pt = pt->next;
|
||||
}
|
||||
while (pt != outline);
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* is_crossed
|
||||
*
|
||||
* Return TRUE when the two line segments cross each other. Find out
|
||||
* where the projected lines would cross and then check to see if the
|
||||
* point of intersection lies on both of the line segments. If it does
|
||||
* then these two segments cross.
|
||||
**********************************************************************/
|
||||
int Wordrec::is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) {
|
||||
int b0a1xb0b1, b0b1xb0a0;
|
||||
int a1b1xa1a0, a1a0xa1b0;
|
||||
|
||||
TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;
|
||||
|
||||
b0a1.x = a1.x - b0.x;
|
||||
b0a0.x = a0.x - b0.x;
|
||||
a1b1.x = b1.x - a1.x;
|
||||
b0b1.x = b1.x - b0.x;
|
||||
a1a0.x = a0.x - a1.x;
|
||||
b0a1.y = a1.y - b0.y;
|
||||
b0a0.y = a0.y - b0.y;
|
||||
a1b1.y = b1.y - a1.y;
|
||||
b0b1.y = b1.y - b0.y;
|
||||
a1a0.y = a0.y - a1.y;
|
||||
|
||||
b0a1xb0b1 = CROSS (b0a1, b0b1);
|
||||
b0b1xb0a0 = CROSS (b0b1, b0a0);
|
||||
a1b1xa1a0 = CROSS (a1b1, a1a0);
|
||||
/*a1a0xa1b0=CROSS(a1a0,a1b0); */
|
||||
a1a0xa1b0 = -CROSS (a1a0, b0a1);
|
||||
|
||||
return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0)
|
||||
|| (b0a1xb0b1 < 0 && b0b1xb0a0 < 0))
|
||||
&& ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0));
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* is_same_edgept
|
||||
*
|
||||
* Return true if the points are identical.
|
||||
**********************************************************************/
|
||||
int Wordrec::is_same_edgept(EDGEPT *p1, EDGEPT *p2) {
|
||||
return (p1 == p2);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* near_point
|
||||
*
|
||||
@ -153,30 +86,4 @@ bool Wordrec::near_point(EDGEPT *point,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* reverse_outline
|
||||
*
|
||||
* Change the direction of the outline. If it was clockwise make it
|
||||
* counter-clockwise and vice versa. Do this by swapping each of the
|
||||
* next and prev fields of each edge point.
|
||||
**********************************************************************/
|
||||
void Wordrec::reverse_outline(EDGEPT *outline) {
|
||||
EDGEPT *edgept = outline;
|
||||
EDGEPT *temp;
|
||||
|
||||
do {
|
||||
/* Swap next and prev */
|
||||
temp = edgept->prev;
|
||||
edgept->prev = edgept->next;
|
||||
edgept->next = temp;
|
||||
/* Set up vec field */
|
||||
edgept->vec.x = edgept->next->pos.x - edgept->pos.x;
|
||||
edgept->vec.y = edgept->next->pos.y - edgept->pos.y;
|
||||
|
||||
edgept = edgept->prev; /* Go to next point */
|
||||
}
|
||||
while (edgept != outline);
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -58,7 +58,7 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector<SEAM*>& seams,
|
||||
const char* description,
|
||||
TWERD *word,
|
||||
BlamerBundle *blamer_bundle) {
|
||||
if (end > start) join_pieces(seams, start, end, word);
|
||||
if (end > start) SEAM::JoinPieces(seams, word->blobs, start, end);
|
||||
BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description,
|
||||
White, blamer_bundle);
|
||||
// Set the matrix_cell_ entries in all the BLOB_CHOICES.
|
||||
@ -67,7 +67,7 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector<SEAM*>& seams,
|
||||
bc_it.data()->set_matrix_cell(start, end);
|
||||
}
|
||||
|
||||
if (end > start) break_pieces(seams, start, end, word);
|
||||
if (end > start) SEAM::BreakPieces(seams, word->blobs, start, end);
|
||||
|
||||
return (choices);
|
||||
}
|
||||
|
@ -119,21 +119,4 @@ void mark_outline(EDGEPT *edgept) { /* Start of point list */
|
||||
c_make_current(window);
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* mark_split
|
||||
*
|
||||
* Set up the marks list to be displayed in subsequent updates and draw
|
||||
* the marks in the current window. The marks are stored in the second
|
||||
* sublist. The first sublist is left unmodified.
|
||||
**********************************************************************/
|
||||
void mark_split(SPLIT *split) {
|
||||
void *window = edge_window;
|
||||
|
||||
c_line_color_index(window, Green);
|
||||
c_move (window, (float) split->point1->pos.x, (float) split->point1->pos.y);
|
||||
c_draw (window, (float) split->point2->pos.x, (float) split->point2->pos.y);
|
||||
c_make_current(window);
|
||||
}
|
||||
|
||||
#endif // GRAPHICS_DISABLED
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include "callcpp.h"
|
||||
#include "oldlist.h"
|
||||
#include "blobs.h"
|
||||
#include "split.h"
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
V a r i a b l e s
|
||||
@ -67,5 +66,4 @@ void draw_blob_edges(TBLOB *blob);
|
||||
|
||||
void mark_outline(EDGEPT *edgept);
|
||||
|
||||
void mark_split(SPLIT *split);
|
||||
#endif
|
||||
|
@ -53,8 +53,7 @@ void Wordrec::SegSearch(WERD_RES* word_res,
|
||||
improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle,
|
||||
blamer_bundle, &pain_points, &pending);
|
||||
}
|
||||
if (chop_debug)
|
||||
print_seams("Final seam list:", word_res->seam_array);
|
||||
if (chop_debug) SEAM::PrintSeams("Final seam list:", word_res->seam_array);
|
||||
|
||||
if (blamer_bundle != NULL &&
|
||||
!blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {
|
||||
|
@ -290,9 +290,10 @@ class Wordrec : public Classify {
|
||||
// chop.cpp
|
||||
PRIORITY point_priority(EDGEPT *point);
|
||||
void add_point_to_list(PointHeap* point_heap, EDGEPT *point);
|
||||
// Returns true if the edgept supplied as input is an inside angle. This
|
||||
// is determined by the angular change of the vectors from point to point.
|
||||
bool is_inside_angle(EDGEPT *pt);
|
||||
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3);
|
||||
int is_little_chunk(EDGEPT *point1, EDGEPT *point2);
|
||||
int is_small_area(EDGEPT *point1, EDGEPT *point2);
|
||||
EDGEPT *pick_close_point(EDGEPT *critical_point,
|
||||
EDGEPT *vertical_point,
|
||||
int *best_dist);
|
||||
@ -335,17 +336,12 @@ class Wordrec : public Classify {
|
||||
|
||||
// findseam.cpp
|
||||
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue* seams);
|
||||
void choose_best_seam(SeamQueue* seam_queue,
|
||||
SPLIT *split,
|
||||
PRIORITY priority,
|
||||
SEAM **seam_result,
|
||||
TBLOB *blob,
|
||||
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
|
||||
PRIORITY priority, SEAM **seam_result, TBLOB *blob,
|
||||
SeamPile *seam_pile);
|
||||
void combine_seam(const SeamPile& seam_pile,
|
||||
const SEAM* seam, SeamQueue* seam_queue);
|
||||
inT16 constrained_split(SPLIT *split, TBLOB *blob);
|
||||
SEAM *pick_good_seam(TBLOB *blob);
|
||||
PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax);
|
||||
void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS],
|
||||
inT16 num_points,
|
||||
SeamQueue* seam_queue,
|
||||
@ -359,23 +355,12 @@ class Wordrec : public Classify {
|
||||
SEAM ** seam, TBLOB * blob);
|
||||
|
||||
// gradechop.cpp
|
||||
PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax);
|
||||
PRIORITY grade_center_of_blob(register BOUNDS_RECT rect);
|
||||
PRIORITY grade_overlap(register BOUNDS_RECT rect);
|
||||
PRIORITY grade_split_length(register SPLIT *split);
|
||||
PRIORITY grade_sharpness(register SPLIT *split);
|
||||
PRIORITY grade_width_change(register BOUNDS_RECT rect);
|
||||
void set_outline_bounds(register EDGEPT *point1,
|
||||
register EDGEPT *point2,
|
||||
BOUNDS_RECT rect);
|
||||
|
||||
// outlines.cpp
|
||||
int crosses_outline(EDGEPT *p0, EDGEPT *p1, EDGEPT *outline);
|
||||
int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1);
|
||||
int is_same_edgept(EDGEPT *p1, EDGEPT *p2);
|
||||
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1,
|
||||
EDGEPT **near_pt);
|
||||
void reverse_outline(EDGEPT *outline);
|
||||
|
||||
// pieces.cpp
|
||||
virtual BLOB_CHOICE_LIST *classify_piece(const GenericVector<SEAM*>& seams,
|
||||
|
Loading…
Reference in New Issue
Block a user