Major refactor to improve speed on difficult images, especially when running

a heap checker.
SEAM and SPLIT have been begging for a refactor for a *LONG* time.
This change does most of the work of turning them into proper classes:
  Moved relevant code into SEAM/SPLIT/TBLOB/EDGEPT etc from global helper functions.
  Made the splits full data members of SEAM in an array instead of 3 separate pointers.
    This greatly reduces the amount of new/delete happening in the chopper, which is the main goal.
  Deleted redundant files: olutil.*,  makechop.*
  Brought other code into SEAM in order to keep its data members private with only priority having accessors.
This commit is contained in:
Ray Smith 2015-05-12 14:59:14 -07:00
parent 4c7c960bfd
commit 25d0968d09
28 changed files with 876 additions and 1805 deletions

View File

@ -582,7 +582,7 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
int blob_count = 1; int blob_count = 1;
for (int s = 0; s < word_res->seam_array.size(); ++s) { for (int s = 0; s < word_res->seam_array.size(); ++s) {
SEAM* seam = word_res->seam_array[s]; SEAM* seam = word_res->seam_array[s];
if (seam->split1 == NULL) { if (!seam->HasAnySplits()) {
word_res->best_state.push_back(blob_count); word_res->best_state.push_back(blob_count);
blob_count = 1; blob_count = 1;
} else { } else {

View File

@ -254,7 +254,7 @@ void Tesseract::join_words(WERD_RES *word,
// Move the word2 seams onto the end of the word1 seam_array. // Move the word2 seams onto the end of the word1 seam_array.
// Since the seam list is one element short, an empty seam marking the // Since the seam list is one element short, an empty seam marking the
// end of the last blob in the first word is needed first. // end of the last blob in the first word is needed first.
word->seam_array.push_back(new SEAM(0.0f, split_pt, NULL, NULL, NULL)); word->seam_array.push_back(new SEAM(0.0f, split_pt));
word->seam_array += word2->seam_array; word->seam_array += word2->seam_array;
word2->seam_array.truncate(0); word2->seam_array.truncate(0);
// Fix widths and gaps. // Fix widths and gaps.

View File

@ -64,6 +64,42 @@ const TPOINT kDivisibleVerticalItalic(1, 5);
CLISTIZE(EDGEPT); CLISTIZE(EDGEPT);
// Reports whether line segment a0-a1 and line segment b0-b1 cross each other.
// (Moved from outlines.cpp).
// The test uses the signs of cross products: a0 and a1 must lie strictly on
// opposite sides of the line through b0-b1, and b0 and b1 must lie strictly
// on opposite sides of the line through a0-a1. A zero cross product
// (an endpoint on the other line) never counts as a crossing.
/* static */
bool TPOINT::IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
                       const TPOINT& b1) {
  // Difference vectors, named <from><to>.
  const TPOINT b0a1(a1.x - b0.x, a1.y - b0.y);
  const TPOINT b0a0(a0.x - b0.x, a0.y - b0.y);
  const TPOINT a1b1(b1.x - a1.x, b1.y - a1.y);
  const TPOINT b0b1(b1.x - b0.x, b1.y - b0.y);
  const TPOINT a1a0(a0.x - a1.x, a0.y - a1.y);

  // Cross products taken about b0, testing a0/a1 against segment b.
  const int b0a1xb0b1 = CROSS(b0a1, b0b1);
  const int b0b1xb0a0 = CROSS(b0b1, b0a0);
  // Cross products taken about a1, testing b0/b1 against segment a.
  const int a1b1xa1a0 = CROSS(a1b1, a1a0);
  // For clarity, we want CROSS(a1a0, a1b0) here, but only b0a1 (= -a1b0) is
  // available, so -CROSS(a1a0, b0a1) is used instead, which is the same.
  const int a1a0xa1b0 = -CROSS(a1a0, b0a1);

  const bool a_straddles_b = (b0a1xb0b1 > 0 && b0b1xb0a0 > 0) ||
                             (b0a1xb0b1 < 0 && b0b1xb0a0 < 0);
  const bool b_straddles_a = (a1b1xa1a0 > 0 && a1a0xa1b0 > 0) ||
                             (a1b1xa1a0 < 0 && a1a0xa1b0 < 0);
  return a_straddles_b && b_straddles_a;
}
// Consume the circular list of EDGEPTs to make a TESSLINE. // Consume the circular list of EDGEPTs to make a TESSLINE.
TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) {
TESSLINE* result = new TESSLINE; TESSLINE* result = new TESSLINE;
@ -454,6 +490,36 @@ TBOX TBLOB::bounding_box() const {
return box; return box;
} }
// Finds and deletes any duplicate outlines in this blob, without deleting
// their EDGEPTs.
void TBLOB::EliminateDuplicateOutlines() {
for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
TESSLINE* last_outline = outline;
for (TESSLINE* other_outline = outline->next; other_outline != NULL;
last_outline = other_outline, other_outline = other_outline->next) {
if (outline->SameBox(*other_outline)) {
last_outline->next = other_outline->next;
// This doesn't leak - the outlines share the EDGEPTs.
other_outline->loop = NULL;
delete other_outline;
other_outline = last_outline;
// If it is part of a cut, then it can't be a hole any more.
outline->is_hole = false;
}
}
}
}
// Keeps the blob centers in increasing x: if the x-middle of this blob's
// bounding box is greater than that of next, the two outline lists are
// exchanged. Otherwise nothing is changed.
void TBLOB::CorrectBlobOrder(TBLOB* next) {
  const int this_middle = bounding_box().x_middle();
  const int next_middle = next->bounding_box().x_middle();
  if (this_middle <= next_middle) return;
  Swap(&outlines, &next->outlines);
}
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
void TBLOB::plot(ScrollView* window, ScrollView::Color color, void TBLOB::plot(ScrollView* window, ScrollView::Color color,
ScrollView::Color child_color) { ScrollView::Color child_color) {
@ -858,18 +924,6 @@ void TWERD::plot(ScrollView* window) {
} }
#endif // GRAPHICS_DISABLED #endif // GRAPHICS_DISABLED
/**********************************************************************
* blob_origin
*
* Compute the origin of a compound blob, define to be the centre
* of the bounding box.
**********************************************************************/
void blob_origin(TBLOB *blob, /*blob to compute on */
TPOINT *origin) { /*return value */
TBOX bbox = blob->bounding_box();
*origin = (bbox.topleft() + bbox.botright()) / 2;
}
/********************************************************************** /**********************************************************************
* divisible_blob * divisible_blob
* *

View File

@ -60,6 +60,13 @@ struct TPOINT {
x /= divisor; x /= divisor;
y /= divisor; y /= divisor;
} }
// Two points compare equal when both their x and y coordinates match.
bool operator==(const TPOINT& other) const {
  if (x != other.x) return false;
  return y == other.y;
}
// Returns true when the two line segments cross each other.
// (Moved from outlines.cpp).
static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
const TPOINT& b1);
inT16 x; // absolute x coord. inT16 x; // absolute x coord.
inT16 y; // absolute y coord. inT16 y; // absolute y coord.
@ -87,6 +94,55 @@ struct EDGEPT {
start_step = src.start_step; start_step = src.start_step;
step_count = src.step_count; step_count = src.step_count;
} }
// Returns the squared distance between the positions of *this and other,
// with the squared x-difference multiplied by x_factor.
int WeightedDistance(const EDGEPT& other, int x_factor) const {
  const int dx = pos.x - other.pos.x;
  const int dy = pos.y - other.pos.y;
  const int weighted_dx_sq = dx * dx * x_factor;
  return weighted_dx_sq + dy * dy;
}
// Returns true if *this and other have the same pos.
bool EqualPos(const EDGEPT& other) const {
  return other.pos == pos;
}
// Returns the bounding box of the outline segment from *this to *end,
// following the next pointers. The box starts at this point's position and
// is grown by every point visited, up to and including *end. The walk also
// stops if it arrives back at *this.
// Ignores hidden edge flags.
TBOX SegmentBox(const EDGEPT* end) const {
  TBOX box(pos.x, pos.y, pos.x, pos.y);
  const EDGEPT* pt = next;
  for (;;) {
    const TPOINT& p = pt->pos;
    if (p.x < box.left()) box.set_left(p.x);
    if (p.x > box.right()) box.set_right(p.x);
    if (p.y < box.bottom()) box.set_bottom(p.y);
    if (p.y > box.top()) box.set_top(p.y);
    if (pt == end || pt == this) break;
    pt = pt->next;
  }
  return box;
}
// Returns the area of the outline segment from *this to *end.
// Ignores hidden edge flags.
int SegmentArea(const EDGEPT* end) const {
int area = 0;
const EDGEPT* pt = this->next;
do {
TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y);
area += CROSS(origin_vec, pt->vec);
pt = pt->next;
} while (pt != end && pt != this);
return area;
}
// Returns true if *end is reached from *this within min_points steps along
// the next pointers, and false if the walk gets back to *this first or takes
// more than min_points steps.
// Ignores hidden edge flags.
bool ShortNonCircularSegment(int min_points, const EDGEPT* end) const {
  const EDGEPT* pt = this;
  int steps = 0;
  for (;;) {
    if (pt == end) return true;
    pt = pt->next;
    ++steps;
    if (pt == this || steps > min_points) return false;
  }
}
// Accessors to hide or reveal a cut edge from feature extractors. // Accessors to hide or reveal a cut edge from feature extractors.
void Hide() { void Hide() {
flags[0] = true; flags[0] = true;
@ -100,9 +156,6 @@ struct EDGEPT {
void MarkChop() { void MarkChop() {
flags[2] = true; flags[2] = true;
} }
void UnmarkChop() {
flags[2] = false;
}
bool IsChopPt() const { bool IsChopPt() const {
return flags[2] != 0; return flags[2] != 0;
} }
@ -162,8 +215,23 @@ struct TESSLINE {
void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const; void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const;
TBOX bounding_box() const; TBOX bounding_box() const;
// Returns true if *this and other have identical topleft and botright
// corners, i.e. equal bounding boxes.
bool SameBox(const TESSLINE& other) const {
  if (!(topleft == other.topleft)) return false;
  return botright == other.botright;
}
// Returns true if the given line segment crosses any outline of this blob.
bool SegmentCrosses(const TPOINT& pt1, const TPOINT& pt2) const {
if (Contains(pt1) && Contains(pt2)) {
EDGEPT* pt = loop;
do {
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) return true;
pt = pt->next;
} while (pt != loop);
}
return false;
}
// Returns true if the point is contained within the outline box. // Returns true if the point is contained within the outline box.
bool Contains(const TPOINT& pt) { bool Contains(const TPOINT& pt) const {
return topleft.x <= pt.x && pt.x <= botright.x && return topleft.x <= pt.x && pt.x <= botright.x &&
botright.y <= pt.y && pt.y <= topleft.y; botright.y <= pt.y && pt.y <= topleft.y;
} }
@ -244,6 +312,31 @@ struct TBLOB {
TBOX bounding_box() const; TBOX bounding_box() const;
// Returns true if the line segment pt1-pt2 crosses at least one of the
// outlines in this blob's outline list, as judged by
// TESSLINE::SegmentCrosses.
bool SegmentCrossesOutline(const TPOINT& pt1, const TPOINT& pt2) const {
  const TESSLINE* outline = outlines;
  while (outline != NULL) {
    if (outline->SegmentCrosses(pt1, pt2)) return true;
    outline = outline->next;
  }
  return false;
}
// Returns true if pt lies within the box of at least one outline in this
// blob's outline list, as judged by TESSLINE::Contains.
bool Contains(const TPOINT& pt) const {
  const TESSLINE* outline = outlines;
  while (outline != NULL) {
    if (outline->Contains(pt)) return true;
    outline = outline->next;
  }
  return false;
}
// Finds and deletes any duplicate outlines in this blob, without deleting
// their EDGEPTs.
void EliminateDuplicateOutlines();
// Swaps the outlines of *this and next if needed to keep the centers in
// increasing x.
void CorrectBlobOrder(TBLOB* next);
const DENORM& denorm() const { const DENORM& denorm() const {
return denorm_; return denorm_;
} }
@ -358,12 +451,7 @@ if (w) memfree (w)
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
F u n c t i o n s F u n c t i o n s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
// TODO(rays) This will become a member of TBLOB when TBLOB's definition // TODO(rays) Make divisible_blob and divide_blobs members of TBLOB.
// moves to blobs.h
// Returns the center of blob's bounding box in origin.
void blob_origin(TBLOB *blob, TPOINT *origin);
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location); bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location);
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,

View File

@ -404,7 +404,8 @@ void WERD_RES::SetupBlobWidthsAndGaps() {
// as the blob widths and gaps. // as the blob widths and gaps.
void WERD_RES::InsertSeam(int blob_number, SEAM* seam) { void WERD_RES::InsertSeam(int blob_number, SEAM* seam) {
// Insert the seam into the SEAMS array. // Insert the seam into the SEAMS array.
insert_seam(chopped_word, blob_number, seam, &seam_array); seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
seam_array.insert(seam, blob_number);
if (ratings != NULL) { if (ratings != NULL) {
// Expand the ratings matrix. // Expand the ratings matrix.
ratings = ratings->ConsumeAndMakeBigger(blob_number); ratings = ratings->ConsumeAndMakeBigger(blob_number);
@ -804,12 +805,16 @@ void WERD_RES::RebuildBestState() {
for (int i = 0; i < best_choice->length(); ++i) { for (int i = 0; i < best_choice->length(); ++i) {
int length = best_choice->state(i); int length = best_choice->state(i);
best_state.push_back(length); best_state.push_back(length);
if (length > 1) if (length > 1) {
join_pieces(seam_array, start, start + length - 1, chopped_word); SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
TBLOB* blob = chopped_word->blobs[start]; TBLOB* blob = chopped_word->blobs[start];
rebuild_word->blobs.push_back(new TBLOB(*blob)); rebuild_word->blobs.push_back(new TBLOB(*blob));
if (length > 1) if (length > 1) {
break_pieces(seam_array, start, start + length - 1, chopped_word); SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
start + length - 1);
}
start += length; start += length;
} }
} }
@ -1065,8 +1070,7 @@ bool WERD_RES::PiecesAllNatural(int start, int count) const {
for (int index = start; index < start + count - 1; ++index) { for (int index = start; index < start + count - 1; ++index) {
if (index >= 0 && index < seam_array.size()) { if (index >= 0 && index < seam_array.size()) {
SEAM* seam = seam_array[index]; SEAM* seam = seam_array[index];
if (seam != NULL && seam->split1 != NULL) if (seam != NULL && seam->HasAnySplits()) return false;
return false;
} }
} }
return true; return true;

View File

@ -27,114 +27,236 @@
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
#include "seam.h" #include "seam.h"
#include "blobs.h" #include "blobs.h"
#include "freelist.h"
#include "tprintf.h" #include "tprintf.h"
#ifdef __UNIX__
#include <assert.h>
#endif
/*----------------------------------------------------------------------
V a r i a b l e s
----------------------------------------------------------------------*/
#define NUM_STARTING_SEAMS 20
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
Public Function Code Public Function Code
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
/**
* @name point_in_split
*
* Check to see if either of these points are present in the current
* split.
* @returns TRUE if one of them is split.
*/
bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2) {
return ((split) ? ((exact_point (split->point1, point1) ||
exact_point (split->point1, point2) ||
exact_point (split->point2, point1) ||
exact_point (split->point2, point2)) ? TRUE : FALSE)
: FALSE);
}
// Returns the bounding box of all the points in the seam.
/** TBOX SEAM::bounding_box() const {
* @name point_in_seam TBOX box(location_.x, location_.y, location_.x, location_.y);
* for (int s = 0; s < num_splits_; ++s) {
* Check to see if either of these points are present in the current box += splits_[s].bounding_box();
* seam.
* @returns TRUE if one of them is.
*/
bool point_in_seam(const SEAM *seam, SPLIT *split) {
return (point_in_split(seam->split1, split->point1, split->point2) ||
point_in_split(seam->split2, split->point1, split->point2) ||
point_in_split(seam->split3, split->point1, split->point2));
}
/**
* @name point_used_by_split
*
* Return whether this particular EDGEPT * is used in a given split.
* @returns TRUE if the edgept is used by the split.
*/
bool point_used_by_split(SPLIT *split, EDGEPT *point) {
if (split == NULL) return false;
return point == split->point1 || point == split->point2;
}
/**
* @name point_used_by_seam
*
* Return whether this particular EDGEPT * is used in a given seam.
* @returns TRUE if the edgept is used by the seam.
*/
bool point_used_by_seam(SEAM *seam, EDGEPT *point) {
if (seam == NULL) return false;
return point_used_by_split(seam->split1, point) ||
point_used_by_split(seam->split2, point) ||
point_used_by_split(seam->split3, point);
}
/**
* @name combine_seam
*
* Combine two seam records into a single seam. Move the split
* references from the second seam to the first one. The argument
* convention is patterned after strcpy.
*/
void combine_seams(SEAM *dest_seam, SEAM *source_seam) {
dest_seam->priority += source_seam->priority;
dest_seam->location += source_seam->location;
dest_seam->location /= 2;
if (source_seam->split1) {
if (!dest_seam->split1)
dest_seam->split1 = source_seam->split1;
else if (!dest_seam->split2)
dest_seam->split2 = source_seam->split1;
else if (!dest_seam->split3)
dest_seam->split3 = source_seam->split1;
else
delete source_seam->split1; // Wouldn't have fitted.
source_seam->split1 = NULL;
} }
if (source_seam->split2) { return box;
if (!dest_seam->split2) }
dest_seam->split2 = source_seam->split2;
else if (!dest_seam->split3) // Returns true if other can be combined into *this.
dest_seam->split3 = source_seam->split2; bool SEAM::CombineableWith(const SEAM& other, int max_x_dist,
else float max_total_priority) const {
delete source_seam->split2; // Wouldn't have fitted. int dist = location_.x - other.location_.x;
source_seam->split2 = NULL; if (-max_x_dist < dist && dist < max_x_dist &&
num_splits_ + other.num_splits_ <= kMaxNumSplits &&
priority_ + other.priority_ < max_total_priority &&
!OverlappingSplits(other) && !SharesPosition(other)) {
return true;
} else {
return false;
} }
if (source_seam->split3) { }
if (!dest_seam->split3)
dest_seam->split3 = source_seam->split3; // Combines other into *this. Only works if CombinableWith returned true.
else void SEAM::CombineWith(const SEAM& other) {
delete source_seam->split3; // Wouldn't have fitted. priority_ += other.priority_;
source_seam->split3 = NULL; location_ += other.location_;
location_ /= 2;
for (int s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
splits_[num_splits_++] = other.splits_[s];
}
// Returns true if the splits in *this SEAM appear OK in the sense that they
// do not cross any outlines and do not chop off any ridiculously small
// pieces. A seam with no splits is always healthy; otherwise the answer is
// that of the first split's IsHealthy.
bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
  // TODO(rays) Try testing all the splits. Duplicating original code for now,
  // which tested only the first.
  if (num_splits_ == 0) return true;
  return splits_[0].IsHealthy(blob, min_points, min_area);
}
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
// seam, which is about to be inserted at insert_index. Returns false if
// any of the computations fails, as this indicates an invalid chop.
// widthn_/widthp_ are only changed if modify is true.
bool SEAM::PrepareToInsertSeam(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs,
int insert_index, bool modify) {
for (int s = 0; s < insert_index; ++s) {
if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false;
} }
delete source_seam; if (!FindBlobWidth(blobs, insert_index, modify)) return false;
for (int s = insert_index; s < seams.size(); ++s) {
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false;
}
return true;
}
// Computes the widthp_/widthn_ range: for each split, looks for a blob that
// contains it, starting with blobs[index], then searching to the right and
// finally to the left. Returns false if not all the splits are accounted
// for. widthn_/widthp_ are only changed if modify is true, in which case they
// are reset to 0 and then raised to the largest distance at which a split
// was found on the respective side.
bool SEAM::FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
                         bool modify) {
  if (modify) {
    widthp_ = 0;
    widthn_ = 0;
  }
  int num_found = 0;
  for (int s = 0; s < num_splits_; ++s) {
    const SPLIT& split = splits_[s];
    bool found_split = split.ContainedByBlob(*blobs[index]);
    // Look right.
    int b = index + 1;
    while (!found_split && b < blobs.size()) {
      if (split.ContainedByBlob(*blobs[b])) {
        found_split = true;
        if (modify && b - index > widthp_) widthp_ = b - index;
      }
      ++b;
    }
    // Look left.
    b = index - 1;
    while (!found_split && b >= 0) {
      if (split.ContainedByBlob(*blobs[b])) {
        found_split = true;
        if (modify && index - b > widthn_) widthn_ = index - b;
      }
      --b;
    }
    if (found_split) ++num_found;
  }
  return num_found == num_splits_;
}
// Splits blob into two blobs (blob and other_blob) by applying the splits
// included in *this SEAM: each split is applied to blob's outline list, the
// bounding boxes are recomputed, the outlines are divided between the two
// blobs at location_, duplicate outlines are removed from both, and finally
// the blobs are put in order of increasing x.
void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const {
  int s = 0;
  while (s < num_splits_) {
    splits_[s].SplitOutlineList(blob->outlines);
    ++s;
  }
  blob->ComputeBoundingBoxes();

  divide_blobs(blob, other_blob, italic_blob, location_);

  blob->EliminateDuplicateOutlines();
  other_blob->EliminateDuplicateOutlines();

  blob->CorrectBlobOrder(other_blob);
}
// Undoes ApplySeam by removing the seam between these two blobs.
// Produces one blob as a result, and deletes other_blob.
// The outlines of other_blob are appended to the end of blob's outline list
// (or simply moved over if blob has none), then every split of *this is
// unsplit on blob, the bounding boxes are recomputed and duplicate outlines
// are removed.
void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const {
  if (blob->outlines == NULL) {
    // Nothing to append to: take over other_blob's list wholesale.
    blob->outlines = other_blob->outlines;
    other_blob->outlines = NULL;
  }

  TESSLINE* outline = blob->outlines;
  // Guard against both blobs having no outlines at all, in which case there
  // is no tail to walk to and nothing to append.
  if (outline != NULL) {
    while (outline->next) outline = outline->next;
    outline->next = other_blob->outlines;
  }
  // Ownership of the outlines has moved to blob, so detach them before
  // other_blob is deleted.
  other_blob->outlines = NULL;
  delete other_blob;

  for (int s = 0; s < num_splits_; ++s) {
    splits_[s].UnsplitOutlineList(blob);
  }
  blob->ComputeBoundingBoxes();
  blob->EliminateDuplicateOutlines();
}
// Prints everything in *this SEAM via tprintf: the label, then the priority,
// location and widthp_/widthn_, then each split separated by ", ", and a
// terminating newline.
// The label is printed through "%s" rather than being used as the format
// string itself, so a label containing '%' is output verbatim.
void SEAM::Print(const char* label) const {
  tprintf("%s", label);
  tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y,
          widthp_, widthn_);
  for (int s = 0; s < num_splits_; ++s) {
    splits_[s].Print();
    // Separator between splits, but not after the last one.
    if (s + 1 < num_splits_) tprintf(", ");
  }
  tprintf("\n");
}
// Prints a collection of SEAMs: the label on its own line, then each seam
// prefixed by its index, then a blank line. Prints nothing at all if seams
// is empty.
/* static */
void SEAM::PrintSeams(const char* label, const GenericVector<SEAM*>& seams) {
  if (seams.empty()) return;
  tprintf("%s\n", label);
  for (int index = 0; index < seams.size(); ++index) {
    tprintf("%2d: ", index);
    seams[index]->Print("");
  }
  tprintf("\n");
}
#ifndef GRAPHICS_DISABLED
// Draws the seam in the given window by marking each of its splits in turn.
void SEAM::Mark(ScrollView* window) const {
  int s = 0;
  while (s < num_splits_) {
    splits_[s].Mark(window);
    ++s;
  }
}
#endif
// Break up the blobs in this chain so that they are all independent.
// This operation should undo the affect of join_pieces.
/* static */
void SEAM::BreakPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first,
int last) {
for (int x = first; x < last; ++x) seams[x]->Reveal();
TESSLINE* outline = blobs[first]->outlines;
int next_blob = first + 1;
while (outline != NULL && next_blob <= last) {
if (outline->next == blobs[next_blob]->outlines) {
outline->next = NULL;
outline = blobs[next_blob]->outlines;
++next_blob;
} else {
outline = outline->next;
}
}
}
// Join a group of base level pieces into a single blob that can then
// be classified.
/* static */
void SEAM::JoinPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first, int last) {
TESSLINE* outline = blobs[first]->outlines;
if (!outline)
return;
for (int x = first; x < last; ++x) {
SEAM *seam = seams[x];
if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide();
while (outline->next) outline = outline->next;
outline->next = blobs[x + 1]->outlines;
}
}
// Hides the seam so the outlines appear not to be cut by it, by hiding each
// of its splits in turn.
void SEAM::Hide() const {
  int s = 0;
  while (s < num_splits_) {
    splits_[s].Hide();
    ++s;
  }
}
// Undoes Hide, so the outlines are cut by the seam again, by revealing each
// of its splits in turn.
void SEAM::Reveal() const {
  int s = 0;
  while (s < num_splits_) {
    splits_[s].Reveal();
    ++s;
  }
}
// Computes and returns, but does not set, the full priority of *this SEAM.
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const {
if (num_splits_ == 0) return 0.0f;
for (int s = 1; s < num_splits_; ++s) {
splits_[s].SplitOutline();
}
float full_priority =
priority_ +
splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth,
center_knob, width_change_knob);
for (int s = num_splits_ - 1; s >= 1; --s) {
splits_[s].UnsplitOutlines();
}
return full_priority;
} }
/** /**
@ -144,7 +266,7 @@ void combine_seams(SEAM *dest_seam, SEAM *source_seam) {
* present in the starting segmentation. Each of the seams created * present in the starting segmentation. Each of the seams created
* by this routine have location information only. * by this routine have location information only.
*/ */
void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) { void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array) {
seam_array->truncate(0); seam_array->truncate(0);
TPOINT location; TPOINT location;
@ -153,381 +275,6 @@ void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array) {
TBOX nbox = word->blobs[b]->bounding_box(); TBOX nbox = word->blobs[b]->bounding_box();
location.x = (bbox.right() + nbox.left()) / 2; location.x = (bbox.right() + nbox.left()) / 2;
location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4; location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL)); seam_array->push_back(new SEAM(0.0f, location));
}
}
/**
* @name test_insert_seam
*
* @returns true if insert_seam will succeed.
*/
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
TWERD *word, int index) {
SEAM *test_seam;
int list_length = seam_array.size();
for (int test_index = 0; test_index < index; ++test_index) {
test_seam = seam_array[test_index];
if (test_index + test_seam->widthp < index &&
test_seam->widthp + test_index == index - 1 &&
account_splits(test_seam, word, test_index + 1, 1) < 0)
return false;
}
for (int test_index = index; test_index < list_length; test_index++) {
test_seam = seam_array[test_index];
if (test_index - test_seam->widthn >= index &&
test_index - test_seam->widthn == index &&
account_splits(test_seam, word, test_index + 1, -1) < 0)
return false;
}
return true;
}
/**
* @name insert_seam
*
* Add another seam to a collection of seams at a particular location
* in the seam array.
*/
void insert_seam(const TWERD* word, int index, SEAM *seam,
GenericVector<SEAM*>* seam_array) {
SEAM *test_seam;
int list_length = seam_array->size();
for (int test_index = 0; test_index < index; ++test_index) {
test_seam = seam_array->get(test_index);
if (test_index + test_seam->widthp >= index) {
test_seam->widthp++; /*got in the way */
} else if (test_seam->widthp + test_index == index - 1) {
test_seam->widthp = account_splits(test_seam, word, test_index + 1, 1);
if (test_seam->widthp < 0) {
tprintf("Failed to find any right blob for a split!\n");
print_seam("New dud seam", seam);
print_seam("Failed seam", test_seam);
}
}
}
for (int test_index = index; test_index < list_length; test_index++) {
test_seam = seam_array->get(test_index);
if (test_index - test_seam->widthn < index) {
test_seam->widthn++; /*got in the way */
} else if (test_index - test_seam->widthn == index) {
test_seam->widthn = account_splits(test_seam, word, test_index + 1, -1);
if (test_seam->widthn < 0) {
tprintf("Failed to find any left blob for a split!\n");
print_seam("New dud seam", seam);
print_seam("Failed seam", test_seam);
}
}
}
seam_array->insert(seam, index);
}
/**
* @name account_splits
*
* Account for all the splits by looking to the right (blob_direction == 1),
* or to the left (blob_direction == -1) in the word.
*/
int account_splits(const SEAM *seam, const TWERD *word, int blob_index,
int blob_direction) {
inT8 found_em[3];
inT8 width;
found_em[0] = seam->split1 == NULL;
found_em[1] = seam->split2 == NULL;
found_em[2] = seam->split3 == NULL;
if (found_em[0] && found_em[1] && found_em[2])
return 0;
width = 0;
do {
TBLOB* blob = word->blobs[blob_index];
if (!found_em[0])
found_em[0] = find_split_in_blob(seam->split1, blob);
if (!found_em[1])
found_em[1] = find_split_in_blob(seam->split2, blob);
if (!found_em[2])
found_em[2] = find_split_in_blob(seam->split3, blob);
if (found_em[0] && found_em[1] && found_em[2]) {
return width;
}
width++;
blob_index += blob_direction;
} while (0 <= blob_index && blob_index < word->NumBlobs());
return -1;
}
/**
* @name find_split_in_blob
*
* @returns TRUE if the split is somewhere in this blob.
*/
bool find_split_in_blob(SPLIT *split, TBLOB *blob) {
TESSLINE *outline;
for (outline = blob->outlines; outline != NULL; outline = outline->next)
if (outline->Contains(split->point1->pos))
break;
if (outline == NULL)
return FALSE;
for (outline = blob->outlines; outline != NULL; outline = outline->next)
if (outline->Contains(split->point2->pos))
return TRUE;
return FALSE;
}
/**
* @name join_two_seams
*
* Merge these two seams into a new seam. Duplicate the split records
* in both of the input seams. Return the resultant seam.
*/
SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2) {
SEAM *result = NULL;
SEAM *temp;
assert(seam1 &&seam2);
if (((seam1->split3 == NULL && seam2->split2 == NULL) ||
(seam1->split2 == NULL && seam2->split3 == NULL) ||
seam1->split1 == NULL || seam2->split1 == NULL) &&
(!shared_split_points(seam1, seam2))) {
result = new SEAM(*seam1);
temp = new SEAM(*seam2);
combine_seams(result, temp);
}
return (result);
}
/**
* @name print_seam
*
* Print a list of splits. Show the coordinates of both points in
* each split.
*/
void print_seam(const char *label, SEAM *seam) {
if (seam) {
tprintf(label);
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ",
seam->priority, seam->location.x, seam->location.y,
seam->widthp, seam->widthn);
print_split(seam->split1);
if (seam->split2) {
tprintf(", ");
print_split (seam->split2);
if (seam->split3) {
tprintf(", ");
print_split (seam->split3);
}
}
tprintf("\n");
}
}
/**
* @name print_seams
*
* Print a list of splits. Show the coordinates of both points in
* each split.
*/
void print_seams(const char *label, const GenericVector<SEAM*>& seams) {
char number[CHARS_PER_LINE];
if (!seams.empty()) {
tprintf("%s\n", label);
for (int x = 0; x < seams.size(); ++x) {
sprintf(number, "%2d: ", x);
print_seam(number, seams[x]);
}
tprintf("\n");
}
}
/**
* @name shared_split_points
*
* Check these two seams to make sure that neither of them have two
* points in common. Return TRUE if any of the same points are present
* in any of the splits of both seams.
*/
int shared_split_points(const SEAM *seam1, const SEAM *seam2) {
if (seam1 == NULL || seam2 == NULL)
return (FALSE);
if (seam2->split1 == NULL)
return (FALSE);
if (point_in_seam(seam1, seam2->split1))
return (TRUE);
if (seam2->split2 == NULL)
return (FALSE);
if (point_in_seam(seam1, seam2->split2))
return (TRUE);
if (seam2->split3 == NULL)
return (FALSE);
if (point_in_seam(seam1, seam2->split3))
return (TRUE);
return (FALSE);
}
/**********************************************************************
* break_pieces
*
* Break up the blobs in this chain so that they are all independent.
* This operation should undo the affect of join_pieces.
**********************************************************************/
void break_pieces(const GenericVector<SEAM*>& seams, int first, int last,
TWERD *word) {
for (int x = first; x < last; ++x)
reveal_seam(seams[x]);
TESSLINE *outline = word->blobs[first]->outlines;
int next_blob = first + 1;
while (outline != NULL && next_blob <= last) {
if (outline->next == word->blobs[next_blob]->outlines) {
outline->next = NULL;
outline = word->blobs[next_blob]->outlines;
++next_blob;
} else {
outline = outline->next;
}
}
}
/**********************************************************************
* join_pieces
*
* Join a group of base level pieces into a single blob that can then
* be classified.
**********************************************************************/
void join_pieces(const GenericVector<SEAM*>& seams, int first, int last,
TWERD *word) {
TESSLINE *outline = word->blobs[first]->outlines;
if (!outline)
return;
for (int x = first; x < last; ++x) {
SEAM *seam = seams[x];
if (x - seam->widthn >= first && x + seam->widthp < last)
hide_seam(seam);
while (outline->next)
outline = outline->next;
outline->next = word->blobs[x + 1]->outlines;
}
}
/**********************************************************************
 * hide_seam
 *
 * Hide the edge points between the end points of each split in this
 * seam. Splits are processed in the order split1, split2, split3 and
 * processing stops at the first NULL split.
 **********************************************************************/
void hide_seam(SEAM *seam) {
  if (seam == NULL)
    return;
  if (seam->split1 != NULL) {
    hide_edge_pair(seam->split1->point1, seam->split1->point2);
    if (seam->split2 != NULL) {
      hide_edge_pair(seam->split2->point1, seam->split2->point2);
      if (seam->split3 != NULL) {
        hide_edge_pair(seam->split3->point1, seam->split3->point2);
      }
    }
  }
}
/**********************************************************************
* hide_edge_pair
*
* Change the edge points that are referenced by this seam to make
* them hidden edges.
**********************************************************************/
void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
EDGEPT *edgept;
edgept = pt1;
do {
edgept->Hide();
edgept = edgept->next;
}
while (!exact_point (edgept, pt2) && edgept != pt1);
if (edgept == pt1) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
}
edgept = pt2;
do {
edgept->Hide();
edgept = edgept->next;
}
while (!exact_point (edgept, pt1) && edgept != pt2);
if (edgept == pt2) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
}
}
/**********************************************************************
 * reveal_seam
 *
 * Reveal the edge points between the end points of each split in this
 * seam, undoing hide_seam. Splits are processed in the order split1,
 * split2, split3 and processing stops at the first NULL split.
 **********************************************************************/
void reveal_seam(SEAM *seam) {
  if (seam == NULL)
    return;
  if (seam->split1 != NULL) {
    reveal_edge_pair(seam->split1->point1, seam->split1->point2);
    if (seam->split2 != NULL) {
      reveal_edge_pair(seam->split2->point1, seam->split2->point2);
      if (seam->split3 != NULL) {
        reveal_edge_pair(seam->split3->point1, seam->split3->point2);
      }
    }
  }
}
/**********************************************************************
* reveal_edge_pair
*
* Change the edge points that are referenced by this seam to make
* them hidden edges.
**********************************************************************/
void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2) {
EDGEPT *edgept;
edgept = pt1;
do {
edgept->Reveal();
edgept = edgept->next;
}
while (!exact_point (edgept, pt2) && edgept != pt1);
if (edgept == pt1) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
}
edgept = pt2;
do {
edgept->Reveal();
edgept = edgept->next;
}
while (!exact_point (edgept, pt1) && edgept != pt2);
if (edgept == pt2) {
/* tprintf("Hid entire outline at (%d,%d)!!\n",
edgept->pos.x,edgept->pos.y); */
} }
} }

View File

@ -36,95 +36,163 @@
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
typedef float PRIORITY; /* PRIORITY */ typedef float PRIORITY; /* PRIORITY */
struct SEAM { class SEAM {
// Constructor that was formerly new_seam. public:
SEAM(PRIORITY priority0, const TPOINT& location0, // A seam with no splits
SPLIT *splita, SPLIT *splitb, SPLIT *splitc) SEAM(float priority, const TPOINT& location)
: priority(priority0), widthp(0), widthn(0), location(location0), : priority_(priority),
split1(splita), split2(splitb), split3(splitc) {} location_(location),
// Copy constructor that was formerly clone_seam. widthp_(0),
SEAM(const SEAM& src) widthn_(0),
: priority(src.priority), widthp(src.widthp), widthn(src.widthn), num_splits_(0) {}
location(src.location) { // A seam with a single split point.
clone_split(split1, src.split1); SEAM(float priority, const TPOINT& location, const SPLIT& split)
clone_split(split2, src.split2); : priority_(priority),
clone_split(split3, src.split3); location_(location),
widthp_(0),
widthn_(0),
num_splits_(1) {
splits_[0] = split;
} }
// Destructor was delete_seam. // Default copy constructor, operator= and destructor are OK!
~SEAM() {
if (split1) // Accessors.
delete_split(split1); float priority() const { return priority_; }
if (split2) void set_priority(float priority) { priority_ = priority; }
delete_split(split2); bool HasAnySplits() const { return num_splits_ > 0; }
if (split3)
delete_split(split3); // Returns the bounding box of all the points in the seam.
TBOX bounding_box() const;
// Returns true if other can be combined into *this.
bool CombineableWith(const SEAM& other, int max_x_dist,
float max_total_priority) const;
// Combines other into *this. Only works if CombineableWith returned true.
void CombineWith(const SEAM& other);
// Returns true if the given blob contains all splits of *this SEAM.
bool ContainedByBlob(const TBLOB& blob) const {
for (int s = 0; s < num_splits_; ++s) {
if (!splits_[s].ContainedByBlob(blob)) return false;
}
return true;
} }
PRIORITY priority; // Returns true if the given EDGEPT is used by this SEAM, checking only
inT8 widthp; // the EDGEPT pointer, not the coordinates.
inT8 widthn; bool UsesPoint(const EDGEPT* point) const {
TPOINT location; for (int s = 0; s < num_splits_; ++s) {
SPLIT *split1; if (splits_[s].UsesPoint(point)) return true;
SPLIT *split2; }
SPLIT *split3; return false;
}
// Returns true if *this and other share any common point, by coordinates.
bool SharesPosition(const SEAM& other) const {
for (int s = 0; s < num_splits_; ++s) {
for (int t = 0; t < other.num_splits_; ++t)
if (splits_[s].SharesPosition(other.splits_[t])) return true;
}
return false;
}
// Returns true if *this and other have any vertically overlapping splits.
bool OverlappingSplits(const SEAM& other) const {
for (int s = 0; s < num_splits_; ++s) {
TBOX split1_box = splits_[s].bounding_box();
for (int t = 0; t < other.num_splits_; ++t) {
TBOX split2_box = other.splits_[t].bounding_box();
if (split1_box.y_overlap(split2_box)) return true;
}
}
return false;
}
// Marks the edgepts used by the seam so the segments made by the cut
// never get split further by another seam in the future.
void Finalize() {
for (int s = 0; s < num_splits_; ++s) {
splits_[s].point1->MarkChop();
splits_[s].point2->MarkChop();
}
}
// Returns true if the splits in *this SEAM appear OK in the sense that they
// do not cross any outlines and do not chop off any ridiculously small
// pieces.
bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const;
// Computes the widthp_/widthn_ range for all existing SEAMs and for *this
// seam, which is about to be inserted at insert_index. Returns false if
// any of the computations fails, as this indicates an invalid chop.
// widthn_/widthp_ are only changed if modify is true.
bool PrepareToInsertSeam(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int insert_index,
bool modify);
// Computes the widthp_/widthn_ range. Returns false if not all the splits
// are accounted for. widthn_/widthp_ are only changed if modify is true.
bool FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
bool modify);
// Splits this blob into two blobs by applying the splits included in
// *this SEAM
void ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const;
// Undoes ApplySeam by removing the seam between these two blobs.
// Produces one blob as a result, and deletes other_blob.
void UndoSeam(TBLOB* blob, TBLOB* other_blob) const;
// Prints everything in *this SEAM.
void Print(const char* label) const;
// Prints a collection of SEAMs.
static void PrintSeams(const char* label, const GenericVector<SEAM*>& seams);
#ifndef GRAPHICS_DISABLED
// Draws the seam in the given window.
void Mark(ScrollView* window) const;
#endif
// Break up the blobs in this chain so that they are all independent.
// This operation should undo the effect of JoinPieces.
static void BreakPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first,
int last);
// Join a group of base level pieces into a single blob that can then
// be classified.
static void JoinPieces(const GenericVector<SEAM*>& seams,
const GenericVector<TBLOB*>& blobs, int first,
int last);
// Hides the seam so the outlines appear not to be cut by it.
void Hide() const;
// Undoes hide, so the outlines are cut by the seam.
void Reveal() const;
// Computes and returns, but does not set, the full priority of *this SEAM.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
float FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const;
private:
// Maximum number of splits that a SEAM can hold.
static const int kMaxNumSplits = 3;
// Priority of this split. Lower is better.
float priority_;
// Position of the middle of the seam.
TPOINT location_;
// A range such that all splits in *this SEAM are contained within blobs in
// the range [index - widthn_,index + widthp_] where index is the index of
// this SEAM in the seams vector.
inT8 widthp_;
inT8 widthn_;
// Number of splits_ that are used.
inT8 num_splits_;
// Set of pairs of points that are the ends of each split in the SEAM.
SPLIT splits_[kMaxNumSplits];
}; };
/**
* exact_point
*
* Return TRUE if the point positions are the exactly the same. The
* parameters must be of type (EDGEPT*).
*/
#define exact_point(p1,p2) \
(! ((p1->pos.x - p2->pos.x) || (p1->pos.y - p2->pos.y)))
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
F u n c t i o n s F u n c t i o n s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2);
bool point_in_seam(const SEAM *seam, SPLIT *split); void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array);
bool point_used_by_split(SPLIT *split, EDGEPT *point);
bool point_used_by_seam(SEAM *seam, EDGEPT *point);
void combine_seams(SEAM *dest_seam, SEAM *source_seam);
void start_seam_list(TWERD *word, GenericVector<SEAM*>* seam_array);
bool test_insert_seam(const GenericVector<SEAM*>& seam_array,
TWERD *word, int index);
void insert_seam(const TWERD *word, int index, SEAM *seam,
GenericVector<SEAM*>* seam_array);
int account_splits(const SEAM *seam, const TWERD *word, int blob_index,
int blob_direction);
bool find_split_in_blob(SPLIT *split, TBLOB *blob);
SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2);
void print_seam(const char *label, SEAM *seam);
void print_seams(const char *label, const GenericVector<SEAM*>& seams);
int shared_split_points(const SEAM *seam1, const SEAM *seam2);
void break_pieces(const GenericVector<SEAM*>& seams,
int first, int last, TWERD *word);
void join_pieces(const GenericVector<SEAM*>& seams,
int first, int last, TWERD *word);
void hide_seam(SEAM *seam);
void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2);
void reveal_seam(SEAM *seam);
void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2);
#endif #endif

View File

@ -36,23 +36,103 @@
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
V a r i a b l e s V a r i a b l e s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
// Limit on the amount of penalty for the chop being off-center.
const int kCenterGradeCap = 25;
// Ridiculously large priority for splits that are no use.
const double kBadPriority = 999.0;
BOOL_VAR(wordrec_display_splits, 0, "Display splits"); BOOL_VAR(wordrec_display_splits, 0, "Display splits");
/*---------------------------------------------------------------------- // Returns the bounding box of all the points in the split.
F u n c t i o n s TBOX SPLIT::bounding_box() const {
----------------------------------------------------------------------*/ return TBOX(
MIN(point1->pos.x, point2->pos.x), MIN(point1->pos.y, point2->pos.y),
/********************************************************************** MAX(point1->pos.x, point2->pos.x), MAX(point1->pos.y, point2->pos.y));
* delete_split
*
* Remove this split from existence.
**********************************************************************/
void delete_split(SPLIT *split) {
if (split) {
delete split;
}
} }
// Hides the SPLIT so the outlines appear not to be cut by it.
void SPLIT::Hide() const {
EDGEPT* edgept = point1;
do {
edgept->Hide();
edgept = edgept->next;
} while (!edgept->EqualPos(*point2) && edgept != point1);
edgept = point2;
do {
edgept->Hide();
edgept = edgept->next;
} while (!edgept->EqualPos(*point1) && edgept != point2);
}
// Undoes hide, so the outlines are cut by the SPLIT.
void SPLIT::Reveal() const {
EDGEPT* edgept = point1;
do {
edgept->Reveal();
edgept = edgept->next;
} while (!edgept->EqualPos(*point2) && edgept != point1);
edgept = point2;
do {
edgept->Reveal();
edgept = edgept->next;
} while (!edgept->EqualPos(*point1) && edgept != point2);
}
// Compute a split priority based on the bounding boxes of the parts.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
// The result is a sum of three penalty grades (overlap, off-center and
// width change), or kBadPriority when the early-out test below fires.
// NOTE(review): xmin/xmax are presumably the x-extent of the blob being
// chopped - confirm against the caller.
float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const {
// Bounding boxes of the two outline segments made by the split:
// point1 -> point2 and point2 -> point1.
TBOX box1 = Box12();
TBOX box2 = Box21();
int min_left = MIN(box1.left(), box2.left());
int max_right = MAX(box1.right(), box2.right());
// Both pieces lie strictly inside (xmin, xmax): treat the split as no use.
if (xmin < min_left && xmax > max_right) return kBadPriority;
float grade = 0.0f;
// grade_overlap.
int width1 = box1.width();
int width2 = box2.width();
int min_width = MIN(width1, width2);
// NOTE(review): assumes x_gap() is negative when the boxes overlap in x,
// so overlap is positive for overlapping boxes - confirm in TBOX.
int overlap = -box1.x_gap(box2);
if (overlap == min_width) {
grade += 100.0f; // Total overlap.
} else {
// An overlap of more than half the narrower piece is penalized extra by
// adding the excess (2 * overlap - min_width) to the overlap itself.
if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
if (overlap > 0) grade += overlap_knob * overlap;
}
// grade_center_of_blob.
// Applied only when at least one piece is no wider than centered_maxwidth.
// The penalty grows with the width difference and is capped at
// kCenterGradeCap.
if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
grade += MIN(kCenterGradeCap, center_knob * abs(width1 - width2));
}
// grade_width_change.
// 20 minus the amount by which the combined extent exceeds the wider piece.
// It only contributes when positive.
float width_change_grade = 20 - (max_right - min_left - MAX(width1, width2));
if (width_change_grade > 0.0f)
grade += width_change_grade * width_change_knob;
return grade;
}
// Returns true if *this SPLIT appears OK: it does not chop off a little
// chunk (see IsLittleChunk) and the segment from point1 to point2 does not
// cross any outline of the blob.
bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
  if (IsLittleChunk(min_points, min_area)) {
    return false;
  }
  const bool crosses_outline =
      blob.SegmentCrossesOutline(point1->pos, point2->pos);
  return !crosses_outline;
}
// Returns true if the split generates a small chunk on either side. A side
// counts as small only when both tests pass: ShortNonCircularSegment (using
// min_points) and SegmentArea below min_area. The point1 -> point2 side is
// tested first, then the point2 -> point1 side.
bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
  const bool forward_is_small =
      point1->ShortNonCircularSegment(min_points, point2) &&
      point1->SegmentArea(point2) < min_area;
  if (forward_is_small) return true;
  const bool backward_is_small =
      point2->ShortNonCircularSegment(min_points, point1) &&
      point2->SegmentArea(point1) < min_area;
  return backward_is_small;
}
/********************************************************************** /**********************************************************************
* make_edgept * make_edgept
@ -135,102 +215,113 @@ void remove_edgept(EDGEPT *point) {
} }
/********************************************************************** /**********************************************************************
* new_split * Print
* *
* Create a new split record and initialize it. Put it on the display * Shows the coordinates of both points in a split.
* list.
**********************************************************************/ **********************************************************************/
SPLIT *new_split(EDGEPT *point1, EDGEPT *point2) { void SPLIT::Print() const {
SPLIT *s = new SPLIT; if (this != NULL) {
s->point1 = point1; tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
s->point2 = point2; point2->pos.y);
return (s);
}
/**********************************************************************
* print_split
*
* Print a list of splits. Show the coordinates of both points in
* each split.
**********************************************************************/
void print_split(SPLIT *split) {
if (split) {
tprintf("(%d,%d)--(%d,%d)",
split->point1->pos.x, split->point1->pos.y,
split->point2->pos.x, split->point2->pos.y);
} }
} }
#ifndef GRAPHICS_DISABLED
// Draws the split in the given window.
void SPLIT::Mark(ScrollView* window) const {
window->Pen(ScrollView::GREEN);
window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
window->UpdateWindow();
}
#endif
/********************************************************************** // Creates two outlines out of one by splitting the original one in half.
* split_outline // Inserts the resulting outlines into the given list.
* void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
* Split between these two edge points. SplitOutline();
**********************************************************************/ while (outlines->next != NULL) outlines = outlines->next;
void split_outline(EDGEPT *join_point1, EDGEPT *join_point2) {
assert(join_point1 != join_point2);
EDGEPT* temp2 = join_point2->next; outlines->next = new TESSLINE;
EDGEPT* temp1 = join_point1->next; outlines->next->loop = point1;
/* Create two new points */ outlines->next->ComputeBoundingBox();
EDGEPT* new_point1 = make_edgept(join_point1->pos.x, join_point1->pos.y,
temp1, join_point2); outlines = outlines->next;
EDGEPT* new_point2 = make_edgept(join_point2->pos.x, join_point2->pos.y,
temp2, join_point1); outlines->next = new TESSLINE;
// Join_point1 and 2 are now cross-over points, so they must have NULL outlines->next->loop = point2;
// src_outlines and give their src_outline information their new outlines->next->ComputeBoundingBox();
// replacements.
new_point1->src_outline = join_point1->src_outline; outlines->next->next = NULL;
new_point1->start_step = join_point1->start_step;
new_point1->step_count = join_point1->step_count;
new_point2->src_outline = join_point2->src_outline;
new_point2->start_step = join_point2->start_step;
new_point2->step_count = join_point2->step_count;
join_point1->src_outline = NULL;
join_point1->start_step = 0;
join_point1->step_count = 0;
join_point2->src_outline = NULL;
join_point2->start_step = 0;
join_point2->step_count = 0;
join_point1->MarkChop();
join_point2->MarkChop();
} }
// Makes a split between these two edge points, but does not affect the
// outlines to which they belong.
void SPLIT::SplitOutline() const {
EDGEPT* temp2 = point2->next;
EDGEPT* temp1 = point1->next;
/* Create two new points */
EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
// point1 and 2 are now cross-over points, so they must have NULL
// src_outlines and give their src_outline information their new
// replacements.
new_point1->src_outline = point1->src_outline;
new_point1->start_step = point1->start_step;
new_point1->step_count = point1->step_count;
new_point2->src_outline = point2->src_outline;
new_point2->start_step = point2->start_step;
new_point2->step_count = point2->step_count;
point1->src_outline = NULL;
point1->start_step = 0;
point1->step_count = 0;
point2->src_outline = NULL;
point2->start_step = 0;
point2->step_count = 0;
}
/********************************************************************** // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
* unsplit_outlines // the split, but possibly leaving some duplicate outlines.
* void SPLIT::UnsplitOutlineList(TBLOB* blob) const {
* Remove the split that was put between these two points. /* Modify edge points */
**********************************************************************/ UnsplitOutlines();
void unsplit_outlines(EDGEPT *p1, EDGEPT *p2) {
EDGEPT *tmp1 = p1->next;
EDGEPT *tmp2 = p2->next;
assert (p1 != p2); TESSLINE* outline1 = new TESSLINE;
outline1->next = blob->outlines;
blob->outlines = outline1;
outline1->loop = point1;
tmp1->next->prev = p2; TESSLINE* outline2 = new TESSLINE;
tmp2->next->prev = p1; outline2->next = blob->outlines;
blob->outlines = outline2;
outline2->loop = point2;
}
// tmp2 is coincident with p1. p1 takes tmp2's place as tmp2 is deleted. // Removes the split that was put between these two points.
p1->next = tmp2->next; void SPLIT::UnsplitOutlines() const {
p1->src_outline = tmp2->src_outline; EDGEPT* tmp1 = point1->next;
p1->start_step = tmp2->start_step; EDGEPT* tmp2 = point2->next;
p1->step_count = tmp2->step_count;
// Likewise p2 takes tmp1's place. tmp1->next->prev = point2;
p2->next = tmp1->next; tmp2->next->prev = point1;
p2->src_outline = tmp1->src_outline;
p2->start_step = tmp1->start_step; // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
p2->step_count = tmp1->step_count; // deleted.
p1->UnmarkChop(); point1->next = tmp2->next;
p2->UnmarkChop(); point1->src_outline = tmp2->src_outline;
point1->start_step = tmp2->start_step;
point1->step_count = tmp2->step_count;
// Likewise point2 takes tmp1's place.
point2->next = tmp1->next;
point2->src_outline = tmp1->src_outline;
point2->start_step = tmp1->start_step;
point2->step_count = tmp1->step_count;
delete tmp1; delete tmp1;
delete tmp2; delete tmp2;
p1->vec.x = p1->next->pos.x - p1->pos.x; point1->vec.x = point1->next->pos.x - point1->pos.x;
p1->vec.y = p1->next->pos.y - p1->pos.y; point1->vec.y = point1->next->pos.y - point1->pos.y;
p2->vec.x = p2->next->pos.x - p2->pos.x; point2->vec.x = point2->next->pos.x - point2->pos.x;
p2->vec.y = p2->next->pos.y - p2->pos.y; point2->vec.y = point2->next->pos.y - point2->pos.y;
} }

View File

@ -29,18 +29,80 @@
I n c l u d e s I n c l u d e s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
#include "blobs.h" #include "blobs.h"
#include "oldlist.h" #include "scrollview.h"
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
T y p e s T y p e s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
typedef struct split_record struct SPLIT {
{ /* SPLIT */ SPLIT() : point1(NULL), point2(NULL) {}
SPLIT(EDGEPT* pt1, EDGEPT* pt2) : point1(pt1), point2(pt2) {}
// Returns the bounding box of all the points in the split.
TBOX bounding_box() const;
// Returns the bounding box of the outline from point1 to point2.
TBOX Box12() const { return point1->SegmentBox(point2); }
// Returns the bounding box of the outline from point2 to point1.
TBOX Box21() const { return point2->SegmentBox(point1); }
// Returns the bounding box of the out
// Hides the SPLIT so the outlines appear not to be cut by it.
void Hide() const;
// Undoes hide, so the outlines are cut by the SPLIT.
void Reveal() const;
// Returns true if the given EDGEPT is used by this SPLIT, checking only
// the EDGEPT pointer, not the coordinates.
bool UsesPoint(const EDGEPT* point) const {
return point1 == point || point2 == point;
}
// Returns true if the other SPLIT has any position shared with *this.
bool SharesPosition(const SPLIT& other) const {
return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) ||
point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2);
}
// Returns true if both points are contained within the blob.
bool ContainedByBlob(const TBLOB& blob) const {
return blob.Contains(point1->pos) && blob.Contains(point2->pos);
}
// Returns true if both points are contained within the outline.
bool ContainedByOutline(const TESSLINE& outline) const {
return outline.Contains(point1->pos) && outline.Contains(point2->pos);
}
// Compute a split priority based on the bounding boxes of the parts.
// The arguments here are config parameters defined in Wordrec. Add chop_
// to the beginning of the name.
float FullPriority(int xmin, int xmax, double overlap_knob,
int centered_maxwidth, double center_knob,
double width_change_knob) const;
// Returns true if *this SPLIT appears OK in the sense that it does not cross
// any outlines and does not chop off any ridiculously small pieces.
bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const;
// Returns true if the split generates a small chunk in terms of both area
// and number of points.
bool IsLittleChunk(int min_points, int min_area) const;
void Print() const;
#ifndef GRAPHICS_DISABLED
// Draws the split in the given window.
void Mark(ScrollView* window) const;
#endif
// Creates two outlines out of one by splitting the original one in half.
// Inserts the resulting outlines into the given list.
void SplitOutlineList(TESSLINE* outlines) const;
// Makes a split between these two edge points, but does not affect the
// outlines to which they belong.
void SplitOutline() const;
// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
// the split, but possibly leaving some duplicate outlines.
void UnsplitOutlineList(TBLOB* blob) const;
// Removes the split that was put between these two points.
void UnsplitOutlines() const;
EDGEPT *point1; EDGEPT *point1;
EDGEPT *point2; EDGEPT *point2;
} SPLIT; };
typedef LIST SPLITS; /* SPLITS */
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
V a r i a b l e s V a r i a b l e s
@ -48,38 +110,11 @@ typedef LIST SPLITS; /* SPLITS */
extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits"); extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits");
/*----------------------------------------------------------------------
M a c r o s
----------------------------------------------------------------------*/
/**********************************************************************
* clone_split
*
* Create a new split record and set the contents equal to the contents
* of this record.
**********************************************************************/
#define clone_split(dest,source) \
if (source) \
(dest) = new_split ((source)->point1, (source)->point2); \
else \
(dest) = (SPLIT*) NULL \
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
F u n c t i o n s F u n c t i o n s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
void delete_split(SPLIT *split);
EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev); EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev);
void remove_edgept(EDGEPT *point); void remove_edgept(EDGEPT *point);
SPLIT *new_split(EDGEPT *point1, EDGEPT *point2);
void print_split(SPLIT *split);
void split_outline(EDGEPT *join_point1, EDGEPT *join_point2);
void unsplit_outlines(EDGEPT *p1, EDGEPT *p2);
#endif #endif

View File

@ -30,6 +30,7 @@
I n c l u d e s I n c l u d e s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
#include "vecfuncs.h" #include "vecfuncs.h"
#include "blobs.h"
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
F u n c t i o n s F u n c t i o n s

View File

@ -26,7 +26,6 @@
#define VECFUNCS_H #define VECFUNCS_H
#include <math.h> #include <math.h>
#include "blobs.h"
struct EDGEPT; struct EDGEPT;

View File

@ -359,8 +359,8 @@ void Classify::LearnPieces(const char* filename, int start, int length,
return; return;
if (length > 1) { if (length > 1) {
join_pieces(word->seam_array, start, start + length - 1, SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
word->chopped_word); start + length - 1);
} }
TBLOB* blob = word->chopped_word->blobs[start]; TBLOB* blob = word->chopped_word->blobs[start];
// Rotate the blob if needed for classification. // Rotate the blob if needed for classification.
@ -413,7 +413,8 @@ void Classify::LearnPieces(const char* filename, int start, int length,
delete rotated_blob; delete rotated_blob;
} }
break_pieces(word->seam_array, start, start + length - 1, word->chopped_word); SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
start + length - 1);
} // LearnPieces. } // LearnPieces.
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/

View File

@ -29,7 +29,6 @@
#include "chop.h" #include "chop.h"
#include "outlines.h" #include "outlines.h"
#include "olutil.h"
#include "callcpp.h" #include "callcpp.h"
#include "plotedges.h" #include "plotedges.h"
#include "const.h" #include "const.h"
@ -74,6 +73,11 @@ void Wordrec::add_point_to_list(PointHeap* point_heap, EDGEPT *point) {
#endif #endif
} }
// Returns true if the edgept supplied as input is an inside angle, i.e. the
// angle change measured across pt->prev, pt and pt->next is below
// chop_inside_angle.
bool Wordrec::is_inside_angle(EDGEPT *pt) {
  const int change = angle_change(pt->prev, pt, pt->next);
  return change < chop_inside_angle;
}
/** /**
* @name angle_change * @name angle_change
@ -111,65 +115,6 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
return (angle); return (angle);
} }
/**
* @name is_little_chunk
*
* Return TRUE if one of the pieces resulting from this split would
* be a little chunk: the other end point is reached within a limited
* walk along the outline (bounded by chop_min_outline_points) and
* is_small_area reports the enclosed area as small.
*/
int Wordrec::is_little_chunk(EDGEPT *point1, EDGEPT *point2) {
EDGEPT *p = point1; /* Iterator */
int counter = 0;
do {
/* Go from P1 to P2 */
if (is_same_edgept (point2, p)) {
/* P2 reached within the limit: the piece is little only if its area is
   small too. Otherwise stop and try the other direction. */
if (is_small_area (point1, point2))
return (TRUE);
else
break;
}
p = p->next;
}
/* Stop after wrapping round to P1 or once the step limit is used up.
   counter is only incremented when p != point1. */
while ((p != point1) && (counter++ < chop_min_outline_points));
/* Go from P2 to P1 */
p = point2;
counter = 0;
do {
if (is_same_edgept (point1, p)) {
/* P1 reached within the limit: the area test decides the result. */
return (is_small_area (point2, point1));
}
p = p->next;
}
while ((p != point2) && (counter++ < chop_min_outline_points));
/* Neither end was reached from the other within the limit. */
return (FALSE);
}
/**
 * @name is_small_area
 *
 * Test the area defined by a split across this outline. Accumulates the
 * cross product of (p - point1) with p->vec for each point p, starting at
 * point1->next and stopping when point2 is reached, then compares the sum
 * with chop_min_outline_area.
 */
int Wordrec::is_small_area(EDGEPT *point1, EDGEPT *point2) {
  int area = 0;
  EDGEPT *p = point1->next;      /* Iterator */
  for (;;) {
    /* Go from P1 to P2 */
    TPOINT origin;
    origin.x = p->pos.x - point1->pos.x;
    origin.y = p->pos.y - point1->pos.y;
    area += CROSS (origin, p->vec);
    p = p->next;
    if (is_same_edgept (point2, p))
      break;
  }
  return (area < chop_min_outline_area);
}
/** /**
* @name pick_close_point * @name pick_close_point
* *

View File

@ -39,7 +39,6 @@
#include "findseam.h" #include "findseam.h"
#include "freelist.h" #include "freelist.h"
#include "globals.h" #include "globals.h"
#include "makechop.h"
#include "render.h" #include "render.h"
#include "pageres.h" #include "pageres.h"
#include "seam.h" #include "seam.h"
@ -135,18 +134,14 @@ void restore_outline_tree(TESSLINE *srcline) {
static SEAM* CheckSeam(int debug_level, inT32 blob_number, TWERD* word, static SEAM* CheckSeam(int debug_level, inT32 blob_number, TWERD* word,
TBLOB* blob, TBLOB* other_blob, TBLOB* blob, TBLOB* other_blob,
const GenericVector<SEAM*>& seams, SEAM* seam) { const GenericVector<SEAM*>& seams, SEAM* seam) {
if (seam == NULL || if (seam == NULL || blob->outlines == NULL || other_blob->outlines == NULL ||
blob->outlines == NULL || total_containment(blob, other_blob) || check_blob(other_blob) ||
other_blob->outlines == NULL || !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
total_containment(blob, other_blob) ||
check_blob(other_blob) ||
!(check_seam_order(blob, seam) &&
check_seam_order(other_blob, seam)) ||
any_shared_split_points(seams, seam) || any_shared_split_points(seams, seam) ||
!test_insert_seam(seams, word, blob_number)) { !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) {
word->blobs.remove(blob_number + 1); word->blobs.remove(blob_number + 1);
if (seam) { if (seam) {
undo_seam(blob, other_blob, seam); seam->UndoSeam(blob, other_blob);
delete seam; delete seam;
seam = NULL; seam = NULL;
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
@ -185,19 +180,19 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
if (prioritize_division) { if (prioritize_division) {
TPOINT location; TPOINT location;
if (divisible_blob(blob, italic_blob, &location)) { if (divisible_blob(blob, italic_blob, &location)) {
seam = new SEAM(0.0f, location, NULL, NULL, NULL); seam = new SEAM(0.0f, location);
} }
} }
if (seam == NULL) if (seam == NULL)
seam = pick_good_seam(blob); seam = pick_good_seam(blob);
if (chop_debug) { if (chop_debug) {
if (seam != NULL) if (seam != NULL)
print_seam("Good seam picked=", seam); seam->Print("Good seam picked=");
else else
tprintf("\n** no seam picked *** \n"); tprintf("\n** no seam picked *** \n");
} }
if (seam) { if (seam) {
apply_seam(blob, other_blob, italic_blob, seam); seam->ApplySeam(italic_blob, blob, other_blob);
} }
seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
@ -211,13 +206,17 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
if (divisible_blob(blob, italic_blob, &location)) { if (divisible_blob(blob, italic_blob, &location)) {
other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
word->blobs.insert(other_blob, blob_number + 1); word->blobs.insert(other_blob, blob_number + 1);
seam = new SEAM(0.0f, location, NULL, NULL, NULL); seam = new SEAM(0.0f, location);
apply_seam(blob, other_blob, italic_blob, seam); seam->ApplySeam(italic_blob, blob, other_blob);
seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
seams, seam); seams, seam);
} }
} }
} }
if (seam != NULL) {
// Make sure this seam doesn't get chopped again.
seam->Finalize();
}
return seam; return seam;
} }
@ -286,8 +285,7 @@ int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam) {
length = seams.size(); length = seams.size();
for (index = 0; index < length; index++) for (index = 0; index < length; index++)
if (shared_split_points(seams[index], seam)) if (seam->SharesPosition(*seams[index])) return TRUE;
return TRUE;
return FALSE; return FALSE;
} }
@ -384,50 +382,6 @@ SEAM* Wordrec::chop_one_blob(const GenericVector<TBOX>& boxes,
blob_number); blob_number);
} }
} }
} // namespace tesseract
/**
 * @name check_seam_order
 *
 * Verify that every split held by this seam can be matched to some
 * outline of the blob. A missing (NULL) split slot counts as matched
 * by the first outline visited.
 *
 * Returns TRUE when the seam has no first split or the blob is NULL,
 * or when all three split slots were matched. Returns FALSE otherwise,
 * which includes a blob that has no outlines at all.
 */
inT16 check_seam_order(TBLOB *blob, SEAM *seam) {
  if (seam->split1 == NULL || blob == NULL)
    return (TRUE);

  SPLIT *splits[3] = {seam->split1, seam->split2, seam->split3};
  bool matched[3] = {false, false, false};

  for (TESSLINE *outline = blob->outlines; outline != NULL;
       outline = outline->next) {
    for (int i = 0; i < 3; ++i) {
      if (matched[i])
        continue;
      if (splits[i] == NULL || is_split_outline (outline, splits[i]))
        matched[i] = true;
    }
  }

  if (matched[0] && matched[1] && matched[2])
    return (TRUE);
  return (FALSE);
}
namespace tesseract {
/** /**
* @name chop_word_main * @name chop_word_main

View File

@ -44,7 +44,5 @@ int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam);
int check_blob(TBLOB *blob); int check_blob(TBLOB *blob);
inT16 check_seam_order(TBLOB *blob, SEAM *seam);
inT16 total_containment(TBLOB *blob1, TBLOB *blob2); inT16 total_containment(TBLOB *blob1, TBLOB *blob2);
#endif #endif

View File

@ -27,7 +27,6 @@
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
#include "findseam.h" #include "findseam.h"
#include "gradechop.h" #include "gradechop.h"
#include "olutil.h"
#include "plotedges.h" #include "plotedges.h"
#include "outlines.h" #include "outlines.h"
#include "freelist.h" #include "freelist.h"
@ -67,7 +66,7 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
if (new_seam == NULL) return; if (new_seam == NULL) return;
if (chop_debug) { if (chop_debug) {
tprintf("Pushing new seam with priority %g :", new_priority); tprintf("Pushing new seam with priority %g :", new_priority);
print_seam("seam: ", new_seam); new_seam->Print("seam: ");
} }
if (seams->size() >= MAX_NUM_SEAMS) { if (seams->size() >= MAX_NUM_SEAMS) {
SeamPair old_pair(0, NULL); SeamPair old_pair(0, NULL);
@ -101,12 +100,9 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
* a split of NULL, then no further splits can be supplied by the * a split of NULL, then no further splits can be supplied by the
* caller. * caller.
**********************************************************************/ **********************************************************************/
void Wordrec::choose_best_seam(SeamQueue* seam_queue, void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
SPLIT *split, PRIORITY priority, SEAM **seam_result,
PRIORITY priority, TBLOB *blob, SeamPile *seam_pile) {
SEAM **seam_result,
TBLOB *blob,
SeamPile* seam_pile) {
SEAM *seam; SEAM *seam;
char str[80]; char str[80];
float my_priority; float my_priority;
@ -116,9 +112,8 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue,
TPOINT split_point = split->point1->pos; TPOINT split_point = split->point1->pos;
split_point += split->point2->pos; split_point += split->point2->pos;
split_point /= 2; split_point /= 2;
seam = new SEAM(my_priority, split_point, split, NULL, NULL); seam = new SEAM(my_priority, split_point, *split);
if (chop_debug > 1) if (chop_debug > 1) seam->Print("Partial priority ");
print_seam ("Partial priority ", seam);
add_seam_to_queue(my_priority, seam, seam_queue); add_seam_to_queue(my_priority, seam, seam_queue);
if (my_priority > chop_good_split) if (my_priority > chop_good_split)
@ -132,19 +127,22 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue,
seam_queue->Pop(&seam_pair); seam_queue->Pop(&seam_pair);
seam = seam_pair.extract_data(); seam = seam_pair.extract_data();
/* Set full priority */ /* Set full priority */
my_priority = seam_priority(seam, bbox.left(), bbox.right()); my_priority = seam->FullPriority(bbox.left(), bbox.right(),
chop_overlap_knob, chop_centered_maxwidth,
chop_center_knob, chop_width_change_knob);
if (chop_debug) { if (chop_debug) {
sprintf (str, "Full my_priority %0.0f, ", my_priority); sprintf (str, "Full my_priority %0.0f, ", my_priority);
print_seam(str, seam); seam->Print(str);
} }
if ((*seam_result == NULL || (*seam_result)->priority > my_priority) && if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) &&
my_priority < chop_ok_split) { my_priority < chop_ok_split) {
/* No crossing */ /* No crossing */
if (constrained_split(seam->split1, blob)) { if (seam->IsHealthy(*blob, chop_min_outline_points,
chop_min_outline_area)) {
delete *seam_result; delete *seam_result;
*seam_result = new SEAM(*seam); *seam_result = new SEAM(*seam);
(*seam_result)->priority = my_priority; (*seam_result)->set_priority(my_priority);
} else { } else {
delete seam; delete seam;
seam = NULL; seam = NULL;
@ -198,104 +196,17 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue,
**********************************************************************/ **********************************************************************/
void Wordrec::combine_seam(const SeamPile& seam_pile, void Wordrec::combine_seam(const SeamPile& seam_pile,
const SEAM* seam, SeamQueue* seam_queue) { const SEAM* seam, SeamQueue* seam_queue) {
register inT16 dist;
inT16 bottom1, top1;
inT16 bottom2, top2;
SEAM *new_one;
const SEAM *this_one;
bottom1 = seam->split1->point1->pos.y;
if (seam->split1->point2->pos.y >= bottom1)
top1 = seam->split1->point2->pos.y;
else {
top1 = bottom1;
bottom1 = seam->split1->point2->pos.y;
}
if (seam->split2 != NULL) {
bottom2 = seam->split2->point1->pos.y;
if (seam->split2->point2->pos.y >= bottom2)
top2 = seam->split2->point2->pos.y;
else {
top2 = bottom2;
bottom2 = seam->split2->point2->pos.y;
}
}
else {
bottom2 = bottom1;
top2 = top1;
}
for (int x = 0; x < seam_pile.size(); ++x) { for (int x = 0; x < seam_pile.size(); ++x) {
this_one = seam_pile.get(x).data(); const SEAM *this_one = seam_pile.get(x).data();
dist = seam->location.x - this_one->location.x; if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
if (-SPLIT_CLOSENESS < dist && SEAM *new_one = new SEAM(*seam);
dist < SPLIT_CLOSENESS && new_one->CombineWith(*this_one);
seam->priority + this_one->priority < chop_ok_split) { if (chop_debug > 1) new_one->Print("Combo priority ");
inT16 split1_point1_y = this_one->split1->point1->pos.y; add_seam_to_queue(new_one->priority(), new_one, seam_queue);
inT16 split1_point2_y = this_one->split1->point2->pos.y;
inT16 split2_point1_y = 0;
inT16 split2_point2_y = 0;
if (this_one->split2) {
split2_point1_y = this_one->split2->point1->pos.y;
split2_point2_y = this_one->split2->point2->pos.y;
}
if (
/*!tessedit_fix_sideways_chops || */
(
/* this_one->split1 always exists */
(
((split1_point1_y >= top1 && split1_point2_y >= top1) ||
(split1_point1_y <= bottom1 && split1_point2_y <= bottom1))
&&
((split1_point1_y >= top2 && split1_point2_y >= top2) ||
(split1_point1_y <= bottom2 && split1_point2_y <= bottom2))
)
)
&&
(
this_one->split2 == NULL ||
(
((split2_point1_y >= top1 && split2_point2_y >= top1) ||
(split2_point1_y <= bottom1 && split2_point2_y <= bottom1))
&&
((split2_point1_y >= top2 && split2_point2_y >= top2) ||
(split2_point1_y <= bottom2 && split2_point2_y <= bottom2))
)
)
) {
new_one = join_two_seams (seam, this_one);
if (new_one != NULL) {
if (chop_debug > 1)
print_seam ("Combo priority ", new_one);
add_seam_to_queue(new_one->priority, new_one, seam_queue);
}
}
} }
} }
} }
/**********************************************************************
 * constrained_split
 *
 * Decide whether a split is acceptable for this blob. The split is
 * rejected (FALSE) when is_little_chunk reports that it cuts off a
 * little chunk, or when it crosses any outline of the blob whose
 * bounds overlap the split. Otherwise TRUE is returned.
 **********************************************************************/
inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) {
  if (is_little_chunk (split->point1, split->point2))
    return (FALSE);

  TESSLINE *outline = blob->outlines;
  while (outline != NULL) {
    /* The cheap bounds test runs first; the crossing test only runs
       for outlines that pass it. */
    const bool crossing =
        split_bounds_overlap (split, outline) &&
        crosses_outline (split->point1, split->point2, outline->loop);
    if (crossing)
      return (FALSE);
    outline = outline->next;
  }
  return (TRUE);
}
/********************************************************************** /**********************************************************************
* pick_good_seam * pick_good_seam
* *
@ -335,16 +246,15 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
if (seam == NULL) { if (seam == NULL) {
choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile); choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
} } else if (seam->priority() > chop_good_split) {
else if (seam->priority > chop_good_split) { choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob,
choose_best_seam(&seam_queue, NULL, seam->priority, &seam_pile);
&seam, blob, &seam_pile);
} }
EDGEPT_C_IT it(&new_points); EDGEPT_C_IT it(&new_points);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
EDGEPT *inserted_point = it.data(); EDGEPT *inserted_point = it.data();
if (!point_used_by_seam(seam, inserted_point)) { if (seam == NULL || !seam->UsesPoint(inserted_point)) {
for (outline = blob->outlines; outline; outline = outline->next) { for (outline = blob->outlines; outline; outline = outline->next) {
if (outline->loop == inserted_point) { if (outline->loop == inserted_point) {
outline->loop = outline->loop->next; outline->loop = outline->loop->next;
@ -355,18 +265,13 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
} }
if (seam) { if (seam) {
if (seam->priority > chop_ok_split) { if (seam->priority() > chop_ok_split) {
delete seam; delete seam;
seam = NULL; seam = NULL;
} }
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
else if (wordrec_display_splits) { else if (wordrec_display_splits) {
if (seam->split1) seam->Mark(edge_window);
mark_split (seam->split1);
if (seam->split2)
mark_split (seam->split2);
if (seam->split3)
mark_split (seam->split3);
if (chop_debug > 2) { if (chop_debug > 2) {
update_edge_window(); update_edge_window();
edge_window_wait(); edge_window_wait();
@ -382,42 +287,6 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) {
} }
/**********************************************************************
 * seam_priority
 *
 * Compute the full priority of a seam: the priority already stored in
 * the seam plus full_split_priority of its first split. A seam with no
 * first split gets priority 0.
 *
 * While the first split is being graded, the second and third splits
 * (when present) are temporarily applied to the outlines and are
 * removed again, in reverse order, before returning.
 **********************************************************************/
PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) {
  if (seam->split1 == NULL)
    return 0;

  /* A third split is only honoured when a second one exists. */
  SPLIT *second = seam->split2;
  SPLIT *third = (second != NULL) ? seam->split3 : NULL;

  if (second != NULL)
    split_outline (second->point1, second->point2);
  if (third != NULL)
    split_outline (third->point1, third->point2);

  PRIORITY result = (seam->priority +
                     full_split_priority (seam->split1, xmin, xmax));

  if (third != NULL)
    unsplit_outlines (third->point1, third->point2);
  if (second != NULL)
    unsplit_outlines (second->point1, second->point2);

  return (result);
}
/********************************************************************** /**********************************************************************
* try_point_pairs * try_point_pairs
* *
@ -433,23 +302,20 @@ void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS],
TBLOB * blob) { TBLOB * blob) {
inT16 x; inT16 x;
inT16 y; inT16 y;
SPLIT *split;
PRIORITY priority; PRIORITY priority;
for (x = 0; x < num_points; x++) { for (x = 0; x < num_points; x++) {
for (y = x + 1; y < num_points; y++) { for (y = x + 1; y < num_points; y++) {
if (points[y] && if (points[y] &&
weighted_edgept_dist(points[x], points[y], points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
chop_x_y_weight) < chop_split_length && chop_split_length &&
points[x] != points[y]->next && points[x] != points[y]->next && points[y] != points[x]->next &&
points[y] != points[x]->next &&
!is_exterior_point(points[x], points[y]) && !is_exterior_point(points[x], points[y]) &&
!is_exterior_point(points[y], points[x])) { !is_exterior_point(points[y], points[x])) {
split = new_split (points[x], points[y]); SPLIT split(points[x], points[y]);
priority = partial_split_priority (split); priority = partial_split_priority(&split);
choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile); choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
} }
} }
} }
@ -474,7 +340,6 @@ void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
SEAM ** seam, SEAM ** seam,
TBLOB * blob) { TBLOB * blob) {
EDGEPT *vertical_point = NULL; EDGEPT *vertical_point = NULL;
SPLIT *split;
inT16 x; inT16 x;
PRIORITY priority; PRIORITY priority;
TESSLINE *outline; TESSLINE *outline;
@ -486,16 +351,13 @@ void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
&vertical_point, new_points); &vertical_point, new_points);
} }
if (vertical_point && if (vertical_point && points[x] != vertical_point->next &&
points[x] != vertical_point->next && vertical_point != points[x]->next &&
vertical_point != points[x]->next && points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) <
weighted_edgept_dist(points[x], vertical_point, chop_split_length) {
chop_x_y_weight) < chop_split_length) { SPLIT split(points[x], vertical_point);
priority = partial_split_priority(&split);
split = new_split (points[x], vertical_point); choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
priority = partial_split_priority (split);
choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile);
} }
} }
} }

View File

@ -27,120 +27,19 @@
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
#include "gradechop.h" #include "gradechop.h"
#include "wordrec.h" #include "wordrec.h"
#include "olutil.h"
#include "chop.h" #include "chop.h"
#include "ndminx.h" #include "ndminx.h"
#include <math.h> #include <math.h>
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
#define CENTER_GRADE_CAP 25.0
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
M a c r o s M a c r o s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
/**********************************************************************
 * find_bounds_loop
 *
 * Helper macro for set_outline_bounds. Finds the minimum and maximum
 * x coordinate over a run of edge points and leaves them in x_min and
 * x_max. Both are seeded from point2; the walk then starts at point1
 * and follows the next links until it reaches point2 or comes back
 * round to point1.
 *
 * The expansion is several statements and ends without a semicolon,
 * so it has to be used as a complete statement followed by ';'. It
 * also assigns to a variable named this_point that the macro does not
 * declare: the enclosing scope must provide it. Arguments are pasted
 * unparenthesised and are evaluated more than once.
 **********************************************************************/
#define find_bounds_loop(point1,point2,x_min,x_max) \
x_min = point2->pos.x; \
x_max = point2->pos.x; \
\
this_point = point1; \
do { \
x_min = MIN (this_point->pos.x, x_min); \
x_max = MAX (this_point->pos.x, x_max); \
this_point = this_point->next; \
} \
while (this_point != point2 && this_point != point1) \
namespace tesseract { namespace tesseract {
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
F u n c t i o n s F u n c t i o n s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
/**********************************************************************
 * full_split_priority
 *
 * Return the bounding-box part of a split's priority; the rest of the
 * priority has been computed elsewhere. The x bounds of the two pieces
 * produced by the split are measured with set_outline_bounds. If
 * [xmin, xmax] extends strictly beyond both pieces on both sides,
 * 999.0 is returned. Otherwise the result is the sum of the overlap,
 * centre and width-change grades.
 **********************************************************************/
PRIORITY Wordrec::full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) {
  BOUNDS_RECT rect;
  set_outline_bounds (split->point1, split->point2, rect);

  /* Combined x extent of the two pieces. */
  const int outer_left = MIN (rect[0], rect[2]);
  const int outer_right = MAX (rect[1], rect[3]);
  if (xmin < outer_left && xmax > outer_right)
    return (999.0);

  return (grade_overlap (rect) +
          grade_center_of_blob (rect) + grade_width_change (rect));
}
/**********************************************************************
 * grade_center_of_blob
 *
 * Return a grade for a split based on how evenly it divides the blob:
 * the absolute difference between the widths of the two pieces, scaled
 * by chop_center_knob. Lower is better.
 *
 * The grade is 0 when both pieces are wider than
 * chop_centered_maxwidth, and is otherwise limited to the range
 * 0 .. CENTER_GRADE_CAP.
 **********************************************************************/
PRIORITY Wordrec::grade_center_of_blob(register BOUNDS_RECT rect) {
  const int first_width = rect[1] - rect[0];
  const int second_width = rect[3] - rect[2];

  /* Two wide pieces are never penalised for being off-centre. */
  if (first_width > chop_centered_maxwidth &&
      second_width > chop_centered_maxwidth) {
    return 0.0;
  }

  PRIORITY grade = first_width - second_width;
  if (grade < 0)
    grade = -grade;
  grade *= chop_center_knob;
  grade = MIN (CENTER_GRADE_CAP, grade);
  return (MAX (0.0, grade));
}
/**********************************************************************
 * grade_overlap
 *
 * Return a grade for the horizontal overlap of the two pieces made by
 * a split. Lower is better.
 *
 * When the overlap equals the width of the narrower piece the grade is
 * 100.0 (total overlap). Otherwise the overlap, plus an extra penalty
 * when it exceeds half of the narrower width, is scaled by
 * chop_overlap_knob and clamped below at 0.
 **********************************************************************/
PRIORITY Wordrec::grade_overlap(register BOUNDS_RECT rect) {
  /* The inT16 locals are deliberate: they keep the same narrowing on
     assignment as the arithmetic has always had. */
  const inT16 second_width = rect[3] - rect[2];
  const inT16 first_width = rect[1] - rect[0];
  const inT16 narrower = MIN (second_width, first_width);

  inT16 overlap = MIN (rect[1], rect[3]) - MAX (rect[0], rect[2]);
  if (overlap == narrower)
    return (100.0);              /* Total overlap */

  /* Extra penalty for too much overlap */
  const inT16 excess = 2 * overlap - narrower;
  overlap += MAX (0, excess);

  const PRIORITY grade = overlap * chop_overlap_knob;
  return (MAX (0.0, grade));
}
/********************************************************************** /**********************************************************************
* grade_split_length * grade_split_length
@ -153,8 +52,8 @@ PRIORITY Wordrec::grade_split_length(register SPLIT *split) {
register PRIORITY grade; register PRIORITY grade;
register float split_length; register float split_length;
split_length = weighted_edgept_dist (split->point1, split->point2, split_length =
chop_x_y_weight); split->point1->WeightedDistance(*split->point2, chop_x_y_weight);
if (split_length <= 0) if (split_length <= 0)
grade = 0; grade = 0;
@ -188,51 +87,4 @@ PRIORITY Wordrec::grade_sharpness(register SPLIT *split) {
} }
/**********************************************************************
 * grade_width_change
 *
 * Return a grade for the change in width caused by a split. Lower is
 * better. The grade is 20 minus the amount by which the combined x
 * extent of the two pieces exceeds the wider piece, scaled by
 * chop_width_change_knob and clamped below at 0.
 **********************************************************************/
PRIORITY Wordrec::grade_width_change(register BOUNDS_RECT rect) {
  const inT32 second_width = rect[3] - rect[2];
  const inT32 first_width = rect[1] - rect[0];

  /* Width of the wider piece and of the two pieces taken together. */
  const inT32 widest = MAX (second_width, first_width);
  const int total_span = MAX (rect[1], rect[3]) - MIN (rect[0], rect[2]);

  PRIORITY grade = 20 - (total_span - widest);
  grade *= chop_width_change_knob;
  return (MAX (0.0, grade));
}
/**********************************************************************
* set_outline_bounds
*
* Set up the limits for the x coordinate of the outline.
**********************************************************************/
void Wordrec::set_outline_bounds(register EDGEPT *point1,
register EDGEPT *point2,
BOUNDS_RECT rect) {
register EDGEPT *this_point;
register inT16 x_min;
register inT16 x_max;
find_bounds_loop(point1, point2, x_min, x_max);
rect[0] = x_min;
rect[1] = x_max;
find_bounds_loop(point2, point1, x_min, x_max);
rect[2] = x_min;
rect[3] = x_max;
}
} // namespace tesseract } // namespace tesseract

View File

@ -32,11 +32,6 @@
#include "seam.h" #include "seam.h"
#include "ndminx.h" #include "ndminx.h"
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
typedef inT16 BOUNDS_RECT[4];
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
M a c r o s M a c r o s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
@ -52,18 +47,4 @@ typedef inT16 BOUNDS_RECT[4];
(grade_split_length (split) + \ (grade_split_length (split) + \
grade_sharpness (split)) \ grade_sharpness (split)) \
/**********************************************************************
 * split_bounds_overlap
 *
 * Check to see if this split might overlap with this outline. Return
 * TRUE if there is a positive overlap in the bounding boxes of the two.
 *
 * The test compares the outline's topleft/botright corners with the
 * min and max of the split's two end points, in x and in y: all four
 * comparisons must hold. Both arguments are used as pointers, are
 * pasted unparenthesised, and are evaluated several times, so pass
 * plain variables without side effects.
 **********************************************************************/
#define split_bounds_overlap(split,outline) \
(outline->topleft.x <= MAX (split->point1->pos.x,split->point2->pos.x) && \
outline->botright.x >= MIN (split->point1->pos.x,split->point2->pos.x) && \
outline->botright.y <= MAX (split->point1->pos.y,split->point2->pos.y) && \
outline->topleft.y >= MIN (split->point1->pos.y,split->point2->pos.y))
#endif #endif

View File

@ -1,226 +0,0 @@
/* -*-C-*-
********************************************************************************
*
* File: makechop.c (Formerly makechop.c)
* Description:
* Author: Mark Seaman, OCR Technology
* Created: Fri Oct 16 14:37:00 1987
* Modified: Mon Jul 29 15:50:42 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Reusable Software Component
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "makechop.h"
#include "blobs.h"
#include "render.h"
#include "structures.h"
#ifdef __UNIX__
#include <assert.h>
#include <unistd.h>
#endif
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
/*----------------------------------------------------------------------
Public Function Code
----------------------------------------------------------------------*/
/**********************************************************************
 * apply_seam
 *
 * Split this blob into two blobs according to the seam. The number of
 * splits held by the seam selects the routine:
 *   no splits    -> divide_blobs at the seam location
 *   one split    -> make_split_blobs
 *   two splits   -> make_double_split
 *   three splits -> make_triple_split
 **********************************************************************/
void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam) {
  if (seam->split1 == NULL) {
    divide_blobs(blob, other_blob, italic_blob, seam->location);
    return;
  }
  if (seam->split2 == NULL) {
    make_split_blobs(blob, other_blob, italic_blob, seam);
    return;
  }
  if (seam->split3 == NULL) {
    make_double_split(blob, other_blob, italic_blob, seam);
    return;
  }
  make_triple_split(blob, other_blob, italic_blob, seam);
}
/**********************************************************************
 * form_two_blobs
 *
 * Group the outlines from the first blob into both of them. Do so
 * according to the information about the split.
 *
 * The steps run in a fixed order: the outlines of blob are set up,
 * the outlines are divided between blob and other_blob at location,
 * duplicate outlines are removed from each blob, and finally the order
 * of the two blobs is corrected. The callers in this file apply their
 * splits to blob->outlines before calling this.
 **********************************************************************/
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
const TPOINT& location) {
setup_blob_outlines(blob);
divide_blobs(blob, other_blob, italic_blob, location);
eliminate_duplicate_outlines(blob);
eliminate_duplicate_outlines(other_blob);
correct_blob_order(blob, other_blob);
}
/**********************************************************************
 * make_double_split
 *
 * Apply the first two splits of the seam to the outlines of blob, in
 * order, then distribute the resulting outlines between blob and
 * other_blob around the seam location.
 **********************************************************************/
void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
                       SEAM *seam) {
  SPLIT *splits[2] = {seam->split1, seam->split2};
  for (int i = 0; i < 2; ++i)
    make_single_split(blob->outlines, splits[i]);
  form_two_blobs(blob, other_blob, italic_blob, seam->location);
}
/**********************************************************************
 * make_single_split
 *
 * Apply one split to the edge points and record the result: two new
 * TESSLINEs, whose loops start at split->point1 and split->point2, are
 * appended to the end of the outline list. The list must not be empty.
 **********************************************************************/
void make_single_split(TESSLINE *outlines, SPLIT *split) {
  assert (outlines != NULL);

  split_outline (split->point1, split->point2);

  /* Find the current end of the list. */
  TESSLINE *tail = outlines;
  while (tail->next != NULL)
    tail = tail->next;

  TESSLINE *first_half = new TESSLINE;
  tail->next = first_half;
  first_half->loop = split->point1;
  first_half->ComputeBoundingBox();

  TESSLINE *second_half = new TESSLINE;
  first_half->next = second_half;
  second_half->loop = split->point2;
  second_half->ComputeBoundingBox();
  second_half->next = NULL;
}
/**********************************************************************
 * make_split_blobs
 *
 * Apply the seam's single split to the outlines of blob, then
 * distribute the resulting outlines between blob and other_blob around
 * the seam location.
 **********************************************************************/
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
                      SEAM *seam) {
  SPLIT *only_split = seam->split1;
  make_single_split(blob->outlines, only_split);
  form_two_blobs(blob, other_blob, italic_blob, seam->location);
}
/**********************************************************************
 * make_triple_split
 *
 * Apply all three splits of the seam to the outlines of blob, in
 * order, then distribute the resulting outlines between blob and
 * other_blob around the seam location.
 **********************************************************************/
void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
                       SEAM *seam) {
  SPLIT *splits[3] = {seam->split1, seam->split2, seam->split3};
  for (int i = 0; i < 3; ++i)
    make_single_split(blob->outlines, splits[i]);
  form_two_blobs(blob, other_blob, italic_blob, seam->location);
}
/**********************************************************************
 * undo_seam
 *
 * Remove the seam between these two blobs, producing one blob as a
 * result. The outlines of other_blob are moved onto the end of blob's
 * outline list and other_blob is deleted; the caller must not use it
 * afterwards. The seam may consist of one, two, or three splits, and
 * each of them is removed from the outlines. Finally the outlines of
 * blob are set up again and duplicates are eliminated.
 *
 * Does nothing when seam is NULL. Either blob, or both, may have an
 * empty outline list.
 **********************************************************************/
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
  if (!seam)
    return;

  /* Append other blob outlines */
  if (blob->outlines == NULL) {
    /* Nothing to append to: just take over the other list, which may
       itself be empty. */
    blob->outlines = other_blob->outlines;
  } else {
    TESSLINE *outline = blob->outlines;
    while (outline->next)
      outline = outline->next;
    outline->next = other_blob->outlines;
  }
  /* Detach the list so that deleting other_blob leaves it alone. */
  other_blob->outlines = NULL;
  delete other_blob;

  if (seam->split1 == NULL) {
    /* A seam without splits leaves no edge points to repair. */
  }
  else if (seam->split2 == NULL) {
    undo_single_split (blob, seam->split1);
  }
  else if (seam->split3 == NULL) {
    undo_single_split (blob, seam->split1);
    undo_single_split (blob, seam->split2);
  }
  else {
    undo_single_split (blob, seam->split3);
    undo_single_split (blob, seam->split2);
    undo_single_split (blob, seam->split1);
  }

  setup_blob_outlines(blob);
  eliminate_duplicate_outlines(blob);
}
/**********************************************************************
 * undo_single_split
 *
 * Undo one split. The edge points are rejoined with unsplit_outlines,
 * then two new TESSLINEs are pushed onto the front of the blob's
 * outline list, the first with its loop at split->point1 and the
 * second at split->point2. No bounding boxes are computed here.
 **********************************************************************/
void undo_single_split(TBLOB *blob, SPLIT *split) {
  /* Modify edge points */
  unsplit_outlines (split->point1, split->point2);

  EDGEPT *loop_starts[2] = {split->point1, split->point2};
  for (int i = 0; i < 2; ++i) {
    TESSLINE *fresh = new TESSLINE;
    fresh->next = blob->outlines;
    blob->outlines = fresh;
    fresh->loop = loop_starts[i];
  }
}

View File

@ -1,71 +0,0 @@
/* -*-C-*-
********************************************************************************
*
* File: makechop.h (Formerly makechop.h)
* Description:
* Author: Mark Seaman, SW Productivity
* Created: Fri Oct 16 14:37:00 1987
* Modified: Mon Jul 29 13:33:23 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Reusable Software Component
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
#ifndef MAKECHOP_H
#define MAKECHOP_H
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "chop.h"
#include "olutil.h"
/*----------------------------------------------------------------------
M a c r o s
---------------------------------------------------------------------*/
/**********************************************************************
 * is_split_outline
 *
 * Evaluates to true when outline->Contains() accepts the positions of
 * both end points of the split (per the original note: both sides of
 * the split fall within the bounding box of this outline).
 *
 * Both arguments are parenthesized so that arbitrary pointer
 * expressions (e.g. "list + 1") can be passed safely. Each argument is
 * evaluated twice, so avoid arguments with side effects.
 **********************************************************************/
#define is_split_outline(outline,split)               \
  ((outline)->Contains((split)->point1->pos) &&       \
   (outline)->Contains((split)->point2->pos))
/*----------------------------------------------------------------------
Public Function Prototypes
----------------------------------------------------------------------*/
// Seam application entry points.
// NOTE(review): the definitions of the apply/make functions are not visible
// next to these declarations; their exact behaviour should be confirmed
// against makechop.cpp before relying on the names alone.
void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam);
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
const TPOINT& location);
void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
SEAM *seam);
void make_single_split(TESSLINE *outlines, SPLIT *split);
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
SEAM *seam);
void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
SEAM *seam);
// Presumably the inverse of apply_seam; the visible tail of its definition
// undoes the seam's splits one at a time with undo_single_split and then
// recomputes outline bounds and drops duplicate outlines -- TODO confirm the
// rest of the body.
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam);
// Reverses one split: calls unsplit_outlines on the split's two points and
// prepends one new TESSLINE per split point to blob->outlines.
void undo_single_split(TBLOB *blob, SPLIT *split);
#endif

View File

@ -1,102 +0,0 @@
/* -*-C-*-
********************************************************************************
*
* File: olutil.c (Formerly olutil.c)
* Description:
* Author: Mark Seaman, OCR Technology
* Created: Fri Oct 16 14:37:00 1987
* Modified: Fri May 17 13:11:24 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Reusable Software Component
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "olutil.h"
#include "structures.h"
#include "blobs.h"
#include "const.h"
#ifdef __UNIX__
#include <assert.h>
#endif
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
/**********************************************************************
 * correct_blob_order
 *
 * Compare the x coordinates of the two blob origins (as reported by
 * blob_origin). When the first blob's origin lies to the right of the
 * second's, exchange the outline lists of the two blobs; otherwise
 * leave both blobs untouched.
 **********************************************************************/
void correct_blob_order(TBLOB *blob1, TBLOB *blob2) {
  TPOINT first_origin;
  TPOINT second_origin;
  blob_origin(blob1, &first_origin);
  blob_origin(blob2, &second_origin);

  if (first_origin.x > second_origin.x) {
    /* Out of order: swap which outlines hang off which blob. */
    TESSLINE *first_outlines = blob1->outlines;
    blob1->outlines = blob2->outlines;
    blob2->outlines = first_outlines;
  }
}
/**********************************************************************
 * eliminate_duplicate_outlines
 *
 * For every outline in the blob, scan the outlines that follow it and
 * unlink and delete each one whose bounds match (same_outline_bounds).
 * The outline that is kept has its is_hole flag cleared whenever a
 * duplicate of it is removed.
 **********************************************************************/
void eliminate_duplicate_outlines(TBLOB *blob) {
  for (TESSLINE *kept = blob->outlines; kept != NULL; kept = kept->next) {
    /* trailing always points at the node just before the candidate. */
    TESSLINE *trailing = kept;
    while (trailing->next != NULL) {
      TESSLINE *candidate = trailing->next;
      if (same_outline_bounds (kept, candidate)) {
        /* Unlink the duplicate; trailing stays put so that the node
           that slid into its place is examined next. */
        trailing->next = candidate->next;
        // This doesn't leak - the outlines share the EDGEPTs.
        candidate->loop = NULL;
        delete candidate;
        // If it is part of a cut, then it can't be a hole any more.
        kept->is_hole = false;
      } else {
        trailing = candidate;
      }
    }
  }
}
/**********************************************************************
 * setup_blob_outlines
 *
 * Walk the blob's outline list and call ComputeBoundingBox() on each
 * outline in turn.
 **********************************************************************/
void setup_blob_outlines(TBLOB *blob) {
  TESSLINE *current = blob->outlines;
  while (current) {
    current->ComputeBoundingBox();
    current = current->next;
  }
}

View File

@ -1,82 +0,0 @@
/* -*-C-*-
********************************************************************************
*
* File: olutil.h (Formerly olutil.h)
* Description:
* Author: Mark Seaman, SW Productivity
* Created: Fri Oct 16 14:37:00 1987
* Modified: Wed Jul 10 14:21:55 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Reusable Software Component
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
#ifndef OLUTIL_H
#define OLUTIL_H
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "blobs.h"
/*----------------------------------------------------------------------
M a c r o s
----------------------------------------------------------------------*/
/**********************************************************************
 * is_inside_angle
 *
 * Return true if the edgept supplied as input is an inside angle. This
 * is determined by the angular change of the vectors from point to
 * point.
 *
 * Expands to a comparison of angle_change(prev, pt, next) against
 * chop_inside_angle (strictly less than), so both names must be
 * visible wherever the macro is used. The pt argument is evaluated
 * three times; do not pass an expression with side effects.
 **********************************************************************/
#define is_inside_angle(pt) \
(angle_change ((pt)->prev, (pt), (pt)->next) < chop_inside_angle)
/**********************************************************************
 * same_outline_bounds
 *
 * Evaluates to true if the two outlines have identical topleft and
 * botright corners (all four coordinates compare equal).
 *
 * Both arguments are parenthesized so that arbitrary pointer
 * expressions can be passed. Each argument is evaluated up to four
 * times, so avoid arguments with side effects.
 **********************************************************************/
#define same_outline_bounds(outline,other_outline)                 \
  ((outline)->topleft.x == (other_outline)->topleft.x &&           \
   (outline)->topleft.y == (other_outline)->topleft.y &&           \
   (outline)->botright.x == (other_outline)->botright.x &&         \
   (outline)->botright.y == (other_outline)->botright.y)
/**********************************************************************
 * weighted_edgept_dist
 *
 * Return the weighted squared distance between the two edge points:
 * dx * dx * chop_x_y_weight + dy * dy, where dx and dy are the
 * differences of the points' pos.x and pos.y.
 *
 * The weight argument is parenthesized so that a compound expression
 * (e.g. "a + b") scales the whole x term instead of being split by
 * operator precedence. p1 and p2 are each evaluated four times.
 **********************************************************************/
#define weighted_edgept_dist(p1,p2,chop_x_y_weight)            \
  (((p1)->pos.x - (p2)->pos.x) *                               \
   ((p1)->pos.x - (p2)->pos.x) * (chop_x_y_weight) +           \
   ((p1)->pos.y - (p2)->pos.y) *                               \
   ((p1)->pos.y - (p2)->pos.y))
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
// Swaps the outline lists of blob1 and blob2 when the x coordinate of
// blob1's origin is greater than that of blob2's origin.
void correct_blob_order(TBLOB *blob1, TBLOB *blob2);
// Unlinks and deletes every outline whose bounds match an earlier outline in
// the blob's list; the outline that is kept has is_hole set to false.
void eliminate_duplicate_outlines(TBLOB *blob);
// Calls ComputeBoundingBox() on every outline in the blob's list.
void setup_blob_outlines(TBLOB *blob);
#endif

View File

@ -39,73 +39,6 @@ namespace tesseract {
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
F u n c t i o n s F u n c t i o n s
----------------------------------------------------------------------*/ ----------------------------------------------------------------------*/
/**********************************************************************
 * crosses_outline
 *
 * Walk once around the closed outline and test the line p0-p1 against
 * every outline edge (pt, pt->next) with is_crossed. Returns TRUE as
 * soon as one edge is crossed, FALSE if the walk gets back to the
 * starting point without finding a crossing.
 **********************************************************************/
int Wordrec::crosses_outline(EDGEPT *p0,         /* Start of line */
                             EDGEPT *p1,         /* End of line */
                             EDGEPT *outline) {  /* Outline to check */
  EDGEPT *edge_start = outline;
  for (;;) {
    if (is_crossed (p0->pos, p1->pos, edge_start->pos, edge_start->next->pos)) {
      return TRUE;
    }
    edge_start = edge_start->next;
    if (edge_start == outline) {
      /* Back at the first point: every edge has been checked. */
      break;
    }
  }
  return FALSE;
}
/**********************************************************************
 * is_crossed
 *
 * Return non-zero when segment a0-a1 and segment b0-b1 cross each
 * other. Two pairs of cross products are formed: one pair places a0
 * and a1 relative to the line through b0-b1, the other places b0 and
 * b1 relative to the line through a0-a1. The segments cross only when,
 * in both pairs, the two products are non-zero and share the same
 * sign, i.e. each segment's end points lie strictly on opposite sides
 * of the other segment's line.
 **********************************************************************/
int Wordrec::is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) {
  /* Difference vectors, named <from><to>. */
  TPOINT b0a1, b0a0, a1b1, b0b1, a1a0;

  b0a1.x = a1.x - b0.x;
  b0a1.y = a1.y - b0.y;

  b0a0.x = a0.x - b0.x;
  b0a0.y = a0.y - b0.y;

  a1b1.x = b1.x - a1.x;
  a1b1.y = b1.y - a1.y;

  b0b1.x = b1.x - b0.x;
  b0b1.y = b1.y - b0.y;

  a1a0.x = a0.x - a1.x;
  a1a0.y = a0.y - a1.y;

  /* Position of a1 and a0 relative to the line b0-b1. */
  int b0a1xb0b1 = CROSS (b0a1, b0b1);
  int b0b1xb0a0 = CROSS (b0b1, b0a0);
  /* Position of b1 and b0 relative to the line a1-a0. The vector a1b0
     is the negation of b0a1, hence the sign flip instead of building
     a sixth vector. */
  int a1b1xa1a0 = CROSS (a1b1, a1a0);
  int a1a0xa1b0 = -CROSS (a1a0, b0a1);

  const bool a_split_by_b = (b0a1xb0b1 > 0 && b0b1xb0a0 > 0) ||
                            (b0a1xb0b1 < 0 && b0b1xb0a0 < 0);
  const bool b_split_by_a = (a1b1xa1a0 > 0 && a1a0xa1b0 > 0) ||
                            (a1b1xa1a0 < 0 && a1a0xa1b0 < 0);
  return a_split_by_b && b_split_by_a;
}
/**********************************************************************
 * is_same_edgept
 *
 * Return non-zero when both arguments point at the very same EDGEPT
 * (pointer identity; the points' contents are not compared).
 **********************************************************************/
int Wordrec::is_same_edgept(EDGEPT *p1, EDGEPT *p2) {
  const bool same_object = (p1 == p2);
  return same_object;
}
/********************************************************************** /**********************************************************************
* near_point * near_point
* *
@ -153,30 +86,4 @@ bool Wordrec::near_point(EDGEPT *point,
} }
} }
/**********************************************************************
 * reverse_outline
 *
 * Flip the direction of a closed outline: every edge point has its
 * next and prev links exchanged, and its vec field is recomputed so
 * that it points from the point to its new successor. The walk
 * follows the original next links and stops when it is back at the
 * starting point.
 **********************************************************************/
void Wordrec::reverse_outline(EDGEPT *outline) {
  EDGEPT *current = outline;
  do {
    EDGEPT *old_next = current->next;
    EDGEPT *old_prev = current->prev;

    /* Exchange the links. */
    current->next = old_prev;
    current->prev = old_next;

    /* vec now has to lead to the new successor. */
    current->vec.x = current->next->pos.x - current->pos.x;
    current->vec.y = current->next->pos.y - current->pos.y;

    /* Continue with what used to be the following point. */
    current = old_next;
  } while (current != outline);
}
} // namespace tesseract } // namespace tesseract

View File

@ -58,7 +58,7 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector<SEAM*>& seams,
const char* description, const char* description,
TWERD *word, TWERD *word,
BlamerBundle *blamer_bundle) { BlamerBundle *blamer_bundle) {
if (end > start) join_pieces(seams, start, end, word); if (end > start) SEAM::JoinPieces(seams, word->blobs, start, end);
BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description, BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description,
White, blamer_bundle); White, blamer_bundle);
// Set the matrix_cell_ entries in all the BLOB_CHOICES. // Set the matrix_cell_ entries in all the BLOB_CHOICES.
@ -67,7 +67,7 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector<SEAM*>& seams,
bc_it.data()->set_matrix_cell(start, end); bc_it.data()->set_matrix_cell(start, end);
} }
if (end > start) break_pieces(seams, start, end, word); if (end > start) SEAM::BreakPieces(seams, word->blobs, start, end);
return (choices); return (choices);
} }

View File

@ -119,21 +119,4 @@ void mark_outline(EDGEPT *edgept) { /* Start of point list */
c_make_current(window); c_make_current(window);
} }
/**********************************************************************
 * mark_split
 *
 * Draw the split as a green line in the edge window, running from the
 * position of the split's first point to the position of its second
 * point, and then make that window current.
 **********************************************************************/
void mark_split(SPLIT *split) {
  void *window = edge_window;

  c_line_color_index(window, Green);

  const float start_x = static_cast<float>(split->point1->pos.x);
  const float start_y = static_cast<float>(split->point1->pos.y);
  c_move(window, start_x, start_y);

  const float end_x = static_cast<float>(split->point2->pos.x);
  const float end_y = static_cast<float>(split->point2->pos.y);
  c_draw(window, end_x, end_y);

  c_make_current(window);
}
#endif // GRAPHICS_DISABLED #endif // GRAPHICS_DISABLED

View File

@ -28,7 +28,6 @@
#include "callcpp.h" #include "callcpp.h"
#include "oldlist.h" #include "oldlist.h"
#include "blobs.h" #include "blobs.h"
#include "split.h"
/*---------------------------------------------------------------------- /*----------------------------------------------------------------------
V a r i a b l e s V a r i a b l e s
@ -67,5 +66,4 @@ void draw_blob_edges(TBLOB *blob);
void mark_outline(EDGEPT *edgept); void mark_outline(EDGEPT *edgept);
void mark_split(SPLIT *split);
#endif #endif

View File

@ -53,8 +53,7 @@ void Wordrec::SegSearch(WERD_RES* word_res,
improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle, improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle,
blamer_bundle, &pain_points, &pending); blamer_bundle, &pain_points, &pending);
} }
if (chop_debug) if (chop_debug) SEAM::PrintSeams("Final seam list:", word_res->seam_array);
print_seams("Final seam list:", word_res->seam_array);
if (blamer_bundle != NULL && if (blamer_bundle != NULL &&
!blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) { !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {

View File

@ -290,9 +290,10 @@ class Wordrec : public Classify {
// chop.cpp // chop.cpp
PRIORITY point_priority(EDGEPT *point); PRIORITY point_priority(EDGEPT *point);
void add_point_to_list(PointHeap* point_heap, EDGEPT *point); void add_point_to_list(PointHeap* point_heap, EDGEPT *point);
// Returns true if the edgept supplied as input is an inside angle. This
// is determined by the angular change of the vectors from point to point.
bool is_inside_angle(EDGEPT *pt);
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3); int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3);
int is_little_chunk(EDGEPT *point1, EDGEPT *point2);
int is_small_area(EDGEPT *point1, EDGEPT *point2);
EDGEPT *pick_close_point(EDGEPT *critical_point, EDGEPT *pick_close_point(EDGEPT *critical_point,
EDGEPT *vertical_point, EDGEPT *vertical_point,
int *best_dist); int *best_dist);
@ -335,17 +336,12 @@ class Wordrec : public Classify {
// findseam.cpp // findseam.cpp
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue* seams); void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue* seams);
void choose_best_seam(SeamQueue* seam_queue, void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob,
PRIORITY priority, SeamPile *seam_pile);
SEAM **seam_result,
TBLOB *blob,
SeamPile* seam_pile);
void combine_seam(const SeamPile& seam_pile, void combine_seam(const SeamPile& seam_pile,
const SEAM* seam, SeamQueue* seam_queue); const SEAM* seam, SeamQueue* seam_queue);
inT16 constrained_split(SPLIT *split, TBLOB *blob);
SEAM *pick_good_seam(TBLOB *blob); SEAM *pick_good_seam(TBLOB *blob);
PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax);
void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS],
inT16 num_points, inT16 num_points,
SeamQueue* seam_queue, SeamQueue* seam_queue,
@ -359,23 +355,12 @@ class Wordrec : public Classify {
SEAM ** seam, TBLOB * blob); SEAM ** seam, TBLOB * blob);
// gradechop.cpp // gradechop.cpp
PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax);
PRIORITY grade_center_of_blob(register BOUNDS_RECT rect);
PRIORITY grade_overlap(register BOUNDS_RECT rect);
PRIORITY grade_split_length(register SPLIT *split); PRIORITY grade_split_length(register SPLIT *split);
PRIORITY grade_sharpness(register SPLIT *split); PRIORITY grade_sharpness(register SPLIT *split);
PRIORITY grade_width_change(register BOUNDS_RECT rect);
void set_outline_bounds(register EDGEPT *point1,
register EDGEPT *point2,
BOUNDS_RECT rect);
// outlines.cpp // outlines.cpp
int crosses_outline(EDGEPT *p0, EDGEPT *p1, EDGEPT *outline);
int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1);
int is_same_edgept(EDGEPT *p1, EDGEPT *p2);
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1,
EDGEPT **near_pt); EDGEPT **near_pt);
void reverse_outline(EDGEPT *outline);
// pieces.cpp // pieces.cpp
virtual BLOB_CHOICE_LIST *classify_piece(const GenericVector<SEAM*>& seams, virtual BLOB_CHOICE_LIST *classify_piece(const GenericVector<SEAM*>& seams,