From 25d0968d094a8f6d4ec52f1cc9b869f4a650e3b7 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Tue, 12 May 2015 14:59:14 -0700 Subject: [PATCH] Major refactor to improve speed on difficult images, especially when running a heap checker. SEAM and SPLIT have been begging for a refactor for a *LONG* time. This change does most of the work of turning them into proper classes: Moved relevant code into SEAM/SPLIT/TBLOB/EDGEPT etc from global helper functions. Made the splits full data members of SEAM in an array instead of 3 separate pointers. This greatly reduces the amount of new/delete happening in the chopper, which is the main goal. Deleted redundant files: olutil.*, makechop.* Brought other code into SEAM in order to keep its data members private with only priority having accessors. --- ccmain/applybox.cpp | 2 +- ccmain/tfacepp.cpp | 2 +- ccstruct/blobs.cpp | 78 ++++- ccstruct/blobs.h | 108 ++++++- ccstruct/pageres.cpp | 18 +- ccstruct/seam.cpp | 699 +++++++++++++--------------------------- ccstruct/seam.h | 230 ++++++++----- ccstruct/split.cpp | 277 ++++++++++------ ccstruct/split.h | 101 ++++-- ccstruct/vecfuncs.cpp | 1 + ccstruct/vecfuncs.h | 1 - classify/adaptmatch.cpp | 7 +- wordrec/chop.cpp | 65 +--- wordrec/chopper.cpp | 76 +---- wordrec/chopper.h | 2 - wordrec/findseam.cpp | 216 +++---------- wordrec/gradechop.cpp | 152 +-------- wordrec/gradechop.h | 19 -- wordrec/makechop.cpp | 226 ------------- wordrec/makechop.h | 71 ---- wordrec/olutil.cpp | 102 ------ wordrec/olutil.h | 82 ----- wordrec/outlines.cpp | 93 ------ wordrec/pieces.cpp | 4 +- wordrec/plotedges.cpp | 17 - wordrec/plotedges.h | 2 - wordrec/segsearch.cpp | 3 +- wordrec/wordrec.h | 27 +- 28 files changed, 876 insertions(+), 1805 deletions(-) delete mode 100644 wordrec/makechop.cpp delete mode 100644 wordrec/makechop.h delete mode 100644 wordrec/olutil.cpp delete mode 100644 wordrec/olutil.h diff --git a/ccmain/applybox.cpp b/ccmain/applybox.cpp index b9a28fa3..9c067e79 100644 --- 
a/ccmain/applybox.cpp +++ b/ccmain/applybox.cpp @@ -582,7 +582,7 @@ bool Tesseract::FindSegmentation(const GenericVector& target_text, int blob_count = 1; for (int s = 0; s < word_res->seam_array.size(); ++s) { SEAM* seam = word_res->seam_array[s]; - if (seam->split1 == NULL) { + if (!seam->HasAnySplits()) { word_res->best_state.push_back(blob_count); blob_count = 1; } else { diff --git a/ccmain/tfacepp.cpp b/ccmain/tfacepp.cpp index 45775fe4..e1dc778f 100644 --- a/ccmain/tfacepp.cpp +++ b/ccmain/tfacepp.cpp @@ -254,7 +254,7 @@ void Tesseract::join_words(WERD_RES *word, // Move the word2 seams onto the end of the word1 seam_array. // Since the seam list is one element short, an empty seam marking the // end of the last blob in the first word is needed first. - word->seam_array.push_back(new SEAM(0.0f, split_pt, NULL, NULL, NULL)); + word->seam_array.push_back(new SEAM(0.0f, split_pt)); word->seam_array += word2->seam_array; word2->seam_array.truncate(0); // Fix widths and gaps. diff --git a/ccstruct/blobs.cpp b/ccstruct/blobs.cpp index a0e6dc7b..97f95eba 100644 --- a/ccstruct/blobs.cpp +++ b/ccstruct/blobs.cpp @@ -64,6 +64,42 @@ const TPOINT kDivisibleVerticalItalic(1, 5); CLISTIZE(EDGEPT); +// Returns true when the two line segments cross each other. +// (Moved from outlines.cpp). +// Finds where the projected lines would cross and then checks to see if the +// point of intersection lies on both of the line segments. If it does +// then these two segments cross. 
+/* static */ +bool TPOINT::IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0, + const TPOINT& b1) { + int b0a1xb0b1, b0b1xb0a0; + int a1b1xa1a0, a1a0xa1b0; + + TPOINT b0a1, b0a0, a1b1, b0b1, a1a0; + + b0a1.x = a1.x - b0.x; + b0a0.x = a0.x - b0.x; + a1b1.x = b1.x - a1.x; + b0b1.x = b1.x - b0.x; + a1a0.x = a0.x - a1.x; + b0a1.y = a1.y - b0.y; + b0a0.y = a0.y - b0.y; + a1b1.y = b1.y - a1.y; + b0b1.y = b1.y - b0.y; + a1a0.y = a0.y - a1.y; + + b0a1xb0b1 = CROSS(b0a1, b0b1); + b0b1xb0a0 = CROSS(b0b1, b0a0); + a1b1xa1a0 = CROSS(a1b1, a1a0); + // For clarity, we want CROSS(a1a0,a1b0) here but we have b0a1 instead of a1b0 + // so use -CROSS(a1a0,b0a1) instead, which is the same. + a1a0xa1b0 = -CROSS(a1a0, b0a1); + + return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) || + (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) && + ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0)); +} + // Consume the circular list of EDGEPTs to make a TESSLINE. TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) { TESSLINE* result = new TESSLINE; @@ -454,6 +490,36 @@ TBOX TBLOB::bounding_box() const { return box; } +// Finds and deletes any duplicate outlines in this blob, without deleting +// their EDGEPTs. +void TBLOB::EliminateDuplicateOutlines() { + for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) { + TESSLINE* last_outline = outline; + for (TESSLINE* other_outline = outline->next; other_outline != NULL; + last_outline = other_outline, other_outline = other_outline->next) { + if (outline->SameBox(*other_outline)) { + last_outline->next = other_outline->next; + // This doesn't leak - the outlines share the EDGEPTs. + other_outline->loop = NULL; + delete other_outline; + other_outline = last_outline; + // If it is part of a cut, then it can't be a hole any more. + outline->is_hole = false; + } + } + } +} + +// Swaps the outlines of *this and next if needed to keep the centers in +// increasing x. 
+void TBLOB::CorrectBlobOrder(TBLOB* next) { + TBOX box = bounding_box(); + TBOX next_box = next->bounding_box(); + if (box.x_middle() > next_box.x_middle()) { + Swap(&outlines, &next->outlines); + } +} + #ifndef GRAPHICS_DISABLED void TBLOB::plot(ScrollView* window, ScrollView::Color color, ScrollView::Color child_color) { @@ -858,18 +924,6 @@ void TWERD::plot(ScrollView* window) { } #endif // GRAPHICS_DISABLED -/********************************************************************** - * blob_origin - * - * Compute the origin of a compound blob, define to be the centre - * of the bounding box. - **********************************************************************/ -void blob_origin(TBLOB *blob, /*blob to compute on */ - TPOINT *origin) { /*return value */ - TBOX bbox = blob->bounding_box(); - *origin = (bbox.topleft() + bbox.botright()) / 2; -} - /********************************************************************** * divisible_blob * diff --git a/ccstruct/blobs.h b/ccstruct/blobs.h index e39761b1..1fd9683e 100644 --- a/ccstruct/blobs.h +++ b/ccstruct/blobs.h @@ -60,6 +60,13 @@ struct TPOINT { x /= divisor; y /= divisor; } + bool operator==(const TPOINT& other) const { + return x == other.x && y == other.y; + } + // Returns true when the two line segments cross each other. + // (Moved from outlines.cpp). + static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0, + const TPOINT& b1); inT16 x; // absolute x coord. inT16 y; // absolute y coord. @@ -87,6 +94,55 @@ struct EDGEPT { start_step = src.start_step; step_count = src.step_count; } + // Returns the squared distance between the points, with the x-component + // weighted by x_factor. + int WeightedDistance(const EDGEPT& other, int x_factor) const { + int x_dist = pos.x - other.pos.x; + int y_dist = pos.y - other.pos.y; + return x_dist * x_dist * x_factor + y_dist * y_dist; + } + // Returns true if the positions are equal. 
+ bool EqualPos(const EDGEPT& other) const { return pos == other.pos; } + // Returns the bounding box of the outline segment from *this to *end. + // Ignores hidden edge flags. + TBOX SegmentBox(const EDGEPT* end) const { + TBOX box(pos.x, pos.y, pos.x, pos.y); + const EDGEPT* pt = this; + do { + pt = pt->next; + if (pt->pos.x < box.left()) box.set_left(pt->pos.x); + if (pt->pos.x > box.right()) box.set_right(pt->pos.x); + if (pt->pos.y < box.bottom()) box.set_bottom(pt->pos.y); + if (pt->pos.y > box.top()) box.set_top(pt->pos.y); + } while (pt != end && pt != this); + return box; + } + // Returns the area of the outline segment from *this to *end. + // Ignores hidden edge flags. + int SegmentArea(const EDGEPT* end) const { + int area = 0; + const EDGEPT* pt = this->next; + do { + TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y); + area += CROSS(origin_vec, pt->vec); + pt = pt->next; + } while (pt != end && pt != this); + return area; + } + // Returns true if the number of points in the outline segment from *this to + // *end is less than min_points and false if we get back to *this first. + // Ignores hidden edge flags. + bool ShortNonCircularSegment(int min_points, const EDGEPT* end) const { + int count = 0; + const EDGEPT* pt = this; + do { + if (pt == end) return true; + pt = pt->next; + ++count; + } while (pt != this && count <= min_points); + return false; + } + // Accessors to hide or reveal a cut edge from feature extractors. void Hide() { flags[0] = true; @@ -100,9 +156,6 @@ struct EDGEPT { void MarkChop() { flags[2] = true; } - void UnmarkChop() { - flags[2] = false; - } bool IsChopPt() const { return flags[2] != 0; } @@ -162,8 +215,23 @@ struct TESSLINE { void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const; TBOX bounding_box() const; + // Returns true if *this and other have equal bounding boxes. 
+ bool SameBox(const TESSLINE& other) const { + return topleft == other.topleft && botright == other.botright; + } + // Returns true if the given line segment crosses this outline. + bool SegmentCrosses(const TPOINT& pt1, const TPOINT& pt2) const { + if (Contains(pt1) && Contains(pt2)) { + EDGEPT* pt = loop; + do { + if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) return true; + pt = pt->next; + } while (pt != loop); + } + return false; + } // Returns true if the point is contained within the outline box. - bool Contains(const TPOINT& pt) { + bool Contains(const TPOINT& pt) const { return topleft.x <= pt.x && pt.x <= botright.x && botright.y <= pt.y && pt.y <= topleft.y; } @@ -244,6 +312,31 @@ struct TBLOB { TBOX bounding_box() const; + // Returns true if the given line segment crosses any outline of this blob. + bool SegmentCrossesOutline(const TPOINT& pt1, const TPOINT& pt2) const { + for (const TESSLINE* outline = outlines; outline != NULL; + outline = outline->next) { + if (outline->SegmentCrosses(pt1, pt2)) return true; + } + return false; + } + // Returns true if the point is contained within any of the outline boxes. + bool Contains(const TPOINT& pt) const { + for (const TESSLINE* outline = outlines; outline != NULL; + outline = outline->next) { + if (outline->Contains(pt)) return true; + } + return false; + } + + // Finds and deletes any duplicate outlines in this blob, without deleting + // their EDGEPTs. + void EliminateDuplicateOutlines(); + + // Swaps the outlines of *this and next if needed to keep the centers in + // increasing x. 
+ void CorrectBlobOrder(TBLOB* next); + const DENORM& denorm() const { return denorm_; } @@ -358,12 +451,7 @@ if (w) memfree (w) /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -// TODO(rays) This will become a member of TBLOB when TBLOB's definition -// moves to blobs.h - -// Returns the center of blob's bounding box in origin. -void blob_origin(TBLOB *blob, TPOINT *origin); - +// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB. bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location); void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, diff --git a/ccstruct/pageres.cpp b/ccstruct/pageres.cpp index 6a7f7a02..58f7d8a8 100644 --- a/ccstruct/pageres.cpp +++ b/ccstruct/pageres.cpp @@ -404,7 +404,8 @@ void WERD_RES::SetupBlobWidthsAndGaps() { // as the blob widths and gaps. void WERD_RES::InsertSeam(int blob_number, SEAM* seam) { // Insert the seam into the SEAMS array. - insert_seam(chopped_word, blob_number, seam, &seam_array); + seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true); + seam_array.insert(seam, blob_number); if (ratings != NULL) { // Expand the ratings matrix. 
ratings = ratings->ConsumeAndMakeBigger(blob_number); @@ -804,12 +805,16 @@ void WERD_RES::RebuildBestState() { for (int i = 0; i < best_choice->length(); ++i) { int length = best_choice->state(i); best_state.push_back(length); - if (length > 1) - join_pieces(seam_array, start, start + length - 1, chopped_word); + if (length > 1) { + SEAM::JoinPieces(seam_array, chopped_word->blobs, start, + start + length - 1); + } TBLOB* blob = chopped_word->blobs[start]; rebuild_word->blobs.push_back(new TBLOB(*blob)); - if (length > 1) - break_pieces(seam_array, start, start + length - 1, chopped_word); + if (length > 1) { + SEAM::BreakPieces(seam_array, chopped_word->blobs, start, + start + length - 1); + } start += length; } } @@ -1065,8 +1070,7 @@ bool WERD_RES::PiecesAllNatural(int start, int count) const { for (int index = start; index < start + count - 1; ++index) { if (index >= 0 && index < seam_array.size()) { SEAM* seam = seam_array[index]; - if (seam != NULL && seam->split1 != NULL) - return false; + if (seam != NULL && seam->HasAnySplits()) return false; } } return true; diff --git a/ccstruct/seam.cpp b/ccstruct/seam.cpp index e05fac9a..3d70eafc 100644 --- a/ccstruct/seam.cpp +++ b/ccstruct/seam.cpp @@ -27,114 +27,236 @@ ----------------------------------------------------------------------*/ #include "seam.h" #include "blobs.h" -#include "freelist.h" #include "tprintf.h" -#ifdef __UNIX__ -#include -#endif - -/*---------------------------------------------------------------------- - V a r i a b l e s -----------------------------------------------------------------------*/ -#define NUM_STARTING_SEAMS 20 - /*---------------------------------------------------------------------- Public Function Code ----------------------------------------------------------------------*/ -/** - * @name point_in_split - * - * Check to see if either of these points are present in the current - * split. - * @returns TRUE if one of them is split. 
- */ -bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2) { - return ((split) ? ((exact_point (split->point1, point1) || - exact_point (split->point1, point2) || - exact_point (split->point2, point1) || - exact_point (split->point2, point2)) ? TRUE : FALSE) - : FALSE); -} - -/** - * @name point_in_seam - * - * Check to see if either of these points are present in the current - * seam. - * @returns TRUE if one of them is. - */ -bool point_in_seam(const SEAM *seam, SPLIT *split) { - return (point_in_split(seam->split1, split->point1, split->point2) || - point_in_split(seam->split2, split->point1, split->point2) || - point_in_split(seam->split3, split->point1, split->point2)); -} - -/** - * @name point_used_by_split - * - * Return whether this particular EDGEPT * is used in a given split. - * @returns TRUE if the edgept is used by the split. - */ -bool point_used_by_split(SPLIT *split, EDGEPT *point) { - if (split == NULL) return false; - return point == split->point1 || point == split->point2; -} - -/** - * @name point_used_by_seam - * - * Return whether this particular EDGEPT * is used in a given seam. - * @returns TRUE if the edgept is used by the seam. - */ -bool point_used_by_seam(SEAM *seam, EDGEPT *point) { - if (seam == NULL) return false; - return point_used_by_split(seam->split1, point) || - point_used_by_split(seam->split2, point) || - point_used_by_split(seam->split3, point); -} - -/** - * @name combine_seam - * - * Combine two seam records into a single seam. Move the split - * references from the second seam to the first one. The argument - * convention is patterned after strcpy. 
- */ -void combine_seams(SEAM *dest_seam, SEAM *source_seam) { - dest_seam->priority += source_seam->priority; - dest_seam->location += source_seam->location; - dest_seam->location /= 2; - - if (source_seam->split1) { - if (!dest_seam->split1) - dest_seam->split1 = source_seam->split1; - else if (!dest_seam->split2) - dest_seam->split2 = source_seam->split1; - else if (!dest_seam->split3) - dest_seam->split3 = source_seam->split1; - else - delete source_seam->split1; // Wouldn't have fitted. - source_seam->split1 = NULL; +// Returns the bounding box of all the points in the seam. +TBOX SEAM::bounding_box() const { + TBOX box(location_.x, location_.y, location_.x, location_.y); + for (int s = 0; s < num_splits_; ++s) { + box += splits_[s].bounding_box(); } - if (source_seam->split2) { - if (!dest_seam->split2) - dest_seam->split2 = source_seam->split2; - else if (!dest_seam->split3) - dest_seam->split3 = source_seam->split2; - else - delete source_seam->split2; // Wouldn't have fitted. - source_seam->split2 = NULL; + return box; +} + +// Returns true if other can be combined into *this. +bool SEAM::CombineableWith(const SEAM& other, int max_x_dist, + float max_total_priority) const { + int dist = location_.x - other.location_.x; + if (-max_x_dist < dist && dist < max_x_dist && + num_splits_ + other.num_splits_ <= kMaxNumSplits && + priority_ + other.priority_ < max_total_priority && + !OverlappingSplits(other) && !SharesPosition(other)) { + return true; + } else { + return false; } - if (source_seam->split3) { - if (!dest_seam->split3) - dest_seam->split3 = source_seam->split3; - else - delete source_seam->split3; // Wouldn't have fitted. - source_seam->split3 = NULL; +} + +// Combines other into *this. Only works if CombineableWith returned true. 
+void SEAM::CombineWith(const SEAM& other) { + priority_ += other.priority_; + location_ += other.location_; + location_ /= 2; + + for (int s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) + splits_[num_splits_++] = other.splits_[s]; +} + +// Returns true if the splits in *this SEAM appear OK in the sense that they +// do not cross any outlines and do not chop off any ridiculously small +// pieces. +bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const { + // TODO(rays) Try testing all the splits. Duplicating original code for now, + // which tested only the first. + return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area); +} + +// Computes the widthp_/widthn_ range for all existing SEAMs and for *this +// seam, which is about to be inserted at insert_index. Returns false if +// any of the computations fails, as this indicates an invalid chop. +// widthn_/widthp_ are only changed if modify is true. +bool SEAM::PrepareToInsertSeam(const GenericVector& seams, + const GenericVector& blobs, + int insert_index, bool modify) { + for (int s = 0; s < insert_index; ++s) { + if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false; } - delete source_seam; + if (!FindBlobWidth(blobs, insert_index, modify)) return false; + for (int s = insert_index; s < seams.size(); ++s) { + if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false; + } + return true; +} + +// Computes the widthp_/widthn_ range. Returns false if not all the splits +// are accounted for. widthn_/widthp_ are only changed if modify is true. +bool SEAM::FindBlobWidth(const GenericVector& blobs, int index, + bool modify) { + int num_found = 0; + if (modify) { + widthp_ = 0; + widthn_ = 0; + } + for (int s = 0; s < num_splits_; ++s) { + const SPLIT& split = splits_[s]; + bool found_split = split.ContainedByBlob(*blobs[index]); + // Look right. 
+ for (int b = index + 1; !found_split && b < blobs.size(); ++b) { + found_split = split.ContainedByBlob(*blobs[b]); + if (found_split && b - index > widthp_ && modify) widthp_ = b - index; + } + // Look left. + for (int b = index - 1; !found_split && b >= 0; --b) { + found_split = split.ContainedByBlob(*blobs[b]); + if (found_split && index - b > widthn_ && modify) widthn_ = index - b; + } + if (found_split) ++num_found; + } + return num_found == num_splits_; +} + +// Splits this blob into two blobs by applying the splits included in +// *this SEAM +void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const { + for (int s = 0; s < num_splits_; ++s) { + splits_[s].SplitOutlineList(blob->outlines); + } + blob->ComputeBoundingBoxes(); + + divide_blobs(blob, other_blob, italic_blob, location_); + + blob->EliminateDuplicateOutlines(); + other_blob->EliminateDuplicateOutlines(); + + blob->CorrectBlobOrder(other_blob); +} + +// Undoes ApplySeam by removing the seam between these two blobs. +// Produces one blob as a result, and deletes other_blob. +void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const { + if (blob->outlines == NULL) { + blob->outlines = other_blob->outlines; + other_blob->outlines = NULL; + } + + TESSLINE* outline = blob->outlines; + while (outline->next) outline = outline->next; + outline->next = other_blob->outlines; + other_blob->outlines = NULL; + delete other_blob; + + for (int s = 0; s < num_splits_; ++s) { + splits_[s].UnsplitOutlineList(blob); + } + blob->ComputeBoundingBoxes(); + blob->EliminateDuplicateOutlines(); +} + +// Prints everything in *this SEAM. +void SEAM::Print(const char* label) const { + tprintf(label); + tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y, + widthp_, widthn_); + for (int s = 0; s < num_splits_; ++s) { + splits_[s].Print(); + if (s + 1 < num_splits_) tprintf(", "); + } + tprintf("\n"); +} + +// Prints a collection of SEAMs. 
+/* static */ +void SEAM::PrintSeams(const char* label, const GenericVector& seams) { + if (!seams.empty()) { + tprintf("%s\n", label); + for (int x = 0; x < seams.size(); ++x) { + tprintf("%2d: ", x); + seams[x]->Print(""); + } + tprintf("\n"); + } +} + +#ifndef GRAPHICS_DISABLED +// Draws the seam in the given window. +void SEAM::Mark(ScrollView* window) const { + for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window); +} +#endif + +// Break up the blobs in this chain so that they are all independent. +// This operation should undo the effect of JoinPieces. +/* static */ +void SEAM::BreakPieces(const GenericVector& seams, + const GenericVector& blobs, int first, + int last) { + for (int x = first; x < last; ++x) seams[x]->Reveal(); + + TESSLINE* outline = blobs[first]->outlines; + int next_blob = first + 1; + + while (outline != NULL && next_blob <= last) { + if (outline->next == blobs[next_blob]->outlines) { + outline->next = NULL; + outline = blobs[next_blob]->outlines; + ++next_blob; + } else { + outline = outline->next; + } + } +} + +// Join a group of base level pieces into a single blob that can then +// be classified. +/* static */ +void SEAM::JoinPieces(const GenericVector& seams, + const GenericVector& blobs, int first, int last) { + TESSLINE* outline = blobs[first]->outlines; + if (!outline) + return; + + for (int x = first; x < last; ++x) { + SEAM *seam = seams[x]; + if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide(); + while (outline->next) outline = outline->next; + outline->next = blobs[x + 1]->outlines; + } +} + +// Hides the seam so the outlines appear not to be cut by it. +void SEAM::Hide() const { + for (int s = 0; s < num_splits_; ++s) { + splits_[s].Hide(); + } +} + +// Undoes hide, so the outlines are cut by the seam. +void SEAM::Reveal() const { + for (int s = 0; s < num_splits_; ++s) { + splits_[s].Reveal(); + } +} + +// Computes and returns, but does not set, the full priority of *this SEAM. 
+float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, + int centered_maxwidth, double center_knob, + double width_change_knob) const { + if (num_splits_ == 0) return 0.0f; + for (int s = 1; s < num_splits_; ++s) { + splits_[s].SplitOutline(); + } + float full_priority = + priority_ + + splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, + center_knob, width_change_knob); + for (int s = num_splits_ - 1; s >= 1; --s) { + splits_[s].UnsplitOutlines(); + } + return full_priority; } /** @@ -144,7 +266,7 @@ void combine_seams(SEAM *dest_seam, SEAM *source_seam) { * present in the starting segmentation. Each of the seams created * by this routine have location information only. */ -void start_seam_list(TWERD *word, GenericVector* seam_array) { +void start_seam_list(TWERD* word, GenericVector* seam_array) { seam_array->truncate(0); TPOINT location; @@ -153,381 +275,6 @@ void start_seam_list(TWERD *word, GenericVector* seam_array) { TBOX nbox = word->blobs[b]->bounding_box(); location.x = (bbox.right() + nbox.left()) / 2; location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4; - seam_array->push_back(new SEAM(0.0f, location, NULL, NULL, NULL)); - } -} - - -/** - * @name test_insert_seam - * - * @returns true if insert_seam will succeed. 
- */ -bool test_insert_seam(const GenericVector& seam_array, - TWERD *word, int index) { - SEAM *test_seam; - int list_length = seam_array.size(); - for (int test_index = 0; test_index < index; ++test_index) { - test_seam = seam_array[test_index]; - if (test_index + test_seam->widthp < index && - test_seam->widthp + test_index == index - 1 && - account_splits(test_seam, word, test_index + 1, 1) < 0) - return false; - } - for (int test_index = index; test_index < list_length; test_index++) { - test_seam = seam_array[test_index]; - if (test_index - test_seam->widthn >= index && - test_index - test_seam->widthn == index && - account_splits(test_seam, word, test_index + 1, -1) < 0) - return false; - } - return true; -} - -/** - * @name insert_seam - * - * Add another seam to a collection of seams at a particular location - * in the seam array. - */ -void insert_seam(const TWERD* word, int index, SEAM *seam, - GenericVector* seam_array) { - SEAM *test_seam; - int list_length = seam_array->size(); - for (int test_index = 0; test_index < index; ++test_index) { - test_seam = seam_array->get(test_index); - if (test_index + test_seam->widthp >= index) { - test_seam->widthp++; /*got in the way */ - } else if (test_seam->widthp + test_index == index - 1) { - test_seam->widthp = account_splits(test_seam, word, test_index + 1, 1); - if (test_seam->widthp < 0) { - tprintf("Failed to find any right blob for a split!\n"); - print_seam("New dud seam", seam); - print_seam("Failed seam", test_seam); - } - } - } - for (int test_index = index; test_index < list_length; test_index++) { - test_seam = seam_array->get(test_index); - if (test_index - test_seam->widthn < index) { - test_seam->widthn++; /*got in the way */ - } else if (test_index - test_seam->widthn == index) { - test_seam->widthn = account_splits(test_seam, word, test_index + 1, -1); - if (test_seam->widthn < 0) { - tprintf("Failed to find any left blob for a split!\n"); - print_seam("New dud seam", seam); - 
print_seam("Failed seam", test_seam); - } - } - } - seam_array->insert(seam, index); -} - - -/** - * @name account_splits - * - * Account for all the splits by looking to the right (blob_direction == 1), - * or to the left (blob_direction == -1) in the word. - */ -int account_splits(const SEAM *seam, const TWERD *word, int blob_index, - int blob_direction) { - inT8 found_em[3]; - inT8 width; - - found_em[0] = seam->split1 == NULL; - found_em[1] = seam->split2 == NULL; - found_em[2] = seam->split3 == NULL; - if (found_em[0] && found_em[1] && found_em[2]) - return 0; - width = 0; - do { - TBLOB* blob = word->blobs[blob_index]; - if (!found_em[0]) - found_em[0] = find_split_in_blob(seam->split1, blob); - if (!found_em[1]) - found_em[1] = find_split_in_blob(seam->split2, blob); - if (!found_em[2]) - found_em[2] = find_split_in_blob(seam->split3, blob); - if (found_em[0] && found_em[1] && found_em[2]) { - return width; - } - width++; - blob_index += blob_direction; - } while (0 <= blob_index && blob_index < word->NumBlobs()); - return -1; -} - - -/** - * @name find_split_in_blob - * - * @returns TRUE if the split is somewhere in this blob. - */ -bool find_split_in_blob(SPLIT *split, TBLOB *blob) { - TESSLINE *outline; - - for (outline = blob->outlines; outline != NULL; outline = outline->next) - if (outline->Contains(split->point1->pos)) - break; - if (outline == NULL) - return FALSE; - for (outline = blob->outlines; outline != NULL; outline = outline->next) - if (outline->Contains(split->point2->pos)) - return TRUE; - return FALSE; -} - - -/** - * @name join_two_seams - * - * Merge these two seams into a new seam. Duplicate the split records - * in both of the input seams. Return the resultant seam. 
- */ -SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2) { - SEAM *result = NULL; - SEAM *temp; - - assert(seam1 &&seam2); - - if (((seam1->split3 == NULL && seam2->split2 == NULL) || - (seam1->split2 == NULL && seam2->split3 == NULL) || - seam1->split1 == NULL || seam2->split1 == NULL) && - (!shared_split_points(seam1, seam2))) { - result = new SEAM(*seam1); - temp = new SEAM(*seam2); - combine_seams(result, temp); - } - return (result); -} - -/** - * @name print_seam - * - * Print a list of splits. Show the coordinates of both points in - * each split. - */ -void print_seam(const char *label, SEAM *seam) { - if (seam) { - tprintf(label); - tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", - seam->priority, seam->location.x, seam->location.y, - seam->widthp, seam->widthn); - print_split(seam->split1); - - if (seam->split2) { - tprintf(", "); - print_split (seam->split2); - if (seam->split3) { - tprintf(", "); - print_split (seam->split3); - } - } - tprintf("\n"); - } -} - - -/** - * @name print_seams - * - * Print a list of splits. Show the coordinates of both points in - * each split. - */ -void print_seams(const char *label, const GenericVector& seams) { - char number[CHARS_PER_LINE]; - - if (!seams.empty()) { - tprintf("%s\n", label); - for (int x = 0; x < seams.size(); ++x) { - sprintf(number, "%2d: ", x); - print_seam(number, seams[x]); - } - tprintf("\n"); - } -} - - -/** - * @name shared_split_points - * - * Check these two seams to make sure that neither of them have two - * points in common. Return TRUE if any of the same points are present - * in any of the splits of both seams. 
- */ -int shared_split_points(const SEAM *seam1, const SEAM *seam2) { - if (seam1 == NULL || seam2 == NULL) - return (FALSE); - - if (seam2->split1 == NULL) - return (FALSE); - if (point_in_seam(seam1, seam2->split1)) - return (TRUE); - - if (seam2->split2 == NULL) - return (FALSE); - if (point_in_seam(seam1, seam2->split2)) - return (TRUE); - - if (seam2->split3 == NULL) - return (FALSE); - if (point_in_seam(seam1, seam2->split3)) - return (TRUE); - - return (FALSE); -} - -/********************************************************************** - * break_pieces - * - * Break up the blobs in this chain so that they are all independent. - * This operation should undo the affect of join_pieces. - **********************************************************************/ -void break_pieces(const GenericVector& seams, int first, int last, - TWERD *word) { - for (int x = first; x < last; ++x) - reveal_seam(seams[x]); - - TESSLINE *outline = word->blobs[first]->outlines; - int next_blob = first + 1; - - while (outline != NULL && next_blob <= last) { - if (outline->next == word->blobs[next_blob]->outlines) { - outline->next = NULL; - outline = word->blobs[next_blob]->outlines; - ++next_blob; - } else { - outline = outline->next; - } - } -} - - -/********************************************************************** - * join_pieces - * - * Join a group of base level pieces into a single blob that can then - * be classified. 
- **********************************************************************/ -void join_pieces(const GenericVector& seams, int first, int last, - TWERD *word) { - TESSLINE *outline = word->blobs[first]->outlines; - if (!outline) - return; - - for (int x = first; x < last; ++x) { - SEAM *seam = seams[x]; - if (x - seam->widthn >= first && x + seam->widthp < last) - hide_seam(seam); - while (outline->next) - outline = outline->next; - outline->next = word->blobs[x + 1]->outlines; - } -} - - -/********************************************************************** - * hide_seam - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. - **********************************************************************/ -void hide_seam(SEAM *seam) { - if (seam == NULL || seam->split1 == NULL) - return; - hide_edge_pair (seam->split1->point1, seam->split1->point2); - - if (seam->split2 == NULL) - return; - hide_edge_pair (seam->split2->point1, seam->split2->point2); - - if (seam->split3 == NULL) - return; - hide_edge_pair (seam->split3->point1, seam->split3->point2); -} - - -/********************************************************************** - * hide_edge_pair - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. 
- **********************************************************************/ -void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2) { - EDGEPT *edgept; - - edgept = pt1; - do { - edgept->Hide(); - edgept = edgept->next; - } - while (!exact_point (edgept, pt2) && edgept != pt1); - if (edgept == pt1) { - /* tprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ - } - edgept = pt2; - do { - edgept->Hide(); - edgept = edgept->next; - } - while (!exact_point (edgept, pt1) && edgept != pt2); - if (edgept == pt2) { - /* tprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ - } -} - - -/********************************************************************** - * reveal_seam - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. - **********************************************************************/ -void reveal_seam(SEAM *seam) { - if (seam == NULL || seam->split1 == NULL) - return; - reveal_edge_pair (seam->split1->point1, seam->split1->point2); - - if (seam->split2 == NULL) - return; - reveal_edge_pair (seam->split2->point1, seam->split2->point2); - - if (seam->split3 == NULL) - return; - reveal_edge_pair (seam->split3->point1, seam->split3->point2); -} - - -/********************************************************************** - * reveal_edge_pair - * - * Change the edge points that are referenced by this seam to make - * them hidden edges. 
- **********************************************************************/ -void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2) { - EDGEPT *edgept; - - edgept = pt1; - do { - edgept->Reveal(); - edgept = edgept->next; - } - while (!exact_point (edgept, pt2) && edgept != pt1); - if (edgept == pt1) { - /* tprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ - } - edgept = pt2; - do { - edgept->Reveal(); - edgept = edgept->next; - } - while (!exact_point (edgept, pt1) && edgept != pt2); - if (edgept == pt2) { - /* tprintf("Hid entire outline at (%d,%d)!!\n", - edgept->pos.x,edgept->pos.y); */ + seam_array->push_back(new SEAM(0.0f, location)); } } diff --git a/ccstruct/seam.h b/ccstruct/seam.h index 23b7bc71..9ae63148 100644 --- a/ccstruct/seam.h +++ b/ccstruct/seam.h @@ -36,95 +36,163 @@ ----------------------------------------------------------------------*/ typedef float PRIORITY; /* PRIORITY */ -struct SEAM { - // Constructor that was formerly new_seam. - SEAM(PRIORITY priority0, const TPOINT& location0, - SPLIT *splita, SPLIT *splitb, SPLIT *splitc) - : priority(priority0), widthp(0), widthn(0), location(location0), - split1(splita), split2(splitb), split3(splitc) {} - // Copy constructor that was formerly clone_seam. - SEAM(const SEAM& src) - : priority(src.priority), widthp(src.widthp), widthn(src.widthn), - location(src.location) { - clone_split(split1, src.split1); - clone_split(split2, src.split2); - clone_split(split3, src.split3); +class SEAM { + public: + // A seam with no splits + SEAM(float priority, const TPOINT& location) + : priority_(priority), + location_(location), + widthp_(0), + widthn_(0), + num_splits_(0) {} + // A seam with a single split point. + SEAM(float priority, const TPOINT& location, const SPLIT& split) + : priority_(priority), + location_(location), + widthp_(0), + widthn_(0), + num_splits_(1) { + splits_[0] = split; } - // Destructor was delete_seam. 
- ~SEAM() { - if (split1) - delete_split(split1); - if (split2) - delete_split(split2); - if (split3) - delete_split(split3); + // Default copy constructor, operator= and destructor are OK! + + // Accessors. + float priority() const { return priority_; } + void set_priority(float priority) { priority_ = priority; } + bool HasAnySplits() const { return num_splits_ > 0; } + + // Returns the bounding box of all the points in the seam. + TBOX bounding_box() const; + + // Returns true if other can be combined into *this. + bool CombineableWith(const SEAM& other, int max_x_dist, + float max_total_priority) const; + // Combines other into *this. Only works if CombineableWith returned true. + void CombineWith(const SEAM& other); + + // Returns true if the given blob contains all splits of *this SEAM. + bool ContainedByBlob(const TBLOB& blob) const { + for (int s = 0; s < num_splits_; ++s) { + if (!splits_[s].ContainedByBlob(blob)) return false; + } + return true; } - PRIORITY priority; - inT8 widthp; - inT8 widthn; - TPOINT location; - SPLIT *split1; - SPLIT *split2; - SPLIT *split3; + // Returns true if the given EDGEPT is used by this SEAM, checking only + // the EDGEPT pointer, not the coordinates. + bool UsesPoint(const EDGEPT* point) const { + for (int s = 0; s < num_splits_; ++s) { + if (splits_[s].UsesPoint(point)) return true; + } + return false; + } + // Returns true if *this and other share any common point, by coordinates. + bool SharesPosition(const SEAM& other) const { + for (int s = 0; s < num_splits_; ++s) { + for (int t = 0; t < other.num_splits_; ++t) + if (splits_[s].SharesPosition(other.splits_[t])) return true; + } + return false; + } + // Returns true if *this and other have any vertically overlapping splits. 
+ bool OverlappingSplits(const SEAM& other) const { + for (int s = 0; s < num_splits_; ++s) { + TBOX split1_box = splits_[s].bounding_box(); + for (int t = 0; t < other.num_splits_; ++t) { + TBOX split2_box = other.splits_[t].bounding_box(); + if (split1_box.y_overlap(split2_box)) return true; + } + } + return false; + } + + // Marks the edgepts used by the seam so the segments made by the cut + // never get split further by another seam in the future. + void Finalize() { + for (int s = 0; s < num_splits_; ++s) { + splits_[s].point1->MarkChop(); + splits_[s].point2->MarkChop(); + } + } + + // Returns true if the splits in *this SEAM appear OK in the sense that they + // do not cross any outlines and do not chop off any ridiculously small + // pieces. + bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const; + + // Computes the widthp_/widthn_ range for all existing SEAMs and for *this + // seam, which is about to be inserted at insert_index. Returns false if + // any of the computations fails, as this indicates an invalid chop. + // widthn_/widthp_ are only changed if modify is true. + bool PrepareToInsertSeam(const GenericVector& seams, + const GenericVector& blobs, int insert_index, + bool modify); + // Computes the widthp_/widthn_ range. Returns false if not all the splits + // are accounted for. widthn_/widthp_ are only changed if modify is true. + bool FindBlobWidth(const GenericVector& blobs, int index, + bool modify); + + // Splits this blob into two blobs by applying the splits included in + // *this SEAM + void ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const; + // Undoes ApplySeam by removing the seam between these two blobs. + // Produces one blob as a result, and deletes other_blob. + void UndoSeam(TBLOB* blob, TBLOB* other_blob) const; + + // Prints everything in *this SEAM. + void Print(const char* label) const; + // Prints a collection of SEAMs. 
+ static void PrintSeams(const char* label, const GenericVector& seams); +#ifndef GRAPHICS_DISABLED + // Draws the seam in the given window. + void Mark(ScrollView* window) const; +#endif + + // Break up the blobs in this chain so that they are all independent. + // This operation should undo the effect of JoinPieces. + static void BreakPieces(const GenericVector& seams, + const GenericVector& blobs, int first, + int last); + // Join a group of base level pieces into a single blob that can then + // be classified. + static void JoinPieces(const GenericVector& seams, + const GenericVector& blobs, int first, + int last); + + // Hides the seam so the outlines appear not to be cut by it. + void Hide() const; + // Undoes hide, so the outlines are cut by the seam. + void Reveal() const; + + // Computes and returns, but does not set, the full priority of *this SEAM. + // The arguments here are config parameters defined in Wordrec. Add chop_ + // to the beginning of the name. + float FullPriority(int xmin, int xmax, double overlap_knob, + int centered_maxwidth, double center_knob, + double width_change_knob) const; + + private: + // Maximum number of splits that a SEAM can hold. + static const int kMaxNumSplits = 3; + // Priority of this seam. Lower is better. + float priority_; + // Position of the middle of the seam. + TPOINT location_; + // A range such that all splits in *this SEAM are contained within blobs in + // the range [index - widthn_,index + widthp_] where index is the index of + // this SEAM in the seams vector. + inT8 widthp_; + inT8 widthn_; + // Number of splits_ that are used. + inT8 num_splits_; + // Set of pairs of points that are the ends of each split in the SEAM. + SPLIT splits_[kMaxNumSplits]; }; -/** - * exact_point - * - * Return TRUE if the point positions are the exactly the same. The - * parameters must be of type (EDGEPT*). - */ - -#define exact_point(p1,p2) \ - (! 
((p1->pos.x - p2->pos.x) || (p1->pos.y - p2->pos.y))) - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -bool point_in_split(SPLIT *split, EDGEPT *point1, EDGEPT *point2); -bool point_in_seam(const SEAM *seam, SPLIT *split); - -bool point_used_by_split(SPLIT *split, EDGEPT *point); - -bool point_used_by_seam(SEAM *seam, EDGEPT *point); - -void combine_seams(SEAM *dest_seam, SEAM *source_seam); - -void start_seam_list(TWERD *word, GenericVector* seam_array); - -bool test_insert_seam(const GenericVector& seam_array, - TWERD *word, int index); - -void insert_seam(const TWERD *word, int index, SEAM *seam, - GenericVector* seam_array); - -int account_splits(const SEAM *seam, const TWERD *word, int blob_index, - int blob_direction); - -bool find_split_in_blob(SPLIT *split, TBLOB *blob); - -SEAM *join_two_seams(const SEAM *seam1, const SEAM *seam2); - -void print_seam(const char *label, SEAM *seam); - -void print_seams(const char *label, const GenericVector& seams); - -int shared_split_points(const SEAM *seam1, const SEAM *seam2); - -void break_pieces(const GenericVector& seams, - int first, int last, TWERD *word); - -void join_pieces(const GenericVector& seams, - int first, int last, TWERD *word); - -void hide_seam(SEAM *seam); - -void hide_edge_pair(EDGEPT *pt1, EDGEPT *pt2); - -void reveal_seam(SEAM *seam); - -void reveal_edge_pair(EDGEPT *pt1, EDGEPT *pt2); +void start_seam_list(TWERD* word, GenericVector* seam_array); #endif diff --git a/ccstruct/split.cpp b/ccstruct/split.cpp index a2e974ef..24650d4f 100644 --- a/ccstruct/split.cpp +++ b/ccstruct/split.cpp @@ -36,23 +36,103 @@ /*---------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ +// Limit on the amount of penalty for the chop being off-center. 
+const int kCenterGradeCap = 25; +// Ridiculously large priority for splits that are no use. +const double kBadPriority = 999.0; + BOOL_VAR(wordrec_display_splits, 0, "Display splits"); -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ - -/********************************************************************** - * delete_split - * - * Remove this split from existence. - **********************************************************************/ -void delete_split(SPLIT *split) { - if (split) { - delete split; - } +// Returns the bounding box of all the points in the split. +TBOX SPLIT::bounding_box() const { + return TBOX( + MIN(point1->pos.x, point2->pos.x), MIN(point1->pos.y, point2->pos.y), + MAX(point1->pos.x, point2->pos.x), MAX(point1->pos.y, point2->pos.y)); } +// Hides the SPLIT so the outlines appear not to be cut by it. +void SPLIT::Hide() const { + EDGEPT* edgept = point1; + do { + edgept->Hide(); + edgept = edgept->next; + } while (!edgept->EqualPos(*point2) && edgept != point1); + edgept = point2; + do { + edgept->Hide(); + edgept = edgept->next; + } while (!edgept->EqualPos(*point1) && edgept != point2); +} + +// Undoes hide, so the outlines are cut by the SPLIT. +void SPLIT::Reveal() const { + EDGEPT* edgept = point1; + do { + edgept->Reveal(); + edgept = edgept->next; + } while (!edgept->EqualPos(*point2) && edgept != point1); + edgept = point2; + do { + edgept->Reveal(); + edgept = edgept->next; + } while (!edgept->EqualPos(*point1) && edgept != point2); +} + +// Compute a split priority based on the bounding boxes of the parts. +// The arguments here are config parameters defined in Wordrec. Add chop_ +// to the beginning of the name. 
+float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, + int centered_maxwidth, double center_knob, + double width_change_knob) const { + TBOX box1 = Box12(); + TBOX box2 = Box21(); + int min_left = MIN(box1.left(), box2.left()); + int max_right = MAX(box1.right(), box2.right()); + if (xmin < min_left && xmax > max_right) return kBadPriority; + + float grade = 0.0f; + // grade_overlap. + int width1 = box1.width(); + int width2 = box2.width(); + int min_width = MIN(width1, width2); + int overlap = -box1.x_gap(box2); + if (overlap == min_width) { + grade += 100.0f; // Total overlap. + } else { + if (2 * overlap > min_width) overlap += 2 * overlap - min_width; + if (overlap > 0) grade += overlap_knob * overlap; + } + // grade_center_of_blob. + if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) { + grade += MIN(kCenterGradeCap, center_knob * abs(width1 - width2)); + } + // grade_width_change. + float width_change_grade = 20 - (max_right - min_left - MAX(width1, width2)); + if (width_change_grade > 0.0f) + grade += width_change_grade * width_change_knob; + return grade; +} + +// Returns true if *this SPLIT appears OK in the sense that it does not cross +// any outlines and does not chop off any ridiculously small pieces. +bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const { + return !IsLittleChunk(min_points, min_area) && + !blob.SegmentCrossesOutline(point1->pos, point2->pos); +} + +// Returns true if the split generates a small chunk in terms of either area +// or number of points. 
+bool SPLIT::IsLittleChunk(int min_points, int min_area) const { + if (point1->ShortNonCircularSegment(min_points, point2) && + point1->SegmentArea(point2) < min_area) { + return true; + } + if (point2->ShortNonCircularSegment(min_points, point1) && + point2->SegmentArea(point1) < min_area) { + return true; + } + return false; +} /********************************************************************** * make_edgept @@ -135,102 +215,113 @@ void remove_edgept(EDGEPT *point) { } /********************************************************************** - * new_split + * Print * - * Create a new split record and initialize it. Put it on the display - * list. + * Shows the coordinates of both points in a split. **********************************************************************/ -SPLIT *new_split(EDGEPT *point1, EDGEPT *point2) { - SPLIT *s = new SPLIT; - s->point1 = point1; - s->point2 = point2; - return (s); -} - - -/********************************************************************** - * print_split - * - * Print a list of splits. Show the coordinates of both points in - * each split. - **********************************************************************/ -void print_split(SPLIT *split) { - if (split) { - tprintf("(%d,%d)--(%d,%d)", - split->point1->pos.x, split->point1->pos.y, - split->point2->pos.x, split->point2->pos.y); +void SPLIT::Print() const { + if (this != NULL) { + tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x, + point2->pos.y); } } +#ifndef GRAPHICS_DISABLED +// Draws the split in the given window. +void SPLIT::Mark(ScrollView* window) const { + window->Pen(ScrollView::GREEN); + window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y); + window->UpdateWindow(); +} +#endif -/********************************************************************** - * split_outline - * - * Split between these two edge points. 
- **********************************************************************/ -void split_outline(EDGEPT *join_point1, EDGEPT *join_point2) { - assert(join_point1 != join_point2); +// Creates two outlines out of one by splitting the original one in half. +// Inserts the resulting outlines into the given list. +void SPLIT::SplitOutlineList(TESSLINE* outlines) const { + SplitOutline(); + while (outlines->next != NULL) outlines = outlines->next; - EDGEPT* temp2 = join_point2->next; - EDGEPT* temp1 = join_point1->next; - /* Create two new points */ - EDGEPT* new_point1 = make_edgept(join_point1->pos.x, join_point1->pos.y, - temp1, join_point2); - EDGEPT* new_point2 = make_edgept(join_point2->pos.x, join_point2->pos.y, - temp2, join_point1); - // Join_point1 and 2 are now cross-over points, so they must have NULL - // src_outlines and give their src_outline information their new - // replacements. - new_point1->src_outline = join_point1->src_outline; - new_point1->start_step = join_point1->start_step; - new_point1->step_count = join_point1->step_count; - new_point2->src_outline = join_point2->src_outline; - new_point2->start_step = join_point2->start_step; - new_point2->step_count = join_point2->step_count; - join_point1->src_outline = NULL; - join_point1->start_step = 0; - join_point1->step_count = 0; - join_point2->src_outline = NULL; - join_point2->start_step = 0; - join_point2->step_count = 0; - join_point1->MarkChop(); - join_point2->MarkChop(); + outlines->next = new TESSLINE; + outlines->next->loop = point1; + outlines->next->ComputeBoundingBox(); + + outlines = outlines->next; + + outlines->next = new TESSLINE; + outlines->next->loop = point2; + outlines->next->ComputeBoundingBox(); + + outlines->next->next = NULL; } +// Makes a split between these two edge points, but does not affect the +// outlines to which they belong. 
+void SPLIT::SplitOutline() const { + EDGEPT* temp2 = point2->next; + EDGEPT* temp1 = point1->next; + /* Create two new points */ + EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2); + EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1); + // point1 and 2 are now cross-over points, so they must have NULL + // src_outlines and give their src_outline information their new + // replacements. + new_point1->src_outline = point1->src_outline; + new_point1->start_step = point1->start_step; + new_point1->step_count = point1->step_count; + new_point2->src_outline = point2->src_outline; + new_point2->start_step = point2->start_step; + new_point2->step_count = point2->step_count; + point1->src_outline = NULL; + point1->start_step = 0; + point1->step_count = 0; + point2->src_outline = NULL; + point2->start_step = 0; + point2->step_count = 0; +} -/********************************************************************** - * unsplit_outlines - * - * Remove the split that was put between these two points. - **********************************************************************/ -void unsplit_outlines(EDGEPT *p1, EDGEPT *p2) { - EDGEPT *tmp1 = p1->next; - EDGEPT *tmp2 = p2->next; +// Undoes the effect of SplitOutlineList, correcting the outlines for undoing +// the split, but possibly leaving some duplicate outlines. +void SPLIT::UnsplitOutlineList(TBLOB* blob) const { + /* Modify edge points */ + UnsplitOutlines(); - assert (p1 != p2); + TESSLINE* outline1 = new TESSLINE; + outline1->next = blob->outlines; + blob->outlines = outline1; + outline1->loop = point1; - tmp1->next->prev = p2; - tmp2->next->prev = p1; + TESSLINE* outline2 = new TESSLINE; + outline2->next = blob->outlines; + blob->outlines = outline2; + outline2->loop = point2; +} - // tmp2 is coincident with p1. p1 takes tmp2's place as tmp2 is deleted. 
- p1->next = tmp2->next; - p1->src_outline = tmp2->src_outline; - p1->start_step = tmp2->start_step; - p1->step_count = tmp2->step_count; - // Likewise p2 takes tmp1's place. - p2->next = tmp1->next; - p2->src_outline = tmp1->src_outline; - p2->start_step = tmp1->start_step; - p2->step_count = tmp1->step_count; - p1->UnmarkChop(); - p2->UnmarkChop(); +// Removes the split that was put between these two points. +void SPLIT::UnsplitOutlines() const { + EDGEPT* tmp1 = point1->next; + EDGEPT* tmp2 = point2->next; + + tmp1->next->prev = point2; + tmp2->next->prev = point1; + + // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is + // deleted. + point1->next = tmp2->next; + point1->src_outline = tmp2->src_outline; + point1->start_step = tmp2->start_step; + point1->step_count = tmp2->step_count; + // Likewise point2 takes tmp1's place. + point2->next = tmp1->next; + point2->src_outline = tmp1->src_outline; + point2->start_step = tmp1->start_step; + point2->step_count = tmp1->step_count; delete tmp1; delete tmp2; - p1->vec.x = p1->next->pos.x - p1->pos.x; - p1->vec.y = p1->next->pos.y - p1->pos.y; + point1->vec.x = point1->next->pos.x - point1->pos.x; + point1->vec.y = point1->next->pos.y - point1->pos.y; - p2->vec.x = p2->next->pos.x - p2->pos.x; - p2->vec.y = p2->next->pos.y - p2->pos.y; + point2->vec.x = point2->next->pos.x - point2->pos.x; + point2->vec.y = point2->next->pos.y - point2->pos.y; } diff --git a/ccstruct/split.h b/ccstruct/split.h index 7291b4cf..26424748 100644 --- a/ccstruct/split.h +++ b/ccstruct/split.h @@ -29,18 +29,80 @@ I n c l u d e s ----------------------------------------------------------------------*/ #include "blobs.h" -#include "oldlist.h" +#include "scrollview.h" /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ -typedef struct split_record -{ /* SPLIT */ +struct SPLIT { + SPLIT() : point1(NULL), point2(NULL) {} + 
SPLIT(EDGEPT* pt1, EDGEPT* pt2) : point1(pt1), point2(pt2) {} + + // Returns the bounding box of all the points in the split. + TBOX bounding_box() const; + // Returns the bounding box of the outline from point1 to point2. + TBOX Box12() const { return point1->SegmentBox(point2); } + // Returns the bounding box of the outline from point2 to point1. + TBOX Box21() const { return point2->SegmentBox(point1); } + // Returns the bounding box of the out + + // Hides the SPLIT so the outlines appear not to be cut by it. + void Hide() const; + // Undoes hide, so the outlines are cut by the SPLIT. + void Reveal() const; + + // Returns true if the given EDGEPT is used by this SPLIT, checking only + // the EDGEPT pointer, not the coordinates. + bool UsesPoint(const EDGEPT* point) const { + return point1 == point || point2 == point; + } + // Returns true if the other SPLIT has any position shared with *this. + bool SharesPosition(const SPLIT& other) const { + return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) || + point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2); + } + // Returns true if both points are contained within the blob. + bool ContainedByBlob(const TBLOB& blob) const { + return blob.Contains(point1->pos) && blob.Contains(point2->pos); + } + // Returns true if both points are contained within the outline. + bool ContainedByOutline(const TESSLINE& outline) const { + return outline.Contains(point1->pos) && outline.Contains(point2->pos); + } + // Compute a split priority based on the bounding boxes of the parts. + // The arguments here are config parameters defined in Wordrec. Add chop_ + // to the beginning of the name. + float FullPriority(int xmin, int xmax, double overlap_knob, + int centered_maxwidth, double center_knob, + double width_change_knob) const; + // Returns true if *this SPLIT appears OK in the sense that it does not cross + // any outlines and does not chop off any ridiculously small pieces. 
+ bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const; + // Returns true if the split generates a small chunk in terms of either area + // or number of points. + bool IsLittleChunk(int min_points, int min_area) const; + + void Print() const; +#ifndef GRAPHICS_DISABLED + // Draws the split in the given window. + void Mark(ScrollView* window) const; +#endif + + // Creates two outlines out of one by splitting the original one in half. + // Inserts the resulting outlines into the given list. + void SplitOutlineList(TESSLINE* outlines) const; + // Makes a split between these two edge points, but does not affect the + // outlines to which they belong. + void SplitOutline() const; + // Undoes the effect of SplitOutlineList, correcting the outlines for undoing + // the split, but possibly leaving some duplicate outlines. + void UnsplitOutlineList(TBLOB* blob) const; + // Removes the split that was put between these two points. + void UnsplitOutlines() const; + EDGEPT *point1; EDGEPT *point2; -} SPLIT; - -typedef LIST SPLITS; /* SPLITS */ +}; /*---------------------------------------------------------------------- V a r i a b l e s @@ -48,38 +110,11 @@ typedef LIST SPLITS; /* SPLITS */ extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits"); -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/********************************************************************** - * clone_split - * - * Create a new split record and set the contents equal to the contents - * of this record. 
- **********************************************************************/ - -#define clone_split(dest,source) \ -if (source) \ - (dest) = new_split ((source)->point1, (source)->point2); \ -else \ - (dest) = (SPLIT*) NULL \ - - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -void delete_split(SPLIT *split); - EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev); void remove_edgept(EDGEPT *point); -SPLIT *new_split(EDGEPT *point1, EDGEPT *point2); - -void print_split(SPLIT *split); - -void split_outline(EDGEPT *join_point1, EDGEPT *join_point2); - -void unsplit_outlines(EDGEPT *p1, EDGEPT *p2); - #endif diff --git a/ccstruct/vecfuncs.cpp b/ccstruct/vecfuncs.cpp index 3f825173..8357c9aa 100644 --- a/ccstruct/vecfuncs.cpp +++ b/ccstruct/vecfuncs.cpp @@ -30,6 +30,7 @@ I n c l u d e s ----------------------------------------------------------------------*/ #include "vecfuncs.h" +#include "blobs.h" /*---------------------------------------------------------------------- F u n c t i o n s diff --git a/ccstruct/vecfuncs.h b/ccstruct/vecfuncs.h index 91bbb088..55cf3108 100644 --- a/ccstruct/vecfuncs.h +++ b/ccstruct/vecfuncs.h @@ -26,7 +26,6 @@ #define VECFUNCS_H #include -#include "blobs.h" struct EDGEPT; diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 0eaf1440..7bbc8471 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -359,8 +359,8 @@ void Classify::LearnPieces(const char* filename, int start, int length, return; if (length > 1) { - join_pieces(word->seam_array, start, start + length - 1, - word->chopped_word); + SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start, + start + length - 1); } TBLOB* blob = word->chopped_word->blobs[start]; // Rotate the blob if needed for classification. 
@@ -413,7 +413,8 @@ void Classify::LearnPieces(const char* filename, int start, int length, delete rotated_blob; } - break_pieces(word->seam_array, start, start + length - 1, word->chopped_word); + SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start, + start + length - 1); } // LearnPieces. /*---------------------------------------------------------------------------*/ diff --git a/wordrec/chop.cpp b/wordrec/chop.cpp index 9ae61bb9..c7310052 100644 --- a/wordrec/chop.cpp +++ b/wordrec/chop.cpp @@ -29,7 +29,6 @@ #include "chop.h" #include "outlines.h" -#include "olutil.h" #include "callcpp.h" #include "plotedges.h" #include "const.h" @@ -74,6 +73,11 @@ void Wordrec::add_point_to_list(PointHeap* point_heap, EDGEPT *point) { #endif } +// Returns true if the edgept supplied as input is an inside angle. This +// is determined by the angular change of the vectors from point to point. +bool Wordrec::is_inside_angle(EDGEPT *pt) { + return angle_change(pt->prev, pt, pt->next) < chop_inside_angle; +} /** * @name angle_change @@ -111,65 +115,6 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { return (angle); } -/** - * @name is_little_chunk - * - * Return TRUE if one of the pieces resulting from this split would - * less than some number of edge points. 
- */ -int Wordrec::is_little_chunk(EDGEPT *point1, EDGEPT *point2) { - EDGEPT *p = point1; /* Iterator */ - int counter = 0; - - do { - /* Go from P1 to P2 */ - if (is_same_edgept (point2, p)) { - if (is_small_area (point1, point2)) - return (TRUE); - else - break; - } - p = p->next; - } - while ((p != point1) && (counter++ < chop_min_outline_points)); - /* Go from P2 to P1 */ - p = point2; - counter = 0; - do { - if (is_same_edgept (point1, p)) { - return (is_small_area (point2, point1)); - } - p = p->next; - } - while ((p != point2) && (counter++ < chop_min_outline_points)); - - return (FALSE); -} - - -/** - * @name is_small_area - * - * Test the area defined by a split accross this outline. - */ -int Wordrec::is_small_area(EDGEPT *point1, EDGEPT *point2) { - EDGEPT *p = point1->next; /* Iterator */ - int area = 0; - TPOINT origin; - - do { - /* Go from P1 to P2 */ - origin.x = p->pos.x - point1->pos.x; - origin.y = p->pos.y - point1->pos.y; - area += CROSS (origin, p->vec); - p = p->next; - } - while (!is_same_edgept (point2, p)); - - return (area < chop_min_outline_area); -} - - /** * @name pick_close_point * diff --git a/wordrec/chopper.cpp b/wordrec/chopper.cpp index cf39ceb6..c1a57fcd 100644 --- a/wordrec/chopper.cpp +++ b/wordrec/chopper.cpp @@ -39,7 +39,6 @@ #include "findseam.h" #include "freelist.h" #include "globals.h" -#include "makechop.h" #include "render.h" #include "pageres.h" #include "seam.h" @@ -135,18 +134,14 @@ void restore_outline_tree(TESSLINE *srcline) { static SEAM* CheckSeam(int debug_level, inT32 blob_number, TWERD* word, TBLOB* blob, TBLOB* other_blob, const GenericVector& seams, SEAM* seam) { - if (seam == NULL || - blob->outlines == NULL || - other_blob->outlines == NULL || - total_containment(blob, other_blob) || - check_blob(other_blob) || - !(check_seam_order(blob, seam) && - check_seam_order(other_blob, seam)) || + if (seam == NULL || blob->outlines == NULL || other_blob->outlines == NULL || + total_containment(blob, other_blob) 
|| check_blob(other_blob) || + !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) || any_shared_split_points(seams, seam) || - !test_insert_seam(seams, word, blob_number)) { + !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) { word->blobs.remove(blob_number + 1); if (seam) { - undo_seam(blob, other_blob, seam); + seam->UndoSeam(blob, other_blob); delete seam; seam = NULL; #ifndef GRAPHICS_DISABLED @@ -185,19 +180,19 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number, if (prioritize_division) { TPOINT location; if (divisible_blob(blob, italic_blob, &location)) { - seam = new SEAM(0.0f, location, NULL, NULL, NULL); + seam = new SEAM(0.0f, location); } } if (seam == NULL) seam = pick_good_seam(blob); if (chop_debug) { if (seam != NULL) - print_seam("Good seam picked=", seam); + seam->Print("Good seam picked="); else tprintf("\n** no seam picked *** \n"); } if (seam) { - apply_seam(blob, other_blob, italic_blob, seam); + seam->ApplySeam(italic_blob, blob, other_blob); } seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, @@ -211,13 +206,17 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number, if (divisible_blob(blob, italic_blob, &location)) { other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */ word->blobs.insert(other_blob, blob_number + 1); - seam = new SEAM(0.0f, location, NULL, NULL, NULL); - apply_seam(blob, other_blob, italic_blob, seam); + seam = new SEAM(0.0f, location); + seam->ApplySeam(italic_blob, blob, other_blob); seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam); } } } + if (seam != NULL) { + // Make sure this seam doesn't get chopped again. 
+ seam->Finalize(); + } return seam; } @@ -286,8 +285,7 @@ int any_shared_split_points(const GenericVector& seams, SEAM *seam) { length = seams.size(); for (index = 0; index < length; index++) - if (shared_split_points(seams[index], seam)) - return TRUE; + if (seam->SharesPosition(*seams[index])) return TRUE; return FALSE; } @@ -384,50 +382,6 @@ SEAM* Wordrec::chop_one_blob(const GenericVector& boxes, blob_number); } } -} // namespace tesseract - -/** - * @name check_seam_order - * - * Make sure that each of the splits in this seam match to outlines - * in this blob. If any of the splits could not correspond to this - * blob then there is a problem (and FALSE should be returned to the - * caller). - */ -inT16 check_seam_order(TBLOB *blob, SEAM *seam) { - TESSLINE *outline; - inT8 found_em[3]; - - if (seam->split1 == NULL || blob == NULL) - return (TRUE); - - found_em[0] = found_em[1] = found_em[2] = FALSE; - - for (outline = blob->outlines; outline; outline = outline->next) { - if (!found_em[0] && - ((seam->split1 == NULL) || - is_split_outline (outline, seam->split1))) { - found_em[0] = TRUE; - } - if (!found_em[1] && - ((seam->split2 == NULL) || - is_split_outline (outline, seam->split2))) { - found_em[1] = TRUE; - } - if (!found_em[2] && - ((seam->split3 == NULL) || - is_split_outline (outline, seam->split3))) { - found_em[2] = TRUE; - } - } - - if (!found_em[0] || !found_em[1] || !found_em[2]) - return (FALSE); - else - return (TRUE); -} - -namespace tesseract { /** * @name chop_word_main diff --git a/wordrec/chopper.h b/wordrec/chopper.h index 7955a51f..4bfbf653 100644 --- a/wordrec/chopper.h +++ b/wordrec/chopper.h @@ -44,7 +44,5 @@ int any_shared_split_points(const GenericVector& seams, SEAM *seam); int check_blob(TBLOB *blob); -inT16 check_seam_order(TBLOB *blob, SEAM *seam); - inT16 total_containment(TBLOB *blob1, TBLOB *blob2); #endif diff --git a/wordrec/findseam.cpp b/wordrec/findseam.cpp index 786393c5..dd2de6e6 100644 --- a/wordrec/findseam.cpp +++ 
b/wordrec/findseam.cpp @@ -27,7 +27,6 @@ ----------------------------------------------------------------------*/ #include "findseam.h" #include "gradechop.h" -#include "olutil.h" #include "plotedges.h" #include "outlines.h" #include "freelist.h" @@ -67,7 +66,7 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, if (new_seam == NULL) return; if (chop_debug) { tprintf("Pushing new seam with priority %g :", new_priority); - print_seam("seam: ", new_seam); + new_seam->Print("seam: "); } if (seams->size() >= MAX_NUM_SEAMS) { SeamPair old_pair(0, NULL); @@ -101,12 +100,9 @@ void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam, * a split of NULL, then no further splits can be supplied by the * caller. **********************************************************************/ -void Wordrec::choose_best_seam(SeamQueue* seam_queue, - SPLIT *split, - PRIORITY priority, - SEAM **seam_result, - TBLOB *blob, - SeamPile* seam_pile) { +void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, + PRIORITY priority, SEAM **seam_result, + TBLOB *blob, SeamPile *seam_pile) { SEAM *seam; char str[80]; float my_priority; @@ -116,9 +112,8 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue, TPOINT split_point = split->point1->pos; split_point += split->point2->pos; split_point /= 2; - seam = new SEAM(my_priority, split_point, split, NULL, NULL); - if (chop_debug > 1) - print_seam ("Partial priority ", seam); + seam = new SEAM(my_priority, split_point, *split); + if (chop_debug > 1) seam->Print("Partial priority "); add_seam_to_queue(my_priority, seam, seam_queue); if (my_priority > chop_good_split) @@ -132,19 +127,22 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue, seam_queue->Pop(&seam_pair); seam = seam_pair.extract_data(); /* Set full priority */ - my_priority = seam_priority(seam, bbox.left(), bbox.right()); + my_priority = seam->FullPriority(bbox.left(), bbox.right(), + chop_overlap_knob, chop_centered_maxwidth, + 
chop_center_knob, chop_width_change_knob); if (chop_debug) { sprintf (str, "Full my_priority %0.0f, ", my_priority); - print_seam(str, seam); + seam->Print(str); } - if ((*seam_result == NULL || (*seam_result)->priority > my_priority) && + if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) && my_priority < chop_ok_split) { /* No crossing */ - if (constrained_split(seam->split1, blob)) { + if (seam->IsHealthy(*blob, chop_min_outline_points, + chop_min_outline_area)) { delete *seam_result; *seam_result = new SEAM(*seam); - (*seam_result)->priority = my_priority; + (*seam_result)->set_priority(my_priority); } else { delete seam; seam = NULL; @@ -198,104 +196,17 @@ void Wordrec::choose_best_seam(SeamQueue* seam_queue, **********************************************************************/ void Wordrec::combine_seam(const SeamPile& seam_pile, const SEAM* seam, SeamQueue* seam_queue) { - register inT16 dist; - inT16 bottom1, top1; - inT16 bottom2, top2; - - SEAM *new_one; - const SEAM *this_one; - - bottom1 = seam->split1->point1->pos.y; - if (seam->split1->point2->pos.y >= bottom1) - top1 = seam->split1->point2->pos.y; - else { - top1 = bottom1; - bottom1 = seam->split1->point2->pos.y; - } - if (seam->split2 != NULL) { - bottom2 = seam->split2->point1->pos.y; - if (seam->split2->point2->pos.y >= bottom2) - top2 = seam->split2->point2->pos.y; - else { - top2 = bottom2; - bottom2 = seam->split2->point2->pos.y; - } - } - else { - bottom2 = bottom1; - top2 = top1; - } for (int x = 0; x < seam_pile.size(); ++x) { - this_one = seam_pile.get(x).data(); - dist = seam->location.x - this_one->location.x; - if (-SPLIT_CLOSENESS < dist && - dist < SPLIT_CLOSENESS && - seam->priority + this_one->priority < chop_ok_split) { - inT16 split1_point1_y = this_one->split1->point1->pos.y; - inT16 split1_point2_y = this_one->split1->point2->pos.y; - inT16 split2_point1_y = 0; - inT16 split2_point2_y = 0; - if (this_one->split2) { - split2_point1_y = 
this_one->split2->point1->pos.y; - split2_point2_y = this_one->split2->point2->pos.y; - } - if ( - /*!tessedit_fix_sideways_chops || */ - ( - /* this_one->split1 always exists */ - ( - ((split1_point1_y >= top1 && split1_point2_y >= top1) || - (split1_point1_y <= bottom1 && split1_point2_y <= bottom1)) - && - ((split1_point1_y >= top2 && split1_point2_y >= top2) || - (split1_point1_y <= bottom2 && split1_point2_y <= bottom2)) - ) - ) - && - ( - this_one->split2 == NULL || - ( - ((split2_point1_y >= top1 && split2_point2_y >= top1) || - (split2_point1_y <= bottom1 && split2_point2_y <= bottom1)) - && - ((split2_point1_y >= top2 && split2_point2_y >= top2) || - (split2_point1_y <= bottom2 && split2_point2_y <= bottom2)) - ) - ) - ) { - new_one = join_two_seams (seam, this_one); - if (new_one != NULL) { - if (chop_debug > 1) - print_seam ("Combo priority ", new_one); - add_seam_to_queue(new_one->priority, new_one, seam_queue); - } - } + const SEAM *this_one = seam_pile.get(x).data(); + if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) { + SEAM *new_one = new SEAM(*seam); + new_one->CombineWith(*this_one); + if (chop_debug > 1) new_one->Print("Combo priority "); + add_seam_to_queue(new_one->priority(), new_one, seam_queue); } } } - -/********************************************************************** - * constrained_split - * - * Constrain this split to obey certain rules. It must not cross any - * inner outline. It must not cut off a small chunk of the outline. 
- **********************************************************************/ -inT16 Wordrec::constrained_split(SPLIT *split, TBLOB *blob) { - TESSLINE *outline; - - if (is_little_chunk (split->point1, split->point2)) - return (FALSE); - - for (outline = blob->outlines; outline; outline = outline->next) { - if (split_bounds_overlap (split, outline) && - crosses_outline (split->point1, split->point2, outline->loop)) { - return (FALSE); - } - } - return (TRUE); -} - /********************************************************************** * pick_good_seam * @@ -335,16 +246,15 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) { if (seam == NULL) { choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile); - } - else if (seam->priority > chop_good_split) { - choose_best_seam(&seam_queue, NULL, seam->priority, - &seam, blob, &seam_pile); + } else if (seam->priority() > chop_good_split) { + choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob, + &seam_pile); } EDGEPT_C_IT it(&new_points); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { EDGEPT *inserted_point = it.data(); - if (!point_used_by_seam(seam, inserted_point)) { + if (seam == NULL || !seam->UsesPoint(inserted_point)) { for (outline = blob->outlines; outline; outline = outline->next) { if (outline->loop == inserted_point) { outline->loop = outline->loop->next; @@ -355,18 +265,13 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) { } if (seam) { - if (seam->priority > chop_ok_split) { + if (seam->priority() > chop_ok_split) { delete seam; seam = NULL; } #ifndef GRAPHICS_DISABLED else if (wordrec_display_splits) { - if (seam->split1) - mark_split (seam->split1); - if (seam->split2) - mark_split (seam->split2); - if (seam->split3) - mark_split (seam->split3); + seam->Mark(edge_window); if (chop_debug > 2) { update_edge_window(); edge_window_wait(); @@ -382,42 +287,6 @@ SEAM *Wordrec::pick_good_seam(TBLOB *blob) { } -/********************************************************************** - 
* seam_priority - * - * Assign a full priority value to the seam. - **********************************************************************/ -PRIORITY Wordrec::seam_priority(SEAM *seam, inT16 xmin, inT16 xmax) { - PRIORITY priority; - - if (seam->split1 == NULL) - priority = 0; - - else if (seam->split2 == NULL) { - priority = (seam->priority + - full_split_priority (seam->split1, xmin, xmax)); - } - - else if (seam->split3 == NULL) { - split_outline (seam->split2->point1, seam->split2->point2); - priority = (seam->priority + - full_split_priority (seam->split1, xmin, xmax)); - unsplit_outlines (seam->split2->point1, seam->split2->point2); - } - - else { - split_outline (seam->split2->point1, seam->split2->point2); - split_outline (seam->split3->point1, seam->split3->point2); - priority = (seam->priority + - full_split_priority (seam->split1, xmin, xmax)); - unsplit_outlines (seam->split3->point1, seam->split3->point2); - unsplit_outlines (seam->split2->point1, seam->split2->point2); - } - - return (priority); -} - - /********************************************************************** * try_point_pairs * @@ -433,23 +302,20 @@ void Wordrec::try_point_pairs(EDGEPT * points[MAX_NUM_POINTS], TBLOB * blob) { inT16 x; inT16 y; - SPLIT *split; PRIORITY priority; for (x = 0; x < num_points; x++) { for (y = x + 1; y < num_points; y++) { - if (points[y] && - weighted_edgept_dist(points[x], points[y], - chop_x_y_weight) < chop_split_length && - points[x] != points[y]->next && - points[y] != points[x]->next && + points[x]->WeightedDistance(*points[y], chop_x_y_weight) < + chop_split_length && + points[x] != points[y]->next && points[y] != points[x]->next && !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) { - split = new_split (points[x], points[y]); - priority = partial_split_priority (split); + SPLIT split(points[x], points[y]); + priority = partial_split_priority(&split); - choose_best_seam(seam_queue, split, priority, seam, blob, 
seam_pile); + choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); } } } @@ -474,7 +340,6 @@ void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], SEAM ** seam, TBLOB * blob) { EDGEPT *vertical_point = NULL; - SPLIT *split; inT16 x; PRIORITY priority; TESSLINE *outline; @@ -486,16 +351,13 @@ void Wordrec::try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS], &vertical_point, new_points); } - if (vertical_point && - points[x] != vertical_point->next && - vertical_point != points[x]->next && - weighted_edgept_dist(points[x], vertical_point, - chop_x_y_weight) < chop_split_length) { - - split = new_split (points[x], vertical_point); - priority = partial_split_priority (split); - - choose_best_seam(seam_queue, split, priority, seam, blob, seam_pile); + if (vertical_point && points[x] != vertical_point->next && + vertical_point != points[x]->next && + points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < + chop_split_length) { + SPLIT split(points[x], vertical_point); + priority = partial_split_priority(&split); + choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile); } } } diff --git a/wordrec/gradechop.cpp b/wordrec/gradechop.cpp index dce35ba5..ace8dfc5 100644 --- a/wordrec/gradechop.cpp +++ b/wordrec/gradechop.cpp @@ -27,120 +27,19 @@ ----------------------------------------------------------------------*/ #include "gradechop.h" #include "wordrec.h" -#include "olutil.h" #include "chop.h" #include "ndminx.h" #include -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -#define CENTER_GRADE_CAP 25.0 - /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ -/********************************************************************** - * find_bounds_loop - * - * This is a macro to be used by set_outline_bounds. 
- **********************************************************************/ - -#define find_bounds_loop(point1,point2,x_min,x_max) \ - x_min = point2->pos.x; \ - x_max = point2->pos.x; \ - \ - this_point = point1; \ - do { \ - x_min = MIN (this_point->pos.x, x_min); \ - x_max = MAX (this_point->pos.x, x_max); \ - this_point = this_point->next; \ - } \ - while (this_point != point2 && this_point != point1) \ - namespace tesseract { /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -/********************************************************************** - * full_split_priority - * - * Assign a priority to this split based on the features that it has. - * Part of the priority has already been calculated so just return the - * additional amount for the bounding box type information. - **********************************************************************/ -PRIORITY Wordrec::full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax) { - BOUNDS_RECT rect; - - set_outline_bounds (split->point1, split->point2, rect); - - if (xmin < MIN (rect[0], rect[2]) && xmax > MAX (rect[1], rect[3])) - return (999.0); - - return (grade_overlap (rect) + - grade_center_of_blob (rect) + grade_width_change (rect)); -} - - -/********************************************************************** - * grade_center_of_blob - * - * Return a grade for the a split. 
Rank it on closeness to the center - * of the original blob - * 0 = "perfect" - * 100 = "no way jay" - **********************************************************************/ -PRIORITY Wordrec::grade_center_of_blob(register BOUNDS_RECT rect) { - register PRIORITY grade; - int width1 = rect[1] - rect[0]; - int width2 = rect[3] - rect[2]; - - if (width1 > chop_centered_maxwidth && - width2 > chop_centered_maxwidth) { - return 0.0; - } - - grade = width1 - width2; - if (grade < 0) - grade = -grade; - - grade *= chop_center_knob; - grade = MIN (CENTER_GRADE_CAP, grade); - return (MAX (0.0, grade)); -} - - -/********************************************************************** - * grade_overlap - * - * Return a grade for this split for the overlap of the resultant blobs. - * 0 = "perfect" - * 100 = "no way jay" - **********************************************************************/ -PRIORITY Wordrec::grade_overlap(register BOUNDS_RECT rect) { - register PRIORITY grade; - register inT16 width1; - register inT16 width2; - register inT16 overlap; - - width1 = rect[3] - rect[2]; - width2 = rect[1] - rect[0]; - - overlap = MIN (rect[1], rect[3]) - MAX (rect[0], rect[2]); - width1 = MIN (width1, width2); - if (overlap == width1) - return (100.0); /* Total overlap */ - - width1 = 2 * overlap - width1; /* Extra penalty for too */ - overlap += MAX (0, width1); /* much overlap */ - - grade = overlap * chop_overlap_knob; - - return (MAX (0.0, grade)); -} - /********************************************************************** * grade_split_length @@ -153,8 +52,8 @@ PRIORITY Wordrec::grade_split_length(register SPLIT *split) { register PRIORITY grade; register float split_length; - split_length = weighted_edgept_dist (split->point1, split->point2, - chop_x_y_weight); + split_length = + split->point1->WeightedDistance(*split->point2, chop_x_y_weight); if (split_length <= 0) grade = 0; @@ -188,51 +87,4 @@ PRIORITY Wordrec::grade_sharpness(register SPLIT *split) { } 
-/********************************************************************** - * grade_width_change - * - * Return a grade for the change in width of the resultant blobs. - * 0 = "perfect" - * 100 = "no way jay" - **********************************************************************/ -PRIORITY Wordrec::grade_width_change(register BOUNDS_RECT rect) { - register PRIORITY grade; - register inT32 width1; - register inT32 width2; - - width1 = rect[3] - rect[2]; - width2 = rect[1] - rect[0]; - - grade = 20 - (MAX (rect[1], rect[3]) - - MIN (rect[0], rect[2]) - MAX (width1, width2)); - - grade *= chop_width_change_knob; - - return (MAX (0.0, grade)); -} - - -/********************************************************************** - * set_outline_bounds - * - * Set up the limits for the x coordinate of the outline. - **********************************************************************/ -void Wordrec::set_outline_bounds(register EDGEPT *point1, - register EDGEPT *point2, - BOUNDS_RECT rect) { - register EDGEPT *this_point; - register inT16 x_min; - register inT16 x_max; - - find_bounds_loop(point1, point2, x_min, x_max); - - rect[0] = x_min; - rect[1] = x_max; - - find_bounds_loop(point2, point1, x_min, x_max); - - rect[2] = x_min; - rect[3] = x_max; -} - } // namespace tesseract diff --git a/wordrec/gradechop.h b/wordrec/gradechop.h index 469a140b..01e5bf26 100644 --- a/wordrec/gradechop.h +++ b/wordrec/gradechop.h @@ -32,11 +32,6 @@ #include "seam.h" #include "ndminx.h" -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -typedef inT16 BOUNDS_RECT[4]; - /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ @@ -52,18 +47,4 @@ typedef inT16 BOUNDS_RECT[4]; (grade_split_length (split) + \ grade_sharpness (split)) \ - 
-/********************************************************************** - * split_bounds_overlap - * - * Check to see if this split might overlap with this outline. Return - * TRUE if there is a positive overlap in the bounding boxes of the two. - **********************************************************************/ - -#define split_bounds_overlap(split,outline) \ -(outline->topleft.x <= MAX (split->point1->pos.x,split->point2->pos.x) && \ - outline->botright.x >= MIN (split->point1->pos.x,split->point2->pos.x) && \ - outline->botright.y <= MAX (split->point1->pos.y,split->point2->pos.y) && \ - outline->topleft.y >= MIN (split->point1->pos.y,split->point2->pos.y)) - #endif diff --git a/wordrec/makechop.cpp b/wordrec/makechop.cpp deleted file mode 100644 index d6795bc3..00000000 --- a/wordrec/makechop.cpp +++ /dev/null @@ -1,226 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: makechop.c (Formerly makechop.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Mon Jul 29 15:50:42 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ - -#include "makechop.h" -#include "blobs.h" -#include "render.h" -#include "structures.h" -#ifdef __UNIX__ -#include -#include -#endif - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -/*---------------------------------------------------------------------- - Public Function Code -----------------------------------------------------------------------*/ -/********************************************************************** - * apply_seam - * - * Split this blob into two blobs by applying the splits included in - * the seam description. - **********************************************************************/ -void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam) { - if (seam->split1 == NULL) { - divide_blobs(blob, other_blob, italic_blob, seam->location); - } - else if (seam->split2 == NULL) { - make_split_blobs(blob, other_blob, italic_blob, seam); - } - else if (seam->split3 == NULL) { - make_double_split(blob, other_blob, italic_blob, seam); - } - else { - make_triple_split(blob, other_blob, italic_blob, seam); - } -} - - -/********************************************************************** - * form_two_blobs - * - * Group the outlines from the first blob into both of them. Do so - * according to the information about the split. 
- **********************************************************************/ -void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - const TPOINT& location) { - setup_blob_outlines(blob); - - divide_blobs(blob, other_blob, italic_blob, location); - - eliminate_duplicate_outlines(blob); - eliminate_duplicate_outlines(other_blob); - - correct_blob_order(blob, other_blob); -} - - -/********************************************************************** - * make_double_split - * - * Create two blobs out of one by splitting the original one in half. - * Return the resultant blobs for classification. - **********************************************************************/ -void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - SEAM *seam) { - make_single_split(blob->outlines, seam->split1); - make_single_split(blob->outlines, seam->split2); - form_two_blobs(blob, other_blob, italic_blob, seam->location); -} - - -/********************************************************************** - * make_single_split - * - * Create two outlines out of one by splitting the original one in half. - * Return the resultant outlines. - **********************************************************************/ -void make_single_split(TESSLINE *outlines, SPLIT *split) { - assert (outlines != NULL); - - split_outline (split->point1, split->point2); - - while (outlines->next != NULL) - outlines = outlines->next; - - outlines->next = new TESSLINE; - outlines->next->loop = split->point1; - outlines->next->ComputeBoundingBox(); - - outlines = outlines->next; - - outlines->next = new TESSLINE; - outlines->next->loop = split->point2; - outlines->next->ComputeBoundingBox(); - - outlines->next->next = NULL; -} - - -/********************************************************************** - * make_split_blobs - * - * Create two blobs out of one by splitting the original one in half. - * Return the resultant blobs for classification. 
- **********************************************************************/ -void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - SEAM *seam) { - make_single_split(blob->outlines, seam->split1); - - form_two_blobs (blob, other_blob, italic_blob, seam->location); -} - - -/********************************************************************** - * make_triple_split - * - * Create two blobs out of one by splitting the original one in half. - * This splitting is accomplished by applying three separate splits on - * the outlines. Three of the starting outlines will produce two ending - * outlines. Return the resultant blobs for classification. - **********************************************************************/ -void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - SEAM *seam) { - make_single_split(blob->outlines, seam->split1); - make_single_split(blob->outlines, seam->split2); - make_single_split(blob->outlines, seam->split3); - - form_two_blobs(blob, other_blob, italic_blob, seam->location); -} - - -/********************************************************************** - * undo_seam - * - * Remove the seam between these two blobs. Produce one blob as a - * result. The seam may consist of one, two, or three splits. Each - * of these split must be removed from the outlines. 
- **********************************************************************/ -void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { - TESSLINE *outline; - - if (!seam) - return; /* Append other blob outlines */ - if (blob->outlines == NULL) { - blob->outlines = other_blob->outlines; - other_blob->outlines = NULL; - } - - outline = blob->outlines; - while (outline->next) - outline = outline->next; - outline->next = other_blob->outlines; - other_blob->outlines = NULL; - delete other_blob; - - if (seam->split1 == NULL) { - } - else if (seam->split2 == NULL) { - undo_single_split (blob, seam->split1); - } - else if (seam->split3 == NULL) { - undo_single_split (blob, seam->split1); - undo_single_split (blob, seam->split2); - } - else { - undo_single_split (blob, seam->split3); - undo_single_split (blob, seam->split2); - undo_single_split (blob, seam->split1); - } - - setup_blob_outlines(blob); - eliminate_duplicate_outlines(blob); -} - - -/********************************************************************** - * undo_single_split - * - * Undo a seam that is made by a single split. Perform the correct - * magic to reconstruct the appropriate set of outline data structures. 
- **********************************************************************/ -void undo_single_split(TBLOB *blob, SPLIT *split) { - TESSLINE *outline1; - TESSLINE *outline2; - /* Modify edge points */ - unsplit_outlines (split->point1, split->point2); - - outline1 = new TESSLINE; - outline1->next = blob->outlines; - blob->outlines = outline1; - outline1->loop = split->point1; - - outline2 = new TESSLINE; - outline2->next = blob->outlines; - blob->outlines = outline2; - outline2->loop = split->point2; -} diff --git a/wordrec/makechop.h b/wordrec/makechop.h deleted file mode 100644 index 1f2639cd..00000000 --- a/wordrec/makechop.h +++ /dev/null @@ -1,71 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: makechop.h (Formerly makechop.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Mon Jul 29 13:33:23 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#ifndef MAKECHOP_H -#define MAKECHOP_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "chop.h" -#include "olutil.h" - -/*---------------------------------------------------------------------- - M a c r o s ----------------------------------------------------------------------*/ -/********************************************************************** - * is_split_outline - * - * Check to see if both sides of the split fall within the bounding - * box of this outline. - **********************************************************************/ - -#define is_split_outline(outline,split) \ -(outline->Contains(split->point1->pos) && \ - outline->Contains(split->point2->pos)) \ - - -/*---------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------*/ -void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam); - -void form_two_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - const TPOINT& location); - -void make_double_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - SEAM *seam); - -void make_single_split(TESSLINE *outlines, SPLIT *split); - -void make_split_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - SEAM *seam); - -void make_triple_split(TBLOB *blob, TBLOB *other_blob, bool italic_blob, - SEAM *seam); - -void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam); - -void undo_single_split(TBLOB *blob, SPLIT *split); -#endif diff --git a/wordrec/olutil.cpp b/wordrec/olutil.cpp deleted file mode 100644 index dadf51af..00000000 --- a/wordrec/olutil.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: olutil.c 
(Formerly olutil.c) - * Description: - * Author: Mark Seaman, OCR Technology - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Fri May 17 13:11:24 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - *********************************************************************************/ -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "olutil.h" -#include "structures.h" -#include "blobs.h" -#include "const.h" - -#ifdef __UNIX__ -#include -#endif - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * correct_blob_order - * - * Check to see if the blobs are in the correct order. If they are not - * then swap which outlines are attached to which blobs. 
- **********************************************************************/ -void correct_blob_order(TBLOB *blob1, TBLOB *blob2) { - TPOINT origin1; - TPOINT origin2; - TESSLINE *temp; - - blob_origin(blob1, &origin1); - blob_origin(blob2, &origin2); - - if (origin1.x > origin2.x) { - temp = blob2->outlines; - blob2->outlines = blob1->outlines; - blob1->outlines = temp; - } -} - - -/********************************************************************** - * eliminate_duplicate_outlines - * - * Find and delete any duplicate outline records in this blob. - **********************************************************************/ -void eliminate_duplicate_outlines(TBLOB *blob) { - TESSLINE *outline; - TESSLINE *other_outline; - TESSLINE *last_outline; - - for (outline = blob->outlines; outline; outline = outline->next) { - - for (last_outline = outline, other_outline = outline->next; - other_outline; - last_outline = other_outline, other_outline = other_outline->next) { - - if (same_outline_bounds (outline, other_outline)) { - last_outline->next = other_outline->next; - // This doesn't leak - the outlines share the EDGEPTs. - other_outline->loop = NULL; - delete other_outline; - other_outline = last_outline; - // If it is part of a cut, then it can't be a hole any more. - outline->is_hole = false; - } - } - } -} - -/********************************************************************** - * setup_blob_outlines - * - * Set up each of the outlines in this blob. 
- **********************************************************************/ -void setup_blob_outlines(TBLOB *blob) { - TESSLINE *outline; - - for (outline = blob->outlines; outline; outline = outline->next) { - outline->ComputeBoundingBox(); - } -} diff --git a/wordrec/olutil.h b/wordrec/olutil.h deleted file mode 100644 index c7eeecd2..00000000 --- a/wordrec/olutil.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: olutil.h (Formerly olutil.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed Jul 10 14:21:55 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *********************************************************************************/ -#ifndef OLUTIL_H -#define OLUTIL_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "blobs.h" - -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/********************************************************************** - * is_inside_angle - * - * Return true if the edgept supplied as input is an inside angle. This - * is determined by the angular change of the vectors from point to - * point. - - **********************************************************************/ - -#define is_inside_angle(pt) \ -(angle_change ((pt)->prev, (pt), (pt)->next) < chop_inside_angle) - -/********************************************************************** - * same_outline_bounds - * - * Return TRUE if these two outlines have the same bounds. - **********************************************************************/ - -#define same_outline_bounds(outline,other_outline) \ -(outline->topleft.x == other_outline->topleft.x && \ - outline->topleft.y == other_outline->topleft.y && \ - outline->botright.x == other_outline->botright.x && \ - outline->botright.y == other_outline->botright.y) \ - - -/********************************************************************** - * weighted_edgept_dist - * - * Return the distance (squared) between the two edge points. 
- **********************************************************************/ - -#define weighted_edgept_dist(p1,p2,chop_x_y_weight) \ -(((p1)->pos.x - (p2)->pos.x) * \ - ((p1)->pos.x - (p2)->pos.x) * chop_x_y_weight + \ - ((p1)->pos.y - (p2)->pos.y) * \ - ((p1)->pos.y - (p2)->pos.y)) - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -void correct_blob_order(TBLOB *blob1, TBLOB *blob2); - -void eliminate_duplicate_outlines(TBLOB *blob); - -void setup_blob_outlines(TBLOB *blob); - -#endif diff --git a/wordrec/outlines.cpp b/wordrec/outlines.cpp index 3d31a67c..fdcedfc7 100644 --- a/wordrec/outlines.cpp +++ b/wordrec/outlines.cpp @@ -39,73 +39,6 @@ namespace tesseract { /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ -/********************************************************************** - * crosses_outline - * - * Check to see if this line crosses over this outline. If it does - * return TRUE. - **********************************************************************/ -int Wordrec::crosses_outline(EDGEPT *p0, /* Start of line */ - EDGEPT *p1, /* End of line */ - EDGEPT *outline) { /* Outline to check */ - EDGEPT *pt = outline; - do { - if (is_crossed (p0->pos, p1->pos, pt->pos, pt->next->pos)) - return (TRUE); - pt = pt->next; - } - while (pt != outline); - return (FALSE); -} - - -/********************************************************************** - * is_crossed - * - * Return TRUE when the two line segments cross each other. Find out - * where the projected lines would cross and then check to see if the - * point of intersection lies on both of the line segments. If it does - * then these two segments cross. 
- **********************************************************************/ -int Wordrec::is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) { - int b0a1xb0b1, b0b1xb0a0; - int a1b1xa1a0, a1a0xa1b0; - - TPOINT b0a1, b0a0, a1b1, b0b1, a1a0; - - b0a1.x = a1.x - b0.x; - b0a0.x = a0.x - b0.x; - a1b1.x = b1.x - a1.x; - b0b1.x = b1.x - b0.x; - a1a0.x = a0.x - a1.x; - b0a1.y = a1.y - b0.y; - b0a0.y = a0.y - b0.y; - a1b1.y = b1.y - a1.y; - b0b1.y = b1.y - b0.y; - a1a0.y = a0.y - a1.y; - - b0a1xb0b1 = CROSS (b0a1, b0b1); - b0b1xb0a0 = CROSS (b0b1, b0a0); - a1b1xa1a0 = CROSS (a1b1, a1a0); - /*a1a0xa1b0=CROSS(a1a0,a1b0); */ - a1a0xa1b0 = -CROSS (a1a0, b0a1); - - return ((b0a1xb0b1 > 0 && b0b1xb0a0 > 0) - || (b0a1xb0b1 < 0 && b0b1xb0a0 < 0)) - && ((a1b1xa1a0 > 0 && a1a0xa1b0 > 0) || (a1b1xa1a0 < 0 && a1a0xa1b0 < 0)); -} - - -/********************************************************************** - * is_same_edgept - * - * Return true if the points are identical. - **********************************************************************/ -int Wordrec::is_same_edgept(EDGEPT *p1, EDGEPT *p2) { - return (p1 == p2); -} - - /********************************************************************** * near_point * @@ -153,30 +86,4 @@ bool Wordrec::near_point(EDGEPT *point, } } - -/********************************************************************** - * reverse_outline - * - * Change the direction of the outline. If it was clockwise make it - * counter-clockwise and vice versa. Do this by swapping each of the - * next and prev fields of each edge point. 
- **********************************************************************/ -void Wordrec::reverse_outline(EDGEPT *outline) { - EDGEPT *edgept = outline; - EDGEPT *temp; - - do { - /* Swap next and prev */ - temp = edgept->prev; - edgept->prev = edgept->next; - edgept->next = temp; - /* Set up vec field */ - edgept->vec.x = edgept->next->pos.x - edgept->pos.x; - edgept->vec.y = edgept->next->pos.y - edgept->pos.y; - - edgept = edgept->prev; /* Go to next point */ - } - while (edgept != outline); -} - } // namespace tesseract diff --git a/wordrec/pieces.cpp b/wordrec/pieces.cpp index 35462ea2..f9205340 100644 --- a/wordrec/pieces.cpp +++ b/wordrec/pieces.cpp @@ -58,7 +58,7 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector& seams, const char* description, TWERD *word, BlamerBundle *blamer_bundle) { - if (end > start) join_pieces(seams, start, end, word); + if (end > start) SEAM::JoinPieces(seams, word->blobs, start, end); BLOB_CHOICE_LIST *choices = classify_blob(word->blobs[start], description, White, blamer_bundle); // Set the matrix_cell_ entries in all the BLOB_CHOICES. @@ -67,7 +67,7 @@ BLOB_CHOICE_LIST *Wordrec::classify_piece(const GenericVector& seams, bc_it.data()->set_matrix_cell(start, end); } - if (end > start) break_pieces(seams, start, end, word); + if (end > start) SEAM::BreakPieces(seams, word->blobs, start, end); return (choices); } diff --git a/wordrec/plotedges.cpp b/wordrec/plotedges.cpp index 0aa02c37..f7fbacee 100644 --- a/wordrec/plotedges.cpp +++ b/wordrec/plotedges.cpp @@ -119,21 +119,4 @@ void mark_outline(EDGEPT *edgept) { /* Start of point list */ c_make_current(window); } - -/********************************************************************** - * mark_split - * - * Set up the marks list to be displayed in subsequent updates and draw - * the marks in the current window. The marks are stored in the second - * sublist. The first sublist is left unmodified. 
- **********************************************************************/ -void mark_split(SPLIT *split) { - void *window = edge_window; - - c_line_color_index(window, Green); - c_move (window, (float) split->point1->pos.x, (float) split->point1->pos.y); - c_draw (window, (float) split->point2->pos.x, (float) split->point2->pos.y); - c_make_current(window); -} - #endif // GRAPHICS_DISABLED diff --git a/wordrec/plotedges.h b/wordrec/plotedges.h index d0ca40be..91521de7 100644 --- a/wordrec/plotedges.h +++ b/wordrec/plotedges.h @@ -28,7 +28,6 @@ #include "callcpp.h" #include "oldlist.h" #include "blobs.h" -#include "split.h" /*---------------------------------------------------------------------- V a r i a b l e s @@ -67,5 +66,4 @@ void draw_blob_edges(TBLOB *blob); void mark_outline(EDGEPT *edgept); -void mark_split(SPLIT *split); #endif diff --git a/wordrec/segsearch.cpp b/wordrec/segsearch.cpp index 29d03702..a6fe10ff 100644 --- a/wordrec/segsearch.cpp +++ b/wordrec/segsearch.cpp @@ -53,8 +53,7 @@ void Wordrec::SegSearch(WERD_RES* word_res, improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle, blamer_bundle, &pain_points, &pending); } - if (chop_debug) - print_seams("Final seam list:", word_res->seam_array); + if (chop_debug) SEAM::PrintSeams("Final seam list:", word_res->seam_array); if (blamer_bundle != NULL && !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) { diff --git a/wordrec/wordrec.h b/wordrec/wordrec.h index a69026b1..38f09f23 100644 --- a/wordrec/wordrec.h +++ b/wordrec/wordrec.h @@ -290,9 +290,10 @@ class Wordrec : public Classify { // chop.cpp PRIORITY point_priority(EDGEPT *point); void add_point_to_list(PointHeap* point_heap, EDGEPT *point); + // Returns true if the edgept supplied as input is an inside angle. This + // is determined by the angular change of the vectors from point to point. 
+ bool is_inside_angle(EDGEPT *pt); int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3); - int is_little_chunk(EDGEPT *point1, EDGEPT *point2); - int is_small_area(EDGEPT *point1, EDGEPT *point2); EDGEPT *pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist); @@ -335,17 +336,12 @@ class Wordrec : public Classify { // findseam.cpp void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue* seams); - void choose_best_seam(SeamQueue* seam_queue, - SPLIT *split, - PRIORITY priority, - SEAM **seam_result, - TBLOB *blob, - SeamPile* seam_pile); + void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, + PRIORITY priority, SEAM **seam_result, TBLOB *blob, + SeamPile *seam_pile); void combine_seam(const SeamPile& seam_pile, const SEAM* seam, SeamQueue* seam_queue); - inT16 constrained_split(SPLIT *split, TBLOB *blob); SEAM *pick_good_seam(TBLOB *blob); - PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax); void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS], inT16 num_points, SeamQueue* seam_queue, @@ -359,23 +355,12 @@ class Wordrec : public Classify { SEAM ** seam, TBLOB * blob); // gradechop.cpp - PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax); - PRIORITY grade_center_of_blob(register BOUNDS_RECT rect); - PRIORITY grade_overlap(register BOUNDS_RECT rect); PRIORITY grade_split_length(register SPLIT *split); PRIORITY grade_sharpness(register SPLIT *split); - PRIORITY grade_width_change(register BOUNDS_RECT rect); - void set_outline_bounds(register EDGEPT *point1, - register EDGEPT *point2, - BOUNDS_RECT rect); // outlines.cpp - int crosses_outline(EDGEPT *p0, EDGEPT *p1, EDGEPT *outline); - int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1); - int is_same_edgept(EDGEPT *p1, EDGEPT *p2); bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt); - void reverse_outline(EDGEPT *outline); // pieces.cpp virtual BLOB_CHOICE_LIST *classify_piece(const 
GenericVector& seams,