/////////////////////////////////////////////////////////////////////// // File: colfind.cpp // Description: Class to hold BLOBNBOXs in a grid for fast access // to neighbours. // Author: Ray Smith // Created: Wed Jun 06 17:22:01 PDT 2007 // // (C) Copyright 2007, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable:4244) // Conversion warnings #endif // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif #include "colfind.h" #include "ccnontextdetect.h" #include "colpartition.h" #include "colpartitionset.h" #include "equationdetectbase.h" #include "linefind.h" #include "normalis.h" #include "strokewidth.h" #include "blobbox.h" #include "scrollview.h" #include "tablefind.h" #include "params.h" #include "workingpartset.h" namespace tesseract { // Minimum width (in pixels) to be considered when making columns. // TODO(rays) convert to inches, dependent on resolution. const int kMinColumnWidth = 100; // When assigning columns, the max number of misfit grid rows/ColPartitionSets // that can be ignored. const int kMaxIncompatibleColumnCount = 2; // Min fraction of ColPartition height to be overlapping for margin purposes. const double kMarginOverlapFraction = 0.25; // Max fraction of mean_column_gap_ for the gap between two partitions within a // column to allow them to merge. const double kHorizontalGapMergeFraction = 0.5; // Min fraction of grid size to not be considered likely noise. const double kMinNonNoiseFraction = 0.5; // Minimum gutter width as a fraction of gridsize const double kMinGutterWidthGrid = 0.5; // Max multiple of a partition's median size as a distance threshold for // adding noise blobs. const double kMaxDistToPartSizeRatio = 1.5; BOOL_VAR(textord_tabfind_show_initial_partitions, false, "Show partition bounds"); BOOL_VAR(textord_tabfind_show_reject_blobs, false, "Show blobs rejected as noise"); INT_VAR(textord_tabfind_show_partitions, 0, "Show partition bounds, waiting if >1"); BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds"); BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds"); BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); ScrollView* ColumnFinder::blocks_win_ = NULL; // Gridsize is an estimate of the text size in the image. A suitable value // is in TO_BLOCK::line_size after find_components has been used to make // the blobs. // bleft and tright are the bounds of the image (or rectangle) being processed. // vlines is a (possibly empty) list of TabVector and vertical_x and y are // the sum logical vertical vector produced by LineFinder::FindVerticalLines. ColumnFinder::ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, int resolution, bool cjk_script, double aligned_gap_fraction, TabVector_LIST* vlines, TabVector_LIST* hlines, int vertical_x, int vertical_y) : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, resolution), cjk_script_(cjk_script), min_gutter_width_(static_cast(kMinGutterWidthGrid * gridsize)), mean_column_gap_(tright.x() - bleft.x()), tabfind_aligned_gap_fraction_(aligned_gap_fraction), reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), best_columns_(NULL), stroke_width_(NULL), part_grid_(gridsize, bleft, tright), nontext_map_(NULL), projection_(resolution), denorm_(NULL), input_blobs_win_(NULL), equation_detect_(NULL) { TabVector_IT h_it(&horizontal_lines_); h_it.add_list_after(hlines); } ColumnFinder::~ColumnFinder() { column_sets_.delete_data_pointers(); if (best_columns_ != NULL) { delete [] best_columns_; } if (stroke_width_ != NULL) delete stroke_width_; delete input_blobs_win_; pixDestroy(&nontext_map_); while (denorm_ != NULL) { DENORM* dead_denorm = denorm_; denorm_ = const_cast(denorm_->predecessor()); delete dead_denorm; } // The ColPartitions are destroyed automatically, but any boxes in // the noise_parts_ list are owned and need to be deleted explicitly. ColPartition_IT part_it(&noise_parts_); for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { ColPartition* part = part_it.data(); part->DeleteBoxes(); } // Likewise any boxes in the good_parts_ list need to be deleted. // These are just the image parts. Text parts have already given their // boxes on to the TO_BLOCK, and have empty lists. part_it.set_to_list(&good_parts_); for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { ColPartition* part = part_it.data(); part->DeleteBoxes(); } // Also, any blobs on the image_bblobs_ list need to have their cblobs // deleted. This only happens if there has been an early return from // FindColumns, as in a normal return, the blobs go into the grid and // end up in noise_parts_, good_parts_ or the output blocks. BLOBNBOX_IT bb_it(&image_bblobs_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.data(); delete bblob->cblob(); } } // Performs initial processing on the blobs in the input_block: // Setup the part_grid, stroke_width_, nontext_map. // Obvious noise blobs are filtered out and used to mark the nontext_map_. // Initial stroke-width analysis is used to get local text alignment // direction, so the textline projection_ map can be setup. // On return, IsVerticallyAlignedText may be called (now optionally) to // determine the gross textline alignment of the page. void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix, TO_BLOCK* input_block) { part_grid_.Init(gridsize(), bleft(), tright()); if (stroke_width_ != NULL) delete stroke_width_; stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); min_gutter_width_ = static_cast(kMinGutterWidthGrid * gridsize()); input_block->ReSetAndReFilterBlobs(); #ifndef GRAPHICS_DISABLED if (textord_tabfind_show_blocks) { input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); input_block->plot_graded_blobs(input_blobs_win_); } #endif // GRAPHICS_DISABLED SetBlockRuleEdges(input_block); pixDestroy(&nontext_map_); // Run a preliminary strokewidth neighbour detection on the medium blobs. stroke_width_->SetNeighboursOnMediumBlobs(input_block); CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); // Remove obvious noise and make the initial non-text map. nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind, photo_mask_pix, input_block); stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_, input_block); // Clear the strokewidth grid ready for rotation or leader finding. stroke_width_->Clear(); } // Tests for vertical alignment of text (returning true if so), and generates // a list of blobs of moderate aspect ratio, in the most frequent writing // direction (in osd_blobs) for orientation and script detection to test // the character orientation. // block is the single block for the whole page or rectangle to be OCRed. // Note that the vertical alignment may be due to text whose writing direction // is vertical, like say Japanese, or due to text whose writing direction is // horizontal but whose text appears vertically aligned because the image is // not the right way up. bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs) { return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio, block, osd_blobs); } // Rotates the blobs and the TabVectors so that the gross writing direction // (text lines) are horizontal and lines are read down the page. // Applied rotation stored in rotation_. // A second rotation is calculated for application during recognition to // make the rotated blobs upright for recognition. // Subsequent rotation stored in text_rotation_. // // Arguments: // vertical_text_lines true if the text lines are vertical. // recognition_rotation [0..3] is the number of anti-clockwise 90 degree // rotations from osd required for the text to be upright and readable. void ColumnFinder::CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines, int recognition_rotation) { const FCOORD anticlockwise90(0.0f, 1.0f); const FCOORD clockwise90(0.0f, -1.0f); const FCOORD rotation180(-1.0f, 0.0f); const FCOORD norotation(1.0f, 0.0f); text_rotation_ = norotation; // Rotate the page to make the text upright, as implied by // recognition_rotation. rotation_ = norotation; if (recognition_rotation == 1) { rotation_ = anticlockwise90; } else if (recognition_rotation == 2) { rotation_ = rotation180; } else if (recognition_rotation == 3) { rotation_ = clockwise90; } // We infer text writing direction to be vertical if there are several // vertical text lines detected, and horizontal if not. But if the page // orientation was determined to be 90 or 270 degrees, the true writing // direction is the opposite of what we inferred. if (recognition_rotation & 1) { vertical_text_lines = !vertical_text_lines; } // If we still believe the writing direction is vertical, we use the // convention of rotating the page ccw 90 degrees to make the text lines // horizontal, and mark the blobs for rotation cw 90 degrees for // classification so that the text order is correct after recognition. if (vertical_text_lines) { rotation_.rotate(anticlockwise90); text_rotation_.rotate(clockwise90); } // Set rerotate_ to the inverse of rotation_. rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { // Rotate all the blobs and tab vectors. RotateBlobList(rotation_, &block->large_blobs); RotateBlobList(rotation_, &block->blobs); RotateBlobList(rotation_, &block->small_blobs); RotateBlobList(rotation_, &block->noise_blobs); TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, &min_gutter_width_); part_grid_.Init(gridsize(), bleft(), tright()); // Reset all blobs to initial state and filter by size. // Since they have rotated, the list they belong on could have changed. block->ReSetAndReFilterBlobs(); SetBlockRuleEdges(block); stroke_width_->CorrectForRotation(rerotate_, &part_grid_); } if (textord_debug_tabfind) { tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", vertical_text_lines, recognition_rotation, rotation_.x(), rotation_.y(), text_rotation_.x(), text_rotation_.y()); } // Setup the denormalization. ASSERT_HOST(denorm_ == NULL); denorm_ = new DENORM; denorm_->SetupNormalization(NULL, &rotation_, NULL, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); } // Finds blocks of text, image, rule line, table etc, returning them in the // blocks and to_blocks // (Each TO_BLOCK points to the basic BLOCK and adds more information.) // Image blocks are generated by a combination of photo_mask_pix (which may // NOT be NULL) and the rejected text found during preliminary textline // finding. // The input_block is the result of a call to find_components, and contains // the blobs found in the image or rectangle to be OCRed. These blobs will be // removed and placed in the output blocks, while unused ones will be deleted. // If single_column is true, the input is treated as single column, but // it is still divided into blocks of equal line spacing/text size. // scaled_color is scaled down by scaled_factor from the input color image, // and may be NULL if the input was not color. // grey_pix is optional, but if present must match the photo_mask_pix in size, // and must be a *real* grey image instead of binary_pix * 255. // thresholds_pix is expected to be present iff grey_pix is present and // can be an integer factor reduction of the grey_pix. It represents the // thresholds that were used to create the binary_pix from the grey_pix. // If diacritic_blobs is non-null, then diacritics/noise blobs, that would // confuse layout anaylsis by causing textline overlap, are placed there, // with the expectation that they will be reassigned to words later and // noise/diacriticness determined via classification. // Returns -1 if the user hits the 'd' key in the blocks window while running // in debug mode, which requests a retry with more debug info. int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor, TO_BLOCK* input_block, Pix* photo_mask_pix, Pix* thresholds_pix, Pix* grey_pix, BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks) { pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); stroke_width_->FindLeaderPartitions(input_block, &part_grid_); stroke_width_->RemoveLineResidue(&big_parts_); FindInitialTabVectors(NULL, min_gutter_width_, tabfind_aligned_gap_fraction_, input_block); SetBlockRuleEdges(input_block); stroke_width_->GradeBlobsIntoPartitions( pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_, &projection_, diacritic_blobs, &part_grid_, &big_parts_); if (!PSM_SPARSE(pageseg_mode)) { ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this, &part_grid_, &big_parts_); ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, photo_mask_pix); ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this, &part_grid_, &big_parts_); } part_grid_.ReTypeBlobs(&image_bblobs_); TidyBlobs(input_block); Reset(); // TODO(rays) need to properly handle big_parts_. ColPartition_IT p_it(&big_parts_); for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) p_it.data()->DisownBoxesNoAssert(); big_parts_.clear(); delete stroke_width_; stroke_width_ = NULL; // Compute the edge offsets whether or not there is a grey_pix. It is done // here as the c_blobs haven't been touched by rotation or anything yet, // so no denorm is required, yet the text has been separated from image, so // no time is wasted running it on image blobs. input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); // A note about handling right-to-left scripts (Hebrew/Arabic): // The columns must be reversed and come out in right-to-left instead of // the normal left-to-right order. Because the left-to-right ordering // is implicit in many data structures, it is simpler to fool the algorithms // into thinking they are dealing with left-to-right text. // To do this, we reflect the needed data in the y-axis and then reflect // the blocks back after they have been created. This is a temporary // arrangement that is confined to this function only, so the reflection // is completely invisible in the output blocks. // The only objects reflected are: // The vertical separator lines that have already been found; // The bounding boxes of all BLOBNBOXES on all lists on the input_block // plus the image_bblobs. The outlines are not touched, since they are // not looked at. bool input_is_rtl = input_block->block->right_to_left(); if (input_is_rtl) { // Reflect the vertical separator lines (member of TabFind). ReflectInYAxis(); // Reflect the blob boxes. ReflectForRtl(input_block, &image_bblobs_); part_grid_.ReflectInYAxis(); } if (!PSM_SPARSE(pageseg_mode)) { if (!PSM_COL_FIND_ENABLED(pageseg_mode)) { // No tab stops needed. Just the grid that FindTabVectors makes. DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_); } else { SetBlockRuleEdges(input_block); // Find the tab stops, estimate skew, and deskew the tabs, blobs and // part_grid_. FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, min_gutter_width_, tabfind_aligned_gap_fraction_, &part_grid_, &deskew_, &reskew_); // Add the deskew to the denorm_. DENORM* new_denorm = new DENORM; new_denorm->SetupNormalization(NULL, &deskew_, denorm_, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); denorm_ = new_denorm; } SetBlockRuleEdges(input_block); part_grid_.SetTabStops(this); // Make the column_sets_. if (!MakeColumns(false)) { tprintf("Empty page!!\n"); part_grid_.DeleteParts(); return 0; // This is an empty page. } // Refill the grid using rectangular spreading, and get the benefit // of the completed tab vectors marking the rule edges of each blob. Clear(); #ifndef GRAPHICS_DISABLED if (textord_tabfind_show_reject_blobs) { ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs"); input_block->plot_graded_blobs(rej_win); } #endif // GRAPHICS_DISABLED InsertBlobsToGrid(false, false, &image_bblobs_, this); InsertBlobsToGrid(true, true, &input_block->blobs, this); part_grid_.GridFindMargins(best_columns_); // Split and merge the partitions by looking at local neighbours. GridSplitPartitions(); // Resolve unknown partitions by adding to an existing partition, fixing // the type, or declaring them noise. part_grid_.GridFindMargins(best_columns_); GridMergePartitions(); // Insert any unused noise blobs that are close enough to an appropriate // partition. InsertRemainingNoise(input_block); // Add horizontal line separators as partitions. GridInsertHLinePartitions(); GridInsertVLinePartitions(); // Recompute margins based on a local neighbourhood search. part_grid_.GridFindMargins(best_columns_); SetPartitionTypes(); } if (textord_tabfind_show_initial_partitions) { ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions"); part_grid_.DisplayBoxes(part_win); DisplayTabVectors(part_win); } if (!PSM_SPARSE(pageseg_mode)) { if (equation_detect_) { equation_detect_->FindEquationParts(&part_grid_, best_columns_); } if (textord_tabfind_find_tables) { TableFinder table_finder; table_finder.Init(gridsize(), bleft(), tright()); table_finder.set_resolution(resolution_); table_finder.set_left_to_right_language( !input_block->block->right_to_left()); // Copy cleaned partitions from part_grid_ to clean_part_grid_ and // insert dot-like noise into period_grid_ table_finder.InsertCleanPartitions(&part_grid_, input_block); // Get Table Regions table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); } GridRemoveUnderlinePartitions(); part_grid_.DeleteUnknownParts(input_block); // Build the partitions into chains that belong in the same block and // refine into one-to-one links, then smooth the types within each chain. part_grid_.FindPartitionPartners(); part_grid_.FindFigureCaptions(); part_grid_.RefinePartitionPartners(true); SmoothPartnerRuns(); #ifndef GRAPHICS_DISABLED if (textord_tabfind_show_partitions) { ScrollView* window = MakeWindow(400, 300, "Partitions"); if (window != NULL) { if (textord_debug_images) window->Image(AlignedBlob::textord_debug_pix().string(), image_origin().x(), image_origin().y()); part_grid_.DisplayBoxes(window); if (!textord_debug_printable) DisplayTabVectors(window); if (window != NULL && textord_tabfind_show_partitions > 1) { delete window->AwaitEvent(SVET_DESTROY); } } } #endif // GRAPHICS_DISABLED part_grid_.AssertNoDuplicates(); } // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, // and ownership of the BLOBNBOXes moves to the ColPartitions. // (They were previously owned by the block or the image_bblobs list.) ReleaseBlobsAndCleanupUnused(input_block); // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves // from the ColPartitions to the output TO_BLOCK. In non-text, the // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor. if (PSM_SPARSE(pageseg_mode)) part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks); else TransformToBlocks(blocks, to_blocks); if (textord_debug_tabfind) { tprintf("Found %d blocks, %d to_blocks\n", blocks->length(), to_blocks->length()); } DisplayBlocks(blocks); RotateAndReskewBlocks(input_is_rtl, to_blocks); int result = 0; #ifndef GRAPHICS_DISABLED if (blocks_win_ != NULL) { bool waiting = false; do { waiting = false; SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY); if (event->type == SVET_INPUT && event->parameter != NULL) { if (*event->parameter == 'd') result = -1; else blocks->clear(); } else if (event->type == SVET_DESTROY) { blocks_win_ = NULL; } else { waiting = true; } delete event; } while (waiting); } #endif // GRAPHICS_DISABLED return result; } // Get the rotation required to deskew, and its inverse rotation. void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) { *reskew = reskew_; *deskew = reskew_; deskew->set_y(-deskew->y()); } void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) { equation_detect_ = detect; } //////////////// PRIVATE CODE ///////////////////////// // Displays the blob and block bounding boxes in a window called Blocks. void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { #ifndef GRAPHICS_DISABLED if (textord_tabfind_show_blocks) { if (blocks_win_ == NULL) blocks_win_ = MakeWindow(700, 300, "Blocks"); else blocks_win_->Clear(); if (textord_debug_images) blocks_win_->Image(AlignedBlob::textord_debug_pix().string(), image_origin().x(), image_origin().y()); else DisplayBoxes(blocks_win_); BLOCK_IT block_it(blocks); int serial = 1; for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { BLOCK* block = block_it.data(); block->plot(blocks_win_, serial++, textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); } blocks_win_->Update(); } #endif } // Displays the column edges at each grid y coordinate defined by // best_columns_. void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) { #ifndef GRAPHICS_DISABLED ScrollView* col_win = MakeWindow(50, 300, "Columns"); if (textord_debug_images) col_win->Image(AlignedBlob::textord_debug_pix().string(), image_origin().x(), image_origin().y()); else DisplayBoxes(col_win); col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); for (int i = 0; i < gridheight_; ++i) { ColPartitionSet* columns = best_columns_[i]; if (columns != NULL) columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win); } #endif } // Sets up column_sets_ (the determined column layout at each horizontal // slice). Returns false if the page is empty. bool ColumnFinder::MakeColumns(bool single_column) { // The part_sets_ are a temporary structure used during column creation, // and is a vector of ColPartitionSets, representing ColPartitions found // at horizontal slices through the page. PartSetVector part_sets; if (!single_column) { if (!part_grid_.MakeColPartSets(&part_sets)) return false; // Empty page. ASSERT_HOST(part_grid_.gridheight() == gridheight_); // Try using only the good parts first. bool good_only = true; do { for (int i = 0; i < gridheight_; ++i) { ColPartitionSet* line_set = part_sets.get(i); if (line_set != NULL && line_set->LegalColumnCandidate()) { ColPartitionSet* column_candidate = line_set->Copy(good_only); if (column_candidate != NULL) column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); } } good_only = !good_only; } while (column_sets_.empty() && !good_only); if (textord_debug_tabfind) PrintColumnCandidates("Column candidates"); // Improve the column candidates against themselves. ImproveColumnCandidates(&column_sets_, &column_sets_); if (textord_debug_tabfind) PrintColumnCandidates("Improved columns"); // Improve the column candidates using the part_sets_. ImproveColumnCandidates(&part_sets, &column_sets_); } ColPartitionSet* single_column_set = part_grid_.MakeSingleColumnSet(WidthCB()); if (single_column_set != NULL) { // Always add the single column set as a backup even if not in // single column mode. single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); } if (textord_debug_tabfind) PrintColumnCandidates("Final Columns"); bool has_columns = !column_sets_.empty(); if (has_columns) { // Divide the page into sections of uniform column layout. bool any_multi_column = AssignColumns(part_sets); if (textord_tabfind_show_columns) { DisplayColumnBounds(&part_sets); } ComputeMeanColumnGap(any_multi_column); } for (int i = 0; i < part_sets.size(); ++i) { ColPartitionSet* line_set = part_sets.get(i); if (line_set != NULL) { line_set->RelinquishParts(); delete line_set; } } return has_columns; } // Attempt to improve the column_candidates by expanding the columns // and adding new partitions from the partition sets in src_sets. // Src_sets may be equal to column_candidates, in which case it will // use them as a source to improve themselves. void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets, PartSetVector* column_sets) { PartSetVector temp_cols; temp_cols.move(column_sets); if (src_sets == column_sets) src_sets = &temp_cols; int set_size = temp_cols.size(); // Try using only the good parts first. bool good_only = true; do { for (int i = 0; i < set_size; ++i) { ColPartitionSet* column_candidate = temp_cols.get(i); ASSERT_HOST(column_candidate != NULL); ColPartitionSet* improved = column_candidate->Copy(good_only); if (improved != NULL) { improved->ImproveColumnCandidate(WidthCB(), src_sets); improved->AddToColumnSetsIfUnique(column_sets, WidthCB()); } } good_only = !good_only; } while (column_sets->empty() && !good_only); if (column_sets->empty()) column_sets->move(&temp_cols); else temp_cols.delete_data_pointers(); } // Prints debug information on the column candidates. void ColumnFinder::PrintColumnCandidates(const char* title) { int set_size = column_sets_.size(); tprintf("Found %d %s:\n", set_size, title); if (textord_debug_tabfind >= 3) { for (int i = 0; i < set_size; ++i) { ColPartitionSet* column_set = column_sets_.get(i); column_set->Print(); } } } // Finds the optimal set of columns that cover the entire image with as // few changes in column partition as possible. // NOTE: this could be thought of as an optimization problem, but a simple // greedy algorithm is used instead. The algorithm repeatedly finds the modal // compatible column in an unassigned region and uses that with the extra // tweak of extending the modal region over small breaks in compatibility. // Where modal regions overlap, the boundary is chosen so as to minimize // the cost in terms of ColPartitions not fitting an approved column. // Returns true if any part of the page is multi-column. bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { int set_count = part_sets.size(); ASSERT_HOST(set_count == gridheight()); // Allocate and init the best_columns_. best_columns_ = new ColPartitionSet*[set_count]; for (int y = 0; y < set_count; ++y) best_columns_[y] = NULL; int column_count = column_sets_.size(); // column_set_costs[part_sets_ index][column_sets_ index] is // < MAX_INT32 if the partition set is compatible with the column set, // in which case its value is the cost for that set used in deciding // which competing set to assign. // any_columns_possible[part_sets_ index] is true if any of // possible_column_sets[part_sets_ index][*] is < MAX_INT32. // assigned_costs[part_sets_ index] is set to the column_set_costs // of the assigned column_sets_ index or MAX_INT32 if none is set. // On return the best_columns_ member is set. bool* any_columns_possible = new bool[set_count]; int* assigned_costs = new int[set_count]; int** column_set_costs = new int*[set_count]; // Set possible column_sets to indicate whether each set is compatible // with each column. for (int part_i = 0; part_i < set_count; ++part_i) { ColPartitionSet* line_set = part_sets.get(part_i); bool debug = line_set != NULL && WithinTestRegion(2, line_set->bounding_box().left(), line_set->bounding_box().bottom()); column_set_costs[part_i] = new int[column_count]; any_columns_possible[part_i] = false; assigned_costs[part_i] = MAX_INT32; for (int col_i = 0; col_i < column_count; ++col_i) { if (line_set != NULL && column_sets_.get(col_i)->CompatibleColumns(debug, line_set, WidthCB())) { column_set_costs[part_i][col_i] = column_sets_.get(col_i)->UnmatchedWidth(line_set); any_columns_possible[part_i] = true; } else { column_set_costs[part_i][col_i] = MAX_INT32; if (debug) tprintf("Set id %d did not match at y=%d, lineset =%p\n", col_i, part_i, line_set); } } } bool any_multi_column = false; // Assign a column set to each vertical grid position. // While there is an unassigned range, find its mode. int start, end; while (BiggestUnassignedRange(set_count, any_columns_possible, &start, &end)) { if (textord_debug_tabfind >= 2) tprintf("Biggest unassigned range = %d- %d\n", start, end); // Find the modal column_set_id in the range. int column_set_id = RangeModalColumnSet(column_set_costs, assigned_costs, start, end); if (textord_debug_tabfind >= 2) { tprintf("Range modal column id = %d\n", column_set_id); column_sets_.get(column_set_id)->Print(); } // Now find the longest run of the column_set_id in the range. ShrinkRangeToLongestRun(column_set_costs, assigned_costs, any_columns_possible, column_set_id, &start, &end); if (textord_debug_tabfind >= 2) tprintf("Shrunk range = %d- %d\n", start, end); // Extend the start and end past the longest run, while there are // only small gaps in compatibility that can be overcome by larger // regions of compatibility beyond. ExtendRangePastSmallGaps(column_set_costs, assigned_costs, any_columns_possible, column_set_id, -1, -1, &start); --end; ExtendRangePastSmallGaps(column_set_costs, assigned_costs, any_columns_possible, column_set_id, 1, set_count, &end); ++end; if (textord_debug_tabfind) tprintf("Column id %d applies to range = %d - %d\n", column_set_id, start, end); // Assign the column to the range, which now may overlap with other ranges. AssignColumnToRange(column_set_id, start, end, column_set_costs, assigned_costs); if (column_sets_.get(column_set_id)->GoodColumnCount() > 1) any_multi_column = true; } // If anything remains unassigned, the whole lot is unassigned, so // arbitrarily assign id 0. if (best_columns_[0] == NULL) { AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); } // Free memory. for (int i = 0; i < set_count; ++i) { delete [] column_set_costs[i]; } delete [] assigned_costs; delete [] any_columns_possible; delete [] column_set_costs; return any_multi_column; } // Finds the biggest range in part_sets_ that has no assigned column, but // column assignment is possible. bool ColumnFinder::BiggestUnassignedRange(int set_count, const bool* any_columns_possible, int* best_start, int* best_end) { int best_range_size = 0; *best_start = set_count; *best_end = set_count; int end = set_count; for (int start = 0; start < gridheight_; start = end) { // Find the first unassigned index in start. while (start < set_count) { if (best_columns_[start] == NULL && any_columns_possible[start]) break; ++start; } // Find the first past the end and count the good ones in between. int range_size = 1; // Number of non-null, but unassigned line sets. end = start + 1; while (end < set_count) { if (best_columns_[end] != NULL) break; if (any_columns_possible[end]) ++range_size; ++end; } if (start < set_count && range_size > best_range_size) { best_range_size = range_size; *best_start = start; *best_end = end; } } return *best_start < *best_end; } // Finds the modal compatible column_set_ index within the given range. int ColumnFinder::RangeModalColumnSet(int** column_set_costs, const int* assigned_costs, int start, int end) { int column_count = column_sets_.size(); STATS column_stats(0, column_count); for (int part_i = start; part_i < end; ++part_i) { for (int col_j = 0; col_j < column_count; ++col_j) { if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) column_stats.add(col_j, 1); } } ASSERT_HOST(column_stats.get_total() > 0); return column_stats.mode(); } // Given that there are many column_set_id compatible columns in the range, // shrinks the range to the longest contiguous run of compatibility, allowing // gaps where no columns are possible, but not where competing columns are // possible. void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, const int* assigned_costs, const bool* any_columns_possible, int column_set_id, int* best_start, int* best_end) { // orig_start and orig_end are the maximum range we will look at. int orig_start = *best_start; int orig_end = *best_end; int best_range_size = 0; *best_start = orig_end; *best_end = orig_end; int end = orig_end; for (int start = orig_start; start < orig_end; start = end) { // Find the first possible while (start < orig_end) { if (column_set_costs[start][column_set_id] < assigned_costs[start] || !any_columns_possible[start]) break; ++start; } // Find the first past the end. end = start + 1; while (end < orig_end) { if (column_set_costs[end][column_set_id] >= assigned_costs[start] && any_columns_possible[end]) break; ++end; } if (start < orig_end && end - start > best_range_size) { best_range_size = end - start; *best_start = start; *best_end = end; } } } // Moves start in the direction of step, up to, but not including end while // the only incompatible regions are no more than kMaxIncompatibleColumnCount // in size, and the compatible regions beyond are bigger. void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, const int* assigned_costs, const bool* any_columns_possible, int column_set_id, int step, int end, int* start) { if (textord_debug_tabfind > 2) tprintf("Starting expansion at %d, step=%d, limit=%d\n", *start, step, end); if (*start == end) return; // Cannot be expanded. int barrier_size = 0; int good_size = 0; do { // Find the size of the incompatible barrier. barrier_size = 0; int i; for (i = *start + step; i != end; i += step) { if (column_set_costs[i][column_set_id] < assigned_costs[i]) break; // We are back on. // Locations where none are possible don't count. if (any_columns_possible[i]) ++barrier_size; } if (textord_debug_tabfind > 2) tprintf("At %d, Barrier size=%d\n", i, barrier_size); if (barrier_size > kMaxIncompatibleColumnCount) return; // Barrier too big. if (i == end) { // We can't go any further, but the barrier was small, so go to the end. *start = i - step; return; } // Now find the size of the good region on the other side. good_size = 1; for (i += step; i != end; i += step) { if (column_set_costs[i][column_set_id] < assigned_costs[i]) ++good_size; else if (any_columns_possible[i]) break; } if (textord_debug_tabfind > 2) tprintf("At %d, good size = %d\n", i, good_size); // If we had enough good ones we can extend the start and keep looking. if (good_size >= barrier_size) *start = i - step; } while (good_size >= barrier_size); } // Assigns the given column_set_id to the given range. void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, int** column_set_costs, int* assigned_costs) { ColPartitionSet* column_set = column_sets_.get(column_set_id); for (int i = start; i < end; ++i) { assigned_costs[i] = column_set_costs[i][column_set_id]; best_columns_[i] = column_set; } } // Computes the mean_column_gap_. void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) { int total_gap = 0; int total_width = 0; int gap_samples = 0; int width_samples = 0; for (int i = 0; i < gridheight_; ++i) { ASSERT_HOST(best_columns_[i] != NULL); best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, &width_samples, &total_gap, &gap_samples); } mean_column_gap_ = any_multi_column && gap_samples > 0 ? total_gap / gap_samples : total_width / width_samples; } //////// Functions that manipulate ColPartitions in the part_grid_ ///// //////// to split, merge, find margins, and find types. ////////////// // Helper to delete all the deletable blobs on the list. Owned blobs are // extracted from the list, but not deleted, leaving them owned by the owner(). static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) { for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) { BLOBNBOX* blob = blob_it.extract(); if (blob->owner() == NULL) { delete blob->cblob(); delete blob; } } } // Hoovers up all un-owned blobs and deletes them. // The rest get released from the block so the ColPartitions can pass // ownership to the output blocks. void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) { ReleaseAllBlobsAndDeleteUnused(&block->blobs); ReleaseAllBlobsAndDeleteUnused(&block->small_blobs); ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs); ReleaseAllBlobsAndDeleteUnused(&block->large_blobs); ReleaseAllBlobsAndDeleteUnused(&image_bblobs_); } // Splits partitions that cross columns where they have nothing in the gap. void ColumnFinder::GridSplitPartitions() { // Iterate the ColPartitions in the grid. GridSearch gsearch(&part_grid_); gsearch.StartFullSearch(); ColPartition* dont_repeat = NULL; ColPartition* part; while ((part = gsearch.NextFullSearch()) != NULL) { if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat) continue; // Only applies to text partitions. ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; int first_col = -1; int last_col = -1; // Find which columns the partition spans. part->ColumnRange(resolution_, column_set, &first_col, &last_col); if (first_col > 0) --first_col; // Convert output column indices to physical column indices. first_col /= 2; last_col /= 2; // We will only consider cases where a partition spans two columns, // since a heading that spans more columns than that is most likely // genuine. if (last_col != first_col + 1) continue; // Set up a rectangle search x-bounded by the column gap and y by the part. int y = part->MidY(); TBOX margin_box = part->bounding_box(); bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(), margin_box.bottom()); if (debug) { tprintf("Considering partition for GridSplit:"); part->Print(); } ColPartition* column = column_set->GetColumnByIndex(first_col); if (column == NULL) continue; margin_box.set_left(column->RightAtY(y) + 2); column = column_set->GetColumnByIndex(last_col); if (column == NULL) continue; margin_box.set_right(column->LeftAtY(y) - 2); // TODO(rays) Decide whether to keep rectangular filling or not in the // main grid and therefore whether we need a fancier search here. // Now run the rect search on the main blob grid. GridSearch rectsearch(this); if (debug) { tprintf("Searching box (%d,%d)->(%d,%d)\n", margin_box.left(), margin_box.bottom(), margin_box.right(), margin_box.top()); part->Print(); } rectsearch.StartRectSearch(margin_box); BLOBNBOX* bbox; while ((bbox = rectsearch.NextRectSearch()) != NULL) { if (bbox->bounding_box().overlap(margin_box)) break; } if (bbox == NULL) { // There seems to be nothing in the hole, so split the partition. gsearch.RemoveBBox(); int x_middle = (margin_box.left() + margin_box.right()) / 2; if (debug) { tprintf("Splitting part at %d:", x_middle); part->Print(); } ColPartition* split_part = part->SplitAt(x_middle); if (split_part != NULL) { if (debug) { tprintf("Split result:"); part->Print(); split_part->Print(); } part_grid_.InsertBBox(true, true, split_part); } else { // Split had no effect if (debug) tprintf("Split had no effect\n"); dont_repeat = part; } part_grid_.InsertBBox(true, true, part); gsearch.RepositionIterator(); } else if (debug) { tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n", bbox->bounding_box().left(), bbox->bounding_box().bottom(), bbox->bounding_box().right(), bbox->bounding_box().top()); } } } // Merges partitions where there is vertical overlap, within a single column, // and the horizontal gap is small enough. void ColumnFinder::GridMergePartitions() { // Iterate the ColPartitions in the grid. GridSearch gsearch(&part_grid_); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != NULL) { if (part->IsUnMergeableType()) continue; // Set up a rectangle search x-bounded by the column and y by the part. ColPartitionSet* columns = best_columns_[gsearch.GridY()]; TBOX box = part->bounding_box(); bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom()); if (debug) { tprintf("Considering part for merge at:"); part->Print(); } int y = part->MidY(); ColPartition* left_column = columns->ColumnContaining(box.left(), y); ColPartition* right_column = columns->ColumnContaining(box.right(), y); if (left_column == NULL || right_column != left_column) { if (debug) tprintf("In different columns\n"); continue; } box.set_left(left_column->LeftAtY(y)); box.set_right(right_column->RightAtY(y)); // Now run the rect search. bool modified_box = false; GridSearch rsearch(&part_grid_); rsearch.SetUniqueMode(true); rsearch.StartRectSearch(box); ColPartition* neighbour; while ((neighbour = rsearch.NextRectSearch()) != NULL) { if (neighbour == part || neighbour->IsUnMergeableType()) continue; const TBOX& neighbour_box = neighbour->bounding_box(); if (debug) { tprintf("Considering merge with neighbour at:"); neighbour->Print(); } if (neighbour_box.right() < box.left() || neighbour_box.left() > box.right()) continue; // Not within the same column. if (part->VSignificantCoreOverlap(*neighbour) && part->TypesMatch(*neighbour)) { // There is vertical overlap and the gross types match, but only // merge if the horizontal gap is small enough, as one of the // partitions may be a figure caption within a column. // If there is only one column, then the mean_column_gap_ is large // enough to allow almost any merge, by being the mean column width. const TBOX& part_box = part->bounding_box(); // Don't merge if there is something else in the way. Use the margin // to decide, and check both to allow a bit of overlap. if (neighbour_box.left() > part->right_margin() && part_box.right() < neighbour->left_margin()) continue; // Neighbour is too far to the right. if (neighbour_box.right() < part->left_margin() && part_box.left() > neighbour->right_margin()) continue; // Neighbour is too far to the left. int h_gap = MAX(part_box.left(), neighbour_box.left()) - MIN(part_box.right(), neighbour_box.right()); if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || part_box.width() < mean_column_gap_ || neighbour_box.width() < mean_column_gap_) { if (debug) { tprintf("Running grid-based merge between:\n"); part->Print(); neighbour->Print(); } rsearch.RemoveBBox(); if (!modified_box) { // We are going to modify part, so remove it and re-insert it after. gsearch.RemoveBBox(); rsearch.RepositionIterator(); modified_box = true; } part->Absorb(neighbour, WidthCB()); } else if (debug) { tprintf("Neighbour failed hgap test\n"); } } else if (debug) { tprintf("Neighbour failed overlap or typesmatch test\n"); } } if (modified_box) { // We modified the box of part, so re-insert it into the grid. // This does no harm in the current cell, as it already exists there, // but it needs to exist in all the cells covered by its bounding box, // or it will never be found by a full search. // Because the box has changed, it has to be removed first, otherwise // add_sorted may fail to keep a single copy of the pointer. part_grid_.InsertBBox(true, true, part); gsearch.RepositionIterator(); } } } // Inserts remaining noise blobs into the most applicable partition if any. // If there is no applicable partition, then the blobs are deleted. void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { BLOBNBOX_IT blob_it(&block->noise_blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); if (blob->owner() != NULL) continue; TBOX search_box(blob->bounding_box()); bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom()); search_box.pad(gridsize(), gridsize()); // Setup a rectangle search to find the best partition to merge with. ColPartitionGridSearch rsearch(&part_grid_); rsearch.SetUniqueMode(true); rsearch.StartRectSearch(search_box); ColPartition* part; ColPartition* best_part = NULL; int best_distance = 0; while ((part = rsearch.NextRectSearch()) != NULL) { if (part->IsUnMergeableType()) continue; int distance = projection_.DistanceOfBoxFromPartition( blob->bounding_box(), *part, denorm_, debug); if (best_part == NULL || distance < best_distance) { best_part = part; best_distance = distance; } } if (best_part != NULL && best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) { // Close enough to merge. if (debug) { tprintf("Adding noise blob with distance %d, thr=%g:box:", best_distance, kMaxDistToPartSizeRatio * best_part->median_size()); blob->bounding_box().print(); tprintf("To partition:"); best_part->Print(); } part_grid_.RemoveBBox(best_part); best_part->AddBox(blob); part_grid_.InsertBBox(true, true, best_part); blob->set_owner(best_part); blob->set_flow(best_part->flow()); blob->set_region_type(best_part->blob_type()); } else { // Mark the blob for deletion. blob->set_region_type(BRT_NOISE); } } // Delete the marked blobs, clearing neighbour references. block->DeleteUnownedNoise(); } // Helper makes a box from a horizontal line. static TBOX BoxFromHLine(const TabVector* hline) { int top = MAX(hline->startpt().y(), hline->endpt().y()); int bottom = MIN(hline->startpt().y(), hline->endpt().y()); top += hline->mean_width(); if (top == bottom) { if (bottom > 0) --bottom; else ++top; } return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top); } // Remove partitions that come from horizontal lines that look like // underlines, but are not part of a table. void ColumnFinder::GridRemoveUnderlinePartitions() { TabVector_IT hline_it(&horizontal_lines_); for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { TabVector* hline = hline_it.data(); if (hline->intersects_other_lines()) continue; TBOX line_box = BoxFromHLine(hline); TBOX search_box = line_box; search_box.pad(0, line_box.height()); ColPartitionGridSearch part_search(&part_grid_); part_search.SetUniqueMode(true); part_search.StartRectSearch(search_box); ColPartition* covered; bool touched_table = false; bool touched_text = false; ColPartition* line_part = NULL; while ((covered = part_search.NextRectSearch()) != NULL) { if (covered->type() == PT_TABLE) { touched_table = true; break; } else if (covered->IsTextType()) { // TODO(rays) Add a list of underline sections to ColPartition. int text_bottom = covered->median_bottom(); if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) touched_text = true; } else if (covered->blob_type() == BRT_HLINE && line_box.contains(covered->bounding_box())) { line_part = covered; } } if (line_part != NULL && !touched_table && touched_text) { part_grid_.RemoveBBox(line_part); delete line_part; } } } // Add horizontal line separators as partitions. void ColumnFinder::GridInsertHLinePartitions() { TabVector_IT hline_it(&horizontal_lines_); for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { TabVector* hline = hline_it.data(); TBOX line_box = BoxFromHLine(hline); ColPartition* part = ColPartition::MakeLinePartition( BRT_HLINE, vertical_skew_, line_box.left(), line_box.bottom(), line_box.right(), line_box.top()); part->set_type(PT_HORZ_LINE); bool any_image = false; ColPartitionGridSearch part_search(&part_grid_); part_search.SetUniqueMode(true); part_search.StartRectSearch(line_box); ColPartition* covered; while ((covered = part_search.NextRectSearch()) != NULL) { if (covered->IsImageType()) { any_image = true; break; } } if (!any_image) part_grid_.InsertBBox(true, true, part); else delete part; } } // Add horizontal line separators as partitions. void ColumnFinder::GridInsertVLinePartitions() { TabVector_IT vline_it(dead_vectors()); for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { TabVector* vline = vline_it.data(); if (!vline->IsSeparator()) continue; int left = MIN(vline->startpt().x(), vline->endpt().x()); int right = MAX(vline->startpt().x(), vline->endpt().x()); right += vline->mean_width(); if (left == right) { if (left > 0) --left; else ++right; } ColPartition* part = ColPartition::MakeLinePartition( BRT_VLINE, vertical_skew_, left, vline->startpt().y(), right, vline->endpt().y()); part->set_type(PT_VERT_LINE); bool any_image = false; ColPartitionGridSearch part_search(&part_grid_); part_search.SetUniqueMode(true); part_search.StartRectSearch(part->bounding_box()); ColPartition* covered; while ((covered = part_search.NextRectSearch()) != NULL) { if (covered->IsImageType()) { any_image = true; break; } } if (!any_image) part_grid_.InsertBBox(true, true, part); else delete part; } } // For every ColPartition in the grid, sets its type based on position // in the columns. void ColumnFinder::SetPartitionTypes() { GridSearch gsearch(&part_grid_); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != NULL) { part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); } } // Only images remain with multiple types in a run of partners. // Sets the type of all in the group to the maximum of the group. void ColumnFinder::SmoothPartnerRuns() { // Iterate the ColPartitions in the grid. GridSearch gsearch(&part_grid_); gsearch.StartFullSearch(); ColPartition* part; while ((part = gsearch.NextFullSearch()) != NULL) { ColPartition* partner = part->SingletonPartner(true); if (partner != NULL) { if (partner->SingletonPartner(false) != part) { tprintf("Ooops! Partition:(%d partners)", part->upper_partners()->length()); part->Print(); tprintf("has singleton partner:(%d partners", partner->lower_partners()->length()); partner->Print(); tprintf("but its singleton partner is:"); if (partner->SingletonPartner(false) == NULL) tprintf("NULL\n"); else partner->SingletonPartner(false)->Print(); } ASSERT_HOST(partner->SingletonPartner(false) == part); } else if (part->SingletonPartner(false) != NULL) { ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; int column_count = column_set->ColumnCount(); part->SmoothPartnerRun(column_count * 2 + 1); } } } // Helper functions for TransformToBlocks. // Add the part to the temp list in the correct order. void ColumnFinder::AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list) { int mid_y = part->MidY(); ColPartition_C_IT it(temp_list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* test_part = it.data(); if (part->type() == PT_NOISE || test_part->type() == PT_NOISE) continue; // Noise stays in sequence. if (test_part == part->SingletonPartner(false)) break; // Insert before its lower partner. int neighbour_bottom = test_part->median_bottom(); int neighbour_top = test_part->median_top(); int neighbour_y = (neighbour_bottom + neighbour_top) / 2; if (neighbour_y < mid_y) break; // part is above test_part so insert it. if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) continue; // Incompatibles stay in order } if (it.cycled_list()) { it.add_to_end(part); } else { it.add_before_stay_put(part); } } // Add everything from the temp list to the work_set assuming correct order. void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list, WorkingPartSet_LIST* work_set) { ColPartition_C_IT it(temp_list); while (!it.empty()) { it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, &good_parts_, work_set); it.forward(); } } // Transform the grid of partitions to the output blocks. void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { WorkingPartSet_LIST work_set; ColPartitionSet* column_set = NULL; ColPartition_IT noise_it(&noise_parts_); // The temp_part_list holds a list of parts at the same grid y coord // so they can be added in the correct order. This prevents thin objects // like horizontal lines going before the text lines above them. ColPartition_CLIST temp_part_list; // Iterate the ColPartitions in the grid. It starts at the top GridSearch gsearch(&part_grid_); gsearch.StartFullSearch(); int prev_grid_y = -1; ColPartition* part; while ((part = gsearch.NextFullSearch()) != NULL) { int grid_y = gsearch.GridY(); if (grid_y != prev_grid_y) { EmptyTempPartList(&temp_part_list, &work_set); prev_grid_y = grid_y; } if (best_columns_[grid_y] != column_set) { column_set = best_columns_[grid_y]; // Every line should have a non-null best column. ASSERT_HOST(column_set != NULL); column_set->ChangeWorkColumns(bleft_, tright_, resolution_, &good_parts_, &work_set); if (textord_debug_tabfind) tprintf("Changed column groups at grid index %d, y=%d\n", gsearch.GridY(), gsearch.GridY() * gridsize()); } if (part->type() == PT_NOISE) { noise_it.add_to_end(part); } else { AddToTempPartList(part, &temp_part_list); } } EmptyTempPartList(&temp_part_list, &work_set); // Now finish all working sets and transfer ColPartitionSets to block_sets. WorkingPartSet_IT work_it(&work_set); while (!work_it.empty()) { WorkingPartSet* working_set = work_it.extract(); working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_, &good_parts_, blocks, to_blocks); delete working_set; work_it.forward(); } } // Helper reflects a list of blobs in the y-axis. // Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below. static void ReflectBlobList(BLOBNBOX_LIST* bblobs) { BLOBNBOX_IT it(bblobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->reflect_box_in_y_axis(); } } // Reflect the blob boxes (but not the outlines) in the y-axis so that // the blocks get created in the correct RTL order. Reflects the blobs // in the input_block and the bblobs list. // The reflection is undone in RotateAndReskewBlocks by // reflecting the blocks themselves, and then recomputing the blob bounding // boxes. void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) { ReflectBlobList(bblobs); ReflectBlobList(&input_block->blobs); ReflectBlobList(&input_block->small_blobs); ReflectBlobList(&input_block->noise_blobs); ReflectBlobList(&input_block->large_blobs); // Update the denorm with the reflection. DENORM* new_denorm = new DENORM; new_denorm->SetupNormalization(NULL, NULL, denorm_, 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); denorm_ = new_denorm; } // Helper fixes up blobs and cblobs to match the desired rotation, // exploding multi-outline blobs back to single blobs and accumulating // the bounding box widths and heights. static void RotateAndExplodeBlobList(const FCOORD& blob_rotation, BLOBNBOX_LIST* bblobs, STATS* widths, STATS* heights) { BLOBNBOX_IT it(bblobs); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); C_BLOB* cblob = blob->cblob(); C_OUTLINE_LIST* outlines = cblob->out_list(); C_OUTLINE_IT ol_it(outlines); if (!outlines->singleton()) { // This blob has multiple outlines from CJK repair. // Explode the blob back into individual outlines. for (;!ol_it.empty(); ol_it.forward()) { C_OUTLINE* outline = ol_it.extract(); BLOBNBOX* new_blob = BLOBNBOX::RealBlob(outline); // This blob will be revisited later since we add_after_stay_put here. // This means it will get rotated and have its width/height added to // the stats below. it.add_after_stay_put(new_blob); } it.extract(); delete cblob; delete blob; } else { if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { cblob->rotate(blob_rotation); } blob->compute_bounding_box(); widths->add(blob->bounding_box().width(), 1); heights->add(blob->bounding_box().height(), 1); } } } // Undo the deskew that was done in FindTabVectors, as recognition is done // without correcting blobs or blob outlines for skew. // Reskew the completed blocks to put them back to the original rotated coords // that were created by CorrectOrientation. // If the input_is_rtl, then reflect the blocks in the y-axis to undo the // reflection that was done before FindTabVectors. // Blocks that were identified as vertical text (relative to the rotated // coordinates) are further rotated so the text lines are horizontal. // blob polygonal outlines are rotated to match the position of the blocks // that they are in, and their bounding boxes are recalculated to be accurate. // Record appropriate inverse transformations and required // classifier transformation in the blocks. void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* blocks) { if (input_is_rtl) { // The skew is backwards because of the reflection. FCOORD tmp = deskew_; deskew_ = reskew_; reskew_ = tmp; } TO_BLOCK_IT it(blocks); int block_index = 1; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TO_BLOCK* to_block = it.data(); BLOCK* block = to_block->block; // Blocks are created on the deskewed blob outlines in TransformToBlocks() // so we need to reskew them back to page coordinates. if (input_is_rtl) { block->reflect_polygon_in_y_axis(); } block->rotate(reskew_); // Copy the right_to_left flag to the created block. block->set_right_to_left(input_is_rtl); // Save the skew angle in the block for baseline computations. block->set_skew(reskew_); block->set_index(block_index++); FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); // Rotate all the blobs if needed and recompute the bounding boxes. // Compute the block median blob width and height as we go. STATS widths(0, block->bounding_box().width()); STATS heights(0, block->bounding_box().height()); RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, &widths, &heights); TO_ROW_IT row_it(to_block->get_rows()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { TO_ROW* row = row_it.data(); RotateAndExplodeBlobList(blob_rotation, row->blob_list(), &widths, &heights); } block->set_median_size(static_cast(widths.median() + 0.5), static_cast(heights.median() + 0.5)); if (textord_debug_tabfind >= 2) tprintf("Block median size = (%d, %d)\n", block->median_size().x(), block->median_size().y()); } } // Computes the rotations for the block (to make textlines horizontal) and // for the blobs (for classification) and sets the appropriate members // of the given block. // Returns the rotation that needs to be applied to the blobs to make // them sit in the rotated block. FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) { // The text_rotation_ tells us the gross page text rotation that needs // to be applied for classification // TODO(rays) find block-level classify rotation by orientation detection. // In the mean time, assume that "up" for text printed in the minority // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. // Accomplish this by zero-ing out the text rotation. This covers the // common cases of image credits in documents written in Latin scripts // and page headings for predominantly vertically written CJK books. FCOORD classify_rotation(text_rotation_); FCOORD block_rotation(1.0f, 0.0f); if (block->poly_block()->isA() == PT_VERTICAL_TEXT) { // Vertical text needs to be 90 degrees rotated relative to the rest. // If the rest has a 90 degree rotation already, use the inverse, making // the vertical text the original way up. Otherwise use 90 degrees // clockwise. if (rerotate_.x() == 0.0f) block_rotation = rerotate_; else block_rotation = FCOORD(0.0f, -1.0f); block->rotate(block_rotation); classify_rotation = FCOORD(1.0f, 0.0f); } block_rotation.rotate(rotation_); // block_rotation is now what we have done to the blocks. Now do the same // thing to the blobs, but save the inverse rotation in the block, as that // is what we need to DENORM back to the image coordinates. FCOORD blob_rotation(block_rotation); block_rotation.set_y(-block_rotation.y()); block->set_re_rotation(block_rotation); block->set_classify_rotation(classify_rotation); if (textord_debug_tabfind) { tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", block->index(), block->poly_block()->isA(), block->re_rotation().x(), block->re_rotation().y(), classify_rotation.x(), classify_rotation.y()); block->bounding_box().print(); } return blob_rotation; } } // namespace tesseract.