mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
0a9ad20d1c
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@301 d0cd1f9f-072b-0410-8dd7-cf729c803f20
434 lines
20 KiB
C++
434 lines
20 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: colfind.h
|
|
// Description: Class to find columns in the grid of BLOBNBOXes.
|
|
// Author: Ray Smith
|
|
// Created: Thu Feb 21 14:04:01 PST 2008
|
|
//
|
|
// (C) Copyright 2008, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef TESSERACT_TEXTORD_COLFIND_H__
|
|
#define TESSERACT_TEXTORD_COLFIND_H__
|
|
|
|
#include "tabfind.h"
|
|
#include "tablefind.h"
|
|
#include "imagefind.h"
|
|
#include "colpartition.h"
|
|
#include "colpartitionset.h"
|
|
#include "ocrblock.h"
|
|
|
|
class ScrollView;
|
|
class TO_BLOCK;
|
|
class STATS;
|
|
class BLOCK_LIST;
|
|
struct Boxa;
|
|
struct Pixa;
|
|
|
|
namespace tesseract {
|
|
|
|
class StrokeWidth;
|
|
class LineSpacing;
|
|
class TempColumn_LIST;
|
|
class ColSegment_LIST;
|
|
class ColumnGroup_LIST;
|
|
class ColPartitionSet;
|
|
class ColPartitionSet_LIST;
|
|
|
|
// The ColumnFinder class finds columns in the grid.
|
|
class ColumnFinder : public TabFind {
|
|
public:
|
|
// Gridsize is an estimate of the text size in the image. A suitable value
|
|
// is in TO_BLOCK::line_size after find_components has been used to make
|
|
// the blobs.
|
|
// bleft and tright are the bounds of the image (rectangle) being processed.
|
|
// vlines is a (possibly empty) list of TabVector and vertical_x and y are
|
|
// the sum logical vertical vector produced by LineFinder::FindVerticalLines.
|
|
ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright,
|
|
TabVector_LIST* vlines, TabVector_LIST* hlines,
|
|
int vertical_x, int vertical_y);
|
|
virtual ~ColumnFinder();
|
|
|
|
// Finds the text and image blocks, returning them in the blocks and to_blocks
|
|
// lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.)
|
|
// If boxa and pixa are not NULL, they are assumed to be the output of
|
|
// ImageFinder::FindImages, and are used to generate image blocks.
|
|
// The input boxa and pixa are destroyed.
|
|
// Imageheight and resolution should be the pixel height and resolution in
|
|
// pixels per inch of the original image.
|
|
// The input block is the result of a call to find_components, and contains
|
|
// the blobs found in the image. These blobs will be removed and placed
|
|
// in the output blocks, while unused ones will be deleted.
|
|
// If single_column is true, the input is treated as single column, but
|
|
// it is still divided into blocks of equal line spacing/text size.
|
|
// Returns -1 if the user requested retry with more debug info.
|
|
int FindBlocks(int imageheight, int resolution, bool single_column,
|
|
TO_BLOCK* block, Boxa* boxa, Pixa* pixa,
|
|
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
|
|
|
|
private:
|
|
// Displays the blob and block bounding boxes in a window called Blocks.
|
|
void DisplayBlocks(BLOCK_LIST* blocks);
|
|
// Displays the column edges at each grid y coordinate defined by
|
|
// best_columns_.
|
|
void DisplayColumnBounds(PartSetVector* sets);
|
|
|
|
// Converts the arrays of Box/Pix to a list of C_OUTLINE, and then to blobs.
|
|
// The output is a list of C_BLOBs for the images, but the C_OUTLINEs
|
|
// contain no data.
|
|
void ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa);
|
|
|
|
////// Functions involved in making the initial ColPartitions. /////
|
|
|
|
// Creates the initial ColPartitions, and puts them in a ColPartitionSet
|
|
// for each grid y coordinate, storing the ColPartitionSets in part_sets_.
|
|
// After creating the ColPartitonSets, attempts to merge them where they
|
|
// overlap and unique the BLOBNBOXes within.
|
|
// The return value is the number of ColPartitionSets made.
|
|
int MakeColumnPartitions();
|
|
// Partition the BLOBNBOXES horizontally at the given grid y, creating a
|
|
// ColPartitionSet which is returned. NULL is returned if there are no
|
|
// BLOBNBOXES at the given grid y.
|
|
ColPartitionSet* PartitionsAtGridY(int grid_y);
|
|
// Insert the blobs in the given list into the main grid and for
|
|
// each one also make it a separate unknown partition.
|
|
// If filter is true, use only the blobs that are above a threshold in
|
|
// size or a non-isolated.
|
|
void InsertSmallBlobsAsUnknowns(bool filter, BLOBNBOX_LIST* blobs);
|
|
// Helper function for PartitionsAtGridY, with a long argument list.
|
|
// This bbox is of unknown type, so it is added to an unk_partition.
|
|
// If the edge is past the unk_right_margin then unk_partition has to be
|
|
// completed and a new one made. See CompletePartition and StartPartition
|
|
// for the other args.
|
|
void ProcessUnknownBlob(int page_edge, BLOBNBOX* bbox,
|
|
ColPartition** unk_partition,
|
|
ColPartition_IT* unk_part_it,
|
|
TabVector** unk_right_line,
|
|
int* unk_right_margin,
|
|
int* unk_prev_margin,
|
|
bool* unk_edge_is_left);
|
|
// Creates and returns a new ColPartition of the given start_type
|
|
// and adds the given bbox to it.
|
|
// Also finds the left and right tabvectors that bound the textline, setting
|
|
// the members of the returned ColPartition appropriately:
|
|
// If the left tabvector is less constraining than the input left_margin
|
|
// (assumed to be the right edge of the previous partition), then the
|
|
// tabvector is ignored and the left_margin used instead.
|
|
// If the right tabvector is more constraining than the input *right_margin,
|
|
// (probably the right edge of the page), then the *right_margin is adjusted
|
|
// to use the tabvector.
|
|
// *edge_is_left is set to true if the right tabvector is good and used as the
|
|
// margin, so we can include blobs that overhang the tabvector in this
|
|
// partition.
|
|
ColPartition* StartPartition(BlobRegionType start_type, int left_margin,
|
|
BLOBNBOX* bbox, TabVector** right_line,
|
|
int* right_margin, bool* edge_is_left);
|
|
// Completes the given partition, and adds it to the given iterator.
|
|
// The right_margin on input is the left edge of the next blob if there is
|
|
// one. The right tab vector plus a margin is used as the right margin if
|
|
// it is more constraining than the next blob, but if there are no more
|
|
// blobs, we want the right margin to make it to the page edge.
|
|
// The return value is the next left margin, being the right edge of the
|
|
// bounding box of blobs.
|
|
int CompletePartition(bool no_more_blobs, int page_edge,
|
|
TabVector* right_line, int* right_margin,
|
|
ColPartition** partition, ColPartition_IT* part_it);
|
|
|
|
|
|
////// Functions involved in determining the columns used on the page. /////
|
|
|
|
// Makes an ordered list of candidates to partition the width of the page
|
|
// into columns using the part_sets_.
|
|
// See AddToColumnSetsIfUnique for the ordering.
|
|
// If single_column, then it just makes a single page-wide fake column.
|
|
void MakeColumnCandidates(bool single_column);
|
|
// Attempt to improve the column_candidates by expanding the columns
|
|
// and adding new partitions from the partition sets in src_sets.
|
|
// Src_sets may be equal to column_candidates, in which case it will
|
|
// use them as a source to improve themselves.
|
|
void ImproveColumnCandidates(PartSetVector* src_sets,
|
|
PartSetVector* column_sets);
|
|
// Prints debug information on the column candidates.
|
|
void PrintColumnCandidates(const char* title);
|
|
// Finds the optimal set of columns that cover the entire image with as
|
|
// few changes in column partition as possible.
|
|
void AssignColumns();
|
|
// Finds the biggest range in part_sets_ that has no assigned column, but
|
|
// column assignment is possible.
|
|
bool BiggestUnassignedRange(const bool* any_columns_possible,
|
|
int* start, int* end);
|
|
// Finds the modal compatible column_set_ index within the given range.
|
|
int RangeModalColumnSet(bool** possible_column_sets,
|
|
int start, int end);
|
|
// Given that there are many column_set_id compatible columns in the range,
|
|
// shrinks the range to the longest contiguous run of compatibility, allowing
|
|
// gaps where no columns are possible, but not where competing columns are
|
|
// possible.
|
|
void ShrinkRangeToLongestRun(bool** possible_column_sets,
|
|
const bool* any_columns_possible,
|
|
int column_set_id,
|
|
int* best_start, int* best_end);
|
|
// Moves start in the direction of step, upto, but not including end while
|
|
// the only incompatible regions are no more than kMaxIncompatibleColumnCount
|
|
// in size, and the compatible regions beyond are bigger.
|
|
void ExtendRangePastSmallGaps(bool** possible_column_sets,
|
|
const bool* any_columns_possible,
|
|
int column_set_id,
|
|
int step, int end, int* start);
|
|
// Assigns the given column_set_id to the part_sets_ in the given range.
|
|
void AssignColumnToRange(int column_set_id, int start, int end,
|
|
bool** assigned_column_sets);
|
|
|
|
// Computes the mean_column_gap_.
|
|
void ComputeMeanColumnGap();
|
|
|
|
//////// Functions that manipulate ColPartitions in the part_grid_ /////
|
|
//////// to split, merge, find margins, and find types. //////////////
|
|
|
|
// Removes the ColPartitions from part_sets_, the ColPartitionSets that
|
|
// contain them, and puts them in the part_grid_ after ensuring that no
|
|
// BLOBNBOX is owned by more than one of them.
|
|
void MovePartitionsToGrid();
|
|
// Splits partitions that cross columns where they have nothing in the gap.
|
|
void GridSplitPartitions();
|
|
// Merges partitions where there is vertical overlap, within a single column,
|
|
// and the horizontal gap is small enough.
|
|
void GridMergePartitions();
|
|
// Resolves unknown partitions from the unknown_parts_ list by merging them
|
|
// with a close neighbour, inserting them into the grid with a known type,
|
|
// or declaring them to be noise.
|
|
void GridInsertUnknowns();
|
|
// Add horizontal line separators as partitions.
|
|
void GridInsertHLinePartitions();
|
|
// Improves the margins of the ColPartitions in the grid by calling
|
|
// FindPartitionMargins on each.
|
|
void GridFindMargins();
|
|
// Improves the margins of the ColPartitions in the list by calling
|
|
// FindPartitionMargins on each.
|
|
void ListFindMargins(ColPartition_LIST* parts);
|
|
// Improves the margins of the ColPartition by searching for
|
|
// neighbours that vertically overlap significantly.
|
|
void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
|
|
// Starting at x, and going in the specified direction, upto x_limit, finds
|
|
// the margin for the given y range by searching sideways,
|
|
// and ignoring not_this.
|
|
int FindMargin(int x, bool right_to_left, int x_limit,
|
|
int y_bottom, int y_top, const ColPartition* not_this);
|
|
// For every ColPartition in the grid, sets its type based on position
|
|
// in the columns.
|
|
void SetPartitionTypes();
|
|
|
|
//////// Functions that manipulate ColPartitions in the part_grid_ /////
|
|
//////// to find chains of partner partitions of the same type. ///////
|
|
|
|
// For every ColPartition in the grid, finds its upper and lower neighbours.
|
|
void FindPartitionPartners();
|
|
// Finds the best partner in the given direction for the given partition.
|
|
// Stores the result with AddPartner.
|
|
void FindPartitionPartners(bool upper, ColPartition* part);
|
|
// For every ColPartition with multiple partners in the grid, reduces the
|
|
// number of partners to 0 or 1.
|
|
void RefinePartitionPartners();
|
|
// Only images remain with multiple types in a run of partners.
|
|
// Sets the type of all in the group to the maximum of the group.
|
|
void SmoothPartnerRuns();
|
|
|
|
//////// Functions that manipulate ColPartitions in the part_grid_ /////
|
|
//////// to find tables. ///////
|
|
|
|
// Copy cleaned partitions from part_grid_ to clean_part_grid_ and
|
|
// insert dot-like noise into period_grid_
|
|
void GetCleanPartitions(TO_BLOCK* block);
|
|
|
|
// High level function to perform table detection
|
|
void LocateTables();
|
|
|
|
// Get Column segments from best_columns_
|
|
void GetColumnBlocks(ColSegment_LIST *col_segments);
|
|
|
|
// Group Column segments into consecutive single column regions.
|
|
void GroupColumnBlocks(ColSegment_LIST *current_segments,
|
|
ColSegment_LIST *col_segments);
|
|
|
|
// Check if two boxes are consecutive within the same column
|
|
bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
|
|
|
|
// Set left, right and top, bottom spacings of each colpartition.
|
|
// Left/right spacings are w.r.t the column boundaries
|
|
// Top/bottom spacings are w.r.t. previous and next colpartitions
|
|
void SetPartitionSpacings();
|
|
|
|
// Set spacing and closest neighbors above and below a given colpartition.
|
|
void SetVerticalSpacing(ColPartition* part);
|
|
|
|
// Set global spacing estimates
|
|
void SetGlobalSpacings();
|
|
|
|
// Mark partitions as table rows/cells.
|
|
void GridMarkTablePartitions();
|
|
|
|
// Check if the partition has at lease one large gap between words or no
|
|
// significant gap at all
|
|
bool HasWideOrNoInterWordGap(ColPartition* part);
|
|
|
|
// Check if a period lies in the inter-wrod gap in the parition boxes
|
|
bool LiesInGap(BLOBNBOX* period, BLOBNBOX_CLIST* boxes);
|
|
|
|
// Filter individual text partitions marked as table partitions
|
|
// consisting of paragraph endings, small section headings, and
|
|
// headers and footers.
|
|
void FilterFalseAlarms();
|
|
|
|
// Mark all ColPartitions as table cells that have a table cell above
|
|
// and below them
|
|
void SmoothTablePartitionRuns();
|
|
|
|
// Set the ratio of candidate table partitions in each column
|
|
void SetColumnsType(ColSegment_LIST* col_segments);
|
|
|
|
// Move Column Blocks to col_seg_grid_
|
|
void MoveColSegmentsToGrid(ColSegment_LIST *segments,
|
|
ColSegmentGrid *col_seg_grid);
|
|
|
|
// Merge Column Blocks that were split due to the presence of a table
|
|
void GridMergeColumnBlocks();
|
|
|
|
// Merge table cells into table columns
|
|
void GetTableColumns(ColSegment_LIST *table_columns);
|
|
|
|
// Get Column segments from best_columns_
|
|
void GetTableRegions(ColSegment_LIST *table_columns,
|
|
ColSegment_LIST *table_regions);
|
|
|
|
// Merge table regions corresponding to tables spanning multiple columns
|
|
void GridMergeTableRegions();
|
|
bool BelongToOneTable(const TBOX &box1, const TBOX &box2);
|
|
|
|
// Adjust table boundaries by building a tight bounding box around all
|
|
// ColPartitions contained in it.
|
|
void AdjustTableBoundaries();
|
|
|
|
// Checks whether the horizontal line belong to the table by looking at the
|
|
// side spacing of extra ColParitions that will be included in the table
|
|
// due to expansion
|
|
bool HLineBelongsToTable(ColPartition* part, const TBOX& table_box);
|
|
|
|
// Look for isolated column headers above the given table box and
|
|
// include them in the table
|
|
void IncludeLeftOutColumnHeaders(TBOX& table_box);
|
|
|
|
// Remove false alarms consiting of a single column
|
|
void DeleteSingleColumnTables();
|
|
|
|
// Return true if at least one gap larger than the global x-height
|
|
// exists in the horizontal projection
|
|
bool GapInXProjection(int* xprojection, int length);
|
|
|
|
// Displays Colpartitions marked as table row. Overlays them on top of
|
|
// part_grid_.
|
|
void DisplayColSegments(ColSegment_LIST *cols, ScrollView* win,
|
|
ScrollView::Color color);
|
|
|
|
// Displays the colpartitions using a new coloring on an existing window.
|
|
// Note: This method is only for debug purpose during development and
|
|
// would not be part of checked in code
|
|
void DisplayColPartitions(ScrollView* win,
|
|
ScrollView::Color color);
|
|
|
|
// Write ColParitions and Tables to a PIX image
|
|
// Note: This method is only for debug purpose during development and
|
|
// would not be part of checked in code
|
|
void WriteToPix();
|
|
|
|
// Merge all colpartitions in table regions to make them a single
|
|
// colpartition and revert types of isolated table cells not
|
|
// assigned to any table to their original types.
|
|
void MakeTableBlocks();
|
|
|
|
//////// Functions that make the final output blocks ///////
|
|
|
|
// Helper functions for TransformToBlocks.
|
|
// Add the part to the temp list in the correct order.
|
|
void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list);
|
|
// Add everything from the temp list to the work_set assuming correct order.
|
|
void EmptyTempPartList(ColPartition_CLIST* temp_list,
|
|
WorkingPartSet_LIST* work_set);
|
|
|
|
// Transform the grid of partitions to the output blocks.
|
|
void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
|
|
|
|
// Reskew the completed blocks to put them back to the original coords.
|
|
// (Blob outlines are not corrected for skew.)
|
|
// Rotate blobs and blocks individually so text line direction is
|
|
// horizontal. Record appropriate inverse transformations and required
|
|
// classifier transformation in the blocks.
|
|
void RotateAndReskewBlocks(TO_BLOCK_LIST* to_blocks);
|
|
|
|
|
|
// Move all the small and noise blobs into the main blobs list of
|
|
// the block from the to_blocks list that contains them.
|
|
void MoveSmallBlobs(BLOBNBOX_LIST* bblobs, TO_BLOCK_LIST* to_blocks);
|
|
|
|
// The mean gap between columns over the page.
|
|
int mean_column_gap_;
|
|
// Estimate of median x-height over the page
|
|
int global_median_xheight_;
|
|
// Estimate of median ledding on the page
|
|
int global_median_ledding_;
|
|
// The rotation vector needed to convert deskewed back to original coords.
|
|
FCOORD reskew_;
|
|
// The rotation vector needed to convert the rotated back to original coords.
|
|
FCOORD rerotate_;
|
|
// The part_sets_ are the initial text-line-like partition of the grid,
|
|
// and is a vector of ColPartitionSets.
|
|
PartSetVector part_sets_;
|
|
// The column_sets_ contain the ordered candidate ColPartitionSets that
|
|
// define the possible divisions of the page into columns.
|
|
PartSetVector column_sets_;
|
|
// A simple array of pointers to the best assigned column division at
|
|
// each grid y coordinate.
|
|
ColPartitionSet** best_columns_;
|
|
// The grid used to hold ColPartitions after the columns have been determined.
|
|
ColPartitionGrid part_grid_;
|
|
// Grid to hold cleaned colpartitions after removing all
|
|
// colpartitions that consist of only noise blobs, and removing
|
|
// noise blobs from remaining colpartitions.
|
|
ColPartitionGrid clean_part_grid_;
|
|
// Grid to hold periods, commas, i-dots etc.
|
|
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> period_grid_;
|
|
// List of period blobs extracted from small and noise blobs
|
|
BLOBNBOX_LIST period_blobs_;
|
|
// Grid of page column blocks
|
|
ColSegmentGrid col_seg_grid_;
|
|
// Grid of detected tables
|
|
ColSegmentGrid table_grid_;
|
|
// List of ColPartitions that are no longer needed after they have been
|
|
// turned into regions, but are kept around because they are referenced
|
|
// by the part_grid_.
|
|
ColPartition_LIST good_parts_;
|
|
// List of ColPartitions of unknown type.
|
|
ColPartition_LIST unknown_parts_;
|
|
// List of ColPartitions that have been declared noise.
|
|
ColPartition_LIST noise_parts_;
|
|
// The fake blobs that are made from the input boxa/pixa pair.
|
|
BLOBNBOX_LIST image_bblobs_;
|
|
// Horizontal line separators.
|
|
TabVector_LIST horizontal_lines_;
|
|
// Allow a subsequent instance to reuse the blocks window.
|
|
// Not thread-safe, but multiple threads shouldn't be using windows anyway.
|
|
static ScrollView* blocks_win_;
|
|
};
|
|
|
|
} // namespace tesseract.
|
|
|
|
#endif // TESSERACT_TEXTORD_COLFIND_H__
|