mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-24 07:27:49 +08:00
356 lines
17 KiB
C++
356 lines
17 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: strokewidth.h
|
|
// Description: Subclass of BBGrid to find uniformity of strokewidth.
|
|
// Author: Ray Smith
|
|
// Created: Mon Mar 31 16:17:01 PST 2008
|
|
//
|
|
// (C) Copyright 2008, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H__
|
|
#define TESSERACT_TEXTORD_STROKEWIDTH_H__
|
|
|
|
#include "blobbox.h" // BlobNeighourDir.
|
|
#include "blobgrid.h" // Base class.
|
|
#include "colpartitiongrid.h"
|
|
#include "textlineprojection.h"
|
|
|
|
class DENORM;
|
|
class ScrollView;
|
|
class TO_BLOCK;
|
|
|
|
namespace tesseract {
|
|
|
|
class ColPartition_LIST;
|
|
class TabFind;
|
|
class TextlineProjection;
|
|
|
|
// Misc enums to clarify bool arguments for direction-controlling args.
|
|
enum LeftOrRight {
|
|
LR_LEFT,
|
|
LR_RIGHT
|
|
};
|
|
|
|
// Return value from FindInitialPartitions indicates detection of severe
|
|
// skew or noise.
|
|
enum PartitionFindResult {
|
|
PFR_OK, // Everything is OK.
|
|
PFR_SKEW, // Skew was detected and rotated.
|
|
PFR_NOISE // Noise was detected and removed.
|
|
};
|
|
|
|
/**
|
|
* The StrokeWidth class holds all the normal and large blobs.
|
|
* It is used to find good large blobs and move them to the normal blobs
|
|
* by virtue of having a reasonable strokewidth compatible neighbour.
|
|
*/
|
|
class StrokeWidth : public BlobGrid {
|
|
public:
|
|
StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright);
|
|
virtual ~StrokeWidth();
|
|
|
|
// Sets the neighbours member of the medium-sized blobs in the block.
|
|
// Searches on 4 sides of each blob for similar-sized, similar-strokewidth
|
|
// blobs and sets pointers to the good neighbours.
|
|
void SetNeighboursOnMediumBlobs(TO_BLOCK* block);
|
|
|
|
// Sets the neighbour/textline writing direction members of the medium
|
|
// and large blobs with optional repair of broken CJK characters first.
|
|
// Repair of broken CJK is needed here because broken CJK characters
|
|
// can fool the textline direction detection algorithm.
|
|
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
|
|
bool cjk_merge,
|
|
TO_BLOCK* input_block);
|
|
|
|
// To save computation, the process of generating partitions is broken
|
|
// into the following 4 steps:
|
|
// TestVerticalTextDirection
|
|
// CorrectForRotation (used only if a rotation is to be applied)
|
|
// FindLeaderPartitions
|
|
// GradeBlobsIntoPartitions.
|
|
// These functions are all required, in sequence, except for
|
|
// CorrectForRotation, which is not needed if no rotation is applied.
|
|
|
|
// Types all the blobs as vertical or horizontal text or unknown and
|
|
// returns true if the majority are vertical.
|
|
// If the blobs are rotated, it is necessary to call CorrectForRotation
|
|
// after rotating everything, otherwise the work done here will be enough.
|
|
// If osd_blobs is not null, a list of blobs from the dominant textline
|
|
// direction are returned for use in orientation and script detection.
|
|
// find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
|
|
bool TestVerticalTextDirection(double find_vertical_text_ratio,
|
|
TO_BLOCK* block,
|
|
BLOBNBOX_CLIST* osd_blobs);
|
|
|
|
// Corrects the data structures for the given rotation.
|
|
void CorrectForRotation(const FCOORD& rerotation,
|
|
ColPartitionGrid* part_grid);
|
|
|
|
// Finds leader partitions and inserts them into the give grid.
|
|
void FindLeaderPartitions(TO_BLOCK* block,
|
|
ColPartitionGrid* part_grid);
|
|
|
|
// Finds and marks noise those blobs that look like bits of vertical lines
|
|
// that would otherwise screw up layout analysis.
|
|
void RemoveLineResidue(ColPartition_LIST* big_part_list);
|
|
|
|
// Types all the blobs as vertical text or horizontal text or unknown and
|
|
// puts them into initial ColPartitions in the supplied part_grid.
|
|
// rerotation determines how to get back to the image coordinates from the
|
|
// blob coordinates (since they may have been rotated for vertical text).
|
|
// block is the single block for the whole page or rectangle to be OCRed.
|
|
// nontext_pix (full-size), is a binary mask used to prevent merges across
|
|
// photo/text boundaries. It is not kept beyond this function.
|
|
// denorm provides a mapping back to the image from the current blob
|
|
// coordinate space.
|
|
// projection provides a measure of textline density over the image and
|
|
// provides functions to assist with diacritic detection. It should be a
|
|
// pointer to a new TextlineProjection, and will be setup here.
|
|
// part_grid is the output grid of textline partitions.
|
|
// Large blobs that cause overlap are put in separate partitions and added
|
|
// to the big_parts list.
|
|
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode,
|
|
const FCOORD& rerotation, TO_BLOCK* block,
|
|
Pix* nontext_pix, const DENORM* denorm,
|
|
bool cjk_script, TextlineProjection* projection,
|
|
BLOBNBOX_LIST* diacritic_blobs,
|
|
ColPartitionGrid* part_grid,
|
|
ColPartition_LIST* big_parts);
|
|
|
|
// Handles a click event in a display window.
|
|
virtual void HandleClick(int x, int y);
|
|
|
|
private:
|
|
// Computes the noise_density_ by summing the number of elements in a
|
|
// neighbourhood of each grid cell.
|
|
void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid);
|
|
|
|
// Detects and marks leader dots/dashes.
|
|
// Leaders are horizontal chains of small or noise blobs that look
|
|
// monospace according to ColPartition::MarkAsLeaderIfMonospaced().
|
|
// Detected leaders become the only occupants of the block->small_blobs list.
|
|
// Non-leader small blobs get moved to the blobs list.
|
|
// Non-leader noise blobs remain singletons in the noise list.
|
|
// All small and noise blobs in high density regions are marked BTFT_NONTEXT.
|
|
// block is the single block for the whole page or rectangle to be OCRed.
|
|
// leader_parts is the output.
|
|
void FindLeadersAndMarkNoise(TO_BLOCK* block,
|
|
ColPartition_LIST* leader_parts);
|
|
|
|
/** Inserts the block blobs (normal and large) into this grid.
|
|
* Blobs remain owned by the block. */
|
|
void InsertBlobs(TO_BLOCK* block);
|
|
|
|
// Fix broken CJK characters, using the fake joined blobs mechanism.
|
|
// Blobs are really merged, ie the master takes all the outlines and the
|
|
// others are deleted.
|
|
// Returns true if sufficient blobs are merged that it may be worth running
|
|
// again, due to a better estimate of character size.
|
|
bool FixBrokenCJK(TO_BLOCK* block);
|
|
|
|
// Collect blobs that overlap or are within max_dist of the input bbox.
|
|
// Return them in the list of blobs and expand the bbox to be the union
|
|
// of all the boxes. not_this is excluded from the search, as are blobs
|
|
// that cause the merged box to exceed max_size in either dimension.
|
|
void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
|
|
int max_size, int max_dist,
|
|
TBOX* bbox, BLOBNBOX_CLIST* blobs);
|
|
|
|
// For each blob in this grid, Finds the textline direction to be horizontal
|
|
// or vertical according to distance to neighbours and 1st and 2nd order
|
|
// neighbours. Non-text tends to end up without a definite direction.
|
|
// Result is setting of the neighbours and vert_possible/horz_possible
|
|
// flags in the BLOBNBOXes currently in this grid.
|
|
// This function is called more than once if page orientation is uncertain,
|
|
// so display_if_debugging is true on the final call to display the results.
|
|
void FindTextlineFlowDirection(PageSegMode pageseg_mode,
|
|
bool display_if_debugging);
|
|
|
|
// Sets the neighbours and good_stroke_neighbours members of the blob by
|
|
// searching close on all 4 sides.
|
|
// When finding leader dots/dashes, there is a slightly different rule for
|
|
// what makes a good neighbour.
|
|
// If activate_line_trap, then line-like objects are found and isolated.
|
|
void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob);
|
|
|
|
// Sets the good_stroke_neighbours member of the blob if it has a
|
|
// GoodNeighbour on the given side.
|
|
// Also sets the neighbour in the blob, whether or not a good one is found.
|
|
// Return value is the number of neighbours in the line trap size range.
|
|
// Leaders get extra special lenient treatment.
|
|
int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob);
|
|
|
|
// Makes the blob to be only horizontal or vertical where evidence
|
|
// is clear based on gaps of 2nd order neighbours.
|
|
void SetNeighbourFlows(BLOBNBOX* blob);
|
|
|
|
// Nullify the neighbours in the wrong directions where the direction
|
|
// is clear-cut based on a distance margin. Good for isolating vertical
|
|
// text from neighbouring horizontal text.
|
|
void SimplifyObviousNeighbours(BLOBNBOX* blob);
|
|
|
|
// Smoothes the vertical/horizontal type of the blob based on the
|
|
// 2nd-order neighbours. If reset_all is true, then all blobs are
|
|
// changed. Otherwise, only ambiguous blobs are processed.
|
|
void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate,
|
|
BLOBNBOX* blob);
|
|
|
|
// Checks the left or right side of the given leader partition and sets the
|
|
// (opposite) leader_on_right or leader_on_left flags for blobs
|
|
// that are next to the given side of the given leader partition.
|
|
void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side);
|
|
|
|
// Partition creation. Accumulates vertical and horizontal text chains,
|
|
// puts the remaining blobs in as unknowns, and then merges/splits to
|
|
// minimize overlap and smoothes the types with neighbours and the color
|
|
// image if provided. rerotation is used to rotate the coordinate space
|
|
// back to the nontext_map_ image.
|
|
// If find_problems is true, detects possible noise pollution by the amount
|
|
// of partition overlap that is created by the diacritics. If excessive, the
|
|
// noise is separated out into diacritic blobs, and PFR_NOISE is returned.
|
|
// [TODO(rays): if the partition overlap is caused by heavy skew, deskews
|
|
// the components, saves the skew_angle and returns PFR_SKEW.] If the return
|
|
// is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
|
|
// called again after cleaning up the partly done work.
|
|
PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode,
|
|
const FCOORD& rerotation,
|
|
bool find_problems, TO_BLOCK* block,
|
|
BLOBNBOX_LIST* diacritic_blobs,
|
|
ColPartitionGrid* part_grid,
|
|
ColPartition_LIST* big_parts,
|
|
FCOORD* skew_angle);
|
|
// Detects noise by a significant increase in partition overlap from
|
|
// pre_overlap to now, and removes noise from the union of all the overlapping
|
|
// partitions, placing the blobs in diacritic_blobs. Returns true if any noise
|
|
// was found and removed.
|
|
bool DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box,
|
|
TO_BLOCK* block, ColPartitionGrid* part_grid,
|
|
BLOBNBOX_LIST* diacritic_blobs);
|
|
// Finds vertical chains of text-like blobs and puts them in ColPartitions.
|
|
void FindVerticalTextChains(ColPartitionGrid* part_grid);
|
|
// Finds horizontal chains of text-like blobs and puts them in ColPartitions.
|
|
void FindHorizontalTextChains(ColPartitionGrid* part_grid);
|
|
// Finds diacritics and saves their base character in the blob.
|
|
void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block);
|
|
// Searches this grid for an appropriately close and sized neighbour of the
|
|
// given [small] blob. If such a blob is found, the diacritic base is saved
|
|
// in the blob and true is returned.
|
|
// The small_grid is a secondary grid that contains the small/noise objects
|
|
// that are not in this grid, but may be useful for determining a connection
|
|
// between blob and its potential base character. (See DiacriticXGapFilled.)
|
|
bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob);
|
|
// Returns true if there is no gap between the base char and the diacritic
|
|
// bigger than a fraction of the height of the base char:
|
|
// Eg: line end.....'
|
|
// The quote is a long way from the end of the line, yet it needs to be a
|
|
// diacritic. To determine that the quote is not part of an image, or
|
|
// a different text block, we check for other marks in the gap between
|
|
// the base char and the diacritic.
|
|
// '<--Diacritic
|
|
// |---------|
|
|
// | |<-toobig-gap->
|
|
// | Base |<ok gap>
|
|
// |---------| x<-----Dot occupying gap
|
|
// The grid is const really.
|
|
bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box,
|
|
const TBOX& base_box);
|
|
// Merges diacritics with the ColPartition of the base character blob.
|
|
void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid);
|
|
// Any blobs on the large_blobs list of block that are still unowned by a
|
|
// ColPartition, are probably drop-cap or vertically touching so the blobs
|
|
// are removed to the big_parts list and treated separately.
|
|
void RemoveLargeUnusedBlobs(TO_BLOCK* block,
|
|
ColPartitionGrid* part_grid,
|
|
ColPartition_LIST* big_parts);
|
|
|
|
// All remaining unused blobs are put in individual ColPartitions.
|
|
void PartitionRemainingBlobs(PageSegMode pageseg_mode,
|
|
ColPartitionGrid* part_grid);
|
|
|
|
// If combine, put all blobs in the cell_list into a single partition,
|
|
// otherwise put each one into its own partition.
|
|
void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
|
|
ColPartitionGrid* part_grid,
|
|
BLOBNBOX_CLIST* cell_list);
|
|
|
|
// Helper function to finish setting up a ColPartition and insert into
|
|
// part_grid.
|
|
void CompletePartition(PageSegMode pageseg_mode, ColPartition* part,
|
|
ColPartitionGrid* part_grid);
|
|
|
|
// Helper returns true if we are looking only for vertical textlines,
|
|
// taking into account any rotation that has been done.
|
|
bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
|
|
if (rerotation_.y() == 0.0f) {
|
|
return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
|
|
}
|
|
return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
|
|
pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
|
|
}
|
|
// Helper returns true if we are looking only for horizontal textlines,
|
|
// taking into account any rotation that has been done.
|
|
bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
|
|
if (rerotation_.y() == 0.0f) {
|
|
return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
|
|
pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
|
|
}
|
|
return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
|
|
}
|
|
|
|
// Merge partitions where the merge appears harmless.
|
|
void EasyMerges(ColPartitionGrid* part_grid);
|
|
|
|
// Compute a search box based on the orientation of the partition.
|
|
// Returns true if a suitable box can be calculated.
|
|
// Callback for EasyMerges.
|
|
bool OrientationSearchBox(ColPartition* part, TBOX* box);
|
|
|
|
// Merge confirmation callback for EasyMerges.
|
|
bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2);
|
|
|
|
// Returns true if there is no significant noise in between the boxes.
|
|
bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const;
|
|
|
|
// Displays the blobs colored according to the number of good neighbours
|
|
// and the vertical/horizontal flow.
|
|
ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y);
|
|
|
|
// Displays blobs colored according to whether or not they are diacritics.
|
|
ScrollView* DisplayDiacritics(const char* window_name,
|
|
int x, int y, TO_BLOCK* block);
|
|
|
|
private:
|
|
// Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
|
|
Pix* nontext_map_;
|
|
// Textline projection map. Borrowed pointer.
|
|
TextlineProjection* projection_;
|
|
// DENORM used by projection_ to get back to image coords. Borrowed pointer.
|
|
const DENORM* denorm_;
|
|
// Bounding box of the grid.
|
|
TBOX grid_box_;
|
|
// Rerotation to get back to the original image.
|
|
FCOORD rerotation_;
|
|
// Windows for debug display.
|
|
ScrollView* leaders_win_;
|
|
ScrollView* initial_widths_win_;
|
|
ScrollView* widths_win_;
|
|
ScrollView* chains_win_;
|
|
ScrollView* diacritics_win_;
|
|
ScrollView* textlines_win_;
|
|
ScrollView* smoothed_win_;
|
|
};
|
|
|
|
} // namespace tesseract.
|
|
|
|
#endif // TESSERACT_TEXTORD_STROKEWIDTH_H__
|