mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-15 09:47:46 +08:00
207 lines
10 KiB
C
207 lines
10 KiB
C
|
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||
|
|
||
|
#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
|
||
|
#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
|
||
|
|
||
|
#include "blobgrid.h" // For BlobGrid
|
||
|
|
||
|
// Forward declarations of types defined elsewhere. This header only names
// them in function signatures and in the pix_ member pointer, so their full
// definitions are not needed here.
class DENORM;
struct Pix;
struct TPOINT;
|
||
|
|
||
|
namespace tesseract {
|
||
|
|
||
|
// Forward declaration: this header only refers to ColPartition by const
// reference, so the full class definition is not required here.
class ColPartition;
|
||
|
|
||
|
// Simple class to encapsulate the computation of an image representing
|
||
|
// local textline density, and function(s) to make use of it.
|
||
|
// The underlying principle is that if you smear connected components
|
||
|
// horizontally (vertically for components on a vertically written textline)
|
||
|
// and count the number of smeared components in an image, then the resulting
|
||
|
// image shows the density of the textlines at each image position.
|
||
|
class TextlineProjection {
|
||
|
public:
|
||
|
// The down-scaling factor is computed to obtain a projection resolution
|
||
|
// of about 100 dpi, whatever the input.
|
||
|
explicit TextlineProjection(int resolution);
|
||
|
~TextlineProjection();
|
||
|
|
||
|
// Build the projection profile given the input_block containing lists of
|
||
|
// blobs, a rotation to convert to image coords,
|
||
|
// and a full-resolution nontext_map, marking out areas to avoid.
|
||
|
// During construction, we have the following assumptions:
|
||
|
// The rotation is a multiple of 90 degrees, ie no deskew yet.
|
||
|
// The blobs have had their left and right rules set to also limit
|
||
|
// the range of projection.
|
||
|
void ConstructProjection(TO_BLOCK* input_block,
|
||
|
const FCOORD& rotation, Pix* nontext_map);
|
||
|
|
||
|
// Display the blobs in the window colored according to textline quality.
|
||
|
void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win);
|
||
|
|
||
|
// Moves blobs that look like they don't sit well on a textline from the
|
||
|
// input blobs list to the output small_blobs list.
|
||
|
// This gets them away from initial textline finding to stop diacritics
|
||
|
// from forming incorrect textlines. (Introduced mainly to fix Thai.)
|
||
|
void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs,
|
||
|
BLOBNBOX_LIST* small_blobs) const;
|
||
|
|
||
|
// Create a window and display the projection in it.
|
||
|
void DisplayProjection() const;
|
||
|
|
||
|
// Compute the distance of the box from the partition using curved projection
|
||
|
// space. As DistanceOfBoxFromBox, except that the direction is taken from
|
||
|
// the ColPartition and the median bounds of the ColPartition are used as
|
||
|
// the to_box.
|
||
|
int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part,
|
||
|
const DENORM* denorm, bool debug) const;
|
||
|
|
||
|
// Compute the distance from the from_box to the to_box using curved
|
||
|
// projection space. Separation that involves a decrease in projection
|
||
|
// density (moving from the from_box to the to_box) is weighted more heavily
|
||
|
// than constant density, and an increase is weighted less.
|
||
|
// If horizontal_textline is true, then curved space is used vertically,
|
||
|
// as for a diacritic on the edge of a textline.
|
||
|
// The projection uses original image coords, so denorm is used to get
|
||
|
// back to the image coords from box/part space.
|
||
|
int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box,
|
||
|
bool horizontal_textline,
|
||
|
const DENORM* denorm, bool debug) const;
|
||
|
|
||
|
// Compute the distance between (x, y1) and (x, y2) using the rule that
|
||
|
// a decrease in textline density is weighted more heavily than an increase.
|
||
|
// The coordinates are in source image space, ie processed by any denorm
|
||
|
// already, but not yet scaled by scale_factor_.
|
||
|
// Going from the outside of a textline to the inside should measure much
|
||
|
// less distance than going from the inside of a textline to the outside.
|
||
|
int VerticalDistance(bool debug, int x, int y1, int y2) const;
|
||
|
|
||
|
// Compute the distance between (x1, y) and (x2, y) using the rule that
|
||
|
// a decrease in textline density is weighted more heavily than an increase.
|
||
|
int HorizontalDistance(bool debug, int x1, int x2, int y) const;
|
||
|
|
||
|
// Returns true if the blob appears to be outside of a horizontal textline.
|
||
|
// Such blobs are potentially diacritics (even if large in Thai) and should
|
||
|
// be kept away from initial textline finding.
|
||
|
bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm,
|
||
|
bool debug) const;
|
||
|
|
||
|
// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
|
||
|
// but uses the median top/bottom for horizontal and median left/right for
|
||
|
// vertical instead of the bounding box edges.
|
||
|
// Evaluates for both horizontal and vertical and returns the best result,
|
||
|
// with a positive value for horizontal and a negative value for vertical.
|
||
|
int EvaluateColPartition(const ColPartition& part, const DENORM* denorm,
|
||
|
bool debug) const;
|
||
|
|
||
|
// Computes the mean projection gradients over the horizontal and vertical
|
||
|
// edges of the box:
|
||
|
// -h-h-h-h-h-h
|
||
|
// |------------| mean=htop -v|+v--------+v|-v
|
||
|
// |+h+h+h+h+h+h| -v|+v +v|-v
|
||
|
// | | -v|+v +v|-v
|
||
|
// | box | -v|+v box +v|-v
|
||
|
// | | -v|+v +v|-v
|
||
|
// |+h+h+h+h+h+h| -v|+v +v|-v
|
||
|
// |------------| mean=hbot -v|+v--------+v|-v
|
||
|
// -h-h-h-h-h-h
|
||
|
// mean=vleft mean=vright
|
||
|
//
|
||
|
// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
|
||
|
// for a horizontal textline, a negative number for a vertical textline,
|
||
|
// and near zero for undecided. Undecided is most likely non-text.
|
||
|
int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const;
|
||
|
|
||
|
private:
|
||
|
// Internal version of EvaluateBox returns the unclipped gradients as well
|
||
|
// as the result of EvaluateBox.
|
||
|
// hgrad1 and hgrad2 are the gradients for the horizontal textline.
|
||
|
int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug,
|
||
|
int* hgrad1, int* hgrad2,
|
||
|
int* vgrad1, int* vgrad2) const;
|
||
|
|
||
|
// Helper returns the mean gradient value for the horizontal row at the given
|
||
|
// y, (in the external coordinates) by subtracting the mean of the transformed
|
||
|
// row 2 pixels above from the mean of the transformed row 2 pixels below.
|
||
|
// This gives a positive value for a good top edge and negative for bottom.
|
||
|
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
|
||
|
int BestMeanGradientInRow(const DENORM* denorm, inT16 min_x, inT16 max_x,
|
||
|
inT16 y, bool best_is_max) const;
|
||
|
|
||
|
// Helper returns the mean gradient value for the vertical column at the
|
||
|
// given x, (in the external coordinates) by subtracting the mean of the
|
||
|
// transformed column 2 pixels left from the mean of the transformed column
|
||
|
// 2 pixels to the right.
|
||
|
// This gives a positive value for a good left edge and negative for right.
|
||
|
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
|
||
|
int BestMeanGradientInColumn(const DENORM* denorm, inT16 x, inT16 min_y,
|
||
|
inT16 max_y, bool best_is_max) const;
|
||
|
|
||
|
// Helper returns the mean pixel value over the line between the start_pt and
|
||
|
// end_pt (inclusive), but shifted perpendicular to the line in the projection
|
||
|
// image by offset pixels. For simplicity, it is assumed that the vector is
|
||
|
// either nearly horizontal or nearly vertical. It works on skewed textlines!
|
||
|
// The end points are in external coordinates, and will be denormalized with
|
||
|
// the denorm if not NULL before further conversion to pix coordinates.
|
||
|
// After all the conversions, the offset is added to the direction
|
||
|
// perpendicular to the line direction. The offset is thus in projection image
|
||
|
// coordinates, which allows the caller to get a guaranteed displacement
|
||
|
// between pixels used to calculate gradients.
|
||
|
int MeanPixelsInLineSegment(const DENORM* denorm, int offset,
|
||
|
TPOINT start_pt, TPOINT end_pt) const;
|
||
|
|
||
|
// Helper function to add 1 to a rectangle in source image coords to the
|
||
|
// internal projection pix_.
|
||
|
void IncrementRectangle8Bit(const TBOX& box);
|
||
|
// Inserts a list of blobs into the projection.
|
||
|
// Rotation is a multiple of 90 degrees to get from blob coords to
|
||
|
// nontext_map coords, image_box is the bounds of the nontext_map.
|
||
|
// Blobs are spread horizontally or vertically according to their internal
|
||
|
// flags, but the spreading is truncated by set pixels in the nontext_map
|
||
|
// and also by the horizontal rule line limits on the blobs.
|
||
|
void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation,
|
||
|
const TBOX& image_box, Pix* nontext_map);
|
||
|
// Pads the bounding box of the given blob according to whether it is on
|
||
|
// a horizontal or vertical text line, taking into account tab-stops near
|
||
|
// the blob. Returns true if padding was in the horizontal direction.
|
||
|
bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox);
|
||
|
|
||
|
// Helper denormalizes the TPOINT with the denorm if not NULL, then
|
||
|
// converts to pix_ coordinates.
|
||
|
void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const;
|
||
|
|
||
|
// Helper truncates the TPOINT to be within the pix_.
|
||
|
void TruncateToImageBounds(TPOINT* pt) const;
|
||
|
|
||
|
// Transform tesseract coordinates to coordinates used in the pix.
|
||
|
int ImageXToProjectionX(int x) const;
|
||
|
int ImageYToProjectionY(int y) const;
|
||
|
|
||
|
// The down-sampling scale factor used in building the image.
|
||
|
int scale_factor_;
|
||
|
// The blob coordinates of the top-left (origin of the pix_) in tesseract
|
||
|
// coordinates. Used to transform the bottom-up tesseract coordinates to
|
||
|
// the top-down coordinates of the pix.
|
||
|
int x_origin_;
|
||
|
int y_origin_;
|
||
|
// The image of horizontally smeared blob boxes summed to provide a
|
||
|
// textline density map. As with a horizontal projection, the map has
|
||
|
// dips in the gaps between textlines.
|
||
|
Pix* pix_;
|
||
|
};
|
||
|
|
||
|
} // namespace tesseract.
|
||
|
|
||
|
#endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
|