mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-18 11:28:51 +08:00
9cf08ca8d3
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@981 d0cd1f9f-072b-0410-8dd7-cf729c803f20
777 lines
33 KiB
C++
777 lines
33 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: linefind.cpp
|
|
// Description: Class to find vertical lines in an image and create
|
|
// a corresponding list of empty blobs.
|
|
// Author: Ray Smith
|
|
// Created: Thu Mar 20 09:49:01 PDT 2008
|
|
//
|
|
// (C) Copyright 2008, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(disable:4244) // Conversion warnings
|
|
#endif
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config_auto.h"
|
|
#endif
|
|
|
|
#include "linefind.h"
|
|
#include "alignedblob.h"
|
|
#include "tabvector.h"
|
|
#include "blobbox.h"
|
|
#include "edgblob.h"
|
|
#include "openclwrapper.h"
|
|
|
|
#include "allheaders.h"
|
|
|
|
namespace tesseract {
|
|
|
|
// Tuning constants for the line finder. Where a constant is combined with the
// image resolution (pixels per inch) the comment says how.

/// Candidate lines may be at most resolution / kThinLineFraction pixels wide.
const int kThinLineFraction = 20;
/// Candidate lines must be at least resolution / kMinLineLengthFraction
/// pixels long.
const int kMinLineLengthFraction = 4;
/// Distance in pixels between the cracks that are cut across the page to
/// break up tall vertical lines.
const int kCrackSpacing = 100;
/// Grid size used by the line finder. Not very critical.
const int kLineFindGridSize = 50;
/// A line has to be at least this many pixels wide to count as thick.
const int kMinThickLineWidth = 12;
/// Max size of line residue, i.e. the pixels that fail the long thin opening
/// and so never reach the candidate line mask, yet still belong to the line.
const int kMaxLineResidue = 6;
/// Min length in inches of a line segment that is thicker than
/// kMinThickLineWidth. (Lines that thick should not be broken up by simple
/// image degradation.)
const double kThickLengthMultiple = 0.75;
/// Max fraction of the line box area that non-line pixels may occupy.
const double kMaxNonLineDensity = 0.25;
/// Max height of a music stave in inches.
const double kMaxStaveHeight = 1.0;
/// Min fraction of the pixels in a music rectangle that must be connected to
/// the staves.
const double kMinMusicPixelFraction = 0.75;
|
|
// Erases the unused blobs from the line_pix image, taking into account
|
|
// whether this was a horizontal or vertical line set.
|
|
static void RemoveUnusedLineSegments(bool horizontal_lines,
|
|
BLOBNBOX_LIST* line_bblobs,
|
|
Pix* line_pix) {
|
|
int height = pixGetHeight(line_pix);
|
|
BLOBNBOX_IT bbox_it(line_bblobs);
|
|
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
|
BLOBNBOX* blob = bbox_it.data();
|
|
if (blob->left_tab_type() != TT_VLINE) {
|
|
const TBOX& box = blob->bounding_box();
|
|
Box* pixbox = NULL;
|
|
if (horizontal_lines) {
|
|
// Horizontal lines are in tess format and also have x and y flipped
|
|
// (to use FindVerticalAlignment) so we have to flip x and y and then
|
|
// convert to Leptonica by height - flipped x (ie the right edge).
|
|
// See GetLineBoxes for more explanation.
|
|
pixbox = boxCreate(box.bottom(), height - box.right(),
|
|
box.height(), box.width());
|
|
} else {
|
|
// For vertical lines, just flip upside-down to convert to Leptonica.
|
|
// The y position of the box in Leptonica terms is the distance from
|
|
// the top of the image to the top of the box.
|
|
pixbox = boxCreate(box.left(), height - box.top(),
|
|
box.width(), box.height());
|
|
}
|
|
pixClearInRect(line_pix, pixbox);
|
|
boxDestroy(&pixbox);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Helper subtracts the line_pix image from the src_pix, and removes residue
|
|
// as well by removing components that touch the line, but are not in the
|
|
// non_line_pix mask. It is assumed that the non_line_pix mask has already
|
|
// been prepared to required accuracy.
|
|
static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix,
|
|
int resolution, Pix* src_pix) {
|
|
// First remove the lines themselves.
|
|
pixSubtract(src_pix, src_pix, line_pix);
|
|
// Subtract the non-lines from the image to get the residue.
|
|
Pix* residue_pix = pixSubtract(NULL, src_pix, non_line_pix);
|
|
// Dilate the lines so they touch the residue.
|
|
Pix* fat_line_pix = pixDilateBrick(NULL, line_pix, 3, 3);
|
|
// Seed fill the fat lines to get all the residue.
|
|
pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
|
|
// Subtract the residue from the original image.
|
|
pixSubtract(src_pix, src_pix, fat_line_pix);
|
|
pixDestroy(&fat_line_pix);
|
|
pixDestroy(&residue_pix);
|
|
}
|
|
|
|
// Returns the maximum strokewidth in the given binary image by doubling
|
|
// the maximum of the distance function.
|
|
static int MaxStrokeWidth(Pix* pix) {
|
|
Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
|
|
int width = pixGetWidth(dist_pix);
|
|
int height = pixGetHeight(dist_pix);
|
|
int wpl = pixGetWpl(dist_pix);
|
|
l_uint32* data = pixGetData(dist_pix);
|
|
// Find the maximum value in the distance image.
|
|
int max_dist = 0;
|
|
for (int y = 0; y < height; ++y) {
|
|
for (int x = 0; x < width; ++x) {
|
|
int pixel = GET_DATA_BYTE(data, x);
|
|
if (pixel > max_dist)
|
|
max_dist = pixel;
|
|
}
|
|
data += wpl;
|
|
}
|
|
pixDestroy(&dist_pix);
|
|
return max_dist * 2;
|
|
}
|
|
|
|
// Returns the number of components in the intersection_pix touched by line_box.
|
|
static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) {
|
|
if (intersection_pix == NULL) return 0;
|
|
Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, NULL);
|
|
Boxa* boxa = pixConnComp(rect_pix, NULL, 8);
|
|
pixDestroy(&rect_pix);
|
|
if (boxa == NULL) return false;
|
|
int result = boxaGetCount(boxa);
|
|
boxaDestroy(&boxa);
|
|
return result;
|
|
}
|
|
|
|
// Returns the number of black pixels found in the box made by adding the line
|
|
// width to both sides of the line bounding box. (Increasing the smallest
|
|
// dimension of the bounding box.)
|
|
static int CountPixelsAdjacentToLine(int line_width, Box* line_box,
|
|
Pix* nonline_pix) {
|
|
l_int32 x, y, box_width, box_height;
|
|
boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
|
|
if (box_width > box_height) {
|
|
// horizontal line.
|
|
int bottom = MIN(pixGetHeight(nonline_pix), y + box_height + line_width);
|
|
y = MAX(0, y - line_width);
|
|
box_height = bottom - y;
|
|
} else {
|
|
// Vertical line.
|
|
int right = MIN(pixGetWidth(nonline_pix), x + box_width + line_width);
|
|
x = MAX(0, x - line_width);
|
|
box_width = right - x;
|
|
}
|
|
Box* box = boxCreate(x, y, box_width, box_height);
|
|
Pix* rect_pix = pixClipRectangle(nonline_pix, box, NULL);
|
|
boxDestroy(&box);
|
|
l_int32 result;
|
|
pixCountPixels(rect_pix, &result, NULL);
|
|
pixDestroy(&rect_pix);
|
|
return result;
|
|
}
|
|
|
|
// Helper erases false-positive line segments from the input/output line_pix.
|
|
// 1. Since thick lines shouldn't really break up, we can eliminate some false
|
|
// positives by marking segments that are at least kMinThickLineWidth
|
|
// thickness, yet have a length less than min_thick_length.
|
|
// 2. Lines that don't have at least 2 intersections with other lines and have
|
|
// a lot of neighbouring non-lines are probably not lines (perhaps arabic
|
|
// or Hindi words, or underlines.)
|
|
// Bad line components are erased from line_pix.
|
|
// Returns the number of remaining connected components.
|
|
static int FilterFalsePositives(int resolution, Pix* nonline_pix,
|
|
Pix* intersection_pix, Pix* line_pix) {
|
|
int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
|
|
Pixa* pixa = NULL;
|
|
Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
|
|
// Iterate over the boxes to remove false positives.
|
|
int nboxes = boxaGetCount(boxa);
|
|
int remaining_boxes = nboxes;
|
|
for (int i = 0; i < nboxes; ++i) {
|
|
Box* box = boxaGetBox(boxa, i, L_CLONE);
|
|
l_int32 x, y, box_width, box_height;
|
|
boxGetGeometry(box, &x, &y, &box_width, &box_height);
|
|
Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
|
|
int max_width = MaxStrokeWidth(comp_pix);
|
|
pixDestroy(&comp_pix);
|
|
bool bad_line = false;
|
|
// If the length is too short to stand-alone as a line, and the box width
|
|
// is thick enough, and the stroke width is thick enough it is bad.
|
|
if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
|
|
box_width < min_thick_length && box_height < min_thick_length &&
|
|
max_width > kMinThickLineWidth) {
|
|
// Too thick for the length.
|
|
bad_line = true;
|
|
}
|
|
if (!bad_line &&
|
|
(intersection_pix == NULL ||
|
|
NumTouchingIntersections(box, intersection_pix) < 2)) {
|
|
// Test non-line density near the line.
|
|
int nonline_count = CountPixelsAdjacentToLine(max_width, box,
|
|
nonline_pix);
|
|
if (nonline_count > box_height * box_width * kMaxNonLineDensity)
|
|
bad_line = true;
|
|
}
|
|
if (bad_line) {
|
|
// Not a good line.
|
|
pixClearInRect(line_pix, box);
|
|
--remaining_boxes;
|
|
}
|
|
boxDestroy(&box);
|
|
}
|
|
pixaDestroy(&pixa);
|
|
boxaDestroy(&boxa);
|
|
return remaining_boxes;
|
|
}
|
|
|
|
// Finds vertical and horizontal line objects in the given pix.
|
|
// Uses the given resolution to determine size thresholds instead of any
|
|
// that may be present in the pix.
|
|
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
|
// thereby giving the mean vertical direction.
|
|
// If pix_music_mask != NULL, and music is detected, a mask of the staves
|
|
// and anything that is connected (bars, notes etc.) will be returned in
|
|
// pix_music_mask, the mask subtracted from pix, and the lines will not
|
|
// appear in v_lines or h_lines.
|
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
|
// having no boxes, as there is no need to refit or merge separator lines.
|
|
// The detected lines are removed from the pix.
|
|
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
|
|
int* vertical_x, int* vertical_y,
|
|
Pix** pix_music_mask,
|
|
TabVector_LIST* v_lines,
|
|
TabVector_LIST* h_lines) {
|
|
PERF_COUNT_START("FindAndRemoveLines")
|
|
if (pix == NULL || vertical_x == NULL || vertical_y == NULL) {
|
|
tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
|
|
return;
|
|
}
|
|
Pix* pix_vline = NULL;
|
|
Pix* pix_non_vline = NULL;
|
|
Pix* pix_hline = NULL;
|
|
Pix* pix_non_hline = NULL;
|
|
Pix* pix_intersections = NULL;
|
|
Pixa* pixa_display = debug ? pixaCreate(0) : NULL;
|
|
GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
|
|
&pix_non_hline, &pix_intersections, pix_music_mask,
|
|
pixa_display);
|
|
// Find lines, convert to TabVector_LIST and remove those that are used.
|
|
FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
|
|
&pix_vline, pix_non_vline, pix, v_lines);
|
|
if (pix_hline != NULL) {
|
|
// Recompute intersections and re-filter false positive h-lines.
|
|
if (pix_vline != NULL)
|
|
pixAnd(pix_intersections, pix_vline, pix_hline);
|
|
else
|
|
pixDestroy(&pix_intersections);
|
|
if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
|
|
pix_hline)) {
|
|
pixDestroy(&pix_hline);
|
|
}
|
|
}
|
|
FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
|
|
&pix_hline, pix_non_hline, pix, h_lines);
|
|
if (pixa_display != NULL && pix_vline != NULL)
|
|
pixaAddPix(pixa_display, pix_vline, L_CLONE);
|
|
if (pixa_display != NULL && pix_hline != NULL)
|
|
pixaAddPix(pixa_display, pix_hline, L_CLONE);
|
|
if (pix_vline != NULL && pix_hline != NULL) {
|
|
// Remove joins (intersections) where lines cross, and the residue.
|
|
// Recalculate the intersections, since some lines have been deleted.
|
|
pixAnd(pix_intersections, pix_vline, pix_hline);
|
|
// Fatten up the intersections and seed-fill to get the intersection
|
|
// residue.
|
|
Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5);
|
|
pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
|
|
// Now remove the intersection residue.
|
|
pixSubtract(pix, pix, pix_join_residue);
|
|
pixDestroy(&pix_join_residue);
|
|
}
|
|
// Remove any detected music.
|
|
if (pix_music_mask != NULL && *pix_music_mask != NULL) {
|
|
if (pixa_display != NULL)
|
|
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
|
|
pixSubtract(pix, pix, *pix_music_mask);
|
|
}
|
|
if (pixa_display != NULL)
|
|
pixaAddPix(pixa_display, pix, L_CLONE);
|
|
|
|
pixDestroy(&pix_vline);
|
|
pixDestroy(&pix_non_vline);
|
|
pixDestroy(&pix_hline);
|
|
pixDestroy(&pix_non_hline);
|
|
pixDestroy(&pix_intersections);
|
|
if (pixa_display != NULL) {
|
|
#if LIBLEPT_MINOR_VERSION >= 69 || LIBLEPT_MAJOR_VERSION > 1
|
|
pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
|
|
"vhlinefinding.pdf");
|
|
#endif
|
|
pixaDestroy(&pixa_display);
|
|
}
|
|
PERF_COUNT_END
|
|
}
|
|
|
|
// Converts the Boxa array to a list of C_BLOB, getting rid of severely
|
|
// overlapping outlines and those that are children of a bigger one.
|
|
// The output is a list of C_BLOBs that are owned by the list.
|
|
// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
|
|
// bounding boxes. The Boxa is consumed and destroyed.
|
|
void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
|
|
Boxa** boxes, C_BLOB_LIST* blobs) {
|
|
C_OUTLINE_LIST outlines;
|
|
C_OUTLINE_IT ol_it = &outlines;
|
|
// Iterate the boxes to convert to outlines.
|
|
int nboxes = boxaGetCount(*boxes);
|
|
for (int i = 0; i < nboxes; ++i) {
|
|
l_int32 x, y, width, height;
|
|
boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
|
|
// Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
|
|
// as there is no outline, just a bounding box, but with some very
|
|
// small changes to coutln.cpp, it works nicely.
|
|
ICOORD top_left(x, y);
|
|
ICOORD bot_right(x + width, y + height);
|
|
CRACKEDGE startpt;
|
|
startpt.pos = top_left;
|
|
C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
|
|
ol_it.add_after_then_move(outline);
|
|
}
|
|
// Use outlines_to_blobs to convert the outlines to blobs and find
|
|
// overlapping and contained objects. The output list of blobs in the block
|
|
// has all the bad ones filtered out and deleted.
|
|
BLOCK block;
|
|
ICOORD page_tl(0, 0);
|
|
ICOORD page_br(image_width, image_height);
|
|
outlines_to_blobs(&block, page_tl, page_br, &outlines);
|
|
// Transfer the created blobs to the output list.
|
|
C_BLOB_IT blob_it(blobs);
|
|
blob_it.add_list_after(block.blob_list());
|
|
// The boxes aren't needed any more.
|
|
boxaDestroy(boxes);
|
|
}
|
|
|
|
// Finds vertical line objects in pix_vline and removes the from src_pix.
|
|
// Uses the given resolution to determine size thresholds instead of any
|
|
// that may be present in the pix.
|
|
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
|
// thereby giving the mean vertical direction.
|
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
|
// having no boxes, as there is no need to refit or merge separator lines.
|
|
// If no good lines are found, pix_vline is destroyed.
|
|
// None of the input pointers may be NULL, and if *pix_vline is NULL then
|
|
// the function does nothing.
|
|
void LineFinder::FindAndRemoveVLines(int resolution,
|
|
Pix* pix_intersections,
|
|
int* vertical_x, int* vertical_y,
|
|
Pix** pix_vline, Pix* pix_non_vline,
|
|
Pix* src_pix, TabVector_LIST* vectors) {
|
|
if (pix_vline == NULL || *pix_vline == NULL) return;
|
|
C_BLOB_LIST line_cblobs;
|
|
BLOBNBOX_LIST line_bblobs;
|
|
GetLineBoxes(false, *pix_vline, pix_intersections,
|
|
&line_cblobs, &line_bblobs);
|
|
int width = pixGetWidth(src_pix);
|
|
int height = pixGetHeight(src_pix);
|
|
ICOORD bleft(0, 0);
|
|
ICOORD tright(width, height);
|
|
FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
|
|
if (!vectors->empty()) {
|
|
RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
|
|
SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
|
|
ICOORD vertical;
|
|
vertical.set_with_shrink(*vertical_x, *vertical_y);
|
|
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
|
} else {
|
|
pixDestroy(pix_vline);
|
|
}
|
|
}
|
|
|
|
// Finds horizontal line objects in pix_hline and removes them from src_pix.
|
|
// Uses the given resolution to determine size thresholds instead of any
|
|
// that may be present in the pix.
|
|
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
|
// thereby giving the mean vertical direction.
|
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
|
// having no boxes, as there is no need to refit or merge separator lines.
|
|
// If no good lines are found, pix_hline is destroyed.
|
|
// None of the input pointers may be NULL, and if *pix_hline is NULL then
|
|
// the function does nothing.
|
|
void LineFinder::FindAndRemoveHLines(int resolution,
|
|
Pix* pix_intersections,
|
|
int vertical_x, int vertical_y,
|
|
Pix** pix_hline, Pix* pix_non_hline,
|
|
Pix* src_pix, TabVector_LIST* vectors) {
|
|
if (pix_hline == NULL || *pix_hline == NULL) return;
|
|
C_BLOB_LIST line_cblobs;
|
|
BLOBNBOX_LIST line_bblobs;
|
|
GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
|
|
int width = pixGetWidth(src_pix);
|
|
int height = pixGetHeight(src_pix);
|
|
ICOORD bleft(0, 0);
|
|
ICOORD tright(height, width);
|
|
FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
|
|
vectors);
|
|
if (!vectors->empty()) {
|
|
RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
|
|
SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
|
|
ICOORD vertical;
|
|
vertical.set_with_shrink(vertical_x, vertical_y);
|
|
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
|
// Iterate the vectors to flip them. x and y were flipped for horizontal
|
|
// lines, so FindLineVectors can work just with the vertical case.
|
|
// See GetLineBoxes for more on the flip.
|
|
TabVector_IT h_it(vectors);
|
|
for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
|
|
h_it.data()->XYFlip();
|
|
}
|
|
} else {
|
|
pixDestroy(pix_hline);
|
|
}
|
|
}
|
|
|
|
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
|
|
// are the bounds of the image on which the input line_bblobs were found.
|
|
// The input line_bblobs list is const really.
|
|
// The output vertical_x and vertical_y are the total of all the vectors.
|
|
// The output list of TabVector makes no reference to the input BLOBNBOXes.
|
|
void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
|
|
BLOBNBOX_LIST* line_bblobs,
|
|
int* vertical_x, int* vertical_y,
|
|
TabVector_LIST* vectors) {
|
|
BLOBNBOX_IT bbox_it(line_bblobs);
|
|
int b_count = 0;
|
|
// Put all the blobs into the grid to find the lines, and move the blobs
|
|
// to the output lists.
|
|
AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
|
|
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
|
BLOBNBOX* bblob = bbox_it.data();
|
|
bblob->set_left_tab_type(TT_MAYBE_ALIGNED);
|
|
bblob->set_left_rule(bleft.x());
|
|
bblob->set_right_rule(tright.x());
|
|
bblob->set_left_crossing_rule(bleft.x());
|
|
bblob->set_right_crossing_rule(tright.x());
|
|
blob_grid.InsertBBox(false, true, bblob);
|
|
++b_count;
|
|
}
|
|
if (b_count == 0)
|
|
return;
|
|
|
|
// Search the entire grid, looking for vertical line vectors.
|
|
BlobGridSearch lsearch(&blob_grid);
|
|
BLOBNBOX* bbox;
|
|
TabVector_IT vector_it(vectors);
|
|
*vertical_x = 0;
|
|
*vertical_y = 1;
|
|
lsearch.StartFullSearch();
|
|
while ((bbox = lsearch.NextFullSearch()) != NULL) {
|
|
if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) {
|
|
const TBOX& box = bbox->bounding_box();
|
|
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
|
|
tprintf("Finding line vector starting at bbox (%d,%d)\n",
|
|
box.left(), box.bottom());
|
|
AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
|
|
TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox,
|
|
vertical_x,
|
|
vertical_y);
|
|
if (vector != NULL) {
|
|
vector->Freeze();
|
|
vector_it.add_to_end(vector);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns a Pix music mask if music is detected.
|
|
// Any vertical line that has at least 5 intersections in sufficient density
|
|
// is taken to be a bar. Bars are used as a seed and the entire touching
|
|
// component is added to the output music mask and subtracted from the lines.
|
|
// Returns NULL and does minimal work if no music is found.
|
|
static Pix* FilterMusic(int resolution, Pix* pix_closed,
|
|
Pix* pix_vline, Pix* pix_hline,
|
|
l_int32* v_empty, l_int32* h_empty) {
|
|
int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
|
|
Pix* intersection_pix = pixAnd(NULL, pix_vline, pix_hline);
|
|
Boxa* boxa = pixConnComp(pix_vline, NULL, 8);
|
|
// Iterate over the boxes to find music bars.
|
|
int nboxes = boxaGetCount(boxa);
|
|
Pix* music_mask = NULL;
|
|
for (int i = 0; i < nboxes; ++i) {
|
|
Box* box = boxaGetBox(boxa, i, L_CLONE);
|
|
l_int32 x, y, box_width, box_height;
|
|
boxGetGeometry(box, &x, &y, &box_width, &box_height);
|
|
int joins = NumTouchingIntersections(box, intersection_pix);
|
|
// Test for the join density being at least 5 per max_stave_height,
|
|
// ie (joins-1)/box_height >= (5-1)/max_stave_height.
|
|
if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) {
|
|
// This is a music bar. Add to the mask.
|
|
if (music_mask == NULL)
|
|
music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline),
|
|
1);
|
|
pixSetInRect(music_mask, box);
|
|
}
|
|
boxDestroy(&box);
|
|
}
|
|
boxaDestroy(&boxa);
|
|
pixDestroy(&intersection_pix);
|
|
if (music_mask != NULL) {
|
|
// The mask currently contains just the bars. Use the mask as a seed
|
|
// and the pix_closed as the mask for a seedfill to get all the
|
|
// intersecting staves.
|
|
pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
|
|
// Filter out false positives. CCs in the music_mask should be the vast
|
|
// majority of the pixels in their bounding boxes, as we expect just a
|
|
// tiny amount of text, a few phrase marks, and crescendo etc left.
|
|
Boxa* boxa = pixConnComp(music_mask, NULL, 8);
|
|
// Iterate over the boxes to find music components.
|
|
int nboxes = boxaGetCount(boxa);
|
|
for (int i = 0; i < nboxes; ++i) {
|
|
Box* box = boxaGetBox(boxa, i, L_CLONE);
|
|
Pix* rect_pix = pixClipRectangle(music_mask, box, NULL);
|
|
l_int32 music_pixels;
|
|
pixCountPixels(rect_pix, &music_pixels, NULL);
|
|
pixDestroy(&rect_pix);
|
|
rect_pix = pixClipRectangle(pix_closed, box, NULL);
|
|
l_int32 all_pixels;
|
|
pixCountPixels(rect_pix, &all_pixels, NULL);
|
|
pixDestroy(&rect_pix);
|
|
if (music_pixels < kMinMusicPixelFraction * all_pixels) {
|
|
// False positive. Delete from the music mask.
|
|
pixClearInRect(music_mask, box);
|
|
}
|
|
boxDestroy(&box);
|
|
}
|
|
l_int32 no_remaining_music;
|
|
boxaDestroy(&boxa);
|
|
pixZero(music_mask, &no_remaining_music);
|
|
if (no_remaining_music) {
|
|
pixDestroy(&music_mask);
|
|
} else {
|
|
pixSubtract(pix_vline, pix_vline, music_mask);
|
|
pixSubtract(pix_hline, pix_hline, music_mask);
|
|
// We may have deleted all the lines
|
|
pixZero(pix_vline, v_empty);
|
|
pixZero(pix_hline, h_empty);
|
|
}
|
|
}
|
|
return music_mask;
|
|
}
|
|
|
|
// Most of the heavy lifting of line finding. Given src_pix and its separate
|
|
// resolution, returns image masks:
|
|
// pix_vline candidate vertical lines.
|
|
// pix_non_vline pixels that didn't look like vertical lines.
|
|
// pix_hline candidate horizontal lines.
|
|
// pix_non_hline pixels that didn't look like horizontal lines.
|
|
// pix_intersections pixels where vertical and horizontal lines meet.
|
|
// pix_music_mask candidate music staves.
|
|
// This function promises to initialize all the output (2nd level) pointers,
|
|
// but any of the returns that are empty will be NULL on output.
|
|
// None of the input (1st level) pointers may be NULL except pix_music_mask,
|
|
// which will disable music detection, and pixa_display.
|
|
void LineFinder::GetLineMasks(int resolution, Pix* src_pix,
|
|
Pix** pix_vline, Pix** pix_non_vline,
|
|
Pix** pix_hline, Pix** pix_non_hline,
|
|
Pix** pix_intersections, Pix** pix_music_mask,
|
|
Pixa* pixa_display) {
|
|
Pix* pix_closed = NULL;
|
|
Pix* pix_hollow = NULL;
|
|
|
|
int max_line_width = resolution / kThinLineFraction;
|
|
int min_line_length = resolution / kMinLineLengthFraction;
|
|
if (pixa_display != NULL) {
|
|
tprintf("Image resolution = %d, max line width = %d, min length=%d\n",
|
|
resolution, max_line_width, min_line_length);
|
|
}
|
|
int closing_brick = max_line_width / 3;
|
|
|
|
PERF_COUNT_START("GetLineMasksMorph")
|
|
// only use opencl if compiled w/ OpenCL and selected device is opencl
|
|
#ifdef USE_OPENCL
|
|
if (OpenclDevice::selectedDeviceIsOpenCL()) {
|
|
// OpenCL pixGetLines Operation
|
|
int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix),
|
|
pixGetHeight(src_pix),
|
|
src_pix);
|
|
bool getpixclosed = pix_music_mask != NULL ? true : false;
|
|
OpenclDevice::pixGetLinesCL(NULL, src_pix, pix_vline, pix_hline,
|
|
&pix_closed, getpixclosed, closing_brick,
|
|
closing_brick, max_line_width, max_line_width,
|
|
min_line_length, min_line_length);
|
|
} else {
|
|
#endif
|
|
// Close up small holes, making it less likely that false alarms are found
|
|
// in thickened text (as it will become more solid) and also smoothing over
|
|
// some line breaks and nicks in the edges of the lines.
|
|
pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
|
|
if (pixa_display != NULL)
|
|
pixaAddPix(pixa_display, pix_closed, L_CLONE);
|
|
// Open up with a big box to detect solid areas, which can then be subtracted.
|
|
// This is very generous and will leave in even quite wide lines.
|
|
Pix* pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width,
|
|
max_line_width);
|
|
if (pixa_display != NULL)
|
|
pixaAddPix(pixa_display, pix_solid, L_CLONE);
|
|
pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
|
|
|
|
pixDestroy(&pix_solid);
|
|
|
|
// Now open up in both directions independently to find lines of at least
|
|
// 1 inch/kMinLineLengthFraction in length.
|
|
if (pixa_display != NULL)
|
|
pixaAddPix(pixa_display, pix_hollow, L_CLONE);
|
|
*pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
|
|
*pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
|
|
|
|
pixDestroy(&pix_hollow);
|
|
#ifdef USE_OPENCL
|
|
}
|
|
#endif
|
|
PERF_COUNT_END
|
|
|
|
// Lines are sufficiently rare, that it is worth checking for a zero image.
|
|
l_int32 v_empty = 0;
|
|
l_int32 h_empty = 0;
|
|
pixZero(*pix_vline, &v_empty);
|
|
pixZero(*pix_hline, &h_empty);
|
|
if (pix_music_mask != NULL) {
|
|
if (!v_empty && !h_empty) {
|
|
*pix_music_mask = FilterMusic(resolution, pix_closed,
|
|
*pix_vline, *pix_hline,
|
|
&v_empty, &h_empty);
|
|
} else {
|
|
*pix_music_mask = NULL;
|
|
}
|
|
}
|
|
pixDestroy(&pix_closed);
|
|
Pix* pix_nonlines = NULL;
|
|
*pix_intersections = NULL;
|
|
Pix* extra_non_hlines = NULL;
|
|
if (!v_empty) {
|
|
// Subtract both line candidates from the source to get definite non-lines.
|
|
pix_nonlines = pixSubtract(NULL, src_pix, *pix_vline);
|
|
if (!h_empty) {
|
|
pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
|
|
// Intersections are a useful indicator for likelihood of being a line.
|
|
*pix_intersections = pixAnd(NULL, *pix_vline, *pix_hline);
|
|
// Candidate vlines are not hlines (apart from the intersections)
|
|
// and vice versa.
|
|
extra_non_hlines = pixSubtract(NULL, *pix_vline, *pix_intersections);
|
|
}
|
|
*pix_non_vline = pixErodeBrick(NULL, pix_nonlines, kMaxLineResidue, 1);
|
|
pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
|
|
if (!h_empty) {
|
|
// Candidate hlines are not vlines.
|
|
pixOr(*pix_non_vline, *pix_non_vline, *pix_hline);
|
|
pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
|
|
}
|
|
if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections,
|
|
*pix_vline))
|
|
pixDestroy(pix_vline); // No candidates left.
|
|
} else {
|
|
// No vertical lines.
|
|
pixDestroy(pix_vline);
|
|
*pix_non_vline = NULL;
|
|
if (!h_empty) {
|
|
pix_nonlines = pixSubtract(NULL, src_pix, *pix_hline);
|
|
}
|
|
}
|
|
if (h_empty) {
|
|
pixDestroy(pix_hline);
|
|
*pix_non_hline = NULL;
|
|
if (v_empty) {
|
|
return;
|
|
}
|
|
} else {
|
|
*pix_non_hline = pixErodeBrick(NULL, pix_nonlines, 1, kMaxLineResidue);
|
|
pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
|
|
if (extra_non_hlines != NULL) {
|
|
pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
|
|
pixDestroy(&extra_non_hlines);
|
|
}
|
|
if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections,
|
|
*pix_hline))
|
|
pixDestroy(pix_hline); // No candidates left.
|
|
}
|
|
if (pixa_display != NULL) {
|
|
if (*pix_vline != NULL) pixaAddPix(pixa_display, *pix_vline, L_CLONE);
|
|
if (*pix_hline != NULL) pixaAddPix(pixa_display, *pix_hline, L_CLONE);
|
|
if (pix_nonlines != NULL) pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
|
|
if (*pix_non_vline != NULL)
|
|
pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
|
|
if (*pix_non_hline != NULL)
|
|
pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
|
|
if (*pix_intersections != NULL)
|
|
pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
|
|
if (pix_music_mask != NULL && *pix_music_mask != NULL)
|
|
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
|
|
}
|
|
pixDestroy(&pix_nonlines);
|
|
}
|
|
|
|
// Returns a list of boxes corresponding to the candidate line segments. Sets
|
|
// the line_crossings member of the boxes so we can later determin the number
|
|
// of intersections touched by a full line.
|
|
void LineFinder::GetLineBoxes(bool horizontal_lines,
|
|
Pix* pix_lines, Pix* pix_intersections,
|
|
C_BLOB_LIST* line_cblobs,
|
|
BLOBNBOX_LIST* line_bblobs) {
|
|
// Put a single pixel crack in every line at an arbitrary spacing,
|
|
// so they break up and the bounding boxes can be used to get the
|
|
// direction accurately enough without needing outlines.
|
|
int wpl = pixGetWpl(pix_lines);
|
|
int width = pixGetWidth(pix_lines);
|
|
int height = pixGetHeight(pix_lines);
|
|
l_uint32* data = pixGetData(pix_lines);
|
|
if (horizontal_lines) {
|
|
for (int y = 0; y < height; ++y, data += wpl) {
|
|
for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
|
|
CLEAR_DATA_BIT(data, x);
|
|
}
|
|
}
|
|
} else {
|
|
for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
|
|
memset(data + wpl * y, 0, wpl * sizeof(*data));
|
|
}
|
|
}
|
|
// Get the individual connected components
|
|
Boxa* boxa = pixConnComp(pix_lines, NULL, 8);
|
|
ConvertBoxaToBlobs(width, height, &boxa, line_cblobs);
|
|
// Make the BLOBNBOXes from the C_BLOBs.
|
|
C_BLOB_IT blob_it(line_cblobs);
|
|
BLOBNBOX_IT bbox_it(line_bblobs);
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
C_BLOB* cblob = blob_it.data();
|
|
BLOBNBOX* bblob = new BLOBNBOX(cblob);
|
|
bbox_it.add_to_end(bblob);
|
|
// Determine whether the line segment touches two intersections.
|
|
const TBOX& bbox = bblob->bounding_box();
|
|
Box* box = boxCreate(bbox.left(), bbox.bottom(),
|
|
bbox.width(), bbox.height());
|
|
bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections));
|
|
boxDestroy(&box);
|
|
// Transform the bounding box prior to finding lines. To save writing
|
|
// two line finders, flip x and y for horizontal lines and re-use the
|
|
// tab-stop detection code. For vertical lines we still have to flip the
|
|
// y-coordinates to switch from leptonica coords to tesseract coords.
|
|
if (horizontal_lines) {
|
|
// Note that we have Leptonica coords stored in a Tesseract box, so that
|
|
// bbox.bottom(), being the MIN y coord, is actually the top, so to get
|
|
// back to Leptonica coords in RemoveUnusedLineSegments, we have to
|
|
// use height - box.right() as the top, which looks very odd.
|
|
TBOX new_box(height - bbox.top(), bbox.left(),
|
|
height - bbox.bottom(), bbox.right());
|
|
bblob->set_bounding_box(new_box);
|
|
} else {
|
|
TBOX new_box(bbox.left(), height - bbox.top(),
|
|
bbox.right(), height - bbox.bottom());
|
|
bblob->set_bounding_box(new_box);
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace tesseract.
|
|
|