mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-18 19:39:24 +08:00
369 lines
11 KiB
C++
369 lines
11 KiB
C++
|
///////////////////////////////////////////////////////////////////////
|
||
|
// File: leptonica_pageseg.cpp
|
||
|
// Description: Leptonica-based page segmenter.
|
||
|
// Author: Dan Bloomberg
|
||
|
// Created: Tue Aug 28 08:56:43 PDT 2007
|
||
|
//
|
||
|
// (C) Copyright 2007, Google Inc.
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
//
|
||
|
///////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
// Include automatically generated configuration file if running autoconf.
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "config_auto.h"
|
||
|
#endif
|
||
|
|
||
|
#include "leptonica_pageseg.h"
|
||
|
|
||
|
#ifdef HAVE_LIBLEPT
|
||
|
// The jpeg library still has INT32 as long, which is no good for 64 bit.
|
||
|
#define INT32 WRONGINT32
|
||
|
#define BOX LEPT_BOX
|
||
|
// Include leptonica library only if autoconf (or makefile etc) tell us to.
|
||
|
#include "allheaders.h"
|
||
|
#undef BOX
|
||
|
#undef INT32
|
||
|
#endif
|
||
|
|
||
|
#ifdef HAVE_LIBLEPT
|
||
|
// ONLY available if you have Leptonica installed.
|
||
|
|
||
|
// class LeptonicaPageSeg
//
//    Region segmentation
//       bool  GetHalftoneMask()
//       bool  GetTextlineMask()
//       bool  GetTextblockMask()
//
//    Top-level (for testing/debugging)
//       bool  GetAllRegions()
//

//------------------------------------------------------------------
//                     Region segmentation
//------------------------------------------------------------------
|
||
|
// GetHalftoneMask()
|
||
|
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
||
|
// &pixht (returns halftone mask; can be NULL)
|
||
|
// &baht (returns boxa of halftone mask component b.b.s; can be NULL)
|
||
|
// &paht (returns pixa of halftone mask components; can be NULL)
|
||
|
// debugflag (set true to write out intermediate images)
|
||
|
// Return: true if ok, false on error
|
||
|
// Note: If there are no halftone regions, all requested data structures
|
||
|
// are returned as NULL. This is not an error.
|
||
|
bool LeptonicaPageSeg::GetHalftoneMask(Pix *pixs,
|
||
|
Pix **ppixht,
|
||
|
Boxa **pbaht,
|
||
|
Pixa **ppaht,
|
||
|
bool debugflag) {
|
||
|
if (!pixs) {
|
||
|
fprintf(stderr, "pixs not defined\n");
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
int32 debug = debugflag ? 1 : 0;
|
||
|
|
||
|
// 2x reduce, to 150 - 200 ppi
|
||
|
Pix *pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
|
||
|
pixDisplayWrite(pixr, debug);
|
||
|
|
||
|
// Get the halftone mask
|
||
|
Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL, debug);
|
||
|
pixDestroy(&pixr);
|
||
|
if (!pixht2) {
|
||
|
if (debug)
|
||
|
printf("No halftone image parts found\n");
|
||
|
if (ppixht) *ppixht = NULL;
|
||
|
if (pbaht) *pbaht = NULL;
|
||
|
if (ppaht) *ppaht = NULL;
|
||
|
return true;
|
||
|
} else {
|
||
|
if (debug)
|
||
|
printf("Halftone image parts found\n");
|
||
|
}
|
||
|
|
||
|
Pix *pixht = pixExpandReplicate(pixht2, 2);
|
||
|
pixDisplayWrite(pixht, debug);
|
||
|
pixDestroy(&pixht2);
|
||
|
|
||
|
// Fill to capture pixels near the mask edges that were missed
|
||
|
Pix *pixt = pixSeedfillBinary(NULL, pixht, pixs, 8);
|
||
|
pixOr(pixht, pixht, pixt);
|
||
|
pixDestroy(&pixt);
|
||
|
|
||
|
if (ppaht) {
|
||
|
Boxa *boxa = pixConnComp(pixht, ppaht, 4);
|
||
|
if (pbaht) {
|
||
|
*pbaht = boxa;
|
||
|
} else {
|
||
|
boxaDestroy(&boxa);
|
||
|
}
|
||
|
} else if (pbaht) {
|
||
|
*pbaht = pixConnComp(pixht, NULL, 4);
|
||
|
}
|
||
|
|
||
|
if (ppixht) {
|
||
|
*ppixht =pixht;
|
||
|
} else {
|
||
|
pixDestroy(&pixht);
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
// GetTextlineMask()
|
||
|
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
||
|
// &pixtm (returns textline mask; can be NULL)
|
||
|
// &pixvws (returns vertical whitespace mask; can be NULL)
|
||
|
// &batm (returns boxa of textline mask component b.b.s; can be NULL)
|
||
|
// &patm (returns pixa of textline mask components; can be NULL)
|
||
|
// debugflag (set true to write out intermediate images)
|
||
|
// Return: true if ok, false on error
|
||
|
bool LeptonicaPageSeg::GetTextlineMask(Pix *pixs,
|
||
|
Pix **ppixtm,
|
||
|
Pix **ppixvws,
|
||
|
Boxa **pbatm,
|
||
|
Pixa **ppatm,
|
||
|
bool debugflag) {
|
||
|
if (!pixs) {
|
||
|
fprintf(stderr, "pixs not defined\n");
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
int32 debug = debugflag ? 1 : 0;
|
||
|
|
||
|
// 2x reduce, to 150 - 200 ppi
|
||
|
Pix *pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
|
||
|
pixDisplayWrite(pixr, debug);
|
||
|
|
||
|
// Remove the halftone pixels from the image
|
||
|
Pix *pixtext;
|
||
|
Pix *pixht2 = pixGenHalftoneMask(pixr, &pixtext, NULL, debug);
|
||
|
pixDestroy(&pixr);
|
||
|
pixDestroy(&pixht2);
|
||
|
|
||
|
// Get the textline mask at full res
|
||
|
Pix *pixvws;
|
||
|
Pix *pixtm2 = pixGenTextlineMask(pixtext, &pixvws, NULL, debug);
|
||
|
Pix *pixt = pixExpandReplicate(pixtm2, 2);
|
||
|
pixDestroy(&pixtext);
|
||
|
pixDestroy(&pixtm2);
|
||
|
|
||
|
// Small dilation to capture pixels near the mask edges that were missed
|
||
|
// Do not use filling here, because the result is then used to find
|
||
|
// textblocks, and a mistake here gets propagated.
|
||
|
Pix *pixtm = pixDilateBrick(NULL, pixt, 3, 3);
|
||
|
pixDestroy(&pixt);
|
||
|
pixDisplayWrite(pixtm, debug);
|
||
|
|
||
|
if (ppatm) {
|
||
|
Boxa *boxa = pixConnComp(pixtm, ppatm, 4);
|
||
|
if (pbatm) {
|
||
|
*pbatm = boxa;
|
||
|
} else {
|
||
|
boxaDestroy(&boxa);
|
||
|
}
|
||
|
} else if (pbatm) {
|
||
|
*pbatm = pixConnComp(pixtm, NULL, 4);
|
||
|
}
|
||
|
|
||
|
if (ppixtm) {
|
||
|
*ppixtm =pixtm;
|
||
|
} else {
|
||
|
pixDestroy(&pixtm);
|
||
|
}
|
||
|
if (ppixvws) {
|
||
|
*ppixvws =pixvws;
|
||
|
} else {
|
||
|
pixDestroy(&pixvws);
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
// GetTextblockMask()
|
||
|
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
||
|
// &pixtb (returns textblock mask; can be NULL)
|
||
|
// &batb (returns boxa of textblock mask component b.b; can be NULL)
|
||
|
// &patb (returns pixa of textblock mask components; can be NULL)
|
||
|
// debugflag (set true to write out intermediate images)
|
||
|
// Return: true if ok, false on error
|
||
|
// Notes:
|
||
|
// To obtain a set of polylines of the outer borders of each of the
|
||
|
// textblock regions, use pixGetOuterBordersPtaa().
|
||
|
bool LeptonicaPageSeg::GetTextblockMask(Pix *pixs,
|
||
|
Pix **ppixtb,
|
||
|
Boxa **pbatb,
|
||
|
Pixa **ppatb,
|
||
|
bool debugflag) {
|
||
|
if (!pixs) {
|
||
|
fprintf(stderr, "pixs not defined\n");
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
int32 debug = debugflag ? 1 : 0;
|
||
|
|
||
|
// Get the textline mask at 2x reduction
|
||
|
Pix *pixtm, *pixvws;
|
||
|
GetTextlineMask(pixs, &pixtm, &pixvws, NULL, NULL, debugflag);
|
||
|
Pix *pixtm2 = pixReduceRankBinaryCascade(pixtm, 1, 0, 0, 0);
|
||
|
pixDestroy(&pixtm);
|
||
|
|
||
|
// Get the textblock mask
|
||
|
Pix *pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
|
||
|
Pix *pixt = pixExpandReplicate(pixtb2, 2);
|
||
|
pixDestroy(&pixtm2);
|
||
|
pixDestroy(&pixtb2);
|
||
|
pixDestroy(&pixvws);
|
||
|
|
||
|
// Dilate to capture pixels near the mask edges that were missed
|
||
|
Pix *pixtb = pixDilateBrick(NULL, pixt, 3, 3);
|
||
|
pixDestroy(&pixt);
|
||
|
pixDisplayWrite(pixtb, debug);
|
||
|
|
||
|
if (ppatb) {
|
||
|
Boxa *boxa = pixConnComp(pixtb, ppatb, 4);
|
||
|
if (pbatb) {
|
||
|
*pbatb = boxa;
|
||
|
} else {
|
||
|
boxaDestroy(&boxa);
|
||
|
}
|
||
|
} else if (pbatb) {
|
||
|
*pbatb = pixConnComp(pixtb, NULL, 4);
|
||
|
}
|
||
|
|
||
|
if (ppixtb) {
|
||
|
*ppixtb = pixtb;
|
||
|
} else {
|
||
|
pixDestroy(&pixtb);
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
//------------------------------------------------------------------
//                Top-level (for testing/debugging)
//------------------------------------------------------------------
|
||
|
// GetAllRegions()
|
||
|
// Input: pixs (input image, assumed to be at 300 - 400 ppi)
|
||
|
// &pixhm (returns halftone mask; can be NULL)
|
||
|
// &pixtm (returns textline mask; can be NULL)
|
||
|
// &pixtb (returns textblock mask; can be NULL)
|
||
|
// debugflag (set true to write out intermediate images and data)
|
||
|
// Return: true if ok, false on error
|
||
|
// Note: use NULL for input on each mask you don't want.
|
||
|
bool LeptonicaPageSeg::GetAllRegions(Pix *pixs,
|
||
|
Pix **ppixhm,
|
||
|
Pix **ppixtm,
|
||
|
Pix **ppixtb,
|
||
|
bool debugflag) {
|
||
|
if (!pixs || (pixGetDepth(pixs) != 1)) {
|
||
|
fprintf(stderr, "pixs not read or not 1 bpp\n");
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
int32 w, h;
|
||
|
pixGetDimensions(pixs, &w, &h, NULL);
|
||
|
int32 debug = debugflag ? 1 : 0;
|
||
|
|
||
|
// Segment the page
|
||
|
Boxa *batm = NULL;
|
||
|
Boxa *batb = NULL;
|
||
|
Pixa *patm = NULL;
|
||
|
Pixa *patb = NULL;
|
||
|
Pix *pixhm = NULL;
|
||
|
Pix *pixtm = NULL;
|
||
|
Pix *pixtb = NULL;
|
||
|
|
||
|
startTimer();
|
||
|
LeptonicaPageSeg::GetHalftoneMask(pixs, &pixhm, NULL, NULL, false);
|
||
|
if (debug)
|
||
|
printf("Halftone segmentation time: %f sec\n", stopTimer());
|
||
|
|
||
|
startTimer();
|
||
|
LeptonicaPageSeg::GetTextlineMask(pixs, &pixtm, NULL, &batm, &patm, false);
|
||
|
if (debug)
|
||
|
printf("Textline segmentation time: %f sec\n", stopTimer());
|
||
|
|
||
|
startTimer();
|
||
|
LeptonicaPageSeg::GetTextblockMask(pixs, &pixtb, &batb, &patb, debugflag);
|
||
|
if (debug)
|
||
|
printf("Textblock segmentation time: %f sec\n", stopTimer());
|
||
|
|
||
|
// Display the textlines
|
||
|
if (debug) {
|
||
|
Pix *pixt = pixaDisplayRandomCmap(patm, w, h);
|
||
|
pixcmapResetColor(pixGetColormap(pixt), 0, 255, 255, 255); // white bg
|
||
|
pixDisplay(pixt, 100, 100);
|
||
|
pixDisplayWrite(pixt, 1);
|
||
|
pixDestroy(&pixt);
|
||
|
}
|
||
|
|
||
|
// Display the textblocks
|
||
|
if (debug) {
|
||
|
Pix *pixt = pixaDisplayRandomCmap(patb, w, h);
|
||
|
pixcmapResetColor(pixGetColormap(pixt), 0, 255, 255, 255);
|
||
|
pixDisplay(pixt, 100, 100);
|
||
|
pixDisplayWrite(pixt, 1);
|
||
|
pixDestroy(&pixt);
|
||
|
}
|
||
|
|
||
|
// Identify the outlines of each textblock
|
||
|
if (debug) {
|
||
|
Ptaa *ptaa = pixGetOuterBordersPtaa(pixtb);
|
||
|
Pix *pixt = pixRenderRandomCmapPtaa(pixtb, ptaa, 8, 1);
|
||
|
PixColormap *cmap = pixGetColormap(pixt);
|
||
|
pixcmapResetColor(cmap, 0, 130, 130, 130);
|
||
|
pixDisplayWrite(pixt, debug);
|
||
|
pixDestroy(&pixt);
|
||
|
ptaaWrite("junk_ptaa_outlines.ptaa", ptaa, 1);
|
||
|
ptaaDestroy(&ptaa);
|
||
|
}
|
||
|
|
||
|
// Save b.b. for textblocks
|
||
|
if (debug) {
|
||
|
Boxa *ba1 = boxaSelectBySize(batb, 3, 3, L_SELECT_IF_BOTH,
|
||
|
L_SELECT_IF_GTE, NULL);
|
||
|
boxaWrite("junk_textblock.boxa", ba1);
|
||
|
boxaDestroy(&ba1);
|
||
|
}
|
||
|
|
||
|
if (ppixhm) {
|
||
|
*ppixhm = pixhm;
|
||
|
} else {
|
||
|
pixDestroy(&pixhm);
|
||
|
}
|
||
|
if (ppixtm) {
|
||
|
*ppixtm = pixtm;
|
||
|
} else {
|
||
|
pixDestroy(&pixtm);
|
||
|
}
|
||
|
if (ppixtb) {
|
||
|
*ppixtb = pixtb;
|
||
|
} else {
|
||
|
pixDestroy(&pixtb);
|
||
|
}
|
||
|
|
||
|
boxaDestroy(&batm);
|
||
|
boxaDestroy(&batb);
|
||
|
pixaDestroy(&patm);
|
||
|
pixaDestroy(&patb);
|
||
|
pixDestroy(&pixs);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
#endif // HAVE_LIBLEPT
|