mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-22 22:47:50 +08:00
91d2265429
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@974 d0cd1f9f-072b-0410-8dd7-cf729c803f20
157 lines
4.8 KiB
C++
157 lines
4.8 KiB
C++
/**********************************************************************
 * File:        cube_line_segmenter.h
 * Description: Declaration of the Cube Line Segmenter Class
 * Author:      Ahmad Abdulkader
 * Created:     2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
|
|
|
|
// TODO(ahmadab):
// This is really a makeshift line segmenter that works well for Arabic.
// It should eventually be replaced by Ray Smith's page segmenter.
// Many of the magic numbers below were determined empirically and have
// not been thoroughly tested.
|
|
|
|
#ifndef CUBE_LINE_SEGMENTER_H
|
|
#define CUBE_LINE_SEGMENTER_H
|
|
|
|
#include "cube_reco_context.h"
|
|
#include "allheaders.h"
|
|
|
|
namespace tesseract {
|
|
|
|
class CubeLineSegmenter {
|
|
public:
|
|
CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img);
|
|
~CubeLineSegmenter();
|
|
|
|
// Accessor functions
|
|
Pix *PostProcessedImage() {
|
|
if (init_ == false && Init() == false) {
|
|
return NULL;
|
|
}
|
|
return img_;
|
|
}
|
|
int ColumnCnt() {
|
|
if (init_ == false && Init() == false) {
|
|
return 0;
|
|
}
|
|
return columns_->n;
|
|
}
|
|
Box *Column(int col) {
|
|
if (init_ == false && Init() == false) {
|
|
return NULL;
|
|
}
|
|
|
|
return columns_->boxa->box[col];
|
|
}
|
|
int LineCnt() {
|
|
if (init_ == false && Init() == false) {
|
|
return 0;
|
|
}
|
|
|
|
return line_cnt_;
|
|
}
|
|
Pixa *ConComps() {
|
|
if (init_ == false && Init() == false) {
|
|
return NULL;
|
|
}
|
|
|
|
return con_comps_;
|
|
}
|
|
Pixaa *Columns() {
|
|
if (init_ == false && Init() == false) {
|
|
return NULL;
|
|
}
|
|
|
|
return columns_;
|
|
}
|
|
inline double AlefHgtEst() { return est_alef_hgt_; }
|
|
inline double DotHgtEst() { return est_dot_hgt_; }
|
|
Pix *Line(int line, Box **line_box);
|
|
|
|
private:
|
|
static const float kMinValidLineHgtRatio;
|
|
static const int kLineSepMorphMinHgt;
|
|
static const int kHgtBins;
|
|
static const int kMaxConnCompHgt;
|
|
static const int kMaxConnCompWid;
|
|
static const int kMaxHorzAspectRatio;
|
|
static const int kMaxVertAspectRatio;
|
|
static const int kMinWid;
|
|
static const int kMinHgt;
|
|
static const double kMaxValidLineRatio;
|
|
|
|
// Cube Reco context
|
|
CubeRecoContext *cntxt_;
|
|
// Original image
|
|
Pix *orig_img_;
|
|
// Post processed image
|
|
Pix *img_;
|
|
// Init flag
|
|
bool init_;
|
|
// Output Line and column info
|
|
int line_cnt_;
|
|
Pixaa *columns_;
|
|
Pixa *con_comps_;
|
|
Pixa *lines_pixa_;
|
|
// Estimates for sizes of ALEF and DOT needed for Arabic analysis
|
|
double est_alef_hgt_;
|
|
double est_dot_hgt_;
|
|
|
|
// Init the page analysis
|
|
bool Init();
|
|
// Performs line segmentation
|
|
bool LineSegment();
|
|
// Cleanup function
|
|
Pix *CleanUp(Pix *pix);
|
|
// compute validity ratio for a line
|
|
double ValidityRatio(Pix *line_mask_pix, Box *line_box);
|
|
// validate line
|
|
bool ValidLine(Pix *line_mask_pix, Box *line_box);
|
|
// split a line continuously until valid or fail
|
|
Pixa *SplitLine(Pix *line_mask_pix, Box *line_box);
|
|
// do a desperate attempt at cracking lines
|
|
Pixa *CrackLine(Pix *line_mask_pix, Box *line_box);
|
|
Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt);
|
|
// Checks of a line is too small
|
|
bool SmallLine(Box *line_box);
|
|
// Compute the connected components in a line
|
|
Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box,
|
|
Pixa **con_comps_pixa);
|
|
// create a union of two arbitrary pix
|
|
Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box);
|
|
// create a union of a pixa subset
|
|
Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt);
|
|
// create a union of a pixa
|
|
Pix *Pixa2Pix(Pixa *pixa, Box **dest_box);
|
|
// merges a number of lines into one line given a bounding box and a mask
|
|
bool MergeLine(Pix *line_mask_pix, Box *line_box,
|
|
Pixa *lines, Boxaa *lines_con_comps);
|
|
// Creates new set of lines from the computed columns
|
|
bool AddLines(Pixa *lines);
|
|
// Estimate the parameters of the font(s) used in the page
|
|
bool EstimateFontParams();
|
|
// perform a vertical Closing with the specified threshold
|
|
// returning the resulting conn comps as a pixa
|
|
Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa);
|
|
// Index the specific pixa using RTL reading order
|
|
int *IndexRTL(Pixa *pixa);
|
|
// Implements a rudimentary page & line segmenter
|
|
bool FindLines();
|
|
};
|
|
}
|
|
|
|
#endif // CUBE_LINE_SEGMENTER_H
|