tesseract/ccmain/equationdetect.h

278 lines
11 KiB
C
Raw Normal View History

///////////////////////////////////////////////////////////////////////
// File: equationdetect.h
// Description: The equation detection class that inherits equationdetectbase.
// Author: Zongyi (Joe) Liu (joeliu@google.com)
// Created: Fri Aug 31 11:13:01 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H__
#define TESSERACT_CCMAIN_EQUATIONDETECT_H__
#include "blobbox.h"
#include "equationdetectbase.h"
#include "genericvector.h"
#include "unichar.h"
class BLOBNBOX;
class BLOB_CHOICE;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class TBOX;
class UNICHARSET;
namespace tesseract {
class Tesseract;
class ColPartition;
class ColPartitionGrid;
class ColPartitionSet;
// Equation detector, derived from EquationDetectBase. The public entry points
// are LabelSpecialText() and FindEquationParts(); the protected members are
// the individual processing steps and the detector state.
// NOTE(review): this class declares a destructor and holds raw pointers
// (equ_tesseract_, cps_super_bbox_) but declares no copy constructor or
// assignment operator here -- confirm that instances are never copied.
class EquationDetect : public EquationDetectBase {
public:
// equ_datapath and equ_language presumably select the equation training data
// used to initialize equ_tesseract_ -- confirm against the implementation.
EquationDetect(const char* equ_datapath,
const char* equ_language);
~EquationDetect();
// Indentation categories, as returned by IsIndented(). INDENT_TYPE_COUNT is
// the trailing enumerator, so its value equals the number of categories that
// precede it.
enum IndentType {
NO_INDENT,
LEFT_INDENT,
RIGHT_INDENT,
BOTH_INDENT,
INDENT_TYPE_COUNT
};
// Reset the lang_tesseract_ pointer. This function should be called before
// any detector work is done.
void SetLangTesseract(Tesseract* lang_tesseract);
// Iterate over the blobs inside to_block, and set the blobs that we want to
// process to BSTT_NONE. (By default, they should be BSTT_SKIP.) The function
// returns 0 upon success.
int LabelSpecialText(TO_BLOCK* to_block);
// Find possible equation partitions from part_grid. Should be called
// after the special_text_type of the blobs has been set.
// It returns 0 upon success.
int FindEquationParts(ColPartitionGrid* part_grid,
ColPartitionSet** best_columns);
// Reset the resolution of the processing image. TEST-only function.
void SetResolution(const int resolution);
protected:
// Identify the special text type for one blob, and update its field. When
// height_th is set (> 0), we will label the blob as BSTT_NONE if its height
// is less than height_th.
void IdentifySpecialText(BLOBNBOX *blob, const int height_th);
// Estimate the special text type for one unichar (id) of unicharset.
BlobSpecialTextType EstimateTypeForUnichar(
const UNICHARSET& unicharset, const UNICHAR_ID id) const;
// Compute the special text type for each blob in part_grid_.
void IdentifySpecialText();
// Identify blobs that we want to skip during special blob type
// classification.
void IdentifyBlobsToSkip(ColPartition* part);
// The ColPartitions in part_grid_ may be over-segmented, particularly in the
// block equation regions, so we identify these partitions and merge them
// before we do the searching.
void MergePartsByLocation();
// Starting from the seed center, we do a radius search. Partitions that have
// a large overlap with seed are removed from part_grid_ and added to
// parts_overlap. Note: this function may update part_grid_, so if the
// caller is also running ColPartitionGridSearch, use the RepositionIterator
// to continue.
void SearchByOverlap(ColPartition* seed,
GenericVector<ColPartition*>* parts_overlap);
// Insert part back into part_grid_, after it has absorbed some other parts.
void InsertPartAfterAbsorb(ColPartition* part);
// Identify the seed ColPartitions in part_grid_, label them as PT_EQUATION,
// and save them into cp_seeds_.
void IdentifySeedParts();
// Check the blob count for a seed region candidate.
bool CheckSeedBlobsCount(ColPartition* part);
// Compute the foreground pixel density for a tbox area.
float ComputeForegroundDensity(const TBOX& tbox);
// Check if part qualifies for the seed2 label: low math density and left
// indented. We are using two checks:
// 1. If its left is aligned with any coordinates in indented_texts_left,
// which we assume have been sorted.
// 2. If its foreground density is over foreground_density_th.
bool CheckForSeed2(
const GenericVector<int>& indented_texts_left,
const float foreground_density_th,
ColPartition* part);
// Count the number of values in sorted_vec that are close to val. Used to
// check if a partition is aligned with text partitions.
int CountAlignment(
const GenericVector<int>& sorted_vec, const int val) const;
// Check a seed candidate using the foreground pixel density. We return true
// if the density is below a certain threshold, because characters in
// equation regions are usually spaced further apart, with more white space.
bool CheckSeedFgDensity(const float density_th, ColPartition* part);
// A light version of SplitCPHor: instead of really splitting the part, we
// simply compute the union bounding box of each split piece.
void SplitCPHorLite(ColPartition* part, GenericVector<TBOX>* splitted_boxes);
// Split the part (horizontally), and save the split result into
// parts_splitted. Note that it is the caller's responsibility to release the
// memory owned by parts_splitted. On the other hand, the part is unchanged
// during this process and still owns the blobs, so do NOT call DeleteBoxes
// when freeing the ColPartitions in parts_splitted.
void SplitCPHor(ColPartition* part,
GenericVector<ColPartition*>* parts_splitted);
// Check the density for a seed candidate (part) using its math density and
// italic density. Returns true if the check passed.
bool CheckSeedDensity(const float math_density_high,
const float math_density_low,
const ColPartition* part) const;
// Check if part is indented, and report on which side(s).
IndentType IsIndented(ColPartition* part);
// Identify inline partitions from cp_seeds_, and re-label them.
void IdentifyInlineParts();
// Compute the super bounding box for all ColPartitions inside part_grid_.
void ComputeCPsSuperBBox();
// Identify inline partitions from cp_seeds_ using the horizontal search.
void IdentifyInlinePartsHorizontal();
// Estimate the line spacing between two text partitions. Returns -1 if there
// is not enough data.
int EstimateTextPartLineSpacing();
// Identify inline partitions from cp_seeds_ using the vertical search.
void IdentifyInlinePartsVertical(const bool top_to_bottom,
const int textPartsLineSpacing);
// Check if part is an inline equation zone. This should be called after the
// seed regions have been identified.
bool IsInline(const bool search_bottom,
const int textPartsLineSpacing,
ColPartition* part);
// For a given seed partition, we search part_grid_ to see if there is any
// partition that can be merged with it. It returns true if the seed has been
// expanded.
bool ExpandSeed(ColPartition* seed);
// Starting from the seed position, we search part_grid_
// horizontally/vertically, find all partitions that can be
// merged with seed, remove them from part_grid_, and put them into
// parts_to_merge.
void ExpandSeedHorizontal(const bool search_left,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
void ExpandSeedVertical(const bool search_bottom,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
// Check if part_box is a small neighbor of seed_box.
bool IsNearSmallNeighbor(const TBOX& seed_box,
const TBOX& part_box) const;
// Perform the density check for part, which we assume is near a seed
// partition. It returns true if the check passed.
bool CheckSeedNeighborDensity(const ColPartition* part) const;
// After identifying the math blocks, we do one more scan over all text
// partitions, and check if any of them is a satellite of a math block.
// Here p is a satellite of q if:
// 1. q is the nearest vertical neighbor of p, and
// 2. y_gap(p, q) is less than a threshold, and
// 3. x_overlap(p, q) is over a threshold.
// Note that p can be the satellite of two blocks: its top neighbor and its
// bottom neighbor.
void ProcessMathBlockSatelliteParts();
// Check if part is the satellite of one/two math blocks. If it is, we return
// true, and save the blocks into math_blocks.
bool IsMathBlockSatellite(
ColPartition* part, GenericVector<ColPartition*>* math_blocks);
// Search for the nearest neighbor of part in one vertical direction, as
// selected by search_bottom. It returns the neighbor found that has a major
// x overlap with part, or NULL when none is found.
ColPartition* SearchNNVertical(const bool search_bottom,
const ColPartition* part);
// Check if the neighbor at a vertical distance of y_gap is a near, math
// block partition.
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const;
// Generate the tiff file name for an output/debug file.
void GetOutputTiffName(const char* name, STRING* image_name) const;
// Debugger function that renders ColPartitions on the input image, where:
// parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION
// will be painted in green, and other parts will be painted in blue.
void PaintColParts(const STRING& outfile) const;
// Debugger function that renders the blobs in part_grid_ over the input
// image.
void PaintSpecialTexts(const STRING& outfile) const;
// Debugger function that prints the math blob density values for a
// ColPartition object.
void PrintSpecialBlobsDensity(const ColPartition* part) const;
// The tesseract engine initialized from equation training data.
// NOTE(review): unlike lang_tesseract_, no ownership rule is stated here;
// presumably this class owns it -- confirm against the destructor.
Tesseract* equ_tesseract_;
// The tesseract engine used for OCR. This pointer is passed in by the caller,
// so do NOT destroy it in this class.
Tesseract* lang_tesseract_;
// The ColPartitionGrid that we are processing. This pointer is passed in from
// the caller, so do NOT destroy it in this class.
ColPartitionGrid* part_grid_;
// A simple array of pointers to the best assigned column division at
// each grid y coordinate. This pointer is passed in from the caller, so do
// NOT destroy it in this class.
ColPartitionSet** best_columns_;
// The super bounding box of all ColPartitions in part_grid_.
// NOTE(review): ownership is not stated; presumably allocated and released
// by this class -- confirm against the implementation.
TBOX* cps_super_bbox_;
// The seed ColPartitions for equation regions.
GenericVector<ColPartition*> cp_seeds_;
// The resolution (dpi) of the processing image.
int resolution_;
// The number of pages we have processed.
int page_count_;
};
} // namespace tesseract
#endif // TESSERACT_CCMAIN_EQUATIONDETECT_H__