Added experimental equation detector

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@646 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2012-02-02 02:50:01 +00:00
parent ef786ad29b
commit ac014eb27a
4 changed files with 1936 additions and 0 deletions

1535
ccmain/equationdetect.cpp Normal file

File diff suppressed because it is too large Load Diff

277
ccmain/equationdetect.h Normal file
View File

@ -0,0 +1,277 @@
///////////////////////////////////////////////////////////////////////
// File: equationdetect.h
// Description: The equation detection class that inherits equationdetectbase.
// Author: Zongyi (Joe) Liu (joeliu@google.com)
// Created: Fri Aug 31 11:13:01 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H__
#define TESSERACT_CCMAIN_EQUATIONDETECT_H__
#include "blobbox.h"
#include "equationdetectbase.h"
#include "genericvector.h"
#include "unichar.h"
class BLOBNBOX;
class BLOB_CHOICE;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class TBOX;
class UNICHARSET;
namespace tesseract {
class Tesseract;
class ColPartition;
class ColPartitionGrid;
class ColPartitionSet;
// Equation detector that implements the EquationDetectBase interface
// (LabelSpecialText and FindEquationParts). The protected helpers below
// operate on the ColPartitionGrid stored in part_grid_ and on the seed
// partitions collected in cp_seeds_.
class EquationDetect : public EquationDetectBase {
public:
EquationDetect(const char* equ_datapath,
const char* equ_language);
~EquationDetect();
// Indentation classification of a partition, as returned by IsIndented.
// INDENT_TYPE_COUNT is the number of real values, not a value itself.
enum IndentType {
NO_INDENT,
LEFT_INDENT,
RIGHT_INDENT,
BOTH_INDENT,
INDENT_TYPE_COUNT
};
// Reset the lang_tesseract_ pointer. This function should be called before we
// do any detector work.
void SetLangTesseract(Tesseract* lang_tesseract);
// Iterate over the blobs inside to_block, and set the blobs that we want to
// process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function
// returns 0 upon success.
int LabelSpecialText(TO_BLOCK* to_block);
// Find possible equation partitions from part_grid. Should be called
// after the special_text_type of blobs are set.
// It returns 0 upon success.
int FindEquationParts(ColPartitionGrid* part_grid,
ColPartitionSet** best_columns);
// Reset the resolution of the processing image. TEST only function.
void SetResolution(const int resolution);
protected:
// Identify the special text type for one blob, and update its field. When
// height_th is set (> 0), we will label the blob as BSTT_NONE if its height
// is less than height_th.
void IdentifySpecialText(BLOBNBOX *blob, const int height_th);
// Estimate the type for one unichar.
BlobSpecialTextType EstimateTypeForUnichar(
const UNICHARSET& unicharset, const UNICHAR_ID id) const;
// Compute the special text type for each blob in part_grid_.
void IdentifySpecialText();
// Identify blobs that we want to skip during special blob type
// classification.
void IdentifyBlobsToSkip(ColPartition* part);
// The ColPartitions in part_grid_ may be over-segmented, particularly in the
// block equation regions. So we identify these partitions and merge them
// before we do the searching.
void MergePartsByLocation();
// Starting from the seed center, we do a radius search. Partitions that
// have large overlaps with seed are removed from part_grid_ and added to
// parts_overlap. Note: this function may update the part_grid_, so if the
// caller is also running ColPartitionGridSearch, use the RepositionIterator
// to continue.
void SearchByOverlap(ColPartition* seed,
GenericVector<ColPartition*>* parts_overlap);
// Insert part back into part_grid_, after it absorbs some other parts.
void InsertPartAfterAbsorb(ColPartition* part);
// Identify the seed ColPartitions in part_grid_, label them as PT_EQUATION,
// and save them into cp_seeds_.
void IdentifySeedParts();
// Check the blobs count for a seed region candidate.
bool CheckSeedBlobsCount(ColPartition* part);
// Compute the foreground pixel density for a tbox area.
float ComputeForegroundDensity(const TBOX& tbox);
// Check if part qualifies for the seed2 label: low math density and left
// indented. We are using two checks:
// 1. If its left is aligned with any coordinates in indented_texts_left,
// which we assume have been sorted.
// 2. If its foreground density is over foreground_density_th.
bool CheckForSeed2(
const GenericVector<int>& indented_texts_left,
const float foreground_density_th,
ColPartition* part);
// Count the number of values in sorted_vec that are close to val, used to
// check if a partition is aligned with text partitions.
int CountAlignment(
const GenericVector<int>& sorted_vec, const int val) const;
// Check for a seed candidate using the foreground pixel density. We
// return true if the density is below a certain threshold, because characters
// in equation regions are usually spaced apart with more white space.
bool CheckSeedFgDensity(const float density_th, ColPartition* part);
// A light version of SplitCPHor: instead of really doing the part split, we
// simply compute the union bounding box of each split part.
void SplitCPHorLite(ColPartition* part, GenericVector<TBOX>* splitted_boxes);
// Split the part (horizontally), and save the split result into
// parts_splitted. Note that it is the caller's responsibility to release the
// memory owned by parts_splitted. On the other hand, the part is unchanged
// during this process and still owns the blobs, so do NOT call DeleteBoxes
// when freeing the colpartitions in parts_splitted.
void SplitCPHor(ColPartition* part,
GenericVector<ColPartition*>* parts_splitted);
// Check the density for a seed candidate (part) using its math density and
// italic density, returns true if the check passed.
bool CheckSeedDensity(const float math_density_high,
const float math_density_low,
const ColPartition* part) const;
// Check if part is indented.
IndentType IsIndented(ColPartition* part);
// Identify inline partitions from cp_seeds_, and re-label them.
void IdentifyInlineParts();
// Compute the super bounding box for all colpartitions inside part_grid_.
void ComputeCPsSuperBBox();
// Identify inline partitions from cp_seeds_ using the horizontal search.
void IdentifyInlinePartsHorizontal();
// Estimate the line spacing between two text partitions. Returns -1 if not
// enough data.
int EstimateTextPartLineSpacing();
// Identify inline partitions from cp_seeds_ using vertical search.
void IdentifyInlinePartsVertical(const bool top_to_bottom,
const int textPartsLineSpacing);
// Check if part is an inline equation zone. This should be called after we
// identified the seed regions.
bool IsInline(const bool search_bottom,
const int textPartsLineSpacing,
ColPartition* part);
// For a given seed partition, we search the part_grid_ and see if there is
// any partition that can be merged with it. It returns true if the seed has
// been expanded.
bool ExpandSeed(ColPartition* seed);
// Starting from the seed position, we search the part_grid_
// horizontally/vertically, find all partitions that can be
// merged with seed, remove them from part_grid_, and put them into
// parts_to_merge.
void ExpandSeedHorizontal(const bool search_left,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
void ExpandSeedVertical(const bool search_bottom,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
// Check if a part_box is the small neighbor of seed_box.
bool IsNearSmallNeighbor(const TBOX& seed_box,
const TBOX& part_box) const;
// Perform the density check for part, which we assume is near a seed
// partition. It returns true if the check passed.
bool CheckSeedNeighborDensity(const ColPartition* part) const;
// After identifying the math blocks, we do one more scan over all text
// partitions, and check if any of them is the satellite of math blocks.
// Here p is the satellite of q if:
// 1. q is the nearest vertical neighbor of p, and
// 2. y_gap(p, q) is less than a threshold, and
// 3. x_overlap(p, q) is over a threshold.
// Note that p can be the satellite of two blocks: its top neighbor and
// bottom neighbor.
void ProcessMathBlockSatelliteParts();
// Check if part is the satellite of one/two math blocks. If it is, we return
// true, and save the blocks into math_blocks.
bool IsMathBlockSatellite(
ColPartition* part, GenericVector<ColPartition*>* math_blocks);
// Search the nearest neighbor of part in one vertical direction as defined in
// search_bottom. It returns the neighbor found that has a major x overlap
// with it, or NULL when not found.
ColPartition* SearchNNVertical(const bool search_bottom,
const ColPartition* part);
// Check if the neighbor with vertical distance of y_gap is a near and math
// block partition.
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const;
// Generate the tiff file name for output/debug file.
void GetOutputTiffName(const char* name, STRING* image_name) const;
// Debugger function that renders ColPartitions on the input image, where:
// parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION
// will be painted in green, and other parts will be painted in blue.
void PaintColParts(const STRING& outfile) const;
// Debugger function that renders the blobs in part_grid_ over the input
// image.
void PaintSpecialTexts(const STRING& outfile) const;
// Debugger function that prints the math blobs density values for a
// ColPartition object.
void PrintSpecialBlobsDensity(const ColPartition* part) const;
// The tesseract engine initialized from equation training data.
Tesseract* equ_tesseract_;
// The tesseract engine used for OCR. This pointer is passed in by the caller,
// so do NOT destroy it in this class.
Tesseract* lang_tesseract_;
// The ColPartitionGrid that we are processing. This pointer is passed in from
// the caller, so do NOT destroy it in the class.
ColPartitionGrid* part_grid_;
// A simple array of pointers to the best assigned column division at
// each grid y coordinate. This pointer is passed in from the caller, so do
// NOT destroy it in the class.
ColPartitionSet** best_columns_;
// The super bounding box of all cps in the part_grid_.
TBOX* cps_super_bbox_;
// The seed ColPartition for equation region.
GenericVector<ColPartition*> cp_seeds_;
// The resolution (dpi) of the processing image.
int resolution_;
// The number of pages we have processed.
int page_count_;
};
} // namespace tesseract
#endif // TESSERACT_CCMAIN_EQUATIONDETECT_H__

View File

@ -0,0 +1,65 @@
///////////////////////////////////////////////////////////////////////
// File: equationdetectbase.cpp
// Description: The base class for equation detection.
// Author: Zongyi (Joe) Liu (joeliu@google.com)
// Created: Fri Aug 31 11:13:01 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "allheaders.h"
#include "blobbox.h"
#include "equationdetectbase.h"
namespace tesseract {
// Default constructor. Intentionally empty: there is nothing to set up here.
EquationDetectBase::EquationDetectBase() {}
// Destructor. Intentionally empty: there is nothing to release here.
EquationDetectBase::~EquationDetectBase() {}
// Debug helper: draws the bounding box of blob onto pix, with the color
// selected by blob->special_text_type():
//   BSTT_MATH:    red
//   BSTT_DIGIT:   cyan
//   BSTT_ITALIC:  green
//   BSTT_UNCLEAR: blue
//   all others (including BSTT_NONE): yellow
// pix must be a non-NULL 32 bpp image and blob must be non-NULL; both
// conditions are enforced with ASSERT_HOST.
void EquationDetectBase::RenderSpecialText(Pix* pix,
                                           BLOBNBOX* blob) {
  ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32 && blob != NULL);
  const TBOX& tbox = blob->bounding_box();
  int height = pixGetHeight(pix);
  const int box_width = 5;

  // Coordinate translation: tesseract uses the bottom-left corner as the
  // origin, while leptonica uses the top-left corner as the origin.
  Box *box = boxCreate(tbox.left(), height - tbox.top(),
                       tbox.width(), tbox.height());
  switch (blob->special_text_type()) {
    case BSTT_MATH:  // Red box.
      pixRenderBoxArb(pix, box, box_width, 255, 0, 0);
      break;
    case BSTT_DIGIT:  // Cyan box.
      pixRenderBoxArb(pix, box, box_width, 0, 255, 255);
      break;
    case BSTT_ITALIC:  // Green box.
      pixRenderBoxArb(pix, box, box_width, 0, 255, 0);
      break;
    case BSTT_UNCLEAR:  // Blue box.
      // Previously rendered as (0, 255, 0), which made unclear blobs
      // indistinguishable from italic ones.
      pixRenderBoxArb(pix, box, box_width, 0, 0, 255);
      break;
    case BSTT_NONE:
    default:
      // Yellow box.
      pixRenderBoxArb(pix, box, box_width, 255, 255, 0);
      break;
  }
  boxDestroy(&box);
}
}; // namespace tesseract

View File

@ -0,0 +1,59 @@
///////////////////////////////////////////////////////////////////////
// File: equationdetectbase.h
// Description: The base class for equation detection.
// Author: Zongyi (Joe) Liu (joeliu@google.com)
// Created: Fri Aug 31 11:13:01 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H__
#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H__
// Forward declarations: this header only refers to these types through
// pointers, so their full definitions are not needed here.
class BLOBNBOX;  // Parameter type of EquationDetectBase::RenderSpecialText.
class BLOBNBOX_LIST;
class TO_BLOCK;
struct Pix;
namespace tesseract {
class ColPartitionGrid;
class ColPartitionSet;
// Abstract interface for equation detectors. Concrete detectors must
// implement LabelSpecialText and FindEquationParts; RenderSpecialText is a
// static debug helper shared by all implementations.
class EquationDetectBase {
public:
EquationDetectBase();
// Virtual so that a detector can be destroyed through a base-class pointer.
virtual ~EquationDetectBase();
// Iterate over the blobs inside to_block, and set the blobs that we want to
// process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function
// returns 0 upon success.
virtual int LabelSpecialText(TO_BLOCK* to_block) = 0;
// Interface to find possible equation partitions in part_grid. This
// should be called after the IdentifySpecialText function.
virtual int FindEquationParts(ColPartitionGrid* part_grid,
ColPartitionSet** best_columns) = 0;
// Debug function: Render a bounding box on pix based on the value of the
// blob's special_text_type, specifically:
// BSTT_MATH: red box
// BSTT_DIGIT: cyan box
// BSTT_ITALIC: green box
// BSTT_UNCLEAR: blue box
// All others: yellow box
static void RenderSpecialText(Pix* pix, BLOBNBOX* blob);
};
}; // namespace tesseract
#endif // TESSERACT_TEXTORD_EQUATIONDETECTBASE_H__