mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
4523ce9f7d
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20
127 lines
5.8 KiB
C++
127 lines
5.8 KiB
C++
/**********************************************************************
|
|
* File: beam_search.h
|
|
* Description: Declaration of Beam Word Search Algorithm Class
|
|
* Author: Ahmad Abdulkader
|
|
* Created: 2007
|
|
*
|
|
* (C) Copyright 2008, Google Inc.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
// The Beam Search class implements a Beam Search algorithm for the
|
|
// N-best paths through the lattice of a search object using a language model
|
|
// The search object is a segmented bitmap of a word image. The language model
|
|
// is a state machine that defines valid sequences of characters
|
|
// The cost of each path is the combined (product) probabilities of the
|
|
// characters along the path. The character probabilities are computed using
|
|
// the character classifier member of the RecoContext
|
|
// The BeamSearch class itself holds the state of the last search it performed
|
|
// using its "Search" method. Subsequent class to the Search method erase the
|
|
// states of previously done searches
|
|
|
|
#ifndef BEAM_SEARCH_H
|
|
#define BEAM_SEARCH_H
|
|
|
|
#include "search_column.h"
|
|
#include "word_altlist.h"
|
|
#include "search_object.h"
|
|
#include "lang_model.h"
|
|
#include "cube_utils.h"
|
|
#include "cube_reco_context.h"
|
|
#include "allheaders.h"
|
|
|
|
namespace tesseract {
|
|
|
|
class BeamSearch {
|
|
public:
|
|
explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true);
|
|
~BeamSearch();
|
|
// Performs a beam seach in the specified search using the specified
|
|
// language model; returns an alternate list of possible words as a result.
|
|
WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL);
|
|
// Returns the best node in the last column of last performed search.
|
|
SearchNode *BestNode() const;
|
|
// Returns the string corresponding to the specified alt.
|
|
char_32 *Alt(int alt) const;
|
|
// Backtracks from the specified lattice node and returns the corresponding
|
|
// character-mapped segments, character count, char_32 result string, and
|
|
// character bounding boxes (if char_boxes is not NULL). If the segments
|
|
// cannot be constructed, returns NULL, and all result arguments
|
|
// will be NULL.
|
|
CharSamp **BackTrack(SearchObject *srch_obj, int node_index,
|
|
int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
|
|
// Same as above, except it takes a pointer to a search node object
|
|
// instead of node index.
|
|
CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node,
|
|
int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
|
|
// Returns the size cost of a specified string of a lattice
|
|
// path that ends at the specified lattice node.
|
|
int SizeCost(SearchObject *srch_obj, SearchNode *node,
|
|
char_32 **str32 = NULL) const;
|
|
// Returns the word unigram cost of the given string, possibly
|
|
// stripping out a single trailing punctuation character.
|
|
int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const;
|
|
|
|
// Supplementary functions needed for visualization
|
|
// Return column count of the lattice.
|
|
inline int ColCnt() const { return col_cnt_; }
|
|
// Returns the lattice column corresponding to the specified column index.
|
|
SearchColumn *Column(int col_idx) const;
|
|
// Return the index of the best node in the last column of the
|
|
// best-cost path before the alternates list is sorted.
|
|
inline int BestPresortedNodeIndex() const {
|
|
return best_presorted_node_idx_;
|
|
};
|
|
|
|
private:
|
|
// Maximum reasonable segmentation point count
|
|
static const int kMaxSegPointCnt = 128;
|
|
// Recognition context object; the context holds the character classifier
|
|
// and the tuning parameters object
|
|
CubeRecoContext *cntxt_;
|
|
// Count of segmentation pts
|
|
int seg_pt_cnt_;
|
|
// Lattice column count; currently redundant with respect to seg_pt_cnt_
|
|
// but that might change in the future
|
|
int col_cnt_;
|
|
// Array of lattice columns
|
|
SearchColumn **col_;
|
|
// Run in word or phrase mode
|
|
bool word_mode_;
|
|
// Node index of best-cost node, before alternates are merged and sorted
|
|
int best_presorted_node_idx_;
|
|
// Cleans up beam search state
|
|
void Cleanup();
|
|
// Creates a Word alternate list from the results in the lattice.
|
|
// This function computes a cost for each node in the final column
|
|
// of the lattice, which is a weighted average of several costs:
|
|
// size cost, character bigram cost, word unigram cost, and
|
|
// recognition cost from the beam search. The weights are the
|
|
// CubeTuningParams, which are learned together with the character
|
|
// classifiers.
|
|
WordAltList *CreateWordAltList(SearchObject *srch_obj);
|
|
// Creates a set of children nodes emerging from a parent node based on
|
|
// the character alternate list and the language model.
|
|
void CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
|
|
SearchNode *parent_node, LangModEdge *lm_parent_edge,
|
|
CharAltList *char_alt_list, int extra_cost);
|
|
// Backtracks from the given lattice node and returns the corresponding
|
|
// char mapped segments, character count, and character bounding boxes (if
|
|
// char_boxes is not NULL). If the segments cannot be constructed,
|
|
// returns NULL, and all result arguments will be NULL.
|
|
CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node,
|
|
int* char_cnt, Boxa **char_boxes) const;
|
|
};
|
|
}
|
|
|
|
#endif // BEAM_SEARCH_H
|