tesseract/cube/lang_model.h

79 lines
2.9 KiB
C++

/**********************************************************************
* File: lang_model.h
* Description: Declaration of the Language Model Base Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The LangModel class abstracts a state machine that is modeled as a trie
// structure. The state machine models the language being recognized by the OCR
// engine.
// This is an abstract class that is to be inherited by any language model.
#ifndef LANG_MODEL_H
#define LANG_MODEL_H
#include "lang_mod_edge.h"
#include "char_altlist.h"
#include "char_set.h"
#include "tuning_params.h"
namespace tesseract {
// Abstract interface for a language model. Concrete models implement the
// pure virtual functions below; this base class only stores four boolean
// switches (OOD, numeric, word list, punctuation) and exposes them through
// simple getters and setters.
class LangModel {
 public:
  // Constructs the model with all four switches turned on.
  LangModel()
      : ood_enabled_(true),
        numeric_enabled_(true),
        word_list_enabled_(true),
        punc_enabled_(true) {}
  // Virtual so that derived models can be destroyed through a LangModel*.
  virtual ~LangModel() {}

  // Returns an edge pointer to the root.
  virtual LangModEdge *Root() = 0;

  // Returns the edges that fan out of the specified parent edge; the number
  // of returned edges is reported through edge_cnt.
  // NOTE(review): ownership of the returned array is not visible from this
  // header -- confirm against the implementations.
  virtual LangModEdge **GetEdges(CharAltList *alt_list,
                                 LangModEdge *parent_edge,
                                 int *edge_cnt) = 0;

  // Returns whether a sequence of 32-bit characters is valid within this
  // language model or net. If eow_flag is true, the sequence has to end on a
  // valid word. The function also optionally returns, through edge_array,
  // the list of language model edges traversed to parse the string.
  virtual bool IsValidSequence(const char_32 *str, bool eow_flag,
                               LangModEdge **edge_array = NULL) = 0;

  // Character predicates; what counts as leading punctuation, trailing
  // punctuation, or a digit is decided by the concrete language model.
  virtual bool IsLeadingPunc(char_32 ch) = 0;
  virtual bool IsTrailingPunc(char_32 ch) = 0;
  virtual bool IsDigit(char_32 ch) = 0;

  // Getters for the switches. They are const-qualified so that they can be
  // queried through a const LangModel reference or pointer.
  bool OOD() const { return ood_enabled_; }
  bool Numeric() const { return numeric_enabled_; }
  bool WordList() const { return word_list_enabled_; }
  bool Punc() const { return punc_enabled_; }

  // Setters for the switches.
  void SetOOD(bool ood) { ood_enabled_ = ood; }
  void SetNumeric(bool numeric) { numeric_enabled_ = numeric; }
  void SetWordList(bool word_list) { word_list_enabled_ = word_list; }
  void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; }

 protected:
  // Switches readable and writable by derived classes. How each one affects
  // parsing is up to the concrete model.
  // NOTE(review): "ood" presumably stands for out-of-dictionary -- confirm.
  bool ood_enabled_;
  bool numeric_enabled_;
  bool word_list_enabled_;
  bool punc_enabled_;
};
}
#endif // LANG_MODEL_H