/********************************************************************** * File: lang_model.h * Description: Declaration of the Language Model Edge Base Class * Author: Ahmad Abdulkader * Created: 2007 * * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ // The LanguageModel class abstracts a State machine that is modeled as a Trie // structure. The state machine models the language being recognized by the OCR // Engine // This is an abstract class that is to be inherited by any language model #ifndef LANG_MODEL_H #define LANG_MODEL_H #include "lang_mod_edge.h" #include "char_altlist.h" #include "char_set.h" #include "tuning_params.h" namespace tesseract { class LangModel { public: LangModel() { ood_enabled_ = true; numeric_enabled_ = true; word_list_enabled_ = true; punc_enabled_ = true; } virtual ~LangModel() {} // Returns an edge pointer to the Root virtual LangModEdge *Root() = 0; // Returns the edges that fan-out of the specified edge and their count virtual LangModEdge **GetEdges(CharAltList *alt_list, LangModEdge *parent_edge, int *edge_cnt) = 0; // Returns is a sequence of 32-bit characters are valid within this language // model or net. And EndOfWord flag is specified. If true, the sequence has // to end on a valid word. The function also optionally returns the list // of language model edges traversed to parse the string virtual bool IsValidSequence(const char_32 *str, bool eow_flag, LangModEdge **edge_array = NULL) = 0; virtual bool IsLeadingPunc(char_32 ch) = 0; virtual bool IsTrailingPunc(char_32 ch) = 0; virtual bool IsDigit(char_32 ch) = 0; // accessor functions inline bool OOD() { return ood_enabled_; } inline bool Numeric() { return numeric_enabled_; } inline bool WordList() { return word_list_enabled_; } inline bool Punc() { return punc_enabled_; } inline void SetOOD(bool ood) { ood_enabled_ = ood; } inline void SetNumeric(bool numeric) { numeric_enabled_ = numeric; } inline void SetWordList(bool word_list) { word_list_enabled_ = word_list; } inline void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; } protected: bool ood_enabled_; bool numeric_enabled_; bool word_list_enabled_; bool punc_enabled_; }; } #endif // LANG_MODEL_H