mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 19:19:05 +08:00
156 lines
5.0 KiB
C
156 lines
5.0 KiB
C
|
/**********************************************************************
|
||
|
* File: cube_reco_context.h
|
||
|
* Description: Declaration of the Cube Recognition Context Class
|
||
|
* Author: Ahmad Abdulkader
|
||
|
* Created: 2007
|
||
|
*
|
||
|
* (C) Copyright 2008, Google Inc.
|
||
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
** you may not use this file except in compliance with the License.
|
||
|
** You may obtain a copy of the License at
|
||
|
** http://www.apache.org/licenses/LICENSE-2.0
|
||
|
** Unless required by applicable law or agreed to in writing, software
|
||
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
** See the License for the specific language governing permissions and
|
||
|
** limitations under the License.
|
||
|
*
|
||
|
**********************************************************************/
|
||
|
|
||
|
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
|
||
|
// (or a thread) would create one CubeRecoContext object per language.
|
||
|
// The CubeRecoContext object also provides methods to get and set the
|
||
|
// different attributes of the Cube OCR Engine.
|
||
|
|
||
|
#ifndef CUBE_RECO_CONTEXT_H
|
||
|
#define CUBE_RECO_CONTEXT_H
|
||
|
|
||
|
#include <string>
|
||
|
#include "neural_net.h"
|
||
|
#include "lang_model.h"
|
||
|
#include "classifier_base.h"
|
||
|
#include "feature_base.h"
|
||
|
#include "char_set.h"
|
||
|
#include "word_size_model.h"
|
||
|
#include "char_bigrams.h"
|
||
|
#include "word_unigrams.h"
|
||
|
|
||
|
namespace tesseract {
|
||
|
|
||
|
class Tesseract;
|
||
|
class TessdataManager;
|
||
|
|
||
|
class CubeRecoContext {
|
||
|
public:
|
||
|
// Reading order enum type
|
||
|
enum ReadOrder {
|
||
|
L2R,
|
||
|
R2L
|
||
|
};
|
||
|
|
||
|
// Instantiate using a Tesseract object
|
||
|
CubeRecoContext(Tesseract *tess_obj);
|
||
|
|
||
|
~CubeRecoContext();
|
||
|
|
||
|
// accessor functions
|
||
|
inline const string & Lang() const { return lang_; }
|
||
|
inline CharSet *CharacterSet() const { return char_set_; }
|
||
|
inline CharClassifier *Classifier() const { return char_classifier_; }
|
||
|
inline WordSizeModel *SizeModel() const { return word_size_model_; }
|
||
|
inline CharBigrams *Bigrams() const { return char_bigrams_; }
|
||
|
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
|
||
|
inline TuningParams *Params() const { return params_; }
|
||
|
inline LangModel *LangMod() const { return lang_mod_; }
|
||
|
|
||
|
// the reading order of the language
|
||
|
inline ReadOrder ReadingOrder() const {
|
||
|
return ((lang_ == "ara") ? R2L : L2R);
|
||
|
}
|
||
|
|
||
|
// does the language support case
|
||
|
inline bool HasCase() const {
|
||
|
return (lang_ != "ara" && lang_ != "hin");
|
||
|
}
|
||
|
|
||
|
inline bool Cursive() const {
|
||
|
return (lang_ == "ara");
|
||
|
}
|
||
|
|
||
|
inline bool HasItalics() const {
|
||
|
return (lang_ != "ara" && lang_ != "hin" && lang_ != "uk");
|
||
|
}
|
||
|
|
||
|
inline bool Contextual() const {
|
||
|
return (lang_ == "ara");
|
||
|
}
|
||
|
|
||
|
// RecoContext runtime flags accessor functions
|
||
|
inline bool SizeNormalization() const { return size_normalization_; }
|
||
|
inline bool NoisyInput() const { return noisy_input_; }
|
||
|
inline bool OOD() const { return lang_mod_->OOD(); }
|
||
|
inline bool Numeric() const { return lang_mod_->Numeric(); }
|
||
|
inline bool WordList() const { return lang_mod_->WordList(); }
|
||
|
inline bool Punc() const { return lang_mod_->Punc(); }
|
||
|
inline bool CaseSensitive() const {
|
||
|
return char_classifier_->CaseSensitive();
|
||
|
}
|
||
|
|
||
|
inline void SetSizeNormalization(bool size_normalization) {
|
||
|
size_normalization_ = size_normalization;
|
||
|
}
|
||
|
inline void SetNoisyInput(bool noisy_input) {
|
||
|
noisy_input_ = noisy_input;
|
||
|
}
|
||
|
inline void SetOOD(bool ood_enabled) {
|
||
|
lang_mod_->SetOOD(ood_enabled);
|
||
|
}
|
||
|
inline void SetNumeric(bool numeric_enabled) {
|
||
|
lang_mod_->SetNumeric(numeric_enabled);
|
||
|
}
|
||
|
inline void SetWordList(bool word_list_enabled) {
|
||
|
lang_mod_->SetWordList(word_list_enabled);
|
||
|
}
|
||
|
inline void SetPunc(bool punc_enabled) {
|
||
|
lang_mod_->SetPunc(punc_enabled);
|
||
|
}
|
||
|
inline void SetCaseSensitive(bool case_sensitive) {
|
||
|
char_classifier_->SetCaseSensitive(case_sensitive);
|
||
|
}
|
||
|
inline tesseract::Tesseract *TesseractObject() const {
|
||
|
return tess_obj_;
|
||
|
}
|
||
|
|
||
|
// Returns the path of the data files
|
||
|
bool GetDataFilePath(string *path) const;
|
||
|
// Creates a CubeRecoContext object using a tesseract object. Data
|
||
|
// files are loaded via the tessdata_manager, and the tesseract
|
||
|
// unicharset is provided in order to map Cube's unicharset to
|
||
|
// Tesseract's in the case where the two unicharsets differ.
|
||
|
static CubeRecoContext *Create(Tesseract *tess_obj,
|
||
|
TessdataManager *tessdata_manager,
|
||
|
UNICHARSET *tess_unicharset);
|
||
|
|
||
|
private:
|
||
|
bool loaded_;
|
||
|
string lang_;
|
||
|
CharSet *char_set_;
|
||
|
WordSizeModel *word_size_model_;
|
||
|
CharClassifier *char_classifier_;
|
||
|
CharBigrams *char_bigrams_;
|
||
|
WordUnigrams *word_unigrams_;
|
||
|
TuningParams *params_;
|
||
|
LangModel *lang_mod_;
|
||
|
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
|
||
|
bool size_normalization_;
|
||
|
bool noisy_input_;
|
||
|
|
||
|
// Loads and initialized all the necessary components of a
|
||
|
// CubeRecoContext. See .cpp for more details.
|
||
|
bool Load(TessdataManager *tessdata_manager,
|
||
|
UNICHARSET *tess_unicharset);
|
||
|
};
|
||
|
}
|
||
|
|
||
|
#endif // CUBE_RECO_CONTEXT_H
|