mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-07 18:27:48 +08:00
4523ce9f7d
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20
130 lines
5.1 KiB
C++
130 lines
5.1 KiB
C++
/**********************************************************************
|
|
* File: tuning_params.h
|
|
* Description: Declaration of the Tuning Parameters Base Class
|
|
* Author: Ahmad Abdulkader
|
|
* Created: 2008
|
|
*
|
|
* (C) Copyright 2008, Google Inc.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
// The TuningParams class abstracts all the parameters that can be learned or
|
|
// tuned during the training process. It is a base class that all TuningParams
|
|
// classes should inherit from.
|
|
|
|
#ifndef TUNING_PARAMS_H
|
|
#define TUNING_PARAMS_H
|
|
|
|
#include <string>
|
|
#ifdef USE_STD_NAMESPACE
|
|
using std::string;
|
|
#endif
|
|
|
|
namespace tesseract {
|
|
class TuningParams {
|
|
public:
|
|
enum type_classifer {
|
|
NN,
|
|
HYBRID_NN
|
|
};
|
|
enum type_feature {
|
|
BMP,
|
|
CHEBYSHEV,
|
|
HYBRID
|
|
};
|
|
|
|
TuningParams() {}
|
|
virtual ~TuningParams() {}
|
|
// Accessor functions
|
|
inline double RecoWgt() const { return reco_wgt_; }
|
|
inline double SizeWgt() const { return size_wgt_; }
|
|
inline double CharBigramWgt() const { return char_bigrams_wgt_; }
|
|
inline double WordUnigramWgt() const { return word_unigrams_wgt_; }
|
|
inline int MaxSegPerChar() const { return max_seg_per_char_; }
|
|
inline int BeamWidth() const { return beam_width_; }
|
|
inline int TypeClassifier() const { return tp_classifier_; }
|
|
inline int TypeFeature() const { return tp_feat_; }
|
|
inline int ConvGridSize() const { return conv_grid_size_; }
|
|
inline int HistWindWid() const { return hist_wind_wid_; }
|
|
inline int MinConCompSize() const { return min_con_comp_size_; }
|
|
inline double MaxWordAspectRatio() const { return max_word_aspect_ratio_; }
|
|
inline double MinSpaceHeightRatio() const { return min_space_height_ratio_; }
|
|
inline double MaxSpaceHeightRatio() const { return max_space_height_ratio_; }
|
|
inline double CombinerRunThresh() const { return combiner_run_thresh_; }
|
|
inline double CombinerClassifierThresh() const {
|
|
return combiner_classifier_thresh_; }
|
|
|
|
inline void SetRecoWgt(double wgt) { reco_wgt_ = wgt; }
|
|
inline void SetSizeWgt(double wgt) { size_wgt_ = wgt; }
|
|
inline void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; }
|
|
inline void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; }
|
|
inline void SetMaxSegPerChar(int max_seg_per_char) {
|
|
max_seg_per_char_ = max_seg_per_char;
|
|
}
|
|
inline void SetBeamWidth(int beam_width) { beam_width_ = beam_width; }
|
|
inline void SetTypeClassifier(type_classifer tp_classifier) {
|
|
tp_classifier_ = tp_classifier;
|
|
}
|
|
inline void SetTypeFeature(type_feature tp_feat) {tp_feat_ = tp_feat;}
|
|
inline void SetHistWindWid(int hist_wind_wid) {
|
|
hist_wind_wid_ = hist_wind_wid;
|
|
}
|
|
|
|
virtual bool Save(string file_name) = 0;
|
|
virtual bool Load(string file_name) = 0;
|
|
|
|
protected:
|
|
// weight of recognition cost. This includes the language model cost
|
|
double reco_wgt_;
|
|
// weight of size cost
|
|
double size_wgt_;
|
|
// weight of character bigrams cost
|
|
double char_bigrams_wgt_;
|
|
// weight of word unigrams cost
|
|
double word_unigrams_wgt_;
|
|
// Maximum number of segments per character
|
|
int max_seg_per_char_;
|
|
// Beam width equal to the maximum number of nodes kept in the beam search
|
|
// trellis column after pruning
|
|
int beam_width_;
|
|
// Classifier type: See enum type_classifer for classifier types
|
|
type_classifer tp_classifier_;
|
|
// Feature types: See enum type_feature for feature types
|
|
type_feature tp_feat_;
|
|
// Grid size to scale a grapheme bitmap used by the BMP feature type
|
|
int conv_grid_size_;
|
|
// Histogram window size as a ratio of the word height used in computing
|
|
// the vertical pixel density histogram in the segmentation algorithm
|
|
int hist_wind_wid_;
|
|
// Minimum possible size of a connected component
|
|
int min_con_comp_size_;
|
|
// Maximum aspect ratio of a word (width / height)
|
|
double max_word_aspect_ratio_;
|
|
// Minimum ratio relative to the line height of a gap to be considered as
|
|
// a word break
|
|
double min_space_height_ratio_;
|
|
// Maximum ratio relative to the line height of a gap to be considered as
|
|
// a definite word break
|
|
double max_space_height_ratio_;
|
|
// When Cube and Tesseract are run in combined mode, only run
|
|
// combiner classifier when tesseract confidence is below this
|
|
// threshold. When Cube is run without Tesseract, this is ignored.
|
|
double combiner_run_thresh_;
|
|
// When Cube and tesseract are run in combined mode, threshold on
|
|
// output of combiner binary classifier (chosen from ROC during
|
|
// combiner training). When Cube is run without Tesseract, this is ignored.
|
|
double combiner_classifier_thresh_;
|
|
};
|
|
}
|
|
|
|
#endif // TUNING_PARAMS_H
|