mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-29 20:08:19 +08:00
85 lines
3.1 KiB
C++
85 lines
3.1 KiB
C++
|
///////////////////////////////////////////////////////////////////////
|
||
|
// File: tesseractclass.cpp
|
||
|
// Description: An instance of Tesseract. For thread safety, *every*
|
||
|
// global variable goes in here, directly, or indirectly.
|
||
|
// Author: Ray Smith
|
||
|
// Created: Fri Mar 07 08:17:01 PST 2008
|
||
|
//
|
||
|
// (C) Copyright 2008, Google Inc.
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
//
|
||
|
///////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
#include "tesseractclass.h"
|
||
|
#include "globals.h"
|
||
|
|
||
|
// Include automatically generated configuration file if running autoconf.
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "config_auto.h"
|
||
|
#endif
|
||
|
|
||
|
#ifdef HAVE_LIBLEPT
|
||
|
// Include the leptonica library only if autoconf (or the makefile etc.) tells us to.
|
||
|
#include "allheaders.h"
|
||
|
#endif
|
||
|
|
||
|
namespace tesseract {
|
||
|
|
||
|
// Default constructor.
//
// Each *_MEMBER entry below supplies a member name, its default value and a
// human-readable description string; the macros themselves are defined
// outside this file. After the parameters, the image pointer is set to NULL,
// both skew vectors to (1.0f, 0.0f) and hindi_image_ to false.
Tesseract::Tesseract()
  : BOOL_MEMBER(tessedit_resegment_from_boxes, false,
                "Take segmentation and labeling from box file"),
    BOOL_MEMBER(tessedit_train_from_boxes, false,
                "Generate training data from boxed chars"),
    BOOL_MEMBER(tessedit_dump_pageseg_images, false,
                "Dump intermediate images made during page segmentation"),
    // The default for pageseg_mode is the old behaviour, so as not to
    // upset anything that relies on that.
    // NOTE(review): the description below lists 0-4 and then 6, skipping 5.
    // Confirm the numbering against the PageSegMode enum in baseapi.h.
    INT_MEMBER(tessedit_pageseg_mode, 2,
               "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"
               " (Values from PageSegMode enum in baseapi.h)"),
    INT_MEMBER(tessedit_accuracyvspeed, 0,
               "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"
               " (Values from AccuracyVSpeed enum in baseapi.h)"),
    BOOL_MEMBER(tessedit_train_from_boxes_word_level, false,
                "Generate training data from boxed chars at word level."),
    STRING_MEMBER(tessedit_char_blacklist, "",
                  "Blacklist of chars not to recognize"),
    STRING_MEMBER(tessedit_char_whitelist, "",
                  "Whitelist of chars to recognize"),
    BOOL_MEMBER(global_tessedit_ambigs_training, false,
                "Perform training for ambiguities"),
    // No binary image yet; Clear() only destroys it when it is non-NULL.
    pix_binary_(NULL),
    // Same (1.0f, 0.0f) values that Clear() resets the skew vectors to.
    deskew_(1.0f, 0.0f),
    reskew_(1.0f, 0.0f),
    hindi_image_(false) {
}
|
||
|
|
||
|
// Destructor: all cleanup is delegated to Clear().
Tesseract::~Tesseract() {
  this->Clear();
}
|
||
|
|
||
|
// Resets both skew vectors to (1.0f, 0.0f) and, when built with
// HAVE_LIBLEPT, destroys the binary image if one is currently held.
void Tesseract::Clear() {
  reskew_ = FCOORD(1.0f, 0.0f);
  deskew_ = FCOORD(1.0f, 0.0f);
#ifdef HAVE_LIBLEPT
  // pixDestroy receives the address of the pointer, not the pointer itself.
  if (pix_binary_) {
    pixDestroy(&pix_binary_);
  }
#endif
}
|
||
|
|
||
|
void Tesseract::SetBlackAndWhitelist() {
|
||
|
// Set the white and blacklists (if any)
|
||
|
unicharset.set_black_and_whitelist(tessedit_char_blacklist.string(),
|
||
|
tessedit_char_whitelist.string());
|
||
|
}
|
||
|
|
||
|
} // namespace tesseract
|