tesseract/cube/cube_object.cpp

288 lines
8.2 KiB
C++

/**********************************************************************
* File: cube_object.cpp
* Description: Implementation of the Cube Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <math.h>
#include "cube_object.h"
#include "cube_utils.h"
#include "word_list_lang_model.h"
namespace tesseract {
// Constructs a CubeObject around a caller-supplied character sample.
// Init() leaves own_char_samp_ false, so the destructor will not delete the
// sample passed in here.
CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) {
  // Reset every member to its default before recording the inputs.
  Init();
  cntxt_ = cntxt;
  char_samp_ = char_samp;
}
// Constructs a CubeObject from the region (left, top, wid, hgt) of an IMAGE.
// The character sample is created via CubeUtils::CharSampleFromImg() and is
// flagged as owned by this object.
CubeObject::CubeObject(CubeRecoContext *cntxt, IMAGE *img,
                       int left, int top, int wid, int hgt) {
  // Reset every member to its default before filling in the real values.
  Init();
  cntxt_ = cntxt;
  own_char_samp_ = true;
  char_samp_ = CubeUtils::CharSampleFromImg(img, left, top, wid, hgt);
}
// Constructs a CubeObject from the region (left, top, wid, hgt) of a Pix.
// The character sample is created via CubeUtils::CharSampleFromPix() and is
// flagged as owned by this object.
CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix,
                       int left, int top, int wid, int hgt) {
  // Reset every member to its default before filling in the real values.
  Init();
  cntxt_ = cntxt;
  own_char_samp_ = true;
  char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt);
}
// Puts every data member into its default state: all pointers NULL and all
// flags false. Called first by each constructor.
void CubeObject::Init() {
  // recognition context
  cntxt_ = NULL;

  // input sample and its ownership flag
  char_samp_ = NULL;
  own_char_samp_ = false;

  // state for recognition on the original sample
  beam_obj_ = NULL;
  srch_obj_ = NULL;
  alt_list_ = NULL;

  // state for recognition on the de-slanted sample
  deslanted_ = false;
  deslanted_char_samp_ = NULL;
  deslanted_beam_obj_ = NULL;
  deslanted_srch_obj_ = NULL;
  deslanted_alt_list_ = NULL;
}
// Releases both alternate lists (regular and de-slanted) and resets the
// corresponding members to NULL. Other members are left untouched.
void CubeObject::Cleanup() {
  // delete on a NULL pointer is a no-op, so no explicit guard is required
  delete alt_list_;
  alt_list_ = NULL;

  delete deslanted_alt_list_;
  deslanted_alt_list_ = NULL;
}
// Destructor. Frees the search and beam-search objects, the de-slanted
// sample and (via Cleanup()) the alternate lists. The input char sample is
// freed only when this object owns it.
CubeObject::~CubeObject() {
  // the input sample may belong to the caller; only free it if we own it
  if (own_char_samp_ == true) {
    delete char_samp_;
    char_samp_ = NULL;
  }

  // delete on a NULL pointer is a no-op, so the remaining members need no
  // guard; the order of destruction matches the original implementation
  delete srch_obj_;
  srch_obj_ = NULL;

  delete deslanted_srch_obj_;
  deslanted_srch_obj_ = NULL;

  delete beam_obj_;
  beam_obj_ = NULL;

  delete deslanted_beam_obj_;
  deslanted_beam_obj_ = NULL;

  delete deslanted_char_samp_;
  deslanted_char_samp_ = NULL;

  // finally release the alternate lists
  Cleanup();
}
// Runs recognition on the member char sample using the specified language
// model. If lang_mod is NULL, the language model of the CubeRecoContext is
// used instead.
//
// word_mode is passed to the BeamSearch constructor. Because the BeamSearch
// object is created only when beam_obj_ is NULL and then reused, word_mode
// only takes effect on the call that creates it.
//
// Returns the alternate list produced by the beam search (or the one produced
// on the de-slanted sample when that was run and its top cost is lower).
// The returned list is held by this object: it is deleted by the next call to
// Recognize() (through Cleanup()) and by the destructor. Returns NULL when
// there is no char sample, when a helper object cannot be constructed, when
// de-slanting fails, or when the search itself returned NULL.
WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
  if (char_samp_ == NULL) {
    return NULL;
  }
  // clear alt lists left over from a previous call
  Cleanup();
  // no specified language model, use the one in the reco context
  if (lang_mod == NULL) {
    lang_mod = cntxt_->LangMod();
  }
  // normalize if necessary
  // NOTE(review): Normalize() may replace char_samp_, while srch_obj_ cached
  // by an earlier call was constructed from the previous sample. Confirm that
  // CubeSearchObject does not keep using the old pointer.
  if (cntxt_->SizeNormalization()) {
    Normalize();
  }
  // assume not de-slanted by default
  deslanted_ = false;
  // create a beam search object (reused across calls)
  if (beam_obj_ == NULL) {
    beam_obj_ = new BeamSearch(cntxt_, word_mode);
    if (beam_obj_ == NULL) {
      fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct "
              "BeamSearch\n");
      return NULL;
    }
  }
  // create a cube search object (reused across calls)
  if (srch_obj_ == NULL) {
    srch_obj_ = new CubeSearchObject(cntxt_, char_samp_);
    if (srch_obj_ == NULL) {
      fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct "
              "CubeSearchObject\n");
      return NULL;
    }
  }
  // run a beam search against the tesslang model
  alt_list_ = beam_obj_->Search(srch_obj_, lang_mod);
  // deslant (if supported by language) and re-reco if probability is low enough
  if (cntxt_->HasItalics() == true &&
      (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
       alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) {
    if (deslanted_beam_obj_ == NULL) {
      deslanted_beam_obj_ = new BeamSearch(cntxt_);
      if (deslanted_beam_obj_ == NULL) {
        fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
                "construct deslanted BeamSearch\n");
        // the return type is a pointer: report failure with NULL, not false
        return NULL;
      }
    }
    if (deslanted_srch_obj_ == NULL) {
      // A previous call that failed in Deslant() leaves a clone behind while
      // deslanted_srch_obj_ is still NULL; free it before cloning again so it
      // is not leaked (delete on NULL is a no-op).
      delete deslanted_char_samp_;
      deslanted_char_samp_ = char_samp_->Clone();
      if (deslanted_char_samp_ == NULL) {
        fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
                "construct deslanted CharSamp\n");
        return NULL;
      }
      if (deslanted_char_samp_->Deslant() == false) {
        // the clone stays in deslanted_char_samp_ and is released either by
        // the next call or by the destructor
        return NULL;
      }
      deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_);
      if (deslanted_srch_obj_ == NULL) {
        fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
                "construct deslanted CubeSearchObject\n");
        return NULL;
      }
    }
    // run a beam search against the tesslang model
    deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_,
                                                      lang_mod);
    // should we use de-slanted altlist? Only if it has at least one alternate
    // and either the regular list is empty or its top alternate costs more.
    if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) {
      if (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
          deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) {
        deslanted_ = true;
        return deslanted_alt_list_;
      }
    }
  }
  return alt_list_;
}
// Convenience wrapper around Recognize(): recognizes the member char sample
// in word mode with the given language model.
WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) {
  const bool word_mode = true;
  return Recognize(lang_mod, word_mode);
}
// Convenience wrapper around Recognize(): recognizes the member char sample
// as a phrase (word mode disabled) with the given language model.
WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
  const bool word_mode = false;
  return Recognize(lang_mod, word_mode);
}
// Computes the cost of a specific string. This is done by performing
// recognition of a language model that allows only the specified word
int CubeObject::WordCost(const char *str) {
WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
if (lang_mod == NULL) {
return WORST_COST;
}
if (lang_mod->AddString(str) == false) {
delete lang_mod;
return WORST_COST;
}
// run a beam search against the single string wordlist model
WordAltList *alt_list = RecognizeWord(lang_mod);
delete lang_mod;
int cost = WORST_COST;
if (alt_list != NULL) {
if (alt_list->AltCount() > 0) {
cost = alt_list->AltCost(0);
}
}
return cost;
}
// Normalize the input word bitmap to have a minimum aspect ratio
bool CubeObject::Normalize() {
// create a cube search object
CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_);
if (srch_obj == NULL) {
return false;
}
// Perform over-segmentation
int seg_cnt = srch_obj->SegPtCnt();
// Only perform normalization if segment count is large enough
if (seg_cnt < kMinNormalizationSegmentCnt) {
delete srch_obj;
return true;
}
// compute the mean AR of the segments
double ar_mean = 0.0;
for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) {
CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx);
if (seg_samp != NULL && seg_samp->Width() > 0) {
ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width());
}
}
ar_mean /= (seg_cnt + 1);
// perform normalization if segment AR is too high
if (ar_mean > kMinNormalizationAspectRatio) {
// scale down the image in the y-direction to attain AR
CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(),
2.0 * char_samp_->Height() / ar_mean,
false);
if (new_samp != NULL) {
// free existing char samp if owned
if (own_char_samp_) {
delete char_samp_;
}
// update with new scaled charsamp and set ownership flag
char_samp_ = new_samp;
own_char_samp_ = true;
}
}
delete srch_obj;
return true;
}
}