tesseract/cube/tess_lang_mod_edge.cpp

121 lines
3.7 KiB
C++
Raw Normal View History

/**********************************************************************
* File: tess_lang_mod_edge.cpp
* Description: Implementation of the Tesseract Language Model Edge Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "tess_lang_mod_edge.h"
#include "const.h"
#include "unichar.h"
namespace tesseract {
// OOD constructor
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
root_ = false;
cntxt_ = cntxt;
dawg_ = NULL;
start_edge_ = 0;
end_edge_ = 0;
edge_mask_ = 0;
class_id_ = class_id;
str_ = cntxt_->CharacterSet()->ClassString(class_id);
path_cost_ = Cost();
}
// leading, trailing punc constructor and single byte UTF char
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
root_ = false;
cntxt_ = cntxt;
dawg_ = dawg;
start_edge_ = edge_idx;
end_edge_ = edge_idx;
edge_mask_ = 0;
class_id_ = class_id;
str_ = cntxt_->CharacterSet()->ClassString(class_id);
path_cost_ = Cost();
}
// dict constructor: multi byte UTF char
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
int class_id) {
root_ = false;
cntxt_ = cntxt;
dawg_ = dawg;
start_edge_ = start_edge_idx;
end_edge_ = end_edge_idx;
edge_mask_ = 0;
class_id_ = class_id;
str_ = cntxt_->CharacterSet()->ClassString(class_id);
path_cost_ = Cost();
}
char *TessLangModEdge::Description() const {
char *char_ptr = new char[256];
if (!char_ptr) {
return NULL;
}
char dawg_str[256];
char edge_str[32];
if (dawg_ == (Dawg *)DAWG_OOD) {
strcpy(dawg_str, "OOD");
} else if (dawg_ == (Dawg *)DAWG_NUMBER) {
strcpy(dawg_str, "NUM");
} else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
strcpy(dawg_str, "Main");
} else if (dawg_->permuter() == USER_DAWG_PERM) {
strcpy(dawg_str, "User");
} else if (dawg_->permuter() == DOC_DAWG_PERM) {
strcpy(dawg_str, "Doc");
} else {
strcpy(dawg_str, "N/A");
}
sprintf(edge_str, "%d", static_cast<int>(start_edge_));
if (IsLeadingPuncEdge(edge_mask_)) {
strcat(edge_str, "-LP");
}
if (IsTrailingPuncEdge(edge_mask_)) {
strcat(edge_str, "-TP");
}
sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
return char_ptr;
}
int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
const Dawg *dawg,
NODE_REF parent_node,
LangModEdge **edge_array) {
int edge_cnt = 0;
NodeChildVector vec;
dawg->unichar_ids_of(parent_node, &vec); // find all children of the parent
for (int i = 0; i < vec.size(); ++i) {
const NodeChild &child = vec[i];
if (child.unichar_id == INVALID_UNICHAR_ID) continue;
edge_array[edge_cnt] =
new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
if (edge_array[edge_cnt] != NULL) edge_cnt++;
}
return edge_cnt;
}
}