mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-07 10:17:50 +08:00
4523ce9f7d
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20
121 lines
3.7 KiB
C++
121 lines
3.7 KiB
C++
/**********************************************************************
|
|
* File: tess_lang_mod_edge.cpp
|
|
* Description: Implementation of the Tesseract Language Model Edge Class
|
|
* Author: Ahmad Abdulkader
|
|
* Created: 2008
|
|
*
|
|
* (C) Copyright 2008, Google Inc.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
#include "tess_lang_mod_edge.h"
|
|
#include "const.h"
|
|
#include "unichar.h"
|
|
|
|
|
|
|
|
namespace tesseract {
|
|
// OOD constructor
|
|
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
|
|
root_ = false;
|
|
cntxt_ = cntxt;
|
|
dawg_ = NULL;
|
|
start_edge_ = 0;
|
|
end_edge_ = 0;
|
|
edge_mask_ = 0;
|
|
class_id_ = class_id;
|
|
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
|
path_cost_ = Cost();
|
|
}
|
|
|
|
// leading, trailing punc constructor and single byte UTF char
|
|
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
|
|
const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
|
|
root_ = false;
|
|
cntxt_ = cntxt;
|
|
dawg_ = dawg;
|
|
start_edge_ = edge_idx;
|
|
end_edge_ = edge_idx;
|
|
edge_mask_ = 0;
|
|
class_id_ = class_id;
|
|
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
|
path_cost_ = Cost();
|
|
}
|
|
|
|
// dict constructor: multi byte UTF char
|
|
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
|
|
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
|
|
int class_id) {
|
|
root_ = false;
|
|
cntxt_ = cntxt;
|
|
dawg_ = dawg;
|
|
start_edge_ = start_edge_idx;
|
|
end_edge_ = end_edge_idx;
|
|
edge_mask_ = 0;
|
|
class_id_ = class_id;
|
|
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
|
path_cost_ = Cost();
|
|
}
|
|
|
|
char *TessLangModEdge::Description() const {
|
|
char *char_ptr = new char[256];
|
|
if (!char_ptr) {
|
|
return NULL;
|
|
}
|
|
|
|
char dawg_str[256];
|
|
char edge_str[32];
|
|
if (dawg_ == (Dawg *)DAWG_OOD) {
|
|
strcpy(dawg_str, "OOD");
|
|
} else if (dawg_ == (Dawg *)DAWG_NUMBER) {
|
|
strcpy(dawg_str, "NUM");
|
|
} else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
|
|
strcpy(dawg_str, "Main");
|
|
} else if (dawg_->permuter() == USER_DAWG_PERM) {
|
|
strcpy(dawg_str, "User");
|
|
} else if (dawg_->permuter() == DOC_DAWG_PERM) {
|
|
strcpy(dawg_str, "Doc");
|
|
} else {
|
|
strcpy(dawg_str, "N/A");
|
|
}
|
|
|
|
sprintf(edge_str, "%d", static_cast<int>(start_edge_));
|
|
if (IsLeadingPuncEdge(edge_mask_)) {
|
|
strcat(edge_str, "-LP");
|
|
}
|
|
if (IsTrailingPuncEdge(edge_mask_)) {
|
|
strcat(edge_str, "-TP");
|
|
}
|
|
sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
|
|
dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
|
|
|
|
return char_ptr;
|
|
}
|
|
|
|
int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
|
|
const Dawg *dawg,
|
|
NODE_REF parent_node,
|
|
LangModEdge **edge_array) {
|
|
int edge_cnt = 0;
|
|
NodeChildVector vec;
|
|
dawg->unichar_ids_of(parent_node, &vec); // find all children of the parent
|
|
for (int i = 0; i < vec.size(); ++i) {
|
|
const NodeChild &child = vec[i];
|
|
if (child.unichar_id == INVALID_UNICHAR_ID) continue;
|
|
edge_array[edge_cnt] =
|
|
new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
|
|
if (edge_array[edge_cnt] != NULL) edge_cnt++;
|
|
}
|
|
return edge_cnt;
|
|
}
|
|
}
|