mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-12 23:49:06 +08:00
121 lines
3.7 KiB
C++
121 lines
3.7 KiB
C++
|
/**********************************************************************
|
||
|
* File: tess_lang_mod_edge.cpp
|
||
|
* Description: Implementation of the Tesseract Language Model Edge Class
|
||
|
* Author: Ahmad Abdulkader
|
||
|
* Created: 2008
|
||
|
*
|
||
|
* (C) Copyright 2008, Google Inc.
|
||
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
** you may not use this file except in compliance with the License.
|
||
|
** You may obtain a copy of the License at
|
||
|
** http://www.apache.org/licenses/LICENSE-2.0
|
||
|
** Unless required by applicable law or agreed to in writing, software
|
||
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
** See the License for the specific language governing permissions and
|
||
|
** limitations under the License.
|
||
|
*
|
||
|
**********************************************************************/
|
||
|
|
||
|
#include "tess_lang_mod_edge.h"
|
||
|
#include "const.h"
|
||
|
#include "unichar.h"
|
||
|
|
||
|
|
||
|
|
||
|
namespace tesseract {
|
||
|
// OOD constructor
|
||
|
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
|
||
|
root_ = false;
|
||
|
cntxt_ = cntxt;
|
||
|
dawg_ = NULL;
|
||
|
start_edge_ = 0;
|
||
|
end_edge_ = 0;
|
||
|
edge_mask_ = 0;
|
||
|
class_id_ = class_id;
|
||
|
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||
|
path_cost_ = Cost();
|
||
|
}
|
||
|
|
||
|
// leading, trailing punc constructor and single byte UTF char
|
||
|
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
|
||
|
const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
|
||
|
root_ = false;
|
||
|
cntxt_ = cntxt;
|
||
|
dawg_ = dawg;
|
||
|
start_edge_ = edge_idx;
|
||
|
end_edge_ = edge_idx;
|
||
|
edge_mask_ = 0;
|
||
|
class_id_ = class_id;
|
||
|
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||
|
path_cost_ = Cost();
|
||
|
}
|
||
|
|
||
|
// dict constructor: multi byte UTF char
|
||
|
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
|
||
|
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
|
||
|
int class_id) {
|
||
|
root_ = false;
|
||
|
cntxt_ = cntxt;
|
||
|
dawg_ = dawg;
|
||
|
start_edge_ = start_edge_idx;
|
||
|
end_edge_ = end_edge_idx;
|
||
|
edge_mask_ = 0;
|
||
|
class_id_ = class_id;
|
||
|
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||
|
path_cost_ = Cost();
|
||
|
}
|
||
|
|
||
|
char *TessLangModEdge::Description() const {
|
||
|
char *char_ptr = new char[256];
|
||
|
if (!char_ptr) {
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
char dawg_str[256];
|
||
|
char edge_str[32];
|
||
|
if (dawg_ == (Dawg *)DAWG_OOD) {
|
||
|
strcpy(dawg_str, "OOD");
|
||
|
} else if (dawg_ == (Dawg *)DAWG_NUMBER) {
|
||
|
strcpy(dawg_str, "NUM");
|
||
|
} else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
|
||
|
strcpy(dawg_str, "Main");
|
||
|
} else if (dawg_->permuter() == USER_DAWG_PERM) {
|
||
|
strcpy(dawg_str, "User");
|
||
|
} else if (dawg_->permuter() == DOC_DAWG_PERM) {
|
||
|
strcpy(dawg_str, "Doc");
|
||
|
} else {
|
||
|
strcpy(dawg_str, "N/A");
|
||
|
}
|
||
|
|
||
|
sprintf(edge_str, "%d", static_cast<int>(start_edge_));
|
||
|
if (IsLeadingPuncEdge(edge_mask_)) {
|
||
|
strcat(edge_str, "-LP");
|
||
|
}
|
||
|
if (IsTrailingPuncEdge(edge_mask_)) {
|
||
|
strcat(edge_str, "-TP");
|
||
|
}
|
||
|
sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
|
||
|
dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
|
||
|
|
||
|
return char_ptr;
|
||
|
}
|
||
|
|
||
|
int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
|
||
|
const Dawg *dawg,
|
||
|
NODE_REF parent_node,
|
||
|
LangModEdge **edge_array) {
|
||
|
int edge_cnt = 0;
|
||
|
NodeChildVector vec;
|
||
|
dawg->unichar_ids_of(parent_node, &vec); // find all children of the parent
|
||
|
for (int i = 0; i < vec.size(); ++i) {
|
||
|
const NodeChild &child = vec[i];
|
||
|
if (child.unichar_id == INVALID_UNICHAR_ID) continue;
|
||
|
edge_array[edge_cnt] =
|
||
|
new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
|
||
|
if (edge_array[edge_cnt] != NULL) edge_cnt++;
|
||
|
}
|
||
|
return edge_cnt;
|
||
|
}
|
||
|
}
|