/////////////////////////////////////////////////////////////////////// // File: lm_state.cpp // Description: Structures and functionality for capturing the state of // segmentation search guided by the language model. // Author: Rika Antonova // Created: Mon Jun 20 11:26:43 PST 2012 // // (C) Copyright 2012, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #include "lm_state.h" namespace tesseract { ELISTIZE(ViterbiStateEntry); void ViterbiStateEntry::Print(const char *msg) const { tprintf("%s ViterbiStateEntry", msg); if (updated) tprintf("(NEW)"); if (this->debug_str != NULL) { tprintf(" str=%s", this->debug_str->string()); } tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", this->ratings_sum, this->length, this->cost); if (this->top_choice_flags) { tprintf(" top_choice_flags=0x%x", this->top_choice_flags); } if (!this->Consistent()) { tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)", this->consistency_info.NumInconsistentPunc(), this->consistency_info.NumInconsistentCase(), this->consistency_info.NumInconsistentChartype(), this->consistency_info.inconsistent_script, this->consistency_info.inconsistent_font); } if (this->dawg_info) tprintf(" permuter=%d", this->dawg_info->permuter); if (this->ngram_info) { tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d", this->ngram_info->ngram_and_classifier_cost, this->ngram_info->context.string(), this->ngram_info->pruned); } if (this->associate_stats.shape_cost > 0.0f) { tprintf(" shape_cost=%g", this->associate_stats.shape_cost); } tprintf(" %s", XHeightConsistencyEnumName[this->consistency_info.xht_decision]); tprintf("\n"); } // Clears the viterbi search state back to its initial conditions. void LanguageModelState::Clear() { viterbi_state_entries.clear(); viterbi_state_entries_prunable_length = 0; viterbi_state_entries_prunable_max_cost = MAX_FLOAT32; viterbi_state_entries_length = 0; } void LanguageModelState::Print(const char *msg) { tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", msg, viterbi_state_entries_prunable_max_cost, viterbi_state_entries_prunable_length, viterbi_state_entries_length); ViterbiStateEntry_IT vit(&viterbi_state_entries); for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) { vit.data()->Print(""); } } } // namespace tesseract