mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-03 00:49:01 +08:00
018f192fc2
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=658634 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@675 d0cd1f9f-072b-0410-8dd7-cf729c803f20
641 lines
20 KiB
C++
641 lines
20 KiB
C++
/**********************************************************************
|
|
* File: ratngs.cpp (Formerly ratings.c)
|
|
* Description: Code to manipulate the BLOB_CHOICE and WERD_CHOICE classes.
|
|
* Author: Ray Smith
|
|
* Created: Thu Apr 23 13:23:29 BST 1992
|
|
*
|
|
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
#include "mfcpch.h"
|
|
#include "ratngs.h"
|
|
|
|
#include "callcpp.h"
|
|
#include "genericvector.h"
|
|
#include "unicharset.h"
|
|
|
|
// List-support macro invocations for the choice classes, one per line.
// (ELISTIZE / CLISTIZE themselves are defined outside this file.)
ELISTIZE(BLOB_CHOICE)
CLISTIZE(BLOB_CHOICE_LIST)
CLISTIZE(WERD_CHOICE)

// Out-of-class definition of the WERD_CHOICE::kBadRating static constant.
const float WERD_CHOICE::kBadRating = 100000.0;
|
|
|
|
// Human-readable permuter names. Each one is a separate file-local constant
// so that the lookup table below can refer to it by name.
static const char kPermuterTypeNoPerm[]       = "None";
static const char kPermuterTypePuncPerm[]     = "Punctuation";
static const char kPermuterTypeTopPerm[]      = "Top Choice";
static const char kPermuterTypeLowerPerm[]    = "Top Lower Case";
static const char kPermuterTypeUpperPerm[]    = "Top Upper Case";
static const char kPermuterTypeNgramPerm[]    = "Ngram";
static const char kPermuterTypeNumberPerm[]   = "Number";
static const char kPermuterTypeUserPatPerm[]  = "User Pattern";
static const char kPermuterTypeSysDawgPerm[]  = "System Dictionary";
static const char kPermuterTypeDocDawgPerm[]  = "Document Dictionary";
static const char kPermuterTypeUserDawgPerm[] = "User Dictionary";
static const char kPermuterTypeFreqDawgPerm[] = "Frequent Words Dictionary";
static const char kPermuterTypeCompoundPerm[] = "Compound";

// Name table indexed by the numeric permuter value of a WERD_CHOICE
// (see WERD_CHOICE::permuter_name()). The trailing comment on each entry
// is its index in the table.
static const char * const kPermuterTypeNames[] = {
    kPermuterTypeNoPerm,        // 0
    kPermuterTypePuncPerm,      // 1
    kPermuterTypeTopPerm,       // 2
    kPermuterTypeLowerPerm,     // 3
    kPermuterTypeUpperPerm,     // 4
    kPermuterTypeNgramPerm,     // 5
    kPermuterTypeNumberPerm,    // 6
    kPermuterTypeUserPatPerm,   // 7
    kPermuterTypeSysDawgPerm,   // 8
    kPermuterTypeDocDawgPerm,   // 9
    kPermuterTypeUserDawgPerm,  // 10
    kPermuterTypeFreqDawgPerm,  // 11
    kPermuterTypeCompoundPerm   // 12
};
|
|
|
|
/**
|
|
* BLOB_CHOICE::BLOB_CHOICE
|
|
*
|
|
* Constructor to build a BLOB_CHOICE from a char, rating and certainty.
|
|
*/
|
|
BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
|
|
float src_rating, // rating
|
|
float src_cert, // certainty
|
|
inT16 src_fontinfo_id, // font
|
|
inT16 src_fontinfo_id2, // 2nd choice font
|
|
int src_script_id, // script
|
|
inT16 min_xheight, // min xheight allowed
|
|
inT16 max_xheight, // max xheight by this char
|
|
bool adapted // adapted match or not
|
|
) {
|
|
unichar_id_ = src_unichar_id;
|
|
rating_ = src_rating;
|
|
certainty_ = src_cert;
|
|
fontinfo_id_ = src_fontinfo_id;
|
|
fontinfo_id2_ = src_fontinfo_id2;
|
|
script_id_ = src_script_id;
|
|
language_model_state_ = NULL;
|
|
min_xheight_ = min_xheight;
|
|
max_xheight_ = max_xheight;
|
|
adapted_ = adapted;
|
|
}
|
|
|
|
/**
|
|
* BLOB_CHOICE::BLOB_CHOICE
|
|
*
|
|
* Constructor to build a BLOB_CHOICE from another BLOB_CHOICE.
|
|
*/
|
|
BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) {
|
|
unichar_id_ = other.unichar_id();
|
|
rating_ = other.rating();
|
|
certainty_ = other.certainty();
|
|
fontinfo_id_ = other.fontinfo_id();
|
|
fontinfo_id2_ = other.fontinfo_id2();
|
|
script_id_ = other.script_id();
|
|
language_model_state_ = NULL;
|
|
min_xheight_ = other.min_xheight_;
|
|
max_xheight_ = other.max_xheight_;
|
|
adapted_ = other.adapted_;
|
|
}
|
|
|
|
/**
|
|
* WERD_CHOICE::WERD_CHOICE
|
|
*
|
|
* Constructor to build a WERD_CHOICE from the given string.
|
|
* The function assumes that src_string is not NULL.
|
|
*/
|
|
WERD_CHOICE::WERD_CHOICE(const char *src_string,
|
|
const UNICHARSET &unicharset)
|
|
: unicharset_(&unicharset){
|
|
STRING src_lengths;
|
|
const char *ptr = src_string;
|
|
const char *end = src_string + strlen(src_string);
|
|
int step = unicharset.step(ptr);
|
|
for (; ptr < end && step > 0;
|
|
step = unicharset.step(ptr), src_lengths += step, ptr += step);
|
|
if (step != 0 && ptr == end) {
|
|
this->init(src_string, src_lengths.string(),
|
|
0.0, 0.0, NO_PERM);
|
|
} else { // there must have been an invalid unichar in the string
|
|
this->init(8);
|
|
this->make_bad();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* WERD_CHOICE::init
|
|
*
|
|
* Helper function to build a WERD_CHOICE from the given string,
|
|
* fragment lengths, rating, certainty and permuter.
|
|
*
|
|
* The function assumes that src_string is not NULL.
|
|
* src_lengths argument could be NULL, in which case the unichars
|
|
* in src_string are assumed to all be of length 1.
|
|
*/
|
|
void WERD_CHOICE::init(const char *src_string,
|
|
const char *src_lengths,
|
|
float src_rating,
|
|
float src_certainty,
|
|
uinT8 src_permuter) {
|
|
int src_string_len = strlen(src_string);
|
|
if (src_string_len == 0) {
|
|
this->init(8);
|
|
} else {
|
|
this->init(src_lengths ? strlen(src_lengths): src_string_len);
|
|
length_ = reserved_;
|
|
int offset = 0;
|
|
for (int i = 0; i < length_; ++i) {
|
|
int unichar_length = src_lengths ? src_lengths[i] : 1;
|
|
unichar_ids_[i] =
|
|
unicharset_->unichar_to_id(src_string+offset, unichar_length);
|
|
fragment_lengths_[i] = 1;
|
|
offset += unichar_length;
|
|
}
|
|
}
|
|
rating_ = src_rating;
|
|
certainty_ = src_certainty;
|
|
permuter_ = src_permuter;
|
|
}
|
|
|
|
/**
|
|
* WERD_CHOICE::~WERD_CHOICE
|
|
*/
|
|
WERD_CHOICE::~WERD_CHOICE() {
|
|
delete[] unichar_ids_;
|
|
delete[] fragment_lengths_;
|
|
delete_blob_choices();
|
|
}
|
|
|
|
const char *WERD_CHOICE::permuter_name() const {
|
|
return kPermuterTypeNames[permuter_];
|
|
}
|
|
|
|
/**
|
|
* WERD_CHOICE::set_blob_choices
|
|
*
|
|
* Delete current blob_choices. Set the blob_choices to the given new
|
|
* list.
|
|
*/
|
|
void WERD_CHOICE::set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices) {
|
|
if (blob_choices_ != blob_choices) {
|
|
delete_blob_choices();
|
|
blob_choices_ = blob_choices;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* contains_unichar_id
|
|
*
|
|
* Returns true if unichar_ids_ contain the given unichar_id, false otherwise.
|
|
*/
|
|
bool WERD_CHOICE::contains_unichar_id(UNICHAR_ID unichar_id) const {
|
|
for (int i = 0; i < length_; ++i) {
|
|
if (unichar_ids_[i] == unichar_id) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* remove_unichar_ids
|
|
*
|
|
* Removes num unichar ids starting from index start from unichar_ids_
|
|
* and updates length_ and fragment_lengths_ to reflect this change.
|
|
* Note: this function does not modify rating_ and certainty_.
|
|
*/
|
|
void WERD_CHOICE::remove_unichar_ids(int start, int num) {
|
|
ASSERT_HOST(start >= 0 && start + num <= length_);
|
|
for (int i = start; i+num < length_; ++i) {
|
|
unichar_ids_[i] = unichar_ids_[i+num];
|
|
fragment_lengths_[i] = fragment_lengths_[i+num];
|
|
}
|
|
length_ -= num;
|
|
}
|
|
|
|
/**
|
|
* reverse_and_mirror_unichar_ids
|
|
*
|
|
* Reverses and mirrors unichars in unichar_ids.
|
|
*/
|
|
void WERD_CHOICE::reverse_and_mirror_unichar_ids() {
|
|
for (int i = 0; i < length_/2; ++i) {
|
|
UNICHAR_ID tmp_id = unichar_ids_[i];
|
|
unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]);
|
|
unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id);
|
|
}
|
|
if (length_ % 2 != 0) {
|
|
unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* punct_stripped
|
|
*
|
|
* Returns the half-open interval of unichar_id indices [start, end) which
|
|
* enclose the core portion of this word -- the part after stripping
|
|
* punctuation from the left and right.
|
|
*/
|
|
void WERD_CHOICE::punct_stripped(int *start, int *end) const {
|
|
*start = 0;
|
|
*end = length() - 1;
|
|
while (*start < length() &&
|
|
unicharset()->get_ispunctuation(unichar_id(*start))) {
|
|
(*start)++;
|
|
}
|
|
while (*end > -1 &&
|
|
unicharset()->get_ispunctuation(unichar_id(*end))) {
|
|
(*end)--;
|
|
}
|
|
(*end)++;
|
|
}
|
|
|
|
/**
 * shallow_copy
 *
 * Returns a new WERD_CHOICE on the same unicharset that holds the unichar
 * ids and fragment lengths of positions [start, end) of this word. Each id
 * is appended with a rating and certainty of 0. Both start and end must be
 * within [0, length_]; an end below start is treated as an empty range.
 */
WERD_CHOICE WERD_CHOICE::shallow_copy(int start, int end) const {
  ASSERT_HOST(start >= 0 && start <= length_);
  ASSERT_HOST(end >= 0 && end <= length_);
  const int stop = (end < start) ? start : end;
  const int count = stop - start;
  WERD_CHOICE piece(unicharset_, count);
  for (int k = 0; k < count; ++k) {
    piece.append_unichar_id_space_allocated(
        unichar_ids_[start + k], fragment_lengths_[start + k], 0.0f, 0.0f);
  }
  return piece;
}
|
|
|
|
/**
|
|
* has_rtl_unichar_id
|
|
*
|
|
* Returns true if unichar_ids contain at least one "strongly" RTL unichar.
|
|
*/
|
|
bool WERD_CHOICE::has_rtl_unichar_id() const {
|
|
int i;
|
|
for (i = 0; i < length_; ++i) {
|
|
UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]);
|
|
if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
|
|
dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* string_and_lengths
|
|
*
|
|
* Populates the given word_str with unichars from unichar_ids and
|
|
* and word_lengths_str with the corresponding unichar lengths.
|
|
*/
|
|
void WERD_CHOICE::string_and_lengths(STRING *word_str,
|
|
STRING *word_lengths_str) const {
|
|
*word_str = "";
|
|
if (word_lengths_str != NULL) *word_lengths_str = "";
|
|
for (int i = 0; i < length_; ++i) {
|
|
const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
|
|
*word_str += ch;
|
|
if (word_lengths_str != NULL) {
|
|
*word_lengths_str += strlen(ch);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* append_unichar_id
|
|
*
|
|
* Make sure there is enough space in the word for the new unichar id
|
|
* and call append_unichar_id_space_allocated().
|
|
*/
|
|
void WERD_CHOICE::append_unichar_id(
|
|
UNICHAR_ID unichar_id, char fragment_length,
|
|
float rating, float certainty) {
|
|
if (length_ == reserved_) {
|
|
this->double_the_size();
|
|
}
|
|
this->append_unichar_id_space_allocated(unichar_id, fragment_length,
|
|
rating, certainty);
|
|
}
|
|
|
|
/**
|
|
* WERD_CHOICE::operator+=
|
|
*
|
|
* Cat a second word rating on the end of this current one.
|
|
* The ratings are added and the confidence is the min.
|
|
* If the permuters are NOT the same the permuter is set to COMPOUND_PERM
|
|
*/
|
|
WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) {
|
|
// TODO(daria): find out why the choice was cleared this way if any
|
|
// of the pieces are empty. Add the description of this behavior
|
|
// to the comments.
|
|
// if (word_string.length () == 0 || second.word_string.length () == 0) {
|
|
// word_string = NULL; //make it empty
|
|
// word_lengths = NULL;
|
|
// delete_blob_choices();
|
|
// } else {
|
|
ASSERT_HOST(unicharset_ == second.unicharset_);
|
|
while (reserved_ < length_ + second.length()) {
|
|
this->double_the_size();
|
|
}
|
|
const UNICHAR_ID *other_unichar_ids = second.unichar_ids();
|
|
const char *other_fragment_lengths = second.fragment_lengths();
|
|
for (int i = 0; i < second.length(); ++i) {
|
|
unichar_ids_[length_ + i] = other_unichar_ids[i];
|
|
fragment_lengths_[length_ + i] = other_fragment_lengths[i];
|
|
}
|
|
length_ += second.length();
|
|
rating_ += second.rating(); // add ratings
|
|
if (second.certainty() < certainty_) // take min
|
|
certainty_ = second.certainty();
|
|
if (permuter_ == NO_PERM) {
|
|
permuter_ = second.permuter();
|
|
} else if (second.permuter() != NO_PERM &&
|
|
second.permuter() != permuter_) {
|
|
permuter_ = COMPOUND_PERM;
|
|
}
|
|
|
|
// Append a deep copy of second blob_choices if it exists.
|
|
if (second.blob_choices_ != NULL) {
|
|
if (this->blob_choices_ == NULL)
|
|
this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST;
|
|
|
|
BLOB_CHOICE_LIST_C_IT this_blob_choices_it;
|
|
BLOB_CHOICE_LIST_C_IT second_blob_choices_it;
|
|
|
|
this_blob_choices_it.set_to_list(this->blob_choices_);
|
|
this_blob_choices_it.move_to_last();
|
|
|
|
second_blob_choices_it.set_to_list(second.blob_choices_);
|
|
|
|
for (second_blob_choices_it.mark_cycle_pt();
|
|
!second_blob_choices_it.cycled_list();
|
|
second_blob_choices_it.forward()) {
|
|
|
|
BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST();
|
|
blob_choices_copy->deep_copy(second_blob_choices_it.data(),
|
|
&BLOB_CHOICE::deep_copy);
|
|
|
|
this_blob_choices_it.add_after_then_move(blob_choices_copy);
|
|
}
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
|
|
/**
|
|
* WERD_CHOICE::operator=
|
|
*
|
|
* Allocate enough memory to hold a copy of source and copy over
|
|
* all the information from source to this WERD_CHOICE.
|
|
*/
|
|
WERD_CHOICE& WERD_CHOICE::operator=(const WERD_CHOICE& source) {
|
|
while (reserved_ < source.length()) {
|
|
this->double_the_size();
|
|
}
|
|
|
|
unicharset_ = source.unicharset_;
|
|
const UNICHAR_ID *other_unichar_ids = source.unichar_ids();
|
|
const char *other_fragment_lengths = source.fragment_lengths();
|
|
for (int i = 0; i < source.length(); ++i) {
|
|
unichar_ids_[i] = other_unichar_ids[i];
|
|
fragment_lengths_[i] = other_fragment_lengths[i];
|
|
}
|
|
length_ = source.length();
|
|
rating_ = source.rating();
|
|
certainty_ = source.certainty();
|
|
permuter_ = source.permuter();
|
|
fragment_mark_ = source.fragment_mark();
|
|
|
|
// Delete existing blob_choices
|
|
this->delete_blob_choices();
|
|
|
|
// Deep copy blob_choices of source
|
|
if (source.blob_choices_ != NULL) {
|
|
BLOB_CHOICE_LIST_C_IT this_blob_choices_it;
|
|
BLOB_CHOICE_LIST_C_IT source_blob_choices_it;
|
|
|
|
this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST();
|
|
|
|
this_blob_choices_it.set_to_list(this->blob_choices_);
|
|
source_blob_choices_it.set_to_list(source.blob_choices_);
|
|
|
|
for (source_blob_choices_it.mark_cycle_pt();
|
|
!source_blob_choices_it.cycled_list();
|
|
source_blob_choices_it.forward()) {
|
|
|
|
BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST();
|
|
blob_choices_copy->deep_copy(source_blob_choices_it.data(),
|
|
&BLOB_CHOICE::deep_copy);
|
|
|
|
this_blob_choices_it.add_after_then_move(blob_choices_copy);
|
|
}
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
/**********************************************************************
|
|
* WERD_CHOICE::delete_blob_choices
|
|
*
|
|
* Clear the blob_choices list, delete it and set it to NULL.
|
|
**********************************************************************/
|
|
void WERD_CHOICE::delete_blob_choices() {
|
|
if (blob_choices_ != NULL) {
|
|
blob_choices_->deep_clear();
|
|
delete blob_choices_;
|
|
blob_choices_ = NULL;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* WERD_CHOICE::print
|
|
*
|
|
* Print WERD_CHOICE to stdout.
|
|
*/
|
|
const void WERD_CHOICE::print(const char *msg) const {
|
|
tprintf("%s WERD_CHOICE:\n", msg);
|
|
tprintf("length_ %d reserved_ %d permuter_ %d\n",
|
|
length_, reserved_, permuter_);
|
|
tprintf("rating_ %.4f certainty_ %.4f", rating_, certainty_);
|
|
if (fragment_mark_) {
|
|
tprintf(" fragment_mark_ true");
|
|
}
|
|
tprintf("\n");
|
|
if (unichar_string_.length() > 0) {
|
|
tprintf("unichar_string_ %s unichar_lengths_ %s\n",
|
|
unichar_string_.string(), unichar_lengths_.string());
|
|
}
|
|
tprintf("unichar_ids: ");
|
|
int i;
|
|
for (i = 0; i < length_; ++i) {
|
|
tprintf("%d ", unichar_ids_[i]);
|
|
}
|
|
tprintf("\nfragment_lengths_: ");
|
|
for (i = 0; i < length_; ++i) {
|
|
tprintf("%d ", fragment_lengths_[i]);
|
|
}
|
|
tprintf("\n");
|
|
fflush(stdout);
|
|
}
|
|
|
|
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
|
|
const WERD_CHOICE &word2) {
|
|
const UNICHARSET *uchset = word1.unicharset();
|
|
if (word2.unicharset() != uchset) return false;
|
|
int w1start, w1end;
|
|
word1.punct_stripped(&w1start, &w1end);
|
|
int w2start, w2end;
|
|
word2.punct_stripped(&w2start, &w2end);
|
|
if (w1end - w1start != w2end - w2start) return false;
|
|
for (int i = 0; i < w1end - w1start; i++) {
|
|
if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
|
|
uchset->to_lower(word2.unichar_id(w2start + i))) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* print_ratings_list
|
|
*
|
|
* Send all the ratings out to the logfile.
|
|
*
|
|
* @param msg intro message
|
|
* @param ratings list of ratings
|
|
* @param current_unicharset unicharset that can be used
|
|
* for id-to-unichar conversion
|
|
*/
|
|
void print_ratings_list(const char *msg,
|
|
BLOB_CHOICE_LIST *ratings,
|
|
const UNICHARSET ¤t_unicharset) {
|
|
if (ratings->length() == 0) {
|
|
tprintf("%s:<none>\n", msg);
|
|
return;
|
|
}
|
|
if (*msg != '\0') {
|
|
tprintf("%s\n", msg);
|
|
}
|
|
BLOB_CHOICE_IT c_it;
|
|
c_it.set_to_list(ratings);
|
|
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
|
|
c_it.data()->print(¤t_unicharset);
|
|
if (!c_it.at_last()) tprintf("\n");
|
|
}
|
|
tprintf("\n");
|
|
fflush(stdout);
|
|
}
|
|
|
|
/**
|
|
* print_ratings_list
|
|
*
|
|
* Print ratings list (unichar ids only).
|
|
*/
|
|
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings) {
|
|
if (ratings->length() == 0) {
|
|
tprintf("%s:<none>\n", msg);
|
|
return;
|
|
}
|
|
if (*msg != '\0') {
|
|
tprintf("%s\n", msg);
|
|
}
|
|
BLOB_CHOICE_IT c_it;
|
|
c_it.set_to_list(ratings);
|
|
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
|
|
c_it.data()->print(NULL);
|
|
if (!c_it.at_last()) tprintf("\n");
|
|
}
|
|
tprintf("\n");
|
|
fflush(stdout);
|
|
}
|
|
|
|
/**
|
|
* print_ratings_info
|
|
*
|
|
* Send all the ratings out to the logfile.
|
|
*
|
|
* @param fp file to use
|
|
* @param ratings list of results
|
|
* @param current_unicharset unicharset that can be used
|
|
* for id-to-unichar conversion
|
|
*/
|
|
void print_ratings_info(FILE *fp,
|
|
BLOB_CHOICE_LIST *ratings,
|
|
const UNICHARSET ¤t_unicharset) {
|
|
inT32 index; // to list
|
|
const char* first_char = NULL; // character
|
|
FLOAT32 first_rat; // rating
|
|
FLOAT32 first_cert; // certainty
|
|
const char* sec_char = NULL; // character
|
|
FLOAT32 sec_rat = 0.0f; // rating
|
|
FLOAT32 sec_cert = 0.0f; // certainty
|
|
BLOB_CHOICE_IT c_it = ratings; // iterator
|
|
|
|
index = ratings->length();
|
|
if (index > 0) {
|
|
first_char = current_unicharset.id_to_unichar(c_it.data()->unichar_id());
|
|
first_rat = c_it.data()->rating();
|
|
first_cert = -c_it.data()->certainty();
|
|
if (index > 1) {
|
|
sec_char = current_unicharset.id_to_unichar(
|
|
c_it.data_relative(1)->unichar_id());
|
|
sec_rat = c_it.data_relative(1)->rating();
|
|
sec_cert = -c_it.data_relative(1)->certainty();
|
|
} else {
|
|
sec_char = NULL;
|
|
sec_rat = -1;
|
|
sec_cert = -1;
|
|
}
|
|
} else {
|
|
first_char = NULL;
|
|
first_rat = -1;
|
|
first_cert = -1;
|
|
}
|
|
if (first_char != NULL && (*first_char == '\0' || *first_char == ' '))
|
|
first_char = NULL;
|
|
if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' '))
|
|
sec_char = NULL;
|
|
tprintf(" " INT32FORMAT " %s %g %g %s %g %g\n",
|
|
ratings->length(),
|
|
first_char != NULL ? first_char : "~",
|
|
first_rat, first_cert, sec_char != NULL ? sec_char : "~",
|
|
sec_rat, sec_cert);
|
|
}
|
|
|
|
/**
|
|
* print_char_choices_list
|
|
*/
|
|
void print_char_choices_list(const char *msg,
|
|
const BLOB_CHOICE_LIST_VECTOR &char_choices,
|
|
const UNICHARSET ¤t_unicharset,
|
|
BOOL8 detailed) {
|
|
if (*msg != '\0') tprintf("%s\n", msg);
|
|
for (int x = 0; x < char_choices.length(); ++x) {
|
|
BLOB_CHOICE_IT c_it;
|
|
c_it.set_to_list(char_choices.get(x));
|
|
tprintf("\nchar[%d]: %s\n", x,
|
|
current_unicharset.debug_str( c_it.data()->unichar_id()).string());
|
|
if (detailed)
|
|
print_ratings_list("", char_choices.get(x), current_unicharset);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* print_word_alternates_list
|
|
*/
|
|
void print_word_alternates_list(
|
|
WERD_CHOICE *word,
|
|
GenericVector<WERD_CHOICE *> *alternates) {
|
|
if (!word || !alternates) return;
|
|
|
|
STRING alternates_str;
|
|
for (int i = 0; i < alternates->size(); i++) {
|
|
if (i > 0) alternates_str += "\", \"";
|
|
alternates_str += alternates->get(i)->unichar_string();
|
|
}
|
|
tprintf("Alternates for \"%s\": {\"%s\"}\n",
|
|
word->unichar_string().string(), alternates_str.string());
|
|
}
|