/////////////////////////////////////////////////////////////////////// // File: conversion.cpp // Description: Collection of utility functions for A_CHOICE conversions. // TODO(daria): delete this file when conversion to unichar_ids // is finished and all permuters are completely updated/replaced. // Author: Daria Antonova // Created: Mon Jun 23 11:26:43 PDT 2008 // // (C) Copyright 2007, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #include "conversion.h" #include "callcpp.h" #include "choicearr.h" #include "choices.h" #include "dict.h" #include "ratngs.h" #include "stopper.h" #include "unicharset.h" namespace tesseract { int Dict::valid_word(const char *string) { WERD_CHOICE word(string, getUnicharset()); return valid_word(word); } void Dict::LogNewWordChoice(A_CHOICE *a_choice, FLOAT32 adjust_factor, const float certainties[], const UNICHARSET &unicharset) { WERD_CHOICE word_choice(strlen(a_choice->lengths)); convert_to_word_choice(a_choice, unicharset, &word_choice); LogNewChoice(word_choice, adjust_factor, certainties, false); } } // namespace tesseract // Fills in the given WERD_CHOICE with information from the given A_CHOICE. // Assumes that word_choice pointer is not NULL. 
void convert_to_word_choice(const A_CHOICE *a_choice, const UNICHARSET ¤t_unicharset, WERD_CHOICE *word_choice) { if (a_choice == NULL) return; const char *string = a_choice->string; const char *lengths = a_choice->lengths; const char *fragment_lengths = a_choice->fragment_lengths; int offset = 0; for (int x = 0; x < strlen(a_choice->lengths); ++x) { UNICHAR_ID unichar_id = current_unicharset.unichar_to_id(string + offset, lengths[x]); word_choice->append_unichar_id(unichar_id, fragment_lengths[x], 0.0, 0.0); offset += lengths[x]; } word_choice->set_rating(a_choice->rating); word_choice->set_certainty(a_choice->certainty); word_choice->set_permuter(a_choice->permuter); word_choice->set_fragment_mark(a_choice->fragment_mark); } // Returns the best of two choices and deletes the other (worse) choice. // A choice is better if it has a non-empty string and has a lower // rating than the other choice. If the ratings are the same, // a_choice is preferred over choice. // If the best choice is in the A_CHOICE form, copies it to a new // WERD_CHOICE and deletes A_CHOICE. WERD_CHOICE *get_best_delete_other(const UNICHARSET ¤t_unicharset, WERD_CHOICE *choice, A_CHOICE *a_choice) { if (!a_choice) return choice; if (choice != NULL && (choice->rating() < a_choice->rating || a_choice->string == NULL)) { free_choice(a_choice); return choice; } else { delete choice; WERD_CHOICE *word_choice = new WERD_CHOICE(); convert_to_word_choice(a_choice, current_unicharset, word_choice); free_choice(a_choice); return word_choice; } } // Convert BLOB_CHOICE_LIST_VECTOR to CHOICES_LIST. // The caller is responsible for deleting the returned CHOICES_LIST. 
// convert_to_choices_list() arguments:
//   char_choices       - one BLOB_CHOICE_LIST per position; each list is
//                        walked in order and every BLOB_CHOICE becomes one
//                        append_char_choice() entry.
//   current_unicharset - used to turn each BLOB_CHOICE's unichar id back into
//                        its string form.
// Returns a list created with new_choice_list() onto which one LIST per
// entry of char_choices has been pushed, in the same order.
CHOICES_LIST convert_to_choices_list(
    const BLOB_CHOICE_LIST_VECTOR &char_choices,
    const UNICHARSET &current_unicharset) {
  CHOICES_LIST old_char_choices = new_choice_list();
  BLOB_CHOICE_IT it;
  // Single-symbol lengths string: [byte length of the unichar, terminator].
  char choice_lengths[2] = {0, 0};
  char unichar[UNICHAR_LEN + 1];

  for (int x = 0; x < char_choices.length(); ++x) {
    it.set_to_list(char_choices.get(x));
    LIST result = NIL;
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      BLOB_CHOICE *blob_choice = it.data();
      // Bounded copy: never write past the fixed-size buffer, and always
      // leave it terminated (strncpy does not terminate on truncation).
      // Strings of at most UNICHAR_LEN bytes are copied unchanged.
      strncpy(unichar,
              current_unicharset.id_to_unichar(blob_choice->unichar_id()),
              UNICHAR_LEN);
      unichar[UNICHAR_LEN] = '\0';
      choice_lengths[0] = static_cast<char>(strlen(unichar));
      result = append_char_choice(result, unichar, choice_lengths,
                                  blob_choice->rating(),
                                  blob_choice->certainty(),
                                  blob_choice->config(), NULL);
    }
    old_char_choices = array_push(old_char_choices, result);
  }
  return old_char_choices;
}