/********************************************************************** * File: ratngs.h (Formerly ratings.h) * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes. * Author: Ray Smith * Created: Thu Apr 23 11:40:38 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #ifndef RATNGS_H #define RATNGS_H #include #include "clst.h" #include "genericvector.h" #include "notdll.h" #include "unichar.h" #include "unicharset.h" #include "werd.h" class BLOB_CHOICE: public ELIST_LINK { public: BLOB_CHOICE() { unichar_id_ = INVALID_UNICHAR_ID; config_ = '\0'; rating_ = MAX_FLOAT32; certainty_ = -MAX_FLOAT32; script_id_ = -1; } BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id float src_rating, // rating float src_cert, // certainty inT8 src_config, // config (font) int script_id); // script BLOB_CHOICE(const BLOB_CHOICE &other); ~BLOB_CHOICE() {} UNICHAR_ID unichar_id() const { return unichar_id_; } float rating() const { return rating_; } float certainty() const { return certainty_; } inT8 config() const { return config_; } int script_id() const { return script_id_; } void set_unichar_id(UNICHAR_ID newunichar_id) { unichar_id_ = newunichar_id; } void set_rating(float newrat) { rating_ = newrat; } void set_certainty(float newrat) { certainty_ = newrat; } void set_config(inT8 newfont) { config_ = newfont; } void set_script(int newscript_id) { script_id_ = newscript_id; } static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) { BLOB_CHOICE* choice = new BLOB_CHOICE; *choice = *src; return choice; } NEWDELETE private: UNICHAR_ID unichar_id_; // unichar id char config_; // char config (font) inT16 junk2_; float rating_; // size related float certainty_; // absolute int script_id_; }; // Make BLOB_CHOICE listable. ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST) // Permuter codes used in WERD_CHOICEs. enum PermuterType { NO_PERM, // 0 PUNC_PERM, // 1 TOP_CHOICE_PERM, // 2 LOWER_CASE_PERM, // 3 UPPER_CASE_PERM, // 4 NUMBER_PERM, // 5 SYSTEM_DAWG_PERM, // 6 DOC_DAWG_PERM, // 7 USER_DAWG_PERM, // 8 FREQ_DAWG_PERM, // 9 COMPOUND_PERM, // 10 }; class WERD_CHOICE { public: WERD_CHOICE() { this->init(8); } WERD_CHOICE(int reserved) { this->init(reserved); } WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET &unicharset) { this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter, unicharset); } WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset); WERD_CHOICE(const WERD_CHOICE &word) { this->init(word.length()); this->operator=(word); } ~WERD_CHOICE(); inline int length() const { return length_; } inline const UNICHAR_ID *unichar_ids() const { return unichar_ids_; } inline const UNICHAR_ID unichar_id(int index) const { assert(index < length_); return unichar_ids_[index]; } inline const char *fragment_lengths() const { return fragment_lengths_; } inline const char fragment_length(int index) const { assert(index < length_); return fragment_lengths_[index]; } inline float rating() const { return rating_; } inline float certainty() const { return certainty_; } inline uinT8 permuter() const { return permuter_; } inline bool fragment_mark() const { return fragment_mark_; } inline BLOB_CHOICE_LIST_CLIST* blob_choices() { return blob_choices_; } inline void set_unichar_id(UNICHAR_ID unichar_id, int index) { assert(index < length_); unichar_ids_[index] = unichar_id; } inline void set_rating(float new_val) { rating_ = new_val; } inline void set_certainty(float new_val) { certainty_ = new_val; } inline void set_permuter(uinT8 perm) { permuter_ = perm; } inline void set_fragment_mark(bool new_fragment_mark) { fragment_mark_ = new_fragment_mark; } void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices); /// Make more space in unichar_id_ and fragment_lengths_ arrays. inline void double_the_size() { unichar_ids_ = GenericVector::double_the_size_memcpy( reserved_, unichar_ids_); fragment_lengths_ = GenericVector::double_the_size_memcpy( reserved_, fragment_lengths_); reserved_ *= 2; } /// Initializes WERD_CHOICE - reseves length slots in unichar_ids_ and /// fragment_length_ arrays. Sets other values to default (blank) values. inline void init(int reserved) { reserved_ = reserved; unichar_ids_ = new UNICHAR_ID[reserved]; fragment_lengths_ = new char[reserved]; length_ = 0; rating_ = 0.0; certainty_ = MAX_FLOAT32; permuter_ = NO_PERM; fragment_mark_ = false; blob_choices_ = NULL; unichar_string_ = ""; unichar_lengths_ = ""; } /// Helper function to build a WERD_CHOICE from the given string, /// fragment lengths, rating, certainty and permuter. /// The function assumes that src_string is not NULL. /// src_lengths argument could be NULL, in which case the unichars /// in src_string are assumed to all be of length 1. void init(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET ¤t_unicharset); /// Set the fields in this choice to be default (bad) values. inline void make_bad() { length_ = 0; rating_ = MAX_FLOAT32; certainty_ = -MAX_FLOAT32; fragment_mark_ = false; unichar_string_ = ""; unichar_lengths_ = ""; } /// This function assumes that there is enough space reserved /// in the WERD_CHOICE for adding another unichar. /// This is an efficient alternative to append_unichar_id(). inline void append_unichar_id_space_allocated( UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty) { assert(reserved_ > length_); length_++; this->set_unichar_id(unichar_id, fragment_length, rating, certainty, length_-1); } void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty); inline void set_unichar_id(UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty, int index) { assert(index < length_); unichar_ids_[index] = unichar_id; fragment_lengths_[index] = fragment_length; rating_ += rating; if (certainty < certainty_) { certainty_ = certainty; } } bool contains_unichar_id(UNICHAR_ID unichar_id) const; void remove_unichar_ids(int index, int num); inline void remove_last_unichar_id() { --length_; } inline void remove_unichar_id(int index) { this->remove_unichar_ids(index, 1); } void string_and_lengths(const UNICHARSET ¤t_unicharset, STRING *word_str, STRING *word_lengths_str) const; const STRING debug_string(const UNICHARSET ¤t_unicharset) const { STRING word_str; for (int i = 0; i < length_; ++i) { word_str += current_unicharset.debug_str(unichar_ids_[i]); word_str += " "; } return word_str; } /// Since this function walks over the whole word to convert unichar ids /// to unichars, it is best to call it once, e.g. after all changes to /// unichar_ids_ in WERD_CHOICE are finished. void populate_unichars(const UNICHARSET ¤t_unicharset) { this->string_and_lengths(current_unicharset, &unichar_string_, &unichar_lengths_); } /// This function should only be called if populate_unichars() /// was called and WERD_CHOICE did not change since then. const STRING &unichar_string() const { assert(unichar_string_.length() <= 0 || unichar_string_.length() >= length_); // sanity check return unichar_string_; } /// This function should only be called if populate_unichars() /// was called and WERD_CHOICE did not change since then. const STRING &unichar_lengths() const { assert(unichar_lengths_.length() <= 0 || unichar_lengths_.length() == length_); // sanity check return unichar_lengths_; } const void print() const { this->print(""); } const void print(const char *msg) const; WERD_CHOICE& operator+= ( // concatanate const WERD_CHOICE & second);// second on first WERD_CHOICE& operator= (const WERD_CHOICE& source); NEWDELETE private: UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word char *fragment_lengths_; // number of fragments in each unichar int reserved_; // size of the above arrays int length_; // word length float rating_; // size related float certainty_; // absolute uinT8 permuter_; // permuter code bool fragment_mark_; // if true, indicates that this choice // was chosen over a better one that // contained a fragment BLOB_CHOICE_LIST_CLIST *blob_choices_; // best choices for each blob // The following variables are only populated by calling populate_unichars(). // They are not synchronized with the values in unichar_ids otherwise. STRING unichar_string_; STRING unichar_lengths_; bool unichar_info_present; private: void delete_blob_choices(); }; // Make WERD_CHOICE listable. CLISTIZEH (WERD_CHOICE) typedef GenericVector BLOB_CHOICE_LIST_VECTOR; typedef GenericVector WERD_CHOICE_LIST_VECTOR; typedef void (*POLY_TESTER) (const STRING&, PBLOB *, DENORM *, BOOL8, char *, inT32, BLOB_CHOICE_LIST *); void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings); void print_ratings_list( const char *msg, // intro message BLOB_CHOICE_LIST *ratings, // list of results const UNICHARSET ¤t_unicharset // unicharset that can be used // for id-to-unichar conversion ); void print_ratings_info( FILE *fp, // file to use BLOB_CHOICE_LIST *ratings, // list of results const UNICHARSET ¤t_unicharset // unicharset that can be used // for id-to-unichar conversion ); void print_char_choices_list( const char *msg, const BLOB_CHOICE_LIST_VECTOR &char_choices, const UNICHARSET ¤t_unicharset, BOOL8 detailed ); #endif