/* -*-C-*- ******************************************************************************** * * File: dawg.h (Formerly dawg.h) * Description: * Author: Mark Seaman, SW Productivity * Created: Fri Oct 16 14:37:00 1987 * Modified: Wed Jun 19 16:50:24 1991 (Mark Seaman) marks@hpgrlt * Language: C * Package: N/A * Status: Reusable Software Component * * (c) Copyright 1987, Hewlett-Packard Company. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * *********************************************************************************/ #ifndef DAWG_H #define DAWG_H /*---------------------------------------------------------------------- I n c l u d e s ----------------------------------------------------------------------*/ #include #include "general.h" /*---------------------------------------------------------------------- T y p e s ----------------------------------------------------------------------*/ #define MAX_WERD_LENGTH (INT32) 40 #define MAX_NODE_EDGES (INT32) 100 #define LAST_FLAG (INT32) 1 #define DIRECTION_FLAG (INT32) 2 #define WERD_END_FLAG (INT32) 4 #define FLAG_START_BIT 21 #define LETTER_START_BIT 24 #define NO_EDGE (INT32) 0x1fffff typedef UINT32 EDGE_RECORD; typedef EDGE_RECORD *EDGE_ARRAY; typedef INT32 EDGE_REF; typedef INT32 NODE_REF; /*--------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ extern INT32 case_sensative; extern INT32 debug; /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ /********************************************************************** * next_node * * The next node visited in the DAWG by following this edge. **********************************************************************/ #define next_node(edges,e) \ ((edges)[e] & NO_EDGE) /********************************************************************** * set_next_edge * * Set the next node link for this edge in the DAWG. **********************************************************************/ #define set_next_edge(edges,e,value) \ ((edges)[e] = ((edges)[e] & (INT32) 0xffe00000) |\ (value & NO_EDGE)) /********************************************************************** * set_empty_edge * * Return TRUE if this edge spot in this location is unoccupied. **********************************************************************/ #define set_empty_edge(edges,e) \ ((edges)[e] = NO_EDGE) /********************************************************************** * clear_all_edges * * Go through all the edges in the DAWG and clear out each one. **********************************************************************/ #define clear_all_edges(dawg,edge,max_num_edges) \ for (edge=0; edge> LETTER_START_BIT) /********************************************************************** * last_edge * * Return TRUE if this edge is the last edge in the sequence. This is * TRUE for the last one in both the forward and backward part. **********************************************************************/ #define last_edge(edges,e) \ ((edges)[e] & (LAST_FLAG << FLAG_START_BIT)) /********************************************************************** * end_of_word * * Return TRUE if this edge marks the end of a word. **********************************************************************/ #define end_of_word(edges,e) \ ((edges)[e] & (WERD_END_FLAG << FLAG_START_BIT)) /********************************************************************** * forward_edge * * Return TRUE if this edge is in the forward direction. **********************************************************************/ #define forward_edge(edges,e) \ ((edges)[e] & (DIRECTION_FLAG << FLAG_START_BIT) && \ edge_occupied (edges,e)) /********************************************************************** * backward_edge * * Return TRUE if this edge is in the backward direction. **********************************************************************/ #define backward_edge(edges,e) \ (! ((edges)[e] & (DIRECTION_FLAG << FLAG_START_BIT)) && \ edge_occupied (edges,e)) /********************************************************************** * edge_loop * * Loop for each of the edges in the forward direction. This macro * can be used in the following way: *********************************************************************/ #define edge_loop(edges,e) \ while (! last_edge (edges,e++)) /********************************************************************** * case_is_okay * * Check the case of this character in the character string to make * sure that there is not a problem with the case. **********************************************************************/ #define case_is_okay(word,i) \ (i ? \ ((isupper(word[i]) && islower(word[i-1])) ? \ FALSE : \ ((islower(word[i]) && isupper(word[i-1]) && \ i>1 && isalpha (word[i-2])) ? \ FALSE : \ TRUE)) : \ TRUE) /********************************************************************** * trailing_punc * * Check for leading punctuation. **********************************************************************/ #define trailing_punc(ch) \ ((ch == '}' ) || \ (ch == ':' ) || \ (ch == ';' ) || \ (ch == '-' ) || \ (ch == ']' ) || \ (ch == '!' ) || \ (ch == '?' ) || \ (ch == '`' ) || \ (ch == ',' ) || \ (ch == '.' ) || \ (ch == ')' ) || \ (ch == '\"' ) || \ (ch == '\'' )) /********************************************************************** * leading_punc * * Check for leading punctuation. **********************************************************************/ #define leading_punc(ch) \ ((ch == '\"' ) || \ (ch == '(' ) || \ (ch == '{' ) || \ (ch == '[' ) || \ (ch == '`' ) || \ (ch == '\'' )) /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ EDGE_REF edge_char_of(EDGE_ARRAY dawg, NODE_REF node, int character, int word_end); INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node); INT32 def_letter_is_okay(EDGE_ARRAY dawg, NODE_REF *node, INT32 char_index, char prevchar, const char *word, INT32 word_end); /* * Allow for externally provided letter_is_okay. */ typedef INT32 (*LETTER_OK_FUNC)(EDGE_ARRAY, NODE_REF*, INT32, char, const char*, INT32); extern LETTER_OK_FUNC letter_is_okay; INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node); void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node); void read_squished_dawg(const char *filename, EDGE_ARRAY dawg, INT32 max_num_edges); INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index); INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string); /* #if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS # define _ARGS(s) s #else # define _ARGS(s) () #endif*/ /* dawg.c EDGE_REF edge_char_of _ARGS((EDGE_ARRAY dawg, NODE_REF node, int character, int word_end)); INT32 edges_in_node _ARGS((EDGE_ARRAY dawg, NODE_REF node)); INT32 def_letter_is_okay _ARGS((EDGE_ARRAY dawg, NODE_REF *node, INT32 char_index, char *word, INT32 word_end)); INT32 num_forward_edges _ARGS((EDGE_ARRAY dawg, NODE_REF node)); void print_dawg_node _ARGS((EDGE_ARRAY dawg, NODE_REF node)); void read_squished_dawg _ARGS((char *filename, EDGE_ARRAY dawg, INT32 max_num_edges)); INT32 verify_trailing_punct _ARGS((EDGE_ARRAY dawg, char *word, INT32 char_index)); INT32 word_in_dawg _ARGS((EDGE_ARRAY dawg, char *string)); #undef _ARGS */ #endif