/* -*-C-*- ******************************************************************************** * * File: makedawg.cpp * Description: Create a Directed Acyclic Word Graph * Author: Mark Seaman, OCR Technology * Created: Fri Oct 16 14:37:00 1987 * Modified: Fri Jul 26 12:18:12 1991 (Mark Seaman) marks@hpgrlt * Language: C * Package: N/A * Status: Reusable Software Component * * (c) Copyright 1987, Hewlett-Packard Company, all rights reserved. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * ******************************************************************************** */ /* ---------------------------------------------------------------------- I n c l u d e s ---------------------------------------------------------------------- */ #ifdef __MSW32__ #include #else #include #endif #include "makedawg.h" #include "reduce.h" #include "cutil.h" #include "callcpp.h" #ifdef __UNIX__ #include #endif #include /* ---------------------------------------------------------------------- V a r i a b l e s ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- F u n c t i o n s ---------------------------------------------------------------------- */ /********************************************************************** * build_node_map * * Create a node map that will help translate the indices of the DAWG * into a compacted form. 
* Construct in memory a mapping from the memory node values into the
* disk node values. Return the values in this map as requested. If
* a new value mapping is requested assign the next sequential number
* to it.
*
* Only the opening of this function is present in this copy of the
* file: it allocates max_num_edges entries of sizeof (EDGE_REF) for
* node_map with malloc and starts a loop over edge.  How num_nodes,
* both_links and reserved_edges are used cannot be seen from here.
**********************************************************************/
NODE_MAP build_node_map (EDGE_ARRAY dawg,
                         INT32 *num_nodes,
                         INT32 both_links,
                         INT32 max_num_edges,
                         INT32 reserved_edges) {
  EDGE_REF edge;
  NODE_MAP node_map;
  INT32 node_counter;
  INT32 num_edges;

  /* One EDGE_REF-sized slot per possible edge index. */
  node_map = (NODE_MAP) malloc (sizeof (EDGE_REF) * max_num_edges);

  /* NOTE(review): the statement below is not valid C as it stands.  The
     text between "edge" and "= 0" appears to have been lost, taking with
     it the rest of this loop header, the remainder of build_node_map and
     the opening of whatever function follows.  The identifiers
     the_next_node, destination, node and next_node_space are used below
     without a visible declaration, and the parenthesised range condition
     has lost the name of the call it belongs to (presumably an
     assertion -- confirm).  Restore this region from a pristine copy of
     the file rather than repairing it by hand. */
  for (edge=0; edge= 0 &&
       the_next_node < max_num_edges &&
       node_map [the_next_node] >= 0 &&
       node_map [the_next_node] < max_num_edges);
  /* Map each edge in node */
  /* Rewrites the edge's next-node reference to node_map [old value];
     with debug set, the value is printed before and after. */
  if (debug) cprintf (" " REFFORMAT " --> ", next_node (dawg, edge));
  set_next_edge (dawg, edge, node_map [next_node (dawg, edge)]);
  if (debug) cprintf (REFFORMAT "\n", next_node (dawg, edge));
  }
  if (destination != 0) next_node_space = edge;
  edge = node + num_edges;
  }
  else {
    edge++;
  }
  }
  cprintf ("Compacting node from " REFFORMAT " to " REFFORMAT " (%d)\n",
           node, next_node_space, num_edges);
  /* The map is released here, so this tail cannot belong to a function
     that returns node_map to its caller. */
  free (node_map);
}

/**********************************************************************
* delete_node
*
* Remove all the edges that are currently used within this node in the
* DAWG.
**********************************************************************/
/* Starting at the edge index equal to node, calls set_empty_edge on
   edges_in_node (dawg, node) consecutive edge slots. */
void delete_node (EDGE_ARRAY dawg,
                  NODE_REF node) {
  EDGE_REF edge = node;
  INT32 counter = edges_in_node (dawg, node);
  /*
  printf ("node deleted = %d (%d)\n", node, counter);
  */
  /* counter is captured once, before any slot is emptied. */
  while (counter--)
    set_empty_edge (dawg, edge++);
}

/**********************************************************************
 * write_squished_dawg
 *
 * Write the DAWG out to a file
 *
 * Visible behaviour: builds a node map with build_node_map (passing
 * FALSE for both_links), opens filename through open_file with mode
 * "w", and terminates the process with exit(1) after printing an error
 * when a size check against MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE fails.
 * The counting loop, the code that writes the records and the end of
 * the function are not present in this copy of the file.
 **********************************************************************/
void write_squished_dawg (const char *filename,
                          EDGE_ARRAY dawg,
                          INT32 max_num_edges,
                          INT32 reserved_edges) {
  FILE *file;
  EDGE_REF edge;
  INT32 num_edges;
  INT32 node_count = 0;
  NODE_MAP node_map;
  EDGE_REF old_index;
  UINT32 temp_record_32;

  if (debug) print_string ("write_squished_dawg");

  node_map = build_node_map (dawg, &node_count, FALSE, max_num_edges,
                             reserved_edges);

  file = open_file (filename, "w");

  num_edges = 0;
  /* Count number of edges */
  /* NOTE(review): the loop header below is truncated -- the text between
     "edge" and "MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE" appears to have been
     lost, including the loop body and the left-hand side of the size
     comparison.  Restore from a pristine copy of the file. */
  for (edge=0; edge MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE) {
    cprintf("Error: squished DAWG is too big to be written (%d edges > %d).\n",
            num_edges, MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE);
    exit(1);
  }

  /* NOTE(review): second truncation.  The text between "edge" and "1)"
     is missing: the rest of write_squished_dawg, and the signature and
     local declarations of the command-line driver that the remaining
     code belongs to.  filename, baselength, start_time, end_time,
     option, max_new_attempts, argc, argv, dawg, max_num_edges and
     reserved_edges are used below with no declaration visible here. */
  for (edge=0; edge 1) {
    strcpy (filename, argv[1]);
  }
  else {
    strcpy (filename, "WORDS");
  }
  /* NOTE(review): the size of filename is not visible.  The copy of
     argv[1] above and the four-character suffixes appended at
     filename+baselength below are unbounded -- confirm that the buffer
     is large enough or switch to a bounded copy. */
  baselength = strlen (filename);
  /*
  strcpy (filename+baselength, ".ful");
  read_full_dawg (filename, dawg, max_num_edges);
  */
  /* Fixed pipeline on <base>: read .lst, write .ful, convert with
     trie_to_dawg, write .opt, write .squ. */
  strcpy (filename+baselength, ".lst");
  printf ("Building Dawg from word list in file, '%s'\n", filename);
  read_word_list (filename, dawg, max_num_edges, reserved_edges);

  strcpy (filename+baselength, ".ful");
  printf ("Writing full Trie file, '%s'\n", filename);
  write_full_dawg (filename, dawg, max_num_edges);

  strcpy (filename+baselength, ".opt");
  trie_to_dawg (dawg, max_num_edges, reserved_edges);
  printf ("Writing full DAWG file, '%s'\n", filename);
  write_full_dawg (filename, dawg, max_num_edges);

  strcpy (filename+baselength, ".squ");
  printf ("Writing squished file, '%s'\n", filename);
  write_squished_dawg (filename, dawg, max_num_edges,
                       reserved_edges);

  end_time = time (&end_time);
  printf ("Seconds Elapsed = %4.1lf\n", difftime (end_time, start_time));

  /* NOTE(review): in this copy the option loop below follows the fixed
     pipeline above unconditionally, and a lone #endif follows this
     function; a conditional-compilation directive separating the two
     variants may have been lost -- confirm against a pristine copy. */
  /* Option letters e, c, d, n, s and t take an argument; v does not.
     The loop ends when getopt returns EOF. */
  while ((option = getopt (argc, argv, "e:c:d:n:s:t:v")) != EOF)
  switch (option) {
    /* NOTE(review): the c, d, n and s cases read argv[optind] as the
       output file name without first checking optind < argc, unlike
       the t and e cases.  If the output argument is omitted a null
       pointer reaches printf's %s and write_*_dawg -- confirm whether
       a guard is wanted. */
    case 'c' : {
      /* Read a DAWG, compact it, write it back out in full form. */
      printf ("makedawg -c %s %s\n", optarg, argv[optind]);
      printf ("Reading Dawg file, '%s'\n", optarg);
      read_dawg (optarg, dawg, max_num_edges);
      max_new_attempts = 1000;
      compact_dawg (dawg, max_num_edges, reserved_edges);
      printf ("Writing full file, '%s'\n", argv[optind]);
      write_full_dawg (argv[optind++], dawg, max_num_edges);
      break;
    }
    case 'd' : {
      /* Read a DAWG, run trie_to_dawg on it, write it in full form. */
      printf ("makedawg -d %s %s\n", optarg, argv[optind]);
      printf ("Reading Dawg file, '%s'\n", optarg);
      read_dawg (optarg, dawg, max_num_edges);
      trie_to_dawg (dawg, max_num_edges, reserved_edges);
      printf ("Writing full file, '%s'\n", argv[optind]);
      write_full_dawg (argv[optind++], dawg, max_num_edges);
      break;
    }
    case 'n' : {
      /* Build from a word list and write the result in full form. */
      printf ("makedawg -n %s %s\n", optarg, argv[optind]);
      printf ("Building Dawg from word list in file, '%s'\n", optarg);
      read_word_list (optarg, dawg, max_num_edges, reserved_edges);
      printf ("Writing full Dawg file, '%s'\n", argv[optind]);
      write_full_dawg (argv[optind++], dawg, max_num_edges);
      break;
    }
    case 's' : {
      /* Read a DAWG and write it in squished form. */
      printf ("makedawg -s %s %s\n", optarg, argv[optind]);
      printf ("Reading Dawg file, '%s'\n", optarg);
      read_dawg (optarg, dawg, max_num_edges);
      printf ("Writing squished file, '%s'\n", argv[optind]);
      write_squished_dawg (argv[optind++], dawg, max_num_edges,
                           reserved_edges);
      break;
    }
    case 'v' : {
      /* Turn on the debug flag for everything processed afterwards. */
      debug = 1;
      break;
    }
    case 't' : {
      /* Load a squished DAWG, then call check_for_words with the next
         argument if there is one, otherwise with NULL. */
      read_squished_dawg (optarg, dawg, max_num_edges);
      if (optind < argc)
        check_for_words (dawg, argv[optind++]);
      else
        check_for_words (dawg, NULL);
      break;
    }
    case 'e' : {
      /* Same as 't' but the DAWG is loaded with read_dawg. */
      read_dawg (optarg, dawg, max_num_edges);
      if (optind < argc)
        check_for_words (dawg, argv[optind++]);
      else
        check_for_words (dawg, NULL);
      break;
    }
    default : {
      /* NOTE(review): the usage strings look as if their argument
         placeholders were stripped; they are reproduced unchanged. */
      printf ("usage: makedawg -c \n");
      printf (" -d \n");
      printf (" -n \n");
      printf (" -s \n");
      printf (" -e \n");
      printf (" -t \n");
      printf (" -v \n");
    }
  }
}
#endif