mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Fixed name collision with jpeg library
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@164 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
c4f4840fbe
commit
520077bd41
101
dict/choices.cpp
101
dict/choices.cpp
@ -25,7 +25,8 @@
|
||||
#include "choices.h"
|
||||
#include "structures.h"
|
||||
#include "tordvars.h"
|
||||
#include "callcpp.h"
|
||||
#include "tprintf.h"
|
||||
#include "globals.h"
|
||||
#include "danerror.h"
|
||||
#include "host.h"
|
||||
|
||||
@ -53,7 +54,7 @@ CHOICES append_choice(CHOICES ratings,
|
||||
const char *lengths,
|
||||
float rating,
|
||||
float certainty,
|
||||
INT8 config) {
|
||||
inT8 config) {
|
||||
A_CHOICE *this_choice;
|
||||
|
||||
this_choice = new_choice (string, lengths, rating, certainty, config, NO_PERM);
|
||||
@ -61,6 +62,21 @@ CHOICES append_choice(CHOICES ratings,
|
||||
return (ratings);
|
||||
}
|
||||
|
||||
// Script-aware overload: builds a new A_CHOICE from the arguments (with the
// permuter fixed to NO_PERM), hands it to push_last() together with the
// incoming list, and returns the list that push_last() produces.
CHOICES append_choice(CHOICES ratings,
                      const char *string,
                      const char *lengths,
                      float rating,
                      float certainty,
                      inT8 config,
                      const char* script) {
  A_CHOICE *appended = new_choice(string, lengths, rating, certainty,
                                  config, script, NO_PERM);
  return push_last(ratings, (LIST) appended);
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* copy_choices
|
||||
@ -79,6 +95,7 @@ CHOICES copy_choices(CHOICES choices) {
|
||||
class_probability (first_node (l)),
|
||||
class_certainty (first_node (l)),
|
||||
class_config (first_node (l)),
|
||||
class_script (first_node (l)),
|
||||
class_permuter (first_node (l))));
|
||||
}
|
||||
return (reverse_d (result));
|
||||
@ -114,7 +131,18 @@ A_CHOICE *new_choice(const char *string,
|
||||
const char *lengths,
|
||||
float rating,
|
||||
float certainty,
|
||||
INT8 config,
|
||||
inT8 config,
|
||||
char permuter) {
|
||||
return new_choice(string, lengths, rating, certainty,
|
||||
config, "dummy", permuter);
|
||||
}
|
||||
|
||||
A_CHOICE *new_choice(const char *string,
|
||||
const char *lengths,
|
||||
float rating,
|
||||
float certainty,
|
||||
inT8 config,
|
||||
const char* script,
|
||||
char permuter) {
|
||||
A_CHOICE *this_choice;
|
||||
|
||||
@ -125,6 +153,7 @@ A_CHOICE *new_choice(const char *string,
|
||||
this_choice->certainty = certainty;
|
||||
this_choice->config = config;
|
||||
this_choice->permuter = permuter;
|
||||
this_choice->script = script;
|
||||
return (this_choice);
|
||||
}
|
||||
|
||||
@ -134,34 +163,48 @@ A_CHOICE *new_choice(const char *string,
|
||||
*
|
||||
* Print the probability ratings for a particular blob or word.
|
||||
**********************************************************************/
|
||||
void print_choices( /* List of (A_CHOICE*) */
|
||||
const char *label,
|
||||
CHOICES rating) {
|
||||
int first_one = TRUE;
|
||||
char str[CHARS_PER_LINE];
|
||||
int len;
|
||||
|
||||
cprintf ("%-20s\n", label);
|
||||
void print_choices(const char *label,
|
||||
CHOICES rating) { // List of (A_CHOICE*).
|
||||
tprintf("%s\n", label);
|
||||
if (rating == NIL)
|
||||
cprintf (" No rating ");
|
||||
tprintf(" No rating ");
|
||||
|
||||
iterate(rating) {
|
||||
|
||||
if (first_one && show_bold) {
|
||||
cprintf ("|");
|
||||
len = sprintf (str, " %s ", best_string (rating));
|
||||
print_bold(str);
|
||||
while (len++ < 8)
|
||||
cprintf (" ");
|
||||
}
|
||||
else {
|
||||
cprintf ("| %-7s", best_string (rating));
|
||||
}
|
||||
|
||||
cprintf ("%5.2lf ", best_probability (rating));
|
||||
|
||||
cprintf ("%5.2lf", best_certainty (rating));
|
||||
first_one = FALSE;
|
||||
tprintf("%.2f %.2f", best_probability(rating), best_certainty(rating));
|
||||
print_word_string(best_string(rating));
|
||||
}
|
||||
cprintf ("\n");
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* print_word_choice
|
||||
*
|
||||
* Print the string in a human-readable format and ratings for a word.
|
||||
**********************************************************************/
|
||||
void print_word_choice(const char *label, A_CHOICE* choice) {
|
||||
tprintf("%s : ", label);
|
||||
if (choice == NULL) {
|
||||
tprintf("No rating\n");
|
||||
} else {
|
||||
tprintf("%.2f %.2f", class_probability(choice), class_certainty(choice));
|
||||
print_word_string(class_string(choice));
|
||||
tprintf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* print_word_string
|
||||
*
|
||||
* Print the string in a human-readable format.
|
||||
* The output is not newline terminated.
|
||||
**********************************************************************/
|
||||
void print_word_string(const char* str) {
|
||||
int step = 1;
|
||||
for (int i = 0; str[i] != '\0'; i += step) {
|
||||
step = unicharset.step(str + i);
|
||||
int unichar_id = unicharset.unichar_to_id(str + i, step);
|
||||
STRING ch_str = unicharset.debug_str(unichar_id);
|
||||
tprintf(" : %s ", ch_str.string());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -66,9 +66,11 @@ typedef struct choicestruct
|
||||
float rating;
|
||||
float certainty;
|
||||
char permuter;
|
||||
INT8 config;
|
||||
inT8 config;
|
||||
char *string;
|
||||
char *lengths; //Length of each unichar in the string
|
||||
const char* script; // script is a script returned by unicharset,
|
||||
// and thus must not be deleted.
|
||||
} A_CHOICE;
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
@ -154,6 +156,14 @@ typedef struct choicestruct
|
||||
#define class_config(choice) \
|
||||
(((A_CHOICE*) (choice))->config)
|
||||
|
||||
/**********************************************************************
|
||||
* class_script
|
||||
*
|
||||
* Return the script of a given character class.
|
||||
**********************************************************************/
|
||||
#define class_script(choice) (((A_CHOICE*) (choice))->script)
|
||||
|
||||
/**********************************************************************
|
||||
* clone_choice
|
||||
*
|
||||
@ -195,7 +205,15 @@ CHOICES append_choice(CHOICES ratings,
|
||||
const char *lengths,
|
||||
float rating,
|
||||
float certainty,
|
||||
INT8 config);
|
||||
inT8 config,
|
||||
const char* script);
|
||||
|
||||
CHOICES append_choice(CHOICES ratings,
|
||||
const char *string,
|
||||
const char *lengths,
|
||||
float rating,
|
||||
float certainty,
|
||||
inT8 config);
|
||||
|
||||
CHOICES copy_choices(CHOICES choices);
|
||||
|
||||
@ -205,8 +223,19 @@ A_CHOICE *new_choice(const char *string,
|
||||
const char *lengths,
|
||||
float rating,
|
||||
float certainty,
|
||||
INT8 config,
|
||||
inT8 config,
|
||||
const char* script,
|
||||
char permuter);
|
||||
|
||||
A_CHOICE *new_choice(const char *string,
|
||||
const char *lengths,
|
||||
float rating,
|
||||
float certainty,
|
||||
inT8 config,
|
||||
char permuter);
|
||||
|
||||
void print_choices(const char *label, CHOICES rating);
|
||||
void print_word_string(const char* str);
|
||||
void print_word_choice(const char *label, A_CHOICE* choice);
|
||||
|
||||
#endif
|
||||
|
@ -227,11 +227,11 @@ int case_ok(const char *word, const char *lengths) {
|
||||
for (x = 0, offset = 0; x < strlen (lengths); offset += lengths[x++]) {
|
||||
|
||||
ch_id = unicharset.unichar_to_id(word + offset, lengths[x]);
|
||||
if (unicharset.get_islower (ch_id))
|
||||
state = case_state_table[state][2];
|
||||
else if (unicharset.get_isupper (ch_id))
|
||||
if (unicharset.get_isupper(ch_id))
|
||||
state = case_state_table[state][1];
|
||||
else if (unicharset.get_isdigit (ch_id))
|
||||
else if (unicharset.get_isalpha(ch_id))
|
||||
state = case_state_table[state][2];
|
||||
else if (unicharset.get_isdigit(ch_id))
|
||||
state = case_state_table[state][3];
|
||||
else
|
||||
state = case_state_table[state][0];
|
||||
|
@ -41,8 +41,8 @@
|
||||
/*----------------------------------------------------------------------
|
||||
V a r i a b l e s
|
||||
----------------------------------------------------------------------*/
|
||||
INT32 debug = 0;
|
||||
INT32 case_sensative = 1;
|
||||
inT32 debug = 0;
|
||||
inT32 case_sensative = 1;
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
@ -79,7 +79,7 @@ EDGE_REF edge_char_of(EDGE_ARRAY dawg,
|
||||
* Count the number of edges in this node in the DAWG. This includes
|
||||
* both forward and back links.
|
||||
**********************************************************************/
|
||||
INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
inT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
EDGE_REF edge = node;
|
||||
|
||||
if (edge_occupied (dawg, edge)) {
|
||||
@ -112,12 +112,12 @@ LETTER_OK_FUNC letter_is_okay = &def_letter_is_okay;
|
||||
**********************************************************************/
|
||||
// TODO(tkielbus) Change the prevchar argument to make it unicode safe.
|
||||
// We might want to get rid of def_letter_is_okay at some point though.
|
||||
INT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
||||
inT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
||||
NODE_REF *node,
|
||||
INT32 char_index,
|
||||
inT32 char_index,
|
||||
char prevchar,
|
||||
const char *word,
|
||||
INT32 word_end) {
|
||||
inT32 word_end) {
|
||||
EDGE_REF edge;
|
||||
STRING dummy_word(word); // Auto-deleting string fixes memory leak.
|
||||
STRING word_single_lengths; //Lengths of single UTF-8 characters of the word.
|
||||
@ -132,10 +132,8 @@ INT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
||||
}
|
||||
|
||||
if (*node == NO_EDGE) { /* Trailing punctuation */
|
||||
if (trailing_punc (dummy_word [char_index])
|
||||
&& (!trailing_punc (prevchar)
|
||||
|| punctuation_ok(dummy_word.string(),
|
||||
word_single_lengths.string())>=0))
|
||||
if (trailing_punc(dummy_word[char_index]) &&
|
||||
punctuation_ok(dummy_word.string(), word_single_lengths.string()) >= 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return (FALSE);
|
||||
@ -191,8 +189,7 @@ INT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
||||
if (leading_punc (word [char_index]) &&
|
||||
(char_index == 0 || leading_punc (dummy_word [char_index-1]))) {
|
||||
*node = 0;
|
||||
if (leading_punc (prevchar) ||
|
||||
punctuation_ok (word, word_single_lengths.string())>=0)
|
||||
if (punctuation_ok(word, word_single_lengths.string()) >= 0)
|
||||
return (TRUE);
|
||||
else
|
||||
return FALSE;
|
||||
@ -213,9 +210,9 @@ INT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
||||
*
|
||||
* Count and return the number of forward edges for this node.
|
||||
**********************************************************************/
|
||||
INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
inT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
EDGE_REF edge = node;
|
||||
INT32 num = 0;
|
||||
inT32 num = 0;
|
||||
|
||||
if (forward_edge (dawg, edge)) {
|
||||
do {
|
||||
@ -303,8 +300,8 @@ void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
EDGE_ARRAY read_squished_dawg(const char *filename) {
|
||||
FILE *file;
|
||||
EDGE_REF edge;
|
||||
INT32 num_edges = 0;
|
||||
INT32 node_count = 0;
|
||||
inT32 num_edges = 0;
|
||||
inT32 node_count = 0;
|
||||
|
||||
if (debug) print_string ("read_debug");
|
||||
|
||||
@ -313,7 +310,7 @@ EDGE_ARRAY read_squished_dawg(const char *filename) {
|
||||
#else
|
||||
file = open_file (filename, "rb");
|
||||
#endif
|
||||
fread (&num_edges, sizeof (INT32), 1, file);
|
||||
fread (&num_edges, sizeof (inT32), 1, file);
|
||||
num_edges = ntohl(num_edges);
|
||||
if (num_edges > MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE || num_edges < 0) {
|
||||
tprintf("(ENDIAN)Error: trying to read a DAWG '%s' that contains "
|
||||
@ -322,8 +319,8 @@ EDGE_ARRAY read_squished_dawg(const char *filename) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
UINT32 *dawg_32 = (UINT32*) Emalloc(num_edges * sizeof (UINT32));
|
||||
fread(&dawg_32[0], sizeof (UINT32), num_edges, file);
|
||||
uinT32 *dawg_32 = (uinT32*) Emalloc(num_edges * sizeof (uinT32));
|
||||
fread(&dawg_32[0], sizeof (uinT32), num_edges, file);
|
||||
fclose(file);
|
||||
EDGE_ARRAY dawg = (EDGE_ARRAY) memalloc(sizeof(EDGE_RECORD) * num_edges);
|
||||
|
||||
@ -346,7 +343,7 @@ EDGE_ARRAY read_squished_dawg(const char *filename) {
|
||||
* string of trailing punctuation. TRUE is returned if everything is
|
||||
* OK.
|
||||
**********************************************************************/
|
||||
INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index) {
|
||||
inT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, inT32 char_index) {
|
||||
char last_char;
|
||||
char *first_char;
|
||||
|
||||
@ -372,10 +369,10 @@ INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index) {
|
||||
*
|
||||
* Test to see if the word can be found in the DAWG.
|
||||
**********************************************************************/
|
||||
INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string) {
|
||||
inT32 word_in_dawg(EDGE_ARRAY dawg, const char *string) {
|
||||
NODE_REF node = 0;
|
||||
INT32 i;
|
||||
INT32 length;
|
||||
inT32 i;
|
||||
inT32 length;
|
||||
|
||||
length=strlen(string);
|
||||
if (length==0)
|
||||
|
96
dict/dawg.h
96
dict/dawg.h
@ -35,65 +35,65 @@
|
||||
/*----------------------------------------------------------------------
|
||||
T y p e s
|
||||
----------------------------------------------------------------------*/
|
||||
/* #define MAX_WERD_LENGTH (INT32) 40 */
|
||||
/* #define MAX_NODE_EDGES_DISPLAY (INT32) 100 */
|
||||
/* #define LAST_FLAG (INT32) 1 */
|
||||
/* #define DIRECTION_FLAG (INT32) 2 */
|
||||
/* #define WERD_END_FLAG (INT32) 4 */
|
||||
/* #define MAX_WERD_LENGTH (inT32) 40 */
|
||||
/* #define MAX_NODE_EDGES_DISPLAY (inT32) 100 */
|
||||
/* #define LAST_FLAG (inT32) 1 */
|
||||
/* #define DIRECTION_FLAG (inT32) 2 */
|
||||
/* #define WERD_END_FLAG (inT32) 4 */
|
||||
|
||||
/* #define LETTER_START_BIT 0 */
|
||||
/* #define FLAG_START_BIT 8 */
|
||||
/* #define NEXT_EDGE_START_BIT 11 */
|
||||
|
||||
/* #define NO_EDGE (INT32) 0x001fffff */
|
||||
/* #define NO_EDGE (inT32) 0x001fffff */
|
||||
|
||||
/* #define NEXT_EDGE_MASK (INT32) 0xfffff800 */
|
||||
/* #define FLAGS_MASK (INT32) 0x00000700 */
|
||||
/* #define LETTER_MASK (INT32) 0x000000ff */
|
||||
/* #define NEXT_EDGE_MASK (inT32) 0xfffff800 */
|
||||
/* #define FLAGS_MASK (inT32) 0x00000700 */
|
||||
/* #define LETTER_MASK (inT32) 0x000000ff */
|
||||
|
||||
/* #define REFFORMAT "%d" */
|
||||
|
||||
/* typedef UINT32 EDGE_RECORD; */
|
||||
/* typedef uinT32 EDGE_RECORD; */
|
||||
/* typedef EDGE_RECORD *EDGE_ARRAY; */
|
||||
/* typedef INT32 EDGE_REF; */
|
||||
/* typedef INT32 NODE_REF; */
|
||||
/* typedef inT32 EDGE_REF; */
|
||||
/* typedef inT32 NODE_REF; */
|
||||
|
||||
#define MAX_WERD_LENGTH (INT64) 40
|
||||
#define MAX_NODE_EDGES_DISPLAY (INT64) 100
|
||||
#define LAST_FLAG (INT64) 1
|
||||
#define DIRECTION_FLAG (INT64) 2
|
||||
#define WERD_END_FLAG (INT64) 4
|
||||
#define MAX_WERD_LENGTH (inT64) 40
|
||||
#define MAX_NODE_EDGES_DISPLAY (inT64) 100
|
||||
#define LAST_FLAG (inT64) 1
|
||||
#define DIRECTION_FLAG (inT64) 2
|
||||
#define WERD_END_FLAG (inT64) 4
|
||||
|
||||
#define LETTER_START_BIT 0
|
||||
#define FLAG_START_BIT 8
|
||||
#define NEXT_EDGE_START_BIT 11
|
||||
|
||||
#ifdef __MSW32__
|
||||
#define NO_EDGE (INT64) 0x001fffffffffffffi64
|
||||
#define NEXT_EDGE_MASK (INT64) 0xfffffffffffff800i64
|
||||
#define FLAGS_MASK (INT64) 0x0000000000000700i64
|
||||
#define LETTER_MASK (INT64) 0x00000000000000ffi64
|
||||
#define NO_EDGE (inT64) 0x001fffffffffffffi64
|
||||
#define NEXT_EDGE_MASK (inT64) 0xfffffffffffff800i64
|
||||
#define FLAGS_MASK (inT64) 0x0000000000000700i64
|
||||
#define LETTER_MASK (inT64) 0x00000000000000ffi64
|
||||
#else
|
||||
#define NO_EDGE (INT64) 0x001fffffffffffffll
|
||||
#define NEXT_EDGE_MASK (INT64) 0xfffffffffffff800ll
|
||||
#define FLAGS_MASK (INT64) 0x0000000000000700ll
|
||||
#define LETTER_MASK (INT64) 0x00000000000000ffll
|
||||
#define NO_EDGE (inT64) 0x001fffffffffffffll
|
||||
#define NEXT_EDGE_MASK (inT64) 0xfffffffffffff800ll
|
||||
#define FLAGS_MASK (inT64) 0x0000000000000700ll
|
||||
#define LETTER_MASK (inT64) 0x00000000000000ffll
|
||||
#endif
|
||||
|
||||
#define MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE 2000000
|
||||
|
||||
#define REFFORMAT "%lld"
|
||||
|
||||
typedef UINT64 EDGE_RECORD;
|
||||
typedef uinT64 EDGE_RECORD;
|
||||
typedef EDGE_RECORD *EDGE_ARRAY;
|
||||
typedef INT64 EDGE_REF;
|
||||
typedef INT64 NODE_REF;
|
||||
typedef inT64 EDGE_REF;
|
||||
typedef inT64 NODE_REF;
|
||||
|
||||
/*---------------------------------------------------------------------
|
||||
V a r i a b l e s
|
||||
----------------------------------------------------------------------*/
|
||||
extern INT32 case_sensative;
|
||||
extern INT32 debug;
|
||||
extern inT32 case_sensative;
|
||||
extern inT32 debug;
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
M a c r o s
|
||||
@ -314,33 +314,33 @@ EDGE_REF edge_char_of(EDGE_ARRAY dawg,
|
||||
int character,
|
||||
int word_end);
|
||||
|
||||
INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);
|
||||
inT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);
|
||||
|
||||
|
||||
INT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
||||
inT32 def_letter_is_okay(EDGE_ARRAY dawg,
|
||||
NODE_REF *node,
|
||||
INT32 char_index,
|
||||
inT32 char_index,
|
||||
char prevchar,
|
||||
const char *word,
|
||||
INT32 word_end);
|
||||
inT32 word_end);
|
||||
|
||||
/*
|
||||
* Allow for externally provided letter_is_okay.
|
||||
*/
|
||||
typedef INT32 (*LETTER_OK_FUNC)(EDGE_ARRAY, NODE_REF*, INT32, char, const char*,
|
||||
INT32);
|
||||
typedef inT32 (*LETTER_OK_FUNC)(EDGE_ARRAY, NODE_REF*, inT32, char, const char*,
|
||||
inT32);
|
||||
extern LETTER_OK_FUNC letter_is_okay;
|
||||
|
||||
|
||||
INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);
|
||||
inT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);
|
||||
|
||||
void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node);
|
||||
|
||||
EDGE_ARRAY read_squished_dawg(const char *filename);
|
||||
|
||||
INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index);
|
||||
inT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, inT32 char_index);
|
||||
|
||||
INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);
|
||||
inT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);
|
||||
|
||||
/*
|
||||
#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS
|
||||
@ -356,18 +356,18 @@ EDGE_REF edge_char_of
|
||||
int character,
|
||||
int word_end));
|
||||
|
||||
INT32 edges_in_node
|
||||
inT32 edges_in_node
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF node));
|
||||
|
||||
INT32 def_letter_is_okay
|
||||
inT32 def_letter_is_okay
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF *node,
|
||||
INT32 char_index,
|
||||
inT32 char_index,
|
||||
char *word,
|
||||
INT32 word_end));
|
||||
inT32 word_end));
|
||||
|
||||
INT32 num_forward_edges
|
||||
inT32 num_forward_edges
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF node));
|
||||
|
||||
@ -378,14 +378,14 @@ void print_dawg_node
|
||||
void read_squished_dawg
|
||||
_ARGS((char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges));
|
||||
inT32 max_num_edges));
|
||||
|
||||
INT32 verify_trailing_punct
|
||||
inT32 verify_trailing_punct
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
char *word,
|
||||
INT32 char_index));
|
||||
inT32 char_index));
|
||||
|
||||
INT32 word_in_dawg
|
||||
inT32 word_in_dawg
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
char *string));
|
||||
|
||||
|
@ -109,7 +109,7 @@ int main (argc, argv)
|
||||
int argc;
|
||||
char **argv;
|
||||
{
|
||||
INT32 max_num_edges = 700000;
|
||||
inT32 max_num_edges = 700000;
|
||||
EDGE_ARRAY dawg;
|
||||
int argnum = 1;
|
||||
int show_nodes = FALSE;
|
||||
@ -161,10 +161,10 @@ int main (argc, argv)
|
||||
|
||||
void match_words (EDGE_ARRAY dawg,
|
||||
char *string,
|
||||
INT32 index,
|
||||
inT32 index,
|
||||
NODE_REF node) {
|
||||
EDGE_REF edge;
|
||||
INT32 word_end;
|
||||
inT32 word_end;
|
||||
|
||||
if (string[index] == '*') {
|
||||
edge = node;
|
||||
|
@ -66,7 +66,7 @@ void check_for_words(EDGE_ARRAY dawg,
|
||||
|
||||
void match_words(EDGE_ARRAY dawg,
|
||||
char *string,
|
||||
INT32 index,
|
||||
inT32 index,
|
||||
NODE_REF node);
|
||||
|
||||
void print_lost_words(EDGE_ARRAY dawg,
|
||||
|
@ -72,14 +72,14 @@
|
||||
**********************************************************************/
|
||||
|
||||
NODE_MAP build_node_map (EDGE_ARRAY dawg,
|
||||
INT32 *num_nodes,
|
||||
INT32 both_links,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 *num_nodes,
|
||||
inT32 both_links,
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
EDGE_REF edge;
|
||||
NODE_MAP node_map;
|
||||
INT32 node_counter;
|
||||
INT32 num_edges;
|
||||
inT32 node_counter;
|
||||
inT32 num_edges;
|
||||
|
||||
node_map = (NODE_MAP) malloc (sizeof (EDGE_REF) * max_num_edges);
|
||||
|
||||
@ -122,14 +122,14 @@ NODE_MAP build_node_map (EDGE_ARRAY dawg,
|
||||
**********************************************************************/
|
||||
|
||||
void compact_dawg (EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
EDGE_REF edge;
|
||||
INT32 num_edges = 0;
|
||||
inT32 num_edges = 0;
|
||||
NODE_REF next_node_space;
|
||||
NODE_REF node = 0;
|
||||
NODE_REF destination;
|
||||
INT32 node_count;
|
||||
inT32 node_count;
|
||||
NODE_MAP node_map;
|
||||
NODE_REF the_next_node;
|
||||
|
||||
@ -201,7 +201,7 @@ void compact_dawg (EDGE_ARRAY dawg,
|
||||
void delete_node (EDGE_ARRAY dawg,
|
||||
NODE_REF node) {
|
||||
EDGE_REF edge = node;
|
||||
INT32 counter = edges_in_node (dawg, node);
|
||||
inT32 counter = edges_in_node (dawg, node);
|
||||
|
||||
/*
|
||||
printf ("node deleted = %d (%d)\n", node, counter);
|
||||
@ -219,15 +219,15 @@ void delete_node (EDGE_ARRAY dawg,
|
||||
|
||||
void write_squished_dawg (const char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
FILE *file;
|
||||
EDGE_REF edge;
|
||||
INT32 num_edges;
|
||||
INT32 node_count = 0;
|
||||
inT32 num_edges;
|
||||
inT32 node_count = 0;
|
||||
NODE_MAP node_map;
|
||||
EDGE_REF old_index;
|
||||
UINT32 temp_record_32;
|
||||
uinT32 temp_record_32;
|
||||
|
||||
if (debug) print_string ("write_squished_dawg");
|
||||
|
||||
@ -242,7 +242,7 @@ void write_squished_dawg (const char *filename,
|
||||
num_edges++;
|
||||
|
||||
num_edges = htonl(num_edges);
|
||||
fwrite (&num_edges, sizeof (INT32), 1, file); /* Write edge count to file */
|
||||
fwrite (&num_edges, sizeof (inT32), 1, file); /* Write edge count to file */
|
||||
num_edges = ntohl(num_edges);
|
||||
|
||||
printf ("%d nodes in DAWG\n", node_count);
|
||||
@ -260,8 +260,8 @@ void write_squished_dawg (const char *filename,
|
||||
do {
|
||||
old_index = next_node (dawg,edge);
|
||||
set_next_edge (dawg, edge, node_map [next_node (dawg, edge)]);
|
||||
temp_record_32 = htonl((UINT32) edge_of (dawg,edge));
|
||||
fwrite (&temp_record_32, sizeof (UINT32), 1, file);
|
||||
temp_record_32 = htonl((uinT32) edge_of (dawg,edge));
|
||||
fwrite (&temp_record_32, sizeof (uinT32), 1, file);
|
||||
set_next_edge (dawg, edge, old_index);
|
||||
} edge_loop (dawg, edge);
|
||||
|
||||
@ -294,9 +294,9 @@ main (argc, argv)
|
||||
time_t end_time;
|
||||
FILE *word_file;
|
||||
char string [CHARS_PER_LINE];
|
||||
INT32 word_count = 0;
|
||||
INT32 max_num_edges = 700000;
|
||||
INT32 reserved_edges = 50000;
|
||||
inT32 word_count = 0;
|
||||
inT32 max_num_edges = 700000;
|
||||
inT32 reserved_edges = 50000;
|
||||
EDGE_ARRAY dawg;
|
||||
char *wordfile = "WORDS";
|
||||
char *dawgfile = "DAWG";
|
||||
|
@ -63,21 +63,21 @@
|
||||
*/
|
||||
|
||||
NODE_MAP build_node_map(EDGE_ARRAY dawg,
|
||||
INT32 *num_nodes,
|
||||
INT32 both_links,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 *num_nodes,
|
||||
inT32 both_links,
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
void compact_dawg(EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
void delete_node(EDGE_ARRAY dawg,
|
||||
NODE_REF node);
|
||||
|
||||
void write_squished_dawg(const char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
#endif
|
||||
|
@ -38,7 +38,7 @@ typedef UNICHAR_ID CLASS_ID;
|
||||
/* define a type for the index (rather than the class id) of a class.
|
||||
Class indexes are sequentially defined, while class id's are defined
|
||||
by the ascii character set. */
|
||||
typedef INT16 CLASS_INDEX;
|
||||
typedef inT16 CLASS_INDEX;
|
||||
typedef CLASS_INDEX CLASS_TO_INDEX[MAX_CLASS_ID + 1];
|
||||
typedef CLASS_ID INDEX_TO_CLASS[MAX_NUM_CLASSES];
|
||||
#define ILLEGAL_CLASS (-1)
|
||||
@ -46,13 +46,13 @@ typedef CLASS_ID INDEX_TO_CLASS[MAX_NUM_CLASSES];
|
||||
/* a PROTO_ID is the index of a prototype within its class. Valid proto
|
||||
id's are 0 to N-1 where N is the number of prototypes that make up the
|
||||
class. */
|
||||
typedef INT16 PROTO_ID;
|
||||
typedef inT16 PROTO_ID;
|
||||
#define NO_PROTO (-1)
|
||||
|
||||
/* FEATURE_ID is the index of a feature within a character description
|
||||
The feature id ranges from 0 to N-1 where N is the number
|
||||
of features in a character description. */
|
||||
typedef UINT8 FEATURE_ID;
|
||||
typedef uinT8 FEATURE_ID;
|
||||
#define NO_FEATURE 255
|
||||
#define NOISE_FEATURE 254
|
||||
#define MISSING_PROTO 254
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "stopper.h"
|
||||
#include "freelist.h"
|
||||
#include "globals.h"
|
||||
#include "tprintf.h"
|
||||
#include "cutil.h"
|
||||
#include "dawg.h"
|
||||
#include <ctype.h>
|
||||
@ -81,7 +82,7 @@ void adjust_word(A_CHOICE *best_choice, float *certainty_array) {
|
||||
float adjust_factor;
|
||||
|
||||
if (adjust_debug)
|
||||
cprintf ("%s %4.2f ",
|
||||
tprintf ("%s %4.2f ",
|
||||
class_string (best_choice), class_probability (best_choice));
|
||||
|
||||
this_word = class_string (best_choice);
|
||||
@ -95,13 +96,13 @@ void adjust_word(A_CHOICE *best_choice, float *certainty_array) {
|
||||
class_permuter (best_choice) = FREQ_DAWG_PERM;
|
||||
adjust_factor = freq_word;
|
||||
if (adjust_debug)
|
||||
cprintf (", F, %4.2f ", freq_word);
|
||||
tprintf(", F, %4.2f ", freq_word);
|
||||
}
|
||||
else {
|
||||
class_probability (best_choice) *= good_word;
|
||||
adjust_factor = good_word;
|
||||
if (adjust_debug)
|
||||
cprintf (", %4.2f ", good_word);
|
||||
tprintf(", %4.2f ", good_word);
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -109,10 +110,10 @@ void adjust_word(A_CHOICE *best_choice, float *certainty_array) {
|
||||
adjust_factor = ok_word;
|
||||
if (adjust_debug) {
|
||||
if (!case_ok (this_word, class_lengths (best_choice)))
|
||||
cprintf (", C");
|
||||
tprintf(", C");
|
||||
if (punctuation_ok (this_word, class_lengths (best_choice)) == -1)
|
||||
cprintf (", P");
|
||||
cprintf (", %4.2f ", ok_word);
|
||||
tprintf(", P");
|
||||
tprintf(", %4.2f ", ok_word);
|
||||
}
|
||||
}
|
||||
|
||||
@ -121,7 +122,7 @@ void adjust_word(A_CHOICE *best_choice, float *certainty_array) {
|
||||
LogNewWordChoice(best_choice, adjust_factor, certainty_array);
|
||||
|
||||
if (adjust_debug)
|
||||
cprintf (" --> %4.2f\n", class_probability (best_choice));
|
||||
tprintf(" --> %4.2f\n", class_probability (best_choice));
|
||||
}
|
||||
|
||||
|
||||
@ -178,9 +179,13 @@ void append_next_choice( /*previous option */
|
||||
}
|
||||
else {
|
||||
if (rating_array[char_index] * rating_margin + rating_pad < rating) {
|
||||
if (dawg_debug)
|
||||
cprintf ("early pruned word (%s, rating=%4.2f, limit=%4.2f)\n",
|
||||
word, rating, *limit);
|
||||
if (dawg_debug) {
|
||||
tprintf("early pruned word rating=%4.2f, limit=%4.2f",
|
||||
rating, *limit);
|
||||
print_word_string(word);
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -190,7 +195,7 @@ void append_next_choice( /*previous option */
|
||||
char_index > 0) {
|
||||
*limit = rating;
|
||||
if (dawg_debug)
|
||||
cprintf ("new hyphen choice = %s\n", word);
|
||||
tprintf("new hyphen choice = %s\n", word);
|
||||
better_choice = new_choice (word, unichar_lengths, rating, certainty, -1, permuter);
|
||||
adjust_word(better_choice, certainty_array);
|
||||
push_on(*result, better_choice);
|
||||
@ -209,7 +214,7 @@ void append_next_choice( /*previous option */
|
||||
/* Add a new word choice */
|
||||
if (word_ending) {
|
||||
if (dawg_debug == 1)
|
||||
cprintf ("new choice = %s\n", word);
|
||||
tprintf("new choice = %s\n", word);
|
||||
*limit = rating;
|
||||
|
||||
better_choice = new_choice (hyphen_tail (word), unichar_lengths +
|
||||
@ -228,6 +233,16 @@ void append_next_choice( /*previous option */
|
||||
rating_array, certainty_array, last_word));
|
||||
}
|
||||
} else {
|
||||
if (dawg_debug == 1) {
|
||||
tprintf("letter not OK at char %d, index %d + sub index %d/%d\n",
|
||||
char_index, unichar_offsets[char_index],
|
||||
sub_offset, unichar_lengths[char_index]);
|
||||
tprintf("Word");
|
||||
print_word_string(word);
|
||||
tprintf("\nRejected tail");
|
||||
print_word_string(word + unichar_offsets[char_index]);
|
||||
tprintf("\n");
|
||||
}
|
||||
if (node != 0)
|
||||
node = node_saved;
|
||||
}
|
||||
@ -262,10 +277,11 @@ CHOICES dawg_permute(EDGE_ARRAY dawg,
|
||||
int word_ending = FALSE;
|
||||
|
||||
if (dawg_debug) {
|
||||
cprintf ("dawg_permute (node=" REFFORMAT ", char_index=%d, limit=%f, ",
|
||||
node, char_index, *limit);
|
||||
cprintf ("word=%s, rating=%4.2f, certainty=%4.2f)\n",
|
||||
word, rating, certainty);
|
||||
tprintf("dawg_permute (node=" REFFORMAT ", char_index=%d, limit=%f, word=",
|
||||
node, char_index, *limit);
|
||||
print_word_string(word);
|
||||
tprintf(", rating=%4.2f, certainty=%4.2f)\n",
|
||||
rating, certainty);
|
||||
}
|
||||
|
||||
/* Check for EOW */
|
||||
@ -304,7 +320,7 @@ void dawg_permute_and_select(const char *string,
|
||||
char permuter,
|
||||
CHOICES_LIST character_choices,
|
||||
A_CHOICE *best_choice,
|
||||
INT16 system_words) {
|
||||
inT16 system_words) {
|
||||
CHOICES result = NIL;
|
||||
char word[UNICHAR_LEN * MAX_WERD_LENGTH + 1];
|
||||
char unichar_lengths[MAX_WERD_LENGTH + 1];
|
||||
@ -344,8 +360,9 @@ void dawg_permute_and_select(const char *string,
|
||||
char_index, &rating, word, unichar_lengths, unichar_offsets, 0.0, 0.0,
|
||||
rating_array, certainty_array, is_last_word ());
|
||||
|
||||
if (display_ratings && result)
|
||||
if (display_ratings && result) {
|
||||
print_choices(string, result);
|
||||
}
|
||||
|
||||
while (result != NIL) {
|
||||
if (best_probability (result) < class_probability (best_choice)) {
|
||||
|
@ -89,7 +89,7 @@ void dawg_permute_and_select(const char *string,
|
||||
char permuter,
|
||||
CHOICES_LIST character_choices,
|
||||
A_CHOICE *best_choice,
|
||||
INT16 system_words);
|
||||
inT16 system_words);
|
||||
|
||||
void init_permdawg_vars();
|
||||
void init_permdawg();
|
||||
|
@ -88,7 +88,7 @@ int permute_only_top = 0;
|
||||
|
||||
#if 0
|
||||
//0x0=.
|
||||
static INT32 bigram_counts[256][3] = { {
|
||||
static inT32 bigram_counts[256][3] = { {
|
||||
0, 0, 0
|
||||
},
|
||||
{ //0x1=.
|
||||
@ -1139,9 +1139,7 @@ void permute_characters(CHOICES_LIST char_choices,
|
||||
free_choice(this_choice);
|
||||
|
||||
if (display_ratings)
|
||||
cprintf ("permute_characters: %-15s %4.2f %4.2f\n",
|
||||
class_string (best_choice),
|
||||
class_probability (best_choice), class_certainty (best_choice));
|
||||
print_word_choice("permute_characters", best_choice);
|
||||
}
|
||||
|
||||
|
||||
@ -1346,8 +1344,8 @@ A_CHOICE *permute_top_choice(CHOICES_LIST character_choices,
|
||||
|
||||
register CHOICES this_char;
|
||||
register const char* ch;
|
||||
register INT8 lower_done;
|
||||
register INT8 upper_done;
|
||||
register inT8 lower_done;
|
||||
register inT8 upper_done;
|
||||
|
||||
prev_char[0] = '\0';
|
||||
|
||||
@ -1566,9 +1564,9 @@ const char* choose_il1(const char *first_char, //first choice
|
||||
const char *prev_char, //prev in word
|
||||
const char *next_char, //next in word
|
||||
const char *next_next_char) { //after next next in word
|
||||
INT32 type1; //1/I/l type of first choice
|
||||
INT32 type2; //1/I/l type of second choice
|
||||
INT32 type3; //1/I/l type of third choice
|
||||
inT32 type1; //1/I/l type of first choice
|
||||
inT32 type2; //1/I/l type of second choice
|
||||
inT32 type3; //1/I/l type of third choice
|
||||
|
||||
int first_char_length = strlen(first_char);
|
||||
int prev_char_length = strlen(prev_char);
|
||||
|
@ -54,7 +54,7 @@
|
||||
----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static INT32 debug_1 = 0;
|
||||
static inT32 debug_1 = 0;
|
||||
|
||||
/*
|
||||
----------------------------------------------------------------------
|
||||
@ -81,9 +81,9 @@ void collapse_source_nodes (EDGE_ARRAY dawg,
|
||||
NODE_REF source_node_1,
|
||||
NODE_REF source_node_2,
|
||||
NODE_REF dest_node,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
INT32 num_links;
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
inT32 num_links;
|
||||
EDGE_REF edge;
|
||||
/* NODE_REF new_source_1; */
|
||||
|
||||
@ -152,14 +152,14 @@ void collapse_source_nodes (EDGE_ARRAY dawg,
|
||||
* reduction and return TRUE. If not, return FALSE.
|
||||
**********************************************************************/
|
||||
|
||||
INT32 eliminate_redundant_edges (EDGE_ARRAY dawg,
|
||||
inT32 eliminate_redundant_edges (EDGE_ARRAY dawg,
|
||||
NODE_REF node,
|
||||
EDGE_REF edge_1,
|
||||
EDGE_REF edge_2,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
static INT32 elim_count = 0;
|
||||
static INT32 keep_count = 0;
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
static inT32 elim_count = 0;
|
||||
static inT32 keep_count = 0;
|
||||
|
||||
if (same_output (dawg,
|
||||
next_node (dawg, edge_1),
|
||||
@ -195,7 +195,7 @@ INT32 eliminate_redundant_edges (EDGE_ARRAY dawg,
|
||||
* Compare two edges to see which one of the letters is larger.
|
||||
**********************************************************************/
|
||||
|
||||
INT32 letter_order (const void* edge1_ptr,
|
||||
inT32 letter_order (const void* edge1_ptr,
|
||||
const void* edge2_ptr) {
|
||||
|
||||
if (letter_of_edge(*((EDGE_RECORD*) edge1_ptr)) <
|
||||
@ -220,7 +220,7 @@ INT32 letter_order (const void* edge1_ptr,
|
||||
*/
|
||||
|
||||
void print_n_edges (EDGE_RECORD *edge1,
|
||||
INT32 n) {
|
||||
inT32 n) {
|
||||
EDGE_RECORD *edge;
|
||||
|
||||
edge = edge1;
|
||||
@ -244,16 +244,16 @@ void print_n_edges (EDGE_RECORD *edge1,
|
||||
* possible then FALSE is returned.
|
||||
**********************************************************************/
|
||||
|
||||
INT32 reduce_lettered_edges (EDGE_ARRAY dawg,
|
||||
inT32 reduce_lettered_edges (EDGE_ARRAY dawg,
|
||||
EDGE_REF *edge,
|
||||
NODE_REF node,
|
||||
NODE_MARKER reduced_nodes,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
EDGE_REF edge_1;
|
||||
EDGE_REF edge_2;
|
||||
INT32 fixed_one;
|
||||
INT32 did_something = FALSE;
|
||||
inT32 fixed_one;
|
||||
inT32 did_something = FALSE;
|
||||
|
||||
if (debug_1)
|
||||
printf ("reduce_lettered_edges (edge=" REFFORMAT ")\n", *edge);
|
||||
@ -315,13 +315,13 @@ INT32 reduce_lettered_edges (EDGE_ARRAY dawg,
|
||||
void reduce_node_input (EDGE_ARRAY dawg,
|
||||
NODE_REF node,
|
||||
NODE_MARKER reduced_nodes,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
EDGE_REF edge_1;
|
||||
INT32 forward_edges = num_forward_edges (dawg, node);
|
||||
INT32 backward_edges = edges_in_node (dawg, node) - forward_edges;
|
||||
inT32 forward_edges = num_forward_edges (dawg, node);
|
||||
inT32 backward_edges = edges_in_node (dawg, node) - forward_edges;
|
||||
|
||||
static INT32 num_nodes_reduced = 0;
|
||||
static inT32 num_nodes_reduced = 0;
|
||||
|
||||
if (debug_1) {
|
||||
printf ("reduce_node_input (node=" REFFORMAT ")\n", node);
|
||||
@ -375,7 +375,7 @@ void reduce_node_input (EDGE_ARRAY dawg,
|
||||
* they can be collapsed into a single node.
|
||||
**********************************************************************/
|
||||
|
||||
INT32 same_output (EDGE_ARRAY dawg,
|
||||
inT32 same_output (EDGE_ARRAY dawg,
|
||||
NODE_REF node1,
|
||||
NODE_REF node2) {
|
||||
if (debug_1) printf ("Edge nodes = " REFFORMAT " , " \
|
||||
@ -407,10 +407,10 @@ INT32 same_output (EDGE_ARRAY dawg,
|
||||
**********************************************************************/
|
||||
|
||||
void trie_to_dawg (EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
NODE_MARKER reduced_nodes;
|
||||
INT32 x;
|
||||
inT32 x;
|
||||
|
||||
max_new_attempts = 100000;
|
||||
compact_dawg (dawg, max_num_edges, reserved_edges);
|
||||
|
@ -71,42 +71,42 @@ void collapse_source_nodes(EDGE_ARRAY dawg,
|
||||
NODE_REF source_node_1,
|
||||
NODE_REF source_node_2,
|
||||
NODE_REF dest_node,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
INT32 eliminate_redundant_edges(EDGE_ARRAY dawg,
|
||||
inT32 eliminate_redundant_edges(EDGE_ARRAY dawg,
|
||||
NODE_REF node,
|
||||
EDGE_REF edge_1,
|
||||
EDGE_REF edge_2,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
INT32 letter_order(const void* edge1_ptr,
|
||||
inT32 letter_order(const void* edge1_ptr,
|
||||
const void* edge2_ptr);
|
||||
|
||||
void print_n_edges(EDGE_RECORD *edge1,
|
||||
INT32 n);
|
||||
inT32 n);
|
||||
|
||||
INT32 reduce_lettered_edges(EDGE_ARRAY dawg,
|
||||
inT32 reduce_lettered_edges(EDGE_ARRAY dawg,
|
||||
EDGE_REF *edge,
|
||||
NODE_REF node,
|
||||
NODE_MARKER reduced_nodes,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
void reduce_node_input(EDGE_ARRAY dawg,
|
||||
NODE_REF node,
|
||||
NODE_MARKER reduced_nodes,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
INT32 same_output(EDGE_ARRAY dawg,
|
||||
inT32 same_output(EDGE_ARRAY dawg,
|
||||
NODE_REF node1,
|
||||
NODE_REF node2);
|
||||
|
||||
void trie_to_dawg(EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -100,7 +100,7 @@ SEARCH_STATE bin_to_chunks(STATE *state, int num_joints) {
|
||||
void bin_to_pieces(STATE *state, int num_joints, PIECES_STATE pieces) {
|
||||
int x;
|
||||
unsigned int mask; /* Bit mask */
|
||||
INT16 num_pieces = 0;
|
||||
inT16 num_pieces = 0;
|
||||
/* Preset mask */
|
||||
if (debug_8)
|
||||
print_state ("bin_to_pieces = ", state, num_joints);
|
||||
@ -187,8 +187,8 @@ STATE *new_state(STATE *oldstate) {
|
||||
* Return the number of ones that are in this state.
|
||||
**********************************************************************/
|
||||
int ones_in_state(STATE *state, int num_joints) {
|
||||
INT8 num_ones = 0;
|
||||
INT8 x;
|
||||
inT8 num_ones = 0;
|
||||
inT8 x;
|
||||
unsigned int mask;
|
||||
|
||||
if (num_joints > 32) /* Preset mask */
|
||||
@ -283,7 +283,7 @@ int compare_states(STATE *true_state, STATE *this_state, int *blob_index) {
|
||||
int true_index; //index of true blob
|
||||
int index; //current
|
||||
int result = 0; //return value
|
||||
UINT32 mask;
|
||||
uinT32 mask;
|
||||
|
||||
if (true_state->part1 == this_state->part1
|
||||
&& true_state->part2 == this_state->part2)
|
||||
|
@ -37,14 +37,14 @@
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UINT32 part1;
|
||||
UINT32 part2;
|
||||
uinT32 part1;
|
||||
uinT32 part2;
|
||||
} STATE;
|
||||
|
||||
typedef int *SEARCH_STATE; /* State variable for search */
|
||||
|
||||
/* State variable for search */
|
||||
typedef UINT8 PIECES_STATE[MAX_NUM_CHUNKS + 2];
|
||||
typedef uinT8 PIECES_STATE[MAX_NUM_CHUNKS + 2];
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
|
@ -54,7 +54,7 @@ typedef LIST AMBIG_TABLE;
|
||||
typedef struct
|
||||
{
|
||||
UNICHAR_ID Class;
|
||||
UINT16 NumChunks;
|
||||
uinT16 NumChunks;
|
||||
float Certainty;
|
||||
}
|
||||
|
||||
|
@ -24,11 +24,11 @@
|
||||
#include "choicearr.h"
|
||||
#include "states.h"
|
||||
|
||||
typedef UINT8 BLOB_WIDTH;
|
||||
typedef uinT8 BLOB_WIDTH;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
INT16 index;
|
||||
inT16 index;
|
||||
unsigned bad_length:8;
|
||||
unsigned good_length:8;
|
||||
} DANGERR;
|
||||
|
@ -36,11 +36,11 @@
|
||||
/*----------------------------------------------------------------------
|
||||
V a r i a b l e s
|
||||
----------------------------------------------------------------------*/
|
||||
static INT32 move_counter = 0;
|
||||
static INT32 new_counter = 0;
|
||||
static INT32 edge_counter = 0;
|
||||
static inT32 move_counter = 0;
|
||||
static inT32 new_counter = 0;
|
||||
static inT32 edge_counter = 0;
|
||||
|
||||
INT32 max_new_attempts = 0;
|
||||
inT32 max_new_attempts = 0;
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
@ -59,8 +59,8 @@ void add_edge_linkage(EDGE_ARRAY dawg,
|
||||
EDGE_RECORD word_end) {
|
||||
EDGE_REF edge1 = node1;
|
||||
EDGE_REF edge2;
|
||||
INT32 num_edges = edges_in_node (dawg, node1);
|
||||
INT32 last_one;
|
||||
inT32 num_edges = edges_in_node (dawg, node1);
|
||||
inT32 last_one;
|
||||
|
||||
word_end = (word_end ? WERD_END_FLAG : 0);
|
||||
|
||||
@ -119,8 +119,8 @@ bool add_new_edge(EDGE_ARRAY dawg,
|
||||
NODE_REF *node2,
|
||||
char character,
|
||||
EDGE_RECORD word_end,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
int direction;
|
||||
|
||||
if (debug)
|
||||
@ -150,14 +150,14 @@ bool add_new_edge(EDGE_ARRAY dawg,
|
||||
**********************************************************************/
|
||||
void add_word_to_dawg(EDGE_ARRAY dawg,
|
||||
char *string,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
EDGE_REF edge;
|
||||
NODE_REF last_node = 0;
|
||||
NODE_REF the_next_node;
|
||||
INT32 i;
|
||||
INT32 still_finding_chars = TRUE;
|
||||
INT32 word_end = FALSE;
|
||||
inT32 i;
|
||||
inT32 still_finding_chars = TRUE;
|
||||
inT32 word_end = FALSE;
|
||||
bool add_failed = false;
|
||||
|
||||
if (debug) cprintf("Adding word %s\n", string);
|
||||
@ -233,8 +233,8 @@ void add_word_to_dawg(EDGE_ARRAY dawg,
|
||||
* Initialize the DAWG data structure for further use.  Reset each of
|
||||
* the edge cells to NO_EDGE.
|
||||
**********************************************************************/
|
||||
void initialize_dawg(EDGE_ARRAY dawg, INT32 max_num_edges) {
|
||||
INT32 x;
|
||||
void initialize_dawg(EDGE_ARRAY dawg, inT32 max_num_edges) {
|
||||
inT32 x;
|
||||
|
||||
|
||||
//changed by jetsoft
|
||||
@ -257,14 +257,14 @@ void initialize_dawg(EDGE_ARRAY dawg, INT32 max_num_edges) {
|
||||
**********************************************************************/
|
||||
bool move_node_if_needed(EDGE_ARRAY dawg,
|
||||
NODE_REF* node,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
if (room_in_node(dawg, *node))
|
||||
return true;
|
||||
|
||||
NODE_REF this_new_node;
|
||||
EDGE_REF edge;
|
||||
INT32 num_edges = edges_in_node (dawg, *node);
|
||||
inT32 num_edges = edges_in_node (dawg, *node);
|
||||
|
||||
if (debug)
|
||||
print_dawg_node(dawg, *node);
|
||||
@ -309,13 +309,13 @@ bool move_node_if_needed(EDGE_ARRAY dawg,
|
||||
* consists of the requested number of edges.
|
||||
**********************************************************************/
|
||||
NODE_REF new_dawg_node(EDGE_ARRAY dawg,
|
||||
INT32 num_edges,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
INT32 i;
|
||||
INT32 n;
|
||||
INT32 edge_index;
|
||||
INT32 edge_collision;
|
||||
inT32 num_edges,
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
inT32 i;
|
||||
inT32 n;
|
||||
inT32 edge_index;
|
||||
inT32 edge_collision;
|
||||
|
||||
/* Try several times */
|
||||
for (i=0; i<NUM_PLACEMENT_ATTEMPTS; i++) {
|
||||
@ -346,9 +346,9 @@ NODE_REF new_dawg_node(EDGE_ARRAY dawg,
|
||||
*
|
||||
* Print the contents of one of the nodes in the DAWG.
|
||||
**********************************************************************/
|
||||
void print_dawg_map (EDGE_ARRAY dawg, INT32 max_num_edges) {
|
||||
void print_dawg_map (EDGE_ARRAY dawg, inT32 max_num_edges) {
|
||||
EDGE_REF edge = 0;
|
||||
INT32 counter = 0;
|
||||
inT32 counter = 0;
|
||||
|
||||
do {
|
||||
if (edge_occupied (dawg, edge))
|
||||
@ -368,12 +368,12 @@ void print_dawg_map (EDGE_ARRAY dawg, INT32 max_num_edges) {
|
||||
**********************************************************************/
|
||||
void read_full_dawg (const char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges) {
|
||||
inT32 max_num_edges) {
|
||||
FILE *file;
|
||||
EDGE_REF node_index;
|
||||
INT32 num_edges;
|
||||
INT32 node_count;
|
||||
INT32 error_occured = FALSE;
|
||||
inT32 num_edges;
|
||||
inT32 node_count;
|
||||
inT32 error_occured = FALSE;
|
||||
|
||||
if (debug) print_string ("read_dawg");
|
||||
|
||||
@ -381,12 +381,12 @@ void read_full_dawg (const char *filename,
|
||||
|
||||
file = open_file (filename, "rb");
|
||||
|
||||
fread (&node_count, sizeof (INT32), 1, file);
|
||||
fread (&node_count, sizeof (inT32), 1, file);
|
||||
|
||||
while (node_count-- > 0) {
|
||||
|
||||
fread (&node_index, sizeof (EDGE_REF), 1, file);
|
||||
fread (&num_edges, sizeof (INT32), 1, file);
|
||||
fread (&num_edges, sizeof (inT32), 1, file);
|
||||
|
||||
assert (node_index + num_edges < max_num_edges);
|
||||
fread (&dawg[node_index], sizeof (EDGE_RECORD), num_edges, file);
|
||||
@ -410,8 +410,8 @@ void read_full_dawg (const char *filename,
|
||||
**********************************************************************/
|
||||
void read_word_list(const char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges) {
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges) {
|
||||
FILE *word_file;
|
||||
char string [CHARS_PER_LINE];
|
||||
int word_count = 0;
|
||||
@ -516,10 +516,10 @@ void remove_edge_linkage(EDGE_ARRAY dawg,
|
||||
EDGE_RECORD direction,
|
||||
char character,
|
||||
EDGE_RECORD word_end) {
|
||||
INT32 forward_edges;
|
||||
INT32 num_edges;
|
||||
inT32 forward_edges;
|
||||
inT32 num_edges;
|
||||
NODE_REF e = node;
|
||||
INT32 last_flag;
|
||||
inT32 last_flag;
|
||||
|
||||
forward_edges = num_forward_edges (dawg, node);
|
||||
num_edges = edges_in_node (dawg, node);
|
||||
@ -571,7 +571,7 @@ void remove_edge_linkage(EDGE_ARRAY dawg,
|
||||
* Check to see if there is enough room left in this node for one more
|
||||
* edge link. This may be a forward or backward link.
|
||||
**********************************************************************/
|
||||
INT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
inT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
EDGE_REF edge = node;
|
||||
|
||||
if (edge_occupied (dawg, edge + edges_in_node (dawg, node))) {
|
||||
@ -589,11 +589,11 @@ INT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node) {
|
||||
* Write the DAWG out to a file
|
||||
**********************************************************************/
|
||||
void write_full_dawg (const char *filename, EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges) {
|
||||
inT32 max_num_edges) {
|
||||
FILE *file;
|
||||
EDGE_REF edge;
|
||||
INT32 num_edges;
|
||||
INT32 node_count = 0;
|
||||
inT32 num_edges;
|
||||
inT32 node_count = 0;
|
||||
NODE_REF node;
|
||||
|
||||
if (debug) print_string ("write_full_dawg");
|
||||
@ -609,7 +609,7 @@ void write_full_dawg (const char *filename, EDGE_ARRAY dawg,
|
||||
}
|
||||
|
||||
file = open_file (filename, "wb");
|
||||
fwrite (&node_count, sizeof (INT32), 1, file);
|
||||
fwrite (&node_count, sizeof (inT32), 1, file);
|
||||
|
||||
node_count = 0;
|
||||
for (edge=0; edge<max_num_edges; edge++) {
|
||||
@ -621,7 +621,7 @@ void write_full_dawg (const char *filename, EDGE_ARRAY dawg,
|
||||
assert ((node + num_edges < max_num_edges) && (num_edges > 0));
|
||||
|
||||
fwrite (&edge, sizeof (EDGE_REF), 1, file);
|
||||
fwrite (&num_edges, sizeof (INT32), 1, file);
|
||||
fwrite (&num_edges, sizeof (inT32), 1, file);
|
||||
fwrite (&edge_of (dawg,edge), sizeof (EDGE_RECORD), num_edges, file);
|
||||
|
||||
node_count++;
|
||||
|
74
dict/trie.h
74
dict/trie.h
@ -34,7 +34,7 @@
|
||||
/*----------------------------------------------------------------------
|
||||
T y p e s
|
||||
----------------------------------------------------------------------*/
|
||||
#define NUM_PLACEMENT_ATTEMPTS (INT32) 100000
|
||||
#define NUM_PLACEMENT_ATTEMPTS (inT32) 100000
|
||||
#define EDGE_NUM_MARGIN (EDGE_RECORD) 2
|
||||
#define DEFAULT_NODE_SIZE (EDGE_RECORD) 2
|
||||
#define FORWARD_EDGE (EDGE_RECORD) 0
|
||||
@ -47,7 +47,7 @@ typedef char *NODE_MARKER;
|
||||
V a r i a b l e s
|
||||
----------------------------------------------------------------------*/
|
||||
|
||||
extern INT32 max_new_attempts;
|
||||
extern inT32 max_new_attempts;
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
M a c r o s
|
||||
@ -131,36 +131,36 @@ bool add_new_edge(EDGE_ARRAY dawg,
|
||||
NODE_REF *node2,
|
||||
char character,
|
||||
EDGE_RECORD word_end,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
void add_word_to_dawg(EDGE_ARRAY dawg,
|
||||
char *string,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
void initialize_dawg(EDGE_ARRAY dawg, INT32 max_num_edges);
|
||||
void initialize_dawg(EDGE_ARRAY dawg, inT32 max_num_edges);
|
||||
|
||||
bool move_node_if_needed(EDGE_ARRAY dawg,
|
||||
NODE_REF* node,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
NODE_REF new_dawg_node(EDGE_ARRAY dawg,
|
||||
INT32 num_edges,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 num_edges,
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
void print_dawg_map (EDGE_ARRAY dawg, INT32 max_num_edges);
|
||||
void print_dawg_map (EDGE_ARRAY dawg, inT32 max_num_edges);
|
||||
|
||||
void read_full_dawg (const char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges);
|
||||
inT32 max_num_edges);
|
||||
|
||||
void read_word_list(const char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges);
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges);
|
||||
|
||||
void relocate_edge(EDGE_ARRAY dawg,
|
||||
NODE_REF node,
|
||||
@ -180,11 +180,11 @@ void remove_edge_linkage(EDGE_ARRAY dawg,
|
||||
char character,
|
||||
EDGE_RECORD word_end);
|
||||
|
||||
INT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node);
|
||||
inT32 room_in_node(EDGE_ARRAY dawg, NODE_REF node);
|
||||
|
||||
void write_full_dawg (const char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges);
|
||||
inT32 max_num_edges);
|
||||
|
||||
|
||||
/*
|
||||
@ -199,46 +199,46 @@ void add_edge_linkage
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF node1,
|
||||
NODE_REF node2,
|
||||
INT32 direction,
|
||||
inT32 direction,
|
||||
int character,
|
||||
INT32 word_end));
|
||||
inT32 word_end));
|
||||
|
||||
void add_new_edge
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF *node1,
|
||||
NODE_REF *node2,
|
||||
int character,
|
||||
INT32 word_end,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges));
|
||||
inT32 word_end,
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges));
|
||||
|
||||
void add_word_to_dawg
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
char *string,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges));
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges));
|
||||
|
||||
void initialize_dawg
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges));
|
||||
inT32 max_num_edges));
|
||||
|
||||
NODE_REF move_node
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF node,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges));
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges));
|
||||
|
||||
NODE_REF new_dawg_node
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
INT32 num_edges,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges));
|
||||
inT32 num_edges,
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges));
|
||||
|
||||
void read_word_list
|
||||
_ARGS((char *filename,
|
||||
EDGE_ARRAY dawg,
|
||||
INT32 max_num_edges,
|
||||
INT32 reserved_edges));
|
||||
inT32 max_num_edges,
|
||||
inT32 reserved_edges));
|
||||
|
||||
void relocate_edge
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
@ -251,17 +251,17 @@ void remove_edge
|
||||
NODE_REF node1,
|
||||
NODE_REF node2,
|
||||
int character,
|
||||
INT32 word_end));
|
||||
inT32 word_end));
|
||||
|
||||
void remove_edge_linkage
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF node,
|
||||
NODE_REF next,
|
||||
INT32 direction,
|
||||
inT32 direction,
|
||||
int character,
|
||||
INT32 word_end));
|
||||
inT32 word_end));
|
||||
|
||||
INT32 room_in_node
|
||||
inT32 room_in_node
|
||||
_ARGS((EDGE_ARRAY dawg,
|
||||
NODE_REF node));
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user