Fixed the extern C mismatches properly.

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@82 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith 2007-07-18 01:00:54 +00:00
parent f4baca27c8
commit 1943de9aa9
9 changed files with 344 additions and 296 deletions

View File

@ -50,6 +50,7 @@
#include "notdll.h"
#include "tordvars.h"
#include "adaptmatch.h"
#include "globals.h"
#define MIN_FONT_ROW_COUNT 8
#define MAX_XHEIGHT_DIFF 3
@ -148,18 +149,9 @@ EXTERN double_VAR (test_pt_x, 99999.99, "xcoord");
EXTERN double_VAR (test_pt_y, 99999.99, "ycoord");
extern int MatcherDebugLevel;
extern "C" { extern int display_ratings; }
extern int display_ratings;
extern int number_debug;
extern int adjust_debug;
/*
extern "C" {
extern int MatcherDebugLevel;
extern int display_ratings;
extern int number_debug;
extern int adjust_debug;
// extern int LearningDebugLevel;
};
*/
FILE *choice_file = NULL; //Choice file ptr
CLISTIZEH (PBLOB) CLISTIZE (PBLOB)
@ -569,8 +561,8 @@ if (dopasses==1) return;
////changed by jetsoft
//needed for dll to output memory structure
if ((dopasses==0 || dopasses==2) && monitor)
output_pass (page_res_it,true, target_word_box);
if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv))
output_pass(page_res_it, ocr_char_space() > 0, target_word_box);
// end jetsoft
}
@ -620,34 +612,33 @@ void classify_word_pass1( //recog one word
tess_default_matcher,
word->raw_choice, &blob_choices,
word->outword);
/*
Test for TESS screw up on word. Recog_word has already ensured that the
choice list, outword blob lists and best_choice string are the same
length. A TESS screw up is indicated by a blank filled or 0 length string.
*/
if ((word->best_choice->string ().length () == 0) ||
if ((word->best_choice->lengths ().length () == 0) ||
(strspn (word->best_choice->string ().string (), " ") ==
word->best_choice->string ().length ())) {
word->done = FALSE; //Try again on pass2 - adaption may help
word->tess_failed = TRUE;
word->reject_map.initialise (word->best_choice->string ().length ());
word->reject_map.initialise (word->best_choice->lengths ().length ());
word->reject_map.rej_word_tess_failure ();
}
else {
word->tess_failed = FALSE;
if ((word->best_choice->string ().length () !=
if ((word->best_choice->lengths ().length () !=
word->outword->blob_list ()->length ()) ||
(word->best_choice->string ().length () != blob_choices.length ())) {
(word->best_choice->lengths ().length () != blob_choices.length ())) {
tprintf
("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
word->best_choice->string ().string (),
word->best_choice->string ().length (),
word->best_choice->lengths ().length (),
word->outword->blob_list ()->length (), blob_choices.length ());
}
ASSERT_HOST (word->best_choice->string ().length () ==
ASSERT_HOST (word->best_choice->lengths ().length () ==
word->outword->blob_list ()->length ());
ASSERT_HOST (word->best_choice->string ().length () ==
ASSERT_HOST (word->best_choice->lengths ().length () ==
blob_choices.length ());
/*
@ -664,12 +655,12 @@ void classify_word_pass1( //recog one word
fix_rep_char(word);
}
else {
fix_quotes ((char *) word->best_choice->string ().string (),
fix_quotes (word->best_choice,
//turn to double
word->outword, &blob_choices);
if (tessedit_fix_hyphens)
//turn 2 to 1
fix_hyphens ((char *) word->best_choice->string ().string (), word->outword, &blob_choices);
fix_hyphens (word->best_choice, word->outword, &blob_choices);
record_certainty (word->best_choice->certainty (), 1);
//accounting
@ -692,7 +683,7 @@ void classify_word_pass1( //recog one word
rejmap = NULL;
else {
ASSERT_HOST (word->reject_map.length () ==
word->best_choice->string ().length ());
word->best_choice->lengths ().length ());
for (index = 0; index < word->reject_map.length (); index++) {
if (adapt_ok || word->reject_map[index].accepted ())
@ -704,7 +695,9 @@ void classify_word_pass1( //recog one word
}
//adapt to it
tess_adapter (word->outword, &word->denorm, word->best_choice->string ().string (), word->raw_choice->string ().string (), rejmap);
tess_adapter (word->outword, &word->denorm,
*word->best_choice,
*word->raw_choice, rejmap);
}
if (tessedit_enable_doc_dict)
@ -712,10 +705,12 @@ void classify_word_pass1( //recog one word
set_word_fonts(word, &blob_choices);
}
}
#if 0
if (tessedit_print_text) {
write_cooked_text (bln_word, word->best_choice->string (),
word->done, FALSE, stdout);
}
#endif
delete bln_word;
blob_choices.deep_clear ();
}
@ -898,10 +893,12 @@ void classify_word_pass2( //word to do
#endif
set_global_subloc_code(SUBLOC_NORM);
#if 0
if (tessedit_print_text) {
write_cooked_text (word->outword, word->best_choice->string (),
word->done, done_this_pass, stdout);
}
#endif
check_debug_pt (word, 50);
}
@ -971,18 +968,18 @@ void match_word_pass2( //recog one word
// tprintf("Empty word produced\n");
}
else {
if ((word->best_choice->string ().length () !=
if ((word->best_choice->lengths ().length () !=
word->outword->blob_list ()->length ()) ||
(word->best_choice->string ().length () != blob_choices.length ())) {
(word->best_choice->lengths ().length () != blob_choices.length ())) {
tprintf
("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
word->best_choice->string ().string (),
word->best_choice->string ().length (),
word->best_choice->lengths ().length (),
word->outword->blob_list ()->length (), blob_choices.length ());
}
ASSERT_HOST (word->best_choice->string ().length () ==
ASSERT_HOST (word->best_choice->lengths ().length () ==
word->outword->blob_list ()->length ());
ASSERT_HOST (word->best_choice->string ().length () ==
ASSERT_HOST (word->best_choice->lengths ().length () ==
blob_choices.length ());
word->tess_failed = FALSE;
@ -990,29 +987,29 @@ void match_word_pass2( //recog one word
fix_rep_char(word);
}
else {
fix_quotes ((char *) word->best_choice->string ().string (),
fix_quotes (word->best_choice,
word->outword, &blob_choices);
if (tessedit_fix_hyphens)
fix_hyphens ((char *) word->best_choice->string ().string (),
fix_hyphens (word->best_choice,
word->outword, &blob_choices);
/* Dont trust fix_quotes! - though I think I've fixed the bug */
if ((word->best_choice->string ().length () !=
word->outword->blob_list ()->length ()) ||
(word->best_choice->string ().length () !=
blob_choices.length ())) {
if ((word->best_choice->lengths ().length () !=
word->outword->blob_list ()->length ()) ||
(word->best_choice->lengths ().length () !=
blob_choices.length ())) {
#ifndef SECURE_NAMES
tprintf
("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
word->best_choice->string ().string (),
word->best_choice->string ().length (),
word->outword->blob_list ()->length (),
blob_choices.length ());
word->best_choice->string ().string (),
word->best_choice->lengths ().length (),
word->outword->blob_list ()->length (),
blob_choices.length ());
#endif
}
ASSERT_HOST (word->best_choice->string ().length () ==
ASSERT_HOST (word->best_choice->lengths ().length () ==
word->outword->blob_list ()->length ());
ASSERT_HOST (word->best_choice->string ().length () ==
ASSERT_HOST (word->best_choice->lengths ().length () ==
blob_choices.length ());
word->tess_accepted = tess_acceptable_word (word->best_choice,
@ -1039,7 +1036,7 @@ void fix_rep_char( //Repeated char word
) {
struct REP_CH
{
char ch;
char ch[UNICHAR_LEN + 1];
int count;
};
@ -1048,19 +1045,25 @@ void fix_rep_char( //Repeated char word
int rep_ch_count = 0; //how many unique chs
const char *word_str; //the repeated chs
int i, j;
int offset;
int total = 0;
int max = 0;
char maxch = ' '; //Most common char
char *maxch = NULL; //Most common char
word_str = word->best_choice->string ().string ();
word_len = strlen (word_str);
word_len = word->best_choice->lengths ().length ();;
rep_ch = (REP_CH *) alloc_mem (word_len * sizeof (REP_CH));
for (i = 0; i < word_len; i++) {
for (j = 0; j < rep_ch_count && rep_ch[j].ch != word_str[i]; j++);
for (i = 0, offset = 0; i < word_len;
offset += word->best_choice->lengths()[i++]) {
for (j = 0; j < rep_ch_count &&
strncmp(rep_ch[j].ch, word_str + offset,
word->best_choice->lengths()[i]) != 0; j++);
if (j < rep_ch_count)
rep_ch[j].count++;
else {
rep_ch[rep_ch_count].ch = word_str[i];
strncpy(rep_ch[rep_ch_count].ch, word_str + offset,
word->best_choice->lengths()[i]);
rep_ch[rep_ch_count].ch[word->best_choice->lengths()[i]] = '\0';
rep_ch[rep_ch_count].count = 1;
rep_ch_count++;
}
@ -1068,7 +1071,7 @@ void fix_rep_char( //Repeated char word
for (j = 0; j < rep_ch_count; j++) {
total += rep_ch[j].count;
if ((rep_ch[j].count > max) && (rep_ch[j].ch != ' ')) {
if ((rep_ch[j].count > max) && (*rep_ch[j].ch != ' ')) {
max = rep_ch[j].count;
maxch = rep_ch[j].ch;
}
@ -1078,26 +1081,47 @@ void fix_rep_char( //Repeated char word
free_mem(rep_ch);
word->reject_map.initialise (word_len);
for (i = 0; i < word_len; i++) {
if (word_str[i] != maxch)
for (i = 0, offset = 0; i < word_len;
offset += word->best_choice->lengths()[i++]) {
if (strncmp(word_str + offset, maxch,
word->best_choice->lengths()[i]) != 0)
//rej unrecognised blobs
word->reject_map[i].setrej_bad_repetition ();
}
word->done = TRUE;
}
// TODO(tkielbus) Decide between keeping this behavior here or modifying the
// training data.
// Helper for fix_quotes.
// signed_str points at the first byte of a character and length is the
// number of bytes that character occupies. Returns non-zero when those bytes
// are one of the "single quote" forms recognised here:
//   - the 1-byte characters ' and `
//   - the 3-byte sequences E2 80 98 and E2 80 99 (UTF-8 curved quotes)
// Any other length yields 0 without examining the bytes.
static int is_simple_quote(const char* signed_str, int length) {
  // Compare as unsigned bytes so values >= 0x80 match regardless of whether
  // plain char is signed on this platform.
  const unsigned char* bytes = (const unsigned char*) signed_str;

  if (length == 1) {
    // standard 1 byte quotes
    return bytes[0] == '\'' || bytes[0] == '`';
  }

  if (length == 3) {
    // utf8 3 bytes curved quotes: both share the E2 80 prefix and differ
    // only in the final byte.
    return bytes[0] == 0xe2 &&
           bytes[1] == 0x80 &&
           (bytes[2] == 0x98 || bytes[2] == 0x99);
  }

  return 0;
}
/**********************************************************************
* fix_quotes
*
* Change pairs of quotes to double quotes.
**********************************************************************/
void fix_quotes( //make double quotes
char *string, //string to fix
WERD_CHOICE *choice, //choice to fix
WERD *word, //word to do //char choices
BLOB_CHOICE_LIST_CLIST *blob_choices) {
char *ptr; //string ptr
char *str = (char *) choice->string().string();//string ptr
int i;
int offset;
//blobs
PBLOB_IT blob_it = word->blob_list ();
//choices
@ -1105,12 +1129,20 @@ void fix_quotes( //make double quotes
BLOB_CHOICE_IT it1; //first choices
BLOB_CHOICE_IT it2; //second choices
for (ptr = string;
*ptr != '\0'; ptr++, blob_it.forward (), choice_it.forward ()) {
if ((*ptr == '\'' || *ptr == '`')
&& (*(ptr + 1) == '\'' || *(ptr + 1) == '`')) {
*ptr = '"'; //turn to double
strcpy (ptr + 1, ptr + 2); //shuffle up
for (i = 0, offset = 0; str[offset] != '\0';
offset += choice->lengths()[i++],
blob_it.forward (), choice_it.forward ()) {
if (str[offset + choice->lengths()[i]] != '\0' &&
is_simple_quote(str + offset, choice->lengths()[i]) &&
is_simple_quote(str + offset + choice->lengths()[i],
choice->lengths()[i + 1])) {
str[offset] = '"'; //turn to double
strcpy (str + offset + 1,
str + offset + choice->lengths()[i] +
choice->lengths()[i + 1]); //shuffle up
choice->lengths()[i] = 1;
strcpy ((char*) choice->lengths().string() + i + 1,
choice->lengths().string() + i + 2);
merge_blobs (blob_it.data (), blob_it.data_relative (1));
blob_it.forward ();
delete blob_it.extract (); //get rid of spare
@ -1138,12 +1170,13 @@ void fix_quotes( //make double quotes
* Change pairs of hyphens to a single hyphen if the bounding boxes touch
* Typically a long dash which has been segmented.
**********************************************************************/
void fix_hyphens( //crunch double hyphens
char *string, //string to fix
WERD_CHOICE *choice, //choice to fix
WERD *word, //word to do //char choices
BLOB_CHOICE_LIST_CLIST *blob_choices) {
char *ptr; //string ptr
char *str = (char *) choice->string().string();//string ptr
int i;
int offset;
//blobs
PBLOB_IT blob_it = word->blob_list ();
//choices
@ -1151,14 +1184,20 @@ void fix_hyphens( //crunch double hyphens
BLOB_CHOICE_IT it1; //first choices
BLOB_CHOICE_IT it2; //second choices
for (ptr = string;
*ptr != '\0'; ptr++, blob_it.forward (), choice_it.forward ()) {
if ((*ptr == '-' || *ptr == '~') &&
(*(ptr + 1) == '-' || *(ptr + 1) == '~') &&
for (i = 0, offset = 0; str[offset] != '\0';
offset += choice->lengths()[i++],
blob_it.forward (), choice_it.forward ()) {
if ((str[offset] == '-' || str[offset] == '~') &&
(str[offset + choice->lengths()[i]] == '-' ||
str[offset + choice->lengths()[i]] == '~') &&
(blob_it.data ()->bounding_box ().right () >=
blob_it.data_relative (1)->bounding_box ().left ())) {
*ptr = '-'; //turn to single hyphen
strcpy (ptr + 1, ptr + 2); //shuffle up
str[offset] = '-'; //turn to single hyphen
strcpy (str + offset + choice->lengths()[i],
str + offset + choice->lengths()[i] +
choice->lengths()[i + 1]); //shuffle up
strcpy ((char*) choice->lengths().string() + i + 1,
choice->lengths().string() + i + 2);
merge_blobs (blob_it.data (), blob_it.data_relative (1));
blob_it.forward ();
delete blob_it.extract (); //get rid of spare
@ -1249,11 +1288,9 @@ void choice_dump_tester( //dump chars in word
it.set_to_list (ratings);
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
blob_choice = it.data ();
if ((blob_choice->char_class () >= '!') &&
(blob_choice->char_class () <= '~'))
fprintf (choice_file, "\t%c\t%f\t%f",
blob_choice->char_class (),
blob_choice->rating (), blob_choice->certainty ());
fprintf (choice_file, "\t%s\t%f\t%f",
blob_choice->unichar (),
blob_choice->rating (), blob_choice->certainty ());
}
fprintf (choice_file, "\n");
}
@ -1290,33 +1327,37 @@ WERD *make_bln_copy(WERD *src_word, ROW *row, float x_height, DENORM *denorm) {
}
ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s) {
ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s,
const char *lengths) {
int i = 0;
int offset = 0;
int leading_punct_count;
int upper_count = 0;
int hyphen_pos = -1;
ACCEPTABLE_WERD_TYPE word_type = AC_UNACCEPTABLE;
if (strlen (s) > 20)
if (strlen (lengths) > 20)
return word_type;
/* Single Leading punctuation char*/
if ((s[i] != '\0') && (STRING (chs_leading_punct).contains (s[i])))
i++;
if ((s[offset] != '\0') && (STRING (chs_leading_punct).contains (s[offset])))
offset += lengths[i++];
leading_punct_count = i;
/* Initial cap */
while (isupper (s[i])) {
i++;
while ((s[offset] != '\0') &&
unicharset.get_isupper(s + offset, lengths[i])) {
offset += lengths[i++];
upper_count++;
}
if (upper_count > 1)
word_type = AC_UPPER_CASE;
else {
/* Lower case word, possibly with an initial cap */
while (islower (s[i])) {
i++;
while ((s[offset] != '\0') &&
unicharset.get_islower (s + offset, lengths[i])) {
offset += lengths[i++];
}
if (i - leading_punct_count < quality_min_initial_alphas_reqd)
goto not_a_word;
@ -1324,11 +1365,13 @@ ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s) {
Allow a single hyphen in a lower case word
- dont trust upper case - I've seen several cases of "H" -> "I-I"
*/
if (s[i] == '-') {
hyphen_pos = i++;
if (s[i] != '\0') {
while (islower (s[i])) {
i++;
if (lengths[i] == 1 && s[offset] == '-') {
hyphen_pos = i;
offset += lengths[i++];
if (s[offset] != '\0') {
while ((s[offset] != '\0') &&
unicharset.get_islower(s + offset, lengths[i])) {
offset += lengths[i++];
}
if (i < hyphen_pos + 3)
goto not_a_word;
@ -1336,8 +1379,11 @@ ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s) {
}
else {
/* Allow "'s" in NON hyphenated lower case words */
if ((s[i] == '\'') && (s[i + 1] == 's'))
i += 2;
if (lengths[i] == 1 && (s[offset] == '\'') &&
lengths[i + 1] == 1 && (s[offset + lengths[i]] == 's')) {
offset += lengths[i++];
offset += lengths[i++];
}
}
if (upper_count > 0)
word_type = AC_INITIAL_CAP;
@ -1346,13 +1392,15 @@ ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s) {
}
/* Up to two different, constrained trailing punctuation chars */
if ((s[i] != '\0') && (STRING (chs_trailing_punct1).contains (s[i])))
i++;
if ((s[i] != '\0') &&
(s[i - 1] != s[i]) && (STRING (chs_trailing_punct2).contains (s[i])))
i++;
if (lengths[i] == 1 && (s[offset] != '\0') &&
(STRING (chs_trailing_punct1).contains (s[offset])))
offset += lengths[i++];
if (lengths[i] == 1 && (s[offset] != '\0') && i > 0 &&
(s[offset - lengths[i - 1]] != s[offset]) &&
(STRING (chs_trailing_punct2).contains (s[offset])))
offset += lengths[i++];
if (s[i] != '\0')
if (s[offset] != '\0')
word_type = AC_UNACCEPTABLE;
not_a_word:
@ -1360,17 +1408,26 @@ ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s) {
if (word_type == AC_UNACCEPTABLE) {
/* Look for abbreviation string */
i = 0;
if (isupper (s[0])) {
offset = 0;
if (s[0] != '\0' && unicharset.get_isupper (s, lengths[0])) {
word_type = AC_UC_ABBREV;
while ((s[i] != '\0') && isupper (s[i]) && (s[i + 1] == '.'))
i += 2;
while ((s[offset] != '\0') &&
unicharset.get_isupper(s + offset, lengths[i]) &&
(lengths[i + 1] == 1 && s[offset + lengths[i]] == '.')) {
offset += lengths[i++];
offset += lengths[i++];
}
}
else if (islower (s[0])) {
else if (s[0] != '\0' && unicharset.get_islower (s, lengths[0])) {
word_type = AC_LC_ABBREV;
while ((s[i] != '\0') && islower (s[i]) && (s[i + 1] == '.'))
i += 2;
while ((s[offset] != '\0') &&
unicharset.get_islower(s + offset, lengths[i]) &&
(lengths[i + 1] == 1 && s[offset + lengths[i]] == '.')) {
offset += lengths[i++];
offset += lengths[i++];
}
}
if (s[i] != '\0')
if (s[offset] != '\0')
word_type = AC_UNACCEPTABLE;
}
@ -1478,7 +1535,8 @@ void set_word_fonts( //good chars in word
WERD_RES *word, //word to adapt to //detailed results
BLOB_CHOICE_LIST_CLIST *blob_choices) {
INT32 index; //char index
char choice_char; //char from word
INT32 offset; //char offset
char choice_char[UNICHAR_LEN + 1]; //char from word
INT8 config; //font of char
//character iterator
BLOB_CHOICE_LIST_C_IT char_it = blob_choices;
@ -1517,16 +1575,19 @@ void set_word_fonts( //good chars in word
word->italic = 0;
word->bold = 0;
for (char_it.mark_cycle_pt (), index = 0;
!char_it.cycled_list (); char_it.forward (), index++) {
choice_char = word->best_choice->string ()[index];
for (char_it.mark_cycle_pt (), index = 0, offset = 0;
!char_it.cycled_list (); char_it.forward (),
offset += word->best_choice->lengths()[index++]) {
strncpy(choice_char, word->best_choice->string ().string() + offset,
word->best_choice->lengths()[index]);
choice_char[word->best_choice->lengths()[index]] = '\0';
choice_it.set_to_list (char_it.data ());
for (choice_it.mark_cycle_pt (); !choice_it.cycled_list ();
choice_it.forward ()) {
if (choice_it.data ()->char_class () == choice_char) {
choice_it.forward ()) {
if (strcmp(choice_it.data ()->unichar (), choice_char) == 0) {
config = choice_it.data ()->config ();
if (tessedit_debug_fonts)
tprintf ("%c(%d=%d%c%c)",
tprintf ("%s(%d=%d%c%c)",
choice_char, config, (config & 31) >> 2,
config & 2 ? 'N' : 'B', config & 1 ? 'N' : 'I');
if (config != -1) {

View File

@ -46,66 +46,8 @@
typedef void (*TESS_TESTER) (TBLOB *, BOOL8, char *, INT32, LIST);
typedef LIST (*TESS_MATCHER) (TBLOB *, TBLOB *, TBLOB *, void *, TEXTROW *);
extern "C"
{
/*
int start_recog( //Real main in C
int argc,
char *argv[]);
void program_editup2( //afterforking part
int argc,
char** argv);
int end_recog( //Real main in C
int argc,
char *argv[]);
void set_interactive_pass();
void set_pass1();
void set_pass2();
//ARRAY cc_recog(TWERD*,TESS_CHOICE*,TESS_CHOICE*,TESS_TESTER,
// TESS_TESTER);*/
//void wo_learn_blob(TBLOB*,TEXTROW*,char*,INT32);
//LIST AdaptiveClassifier(TBLOB*,TBLOB*,TEXTROW*);
//void LearnBlob(TBLOB*,TEXTROW*,char*,INT32);
//TWERD *newword();
//TBLOB *newblob();
//TESSLINE *newoutline();
//EDGEPT *newedgept();
//void oldedgept(EDGEPT*);
//void destroy_nodes(void*,void (*)(void*));
//TESS_LIST *append_choice(TESS_LIST*,char*,double,double,char);
//void fix_quotes (char*);
//void record_certainty(double,int);
//int AcceptableResult(A_CHOICE*,A_CHOICE*);
//int AdaptableWord(TWERD*,const char*,const char*);
//void delete_word(TWERD*);
//void free_blob(TBLOB*);
//void add_document_word(A_CHOICE*);
//void AdaptToWord(TWERD*,TEXTROW*,const char*,const char*,const char*);
//void SaveBadWord(const char*,double);
//void free_choice(TESS_CHOICE*);
//TWERD *newword();
//TBLOB *newblob();
//void free_blob( //free a blob
// TBLOB *blob); //blob to free
//int dict_word( const char* );
//extern int tess_cn_matching;
//extern int tess_bn_matching;
//extern int last_word_on_line;
extern TEXTROW normalized_row;
//extern TESS_MATCHER blob_matchers[];
//extern FILE *rawfile;
//extern FILE *textfile;
//extern int character_count;
//extern int word_count;
//extern int enable_assoc;
//extern int chop_enable;
//extern int permute_only_top;
extern int display_ratings;
};
extern TEXTROW normalized_row;
extern int display_ratings;
#if 0
#define strsave(s) \

View File

@ -23,8 +23,8 @@
#include "tessopt.h"
#include "notdll.h" //must be last include
int optind;
char *optarg;
int tessoptind;
char *tessoptarg;
/**********************************************************************
* tessopt
@ -37,22 +37,22 @@ INT32 argc, //arg count
char *argv[], //args
const char *arglist //string of arg chars
) {
char *arg; //arg char
const char *arg; //arg char
if (optind == 0)
optind = 1;
if (optind < argc && argv[optind][0] == '-') {
arg = strchr (arglist, argv[optind][1]);
if (tessoptind == 0)
tessoptind = 1;
if (tessoptind < argc && argv[tessoptind][0] == '-') {
arg = strchr (arglist, argv[tessoptind][1]);
if (arg == NULL || *arg == ':')
return '?'; //dud option
optind++;
optarg = argv[optind];
tessoptind++;
tessoptarg = argv[tessoptind];
if (arg[1] == ':') {
if (argv[optind - 1][2] != '\0')
if (argv[tessoptind - 1][2] != '\0')
//immediately after
optarg = argv[optind - 1] + 2;
tessoptarg = argv[tessoptind - 1] + 2;
else
optind++;
tessoptind++;
}
return *arg;
}

View File

@ -20,8 +20,8 @@
#include "host.h"
#include "notdll.h" //must be last include
extern int optind;
extern char *optarg;
extern int tessoptind;
extern char *tessoptarg;
int tessopt ( //parse args
INT32 argc, //arg count

View File

@ -28,7 +28,7 @@
*/
#ifdef __cplusplus
#define EXTERN extern "C"
#define EXTERN extern
#else
#define EXTERN extern
#endif

View File

@ -39,14 +39,12 @@
extern TBLOB *pageblobs; /*first blob on page */
extern TEXTBLOCK *pageblocks; /*first block on page */
/*class definitions */
extern char classes[CLASSIZE][CLASSLENGTH];
/* extern char classes[CLASSIZE][CLASSLENGTH]; */
extern int resolution; /*scanner res in dpi */
extern int acts[MAXPROC]; /*action flags */
extern int debugs[MAXPROC]; /*debug flags */
extern int plots[MAXPROC]; /*plot flags */
extern int corners[4]; /*corners of scan window */
extern int optind; /*option index */
extern char *optarg; /*option argument */
/*image file name */
extern char imagefile[FILENAMESIZE];
/* main directory */

View File

@ -37,6 +37,7 @@
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "unichar.h"
#define MAXNAMESIZE 80
#define MAX_NUM_SAMPLES 10000
@ -219,21 +220,34 @@ int main (
ParseArguments (argc, argv);
while ((PageName = GetNextFilename()) != NULL)
{
printf ("\nReading %s ...", PageName);
printf ("Reading %s ...\n", PageName);
TrainingPage = Efopen (PageName, "r");
ReadTrainingSamples (TrainingPage, &CharList);
fclose (TrainingPage);
//WriteTrainingSamples (Directory, CharList);
}
printf("Clustering ...\n");
pCharList = CharList;
iterate(pCharList)
{
//Cluster
CharSample = (LABELEDLIST) first_node (pCharList);
printf ("\nClustering %s ...", CharSample->Label);
Clusterer = SetUpForClustering(CharSample);
ProtoList = ClusterSamples(Clusterer, &Config);
AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
//Cluster
CharSample = (LABELEDLIST) first_node (pCharList);
//printf ("\nClustering %s ...", CharSample->Label);
Clusterer = SetUpForClustering(CharSample);
float SavedMinSamples = Config.MinSamples;
while (Config.MinSamples > 0.001) {
ProtoList = ClusterSamples(Clusterer, &Config);
if (NumberOfProtos(ProtoList, 1, 0) > 0)
break;
else {
Config.MinSamples *= 0.95;
printf("0 significant protos for %s."
" Retrying clustering with MinSamples = %f%%\n",
CharSample->Label, Config.MinSamples);
}
}
Config.MinSamples = SavedMinSamples;
AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
}
FreeTrainingSamples (CharList);
WriteNormProtos (Directory, NormProtoList, Clusterer);
@ -262,7 +276,7 @@ void ParseArguments(
** ShowSignificantProtos flag controlling proto display
** ShowInsignificantProtos flag controlling proto display
** Config current clustering parameters
** optarg, optind defined by tessopt sys call
** tessoptarg, tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv
** Operation:
** This routine parses the command line arguments that were
@ -287,7 +301,6 @@ void ParseArguments(
int Option;
int ParametersRead;
BOOL8 Error;
extern char *optarg;
Error = FALSE;
Argc = argc;
@ -297,48 +310,48 @@ void ParseArguments(
switch ( Option )
{
case 'n':
sscanf(optarg,"%d", &ParametersRead);
sscanf(tessoptarg,"%d", &ParametersRead);
ShowInsignificantProtos = ParametersRead;
break;
case 'p':
sscanf(optarg,"%d", &ParametersRead);
sscanf(tessoptarg,"%d", &ParametersRead);
ShowSignificantProtos = ParametersRead;
break;
case 'd':
ShowAllSamples = FALSE;
break;
case 'C':
ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) );
ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Confidence > 1 ) Config.Confidence = 1;
else if ( Config.Confidence < 0 ) Config.Confidence = 0;
break;
case 'I':
ParametersRead = sscanf( optarg, "%f", &(Config.Independence) );
ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Independence > 1 ) Config.Independence = 1;
else if ( Config.Independence < 0 ) Config.Independence = 0;
break;
case 'M':
ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) );
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
break;
case 'B':
ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) );
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
break;
case 'R':
ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy );
ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
break;
case 'S':
switch ( optarg[0] )
switch ( tessoptarg[0] )
{
case 's': Config.ProtoStyle = spherical; break;
case 'e': Config.ProtoStyle = elliptical; break;
@ -348,10 +361,10 @@ void ParseArguments(
}
break;
case 'D':
Directory = optarg;
Directory = tessoptarg;
break;
case 'N':
if (sscanf (optarg, "%d", &MaxNumSamples) != 1 ||
if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
MaxNumSamples <= 0)
Error = TRUE;
break;
@ -375,7 +388,7 @@ char *GetNextFilename ()
/*
** Parameters: none
** Globals:
** optind defined by tessopt sys call
** tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv
** Operation:
** This routine returns the next command line argument. If
@ -388,8 +401,8 @@ char *GetNextFilename ()
*/
{
if (optind < Argc)
return (Argv [optind++]);
if (tessoptind < Argc)
return (Argv [tessoptind++]);
else
return (NULL);
@ -417,32 +430,32 @@ void ReadTrainingSamples (
*/
{
char CharName[MAXNAMESIZE];
char unichar[UNICHAR_LEN + 1];
LABELEDLIST CharSample;
FEATURE_SET FeatureSamples;
CHAR_DESC CharDesc;
int Type, i;
CHAR_DESC CharDesc;
int Type, i;
while (fscanf (File, "%s %s", FontName, CharName) == 2) {
CharSample = FindList (*TrainingSamples, CharName);
if (CharSample == NULL) {
CharSample = NewLabeledList (CharName);
*TrainingSamples = push (*TrainingSamples, CharSample);
}
CharDesc = ReadCharDescription (File);
Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
FeatureSamples = FeaturesOfType(CharDesc, Type);
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
FEATURE f = FeatureSamples->Features[feature];
for (int dim =0; dim < f->Type->NumParams; ++dim)
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
}
CharSample->List = push (CharSample->List, FeatureSamples);
for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
if (Type != i)
FreeFeatureSet (FeaturesOfType (CharDesc, i));
free (CharDesc);
}
while (fscanf (File, "%s %s", FontName, unichar) == 2) {
CharSample = FindList (*TrainingSamples, unichar);
if (CharSample == NULL) {
CharSample = NewLabeledList (unichar);
*TrainingSamples = push (*TrainingSamples, CharSample);
}
CharDesc = ReadCharDescription (File);
Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
FeatureSamples = FeaturesOfType(CharDesc, Type);
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
FEATURE f = FeatureSamples->Features[feature];
for (int dim =0; dim < f->Type->NumParams; ++dim)
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
}
CharSample->List = push (CharSample->List, FeatureSamples);
for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
if (Type != i)
FreeFeatureSet (FeaturesOfType (CharDesc, i));
free (CharDesc);
}
} // ReadTrainingSamples
/*---------------------------------------------------------------------------*/
@ -606,7 +619,6 @@ void WriteNormProtos (
char Filename[MAXNAMESIZE];
LABELEDLIST LabeledProto;
int N;
char Label;
strcpy (Filename, "");
if (Directory != NULL)
@ -623,9 +635,17 @@ void WriteNormProtos (
{
LabeledProto = (LABELEDLIST) first_node (LabeledProtoList);
N = NumberOfProtos(LabeledProto->List,
ShowSignificantProtos, ShowInsignificantProtos);
Label = NameToChar(LabeledProto->Label);
fprintf(File, "\n%c %d\n", Label, N);
ShowSignificantProtos, ShowInsignificantProtos);
if (N < 1) {
printf ("\nError! Not enough protos for %s: %d protos"
" (%d significant protos"
", %d insignificant protos)\n",
LabeledProto->Label, N,
NumberOfProtos(LabeledProto->List, 1, 0),
NumberOfProtos(LabeledProto->List, 0, 1));
exit(1);
}
fprintf(File, "\n%s %d\n", LabeledProto->Label, N);
WriteProtos(File, Clusterer->SampleSize, LabeledProto->List,
ShowSignificantProtos, ShowInsignificantProtos);
}

View File

@ -44,6 +44,9 @@
#include "intproto.h"
#include "variables.h"
#include "freelist.h"
#include "efio.h"
#include "danerror.h"
#include "globals.h"
#include <string.h>
#include <stdio.h>
@ -73,7 +76,6 @@ typedef MERGE_CLASS_NODE* MERGE_CLASS;
#define round(x,frag)(floor(x/frag+.5)*frag)
/**----------------------------------------------------------------------------
Public Function Prototypes
----------------------------------------------------------------------------**/
@ -164,21 +166,7 @@ void Normalize (
void SetUpForFloat2Int(
LIST LabeledClassList);
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
FILE* fp = Efopen(filename, "wb");
/* then write out each class */
for (int i = 0; i < NumClassesIn (Templates); i++) {
int MaxLength = 0;
INT_CLASS Class = ClassForIndex (Templates, i);
for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {
if (LengthForConfigId (Class, ConfigId) > MaxLength)
MaxLength = LengthForConfigId (Class, ConfigId);
}
fprintf(fp, "%c %d\n", ClassIdForIndex(Templates, i), MaxLength);
}
fclose(fp);
}
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename);
//--------------Global Data Definitions and Declarations--------------
static char FontName[MAXNAMESIZE];
@ -200,6 +188,9 @@ static CLUSTERCONFIG Config =
static FLOAT32 RoundingAccuracy = 0.0;
// The unicharset used during mftraining
static UNICHARSET unicharset_mftraining;
/*----------------------------------------------------------------------------
Public Code
-----------------------------------------------------------------------------*/
@ -260,12 +251,17 @@ int main (
LIST pCharList, pProtoList;
char Filename[MAXNAMESIZE];
// Clean the unichar set
unicharset_mftraining.clear();
// Space character needed to represent NIL classification
unicharset_mftraining.unichar_insert(" ");
ParseArguments (argc, argv);
InitFastTrainerVars ();
InitSubfeatureVars ();
while ((PageName = GetNextFilename()) != NULL)
{
printf ("\nReading %s ...", PageName);
printf ("Reading %s ...\n", PageName);
TrainingPage = Efopen (PageName, "r");
CharList = ReadTrainingSamples (TrainingPage);
fclose (TrainingPage);
@ -275,7 +271,7 @@ int main (
{
//Cluster
CharSample = (LABELEDLIST) first_node (pCharList);
printf ("\nClustering %s ...", CharSample->Label);
// printf ("\nClustering %s ...", CharSample->Label);
Clusterer = SetUpForClustering(CharSample);
ProtoList = ClusterSamples(Clusterer, &Config);
//WriteClusteredTrainingSamples (Directory, ProtoList, Clusterer, CharSample);
@ -320,14 +316,13 @@ int main (
FreeProtoList (&ProtoList);
}
FreeTrainingSamples (CharList);
printf ("\n");
}
//WriteMergedTrainingSamples(Directory,ClassList);
WriteMicrofeat(Directory, ClassList);
InitIntProtoVars ();
InitPrototypes ();
SetUpForFloat2Int(ClassList);
IntTemplates = CreateIntTemplates(TrainingData);
IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining);
strcpy (Filename, "");
if (Directory != NULL)
{
@ -340,11 +335,18 @@ int main (
#else
OutFile = Efopen (Filename, "wb");
#endif
WriteIntTemplates(OutFile, IntTemplates);
WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining);
fclose (OutFile);
// Now create pffmtable.
WritePFFMTable(IntTemplates, "pffmtable");
printf ("\nDone!\n"); /**/
strcpy (Filename, "");
if (Directory != NULL)
{
strcat (Filename, Directory);
strcat (Filename, "/");
}
strcat (Filename, "pffmtable");
// Now create pffmtable.
WritePFFMTable(IntTemplates, Filename);
printf ("Done!\n"); /**/
FreeLabeledClassList (ClassList);
return 0;
} /* main */
@ -367,7 +369,7 @@ char **argv)
** ShowSignificantProtos flag controlling proto display
** ShowInsignificantProtos flag controlling proto display
** Config current clustering parameters
** optarg, optind defined by tessopt sys call
** tessoptarg, tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv
** Operation:
** This routine parses the command line arguments that were
@ -392,7 +394,6 @@ char **argv)
int Option;
int ParametersRead;
BOOL8 Error;
extern char *optarg;
Error = FALSE;
Argc = argc;
@ -411,37 +412,37 @@ char **argv)
ShowAllSamples = FALSE;
break;
case 'C':
ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) );
ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Confidence > 1 ) Config.Confidence = 1;
else if ( Config.Confidence < 0 ) Config.Confidence = 0;
break;
case 'I':
ParametersRead = sscanf( optarg, "%f", &(Config.Independence) );
ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Independence > 1 ) Config.Independence = 1;
else if ( Config.Independence < 0 ) Config.Independence = 0;
break;
case 'M':
ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) );
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
break;
case 'B':
ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) );
ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
break;
case 'R':
ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy );
ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
if ( ParametersRead != 1 ) Error = TRUE;
else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
break;
case 'S':
switch ( optarg[0] )
switch ( tessoptarg[0] )
{
case 's': Config.ProtoStyle = spherical; break;
case 'e': Config.ProtoStyle = elliptical; break;
@ -451,10 +452,10 @@ char **argv)
}
break;
case 'D':
Directory = optarg;
Directory = tessoptarg;
break;
case 'N':
if (sscanf (optarg, "%d", &MaxNumSamples) != 1 ||
if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
MaxNumSamples <= 0)
Error = TRUE;
break;
@ -478,7 +479,7 @@ char *GetNextFilename ()
/*
** Parameters: none
** Globals:
** optind defined by tessopt sys call
** tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv
** Operation:
** This routine returns the next command line argument. If
@ -491,8 +492,8 @@ char *GetNextFilename ()
*/
{
if (optind < Argc)
return (Argv [optind++]);
if (tessoptind < Argc)
return (Argv [tessoptind++]);
else
return (NULL);
@ -519,33 +520,41 @@ LIST ReadTrainingSamples (
*/
{
char CharName[MAXNAMESIZE];
LABELEDLIST CharSample;
FEATURE_SET FeatureSamples;
char unichar[UNICHAR_LEN + 1];
LABELEDLIST CharSample;
FEATURE_SET FeatureSamples;
LIST TrainingSamples = NIL;
CHAR_DESC CharDesc;
int Type, i;
while (fscanf (File, "%s %s", FontName, CharName) == 2) {
CharSample = FindList (TrainingSamples, CharName);
while (fscanf (File, "%s %s", FontName, unichar) == 2) {
if (!unicharset_mftraining.contains_unichar(unichar)) {
unicharset_mftraining.unichar_insert(unichar);
if (unicharset_mftraining.size() > MAX_NUM_CLASSES) {
cprintf("Error: Size of unicharset of mftraining is "
"greater than MAX_NUM_CLASSES\n");
exit(1);
}
}
CharSample = FindList (TrainingSamples, unichar);
if (CharSample == NULL) {
CharSample = NewLabeledList (CharName);
CharSample = NewLabeledList (unichar);
TrainingSamples = push (TrainingSamples, CharSample);
}
CharDesc = ReadCharDescription (File);
Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
FeatureSamples = FeaturesOfType(CharDesc, Type);
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
FEATURE f = FeatureSamples->Features[feature];
for (int dim =0; dim < f->Type->NumParams; ++dim)
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
}
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
FEATURE f = FeatureSamples->Features[feature];
for (int dim =0; dim < f->Type->NumParams; ++dim)
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
}
CharSample->List = push (CharSample->List, FeatureSamples);
for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
if (Type != i)
FreeFeatureSet (FeaturesOfType (CharDesc, i));
if (Type != i)
FreeFeatureSet (FeaturesOfType (CharDesc, i));
free (CharDesc);
}
}
return (TrainingSamples);
} /* ReadTrainingSamples */
@ -843,7 +852,7 @@ void WriteProtos(
int i;
PROTO Proto;
fprintf(File, "%c\n", NameToChar(MergeClass->Label));
fprintf(File, "%s\n", MergeClass->Label);
fprintf(File, "%d\n", NumProtosIn(MergeClass->Class));
for(i=0; i < NumProtosIn(MergeClass->Class); i++)
{
@ -900,7 +909,7 @@ void FreeTrainingSamples (
LIST FeatureList;
printf ("\nFreeTrainingSamples...");
// printf ("FreeTrainingSamples...\n");
iterate (CharList) /* iterate thru all of the fonts */
{
CharSample = (LABELEDLIST) first_node (CharList);
@ -1161,12 +1170,13 @@ void SetUpForFloat2Int(
BIT_VECTOR NewConfig;
BIT_VECTOR OldConfig;
printf("Float2Int ...");
// printf("Float2Int ...\n");
iterate(LabeledClassList)
{
MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
Class = &TrainingData[NameToChar(MergeClass->Label)];
Class = &TrainingData[unicharset_mftraining.unichar_to_id(
MergeClass->Label)];
NumProtos = NumProtosIn(MergeClass->Class);
NumConfigs = NumConfigsIn(MergeClass->Class);
@ -1204,3 +1214,20 @@ void SetUpForFloat2Int(
}
}
} // SetUpForFloat2Int
/*--------------------------------------------------------------------------*/
// Emits the pffmtable: one "<unichar> <length>" line per class in
// Templates, where <length> is the largest LengthForConfigId value over
// the class's configs (0 when the class has none) and <unichar> is the
// string that unicharset_mftraining associates with the class id.
//   Templates  templates whose classes are enumerated
//   filename   name of the output file, opened through Efopen in "wb" mode
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
  FILE* table_file = Efopen(filename, "wb");

  int class_index = 0;
  while (class_index < NumClassesIn (Templates)) {
    INT_CLASS int_class = ClassForIndex (Templates, class_index);

    // Scan every config of this class, remembering the longest one.
    int longest = 0;
    int config_id = 0;
    while (config_id < NumIntConfigsIn (int_class)) {
      if (longest < LengthForConfigId (int_class, config_id))
        longest = LengthForConfigId (int_class, config_id);
      ++config_id;
    }

    // The class is identified in the table by its unichar string.
    fprintf(table_file, "%s %d\n",
            unicharset_mftraining.id_to_unichar(
                ClassIdForIndex(Templates, class_index)),
            longest);
    ++class_index;
  }

  fclose(table_file);
}  // WritePFFMTable

View File

@ -52,8 +52,8 @@ int main(int argc, char** argv) {
while ((option = tessopt(argc, argv, "D" )) != EOF) {
switch (option) {
case 'D':
output_directory = optarg;
++optind;
output_directory = tessoptarg;
++tessoptind;
break;
}
}
@ -64,12 +64,12 @@ int main(int argc, char** argv) {
unicharset_file_name += kUnicharsetFileName;
// Load box files
for (; optind < argc; ++optind) {
printf("Extracting unicharset from %s\n", argv[optind]);
for (; tessoptind < argc; ++tessoptind) {
printf("Extracting unicharset from %s\n", argv[tessoptind]);
FILE* box_file = fopen(argv[optind], "r");
FILE* box_file = fopen(argv[tessoptind], "r");
if (box_file == NULL) {
printf("Cannot open box file %s\n", argv[optind]);
printf("Cannot open box file %s\n", argv[tessoptind]);
return -1;
}