ccmain: Fix typos in comments and strings

Most of them were found by codespell.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2015-09-14 21:59:16 +02:00 committed by Zdenko Podobný
parent f72e65b36e
commit bd3cd8f447
8 changed files with 37 additions and 37 deletions

View File

@ -1556,7 +1556,7 @@ void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,
word->fix_quotes(); word->fix_quotes();
if (tessedit_fix_hyphens) if (tessedit_fix_hyphens)
word->fix_hyphens(); word->fix_hyphens();
/* Dont trust fix_quotes! - though I think I've fixed the bug */ /* Don't trust fix_quotes! - though I think I've fixed the bug */
if (word->best_choice->length() != word->box_word->length()) { if (word->best_choice->length() != word->box_word->length()) {
tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;" tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;"
" #Blobs=%d\n", " #Blobs=%d\n",
@ -1694,7 +1694,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(
goto not_a_word; goto not_a_word;
/* /*
Allow a single hyphen in a lower case word Allow a single hyphen in a lower case word
- dont trust upper case - I've seen several cases of "H" -> "I-I" - don't trust upper case - I've seen several cases of "H" -> "I-I"
*/ */
if (lengths[i] == 1 && s[offset] == '-') { if (lengths[i] == 1 && s[offset] == '-') {
hyphen_pos = i; hyphen_pos = i;

View File

@ -129,7 +129,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) {
int expected_outline_count; int expected_outline_count;
if (STRING (outlines_odd).contains (c)) if (STRING (outlines_odd).contains (c))
return 0; //Dont use this char return 0; //Don't use this char
else if (STRING (outlines_2).contains (c)) else if (STRING (outlines_2).contains (c))
expected_outline_count = 2; expected_outline_count = 2;
else else
@ -157,7 +157,7 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
* - Word segmentation is the same as the original image * - Word segmentation is the same as the original image
* - All characters have the expected number of outlines * - All characters have the expected number of outlines
* NOTE - the rejection counts are recalculated after unrejection * NOTE - the rejection counts are recalculated after unrejection
* - CANT do it in a single pass without a bit of fiddling * - CAN'T do it in a single pass without a bit of fiddling
* - keep it simple but inefficient * - keep it simple but inefficient
*************************************************************************/ *************************************************************************/
void Tesseract::unrej_good_quality_words( //unreject potential void Tesseract::unrej_good_quality_words( //unreject potential
@ -403,7 +403,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
/************************************************************************* /*************************************************************************
* reject_whole_page() * reject_whole_page()
* Dont believe any of it - set the reject map to 00..00 in all words * Don't believe any of it - set the reject map to 00..00 in all words
* *
*************************************************************************/ *************************************************************************/

View File

@ -55,7 +55,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
WERD_RES *word_res; WERD_RES *word_res;
WERD_RES_LIST fuzzy_space_words; WERD_RES_LIST fuzzy_space_words;
inT16 new_length; inT16 new_length;
BOOL8 prevent_null_wd_fixsp; // DONT process blobless wds BOOL8 prevent_null_wd_fixsp; // DON'T process blobless wds
inT32 word_index; // current word inT32 word_index; // current word
block_res_it.set_to_list(&page_res->block_res_list); block_res_it.set_to_list(&page_res->block_res_list);
@ -222,7 +222,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
* fuzzy spaces. The problem with the basic measure is that "561 63" would score * fuzzy spaces. The problem with the basic measure is that "561 63" would score
* the same as "56163", though given our knowledge that the space is fuzzy, and * the same as "56163", though given our knowledge that the space is fuzzy, and
* that there is a "1" next to the fuzzy space, we need to ensure that "56163" * that there is a "1" next to the fuzzy space, we need to ensure that "56163"
* is prefered. * is preferred.
* *
* The solution is to NOT COUNT the score of any word which has a digit at one * The solution is to NOT COUNT the score of any word which has a digit at one
* end and a "1Il" as the character the other side of the space. * end and a "1Il" as the character the other side of the space.
@ -272,8 +272,8 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
} else { } else {
/* /*
Can we add the prev word score and potentially count this word? Can we add the prev word score and potentially count this word?
Yes IF it didnt end in a 1 when the first char of this word is a digit Yes IF it didn't end in a 1 when the first char of this word is a digit
AND it didnt end in a digit when the first char of this word is a 1 AND it didn't end in a digit when the first char of this word is a 1
*/ */
word_len = word->reject_map.length(); word_len = word->reject_map.length();
current_word_ok_so_far = FALSE; current_word_ok_so_far = FALSE;
@ -507,7 +507,7 @@ BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
/* /*
Use all the standard pass 2 conditions for mode 5 in set_done() in Use all the standard pass 2 conditions for mode 5 in set_done() in
reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T
CARE WHETHER WE HAVE of/at on/an etc. CARE WHETHER WE HAVE of/at on/an etc.
*/ */
if (fixsp_done_mode > 0 && if (fixsp_done_mode > 0 &&

View File

@ -297,7 +297,7 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
/************************************************************************* /*************************************************************************
* SUSPECT LEVELS * SUSPECT LEVELS
* *
* 0 - dont reject ANYTHING * 0 - don't reject ANYTHING
* 1,2 - partial rejection * 1,2 - partial rejection
* 3 - BEST * 3 - BEST
* *
@ -337,7 +337,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
rating_per_ch = word.rating() / word_res->reject_map.length(); rating_per_ch = word.rating() / word_res->reject_map.length();
if (rating_per_ch >= suspect_rating_per_ch) if (rating_per_ch >= suspect_rating_per_ch)
return; //Dont touch bad ratings return; //Don't touch bad ratings
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) { if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/ /* Unreject any Tess Acceptable word - but NOT tess reject chs*/

View File

@ -329,13 +329,13 @@ void ParamsEditor::WriteParams(char *filename,
fclose(fp); fclose(fp);
sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename); sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
int a = sv_window_->ShowYesNoDialog(msg_str); int a = sv_window_->ShowYesNoDialog(msg_str);
if (a == 'n') { return; } // dont write if (a == 'n') { return; } // don't write
} }
fp = fopen (filename, "wb"); // can we write to it? fp = fopen (filename, "wb"); // can we write to it?
if (fp == NULL) { if (fp == NULL) {
sv_window_->AddMessage("Cant write to file " "%s" "", filename); sv_window_->AddMessage("Can't write to file " "%s" "", filename);
return; return;
} }

View File

@ -521,7 +521,7 @@ BOOL8 Tesseract::word_contains_non_1_digit(const char *word,
/************************************************************************* /*************************************************************************
* dont_allow_1Il() * dont_allow_1Il()
* Dont unreject LONE accepted 1Il conflict set chars * Don't unreject LONE accepted 1Il conflict set chars
*************************************************************************/ *************************************************************************/
void Tesseract::dont_allow_1Il(WERD_RES *word) { void Tesseract::dont_allow_1Il(WERD_RES *word) {
int i = 0; int i = 0;
@ -633,7 +633,7 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) {
next_left = 9999; next_left = 9999;
else else
next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left(); next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left();
// Dont touch small or touching blobs - it is too dangerous. // Don't touch small or touching blobs - it is too dangerous.
if ((out_box.width() > 8 * word_res->denorm.x_scale()) && if ((out_box.width() > 8 * word_res->denorm.x_scale()) &&
(out_box.left() > prev_right) && (out_box.right() < next_left)) { (out_box.left() > prev_right) && (out_box.right() < next_left)) {
aspect_ratio = out_box.width() / (float) out_box.height(); aspect_ratio = out_box.width() / (float) out_box.height();

View File

@ -136,7 +136,7 @@ Tesseract::Tesseract()
BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true, BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true,
"Try to improve fuzzy spaces", this->params()), "Try to improve fuzzy spaces", this->params()),
BOOL_MEMBER(tessedit_unrej_any_wd, false, BOOL_MEMBER(tessedit_unrej_any_wd, false,
"Dont bother with word plausibility", this->params()), "Don't bother with word plausibility", this->params()),
BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?", BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?",
this->params()), this->params()),
BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height", BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height",
@ -310,19 +310,19 @@ Tesseract::Tesseract()
this->params()), this->params()),
INT_MEMBER(crunch_pot_indicators, 1, INT_MEMBER(crunch_pot_indicators, 1,
"How many potential indicators needed", this->params()), "How many potential indicators needed", this->params()),
BOOL_MEMBER(crunch_leave_ok_strings, true, "Dont touch sensible strings", BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings",
this->params()), this->params()),
BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring", BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring",
this->params()), this->params()),
BOOL_MEMBER(crunch_leave_accept_strings, false, BOOL_MEMBER(crunch_leave_accept_strings, false,
"Dont pot crunch sensible strings", this->params()), "Don't pot crunch sensible strings", this->params()),
BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures", BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures",
this->params()), this->params()),
INT_MEMBER(crunch_leave_lc_strings, 4, INT_MEMBER(crunch_leave_lc_strings, 4,
"Dont crunch words with long lower case strings", "Don't crunch words with long lower case strings",
this->params()), this->params()),
INT_MEMBER(crunch_leave_uc_strings, 4, INT_MEMBER(crunch_leave_uc_strings, 4,
"Dont crunch words with long lower case strings", "Don't crunch words with long lower case strings",
this->params()), this->params()),
INT_MEMBER(crunch_long_repetitions, 3, INT_MEMBER(crunch_long_repetitions, 3,
"Crunch words with long repetitions", this->params()), "Crunch words with long repetitions", this->params()),
@ -393,21 +393,21 @@ Tesseract::Tesseract()
INT_MEMBER(suspect_space_level, 100, INT_MEMBER(suspect_space_level, 100,
"Min suspect level for rejecting spaces", this->params()), "Min suspect level for rejecting spaces", this->params()),
INT_MEMBER(suspect_short_words, 2, INT_MEMBER(suspect_short_words, 2,
"Dont Suspect dict wds longer than this", this->params()), "Don't suspect dict wds longer than this", this->params()),
BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected", BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
this->params()), this->params()),
double_MEMBER(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit", double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit",
this->params()), this->params()),
double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit", double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
this->params()), this->params()),
BOOL_MEMBER(tessedit_minimal_rejection, false, BOOL_MEMBER(tessedit_minimal_rejection, false,
"Only reject tess failures", this->params()), "Only reject tess failures", this->params()),
BOOL_MEMBER(tessedit_zero_rejection, false, "Dont reject ANYTHING", BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING",
this->params()), this->params()),
BOOL_MEMBER(tessedit_word_for_word, false, BOOL_MEMBER(tessedit_word_for_word, false,
"Make output have exactly one word per WERD", this->params()), "Make output have exactly one word per WERD", this->params()),
BOOL_MEMBER(tessedit_zero_kelvin_rejection, false, BOOL_MEMBER(tessedit_zero_kelvin_rejection, false,
"Dont reject ANYTHING AT ALL", this->params()), "Don't reject ANYTHING AT ALL", this->params()),
BOOL_MEMBER(tessedit_consistent_reps, true, BOOL_MEMBER(tessedit_consistent_reps, true,
"Force all rep chars the same", this->params()), "Force all rep chars the same", this->params()),
INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm", INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm",
@ -424,7 +424,7 @@ Tesseract::Tesseract()
"Use DOC dawg in 11l conf. detector", this->params()), "Use DOC dawg in 11l conf. detector", this->params()),
BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test", BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test",
this->params()), this->params()),
BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Dont double check", BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check",
this->params()), this->params()),
BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control", BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control",
this->params()), this->params()),

View File

@ -833,7 +833,7 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true, BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,
"Try to improve fuzzy spaces"); "Try to improve fuzzy spaces");
BOOL_VAR_H(tessedit_unrej_any_wd, false, BOOL_VAR_H(tessedit_unrej_any_wd, false,
"Dont bother with word plausibility"); "Don't bother with word plausibility");
BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?"); BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?");
BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height"); BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height");
BOOL_VAR_H(tessedit_enable_doc_dict, true, BOOL_VAR_H(tessedit_enable_doc_dict, true,
@ -954,15 +954,15 @@ class Tesseract : public Wordrec {
double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this"); double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this");
INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch"); INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch");
INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed"); INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed");
BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible strings"); BOOL_VAR_H(crunch_leave_ok_strings, true, "Don't touch sensible strings");
BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring"); BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring");
BOOL_VAR_H(crunch_leave_accept_strings, false, BOOL_VAR_H(crunch_leave_accept_strings, false,
"Dont pot crunch sensible strings"); "Don't pot crunch sensible strings");
BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures"); BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures");
INT_VAR_H(crunch_leave_lc_strings, 4, INT_VAR_H(crunch_leave_lc_strings, 4,
"Dont crunch words with long lower case strings"); "Don't crunch words with long lower case strings");
INT_VAR_H(crunch_leave_uc_strings, 4, INT_VAR_H(crunch_leave_uc_strings, 4,
"Dont crunch words with long lower case strings"); "Don't crunch words with long lower case strings");
INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions"); INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions");
INT_VAR_H(crunch_debug, 0, "As it says"); INT_VAR_H(crunch_debug, 0, "As it says");
INT_VAR_H(fixsp_non_noise_limit, 1, INT_VAR_H(fixsp_non_noise_limit, 1,
@ -1010,16 +1010,16 @@ class Tesseract : public Wordrec {
INT_VAR_H(suspect_space_level, 100, INT_VAR_H(suspect_space_level, 100,
"Min suspect level for rejecting spaces"); "Min suspect level for rejecting spaces");
INT_VAR_H(suspect_short_words, 2, INT_VAR_H(suspect_short_words, 2,
"Dont Suspect dict wds longer than this"); "Don't Suspect dict wds longer than this");
BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit"); double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit");
double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit"); double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures"); BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures");
BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING"); BOOL_VAR_H(tessedit_zero_rejection, false, "Don't reject ANYTHING");
BOOL_VAR_H(tessedit_word_for_word, false, BOOL_VAR_H(tessedit_word_for_word, false,
"Make output have exactly one word per WERD"); "Make output have exactly one word per WERD");
BOOL_VAR_H(tessedit_zero_kelvin_rejection, false, BOOL_VAR_H(tessedit_zero_kelvin_rejection, false,
"Dont reject ANYTHING AT ALL"); "Don't reject ANYTHING AT ALL");
BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same"); BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same");
INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm"); INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm");
BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug"); BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug");
@ -1030,7 +1030,7 @@ class Tesseract : public Wordrec {
"Aspect ratio dot/hyphen test"); "Aspect ratio dot/hyphen test");
BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector"); BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector");
BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test"); BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test");
BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check"); BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check");
BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control"); BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control");
BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control"); BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control");
BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control"); BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control");