Fix some typos (most found by codespell)

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2021-04-11 11:03:04 +02:00
parent 14505484c1
commit 0401b9470c
22 changed files with 26 additions and 26 deletions

View File

@ -498,7 +498,7 @@ public:
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for desciptions of other parameters.
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,

View File

@ -85,7 +85,7 @@ public class ScrollView {
}
/**
* The main program loop. Basically loops trough receiving messages and
* The main program loop. Basically loops through receiving messages and
* processing them and then sending messages (if there are any).
*/
private static void IOLoop() {

View File

@ -235,7 +235,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
continue;
}
}
// Sync pr_it with the wth WordData.
// Sync pr_it with the WordData.
while (pr_it->word() != nullptr && pr_it->word() != word->word) {
pr_it->forward();
}

View File

@ -25,7 +25,7 @@
#include "errcode.h" // for ASSERT_HOST
#include "normalis.h" // for kBlnXHeight, kBlnBaselineOffset
#include "pageres.h" // for WERD_RES_IT, WERD_RES, WERD_RES_LIST
#include "params.h" // for IntParam, StringParam, BoolParam, Doub...
#include "params.h" // for IntParam, StringParam, BoolParam, DoubleParam, ...
#include "ratngs.h" // for WERD_CHOICE, FREQ_DAWG_PERM, NUMBER_PERM
#include "rect.h" // for TBOX
#include "stepblob.h" // for C_BLOB_IT, C_BLOB_LIST, C_BLOB

View File

@ -256,7 +256,7 @@ Tesseract::Tesseract()
, INT_MEMBER(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?", this->params())
, double_MEMBER(fixsp_small_outlines_size, 0.28, "Small if lt xht x this", this->params())
, BOOL_MEMBER(tessedit_prefer_joined_punct, false, "Reward punctuation joins", this->params())
, INT_MEMBER(fixsp_done_mode, 1, "What constitues done for spacing", this->params())
, INT_MEMBER(fixsp_done_mode, 1, "What constitutes done for spacing", this->params())
, INT_MEMBER(debug_fix_space_level, 0, "Contextual fixspace debug", this->params())
, STRING_MEMBER(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers", this->params())
, INT_MEMBER(x_ht_acceptance_tolerance, 8,

View File

@ -902,7 +902,7 @@ public:
INT_VAR_H(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?");
double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this");
BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctuation joins");
INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing");
INT_VAR_H(fixsp_done_mode, 1, "What constitutes done for spacing");
INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug");
STRING_VAR_H(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers");
INT_VAR_H(x_ht_acceptance_tolerance, 8, "Max allowed deviation of blob top outside of font data");

View File

@ -93,7 +93,7 @@ enum BlobSpecialTextType {
BSTT_NONE, // No special.
BSTT_ITALIC, // Italic style.
BSTT_DIGIT, // Digit symbols.
BSTT_MATH, // Mathmatical symobls (not including digit).
BSTT_MATH, // Mathematical symbols (not including digit).
BSTT_UNCLEAR, // Characters with low recognition rate.
BSTT_SKIP, // Characters that we skip labeling (usually too small).
BSTT_COUNT

View File

@ -217,7 +217,7 @@ public:
std::vector<std::vector<std::pair<const char *, float>>> timesteps;
// Stores the lstm choices of every timestep segmented by character
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmented_timesteps;
// Symbolchoices aquired during CTC
// Symbolchoices acquired during CTC
std::vector<std::vector<std::pair<const char *, float>>> CTC_symbol_choices;
// Stores if the timestep vector starts with a space
bool leading_space = false;

View File

@ -296,7 +296,7 @@ void fix2( // polygonal approx
/*single fixed step */
if (edgept->flags[FLAGS] & FIXED &&
edgept->flags[RUNLENGTH] == 1
/*and neighours free */
/*and neighbours free */
&& edgept->next->flags[FLAGS] & FIXED &&
(edgept->prev->flags[FLAGS] & FIXED) == 0
/*same pair of dirs */

View File

@ -333,7 +333,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
return false;
}
if (version > 0) {
// The next field being true indicates that the abiguity should
// The next field being true indicates that the ambiguity should
// always be substituted (e.g. '' should always be changed to ").
// For such "certain" n -> m ambigs tesseract will insert character
// fragments for the n pieces in the unicharset. AmbigsFound()

View File

@ -190,7 +190,7 @@ void SetAdaptiveThreshold(float Threshold);
* @param Blob blob to be classified
* @param[out] Choices List of choices found by adaptive matcher.
* filled on return with the choices found by the
* class pruner and the ratings therefrom. Also
* class pruner and the ratings there from. Also
* contains the detailed results of the integer matcher.
*
*/

View File

@ -322,7 +322,7 @@ public:
* initialized to NO_EDGE. Since the punctuation dawg includes the empty
* pattern " " (meaning anything without surrounding punctuation), having a
* single entry for the punctuation dawg will cover all dawgs reachable
* therefrom -- that includes all number and word dawgs. The only dawg
* there from -- that includes all number and word dawgs. The only dawg
* non-reachable from the punctuation_dawg is the pattern dawg.
* If hyphen state needs to be applied, initial dawg_args->active_dawgs can
* be copied from the saved hyphen state (maintained by Dict).

View File

@ -335,7 +335,7 @@ private:
int xCoord);
// Calculates more accurate character boundaries which can be used to
// provide more acurate alternative symbol choices.
// provide more accurate alternative symbol choices.
static void calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
std::vector<int> *character_boundaries_, int maxWidth);

View File

@ -733,7 +733,7 @@ int StructuredTable::CountHorizontalIntersections(int y) {
}
// Counts how many text partitions are in this box.
// This is used to count partitons in cells, as that can indicate
// This is used to count partitions in cells, as that can indicate
// how "strong" a potential table row/column (or even full table) actually is.
int StructuredTable::CountPartitions(const TBOX &box) {
ColPartitionGridSearch gsearch(text_grid_);

View File

@ -776,7 +776,7 @@ void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) {
}
// (Re)Fit a line to the stored points. Returns false if the line
// is degenerate. Althougth the TabVector code mostly doesn't care about the
// is degenerate. Although the TabVector code mostly doesn't care about the
// direction of lines, XAtY would give silly results for a horizontal line.
// The class is mostly aimed at use for vertical lines representing
// horizontal tab stops.

View File

@ -363,7 +363,7 @@ public:
void Evaluate(const ICOORD &vertical, TabFind *finder);
// (Re)Fit a line to the stored points. Returns false if the line
// is degenerate. Althougth the TabVector code mostly doesn't care about the
// is degenerate. Although the TabVector code mostly doesn't care about the
// direction of lines, XAtY would give silly results for a horizontal line.
// The class is mostly aimed at use for vertical lines representing
// horizontal tab stops.

View File

@ -35,7 +35,7 @@ const int kWrongWayPenalty = 4;
// Ratio between parallel gap and perpendicular gap used to measure total
// distance of a box from a target box in curved textline space.
// parallel-gap is treated more favorably by this factor to allow catching
// quotes and elipsis at the end of textlines.
// quotes and ellipsis at the end of textlines.
const int kParaPerpDistRatio = 4;
// Multiple of scale_factor_ that the inter-line gap must be before we start
// padding the increment box perpendicular to the text line.

View File

@ -502,14 +502,14 @@ bool Textord::clean_noise_from_row( // remove empties
blob_box = outline->bounding_box();
blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
if (blob_size < textord_noise_sizelimit * row->x_height()) {
dot_count++; // count smal outlines
dot_count++; // count small outlines
}
if (!outline->child()->empty() &&
blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
super_norm_count++; // count smal outlines
super_norm_count++; // count small outlines
}
}
} else {
@ -598,14 +598,14 @@ void Textord::clean_noise_from_words( // remove empties
blob_box = outline->bounding_box();
blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
if (blob_size < textord_noise_sizelimit * row->x_height()) {
dot_count++; // count smal outlines
dot_count++; // count small outlines
}
if (!outline->child()->empty() &&
blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
norm_count++; // count smal outlines
norm_count++; // count small outlines
}
}
} else {

View File

@ -64,7 +64,7 @@ void Textord::to_spacing(ICOORD page_tr, // topright of page
block_non_space_gap_width);
// Make sure relative values of block-level space and non-space gap
// widths are reasonable. The ratio of 1:3 is also used in
// block_spacing_stats, to corrrect the block_space_gap_width
// block_spacing_stats, to correct the block_space_gap_width
// Useful for arabic and hindi, when the non-space gap width is
// often over-estimated and should not be trusted. A similar ratio
// is found in block_spacing_stats.
@ -1695,7 +1695,7 @@ TBOX Textord::reduced_box_next(TO_ROW *row, // current row
* the xheight.
*
*
* !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
* !!!!!!! WON'T WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
* "home". Perhaps we need something which says if the width ABOVE the
* xht alone includes the whole of the reduced width, then use the full
* blob box - Might still fail on italic F

View File

@ -78,7 +78,7 @@ const int kMinRampSize = 1000;
// With no dilation, after convolution, the images are so light that a heavy
// constant offset is required to make the 0 image look reasonable. A simple
// constant offset multiple of exposure to undo this value is enough to achieve
// all the required lightening. This gives the advantage that exposure level 1
// all the required lighting. This gives the advantage that exposure level 1
// with a single dilation gives a good impression of the broken-yet-too-dark
// problem that is often seen in scans.
// A small random rotation gives some varying greyscale values on the edges,

View File

@ -69,7 +69,7 @@
### Fonts
* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [instalation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/)
* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [installation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/)
* [ae_Arab.ttf](https://www.wfonts.com/download/data/2014/12/03/ae-arab/ae-arab.zip)
* dejavu-fonts: [DejaVuSans-ExtraLight.ttf](https://dejavu-fonts.github.io/Download.html)
* [Lohit-Hindi.ttf](https://raw.githubusercontent.com/pratul/packageofpractices/master/assets/fonts/Lohit-Hindi.ttf)

View File

@ -182,7 +182,7 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
}
dbg_lines.push_back(absl::StrCat(correct[i].ascii, annotation));
}
LOG(INFO) << "Discrepency!\n" << absl::StrJoin(dbg_lines, "\n");
LOG(INFO) << "Discrepancy!\n" << absl::StrJoin(dbg_lines, "\n");
}
}