mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-08-06 13:56:47 +08:00
Fix some typos (most found by codespell)
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
14505484c1
commit
0401b9470c
@ -498,7 +498,7 @@ public:
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for desciptions of other parameters.
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
|
@ -85,7 +85,7 @@ public class ScrollView {
|
||||
}
|
||||
|
||||
/**
|
||||
* The main program loop. Basically loops trough receiving messages and
|
||||
* The main program loop. Basically loops through receiving messages and
|
||||
* processing them and then sending messages (if there are any).
|
||||
*/
|
||||
private static void IOLoop() {
|
||||
|
@ -235,7 +235,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Sync pr_it with the wth WordData.
|
||||
// Sync pr_it with the WordData.
|
||||
while (pr_it->word() != nullptr && pr_it->word() != word->word) {
|
||||
pr_it->forward();
|
||||
}
|
||||
|
@ -25,7 +25,7 @@
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
#include "normalis.h" // for kBlnXHeight, kBlnBaselineOffset
|
||||
#include "pageres.h" // for WERD_RES_IT, WERD_RES, WERD_RES_LIST
|
||||
#include "params.h" // for IntParam, StringParam, BoolParam, Doub...
|
||||
#include "params.h" // for IntParam, StringParam, BoolParam, DoubleParam, ...
|
||||
#include "ratngs.h" // for WERD_CHOICE, FREQ_DAWG_PERM, NUMBER_PERM
|
||||
#include "rect.h" // for TBOX
|
||||
#include "stepblob.h" // for C_BLOB_IT, C_BLOB_LIST, C_BLOB
|
||||
|
@ -256,7 +256,7 @@ Tesseract::Tesseract()
|
||||
, INT_MEMBER(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?", this->params())
|
||||
, double_MEMBER(fixsp_small_outlines_size, 0.28, "Small if lt xht x this", this->params())
|
||||
, BOOL_MEMBER(tessedit_prefer_joined_punct, false, "Reward punctuation joins", this->params())
|
||||
, INT_MEMBER(fixsp_done_mode, 1, "What constitues done for spacing", this->params())
|
||||
, INT_MEMBER(fixsp_done_mode, 1, "What constitutes done for spacing", this->params())
|
||||
, INT_MEMBER(debug_fix_space_level, 0, "Contextual fixspace debug", this->params())
|
||||
, STRING_MEMBER(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers", this->params())
|
||||
, INT_MEMBER(x_ht_acceptance_tolerance, 8,
|
||||
|
@ -902,7 +902,7 @@ public:
|
||||
INT_VAR_H(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?");
|
||||
double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this");
|
||||
BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctuation joins");
|
||||
INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing");
|
||||
INT_VAR_H(fixsp_done_mode, 1, "What constitutes done for spacing");
|
||||
INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug");
|
||||
STRING_VAR_H(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers");
|
||||
INT_VAR_H(x_ht_acceptance_tolerance, 8, "Max allowed deviation of blob top outside of font data");
|
||||
|
@ -93,7 +93,7 @@ enum BlobSpecialTextType {
|
||||
BSTT_NONE, // No special.
|
||||
BSTT_ITALIC, // Italic style.
|
||||
BSTT_DIGIT, // Digit symbols.
|
||||
BSTT_MATH, // Mathmatical symobls (not including digit).
|
||||
BSTT_MATH, // Mathematical symbols (not including digit).
|
||||
BSTT_UNCLEAR, // Characters with low recognition rate.
|
||||
BSTT_SKIP, // Characters that we skip labeling (usually too small).
|
||||
BSTT_COUNT
|
||||
|
@ -217,7 +217,7 @@ public:
|
||||
std::vector<std::vector<std::pair<const char *, float>>> timesteps;
|
||||
// Stores the lstm choices of every timestep segmented by character
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmented_timesteps;
|
||||
// Symbolchoices aquired during CTC
|
||||
// Symbolchoices acquired during CTC
|
||||
std::vector<std::vector<std::pair<const char *, float>>> CTC_symbol_choices;
|
||||
// Stores if the timestep vector starts with a space
|
||||
bool leading_space = false;
|
||||
|
@ -296,7 +296,7 @@ void fix2( // polygonal approx
|
||||
/*single fixed step */
|
||||
if (edgept->flags[FLAGS] & FIXED &&
|
||||
edgept->flags[RUNLENGTH] == 1
|
||||
/*and neighours free */
|
||||
/*and neighbours free */
|
||||
&& edgept->next->flags[FLAGS] & FIXED &&
|
||||
(edgept->prev->flags[FLAGS] & FIXED) == 0
|
||||
/*same pair of dirs */
|
||||
|
@ -333,7 +333,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
return false;
|
||||
}
|
||||
if (version > 0) {
|
||||
// The next field being true indicates that the abiguity should
|
||||
// The next field being true indicates that the ambiguity should
|
||||
// always be substituted (e.g. '' should always be changed to ").
|
||||
// For such "certain" n -> m ambigs tesseract will insert character
|
||||
// fragments for the n pieces in the unicharset. AmbigsFound()
|
||||
|
@ -190,7 +190,7 @@ void SetAdaptiveThreshold(float Threshold);
|
||||
* @param Blob blob to be classified
|
||||
* @param[out] Choices List of choices found by adaptive matcher.
|
||||
* filled on return with the choices found by the
|
||||
* class pruner and the ratings therefrom. Also
|
||||
* class pruner and the ratings there from. Also
|
||||
* contains the detailed results of the integer matcher.
|
||||
*
|
||||
*/
|
||||
|
@ -322,7 +322,7 @@ public:
|
||||
* initialized to NO_EDGE. Since the punctuation dawg includes the empty
|
||||
* pattern " " (meaning anything without surrounding punctuation), having a
|
||||
* single entry for the punctuation dawg will cover all dawgs reachable
|
||||
* therefrom -- that includes all number and word dawgs. The only dawg
|
||||
* there from -- that includes all number and word dawgs. The only dawg
|
||||
* non-reachable from the punctuation_dawg is the pattern dawg.
|
||||
* If hyphen state needs to be applied, initial dawg_args->active_dawgs can
|
||||
* be copied from the saved hyphen state (maintained by Dict).
|
||||
|
@ -335,7 +335,7 @@ private:
|
||||
int xCoord);
|
||||
|
||||
// Calculates more accurate character boundaries which can be used to
|
||||
// provide more acurate alternative symbol choices.
|
||||
// provide more accurate alternative symbol choices.
|
||||
static void calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
|
||||
std::vector<int> *character_boundaries_, int maxWidth);
|
||||
|
||||
|
@ -733,7 +733,7 @@ int StructuredTable::CountHorizontalIntersections(int y) {
|
||||
}
|
||||
|
||||
// Counts how many text partitions are in this box.
|
||||
// This is used to count partitons in cells, as that can indicate
|
||||
// This is used to count partitions in cells, as that can indicate
|
||||
// how "strong" a potential table row/column (or even full table) actually is.
|
||||
int StructuredTable::CountPartitions(const TBOX &box) {
|
||||
ColPartitionGridSearch gsearch(text_grid_);
|
||||
|
@ -776,7 +776,7 @@ void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) {
|
||||
}
|
||||
|
||||
// (Re)Fit a line to the stored points. Returns false if the line
|
||||
// is degenerate. Althougth the TabVector code mostly doesn't care about the
|
||||
// is degenerate. Although the TabVector code mostly doesn't care about the
|
||||
// direction of lines, XAtY would give silly results for a horizontal line.
|
||||
// The class is mostly aimed at use for vertical lines representing
|
||||
// horizontal tab stops.
|
||||
|
@ -363,7 +363,7 @@ public:
|
||||
void Evaluate(const ICOORD &vertical, TabFind *finder);
|
||||
|
||||
// (Re)Fit a line to the stored points. Returns false if the line
|
||||
// is degenerate. Althougth the TabVector code mostly doesn't care about the
|
||||
// is degenerate. Although the TabVector code mostly doesn't care about the
|
||||
// direction of lines, XAtY would give silly results for a horizontal line.
|
||||
// The class is mostly aimed at use for vertical lines representing
|
||||
// horizontal tab stops.
|
||||
|
@ -35,7 +35,7 @@ const int kWrongWayPenalty = 4;
|
||||
// Ratio between parallel gap and perpendicular gap used to measure total
|
||||
// distance of a box from a target box in curved textline space.
|
||||
// parallel-gap is treated more favorably by this factor to allow catching
|
||||
// quotes and elipsis at the end of textlines.
|
||||
// quotes and ellipsis at the end of textlines.
|
||||
const int kParaPerpDistRatio = 4;
|
||||
// Multiple of scale_factor_ that the inter-line gap must be before we start
|
||||
// padding the increment box perpendicular to the text line.
|
||||
|
@ -502,14 +502,14 @@ bool Textord::clean_noise_from_row( // remove empties
|
||||
blob_box = outline->bounding_box();
|
||||
blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
|
||||
if (blob_size < textord_noise_sizelimit * row->x_height()) {
|
||||
dot_count++; // count smal outlines
|
||||
dot_count++; // count small outlines
|
||||
}
|
||||
if (!outline->child()->empty() &&
|
||||
blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
|
||||
blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
|
||||
blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
|
||||
blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
|
||||
super_norm_count++; // count smal outlines
|
||||
super_norm_count++; // count small outlines
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -598,14 +598,14 @@ void Textord::clean_noise_from_words( // remove empties
|
||||
blob_box = outline->bounding_box();
|
||||
blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
|
||||
if (blob_size < textord_noise_sizelimit * row->x_height()) {
|
||||
dot_count++; // count smal outlines
|
||||
dot_count++; // count small outlines
|
||||
}
|
||||
if (!outline->child()->empty() &&
|
||||
blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
|
||||
blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
|
||||
blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
|
||||
blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
|
||||
norm_count++; // count smal outlines
|
||||
norm_count++; // count small outlines
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -64,7 +64,7 @@ void Textord::to_spacing(ICOORD page_tr, // topright of page
|
||||
block_non_space_gap_width);
|
||||
// Make sure relative values of block-level space and non-space gap
|
||||
// widths are reasonable. The ratio of 1:3 is also used in
|
||||
// block_spacing_stats, to corrrect the block_space_gap_width
|
||||
// block_spacing_stats, to correct the block_space_gap_width
|
||||
// Useful for arabic and hindi, when the non-space gap width is
|
||||
// often over-estimated and should not be trusted. A similar ratio
|
||||
// is found in block_spacing_stats.
|
||||
@ -1695,7 +1695,7 @@ TBOX Textord::reduced_box_next(TO_ROW *row, // current row
|
||||
* the xheight.
|
||||
*
|
||||
*
|
||||
* !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
|
||||
* !!!!!!! WON'T WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
|
||||
* "home". Perhaps we need something which say if the width ABOVE the
|
||||
* xht alone includes the whole of the reduced width, then use the full
|
||||
* blob box - Might still fail on italic F
|
||||
|
@ -78,7 +78,7 @@ const int kMinRampSize = 1000;
|
||||
// With no dilation, after convolution, the images are so light that a heavy
|
||||
// constant offset is required to make the 0 image look reasonable. A simple
|
||||
// constant offset multiple of exposure to undo this value is enough to achieve
|
||||
// all the required lightening. This gives the advantage that exposure level 1
|
||||
// all the required lighting. This gives the advantage that exposure level 1
|
||||
// with a single dilation gives a good impression of the broken-yet-too-dark
|
||||
// problem that is often seen in scans.
|
||||
// A small random rotation gives some varying greyscale values on the edges,
|
||||
|
@ -69,7 +69,7 @@
|
||||
|
||||
### Fonts
|
||||
|
||||
* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [instalation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/)
|
||||
* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [installation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/)
|
||||
* [ae_Arab.ttf](https://www.wfonts.com/download/data/2014/12/03/ae-arab/ae-arab.zip)
|
||||
* dejavu-fonts: [DejaVuSans-ExtraLight.ttf](https://dejavu-fonts.github.io/Download.html)
|
||||
* [Lohit-Hindi.ttf](https://raw.githubusercontent.com/pratul/packageofpractices/master/assets/fonts/Lohit-Hindi.ttf)
|
||||
|
@ -182,7 +182,7 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
|
||||
}
|
||||
dbg_lines.push_back(absl::StrCat(correct[i].ascii, annotation));
|
||||
}
|
||||
LOG(INFO) << "Discrepency!\n" << absl::StrJoin(dbg_lines, "\n");
|
||||
LOG(INFO) << "Discrepancy!\n" << absl::StrJoin(dbg_lines, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user