mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-12 05:13:14 +08:00
Fixed typos and improved comments
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@753 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
5e79160afb
commit
da1047f020
1
README
1
README
@ -26,6 +26,7 @@ Dependencies and Licenses
|
|||||||
|
|
||||||
Leptonica is required. (www.leptonica.com). Tesseract no longer compiles
|
Leptonica is required. (www.leptonica.com). Tesseract no longer compiles
|
||||||
without Leptonica.
|
without Leptonica.
|
||||||
|
Libtiff is no longer required as a direct dependency.
|
||||||
|
|
||||||
|
|
||||||
Installing and Running Tesseract
|
Installing and Running Tesseract
|
||||||
|
@ -277,7 +277,7 @@ void RecomputeMarginsAndClearHypotheses(
|
|||||||
GenericVector<RowScratchRegisters> *rows, int start, int end,
|
GenericVector<RowScratchRegisters> *rows, int start, int end,
|
||||||
int percentile);
|
int percentile);
|
||||||
|
|
||||||
// Return the minimum inter-word space in rows[row_start, row_end).
|
// Return the median inter-word space in rows[row_start, row_end).
|
||||||
int InterwordSpace(const GenericVector<RowScratchRegisters> &rows,
|
int InterwordSpace(const GenericVector<RowScratchRegisters> &rows,
|
||||||
int row_start, int row_end);
|
int row_start, int row_end);
|
||||||
|
|
||||||
|
@ -138,7 +138,16 @@ class BLOB_CHOICE: public ELIST_LINK
|
|||||||
UNICHAR_ID unichar_id_; // unichar id
|
UNICHAR_ID unichar_id_; // unichar id
|
||||||
inT16 fontinfo_id_; // char font information
|
inT16 fontinfo_id_; // char font information
|
||||||
inT16 fontinfo_id2_; // 2nd choice font information
|
inT16 fontinfo_id2_; // 2nd choice font information
|
||||||
|
// Rating is the classifier distance weighted by the length of the outline
|
||||||
|
// in the blob. In terms of probability, classifier distance is -k log p such
|
||||||
|
// that the resulting distance is in the range [0, 1] and then
|
||||||
|
// rating = w (-k log p) where w is the weight for the length of the outline.
|
||||||
|
// Sums of ratings may be compared meaningfully for words of different
|
||||||
|
// segmentation.
|
||||||
float rating_; // size related
|
float rating_; // size related
|
||||||
|
// Certainty is a number in [-20, 0] indicating the classifier certainty
|
||||||
|
// of the choice. In terms of probability, certainty = 20 (k log p) where
|
||||||
|
// k is defined as above to normalize -k log p to the range [0, 1].
|
||||||
float certainty_; // absolute
|
float certainty_; // absolute
|
||||||
int script_id_;
|
int script_id_;
|
||||||
// Stores language model information about this BLOB_CHOICE. Used during
|
// Stores language model information about this BLOB_CHOICE. Used during
|
||||||
@ -408,7 +417,9 @@ class WERD_CHOICE {
|
|||||||
char *fragment_lengths_; // number of fragments in each unichar
|
char *fragment_lengths_; // number of fragments in each unichar
|
||||||
int reserved_; // size of the above arrays
|
int reserved_; // size of the above arrays
|
||||||
int length_; // word length
|
int length_; // word length
|
||||||
|
// Rating is the sum of the ratings of the individual blobs in the word.
|
||||||
float rating_; // size related
|
float rating_; // size related
|
||||||
|
// Certainty is the min (worst) certainty of the individual blobs in the word.
|
||||||
float certainty_; // absolute
|
float certainty_; // absolute
|
||||||
uinT8 permuter_; // permuter code
|
uinT8 permuter_; // permuter code
|
||||||
bool fragment_mark_; // if true, indicates that this choice
|
bool fragment_mark_; // if true, indicates that this choice
|
||||||
|
@ -181,12 +181,10 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
|
|||||||
CLASS_PRUNER_RESULTS CPResults) {
|
CLASS_PRUNER_RESULTS CPResults) {
|
||||||
assert(Choices != NULL);
|
assert(Choices != NULL);
|
||||||
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
|
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
|
||||||
|
Results->Initialize();
|
||||||
|
|
||||||
if (AdaptedTemplates == NULL)
|
if (AdaptedTemplates == NULL)
|
||||||
AdaptedTemplates = NewAdaptedTemplates (true);
|
AdaptedTemplates = NewAdaptedTemplates (true);
|
||||||
|
|
||||||
Results->Initialize();
|
|
||||||
|
|
||||||
DoAdaptiveMatch(Blob, denorm, Results);
|
DoAdaptiveMatch(Blob, denorm, Results);
|
||||||
if (CPResults != NULL)
|
if (CPResults != NULL)
|
||||||
memcpy(CPResults, Results->CPResults,
|
memcpy(CPResults, Results->CPResults,
|
||||||
@ -903,7 +901,14 @@ int Classify::AdaptableWord(TWERD *Word,
|
|||||||
BestChoiceLength > 0 &&
|
BestChoiceLength > 0 &&
|
||||||
BestChoiceLength == Word->NumBlobs() &&
|
BestChoiceLength == Word->NumBlobs() &&
|
||||||
BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
|
BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
|
||||||
|
// This basically ensures that the word is at least a dictionary match
|
||||||
|
// (freq word, user word, system dawg word, etc).
|
||||||
|
// Since all the other adjustments will make adjust factor higher
|
||||||
|
// than adaptable_score=1.1+0.05=1.15
|
||||||
|
// Since there are other flags that ensure that the word is a dict word,
|
||||||
|
// this check could be at times redundant.
|
||||||
getDict().CurrentBestChoiceAdjustFactor() <= adaptable_score &&
|
getDict().CurrentBestChoiceAdjustFactor() <= adaptable_score &&
|
||||||
|
// Make sure that alternative choices are not dictionary words.
|
||||||
getDict().AlternativeChoicesWorseThan(adaptable_score) &&
|
getDict().AlternativeChoicesWorseThan(adaptable_score) &&
|
||||||
getDict().CurrentBestChoiceIs(BestChoiceWord);
|
getDict().CurrentBestChoiceIs(BestChoiceWord);
|
||||||
}
|
}
|
||||||
@ -2487,18 +2492,12 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
|
|||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
/**
|
/**
|
||||||
* This routine steps thru each matching class in Results
|
* This routine discards extra digits or punctuation from the results.
|
||||||
* and removes it from the match list if its rating
|
* We keep only the top 2 punctuation answers and the top 1 digit answer if
|
||||||
* is worse than the BestRating plus a pad. In other words,
|
* present.
|
||||||
* all good matches get moved to the front of the classes
|
|
||||||
* array.
|
|
||||||
*
|
*
|
||||||
* @param Results contains matches to be filtered
|
* @param Results contains matches to be filtered
|
||||||
*
|
*
|
||||||
* Globals:
|
|
||||||
* - matcher_bad_match_pad defines a "bad match"
|
|
||||||
*
|
|
||||||
* @note Exceptions: none
|
|
||||||
* @note History: Tue Mar 12 13:51:03 1991, DSJ, Created.
|
* @note History: Tue Mar 12 13:51:03 1991, DSJ, Created.
|
||||||
*/
|
*/
|
||||||
void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
|
void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
|
||||||
|
@ -1846,7 +1846,6 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
|
|||||||
* proto from the class description and adds a rendering of
|
* proto from the class description and adds a rendering of
|
||||||
* the proto onto the ShapeList.
|
* the proto onto the ShapeList.
|
||||||
*
|
*
|
||||||
* @param ShapeList shape list to append proto rendering onto
|
|
||||||
* @param Class class that proto is contained in
|
* @param Class class that proto is contained in
|
||||||
* @param ProtoId id of proto to be rendered
|
* @param ProtoId id of proto to be rendered
|
||||||
* @param color color to render proto in
|
* @param color color to render proto in
|
||||||
|
Loading…
Reference in New Issue
Block a user