mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-22 18:13:42 +08:00
Merge pull request #1754 from stweil/fix
Fix compiler warnings [-Wmissing-prototypes]
This commit is contained in:
commit
09f4179e89
@ -2622,7 +2622,7 @@ void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
|
|||||||
* Return a TBLOB * from the whole pix.
|
* Return a TBLOB * from the whole pix.
|
||||||
* To be freed later with delete.
|
* To be freed later with delete.
|
||||||
*/
|
*/
|
||||||
TBLOB *make_tesseract_blob(float baseline, float xheight,
|
static TBLOB *make_tesseract_blob(float baseline, float xheight,
|
||||||
float descender, float ascender,
|
float descender, float ascender,
|
||||||
bool numeric_mode, Pix* pix) {
|
bool numeric_mode, Pix* pix) {
|
||||||
TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
|
TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
|
||||||
|
@ -202,7 +202,7 @@ void TessPDFRenderer::AppendPDFObject(const char *data) {
|
|||||||
// Helper function to prevent us from accidentally writing
|
// Helper function to prevent us from accidentally writing
|
||||||
// scientific notation to an HOCR or PDF file. Besides, three
|
// scientific notation to an HOCR or PDF file. Besides, three
|
||||||
// decimal places are all you really need.
|
// decimal places are all you really need.
|
||||||
double prec(double x) {
|
static double prec(double x) {
|
||||||
double kPrecision = 1000.0;
|
double kPrecision = 1000.0;
|
||||||
double a = round(x * kPrecision) / kPrecision;
|
double a = round(x * kPrecision) / kPrecision;
|
||||||
if (a == -0)
|
if (a == -0)
|
||||||
@ -210,7 +210,7 @@ double prec(double x) {
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
long dist2(int x1, int y1, int x2, int y2) {
|
static long dist2(int x1, int y1, int x2, int y2) {
|
||||||
return (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1);
|
return (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -222,7 +222,7 @@ long dist2(int x1, int y1, int x2, int y2) {
|
|||||||
// left-to-right no matter what the reading order is. We need the
|
// left-to-right no matter what the reading order is. We need the
|
||||||
// word baseline in reading order, so we do that conversion here. Returns
|
// word baseline in reading order, so we do that conversion here. Returns
|
||||||
// the word's baseline origin and length.
|
// the word's baseline origin and length.
|
||||||
void GetWordBaseline(int writing_direction, int ppi, int height,
|
static void GetWordBaseline(int writing_direction, int ppi, int height,
|
||||||
int word_x1, int word_y1, int word_x2, int word_y2,
|
int word_x1, int word_y1, int word_x2, int word_y2,
|
||||||
int line_x1, int line_y1, int line_x2, int line_y2,
|
int line_x1, int line_y1, int line_x2, int line_y2,
|
||||||
double *x0, double *y0, double *length) {
|
double *x0, double *y0, double *length) {
|
||||||
@ -264,7 +264,7 @@ void GetWordBaseline(int writing_direction, int ppi, int height,
|
|||||||
// RTL
|
// RTL
|
||||||
// [ x' ] = [ a b ][ x ] = [-1 0 ] [ cos sin ][ x ]
|
// [ x' ] = [ a b ][ x ] = [-1 0 ] [ cos sin ][ x ]
|
||||||
// [ y' ] [ c d ][ y ] [ 0 1 ] [-sin cos ][ y ]
|
// [ y' ] [ c d ][ y ] [ 0 1 ] [-sin cos ][ y ]
|
||||||
void AffineMatrix(int writing_direction,
|
static void AffineMatrix(int writing_direction,
|
||||||
int line_x1, int line_y1, int line_x2, int line_y2,
|
int line_x1, int line_y1, int line_x2, int line_y2,
|
||||||
double *a, double *b, double *c, double *d) {
|
double *a, double *b, double *c, double *d) {
|
||||||
double theta = atan2(static_cast<double>(line_y1 - line_y2),
|
double theta = atan2(static_cast<double>(line_y1 - line_y2),
|
||||||
|
@ -156,7 +156,8 @@ void OSResults::accumulate(const OSResults& osr) {
|
|||||||
|
|
||||||
// Detect and erase horizontal/vertical lines and picture regions from the
|
// Detect and erase horizontal/vertical lines and picture regions from the
|
||||||
// image, so that non-text blobs are removed from consideration.
|
// image, so that non-text blobs are removed from consideration.
|
||||||
void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
|
static void remove_nontext_regions(tesseract::Tesseract *tess,
|
||||||
|
BLOCK_LIST *blocks,
|
||||||
TO_BLOCK_LIST *to_blocks) {
|
TO_BLOCK_LIST *to_blocks) {
|
||||||
Pix *pix = tess->pix_binary();
|
Pix *pix = tess->pix_binary();
|
||||||
ASSERT_HOST(pix != nullptr);
|
ASSERT_HOST(pix != nullptr);
|
||||||
|
@ -42,23 +42,6 @@
|
|||||||
#define CTRL_NEWLINE '\012' //newline
|
#define CTRL_NEWLINE '\012' //newline
|
||||||
#define CTRL_HARDLINE '\015' //cr
|
#define CTRL_HARDLINE '\015' //cr
|
||||||
|
|
||||||
/**********************************************************************
|
|
||||||
* pixels_to_pts
|
|
||||||
*
|
|
||||||
* Convert an integer number of pixels to the nearest integer
|
|
||||||
* number of points.
|
|
||||||
**********************************************************************/
|
|
||||||
|
|
||||||
int32_t pixels_to_pts( //convert coords
|
|
||||||
int32_t pixels,
|
|
||||||
int32_t pix_res //resolution
|
|
||||||
) {
|
|
||||||
float pts; //converted value
|
|
||||||
|
|
||||||
pts = pixels * 72.0 / pix_res;
|
|
||||||
return (int32_t) (pts + 0.5); //round it
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
void Tesseract::output_pass( //Tess output pass //send to api
|
void Tesseract::output_pass( //Tess output pass //send to api
|
||||||
PAGE_RES_IT &page_res_it,
|
PAGE_RES_IT &page_res_it,
|
||||||
|
@ -56,6 +56,11 @@ const ParagraphModel *kCrownLeft
|
|||||||
const ParagraphModel *kCrownRight
|
const ParagraphModel *kCrownRight
|
||||||
= reinterpret_cast<ParagraphModel *>(0xDEAD888F);
|
= reinterpret_cast<ParagraphModel *>(0xDEAD888F);
|
||||||
|
|
||||||
|
// Do the text and geometry of two rows support a paragraph break between them?
|
||||||
|
static bool LikelyParagraphStart(const RowScratchRegisters &before,
|
||||||
|
const RowScratchRegisters &after,
|
||||||
|
tesseract::ParagraphJustification j);
|
||||||
|
|
||||||
// Given the width of a typical space between words, what is the threshold
|
// Given the width of a typical space between words, what is the threshold
|
||||||
// by which we think left and right alignments for paragraphs
|
// by which we think left and right alignments for paragraphs
|
||||||
// can vary and still be aligned.
|
// can vary and still be aligned.
|
||||||
@ -128,7 +133,7 @@ static void PrintTable(const GenericVector<GenericVector<STRING> > &rows,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
STRING RtlEmbed(const STRING &word, bool rtlify) {
|
static STRING RtlEmbed(const STRING &word, bool rtlify) {
|
||||||
if (rtlify)
|
if (rtlify)
|
||||||
return STRING(kRLE) + word + STRING(kPDF);
|
return STRING(kRLE) + word + STRING(kPDF);
|
||||||
return word;
|
return word;
|
||||||
@ -200,34 +205,34 @@ static void PrintRowRange(const GenericVector<RowScratchRegisters> &rows,
|
|||||||
|
|
||||||
// ============= Brain Dead Language Model (ASCII Version) ===================
|
// ============= Brain Dead Language Model (ASCII Version) ===================
|
||||||
|
|
||||||
bool IsLatinLetter(int ch) {
|
static bool IsLatinLetter(int ch) {
|
||||||
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
|
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsDigitLike(int ch) {
|
static bool IsDigitLike(int ch) {
|
||||||
return ch == 'o' || ch == 'O' || ch == 'l' || ch == 'I';
|
return ch == 'o' || ch == 'O' || ch == 'l' || ch == 'I';
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsOpeningPunct(int ch) {
|
static bool IsOpeningPunct(int ch) {
|
||||||
return strchr("'\"({[", ch) != nullptr;
|
return strchr("'\"({[", ch) != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsTerminalPunct(int ch) {
|
static bool IsTerminalPunct(int ch) {
|
||||||
return strchr(":'\".?!]})", ch) != nullptr;
|
return strchr(":'\".?!]})", ch) != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return a pointer past the leading run of characters in str that appear in toskip.
|
// Return a pointer past the leading run of characters in str that appear in toskip.
|
||||||
const char *SkipChars(const char *str, const char *toskip) {
|
static const char *SkipChars(const char *str, const char *toskip) {
|
||||||
while (*str != '\0' && strchr(toskip, *str)) { str++; }
|
while (*str != '\0' && strchr(toskip, *str)) { str++; }
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *SkipChars(const char *str, bool (*skip)(int)) {
|
static const char *SkipChars(const char *str, bool (*skip)(int)) {
|
||||||
while (*str != '\0' && skip(*str)) { str++; }
|
while (*str != '\0' && skip(*str)) { str++; }
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *SkipOne(const char *str, const char *toskip) {
|
static const char *SkipOne(const char *str, const char *toskip) {
|
||||||
if (*str != '\0' && strchr(toskip, *str)) return str + 1;
|
if (*str != '\0' && strchr(toskip, *str)) return str + 1;
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
@ -235,7 +240,7 @@ const char *SkipOne(const char *str, const char *toskip) {
|
|||||||
// Return whether it is very likely that this is a numeral marker that could
|
// Return whether it is very likely that this is a numeral marker that could
|
||||||
// start a list item. Some examples include:
|
// start a list item. Some examples include:
|
||||||
// A I iii. VI (2) 3.5. [C-4]
|
// A I iii. VI (2) 3.5. [C-4]
|
||||||
bool LikelyListNumeral(const STRING &word) {
|
static bool LikelyListNumeral(const STRING &word) {
|
||||||
const char *kRomans = "ivxlmdIVXLMD";
|
const char *kRomans = "ivxlmdIVXLMD";
|
||||||
const char *kDigits = "012345789";
|
const char *kDigits = "012345789";
|
||||||
const char *kOpen = "[{(";
|
const char *kOpen = "[{(";
|
||||||
@ -269,7 +274,7 @@ bool LikelyListNumeral(const STRING &word) {
|
|||||||
return *pos == '\0';
|
return *pos == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LikelyListMark(const STRING &word) {
|
static bool LikelyListMark(const STRING &word) {
|
||||||
const char *kListMarks = "0Oo*.,+.";
|
const char *kListMarks = "0Oo*.,+.";
|
||||||
return word.size() == 1 && strchr(kListMarks, word[0]) != nullptr;
|
return word.size() == 1 && strchr(kListMarks, word[0]) != nullptr;
|
||||||
}
|
}
|
||||||
@ -335,7 +340,7 @@ int UnicodeSpanSkipper::SkipAlpha(int pos) {
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LikelyListMarkUnicode(int ch) {
|
static bool LikelyListMarkUnicode(int ch) {
|
||||||
if (ch < 0x80) {
|
if (ch < 0x80) {
|
||||||
STRING single_ch;
|
STRING single_ch;
|
||||||
single_ch += ch;
|
single_ch += ch;
|
||||||
@ -364,7 +369,7 @@ bool LikelyListMarkUnicode(int ch) {
|
|||||||
// Return whether it is very likely that this is a numeral marker that could
|
// Return whether it is very likely that this is a numeral marker that could
|
||||||
// start a list item. Some examples include:
|
// start a list item. Some examples include:
|
||||||
// A I iii. VI (2) 3.5. [C-4]
|
// A I iii. VI (2) 3.5. [C-4]
|
||||||
bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {
|
static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {
|
||||||
if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0)))
|
if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0)))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
@ -672,7 +677,7 @@ class SimpleClusterer {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Return the index of the cluster closest to value.
|
// Return the index of the cluster closest to value.
|
||||||
int ClosestCluster(const GenericVector<Cluster> &clusters, int value) {
|
static int ClosestCluster(const GenericVector<Cluster> &clusters, int value) {
|
||||||
int best_index = 0;
|
int best_index = 0;
|
||||||
for (int i = 0; i < clusters.size(); i++) {
|
for (int i = 0; i < clusters.size(); i++) {
|
||||||
if (abs(value - clusters[i].center) <
|
if (abs(value - clusters[i].center) <
|
||||||
@ -698,9 +703,8 @@ void SimpleClusterer::GetClusters(GenericVector<Cluster> *clusters) {
|
|||||||
|
|
||||||
// Calculate left- and right-indent tab stop values seen in
|
// Calculate left- and right-indent tab stop values seen in
|
||||||
// rows[row_start, row_end) given a tolerance of tolerance.
|
// rows[row_start, row_end) given a tolerance of tolerance.
|
||||||
void CalculateTabStops(GenericVector<RowScratchRegisters> *rows,
|
static void CalculateTabStops(GenericVector<RowScratchRegisters> *rows,
|
||||||
int row_start, int row_end,
|
int row_start, int row_end, int tolerance,
|
||||||
int tolerance,
|
|
||||||
GenericVector<Cluster> *left_tabs,
|
GenericVector<Cluster> *left_tabs,
|
||||||
GenericVector<Cluster> *right_tabs) {
|
GenericVector<Cluster> *right_tabs) {
|
||||||
if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end))
|
if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end))
|
||||||
@ -814,11 +818,10 @@ void CalculateTabStops(GenericVector<RowScratchRegisters> *rows,
|
|||||||
// Case 2b: Fully Justified. (eop_threshold > 0)
|
// Case 2b: Fully Justified. (eop_threshold > 0)
|
||||||
// We mark a line as short (end of paragraph) if the offside indent
|
// We mark a line as short (end of paragraph) if the offside indent
|
||||||
// is greater than eop_threshold.
|
// is greater than eop_threshold.
|
||||||
void MarkRowsWithModel(GenericVector<RowScratchRegisters> *rows,
|
static void MarkRowsWithModel(GenericVector<RowScratchRegisters> *rows,
|
||||||
int row_start, int row_end,
|
int row_start, int row_end,
|
||||||
const ParagraphModel *model,
|
const ParagraphModel *model,
|
||||||
bool ltr,
|
bool ltr, int eop_threshold) {
|
||||||
int eop_threshold) {
|
|
||||||
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end))
|
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end))
|
||||||
return;
|
return;
|
||||||
for (int row = row_start; row < row_end; row++) {
|
for (int row = row_start; row < row_end; row++) {
|
||||||
@ -992,7 +995,7 @@ struct GeometricClassifierState {
|
|||||||
// [script direction: first indent, body indent]
|
// [script direction: first indent, body indent]
|
||||||
// (A1) LtR: 2,0 RtL: 0,0 (B1) LtR: 0,0 RtL: 2,0
|
// (A1) LtR: 2,0 RtL: 0,0 (B1) LtR: 0,0 RtL: 2,0
|
||||||
// (A2) LtR: 2,0 RtL: CrR (B2) LtR: CrL RtL: 2,0
|
// (A2) LtR: 2,0 RtL: CrR (B2) LtR: CrL RtL: 2,0
|
||||||
void GeometricClassifyThreeTabStopTextBlock(
|
static void GeometricClassifyThreeTabStopTextBlock(
|
||||||
int debug_level,
|
int debug_level,
|
||||||
GeometricClassifierState &s,
|
GeometricClassifierState &s,
|
||||||
ParagraphTheory *theory) {
|
ParagraphTheory *theory) {
|
||||||
@ -1084,7 +1087,7 @@ void GeometricClassifyThreeTabStopTextBlock(
|
|||||||
// have capital letters to go on (e.g. Hebrew, Arabic, Hindi, Chinese),
|
// have capital letters to go on (e.g. Hebrew, Arabic, Hindi, Chinese),
|
||||||
// it's worth guessing that (A1b) is the correct interpretation if there are
|
// it's worth guessing that (A1b) is the correct interpretation if there are
|
||||||
// far more "full" lines than "short" lines.
|
// far more "full" lines than "short" lines.
|
||||||
void GeometricClassify(int debug_level,
|
static void GeometricClassify(int debug_level,
|
||||||
GenericVector<RowScratchRegisters> *rows,
|
GenericVector<RowScratchRegisters> *rows,
|
||||||
int row_start, int row_end,
|
int row_start, int row_end,
|
||||||
ParagraphTheory *theory) {
|
ParagraphTheory *theory) {
|
||||||
@ -1462,7 +1465,7 @@ void ParagraphModelSmearer::Smear() {
|
|||||||
|
|
||||||
// Find out what ParagraphModels are actually used, and discard any
|
// Find out what ParagraphModels are actually used, and discard any
|
||||||
// that are not.
|
// that are not.
|
||||||
void DiscardUnusedModels(const GenericVector<RowScratchRegisters> &rows,
|
static void DiscardUnusedModels(const GenericVector<RowScratchRegisters> &rows,
|
||||||
ParagraphTheory *theory) {
|
ParagraphTheory *theory) {
|
||||||
SetOfModels used_models;
|
SetOfModels used_models;
|
||||||
for (int i = 0; i < rows.size(); i++) {
|
for (int i = 0; i < rows.size(); i++) {
|
||||||
@ -1495,8 +1498,7 @@ void DiscardUnusedModels(const GenericVector<RowScratchRegisters> &rows,
|
|||||||
// Comb backwards through the row scratch registers, and turn any
|
// Comb backwards through the row scratch registers, and turn any
|
||||||
// sequences of body lines of equivalent type abutted against the beginning
|
// sequences of body lines of equivalent type abutted against the beginning
|
||||||
// or a body or start line of a different type into a crown paragraph.
|
// or a body or start line of a different type into a crown paragraph.
|
||||||
void DowngradeWeakestToCrowns(int debug_level,
|
static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
|
||||||
ParagraphTheory *theory,
|
|
||||||
GenericVector<RowScratchRegisters> *rows) {
|
GenericVector<RowScratchRegisters> *rows) {
|
||||||
int start;
|
int start;
|
||||||
for (int end = rows->size(); end > 0; end = start) {
|
for (int end = rows->size(); end > 0; end = start) {
|
||||||
@ -1668,7 +1670,7 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
|
|||||||
return after.ri_->rword_box.width() < available_space;
|
return after.ri_->rword_box.width() < available_space;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TextSupportsBreak(const RowScratchRegisters &before,
|
static bool TextSupportsBreak(const RowScratchRegisters &before,
|
||||||
const RowScratchRegisters &after) {
|
const RowScratchRegisters &after) {
|
||||||
if (before.ri_->ltr) {
|
if (before.ri_->ltr) {
|
||||||
return before.ri_->rword_likely_ends_idea &&
|
return before.ri_->rword_likely_ends_idea &&
|
||||||
@ -1679,14 +1681,7 @@ bool TextSupportsBreak(const RowScratchRegisters &before,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LikelyParagraphStart(const RowScratchRegisters &before,
|
static bool LikelyParagraphStart(const RowScratchRegisters &before,
|
||||||
const RowScratchRegisters &after) {
|
|
||||||
return before.ri_->num_words == 0 ||
|
|
||||||
(FirstWordWouldHaveFit(before, after) &&
|
|
||||||
TextSupportsBreak(before, after));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool LikelyParagraphStart(const RowScratchRegisters &before,
|
|
||||||
const RowScratchRegisters &after,
|
const RowScratchRegisters &after,
|
||||||
tesseract::ParagraphJustification j) {
|
tesseract::ParagraphJustification j) {
|
||||||
return before.ri_->num_words == 0 ||
|
return before.ri_->num_words == 0 ||
|
||||||
@ -1699,7 +1694,7 @@ bool LikelyParagraphStart(const RowScratchRegisters &before,
|
|||||||
// If we can't produce a unique model, justification_ = JUSTIFICATION_UNKNOWN.
|
// If we can't produce a unique model, justification_ = JUSTIFICATION_UNKNOWN.
|
||||||
// If the rows given could be a consistent start to a paragraph, set *consistent
|
// If the rows given could be a consistent start to a paragraph, set *consistent
|
||||||
// true.
|
// true.
|
||||||
ParagraphModel InternalParagraphModelByOutline(
|
static ParagraphModel InternalParagraphModelByOutline(
|
||||||
const GenericVector<RowScratchRegisters> *rows,
|
const GenericVector<RowScratchRegisters> *rows,
|
||||||
int start, int end, int tolerance, bool *consistent) {
|
int start, int end, int tolerance, bool *consistent) {
|
||||||
int ltr_line_count = 0;
|
int ltr_line_count = 0;
|
||||||
@ -1800,7 +1795,7 @@ ParagraphModel InternalParagraphModelByOutline(
|
|||||||
// would fit them as a single paragraph. If nothing fits,
|
// would fit them as a single paragraph. If nothing fits,
|
||||||
// justification_ = JUSTIFICATION_UNKNOWN and print the paragraph to debug
|
// justification_ = JUSTIFICATION_UNKNOWN and print the paragraph to debug
|
||||||
// output if we're debugging.
|
// output if we're debugging.
|
||||||
ParagraphModel ParagraphModelByOutline(
|
static ParagraphModel ParagraphModelByOutline(
|
||||||
int debug_level,
|
int debug_level,
|
||||||
const GenericVector<RowScratchRegisters> *rows,
|
const GenericVector<RowScratchRegisters> *rows,
|
||||||
int start, int end, int tolerance) {
|
int start, int end, int tolerance) {
|
||||||
@ -1837,7 +1832,7 @@ bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows,
|
|||||||
// We only take the very strongest signals, as we don't want to get
|
// We only take the very strongest signals, as we don't want to get
|
||||||
// confused and mark up centered text, poetry, or source code as
|
// confused and mark up centered text, poetry, or source code as
|
||||||
// clearly part of a typical paragraph.
|
// clearly part of a typical paragraph.
|
||||||
void MarkStrongEvidence(GenericVector<RowScratchRegisters> *rows,
|
static void MarkStrongEvidence(GenericVector<RowScratchRegisters> *rows,
|
||||||
int row_start, int row_end) {
|
int row_start, int row_end) {
|
||||||
// Record patently obvious body text.
|
// Record patently obvious body text.
|
||||||
for (int i = row_start + 1; i < row_end; i++) {
|
for (int i = row_start + 1; i < row_end; i++) {
|
||||||
@ -1907,7 +1902,7 @@ void MarkStrongEvidence(GenericVector<RowScratchRegisters> *rows,
|
|||||||
// Look for sequences of a start line followed by some body lines in
|
// Look for sequences of a start line followed by some body lines in
|
||||||
// rows[row_start, row_end) and create ParagraphModels for them if
|
// rows[row_start, row_end) and create ParagraphModels for them if
|
||||||
// they seem coherent.
|
// they seem coherent.
|
||||||
void ModelStrongEvidence(int debug_level,
|
static void ModelStrongEvidence(int debug_level,
|
||||||
GenericVector<RowScratchRegisters> *rows,
|
GenericVector<RowScratchRegisters> *rows,
|
||||||
int row_start, int row_end,
|
int row_start, int row_end,
|
||||||
bool allow_flush_models,
|
bool allow_flush_models,
|
||||||
@ -2002,7 +1997,7 @@ void ModelStrongEvidence(int debug_level,
|
|||||||
// clues.
|
// clues.
|
||||||
// (3) Form models for any sequence of start + continuation lines.
|
// (3) Form models for any sequence of start + continuation lines.
|
||||||
// (4) Smear the paragraph models to cover surrounding text.
|
// (4) Smear the paragraph models to cover surrounding text.
|
||||||
void StrongEvidenceClassify(int debug_level,
|
static void StrongEvidenceClassify(int debug_level,
|
||||||
GenericVector<RowScratchRegisters> *rows,
|
GenericVector<RowScratchRegisters> *rows,
|
||||||
int row_start, int row_end,
|
int row_start, int row_end,
|
||||||
ParagraphTheory *theory) {
|
ParagraphTheory *theory) {
|
||||||
@ -2032,7 +2027,7 @@ void StrongEvidenceClassify(int debug_level,
|
|||||||
smearer.Smear();
|
smearer.Smear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SeparateSimpleLeaderLines(GenericVector<RowScratchRegisters> *rows,
|
static void SeparateSimpleLeaderLines(GenericVector<RowScratchRegisters> *rows,
|
||||||
int row_start, int row_end,
|
int row_start, int row_end,
|
||||||
ParagraphTheory *theory) {
|
ParagraphTheory *theory) {
|
||||||
for (int i = row_start + 1; i < row_end - 1; i++) {
|
for (int i = row_start + 1; i < row_end - 1; i++) {
|
||||||
@ -2048,7 +2043,7 @@ void SeparateSimpleLeaderLines(GenericVector<RowScratchRegisters> *rows,
|
|||||||
|
|
||||||
// Collect sequences of unique hypotheses in row registers and create proper
|
// Collect sequences of unique hypotheses in row registers and create proper
|
||||||
// paragraphs for them, referencing the paragraphs in row_owners.
|
// paragraphs for them, referencing the paragraphs in row_owners.
|
||||||
void ConvertHypothesizedModelRunsToParagraphs(
|
static void ConvertHypothesizedModelRunsToParagraphs(
|
||||||
int debug_level,
|
int debug_level,
|
||||||
const GenericVector<RowScratchRegisters> &rows,
|
const GenericVector<RowScratchRegisters> &rows,
|
||||||
GenericVector<PARA *> *row_owners,
|
GenericVector<PARA *> *row_owners,
|
||||||
@ -2147,7 +2142,8 @@ struct Interval {
|
|||||||
// (1) If a line is surrounded by lines of unknown type, it's weak.
|
// (1) If a line is surrounded by lines of unknown type, it's weak.
|
||||||
// (2) If two lines in a row are start lines for a given paragraph type, but
|
// (2) If two lines in a row are start lines for a given paragraph type, but
|
||||||
// after that the same paragraph type does not continue, they're weak.
|
// after that the same paragraph type does not continue, they're weak.
|
||||||
bool RowIsStranded(const GenericVector<RowScratchRegisters> &rows, int row) {
|
static bool RowIsStranded(const GenericVector<RowScratchRegisters> &rows,
|
||||||
|
int row) {
|
||||||
SetOfModels row_models;
|
SetOfModels row_models;
|
||||||
rows[row].StrongHypotheses(&row_models);
|
rows[row].StrongHypotheses(&row_models);
|
||||||
|
|
||||||
@ -2189,7 +2185,7 @@ bool RowIsStranded(const GenericVector<RowScratchRegisters> &rows, int row) {
|
|||||||
// + Crown paragraphs not immediately followed by a strongly modeled line.
|
// + Crown paragraphs not immediately followed by a strongly modeled line.
|
||||||
// + Single line paragraphs surrounded by text that doesn't match the
|
// + Single line paragraphs surrounded by text that doesn't match the
|
||||||
// model.
|
// model.
|
||||||
void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,
|
static void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,
|
||||||
GenericVector<Interval> *to_fix,
|
GenericVector<Interval> *to_fix,
|
||||||
int row_start, int row_end) {
|
int row_start, int row_end) {
|
||||||
to_fix->clear();
|
to_fix->clear();
|
||||||
@ -2367,7 +2363,7 @@ void DetectParagraphs(int debug_level,
|
|||||||
|
|
||||||
// ============ Code interfacing with the rest of Tesseract ==================
|
// ============ Code interfacing with the rest of Tesseract ==================
|
||||||
|
|
||||||
void InitializeTextAndBoxesPreRecognition(const MutableIterator &it,
|
static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it,
|
||||||
RowInfo *info) {
|
RowInfo *info) {
|
||||||
// Set up text, lword_text, and rword_text (mostly for debug printing).
|
// Set up text, lword_text, and rword_text (mostly for debug printing).
|
||||||
STRING fake_text;
|
STRING fake_text;
|
||||||
@ -2419,9 +2415,8 @@ void InitializeTextAndBoxesPreRecognition(const MutableIterator &it,
|
|||||||
|
|
||||||
// Given a Tesseract Iterator pointing to a text line, fill in the paragraph
|
// Given a Tesseract Iterator pointing to a text line, fill in the paragraph
|
||||||
// detector RowInfo with all relevant information from the row.
|
// detector RowInfo with all relevant information from the row.
|
||||||
void InitializeRowInfo(bool after_recognition,
|
static void InitializeRowInfo(bool after_recognition,
|
||||||
const MutableIterator &it,
|
const MutableIterator &it, RowInfo *info) {
|
||||||
RowInfo *info) {
|
|
||||||
if (it.PageResIt()->row() != nullptr) {
|
if (it.PageResIt()->row() != nullptr) {
|
||||||
ROW *row = it.PageResIt()->row()->row;
|
ROW *row = it.PageResIt()->row()->row;
|
||||||
info->pix_ldistance = row->lmargin();
|
info->pix_ldistance = row->lmargin();
|
||||||
|
@ -291,11 +291,6 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
|
|||||||
bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows,
|
bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows,
|
||||||
int start, int end, const ParagraphModel *model);
|
int start, int end, const ParagraphModel *model);
|
||||||
|
|
||||||
// Do the text and geometry of two rows support a paragraph break between them?
|
|
||||||
bool LikelyParagraphStart(const RowScratchRegisters &before,
|
|
||||||
const RowScratchRegisters &after,
|
|
||||||
tesseract::ParagraphJustification j);
|
|
||||||
|
|
||||||
// Given a set of row_owners pointing to PARAs or nullptr (no paragraph known),
|
// Given a set of row_owners pointing to PARAs or nullptr (no paragraph known),
|
||||||
// normalize each row_owner to point to an actual PARA, and output the
|
// normalize each row_owner to point to an actual PARA, and output the
|
||||||
// paragraphs in order onto paragraphs.
|
// paragraphs in order onto paragraphs.
|
||||||
|
@ -203,12 +203,9 @@ void build_image_window(int width, int height) {
|
|||||||
* Display normalized baseline, x-height, ascender limit and descender limit
|
* Display normalized baseline, x-height, ascender limit and descender limit
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void display_bln_lines(ScrollView* window,
|
static void display_bln_lines(ScrollView* window, ScrollView::Color colour,
|
||||||
ScrollView::Color colour,
|
float scale_factor, float y_offset,
|
||||||
float scale_factor,
|
float minx, float maxx) {
|
||||||
float y_offset,
|
|
||||||
float minx,
|
|
||||||
float maxx) {
|
|
||||||
window->Pen(colour);
|
window->Pen(colour);
|
||||||
window->Line(minx, y_offset + scale_factor * DESC_HEIGHT,
|
window->Line(minx, y_offset + scale_factor * DESC_HEIGHT,
|
||||||
maxx, y_offset + scale_factor * DESC_HEIGHT);
|
maxx, y_offset + scale_factor * DESC_HEIGHT);
|
||||||
|
@ -53,7 +53,7 @@ FILE *Tesseract::init_recog_training(const STRING &fname) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Copies the bounding box from page_res_it->word() to the given TBOX.
|
// Copies the bounding box from page_res_it->word() to the given TBOX.
|
||||||
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
|
static bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
|
||||||
while (page_res_it->block() != nullptr && page_res_it->word() == nullptr)
|
while (page_res_it->block() != nullptr && page_res_it->word() == nullptr)
|
||||||
page_res_it->forward();
|
page_res_it->forward();
|
||||||
|
|
||||||
|
@ -43,10 +43,11 @@ namespace tesseract {
|
|||||||
* or superscript letter based only on y position. Also do this for the
|
* or superscript letter based only on y position. Also do this for the
|
||||||
* right side.
|
* right side.
|
||||||
*/
|
*/
|
||||||
void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index,
|
static void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index,
|
||||||
int super_y_bottom, int sub_y_top,
|
int super_y_bottom, int sub_y_top,
|
||||||
ScriptPos *leading_pos, int *num_leading_outliers,
|
ScriptPos *leading_pos, int *num_leading_outliers,
|
||||||
ScriptPos *trailing_pos, int *num_trailing_outliers) {
|
ScriptPos *trailing_pos,
|
||||||
|
int *num_trailing_outliers) {
|
||||||
ScriptPos sp_unused1, sp_unused2;
|
ScriptPos sp_unused1, sp_unused2;
|
||||||
int unused1, unused2;
|
int unused1, unused2;
|
||||||
if (!leading_pos) leading_pos = &sp_unused1;
|
if (!leading_pos) leading_pos = &sp_unused1;
|
||||||
|
@ -252,7 +252,7 @@ const BLOCK & source //from this
|
|||||||
// margin - return value, the distance from x,y to the left margin of the
|
// margin - return value, the distance from x,y to the left margin of the
|
||||||
// block containing it.
|
// block containing it.
|
||||||
// If all segments were to the right of x, we return false and 0.
|
// If all segments were to the right of x, we return false and 0.
|
||||||
bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
||||||
bool found = false;
|
bool found = false;
|
||||||
*margin = 0;
|
*margin = 0;
|
||||||
if (segments->empty())
|
if (segments->empty())
|
||||||
@ -282,7 +282,7 @@ bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
|||||||
// margin - return value, the distance from x,y to the right margin of the
|
// margin - return value, the distance from x,y to the right margin of the
|
||||||
// block containing it.
|
// block containing it.
|
||||||
// If all segments were to the left of x, we return false and 0.
|
// If all segments were to the left of x, we return false and 0.
|
||||||
bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
static bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
||||||
bool found = false;
|
bool found = false;
|
||||||
*margin = 0;
|
*margin = 0;
|
||||||
if (segments->empty())
|
if (segments->empty())
|
||||||
|
@ -359,7 +359,8 @@ void C_BLOB::move( // reposition blob
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Static helper for C_BLOB::rotate to allow recursion of child outlines.
|
// Static helper for C_BLOB::rotate to allow recursion of child outlines.
|
||||||
void RotateOutlineList(const FCOORD& rotation, C_OUTLINE_LIST* outlines) {
|
static void RotateOutlineList(const FCOORD& rotation,
|
||||||
|
C_OUTLINE_LIST* outlines) {
|
||||||
C_OUTLINE_LIST new_outlines;
|
C_OUTLINE_LIST new_outlines;
|
||||||
C_OUTLINE_IT src_it(outlines);
|
C_OUTLINE_IT src_it(outlines);
|
||||||
C_OUTLINE_IT dest_it(&new_outlines);
|
C_OUTLINE_IT dest_it(&new_outlines);
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
*
|
*
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "globaloc.h"
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/syscall.h> // For SYS_gettid.
|
#include <sys/syscall.h> // For SYS_gettid.
|
||||||
@ -75,20 +76,17 @@ void err_exit() {
|
|||||||
ASSERT_HOST("Fatal error encountered!" == nullptr);
|
ASSERT_HOST("Fatal error encountered!" == nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: remove empty function?
|
||||||
void set_global_loc_code(int loc_code) {
|
void set_global_loc_code(int loc_code) {
|
||||||
// global_loc_code = loc_code;
|
// global_loc_code = loc_code;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: remove empty function?
|
||||||
void set_global_subloc_code(int loc_code) {
|
void set_global_subloc_code(int loc_code) {
|
||||||
// global_subloc_code = loc_code;
|
// global_subloc_code = loc_code;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: remove empty function?
|
||||||
void set_global_subsubloc_code(int loc_code) {
|
void set_global_subsubloc_code(int loc_code) {
|
||||||
// global_subsubloc_code = loc_code;
|
// global_subsubloc_code = loc_code;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -103,7 +103,7 @@ static inline int DigitValue(int ch, int base) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// IO (re-)implementations -----------------------------------------------------
|
// IO (re-)implementations -----------------------------------------------------
|
||||||
uintmax_t streamtoumax(FILE* s, int base) {
|
static uintmax_t streamtoumax(FILE* s, int base) {
|
||||||
int minus = 0;
|
int minus = 0;
|
||||||
uintmax_t v = 0;
|
uintmax_t v = 0;
|
||||||
int d, c = 0;
|
int d, c = 0;
|
||||||
@ -144,7 +144,7 @@ uintmax_t streamtoumax(FILE* s, int base) {
|
|||||||
return minus ? -v : v;
|
return minus ? -v : v;
|
||||||
}
|
}
|
||||||
|
|
||||||
double streamtofloat(FILE* s) {
|
static double streamtofloat(FILE* s) {
|
||||||
int minus = 0;
|
int minus = 0;
|
||||||
int v = 0;
|
int v = 0;
|
||||||
int d, c = 0;
|
int d, c = 0;
|
||||||
@ -191,39 +191,6 @@ double streamtofloat(FILE* s) {
|
|||||||
return minus ? -f : f;
|
return minus ? -f : f;
|
||||||
}
|
}
|
||||||
|
|
||||||
double strtofloat(const char* s) {
|
|
||||||
int minus = 0;
|
|
||||||
int v = 0;
|
|
||||||
int d;
|
|
||||||
int k = 1;
|
|
||||||
int w = 0;
|
|
||||||
|
|
||||||
while(*s && isspace(static_cast<unsigned char>(*s))) s++;
|
|
||||||
|
|
||||||
// Single optional + or -
|
|
||||||
if (*s == '-' || *s == '+') {
|
|
||||||
minus = (*s == '-');
|
|
||||||
s++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Actual number parsing
|
|
||||||
for (; *s && (d = DigitValue(*s, 10)) >= 0; s++)
|
|
||||||
v = v*10 + d;
|
|
||||||
if (*s == '.') {
|
|
||||||
for (++s; *s && (d = DigitValue(*s, 10)) >= 0; s++) {
|
|
||||||
w = w*10 + d;
|
|
||||||
k *= 10;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (*s == 'e' || *s == 'E')
|
|
||||||
tprintf("WARNING: Scientific Notation not supported!");
|
|
||||||
|
|
||||||
double f = static_cast<double>(v)
|
|
||||||
+ static_cast<double>(w) / static_cast<double>(k);
|
|
||||||
|
|
||||||
return minus ? -f : f;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int tvfscanf(FILE* stream, const char *format, va_list ap);
|
static int tvfscanf(FILE* stream, const char *format, va_list ap);
|
||||||
|
|
||||||
int tfscanf(FILE* stream, const char *format, ...) {
|
int tfscanf(FILE* stream, const char *format, ...) {
|
||||||
|
@ -87,7 +87,7 @@ void FreeTempProto(void *arg) {
|
|||||||
free(proto);
|
free(proto);
|
||||||
}
|
}
|
||||||
|
|
||||||
void FreePermConfig(PERM_CONFIG Config) {
|
static void FreePermConfig(PERM_CONFIG Config) {
|
||||||
assert(Config != nullptr);
|
assert(Config != nullptr);
|
||||||
delete [] Config->Ambigs;
|
delete [] Config->Ambigs;
|
||||||
free(Config);
|
free(Config);
|
||||||
|
@ -166,8 +166,9 @@ void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm,
|
|||||||
|
|
||||||
// Helper normalizes the direction, assuming that it is at the given
|
// Helper normalizes the direction, assuming that it is at the given
|
||||||
// unnormed_pos, using the given denorm, starting at the root_denorm.
|
// unnormed_pos, using the given denorm, starting at the root_denorm.
|
||||||
uint8_t NormalizeDirection(uint8_t dir, const FCOORD& unnormed_pos,
|
static uint8_t NormalizeDirection(uint8_t dir, const FCOORD& unnormed_pos,
|
||||||
const DENORM& denorm, const DENORM* root_denorm) {
|
const DENORM& denorm,
|
||||||
|
const DENORM* root_denorm) {
|
||||||
// Convert direction to a vector.
|
// Convert direction to a vector.
|
||||||
FCOORD unnormed_end;
|
FCOORD unnormed_end;
|
||||||
unnormed_end.from_direction(dir);
|
unnormed_end.from_direction(dir);
|
||||||
|
@ -713,10 +713,8 @@ void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
|
|||||||
* Print debugging information for Configurations
|
* Print debugging information for Configurations
|
||||||
* @return none
|
* @return none
|
||||||
*/
|
*/
|
||||||
void IMDebugConfiguration(int FeatureNum,
|
static void IMDebugConfiguration(int FeatureNum, uint16_t ActualProtoNum,
|
||||||
uint16_t ActualProtoNum,
|
uint8_t Evidence, BIT_VECTOR ConfigMask,
|
||||||
uint8_t Evidence,
|
|
||||||
BIT_VECTOR ConfigMask,
|
|
||||||
uint32_t ConfigWord) {
|
uint32_t ConfigWord) {
|
||||||
cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
|
cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
|
||||||
FeatureNum, (int) ActualProtoNum, (int) Evidence);
|
FeatureNum, (int) ActualProtoNum, (int) Evidence);
|
||||||
@ -734,8 +732,7 @@ void IMDebugConfiguration(int FeatureNum,
|
|||||||
* Print debugging information for Configurations
|
* Print debugging information for Configurations
|
||||||
* @return none
|
* @return none
|
||||||
*/
|
*/
|
||||||
void IMDebugConfigurationSum(int FeatureNum,
|
static void IMDebugConfigurationSum(int FeatureNum, uint8_t *FeatureEvidence,
|
||||||
uint8_t *FeatureEvidence,
|
|
||||||
int32_t ConfigCount) {
|
int32_t ConfigCount) {
|
||||||
cprintf("F=%3d, C=", FeatureNum);
|
cprintf("F=%3d, C=", FeatureNum);
|
||||||
for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
|
for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
|
||||||
|
@ -674,8 +674,7 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) {
|
|||||||
|
|
||||||
} /* NewIntClass */
|
} /* NewIntClass */
|
||||||
|
|
||||||
|
static void free_int_class(INT_CLASS int_class) {
|
||||||
void free_int_class(INT_CLASS int_class) {
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < int_class->NumProtoSets; i++) {
|
for (i = 0; i < int_class->NumProtoSets; i++) {
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
** Filename: mfx.c
|
** Filename: mfx.c
|
||||||
** Purpose: Micro feature extraction routines
|
** Purpose: Micro feature extraction routines
|
||||||
** Author: Dan Johnson
|
** Author: Dan Johnson
|
||||||
** History: 7/21/89, DSJ, Created.
|
|
||||||
**
|
**
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@ -18,6 +17,7 @@
|
|||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
Include Files and Type Defines
|
Include Files and Type Defines
|
||||||
----------------------------------------------------------------------------*/
|
----------------------------------------------------------------------------*/
|
||||||
|
#include "mfx.h"
|
||||||
#include "mfdefs.h"
|
#include "mfdefs.h"
|
||||||
#include "mfoutline.h"
|
#include "mfoutline.h"
|
||||||
#include "clusttool.h" //NEEDED
|
#include "clusttool.h" //NEEDED
|
||||||
@ -93,7 +93,6 @@ MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
|
|||||||
return MicroFeatures;
|
return MicroFeatures;
|
||||||
} /* BlobMicroFeatures */
|
} /* BlobMicroFeatures */
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------
|
/*---------------------------------------------------------------------------
|
||||||
Private Code
|
Private Code
|
||||||
---------------------------------------------------------------------------*/
|
---------------------------------------------------------------------------*/
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
** Filename: mfx.h
|
** Filename: mfx.h
|
||||||
** Purpose: Definition of micro-feature extraction routines
|
** Purpose: Definition of micro-feature extraction routines
|
||||||
** Author: Dan Johnson
|
** Author: Dan Johnson
|
||||||
** History: 5/29/89, DSJ, Created.
|
|
||||||
**
|
**
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@ -15,6 +14,7 @@
|
|||||||
** See the License for the specific language governing permissions and
|
** See the License for the specific language governing permissions and
|
||||||
** limitations under the License.
|
** limitations under the License.
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
|
|
||||||
#ifndef MFX_H
|
#ifndef MFX_H
|
||||||
#define MFX_H
|
#define MFX_H
|
||||||
|
|
||||||
@ -23,6 +23,10 @@
|
|||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
#include "mfdefs.h"
|
#include "mfdefs.h"
|
||||||
#include "params.h"
|
#include "params.h"
|
||||||
|
|
||||||
|
class DENORM;
|
||||||
|
struct TBLOB;
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
Variables
|
Variables
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
|
@ -191,37 +191,3 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} /* WriteFeatureSet */
|
} /* WriteFeatureSet */
|
||||||
|
|
||||||
/**
|
|
||||||
* Write a textual representation of FeatureDesc to File
|
|
||||||
* in the old format (i.e. the format used by the clusterer).
|
|
||||||
*
|
|
||||||
* This format is:
|
|
||||||
* @verbatim
|
|
||||||
* Number of Params
|
|
||||||
* Description of Param 1
|
|
||||||
* ...
|
|
||||||
* @endverbatim
|
|
||||||
* @param File open text file to write FeatureDesc to
|
|
||||||
* @param FeatureDesc feature descriptor to write to File
|
|
||||||
* @return none
|
|
||||||
*/
|
|
||||||
void WriteOldParamDesc(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
|
||||||
int i;
|
|
||||||
|
|
||||||
fprintf (File, "%d\n", FeatureDesc->NumParams);
|
|
||||||
for (i = 0; i < FeatureDesc->NumParams; i++) {
|
|
||||||
if (FeatureDesc->ParamDesc[i].Circular)
|
|
||||||
fprintf (File, "circular ");
|
|
||||||
else
|
|
||||||
fprintf (File, "linear ");
|
|
||||||
|
|
||||||
if (FeatureDesc->ParamDesc[i].NonEssential)
|
|
||||||
fprintf (File, "non-essential ");
|
|
||||||
else
|
|
||||||
fprintf (File, "essential ");
|
|
||||||
|
|
||||||
fprintf (File, "%f %f\n",
|
|
||||||
FeatureDesc->ParamDesc[i].Min, FeatureDesc->ParamDesc[i].Max);
|
|
||||||
}
|
|
||||||
} /* WriteOldParamDesc */
|
|
||||||
|
@ -109,7 +109,8 @@ void Dawg::iterate_words(const UNICHARSET &unicharset,
|
|||||||
iterate_words_rec(word, 0, cb);
|
iterate_words_rec(word, 0, cb);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CallWithUTF8(TessCallback1<const char *> *cb, const WERD_CHOICE *wc) {
|
static void CallWithUTF8(TessCallback1<const char *> *cb,
|
||||||
|
const WERD_CHOICE *wc) {
|
||||||
STRING s;
|
STRING s;
|
||||||
wc->string_and_lengths(&s, nullptr);
|
wc->string_and_lengths(&s, nullptr);
|
||||||
cb->Run(s.string());
|
cb->Run(s.string());
|
||||||
|
@ -201,7 +201,7 @@ Pix* IntGrid::ThresholdToPix(int threshold) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Make a Pix of the correct scaled size for the TraceOutline functions.
|
// Make a Pix of the correct scaled size for the TraceOutline functions.
|
||||||
Pix* GridReducedPix(const TBOX& box, int gridsize,
|
static Pix* GridReducedPix(const TBOX& box, int gridsize,
|
||||||
ICOORD bleft, int* left, int* bottom) {
|
ICOORD bleft, int* left, int* bottom) {
|
||||||
// Compute grid bounds of the outline and pad all round by 1.
|
// Compute grid bounds of the outline and pad all round by 1.
|
||||||
int grid_left = (box.left() - bleft.x()) / gridsize - 1;
|
int grid_left = (box.left() - bleft.x()) / gridsize - 1;
|
||||||
|
@ -133,7 +133,8 @@ static float MakeRowFromBlobs(float line_size,
|
|||||||
|
|
||||||
// Helper to make a row using the children of a single blob.
|
// Helper to make a row using the children of a single blob.
|
||||||
// Returns the mean size of the blobs created.
|
// Returns the mean size of the blobs created.
|
||||||
float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob, TO_ROW_IT* row_it) {
|
static float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob,
|
||||||
|
TO_ROW_IT* row_it) {
|
||||||
// The blobs made from the children will go in the small_blobs list.
|
// The blobs made from the children will go in the small_blobs list.
|
||||||
BLOBNBOX_IT bb_it(&block->small_blobs);
|
BLOBNBOX_IT bb_it(&block->small_blobs);
|
||||||
C_OUTLINE_IT ol_it(blob->out_list());
|
C_OUTLINE_IT ol_it(blob->out_list());
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
#ifndef GOOGLE_TESSERACT
|
#ifndef GOOGLE_TESSERACT
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
bool IntFlagExists(const char* flag_name, int32_t* value) {
|
static bool IntFlagExists(const char* flag_name, int32_t* value) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<IntParam*> empty;
|
GenericVector<IntParam*> empty;
|
||||||
@ -26,7 +26,7 @@ bool IntFlagExists(const char* flag_name, int32_t* value) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DoubleFlagExists(const char* flag_name, double* value) {
|
static bool DoubleFlagExists(const char* flag_name, double* value) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<DoubleParam*> empty;
|
GenericVector<DoubleParam*> empty;
|
||||||
@ -37,7 +37,7 @@ bool DoubleFlagExists(const char* flag_name, double* value) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BoolFlagExists(const char* flag_name, bool* value) {
|
static bool BoolFlagExists(const char* flag_name, bool* value) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<BoolParam*> empty;
|
GenericVector<BoolParam*> empty;
|
||||||
@ -48,7 +48,7 @@ bool BoolFlagExists(const char* flag_name, bool* value) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool StringFlagExists(const char* flag_name, const char** value) {
|
static bool StringFlagExists(const char* flag_name, const char** value) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<StringParam*> empty;
|
GenericVector<StringParam*> empty;
|
||||||
@ -58,7 +58,7 @@ bool StringFlagExists(const char* flag_name, const char** value) {
|
|||||||
return p != nullptr;
|
return p != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetIntFlagValue(const char* flag_name, const int32_t new_val) {
|
static void SetIntFlagValue(const char* flag_name, const int32_t new_val) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<IntParam*> empty;
|
GenericVector<IntParam*> empty;
|
||||||
@ -68,7 +68,7 @@ void SetIntFlagValue(const char* flag_name, const int32_t new_val) {
|
|||||||
p->set_value(new_val);
|
p->set_value(new_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetDoubleFlagValue(const char* flag_name, const double new_val) {
|
static void SetDoubleFlagValue(const char* flag_name, const double new_val) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<DoubleParam*> empty;
|
GenericVector<DoubleParam*> empty;
|
||||||
@ -78,7 +78,7 @@ void SetDoubleFlagValue(const char* flag_name, const double new_val) {
|
|||||||
p->set_value(new_val);
|
p->set_value(new_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetBoolFlagValue(const char* flag_name, const bool new_val) {
|
static void SetBoolFlagValue(const char* flag_name, const bool new_val) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<BoolParam*> empty;
|
GenericVector<BoolParam*> empty;
|
||||||
@ -88,7 +88,7 @@ void SetBoolFlagValue(const char* flag_name, const bool new_val) {
|
|||||||
p->set_value(new_val);
|
p->set_value(new_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetStringFlagValue(const char* flag_name, const char* new_val) {
|
static void SetStringFlagValue(const char* flag_name, const char* new_val) {
|
||||||
STRING full_flag_name("FLAGS_");
|
STRING full_flag_name("FLAGS_");
|
||||||
full_flag_name += flag_name;
|
full_flag_name += flag_name;
|
||||||
GenericVector<StringParam*> empty;
|
GenericVector<StringParam*> empty;
|
||||||
@ -98,19 +98,19 @@ void SetStringFlagValue(const char* flag_name, const char* new_val) {
|
|||||||
p->set_value(STRING(new_val));
|
p->set_value(STRING(new_val));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SafeAtoi(const char* str, int* val) {
|
static bool SafeAtoi(const char* str, int* val) {
|
||||||
char* endptr = nullptr;
|
char* endptr = nullptr;
|
||||||
*val = strtol(str, &endptr, 10);
|
*val = strtol(str, &endptr, 10);
|
||||||
return endptr != nullptr && *endptr == '\0';
|
return endptr != nullptr && *endptr == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SafeAtod(const char* str, double* val) {
|
static bool SafeAtod(const char* str, double* val) {
|
||||||
char* endptr = nullptr;
|
char* endptr = nullptr;
|
||||||
*val = strtod(str, &endptr);
|
*val = strtod(str, &endptr);
|
||||||
return endptr != nullptr && *endptr == '\0';
|
return endptr != nullptr && *endptr == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrintCommandLineFlags() {
|
static void PrintCommandLineFlags() {
|
||||||
const char* kFlagNamePrefix = "FLAGS_";
|
const char* kFlagNamePrefix = "FLAGS_";
|
||||||
const int kFlagNamePrefixLen = strlen(kFlagNamePrefix);
|
const int kFlagNamePrefixLen = strlen(kFlagNamePrefix);
|
||||||
for (int i = 0; i < GlobalParams()->int_params.size(); ++i) {
|
for (int i = 0; i < GlobalParams()->int_params.size(); ++i) {
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
#include "trie.h"
|
#include "trie.h"
|
||||||
#include "unicharset.h"
|
#include "unicharset.h"
|
||||||
|
|
||||||
tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset,
|
static tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset,
|
||||||
const char *filename) {
|
const char *filename) {
|
||||||
const int kDictDebugLevel = 1;
|
const int kDictDebugLevel = 1;
|
||||||
tesseract::TFile dawg_file;
|
tesseract::TFile dawg_file;
|
||||||
@ -54,7 +54,7 @@ class WordOutputter {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// returns 0 if successful.
|
// returns 0 if successful.
|
||||||
int WriteDawgAsWordlist(const UNICHARSET &unicharset,
|
static int WriteDawgAsWordlist(const UNICHARSET &unicharset,
|
||||||
const tesseract::Dawg *dawg,
|
const tesseract::Dawg *dawg,
|
||||||
const char *outfile_name) {
|
const char *outfile_name) {
|
||||||
FILE *out = fopen(outfile_name, "wb");
|
FILE *out = fopen(outfile_name, "wb");
|
||||||
|
@ -35,7 +35,7 @@
|
|||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
bool is_hyphen_punc(const char32 ch) {
|
static bool is_hyphen_punc(const char32 ch) {
|
||||||
static const int kNumHyphenPuncUnicodes = 13;
|
static const int kNumHyphenPuncUnicodes = 13;
|
||||||
static const char32 kHyphenPuncUnicodes[kNumHyphenPuncUnicodes] = {
|
static const char32 kHyphenPuncUnicodes[kNumHyphenPuncUnicodes] = {
|
||||||
'-', 0x2010, 0x2011, 0x2012,
|
'-', 0x2010, 0x2011, 0x2012,
|
||||||
@ -53,7 +53,7 @@ bool is_hyphen_punc(const char32 ch) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_single_quote(const char32 ch) {
|
static bool is_single_quote(const char32 ch) {
|
||||||
static const int kNumSingleQuoteUnicodes = 8;
|
static const int kNumSingleQuoteUnicodes = 8;
|
||||||
static const char32 kSingleQuoteUnicodes[kNumSingleQuoteUnicodes] = {
|
static const char32 kSingleQuoteUnicodes[kNumSingleQuoteUnicodes] = {
|
||||||
'\'', '`',
|
'\'', '`',
|
||||||
@ -71,7 +71,7 @@ bool is_single_quote(const char32 ch) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_double_quote(const char32 ch) {
|
static bool is_double_quote(const char32 ch) {
|
||||||
static const int kNumDoubleQuoteUnicodes = 8;
|
static const int kNumDoubleQuoteUnicodes = 8;
|
||||||
static const char32 kDoubleQuoteUnicodes[kNumDoubleQuoteUnicodes] = {
|
static const char32 kDoubleQuoteUnicodes[kNumDoubleQuoteUnicodes] = {
|
||||||
'"',
|
'"',
|
||||||
|
@ -68,7 +68,7 @@ static bool RandBool(const double prob, TRand* rand) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
Pix* CairoARGB32ToPixFormat(cairo_surface_t *surface) {
|
static Pix* CairoARGB32ToPixFormat(cairo_surface_t *surface) {
|
||||||
if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) {
|
if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) {
|
||||||
printf("Unexpected surface format %d\n",
|
printf("Unexpected surface format %d\n",
|
||||||
cairo_image_surface_get_format(surface));
|
cairo_image_surface_get_format(surface));
|
||||||
|
@ -208,7 +208,7 @@ static std::string StringReplace(const std::string& in,
|
|||||||
// with "T", such that "AT" has spacing of -5, the entry/line for unichar "A"
|
// with "T", such that "AT" has spacing of -5, the entry/line for unichar "A"
|
||||||
// in .fontinfo file will be:
|
// in .fontinfo file will be:
|
||||||
// A 0 -1 T -5 V -7
|
// A 0 -1 T -5 V -7
|
||||||
void ExtractFontProperties(const std::string &utf8_text,
|
static void ExtractFontProperties(const std::string &utf8_text,
|
||||||
StringRenderer *render,
|
StringRenderer *render,
|
||||||
const std::string &output_base) {
|
const std::string &output_base) {
|
||||||
std::map<std::string, SpacingProperties> spacing_map;
|
std::map<std::string, SpacingProperties> spacing_map;
|
||||||
@ -308,8 +308,7 @@ void ExtractFontProperties(const std::string &utf8_text,
|
|||||||
File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
|
File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MakeIndividualGlyphs(Pix* pix,
|
static bool MakeIndividualGlyphs(Pix* pix, const std::vector<BoxChar*>& vbox,
|
||||||
const std::vector<BoxChar*>& vbox,
|
|
||||||
const int input_tiff_page) {
|
const int input_tiff_page) {
|
||||||
// If checks fail, return false without exiting text2image
|
// If checks fail, return false without exiting text2image
|
||||||
if (!pix) {
|
if (!pix) {
|
||||||
|
@ -123,7 +123,7 @@ void Validator::MoveResultsToDest(GraphemeNormMode g_mode,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CmpPairSecond(const std::pair<int, int>& p1,
|
static bool CmpPairSecond(const std::pair<int, int>& p1,
|
||||||
const std::pair<int, int>& p2) {
|
const std::pair<int, int>& p2) {
|
||||||
return p1.second < p2.second;
|
return p1.second < p2.second;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user