mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 22:43:45 +08:00
commit
e250f3422d
@ -171,14 +171,14 @@ class TESS_API ImageThresholder {
|
||||
/// The pix will always be PixDestroy()ed on destruction of the class.
|
||||
Pix* pix_;
|
||||
|
||||
int image_width_; //< Width of source pix_.
|
||||
int image_height_; //< Height of source pix_.
|
||||
int pix_channels_; //< Number of 8-bit channels in pix_.
|
||||
int pix_wpl_; //< Words per line of pix_.
|
||||
int image_width_; ///< Width of source pix_.
|
||||
int image_height_; ///< Height of source pix_.
|
||||
int pix_channels_; ///< Number of 8-bit channels in pix_.
|
||||
int pix_wpl_; ///< Words per line of pix_.
|
||||
// Limits of image rectangle to be processed.
|
||||
int scale_; //< Scale factor from original image.
|
||||
int yres_; //< y pixels/inch in source image.
|
||||
int estimated_res_; //< Resolution estimate from text size.
|
||||
int scale_; ///< Scale factor from original image.
|
||||
int yres_; ///< y pixels/inch in source image.
|
||||
int estimated_res_; ///< Resolution estimate from text size.
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
|
@ -31,12 +31,12 @@ ELISTIZE (BLOCK)
|
||||
*
|
||||
* Constructor for a simple rectangular block.
|
||||
*/
|
||||
BLOCK::BLOCK(const char *name, //< filename
|
||||
BOOL8 prop, //< proportional
|
||||
int16_t kern, //< kerning
|
||||
int16_t space, //< spacing
|
||||
int16_t xmin, //< bottom left
|
||||
int16_t ymin, int16_t xmax, //< top right
|
||||
BLOCK::BLOCK(const char *name, ///< filename
|
||||
BOOL8 prop, ///< proportional
|
||||
int16_t kern, ///< kerning
|
||||
int16_t space, ///< spacing
|
||||
int16_t xmin, ///< bottom left
|
||||
int16_t ymin, int16_t xmax, ///< top right
|
||||
int16_t ymax)
|
||||
: pdblk(xmin, ymin, xmax, ymax),
|
||||
filename(name),
|
||||
@ -192,8 +192,8 @@ void BLOCK::compress( // squash it up
|
||||
*/
|
||||
|
||||
void BLOCK::print( //print list of sides
|
||||
FILE*, //< file to print on
|
||||
bool dump //< print full detail
|
||||
FILE*, ///< file to print on
|
||||
bool dump ///< print full detail
|
||||
) {
|
||||
ICOORDELT_IT it = &pdblk.leftside; //iterator
|
||||
|
||||
|
@ -39,13 +39,13 @@ class BLOCK:public ELIST_LINK
|
||||
right_to_left_ = false;
|
||||
pdblk.hand_poly = nullptr;
|
||||
}
|
||||
BLOCK(const char *name, //< filename
|
||||
BOOL8 prop, //< proportional
|
||||
int16_t kern, //< kerning
|
||||
int16_t space, //< spacing
|
||||
int16_t xmin, //< bottom left
|
||||
BLOCK(const char *name, ///< filename
|
||||
BOOL8 prop, ///< proportional
|
||||
int16_t kern, ///< kerning
|
||||
int16_t space, ///< spacing
|
||||
int16_t xmin, ///< bottom left
|
||||
int16_t ymin,
|
||||
int16_t xmax, //< top right
|
||||
int16_t xmax, ///< top right
|
||||
int16_t ymax);
|
||||
|
||||
~BLOCK () = default;
|
||||
@ -189,26 +189,26 @@ class BLOCK:public ELIST_LINK
|
||||
void print(FILE* fp, bool dump);
|
||||
|
||||
BLOCK& operator=(const BLOCK & source);
|
||||
PDBLK pdblk; //< Page Description Block
|
||||
PDBLK pdblk; ///< Page Description Block
|
||||
|
||||
private:
|
||||
BOOL8 proportional; //< proportional
|
||||
bool right_to_left_; //< major script is right to left.
|
||||
int8_t kerning; //< inter blob gap
|
||||
int16_t spacing; //< inter word gap
|
||||
int16_t pitch; //< pitch of non-props
|
||||
int16_t font_class; //< correct font class
|
||||
int32_t xheight; //< height of chars
|
||||
float cell_over_xheight_; //< Ratio of cell height to xheight.
|
||||
STRING filename; //< name of block
|
||||
ROW_LIST rows; //< rows in block
|
||||
PARA_LIST paras_; //< paragraphs of block
|
||||
C_BLOB_LIST c_blobs; //< before textord
|
||||
C_BLOB_LIST rej_blobs; //< duff stuff
|
||||
FCOORD re_rotation_; //< How to transform coords back to image.
|
||||
FCOORD classify_rotation_; //< Apply this before classifying.
|
||||
FCOORD skew_; //< Direction of true horizontal.
|
||||
ICOORD median_size_; //< Median size of blobs.
|
||||
BOOL8 proportional; ///< proportional
|
||||
bool right_to_left_; ///< major script is right to left.
|
||||
int8_t kerning; ///< inter blob gap
|
||||
int16_t spacing; ///< inter word gap
|
||||
int16_t pitch; ///< pitch of non-props
|
||||
int16_t font_class; ///< correct font class
|
||||
int32_t xheight; ///< height of chars
|
||||
float cell_over_xheight_; ///< Ratio of cell height to xheight.
|
||||
STRING filename; ///< name of block
|
||||
ROW_LIST rows; ///< rows in block
|
||||
PARA_LIST paras_; ///< paragraphs of block
|
||||
C_BLOB_LIST c_blobs; ///< before textord
|
||||
C_BLOB_LIST rej_blobs; ///< duff stuff
|
||||
FCOORD re_rotation_; ///< How to transform coords back to image.
|
||||
FCOORD classify_rotation_; ///< Apply this before classifying.
|
||||
FCOORD skew_; ///< Direction of true horizontal.
|
||||
ICOORD median_size_; ///< Median size of blobs.
|
||||
};
|
||||
|
||||
// A function to print segmentation stats for the given block list.
|
||||
|
@ -30,8 +30,8 @@ struct Pix;
|
||||
CLISTIZEH (PDBLK)
|
||||
///page block
|
||||
class PDBLK {
|
||||
friend class BLOCK_RECT_IT; //< block iterator
|
||||
friend class BLOCK; //< Page Block
|
||||
friend class BLOCK_RECT_IT; ///< block iterator
|
||||
friend class BLOCK; ///< Page Block
|
||||
|
||||
public:
|
||||
/// empty constructor
|
||||
@ -40,9 +40,9 @@ class PDBLK {
|
||||
index_ = 0;
|
||||
}
|
||||
/// simple constructor
|
||||
PDBLK(int16_t xmin, //< bottom left
|
||||
PDBLK(int16_t xmin, ///< bottom left
|
||||
int16_t ymin,
|
||||
int16_t xmax, //< top right
|
||||
int16_t xmax, ///< top right
|
||||
int16_t ymax);
|
||||
|
||||
/// set vertex lists
|
||||
@ -93,11 +93,11 @@ class PDBLK {
|
||||
PDBLK &operator=(const PDBLK &source);
|
||||
|
||||
protected:
|
||||
POLY_BLOCK *hand_poly; //< weird as well
|
||||
ICOORDELT_LIST leftside; //< left side vertices
|
||||
ICOORDELT_LIST rightside; //< right side vertices
|
||||
TBOX box; //< bounding box
|
||||
int index_; //< Serial number of this block.
|
||||
POLY_BLOCK *hand_poly; ///< weird as well
|
||||
ICOORDELT_LIST leftside; ///< left side vertices
|
||||
ICOORDELT_LIST rightside; ///< right side vertices
|
||||
TBOX box; ///< bounding box
|
||||
int index_; ///< Serial number of this block.
|
||||
};
|
||||
|
||||
class DLLSYM BLOCK_RECT_IT //rectangle iterator
|
||||
@ -134,10 +134,10 @@ class DLLSYM BLOCK_RECT_IT //rectangle iterator
|
||||
}
|
||||
|
||||
private:
|
||||
int16_t ymin; //< bottom of rectangle
|
||||
int16_t ymax; //< top of rectangle
|
||||
PDBLK *block; //< block to iterate
|
||||
ICOORDELT_IT left_it; //< boundary iterators
|
||||
int16_t ymin; ///< bottom of rectangle
|
||||
int16_t ymax; ///< top of rectangle
|
||||
PDBLK *block; ///< block to iterate
|
||||
ICOORDELT_IT left_it; ///< boundary iterators
|
||||
ICOORDELT_IT right_it;
|
||||
};
|
||||
|
||||
@ -167,8 +167,8 @@ class DLLSYM BLOCK_LINE_IT
|
||||
int16_t &xext);
|
||||
|
||||
private:
|
||||
PDBLK * block; //< block to iterate
|
||||
BLOCK_RECT_IT rect_it; //< rectangle iterator
|
||||
PDBLK * block; ///< block to iterate
|
||||
BLOCK_RECT_IT rect_it; ///< rectangle iterator
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -155,8 +155,8 @@ class ICOORD
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
|
||||
protected:
|
||||
int16_t xcoord; //< x value
|
||||
int16_t ycoord; //< y value
|
||||
int16_t xcoord; ///< x value
|
||||
int16_t ycoord; ///< y value
|
||||
};
|
||||
|
||||
class DLLSYM ICOORDELT:public ELIST_LINK, public ICOORD
|
||||
|
@ -26,9 +26,9 @@
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#define FIRST_COLOUR ScrollView::RED //< first rainbow colour
|
||||
#define LAST_COLOUR ScrollView::AQUAMARINE //< last rainbow colour
|
||||
#define CHILD_COLOUR ScrollView::BROWN //< colour of children
|
||||
#define FIRST_COLOUR ScrollView::RED ///< first rainbow colour
|
||||
#define LAST_COLOUR ScrollView::AQUAMARINE ///< last rainbow colour
|
||||
#define CHILD_COLOUR ScrollView::BROWN ///< colour of children
|
||||
|
||||
const ERRCODE CANT_SCALE_EDGESTEPS =
|
||||
"Attempted to scale an edgestep format word";
|
||||
@ -115,8 +115,8 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text)
|
||||
* The C_BLOBs are not copied so the source list is emptied.
|
||||
*/
|
||||
|
||||
WERD::WERD(C_BLOB_LIST * blob_list, //< In word order
|
||||
WERD * clone) //< Source of flags
|
||||
WERD::WERD(C_BLOB_LIST * blob_list, ///< In word order
|
||||
WERD * clone) ///< Source of flags
|
||||
: flags(clone->flags),
|
||||
script_id_(clone->script_id_),
|
||||
correct(clone->correct) {
|
||||
|
@ -28,30 +28,30 @@
|
||||
|
||||
enum WERD_FLAGS
|
||||
{
|
||||
W_SEGMENTED, //< correctly segmented
|
||||
W_ITALIC, //< italic text
|
||||
W_BOLD, //< bold text
|
||||
W_BOL, //< start of line
|
||||
W_EOL, //< end of line
|
||||
W_NORMALIZED, //< flags
|
||||
W_SCRIPT_HAS_XHEIGHT, //< x-height concept makes sense.
|
||||
W_SCRIPT_IS_LATIN, //< Special case latin for y. splitting.
|
||||
W_DONT_CHOP, //< fixed pitch chopped
|
||||
W_REP_CHAR, //< repeated character
|
||||
W_FUZZY_SP, //< fuzzy space
|
||||
W_FUZZY_NON, //< fuzzy nonspace
|
||||
W_INVERSE //< white on black
|
||||
W_SEGMENTED, ///< correctly segmented
|
||||
W_ITALIC, ///< italic text
|
||||
W_BOLD, ///< bold text
|
||||
W_BOL, ///< start of line
|
||||
W_EOL, ///< end of line
|
||||
W_NORMALIZED, ///< flags
|
||||
W_SCRIPT_HAS_XHEIGHT, ///< x-height concept makes sense.
|
||||
W_SCRIPT_IS_LATIN, ///< Special case latin for y. splitting.
|
||||
W_DONT_CHOP, ///< fixed pitch chopped
|
||||
W_REP_CHAR, ///< repeated character
|
||||
W_FUZZY_SP, ///< fuzzy space
|
||||
W_FUZZY_NON, ///< fuzzy nonspace
|
||||
W_INVERSE ///< white on black
|
||||
};
|
||||
|
||||
enum DISPLAY_FLAGS
|
||||
{
|
||||
/* Display flags bit number allocations */
|
||||
DF_BOX, //< Bounding box
|
||||
DF_TEXT, //< Correct ascii
|
||||
DF_POLYGONAL, //< Polyg approx
|
||||
DF_EDGE_STEP, //< Edge steps
|
||||
DF_BN_POLYGONAL, //< BL normalisd polyapx
|
||||
DF_BLAMER //< Blamer information
|
||||
DF_BOX, ///< Bounding box
|
||||
DF_TEXT, ///< Correct ascii
|
||||
DF_POLYGONAL, ///< Polyg approx
|
||||
DF_EDGE_STEP, ///< Edge steps
|
||||
DF_BN_POLYGONAL, ///< BL normalisd polyapx
|
||||
DF_BLAMER ///< Blamer information
|
||||
};
|
||||
|
||||
class ROW; //forward decl
|
||||
|
@ -24,10 +24,10 @@ namespace tesseract {
|
||||
|
||||
extern const char *kUTF8LineSeparator;
|
||||
extern const char *kUTF8ParagraphSeparator;
|
||||
extern const char *kLRM; //< Left-to-Right Mark
|
||||
extern const char *kRLM; //< Right-to-Left Mark
|
||||
extern const char *kRLE; //< Right-to-Left Embedding
|
||||
extern const char *kPDF; //< Pop Directional Formatting
|
||||
extern const char *kLRM; ///< Left-to-Right Mark
|
||||
extern const char *kRLM; ///< Right-to-Left Mark
|
||||
extern const char *kRLE; ///< Right-to-Left Embedding
|
||||
extern const char *kPDF; ///< Pop Directional Formatting
|
||||
|
||||
/// The following are confusable internal word punctuation symbols
|
||||
/// which we normalize to the first variant when matching in dawgs.
|
||||
|
@ -24,11 +24,9 @@
|
||||
using tesseract::TFile;
|
||||
|
||||
//---------------Global Data Definitions and Declarations--------------------
|
||||
#define TOKENSIZE 80 //< max size of tokens read from an input file
|
||||
#define TOKENSIZE 80 ///< max size of tokens read from an input file
|
||||
#define QUOTED_TOKENSIZE "79"
|
||||
#define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space
|
||||
//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block
|
||||
// size)
|
||||
#define MAXSAMPLESIZE 65535 ///< max num of dimensions in feature space
|
||||
|
||||
/**
|
||||
* This routine reads a single integer from the specified
|
||||
|
@ -69,11 +69,11 @@ class MinK {
|
||||
const Element* elements() { return elements_; }
|
||||
|
||||
private:
|
||||
const Key max_key_; //< the maximum possible Key
|
||||
Element *elements_; //< unsorted array of elements
|
||||
int elements_count_; //< the number of results collected so far
|
||||
int k_; //< the number of results we want from the search
|
||||
int max_index_; //< the index of the result with the largest key
|
||||
const Key max_key_; ///< the maximum possible Key
|
||||
Element *elements_; ///< unsorted array of elements
|
||||
int elements_count_; ///< the number of results collected so far
|
||||
int k_; ///< the number of results we want from the search
|
||||
int max_index_; ///< the index of the result with the largest key
|
||||
};
|
||||
|
||||
template<typename Key, typename Value>
|
||||
@ -132,8 +132,8 @@ class KDTreeSearch {
|
||||
|
||||
KDTREE *tree_;
|
||||
float *query_point_;
|
||||
float *sb_min_; //< search box minimum
|
||||
float *sb_max_; //< search box maximum
|
||||
float *sb_min_; ///< search box minimum
|
||||
float *sb_max_; ///< search box maximum
|
||||
MinK<float, void *> results_;
|
||||
};
|
||||
|
||||
|
@ -1,13 +1,8 @@
|
||||
/*****************************************************************************
|
||||
*
|
||||
* File: blkocc.cpp (Formerly blockocc.c)
|
||||
* File: blkocc.cpp (Formerly blockocc.c)
|
||||
* Description: Block Occupancy routines
|
||||
* Author: Chris Newton
|
||||
* Created: Fri Nov 8
|
||||
* Modified:
|
||||
* Language: C++
|
||||
* Package: N/A
|
||||
* Status: Experimental (Do Not Distribute)
|
||||
*
|
||||
* (c) Copyright 1991, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -51,13 +46,13 @@ static void horizontal_coutline_projection(C_OUTLINE *outline,
|
||||
*/
|
||||
|
||||
bool test_underline( //look for underlines
|
||||
bool testing_on, //< drawing blob
|
||||
C_BLOB* blob, //< blob to test
|
||||
int16_t baseline, //< coords of baseline
|
||||
int16_t xheight //< height of line
|
||||
bool testing_on, ///< drawing blob
|
||||
C_BLOB* blob, ///< blob to test
|
||||
int16_t baseline, ///< coords of baseline
|
||||
int16_t xheight ///< height of line
|
||||
) {
|
||||
int16_t occ;
|
||||
int16_t blob_width; //width of blob
|
||||
int16_t blob_width; //width of blob
|
||||
TBOX blob_box; //bounding box
|
||||
int32_t desc_occ;
|
||||
int32_t x_occ;
|
||||
@ -121,8 +116,8 @@ bool test_underline( //look for underlines
|
||||
*/
|
||||
|
||||
static void horizontal_cblob_projection( //project outlines
|
||||
C_BLOB *blob, //< blob to project
|
||||
STATS *stats //< output
|
||||
C_BLOB *blob, ///< blob to project
|
||||
STATS *stats ///< output
|
||||
) {
|
||||
//outlines of blob
|
||||
C_OUTLINE_IT out_it = blob->out_list ();
|
||||
@ -141,13 +136,13 @@ static void horizontal_cblob_projection( //project outlines
|
||||
*/
|
||||
|
||||
static void horizontal_coutline_projection( //project outlines
|
||||
C_OUTLINE *outline, //< outline to project
|
||||
STATS *stats //< output
|
||||
C_OUTLINE *outline, ///< outline to project
|
||||
STATS *stats ///< output
|
||||
) {
|
||||
ICOORD pos; //current point
|
||||
ICOORD step; //edge step
|
||||
int32_t length; //of outline
|
||||
int16_t stepindex; //current step
|
||||
int32_t length; //of outline
|
||||
int16_t stepindex; //current step
|
||||
C_OUTLINE_IT out_it = outline->child ();
|
||||
|
||||
pos = outline->start_pos ();
|
||||
|
@ -364,15 +364,15 @@ class TabFind : public AlignedBlob {
|
||||
void ApplyTabConstraints();
|
||||
|
||||
protected:
|
||||
ICOORD vertical_skew_; //< Estimate of true vertical in this image.
|
||||
int resolution_; //< Of source image in pixels per inch.
|
||||
ICOORD vertical_skew_; ///< Estimate of true vertical in this image.
|
||||
int resolution_; ///< Of source image in pixels per inch.
|
||||
private:
|
||||
ICOORD image_origin_; //< Top-left of image in deskewed coords
|
||||
TabVector_LIST vectors_; //< List of rule line and tabstops.
|
||||
TabVector_IT v_it_; //< Iterator for searching vectors_.
|
||||
TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors.
|
||||
ICOORD image_origin_; ///< Top-left of image in deskewed coords
|
||||
TabVector_LIST vectors_; ///< List of rule line and tabstops.
|
||||
TabVector_IT v_it_; ///< Iterator for searching vectors_.
|
||||
TabVector_LIST dead_vectors_; ///< Separators and unpartnered tab vectors.
|
||||
// List of commonly occurring width ranges with x=min and y=max.
|
||||
ICOORDELT_LIST column_widths_; //< List of commonly occurring width ranges.
|
||||
ICOORDELT_LIST column_widths_; ///< List of commonly occurring width ranges.
|
||||
/** Callback to test an int for being a common width. */
|
||||
WidthCallback* width_cb_;
|
||||
// Sets of bounding boxes that are candidate tab stops.
|
||||
|
@ -73,7 +73,7 @@ struct LanguageModelNgramInfo {
|
||||
LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
|
||||
: context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc),
|
||||
ngram_and_classifier_cost(ncc) {}
|
||||
STRING context; //< context string
|
||||
STRING context; ///< context string
|
||||
/// Length of the context measured by advancing using UNICHAR::utf8_step()
|
||||
/// (should be at most the order of the character ngram model used).
|
||||
int context_unichar_step_len;
|
||||
@ -163,13 +163,13 @@ struct ViterbiStateEntry : public ELIST_LINK {
|
||||
|
||||
/// Various information about the characters on the path represented
|
||||
/// by this ViterbiStateEntry.
|
||||
float ratings_sum; //< sum of ratings of character on the path
|
||||
float min_certainty; //< minimum certainty on the path
|
||||
int adapted; //< number of BLOB_CHOICES from adapted templates
|
||||
int length; //< number of characters on the path
|
||||
float outline_length; //< length of the outline so far
|
||||
LMConsistencyInfo consistency_info; //< path consistency info
|
||||
AssociateStats associate_stats; //< character widths/gaps/seams
|
||||
float ratings_sum; ///< sum of ratings of character on the path
|
||||
float min_certainty; ///< minimum certainty on the path
|
||||
int adapted; ///< number of BLOB_CHOICES from adapted templates
|
||||
int length; ///< number of characters on the path
|
||||
float outline_length; ///< length of the outline so far
|
||||
LMConsistencyInfo consistency_info; ///< path consistency info
|
||||
AssociateStats associate_stats; ///< character widths/gaps/seams
|
||||
|
||||
/// Flags for marking the entry as a top choice path with
|
||||
/// the smallest rating or lower/upper case letters.
|
||||
@ -183,7 +183,7 @@ struct ViterbiStateEntry : public ELIST_LINK {
|
||||
/// (owned by ViterbiStateEntry).
|
||||
LanguageModelNgramInfo *ngram_info;
|
||||
|
||||
bool updated; //< set to true if the entry has just been created/updated
|
||||
bool updated; ///< set to true if the entry has just been created/updated
|
||||
/// UTF8 string representing the path corresponding to this vse.
|
||||
/// Populated only when language_model_debug_level > 0.
|
||||
STRING *debug_str;
|
||||
|
Loading…
Reference in New Issue
Block a user