mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-13 06:08:52 +08:00
Merge pull request #133 from stweil/master
Fix some typos found by codespell
This commit is contained in:
commit
eb34cb1c94
@ -29,7 +29,7 @@
|
|||||||
*
|
*
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
TBOX::TBOX( //construtor
|
TBOX::TBOX( //constructor
|
||||||
const ICOORD pt1, //one corner
|
const ICOORD pt1, //one corner
|
||||||
const ICOORD pt2 //the other corner
|
const ICOORD pt2 //the other corner
|
||||||
) {
|
) {
|
||||||
|
@ -307,7 +307,7 @@ class DLLSYM TBOX { // bounding box
|
|||||||
*
|
*
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
inline TBOX::TBOX( // construtor
|
inline TBOX::TBOX( // constructor
|
||||||
const FCOORD pt // floating centre
|
const FCOORD pt // floating centre
|
||||||
) {
|
) {
|
||||||
bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ()));
|
bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ()));
|
||||||
|
@ -331,7 +331,7 @@ class GenericVector {
|
|||||||
void init(int size);
|
void init(int size);
|
||||||
|
|
||||||
// We are assuming that the objects generally placed in this
|
// We are assuming that the objects generally placed in this
|
||||||
// vector are small enough that for efficiency it makes sence
|
// vector are small enough that for efficiency it makes sense
|
||||||
// to start with a larger initial size.
|
// to start with a larger initial size.
|
||||||
static const int kDefaultVectorSize = 4;
|
static const int kDefaultVectorSize = 4;
|
||||||
inT32 size_used_;
|
inT32 size_used_;
|
||||||
|
@ -219,7 +219,7 @@ class UNICHARSET {
|
|||||||
|
|
||||||
// Return the UTF8 representation corresponding to the given UNICHAR_ID after
|
// Return the UTF8 representation corresponding to the given UNICHAR_ID after
|
||||||
// resolving any private encodings internal to Tesseract. This method is
|
// resolving any private encodings internal to Tesseract. This method is
|
||||||
// preferrable to id_to_unichar for outputting text that will be visible to
|
// preferable to id_to_unichar for outputting text that will be visible to
|
||||||
// external applications.
|
// external applications.
|
||||||
const char* const id_to_unichar_ext(UNICHAR_ID id) const;
|
const char* const id_to_unichar_ext(UNICHAR_ID id) const;
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ class CharSamp : public Bmp8 {
|
|||||||
label32_ = NULL;
|
label32_ = NULL;
|
||||||
}
|
}
|
||||||
if (label32 != NULL) {
|
if (label32 != NULL) {
|
||||||
// remove any byte order markes if any
|
// remove any byte order marks if any
|
||||||
if (label32[0] == 0xfeff) {
|
if (label32[0] == 0xfeff) {
|
||||||
label32++;
|
label32++;
|
||||||
}
|
}
|
||||||
|
@ -807,7 +807,7 @@ KERNEL(
|
|||||||
// HistogramRect Kernel: Accumulate
|
// HistogramRect Kernel: Accumulate
|
||||||
// assumes 4 channels, i.e., bytes_per_pixel = 4
|
// assumes 4 channels, i.e., bytes_per_pixel = 4
|
||||||
// assumes number of pixels is multiple of 8
|
// assumes number of pixels is multiple of 8
|
||||||
// data is layed out as
|
// data is laid out as
|
||||||
// ch0 ch1 ...
|
// ch0 ch1 ...
|
||||||
// bin0 bin1 bin2... bin0...
|
// bin0 bin1 bin2... bin0...
|
||||||
// rpt0,1,2...256 rpt0,1,2...
|
// rpt0,1,2...256 rpt0,1,2...
|
||||||
|
@ -495,7 +495,7 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c
|
|||||||
printf("[OD] write binary[%s] failed\n", fileName);
|
printf("[OD] write binary[%s] failed\n", fileName);
|
||||||
return 0;
|
return 0;
|
||||||
} //else
|
} //else
|
||||||
printf("[OD] write binary[%s] succesfully\n", fileName);
|
printf("[OD] write binary[%s] successfully\n", fileName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1630,7 +1630,7 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
|
|||||||
}
|
}
|
||||||
else if (xp > 0 || xn > 0 )
|
else if (xp > 0 || xn > 0 )
|
||||||
{
|
{
|
||||||
//Specfic Horizontal pass kernel for half width < 32
|
//Specific Horizontal pass kernel for half width < 32
|
||||||
rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_32word", &status );
|
rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_32word", &status );
|
||||||
isEven = (xp != xn);
|
isEven = (xp != xn);
|
||||||
|
|
||||||
@ -2371,7 +2371,7 @@ OpenclDevice::pixGetLinesCL(PIX *pixd,
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* HistogramRect
|
* HistogramRect
|
||||||
* Otsu Thresholding Operations
|
* Otsu Thresholding Operations
|
||||||
* histogramAllChannels is layed out as all channel 0, then all channel 1...
|
* histogramAllChannels is laid out as all channel 0, then all channel 1...
|
||||||
* only supports 1 or 4 channels (bytes_per_pixel)
|
* only supports 1 or 4 channels (bytes_per_pixel)
|
||||||
************************************************************************/
|
************************************************************************/
|
||||||
int OpenclDevice::HistogramRectOCL(
|
int OpenclDevice::HistogramRectOCL(
|
||||||
|
@ -56,7 +56,7 @@ rm -f testing/reports/$setname.times
|
|||||||
while read page dir
|
while read page dir
|
||||||
do
|
do
|
||||||
# A pages file may be a list of files with subdirs or maybe just
|
# A pages file may be a list of files with subdirs or maybe just
|
||||||
# a plain list of files so accomodate both.
|
# a plain list of files so accommodate both.
|
||||||
if [ "$dir" ]
|
if [ "$dir" ]
|
||||||
then
|
then
|
||||||
srcdir="$imdir/$dir"
|
srcdir="$imdir/$dir"
|
||||||
|
@ -402,7 +402,7 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
|
|||||||
// Compute skew tolerance.
|
// Compute skew tolerance.
|
||||||
int skew_tolerance = p.max_v_gap / kMaxSkewFactor;
|
int skew_tolerance = p.max_v_gap / kMaxSkewFactor;
|
||||||
// Calculate xmin and xmax of the search box so that it contains
|
// Calculate xmin and xmax of the search box so that it contains
|
||||||
// all possibly relevant boxes upto p.max_v_gap above or below according
|
// all possibly relevant boxes up to p.max_v_gap above or below according
|
||||||
// to top_to_bottom.
|
// to top_to_bottom.
|
||||||
// Start with a notion of vertical with the current estimate.
|
// Start with a notion of vertical with the current estimate.
|
||||||
int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y();
|
int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y();
|
||||||
@ -502,7 +502,7 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
|
|||||||
}
|
}
|
||||||
if ((p.right_tab && neighbour->leader_on_right()) ||
|
if ((p.right_tab && neighbour->leader_on_right()) ||
|
||||||
(!p.right_tab && neighbour->leader_on_left()))
|
(!p.right_tab && neighbour->leader_on_left()))
|
||||||
continue; // Neigbours of leaders are not allowed to be used.
|
continue; // Neighbours of leaders are not allowed to be used.
|
||||||
if (n_x <= x_at_n_y + p.r_align_tolerance &&
|
if (n_x <= x_at_n_y + p.r_align_tolerance &&
|
||||||
n_x >= x_at_n_y - p.l_align_tolerance) {
|
n_x >= x_at_n_y - p.l_align_tolerance) {
|
||||||
// Aligned so keep it. If it is a marked tab save it as result,
|
// Aligned so keep it. If it is a marked tab save it as result,
|
||||||
|
@ -154,7 +154,7 @@ IntGrid* IntGrid::NeighbourhoodSum() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if more than half the area of the rect is covered by grid
|
// Returns true if more than half the area of the rect is covered by grid
|
||||||
// cells that are over the theshold.
|
// cells that are over the threshold.
|
||||||
bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const {
|
bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const {
|
||||||
int min_x, min_y, max_x, max_y;
|
int min_x, min_y, max_x, max_y;
|
||||||
GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
|
GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
|
||||||
|
@ -127,7 +127,7 @@ class IntGrid : public GridBase {
|
|||||||
grid_[grid_y * gridwidth_ + grid_x] = value;
|
grid_[grid_y * gridwidth_ + grid_x] = value;
|
||||||
}
|
}
|
||||||
// Returns true if more than half the area of the rect is covered by grid
|
// Returns true if more than half the area of the rect is covered by grid
|
||||||
// cells that are over the theshold.
|
// cells that are over the threshold.
|
||||||
bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const;
|
bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const;
|
||||||
|
|
||||||
// Returns true if any cell value in the given rectangle is zero.
|
// Returns true if any cell value in the given rectangle is zero.
|
||||||
@ -292,7 +292,7 @@ template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
|
|||||||
// Return the next bbox in the search or NULL if done.
|
// Return the next bbox in the search or NULL if done.
|
||||||
BBC* NextFullSearch();
|
BBC* NextFullSearch();
|
||||||
|
|
||||||
// Start a new radius search. Will search in a spiral upto a
|
// Start a new radius search. Will search in a spiral up to a
|
||||||
// given maximum radius in grid cells from the given center in pixels.
|
// given maximum radius in grid cells from the given center in pixels.
|
||||||
void StartRadSearch(int x, int y, int max_radius);
|
void StartRadSearch(int x, int y, int max_radius);
|
||||||
// Return the next bbox in the radius search or NULL if the
|
// Return the next bbox in the radius search or NULL if the
|
||||||
@ -750,7 +750,7 @@ void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartSideSearch(int x,
|
|||||||
int ymin, int ymax) {
|
int ymin, int ymax) {
|
||||||
// Right search records the x in x_origin_, the ymax in y_origin_
|
// Right search records the x in x_origin_, the ymax in y_origin_
|
||||||
// and the size of the vertical strip to search in radius_.
|
// and the size of the vertical strip to search in radius_.
|
||||||
// To guarantee finding overlapping objects of upto twice the
|
// To guarantee finding overlapping objects of up to twice the
|
||||||
// given size, double the height.
|
// given size, double the height.
|
||||||
radius_ = ((ymax - ymin) * 2 + grid_->gridsize_ - 1) / grid_->gridsize_;
|
radius_ = ((ymax - ymin) * 2 + grid_->gridsize_ - 1) / grid_->gridsize_;
|
||||||
rad_index_ = 0;
|
rad_index_ = 0;
|
||||||
|
@ -39,7 +39,7 @@ CLASS REGION_OCC
|
|||||||
built in sorted order of min x. Overlapping REGION_OCCs are not permitted on
|
built in sorted order of min x. Overlapping REGION_OCCs are not permitted on
|
||||||
a single list. An overlapping region to be added causes the existing region
|
a single list. An overlapping region to be added causes the existing region
|
||||||
to be extended. This extension may result in the following REGION_OCC on the
|
to be extended. This extension may result in the following REGION_OCC on the
|
||||||
list overlapping the ammended one. In this case the ammended REGION_OCC is
|
list overlapping the amended one. In this case the amended REGION_OCC is
|
||||||
further extended to include the range of the following one, so that the
|
further extended to include the range of the following one, so that the
|
||||||
following one can be deleted.
|
following one can be deleted.
|
||||||
|
|
||||||
|
@ -67,7 +67,7 @@ CCNonTextDetect::CCNonTextDetect(int gridsize,
|
|||||||
noise_density_(NULL) {
|
noise_density_(NULL) {
|
||||||
// TODO(rays) break max_noise_count_ out into an area-proportional
|
// TODO(rays) break max_noise_count_ out into an area-proportional
|
||||||
// value, as now plus an additive constant for the number of text blobs
|
// value, as now plus an additive constant for the number of text blobs
|
||||||
// in the 3x3 neigbourhood - maybe 9.
|
// in the 3x3 neighbourhood - maybe 9.
|
||||||
}
|
}
|
||||||
|
|
||||||
CCNonTextDetect::~CCNonTextDetect() {
|
CCNonTextDetect::~CCNonTextDetect() {
|
||||||
|
@ -340,7 +340,7 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
|
|||||||
// into thinking they are dealing with left-to-right text.
|
// into thinking they are dealing with left-to-right text.
|
||||||
// To do this, we reflect the needed data in the y-axis and then reflect
|
// To do this, we reflect the needed data in the y-axis and then reflect
|
||||||
// the blocks back after they have been created. This is a temporary
|
// the blocks back after they have been created. This is a temporary
|
||||||
// arrangment that is confined to this function only, so the reflection
|
// arrangement that is confined to this function only, so the reflection
|
||||||
// is completely invisible in the output blocks.
|
// is completely invisible in the output blocks.
|
||||||
// The only objects reflected are:
|
// The only objects reflected are:
|
||||||
// The vertical separator lines that have already been found;
|
// The vertical separator lines that have already been found;
|
||||||
@ -869,7 +869,7 @@ void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Moves start in the direction of step, upto, but not including end while
|
// Moves start in the direction of step, up to, but not including end while
|
||||||
// the only incompatible regions are no more than kMaxIncompatibleColumnCount
|
// the only incompatible regions are no more than kMaxIncompatibleColumnCount
|
||||||
// in size, and the compatible regions beyond are bigger.
|
// in size, and the compatible regions beyond are bigger.
|
||||||
void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs,
|
void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs,
|
||||||
|
@ -212,7 +212,7 @@ class ColumnFinder : public TabFind {
|
|||||||
const bool* any_columns_possible,
|
const bool* any_columns_possible,
|
||||||
int column_set_id,
|
int column_set_id,
|
||||||
int* best_start, int* best_end);
|
int* best_start, int* best_end);
|
||||||
// Moves start in the direction of step, upto, but not including end while
|
// Moves start in the direction of step, up to, but not including end while
|
||||||
// the only incompatible regions are no more than kMaxIncompatibleColumnCount
|
// the only incompatible regions are no more than kMaxIncompatibleColumnCount
|
||||||
// in size, and the compatible regions beyond are bigger.
|
// in size, and the compatible regions beyond are bigger.
|
||||||
void ExtendRangePastSmallGaps(int** column_set_costs,
|
void ExtendRangePastSmallGaps(int** column_set_costs,
|
||||||
|
@ -981,7 +981,7 @@ int ColPartition::CountOverlappingBoxes(const TBOX& box) {
|
|||||||
return overlap_count;
|
return overlap_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Computes and sets the type_ and first_colum_, last_column_ and column_set_.
|
// Computes and sets the type_ and first_column_, last_column_ and column_set_.
|
||||||
// resolution refers to the ppi resolution of the image.
|
// resolution refers to the ppi resolution of the image.
|
||||||
void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) {
|
void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) {
|
||||||
int first_spanned_col = -1;
|
int first_spanned_col = -1;
|
||||||
@ -2194,7 +2194,7 @@ bool ColPartition::IsInSameColumnAs(const ColPartition& part) const {
|
|||||||
void ColPartition::SmoothSpacings(int resolution, int page_height,
|
void ColPartition::SmoothSpacings(int resolution, int page_height,
|
||||||
ColPartition_LIST* parts) {
|
ColPartition_LIST* parts) {
|
||||||
// The task would be trivial if we didn't have to allow for blips -
|
// The task would be trivial if we didn't have to allow for blips -
|
||||||
// occasional offsets in spacing caused by anomolous text, such as all
|
// occasional offsets in spacing caused by anomalous text, such as all
|
||||||
// caps, groups of descenders, joined words, Arabic etc.
|
// caps, groups of descenders, joined words, Arabic etc.
|
||||||
// The neighbourhood stores a consecutive group of partitions so that
|
// The neighbourhood stores a consecutive group of partitions so that
|
||||||
// blips can be detected correctly, yet conservatively enough to not
|
// blips can be detected correctly, yet conservatively enough to not
|
||||||
|
@ -695,7 +695,7 @@ class ColPartition : public ELIST2_LINK {
|
|||||||
// one partner. This makes block creation simpler.
|
// one partner. This makes block creation simpler.
|
||||||
// If get_desperate is true, goes to more desperate merge methods
|
// If get_desperate is true, goes to more desperate merge methods
|
||||||
// to merge flowing text before breaking partnerships.
|
// to merge flowing text before breaking partnerships.
|
||||||
void RefinePartners(PolyBlockType type, bool get_desparate,
|
void RefinePartners(PolyBlockType type, bool get_desperate,
|
||||||
ColPartitionGrid* grid);
|
ColPartitionGrid* grid);
|
||||||
|
|
||||||
// Returns true if this column partition is in the same column as
|
// Returns true if this column partition is in the same column as
|
||||||
@ -713,7 +713,7 @@ class ColPartition : public ELIST2_LINK {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// enum to refer to the entries in a neigbourhood of lines.
|
// enum to refer to the entries in a neighbourhood of lines.
|
||||||
// Used by SmoothSpacings to test for blips with OKSpacingBlip.
|
// Used by SmoothSpacings to test for blips with OKSpacingBlip.
|
||||||
enum SpacingNeighbourhood {
|
enum SpacingNeighbourhood {
|
||||||
PN_ABOVE2,
|
PN_ABOVE2,
|
||||||
|
@ -106,7 +106,7 @@ void ColPartitionGrid::HandleClick(int x, int y) {
|
|||||||
// Merges ColPartitions in the grid that look like they belong in the same
|
// Merges ColPartitions in the grid that look like they belong in the same
|
||||||
// textline.
|
// textline.
|
||||||
// For all partitions in the grid, calls the box_cb permanent callback
|
// For all partitions in the grid, calls the box_cb permanent callback
|
||||||
// to compute the search box, seaches the box, and if a candidate is found,
|
// to compute the search box, searches the box, and if a candidate is found,
|
||||||
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
||||||
// true, then the partitions are merged.
|
// true, then the partitions are merged.
|
||||||
// Both callbacks are deleted before returning.
|
// Both callbacks are deleted before returning.
|
||||||
@ -1438,7 +1438,7 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Smoothes the region type/flow type of the given part by looking at local
|
// Smoothes the region type/flow type of the given part by looking at local
|
||||||
// neigbours and the given image mask. Searches a padded rectangle with the
|
// neighbours and the given image mask. Searches a padded rectangle with the
|
||||||
// padding truncated on one side of the part's box in turn for each side,
|
// padding truncated on one side of the part's box in turn for each side,
|
||||||
// using the result (if any) that has the least distance to all neighbours
|
// using the result (if any) that has the least distance to all neighbours
|
||||||
// that contribute to the decision. This biases in favor of rectangular
|
// that contribute to the decision. This biases in favor of rectangular
|
||||||
@ -1759,7 +1759,7 @@ void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
|
|||||||
part->set_right_margin(right_margin);
|
part->set_right_margin(right_margin);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Starting at x, and going in the specified direction, upto x_limit, finds
|
// Starting at x, and going in the specified direction, up to x_limit, finds
|
||||||
// the margin for the given y range by searching sideways,
|
// the margin for the given y range by searching sideways,
|
||||||
// and ignoring not_this.
|
// and ignoring not_this.
|
||||||
int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
|
int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
|
||||||
|
@ -45,7 +45,7 @@ class ColPartitionGrid : public BBGrid<ColPartition,
|
|||||||
// Merges ColPartitions in the grid that look like they belong in the same
|
// Merges ColPartitions in the grid that look like they belong in the same
|
||||||
// textline.
|
// textline.
|
||||||
// For all partitions in the grid, calls the box_cb permanent callback
|
// For all partitions in the grid, calls the box_cb permanent callback
|
||||||
// to compute the search box, seaches the box, and if a candidate is found,
|
// to compute the search box, searches the box, and if a candidate is found,
|
||||||
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
||||||
// true, then the partitions are merged.
|
// true, then the partitions are merged.
|
||||||
// Both callbacks are deleted before returning.
|
// Both callbacks are deleted before returning.
|
||||||
@ -200,7 +200,7 @@ class ColPartitionGrid : public BBGrid<ColPartition,
|
|||||||
bool debug, ColPartition_CLIST* candidates);
|
bool debug, ColPartition_CLIST* candidates);
|
||||||
|
|
||||||
// Smoothes the region type/flow type of the given part by looking at local
|
// Smoothes the region type/flow type of the given part by looking at local
|
||||||
// neigbours and the given image mask. Searches a padded rectangle with the
|
// neighbours and the given image mask. Searches a padded rectangle with the
|
||||||
// padding truncated on one side of the part's box in turn for each side,
|
// padding truncated on one side of the part's box in turn for each side,
|
||||||
// using the result (if any) that has the least distance to all neighbours
|
// using the result (if any) that has the least distance to all neighbours
|
||||||
// that contribute to the decision. This biases in favor of rectangular
|
// that contribute to the decision. This biases in favor of rectangular
|
||||||
@ -246,7 +246,7 @@ class ColPartitionGrid : public BBGrid<ColPartition,
|
|||||||
// neighbours that vertically overlap significantly.
|
// neighbours that vertically overlap significantly.
|
||||||
void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
|
void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
|
||||||
|
|
||||||
// Starting at x, and going in the specified direction, upto x_limit, finds
|
// Starting at x, and going in the specified direction, up to x_limit, finds
|
||||||
// the margin for the given y range by searching sideways,
|
// the margin for the given y range by searching sideways,
|
||||||
// and ignoring not_this.
|
// and ignoring not_this.
|
||||||
int FindMargin(int x, bool right_to_left, int x_limit,
|
int FindMargin(int x, bool right_to_left, int x_limit,
|
||||||
|
@ -735,7 +735,7 @@ C_OUTLINE *join_chopped_fragments( //join pieces
|
|||||||
* join_segments
|
* join_segments
|
||||||
*
|
*
|
||||||
* Join the two edgestep fragments such that the second comes after
|
* Join the two edgestep fragments such that the second comes after
|
||||||
* the first and the gap beween them is closed.
|
* the first and the gap between them is closed.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
void join_segments( //join pieces
|
void join_segments( //join pieces
|
||||||
|
@ -648,7 +648,7 @@ static void CutChunkFromParts(const TBOX& box, const TBOX& im_box,
|
|||||||
TBOX part_box = part->bounding_box();
|
TBOX part_box = part->bounding_box();
|
||||||
if (part_box.overlap(box)) {
|
if (part_box.overlap(box)) {
|
||||||
// This part must be cut and replaced with the remains. There are
|
// This part must be cut and replaced with the remains. There are
|
||||||
// upto 4 pieces to be made. Start with the first one and use
|
// up to 4 pieces to be made. Start with the first one and use
|
||||||
// add_before_stay_put. For each piece if it has no black pixels
|
// add_before_stay_put. For each piece if it has no black pixels
|
||||||
// left, just don't make the box.
|
// left, just don't make the box.
|
||||||
// Above box.
|
// Above box.
|
||||||
|
@ -671,7 +671,7 @@ BOOL8 find_best_dropout_row( //find neighbours
|
|||||||
TO_ROW_IT *row_it, //current position
|
TO_ROW_IT *row_it, //current position
|
||||||
BOOL8 testing_on //correct orientation
|
BOOL8 testing_on //correct orientation
|
||||||
) {
|
) {
|
||||||
inT32 next_index; //of neigbouring row
|
inT32 next_index; //of neighbouring row
|
||||||
inT32 row_offset; //from current row
|
inT32 row_offset; //from current row
|
||||||
inT32 abs_dist; //absolute distance
|
inT32 abs_dist; //absolute distance
|
||||||
inT8 row_inc; //increment to row_index
|
inT8 row_inc; //increment to row_index
|
||||||
|
@ -66,7 +66,7 @@ class FPCUTPT
|
|||||||
inT16 pitch, //proposed pitch
|
inT16 pitch, //proposed pitch
|
||||||
inT16 pitch_error); //allowed tolerance
|
inT16 pitch_error); //allowed tolerance
|
||||||
|
|
||||||
inT32 position() { //acces func
|
inT32 position() { //access func
|
||||||
return xpos;
|
return xpos;
|
||||||
}
|
}
|
||||||
double cost_function() {
|
double cost_function() {
|
||||||
|
@ -46,7 +46,7 @@ class FPSEGPT:public ELIST_LINK
|
|||||||
FPSEGPT_LIST *prev_list); //previous segment
|
FPSEGPT_LIST *prev_list); //previous segment
|
||||||
FPSEGPT(FPCUTPT *cutpt); //build from new type
|
FPSEGPT(FPCUTPT *cutpt); //build from new type
|
||||||
|
|
||||||
inT32 position() { //acces func
|
inT32 position() { //access func
|
||||||
return xpos;
|
return xpos;
|
||||||
}
|
}
|
||||||
double cost_function() {
|
double cost_function() {
|
||||||
|
@ -75,7 +75,7 @@ const int kMaxTextLineBlobRatio = 5;
|
|||||||
const int kMinTextLineBlobRatio = 3;
|
const int kMinTextLineBlobRatio = 3;
|
||||||
// Fraction of box area covered by image to make a blob image.
|
// Fraction of box area covered by image to make a blob image.
|
||||||
const double kMinImageArea = 0.5;
|
const double kMinImageArea = 0.5;
|
||||||
// Upto 30 degrees is allowed for rotations of diacritic blobs.
|
// Up to 30 degrees is allowed for rotations of diacritic blobs.
|
||||||
// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp
|
// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp
|
||||||
// so that the assert there never fails.
|
// so that the assert there never fails.
|
||||||
const double kCosMaxSkewAngle = 0.866025;
|
const double kCosMaxSkewAngle = 0.866025;
|
||||||
|
@ -371,7 +371,7 @@ class TabFind : public AlignedBlob {
|
|||||||
TabVector_LIST vectors_; //< List of rule line and tabstops.
|
TabVector_LIST vectors_; //< List of rule line and tabstops.
|
||||||
TabVector_IT v_it_; //< Iterator for searching vectors_.
|
TabVector_IT v_it_; //< Iterator for searching vectors_.
|
||||||
TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors.
|
TabVector_LIST dead_vectors_; //< Separators and unpartnered tab vectors.
|
||||||
// List of commonly occuring width ranges with x=min and y=max.
|
// List of commonly occurring width ranges with x=min and y=max.
|
||||||
ICOORDELT_LIST column_widths_; //< List of commonly occurring width ranges.
|
ICOORDELT_LIST column_widths_; //< List of commonly occurring width ranges.
|
||||||
/** Callback to test an int for being a common width. */
|
/** Callback to test an int for being a common width. */
|
||||||
WidthCallback* width_cb_;
|
WidthCallback* width_cb_;
|
||||||
|
@ -943,7 +943,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
|||||||
return true;
|
return true;
|
||||||
|
|
||||||
// return true if the maximum gap found is smaller than the minimum allowed
|
// return true if the maximum gap found is smaller than the minimum allowed
|
||||||
// max_gap in a text partition. This indicates that there is no signficant
|
// max_gap in a text partition. This indicates that there is no significant
|
||||||
// space in the partition, hence it is likely a single word.
|
// space in the partition, hence it is likely a single word.
|
||||||
return largest_partition_gap_found < min_gap;
|
return largest_partition_gap_found < min_gap;
|
||||||
}
|
}
|
||||||
@ -954,7 +954,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
|||||||
// Note that this includes overlapping leaders. However, it does not
|
// Note that this includes overlapping leaders. However, it does not
|
||||||
// include leaders in different columns on the page.
|
// include leaders in different columns on the page.
|
||||||
// Possible false-positive will include lists, such as a table of contents.
|
// Possible false-positive will include lists, such as a table of contents.
|
||||||
// As these arise, the agressive nature of this search may need to be
|
// As these arise, the aggressive nature of this search may need to be
|
||||||
// trimmed down.
|
// trimmed down.
|
||||||
bool TableFinder::HasLeaderAdjacent(const ColPartition& part) {
|
bool TableFinder::HasLeaderAdjacent(const ColPartition& part) {
|
||||||
if (part.flow() == BTFT_LEADER)
|
if (part.flow() == BTFT_LEADER)
|
||||||
|
@ -685,7 +685,7 @@ int StructuredTable::CountHorizontalIntersections(int y) {
|
|||||||
|
|
||||||
// Counts how many text partitions are in this box.
|
// Counts how many text partitions are in this box.
|
||||||
// This is used to count partitions in cells, as that can indicate
|
// This is used to count partitions in cells, as that can indicate
|
||||||
// how "strong" a potential table row/colum (or even full table) actually is.
|
// how "strong" a potential table row/column (or even full table) actually is.
|
||||||
int StructuredTable::CountPartitions(const TBOX& box) {
|
int StructuredTable::CountPartitions(const TBOX& box) {
|
||||||
ColPartitionGridSearch gsearch(text_grid_);
|
ColPartitionGridSearch gsearch(text_grid_);
|
||||||
gsearch.SetUniqueMode(true);
|
gsearch.SetUniqueMode(true);
|
||||||
@ -740,7 +740,7 @@ StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) {
|
|||||||
table->set_line_grid(line_grid_);
|
table->set_line_grid(line_grid_);
|
||||||
table->set_max_text_height(max_text_height_);
|
table->set_max_text_height(max_text_height_);
|
||||||
|
|
||||||
// Try to solve ths simple case, a table with *both*
|
// Try to solve this simple case, a table with *both*
|
||||||
// vertical and horizontal lines.
|
// vertical and horizontal lines.
|
||||||
if (RecognizeLinedTable(guess, table))
|
if (RecognizeLinedTable(guess, table))
|
||||||
return table;
|
return table;
|
||||||
|
@ -75,7 +75,7 @@ Textord::Textord(CCStruct* ccstruct)
|
|||||||
BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
|
BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
BOOL_MEMBER(tosp_fuzzy_limit_all, true,
|
BOOL_MEMBER(tosp_fuzzy_limit_all, true,
|
||||||
"Dont restrict kn->sp fuzzy limit to tables",
|
"Don't restrict kn->sp fuzzy limit to tables",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
|
BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
|
||||||
"Use within xht gap for wd breaks",
|
"Use within xht gap for wd breaks",
|
||||||
@ -86,7 +86,7 @@ Textord::Textord(CCStruct* ccstruct)
|
|||||||
"Only use within xht gap for wd breaks",
|
"Only use within xht gap for wd breaks",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
BOOL_MEMBER(tosp_rule_9_test_punct, false,
|
BOOL_MEMBER(tosp_rule_9_test_punct, false,
|
||||||
"Dont chng kn to space next to punct",
|
"Don't chng kn to space next to punct",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
|
BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
@ -169,7 +169,7 @@ Textord::Textord(CCStruct* ccstruct)
|
|||||||
double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
|
double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
double_MEMBER(tosp_min_sane_kn_sp, 1.5,
|
double_MEMBER(tosp_min_sane_kn_sp, 1.5,
|
||||||
"Dont trust spaces less than this time kn",
|
"Don't trust spaces less than this time kn",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
double_MEMBER(tosp_init_guess_kn_mult, 2.2,
|
double_MEMBER(tosp_init_guess_kn_mult, 2.2,
|
||||||
"Thresh guess - mult kn by this",
|
"Thresh guess - mult kn by this",
|
||||||
@ -181,7 +181,7 @@ Textord::Textord(CCStruct* ccstruct)
|
|||||||
"Multiplier on kn to limit thresh",
|
"Multiplier on kn to limit thresh",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
double_MEMBER(tosp_flip_caution, 0.0,
|
double_MEMBER(tosp_flip_caution, 0.0,
|
||||||
"Dont autoflip kn to sp when large separation",
|
"Don't autoflip kn to sp when large separation",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
double_MEMBER(tosp_large_kerning, 0.19,
|
double_MEMBER(tosp_large_kerning, 0.19,
|
||||||
"Limit use of xht gap with large kns",
|
"Limit use of xht gap with large kns",
|
||||||
@ -190,10 +190,10 @@ Textord::Textord(CCStruct* ccstruct)
|
|||||||
"Limit use of xht gap with odd small kns",
|
"Limit use of xht gap with odd small kns",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
double_MEMBER(tosp_near_lh_edge, 0,
|
double_MEMBER(tosp_near_lh_edge, 0,
|
||||||
"Dont reduce box if the top left is non blank",
|
"Don't reduce box if the top left is non blank",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
|
double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
|
||||||
"Dont let sp minus kn get too small",
|
"Don't let sp minus kn get too small",
|
||||||
ccstruct_->params()),
|
ccstruct_->params()),
|
||||||
double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
|
double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
|
||||||
"How wide fuzzies need context",
|
"How wide fuzzies need context",
|
||||||
|
@ -286,7 +286,7 @@ class Textord {
|
|||||||
BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess");
|
BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess");
|
||||||
BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?");
|
BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?");
|
||||||
BOOL_VAR_H(tosp_fuzzy_limit_all, true,
|
BOOL_VAR_H(tosp_fuzzy_limit_all, true,
|
||||||
"Dont restrict kn->sp fuzzy limit to tables");
|
"Don't restrict kn->sp fuzzy limit to tables");
|
||||||
BOOL_VAR_H(tosp_stats_use_xht_gaps, true,
|
BOOL_VAR_H(tosp_stats_use_xht_gaps, true,
|
||||||
"Use within xht gap for wd breaks");
|
"Use within xht gap for wd breaks");
|
||||||
BOOL_VAR_H(tosp_use_xht_gaps, true,
|
BOOL_VAR_H(tosp_use_xht_gaps, true,
|
||||||
@ -294,7 +294,7 @@ class Textord {
|
|||||||
BOOL_VAR_H(tosp_only_use_xht_gaps, false,
|
BOOL_VAR_H(tosp_only_use_xht_gaps, false,
|
||||||
"Only use within xht gap for wd breaks");
|
"Only use within xht gap for wd breaks");
|
||||||
BOOL_VAR_H(tosp_rule_9_test_punct, false,
|
BOOL_VAR_H(tosp_rule_9_test_punct, false,
|
||||||
"Dont chng kn to space next to punct");
|
"Don't chng kn to space next to punct");
|
||||||
BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip");
|
BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip");
|
||||||
BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip");
|
BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip");
|
||||||
BOOL_VAR_H(tosp_improve_thresh, false,
|
BOOL_VAR_H(tosp_improve_thresh, false,
|
||||||
@ -350,7 +350,7 @@ class Textord {
|
|||||||
double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
|
double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
|
||||||
double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
|
double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
|
||||||
double_VAR_H(tosp_min_sane_kn_sp, 1.5,
|
double_VAR_H(tosp_min_sane_kn_sp, 1.5,
|
||||||
"Dont trust spaces less than this time kn");
|
"Don't trust spaces less than this time kn");
|
||||||
double_VAR_H(tosp_init_guess_kn_mult, 2.2,
|
double_VAR_H(tosp_init_guess_kn_mult, 2.2,
|
||||||
"Thresh guess - mult kn by this");
|
"Thresh guess - mult kn by this");
|
||||||
double_VAR_H(tosp_init_guess_xht_mult, 0.28,
|
double_VAR_H(tosp_init_guess_xht_mult, 0.28,
|
||||||
@ -358,15 +358,15 @@ class Textord {
|
|||||||
double_VAR_H(tosp_max_sane_kn_thresh, 5.0,
|
double_VAR_H(tosp_max_sane_kn_thresh, 5.0,
|
||||||
"Multiplier on kn to limit thresh");
|
"Multiplier on kn to limit thresh");
|
||||||
double_VAR_H(tosp_flip_caution, 0.0,
|
double_VAR_H(tosp_flip_caution, 0.0,
|
||||||
"Dont autoflip kn to sp when large separation");
|
"Don't autoflip kn to sp when large separation");
|
||||||
double_VAR_H(tosp_large_kerning, 0.19,
|
double_VAR_H(tosp_large_kerning, 0.19,
|
||||||
"Limit use of xht gap with large kns");
|
"Limit use of xht gap with large kns");
|
||||||
double_VAR_H(tosp_dont_fool_with_small_kerns, -1,
|
double_VAR_H(tosp_dont_fool_with_small_kerns, -1,
|
||||||
"Limit use of xht gap with odd small kns");
|
"Limit use of xht gap with odd small kns");
|
||||||
double_VAR_H(tosp_near_lh_edge, 0,
|
double_VAR_H(tosp_near_lh_edge, 0,
|
||||||
"Dont reduce box if the top left is non blank");
|
"Don't reduce box if the top left is non blank");
|
||||||
double_VAR_H(tosp_silly_kn_sp_gap, 0.2,
|
double_VAR_H(tosp_silly_kn_sp_gap, 0.2,
|
||||||
"Dont let sp minus kn get too small");
|
"Don't let sp minus kn get too small");
|
||||||
double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75,
|
double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75,
|
||||||
"How wide fuzzies need context");
|
"How wide fuzzies need context");
|
||||||
// tordmain.cpp ///////////////////////////////////////////
|
// tordmain.cpp ///////////////////////////////////////////
|
||||||
|
@ -1084,7 +1084,7 @@ BOOL8 count_pitch_stats( //find lines
|
|||||||
return FALSE;
|
return FALSE;
|
||||||
prev_valid = FALSE;
|
prev_valid = FALSE;
|
||||||
prev_centre = 0;
|
prev_centre = 0;
|
||||||
prev_right = 0; //stop complier warning
|
prev_right = 0; //stop compiler warning
|
||||||
joined_box = blob_it.data ()->bounding_box ();
|
joined_box = blob_it.data ()->bounding_box ();
|
||||||
do {
|
do {
|
||||||
blob_it.forward ();
|
blob_it.forward ();
|
||||||
|
@ -419,7 +419,7 @@ void Textord::row_spacing_stats(
|
|||||||
if (suspected_table &&
|
if (suspected_table &&
|
||||||
(row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
|
(row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
|
||||||
if (tosp_debug_level > 5)
|
if (tosp_debug_level > 5)
|
||||||
tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n",
|
tprintf ("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n",
|
||||||
block_idx, row_idx,
|
block_idx, row_idx,
|
||||||
row->kern_size, row->space_threshold, row->space_size);
|
row->kern_size, row->space_threshold, row->space_size);
|
||||||
row->space_threshold =
|
row->space_threshold =
|
||||||
@ -442,7 +442,7 @@ void Textord::row_spacing_stats(
|
|||||||
row->xheight / 2);
|
row->xheight / 2);
|
||||||
if (tosp_debug_level > 5)
|
if (tosp_debug_level > 5)
|
||||||
tprintf
|
tprintf
|
||||||
("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
|
("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
|
||||||
block_idx, row_idx, row->kern_size, row->space_threshold,
|
block_idx, row_idx, row->kern_size, row->space_threshold,
|
||||||
row->space_size, sane_space);
|
row->space_size, sane_space);
|
||||||
row->space_size = sane_space;
|
row->space_size = sane_space;
|
||||||
@ -455,7 +455,7 @@ void Textord::row_spacing_stats(
|
|||||||
MAX (row->kern_size, 2.5)));
|
MAX (row->kern_size, 2.5)));
|
||||||
if (row->space_threshold > sane_threshold) {
|
if (row->space_threshold > sane_threshold) {
|
||||||
if (tosp_debug_level > 5)
|
if (tosp_debug_level > 5)
|
||||||
tprintf ("B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
|
tprintf ("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
|
||||||
block_idx, row_idx,
|
block_idx, row_idx,
|
||||||
row->kern_size,
|
row->kern_size,
|
||||||
row->space_threshold, row->space_size, sane_threshold);
|
row->space_threshold, row->space_size, sane_threshold);
|
||||||
@ -498,7 +498,7 @@ void Textord::row_spacing_stats(
|
|||||||
MIN (inT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),
|
MIN (inT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),
|
||||||
inT32 (row->space_size));
|
inT32 (row->space_size));
|
||||||
if (row->min_space <= row->space_threshold)
|
if (row->min_space <= row->space_threshold)
|
||||||
//Dont be silly
|
//Don't be silly
|
||||||
row->min_space = row->space_threshold + 1;
|
row->min_space = row->space_threshold + 1;
|
||||||
/*
|
/*
|
||||||
Lets try to guess the max certain kern gap by looking at the cluster of
|
Lets try to guess the max certain kern gap by looking at the cluster of
|
||||||
@ -542,7 +542,7 @@ void Textord::row_spacing_stats(
|
|||||||
/* Ensure that ANY space less than some multiplier times the kern size is
|
/* Ensure that ANY space less than some multiplier times the kern size is
|
||||||
fuzzy. In tables there is a risk of erroneously setting a small space size
|
fuzzy. In tables there is a risk of erroneously setting a small space size
|
||||||
when there are no real spaces. Sometimes tables have text squashed into
|
when there are no real spaces. Sometimes tables have text squashed into
|
||||||
columns so that the kn->sp ratio is small anyway - this means that we cant
|
columns so that the kn->sp ratio is small anyway - this means that we can't
|
||||||
use this to force a wider separation - hence we rely on context to join any
|
use this to force a wider separation - hence we rely on context to join any
|
||||||
dubious breaks. */
|
dubious breaks. */
|
||||||
|
|
||||||
@ -559,7 +559,7 @@ void Textord::row_spacing_stats(
|
|||||||
row->kern_size));
|
row->kern_size));
|
||||||
}
|
}
|
||||||
if (row->max_nonspace > row->space_threshold) {
|
if (row->max_nonspace > row->space_threshold) {
|
||||||
//Dont be silly
|
//Don't be silly
|
||||||
row->max_nonspace = row->space_threshold;
|
row->max_nonspace = row->space_threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -700,7 +700,7 @@ BOOL8 Textord::isolated_row_stats(TO_ROW *row,
|
|||||||
((small_gaps_count / (float) total) < tosp_enough_small_gaps) ||
|
((small_gaps_count / (float) total) < tosp_enough_small_gaps) ||
|
||||||
(total - small_gaps_count < 1)) {
|
(total - small_gaps_count < 1)) {
|
||||||
if (tosp_debug_level > 5)
|
if (tosp_debug_level > 5)
|
||||||
tprintf ("B:%d R:%d -- Cant do isolated row stats.\n",
|
tprintf ("B:%d R:%d -- Can't do isolated row stats.\n",
|
||||||
block_idx, row_idx);
|
block_idx, row_idx);
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
@ -1728,7 +1728,7 @@ BOOL8 Textord::ignore_big_gap(TO_ROW *row,
|
|||||||
inT16 gap = right - left + 1;
|
inT16 gap = right - left + 1;
|
||||||
|
|
||||||
if (tosp_ignore_big_gaps > 999)
|
if (tosp_ignore_big_gaps > 999)
|
||||||
return FALSE; //Dont ignore
|
return FALSE; //Don't ignore
|
||||||
if (tosp_ignore_big_gaps > 0)
|
if (tosp_ignore_big_gaps > 0)
|
||||||
return (gap > tosp_ignore_big_gaps * row->xheight);
|
return (gap > tosp_ignore_big_gaps * row->xheight);
|
||||||
if (gap > tosp_ignore_very_big_gaps * row->xheight)
|
if (gap > tosp_ignore_very_big_gaps * row->xheight)
|
||||||
@ -1757,7 +1757,7 @@ BOOL8 Textord::ignore_big_gap(TO_ROW *row,
|
|||||||
* Compute the bounding box of this blob with merging of x overlaps
|
* Compute the bounding box of this blob with merging of x overlaps
|
||||||
* but no pre-chopping.
|
* but no pre-chopping.
|
||||||
* Then move the iterator on to the start of the next blob.
|
* Then move the iterator on to the start of the next blob.
|
||||||
* DONT reduce the box for small things - eg punctuation.
|
* DON'T reduce the box for small things - eg punctuation.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
TBOX Textord::reduced_box_next(
|
TBOX Textord::reduced_box_next(
|
||||||
TO_ROW *row, // current row
|
TO_ROW *row, // current row
|
||||||
|
@ -50,7 +50,7 @@ EXTERN double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight");
|
|||||||
EXTERN double_VAR (textord_words_default_nonspace, 0.2,
|
EXTERN double_VAR (textord_words_default_nonspace, 0.2,
|
||||||
"Fraction of xheight");
|
"Fraction of xheight");
|
||||||
EXTERN double_VAR (textord_words_initial_lower, 0.25,
|
EXTERN double_VAR (textord_words_initial_lower, 0.25,
|
||||||
"Max inital cluster size");
|
"Max initial cluster size");
|
||||||
EXTERN double_VAR (textord_words_initial_upper, 0.15,
|
EXTERN double_VAR (textord_words_initial_upper, 0.15,
|
||||||
"Min initial cluster spacing");
|
"Min initial cluster spacing");
|
||||||
EXTERN double_VAR (textord_words_minlarge, 0.75,
|
EXTERN double_VAR (textord_words_minlarge, 0.75,
|
||||||
@ -67,7 +67,7 @@ EXTERN double_VAR (textord_pitch_rowsimilarity, 0.08,
|
|||||||
"Fraction of xheight for sameness");
|
"Fraction of xheight for sameness");
|
||||||
EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE,
|
EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE,
|
||||||
"Scale scores on big words");
|
"Scale scores on big words");
|
||||||
EXTERN double_VAR (words_initial_lower, 0.5, "Max inital cluster size");
|
EXTERN double_VAR (words_initial_lower, 0.5, "Max initial cluster size");
|
||||||
EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing");
|
EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing");
|
||||||
EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight");
|
EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight");
|
||||||
EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight");
|
EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight");
|
||||||
|
@ -52,7 +52,7 @@ extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight");
|
|||||||
extern double_VAR_H (textord_words_default_nonspace, 0.2,
|
extern double_VAR_H (textord_words_default_nonspace, 0.2,
|
||||||
"Fraction of xheight");
|
"Fraction of xheight");
|
||||||
extern double_VAR_H (textord_words_initial_lower, 0.25,
|
extern double_VAR_H (textord_words_initial_lower, 0.25,
|
||||||
"Max inital cluster size");
|
"Max initial cluster size");
|
||||||
extern double_VAR_H (textord_words_initial_upper, 0.15,
|
extern double_VAR_H (textord_words_initial_upper, 0.15,
|
||||||
"Min initial cluster spacing");
|
"Min initial cluster spacing");
|
||||||
extern double_VAR_H (textord_words_minlarge, 0.75,
|
extern double_VAR_H (textord_words_minlarge, 0.75,
|
||||||
@ -69,7 +69,7 @@ extern double_VAR_H (textord_pitch_rowsimilarity, 0.08,
|
|||||||
"Fraction of xheight for sameness");
|
"Fraction of xheight for sameness");
|
||||||
extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE,
|
extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE,
|
||||||
"Scale scores on big words");
|
"Scale scores on big words");
|
||||||
extern double_VAR_H (words_initial_lower, 0.5, "Max inital cluster size");
|
extern double_VAR_H (words_initial_lower, 0.5, "Max initial cluster size");
|
||||||
extern double_VAR_H (words_initial_upper, 0.15,
|
extern double_VAR_H (words_initial_upper, 0.15,
|
||||||
"Min initial cluster spacing");
|
"Min initial cluster spacing");
|
||||||
extern double_VAR_H (words_default_prop_nonspace, 0.25,
|
extern double_VAR_H (words_default_prop_nonspace, 0.25,
|
||||||
|
@ -263,14 +263,14 @@ cmap_table cmap = {
|
|||||||
0x0000, /* encodingID = 0 */
|
0x0000, /* encodingID = 0 */
|
||||||
0x20000000, /* Offset of data */
|
0x20000000, /* Offset of data */
|
||||||
0x0600, /* STart of Apple table (format 6) */
|
0x0600, /* STart of Apple table (format 6) */
|
||||||
0x0C00, /* lenght of table (12) */
|
0x0C00, /* length of table (12) */
|
||||||
0x0000, /* Language must be 0 for non-Apple or
|
0x0000, /* Language must be 0 for non-Apple or
|
||||||
non-specific language */
|
non-specific language */
|
||||||
0x0000, /* firstCode = 0 */
|
0x0000, /* firstCode = 0 */
|
||||||
0x0100, /* number of codes is 1 */
|
0x0100, /* number of codes is 1 */
|
||||||
0x0000, /* GID is 0 */
|
0x0000, /* GID is 0 */
|
||||||
0x0600, /* Start of MS Table (format 4) */
|
0x0600, /* Start of MS Table (format 4) */
|
||||||
0x0C00, /* lenght of table (12) */
|
0x0C00, /* length of table (12) */
|
||||||
0x0000, /* Language must be 0 for non-Apple or
|
0x0000, /* Language must be 0 for non-Apple or
|
||||||
non-specific language */
|
non-specific language */
|
||||||
0x0000, /* firstCode = 0 */
|
0x0000, /* firstCode = 0 */
|
||||||
|
@ -175,7 +175,7 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
|
|||||||
right = prev->x;
|
right = prev->x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Left becomes the max right of all next boxes foward to the first
|
// Left becomes the max right of all next boxes forward to the first
|
||||||
// space or newline.
|
// space or newline.
|
||||||
for (int j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != NULL &&
|
for (int j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != NULL &&
|
||||||
(*boxes)[j]->ch_ != "\t";
|
(*boxes)[j]->ch_ != "\t";
|
||||||
|
@ -78,7 +78,7 @@ int main(int argc, char **argv) {
|
|||||||
printf("Error combining tessdata files into %s\n",
|
printf("Error combining tessdata files into %s\n",
|
||||||
output_file.string());
|
output_file.string());
|
||||||
} else {
|
} else {
|
||||||
printf("Output %s created sucessfully.\n", output_file.string());
|
printf("Output %s created successfully.\n", output_file.string());
|
||||||
}
|
}
|
||||||
} else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
|
} else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
|
||||||
strcmp(argv[1], "-u") == 0)) {
|
strcmp(argv[1], "-u") == 0)) {
|
||||||
|
@ -318,7 +318,7 @@ const char *GetNextFilename(int argc, const char* const * argv) {
|
|||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
/**
|
/**
|
||||||
* This routine searches thru a list of labeled lists to find
|
* This routine searches through a list of labeled lists to find
|
||||||
* a list with the specified label. If a matching labeled list
|
* a list with the specified label. If a matching labeled list
|
||||||
* cannot be found, NULL is returned.
|
* cannot be found, NULL is returned.
|
||||||
* @param List list to search
|
* @param List list to search
|
||||||
@ -461,10 +461,10 @@ void FreeTrainingSamples(LIST CharList) {
|
|||||||
LIST FeatureList;
|
LIST FeatureList;
|
||||||
|
|
||||||
|
|
||||||
iterate(CharList) { /* iterate thru all of the fonts */
|
iterate(CharList) { /* iterate through all of the fonts */
|
||||||
char_sample = (LABELEDLIST) first_node(CharList);
|
char_sample = (LABELEDLIST) first_node(CharList);
|
||||||
FeatureList = char_sample->List;
|
FeatureList = char_sample->List;
|
||||||
iterate(FeatureList) { /* iterate thru all of the classes */
|
iterate(FeatureList) { /* iterate through all of the classes */
|
||||||
FeatureSet = (FEATURE_SET) first_node(FeatureList);
|
FeatureSet = (FEATURE_SET) first_node(FeatureList);
|
||||||
FreeFeatureSet(FeatureSet);
|
FreeFeatureSet(FeatureSet);
|
||||||
}
|
}
|
||||||
@ -743,7 +743,7 @@ void FreeLabeledClassList (
|
|||||||
{
|
{
|
||||||
MERGE_CLASS MergeClass;
|
MERGE_CLASS MergeClass;
|
||||||
|
|
||||||
iterate (ClassList) /* iterate thru all of the fonts */
|
iterate (ClassList) /* iterate through all of the fonts */
|
||||||
{
|
{
|
||||||
MergeClass = (MERGE_CLASS) first_node (ClassList);
|
MergeClass = (MERGE_CLASS) first_node (ClassList);
|
||||||
free (MergeClass->Label);
|
free (MergeClass->Label);
|
||||||
@ -841,7 +841,7 @@ void FreeNormProtoList (
|
|||||||
{
|
{
|
||||||
LABELEDLIST char_sample;
|
LABELEDLIST char_sample;
|
||||||
|
|
||||||
iterate (CharList) /* iterate thru all of the fonts */
|
iterate (CharList) /* iterate through all of the fonts */
|
||||||
{
|
{
|
||||||
char_sample = (LABELEDLIST) first_node (CharList);
|
char_sample = (LABELEDLIST) first_node (CharList);
|
||||||
FreeLabeledList (char_sample);
|
FreeLabeledList (char_sample);
|
||||||
|
@ -149,7 +149,7 @@ void ComputeMergedProto (PROTO p1,
|
|||||||
} /* ComputeMergedProto */
|
} /* ComputeMergedProto */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This routine searches thru all of the prototypes in
|
* This routine searches through all of the prototypes in
|
||||||
* Class and returns the id of the proto which would provide
|
* Class and returns the id of the proto which would provide
|
||||||
* the best approximation of Prototype. If no close
|
* the best approximation of Prototype. If no close
|
||||||
* approximation can be found, NO_PROTO is returned.
|
* approximation can be found, NO_PROTO is returned.
|
||||||
|
@ -217,7 +217,7 @@ bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
|
|||||||
== PANGO_VARIANT_SMALL_CAPS);
|
== PANGO_VARIANT_SMALL_CAPS);
|
||||||
|
|
||||||
is_bold_ = (pango_font_description_get_weight(desc) >= PANGO_WEIGHT_BOLD);
|
is_bold_ = (pango_font_description_get_weight(desc) >= PANGO_WEIGHT_BOLD);
|
||||||
// We dont have a way to detect whether a font is of type Fraktur. The fonts
|
// We don't have a way to detect whether a font is of type Fraktur. The fonts
|
||||||
// we currently use all have "Fraktur" in their family name, so we do a
|
// we currently use all have "Fraktur" in their family name, so we do a
|
||||||
// fragile but functional check for that here.
|
// fragile but functional check for that here.
|
||||||
is_fraktur_ = (strcasestr(family, "Fraktur") != NULL);
|
is_fraktur_ = (strcasestr(family, "Fraktur") != NULL);
|
||||||
|
@ -33,7 +33,7 @@ typedef signed int char32;
|
|||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
// Data holder class for a font, intented to avoid having to work with Pango or
|
// Data holder class for a font, intended to avoid having to work with Pango or
|
||||||
// FontConfig-specific objects directly.
|
// FontConfig-specific objects directly.
|
||||||
class PangoFontInfo {
|
class PangoFontInfo {
|
||||||
public:
|
public:
|
||||||
|
@ -50,7 +50,7 @@ class StringRenderer {
|
|||||||
StringRenderer(const string& font_desc, int page_width, int page_height);
|
StringRenderer(const string& font_desc, int page_width, int page_height);
|
||||||
~StringRenderer();
|
~StringRenderer();
|
||||||
|
|
||||||
// Renders the text with the chosen font and returns the byte offset upto
|
// Renders the text with the chosen font and returns the byte offset up to
|
||||||
// which the text could be rendered so as to fit the specified page
|
// which the text could be rendered so as to fit the specified page
|
||||||
// dimensions.
|
// dimensions.
|
||||||
int RenderToImage(const char* text, int text_length, Pix** pix);
|
int RenderToImage(const char* text, int text_length, Pix** pix);
|
||||||
|
@ -44,7 +44,7 @@
|
|||||||
|
|
||||||
const int kSvPort = 8461;
|
const int kSvPort = 8461;
|
||||||
const int kMaxMsgSize = 4096;
|
const int kMaxMsgSize = 4096;
|
||||||
const int kMaxIntPairSize = 45; // Holds %d,%d, for upto 64 bit.
|
const int kMaxIntPairSize = 45; // Holds %d,%d, for up to 64 bit.
|
||||||
|
|
||||||
#include "svutil.h"
|
#include "svutil.h"
|
||||||
|
|
||||||
@ -342,7 +342,7 @@ void* ScrollView::StartEventHandler(void* a) {
|
|||||||
k = i;
|
k = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If we didnt find anything we had an old alarm and just sleep again.
|
// If we didn't find anything we had an old alarm and just sleep again.
|
||||||
if (new_event != NULL) {
|
if (new_event != NULL) {
|
||||||
sv->event_table_[k] = NULL;
|
sv->event_table_[k] = NULL;
|
||||||
sv->mutex_->Unlock();
|
sv->mutex_->Unlock();
|
||||||
|
@ -327,7 +327,7 @@ class ScrollView {
|
|||||||
// be unique among menubar eventIDs.
|
// be unique among menubar eventIDs.
|
||||||
void MenuItem(const char* parent, const char* name, int cmdEvent);
|
void MenuItem(const char* parent, const char* name, int cmdEvent);
|
||||||
|
|
||||||
// This adds a new checkbox entry, which might initally be flagged.
|
// This adds a new checkbox entry, which might initially be flagged.
|
||||||
void MenuItem(const char* parent, const char* name,
|
void MenuItem(const char* parent, const char* name,
|
||||||
int cmdEvent, bool flagged);
|
int cmdEvent, bool flagged);
|
||||||
|
|
||||||
|
@ -79,7 +79,7 @@ typedef struct
|
|||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
* new_measurement
|
* new_measurement
|
||||||
*
|
*
|
||||||
* Initalize a record to hold a measurement of a group of individual
|
* Initialize a record to hold a measurement of a group of individual
|
||||||
* samples.
|
* samples.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user