2007-03-08 04:03:40 +08:00
|
|
|
/**********************************************************************
|
2017-07-03 05:35:47 +08:00
|
|
|
* File: edgblob.cpp (Formerly edgeloop.c)
|
2007-03-08 04:03:40 +08:00
|
|
|
* Description: Functions to clean up an outline before approximation.
|
|
|
|
* Author: Ray Smith
|
|
|
|
* Created: Tue Mar 26 16:56:25 GMT 1991
|
|
|
|
*
|
2009-07-11 10:39:56 +08:00
|
|
|
*(C) Copyright 1991, Hewlett-Packard Ltd.
|
|
|
|
** Licensed under the Apache License, Version 2.0(the "License");
|
2007-03-08 04:03:40 +08:00
|
|
|
** you may not use this file except in compliance with the License.
|
|
|
|
** You may obtain a copy of the License at
|
|
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
** See the License for the specific language governing permissions and
|
|
|
|
** limitations under the License.
|
|
|
|
*
|
|
|
|
**********************************************************************/
|
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
#include "scanedg.h"
|
|
|
|
#include "drawedg.h"
|
|
|
|
#include "edgloop.h"
|
|
|
|
#include "edgblob.h"
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2010-09-30 23:53:40 +08:00
|
|
|
// Include automatically generated configuration file if running autoconf.
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config_auto.h"
|
|
|
|
#endif
|
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
#define EXTERN
|
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
// Control parameters used in outline_complexity(), which rejects an outline
|
|
|
|
// if any one of the 3 conditions is satisfied:
|
|
|
|
// - number of children exceeds edges_max_children_per_outline
|
|
|
|
// - number of nested layers exceeds edges_max_children_layers
|
|
|
|
// - joint complexity exceeds edges_children_count_limit(as in child_count())
|
|
|
|
EXTERN BOOL_VAR(edges_use_new_outline_complexity, FALSE,
|
|
|
|
"Use the new outline complexity module");
|
|
|
|
EXTERN INT_VAR(edges_max_children_per_outline, 10,
|
|
|
|
"Max number of children inside a character outline");
|
|
|
|
EXTERN INT_VAR(edges_max_children_layers, 5,
|
|
|
|
"Max layers of nested children inside a character outline");
|
|
|
|
EXTERN BOOL_VAR(edges_debug, FALSE,
|
|
|
|
"turn on debugging for this module");
|
|
|
|
|
|
|
|
|
|
|
|
EXTERN INT_VAR(edges_children_per_grandchild, 10,
|
|
|
|
"Importance ratio for chucking outlines");
|
|
|
|
EXTERN INT_VAR(edges_children_count_limit, 45,
|
|
|
|
"Max holes allowed in blob");
|
|
|
|
EXTERN BOOL_VAR(edges_children_fix, FALSE,
|
|
|
|
"Remove boxy parents of char-like children");
|
|
|
|
EXTERN INT_VAR(edges_min_nonhole, 12,
|
|
|
|
"Min pixels for potential char in box");
|
|
|
|
EXTERN INT_VAR(edges_patharea_ratio, 40,
|
|
|
|
"Max lensq/area for acceptable child outline");
|
|
|
|
EXTERN double_VAR(edges_childarea, 0.5,
|
|
|
|
"Min area fraction of child outline");
|
|
|
|
EXTERN double_VAR(edges_boxarea, 0.875,
|
|
|
|
"Min area fraction of grandchild for box");
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name OL_BUCKETS::OL_BUCKETS
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Construct an array of buckets for associating outlines into blobs.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
OL_BUCKETS::OL_BUCKETS(
|
|
|
|
ICOORD bleft, // corners
|
|
|
|
ICOORD tright): bl(bleft), tr(tright) {
|
|
|
|
bxdim =(tright.x() - bleft.x()) / BUCKETSIZE + 1;
|
|
|
|
bydim =(tright.y() - bleft.y()) / BUCKETSIZE + 1;
|
|
|
|
// make array
|
2018-05-22 22:55:45 +08:00
|
|
|
buckets.reset(new C_OUTLINE_LIST[bxdim * bydim]);
|
2007-03-08 04:03:40 +08:00
|
|
|
index = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name OL_BUCKETS::operator(
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Return a pointer to a list of C_OUTLINEs corresponding to the
|
|
|
|
* given pixel coordinates.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
C_OUTLINE_LIST *
|
2009-07-11 10:39:56 +08:00
|
|
|
OL_BUCKETS::operator()( // array access
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int16_t x, // image coords
|
|
|
|
int16_t y) {
|
2009-07-11 10:39:56 +08:00
|
|
|
return &buckets[(y-bl.y()) / BUCKETSIZE * bxdim + (x-bl.x()) / BUCKETSIZE];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name OL_BUCKETS::outline_complexity
|
2009-07-11 10:39:56 +08:00
|
|
|
*
|
|
|
|
* This is the new version of count_child.
|
|
|
|
*
|
|
|
|
* The goal of this function is to determine if an outline and its
|
|
|
|
* interiors could be part of a character blob. This is done by
|
|
|
|
* computing a "complexity" index for the outline, which is the return
|
|
|
|
* value of this function, and checking it against a threshold.
|
|
|
|
* The max_count is used for short-circuiting the recursion and forcing
|
|
|
|
* a rejection that guarantees to fail the threshold test.
|
|
|
|
* The complexity F for outline X with N children X[i] is
|
|
|
|
* F(X) = N + sum_i F(X[i]) * edges_children_per_grandchild
|
|
|
|
* so each layer of nesting increases complexity exponentially.
|
|
|
|
* An outline can be rejected as a text blob candidate if its complexity
|
|
|
|
* is too high, has too many children(likely a container), or has too
|
|
|
|
* many layers of nested inner loops. This has the side-effect of
|
|
|
|
* flattening out boxed or reversed video text regions.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2009-07-11 10:39:56 +08:00
|
|
|
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t OL_BUCKETS::outline_complexity(
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE *outline, // parent outline
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t max_count, // max output
|
|
|
|
int16_t depth // recurion depth
|
2009-07-11 10:39:56 +08:00
|
|
|
) {
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int16_t xmin, xmax; // coord limits
|
|
|
|
int16_t ymin, ymax;
|
|
|
|
int16_t xindex, yindex; // current bucket
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE *child; // current child
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t child_count; // no of children
|
|
|
|
int32_t grandchild_count; // no of grandchildren
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE_IT child_it; // search iterator
|
|
|
|
|
|
|
|
TBOX olbox = outline->bounding_box();
|
|
|
|
xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
|
|
|
|
xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
|
|
|
|
ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
|
|
|
|
ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
|
|
|
|
child_count = 0;
|
|
|
|
grandchild_count = 0;
|
|
|
|
if (++depth > edges_max_children_layers) // nested loops are too deep
|
|
|
|
return max_count + depth;
|
|
|
|
|
|
|
|
for (yindex = ymin; yindex <= ymax; yindex++) {
|
|
|
|
for (xindex = xmin; xindex <= xmax; xindex++) {
|
|
|
|
child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
|
|
|
|
if (child_it.empty())
|
|
|
|
continue;
|
|
|
|
for (child_it.mark_cycle_pt(); !child_it.cycled_list();
|
|
|
|
child_it.forward()) {
|
|
|
|
child = child_it.data();
|
|
|
|
if (child == outline || !(*child < *outline))
|
|
|
|
continue;
|
|
|
|
child_count++;
|
|
|
|
|
|
|
|
if (child_count > edges_max_children_per_outline) { // too fragmented
|
|
|
|
if (edges_debug)
|
|
|
|
tprintf("Discard outline on child_count=%d > "
|
|
|
|
"max_children_per_outline=%d\n",
|
|
|
|
child_count,
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
static_cast<int32_t>(edges_max_children_per_outline));
|
2009-07-11 10:39:56 +08:00
|
|
|
return max_count + child_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute the "complexity" of each child recursively
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t remaining_count = max_count - child_count - grandchild_count;
|
2009-07-11 10:39:56 +08:00
|
|
|
if (remaining_count > 0)
|
|
|
|
grandchild_count += edges_children_per_grandchild *
|
|
|
|
outline_complexity(child, remaining_count, depth);
|
|
|
|
if (child_count + grandchild_count > max_count) { // too complex
|
|
|
|
if (edges_debug)
|
|
|
|
tprintf("Disgard outline on child_count=%d + grandchild_count=%d "
|
|
|
|
"> max_count=%d\n",
|
|
|
|
child_count, grandchild_count, max_count);
|
|
|
|
return child_count + grandchild_count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return child_count + grandchild_count;
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name OL_BUCKETS::count_children
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Find number of descendants of this outline.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2013-09-23 23:26:50 +08:00
|
|
|
// TODO(rays) Merge with outline_complexity.
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t OL_BUCKETS::count_children( // recursive count
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE *outline, // parent outline
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t max_count // max output
|
2007-03-08 04:03:40 +08:00
|
|
|
) {
|
2018-05-21 03:46:46 +08:00
|
|
|
bool parent_box; // could it be boxy
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int16_t xmin, xmax; // coord limits
|
|
|
|
int16_t ymin, ymax;
|
|
|
|
int16_t xindex, yindex; // current bucket
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE *child; // current child
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t child_count; // no of children
|
|
|
|
int32_t grandchild_count; // no of grandchildren
|
|
|
|
int32_t parent_area; // potential box
|
2009-07-11 10:39:56 +08:00
|
|
|
FLOAT32 max_parent_area; // potential box
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t child_area; // current child
|
|
|
|
int32_t child_length; // current child
|
2008-04-22 08:34:11 +08:00
|
|
|
TBOX olbox;
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE_IT child_it; // search iterator
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
olbox = outline->bounding_box();
|
|
|
|
xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
|
|
|
|
xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
|
|
|
|
ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
|
|
|
|
ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
|
2007-03-08 04:03:40 +08:00
|
|
|
child_count = 0;
|
|
|
|
grandchild_count = 0;
|
|
|
|
parent_area = 0;
|
|
|
|
max_parent_area = 0;
|
2018-05-21 03:46:46 +08:00
|
|
|
parent_box = true;
|
2007-03-08 04:03:40 +08:00
|
|
|
for (yindex = ymin; yindex <= ymax; yindex++) {
|
|
|
|
for (xindex = xmin; xindex <= xmax; xindex++) {
|
2009-07-11 10:39:56 +08:00
|
|
|
child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
|
|
|
|
if (child_it.empty())
|
2007-03-08 04:03:40 +08:00
|
|
|
continue;
|
2009-07-11 10:39:56 +08:00
|
|
|
for (child_it.mark_cycle_pt(); !child_it.cycled_list();
|
|
|
|
child_it.forward()) {
|
|
|
|
child = child_it.data();
|
2007-03-08 04:03:40 +08:00
|
|
|
if (child != outline && *child < *outline) {
|
|
|
|
child_count++;
|
2007-07-18 09:15:07 +08:00
|
|
|
if (child_count <= max_count) {
|
2009-07-11 10:39:56 +08:00
|
|
|
int max_grand =(max_count - child_count) /
|
2007-07-18 09:15:07 +08:00
|
|
|
edges_children_per_grandchild;
|
|
|
|
if (max_grand > 0)
|
2009-07-11 10:39:56 +08:00
|
|
|
grandchild_count += count_children(child, max_grand) *
|
2007-07-18 09:15:07 +08:00
|
|
|
edges_children_per_grandchild;
|
|
|
|
else
|
|
|
|
grandchild_count += count_children(child, 1);
|
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
if (child_count + grandchild_count > max_count) {
|
2009-07-11 10:39:56 +08:00
|
|
|
if (edges_debug)
|
|
|
|
tprintf("Discarding parent with child count=%d, gc=%d\n",
|
|
|
|
child_count,grandchild_count);
|
2007-03-08 04:03:40 +08:00
|
|
|
return child_count + grandchild_count;
|
|
|
|
}
|
|
|
|
if (parent_area == 0) {
|
2009-07-11 10:39:56 +08:00
|
|
|
parent_area = outline->outer_area();
|
2007-03-08 04:03:40 +08:00
|
|
|
if (parent_area < 0)
|
|
|
|
parent_area = -parent_area;
|
2009-07-11 10:39:56 +08:00
|
|
|
max_parent_area = outline->bounding_box().area() * edges_boxarea;
|
2007-03-08 04:03:40 +08:00
|
|
|
if (parent_area < max_parent_area)
|
2018-05-21 03:46:46 +08:00
|
|
|
parent_box = false;
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
2009-07-11 10:39:56 +08:00
|
|
|
if (parent_box &&
|
|
|
|
(!edges_children_fix ||
|
|
|
|
child->bounding_box().height() > edges_min_nonhole)) {
|
|
|
|
child_area = child->outer_area();
|
2007-03-08 04:03:40 +08:00
|
|
|
if (child_area < 0)
|
|
|
|
child_area = -child_area;
|
|
|
|
if (edges_children_fix) {
|
|
|
|
if (parent_area - child_area < max_parent_area) {
|
2018-05-21 03:46:46 +08:00
|
|
|
parent_box = false;
|
2007-03-08 04:03:40 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (grandchild_count > 0) {
|
2009-07-11 10:39:56 +08:00
|
|
|
if (edges_debug)
|
|
|
|
tprintf("Discarding parent of area %d, child area=%d, max%g "
|
|
|
|
"with gc=%d\n",
|
|
|
|
parent_area, child_area, max_parent_area,
|
|
|
|
grandchild_count);
|
2007-03-08 04:03:40 +08:00
|
|
|
return max_count + 1;
|
|
|
|
}
|
2009-07-11 10:39:56 +08:00
|
|
|
child_length = child->pathlength();
|
2007-03-08 04:03:40 +08:00
|
|
|
if (child_length * child_length >
|
2009-07-11 10:39:56 +08:00
|
|
|
child_area * edges_patharea_ratio) {
|
|
|
|
if (edges_debug)
|
|
|
|
tprintf("Discarding parent of area %d, child area=%d, max%g "
|
|
|
|
"with child length=%d\n",
|
|
|
|
parent_area, child_area, max_parent_area,
|
|
|
|
child_length);
|
2007-03-08 04:03:40 +08:00
|
|
|
return max_count + 1;
|
|
|
|
}
|
|
|
|
}
|
2009-07-11 10:39:56 +08:00
|
|
|
if (child_area < child->bounding_box().area() * edges_childarea) {
|
|
|
|
if (edges_debug)
|
|
|
|
tprintf("Discarding parent of area %d, child area=%d, max%g "
|
|
|
|
"with child rect=%d\n",
|
|
|
|
parent_area, child_area, max_parent_area,
|
|
|
|
child->bounding_box().area());
|
2007-03-08 04:03:40 +08:00
|
|
|
return max_count + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return child_count + grandchild_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name OL_BUCKETS::extract_children
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Find number of descendants of this outline.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
void OL_BUCKETS::extract_children( // recursive count
|
|
|
|
C_OUTLINE *outline, // parent outline
|
|
|
|
C_OUTLINE_IT *it // destination iterator
|
2007-03-08 04:03:40 +08:00
|
|
|
) {
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int16_t xmin, xmax; // coord limits
|
|
|
|
int16_t ymin, ymax;
|
|
|
|
int16_t xindex, yindex; // current bucket
|
2008-04-22 08:34:11 +08:00
|
|
|
TBOX olbox;
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE_IT child_it; // search iterator
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
olbox = outline->bounding_box();
|
|
|
|
xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
|
|
|
|
xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
|
|
|
|
ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
|
|
|
|
ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
|
2007-03-08 04:03:40 +08:00
|
|
|
for (yindex = ymin; yindex <= ymax; yindex++) {
|
|
|
|
for (xindex = xmin; xindex <= xmax; xindex++) {
|
2009-07-11 10:39:56 +08:00
|
|
|
child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
|
|
|
|
for (child_it.mark_cycle_pt(); !child_it.cycled_list();
|
|
|
|
child_it.forward()) {
|
|
|
|
if (*child_it.data() < *outline) {
|
|
|
|
it->add_after_then_move(child_it.extract());
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name extract_edges
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Run the edge detector over the block and return a list of blobs.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
void extract_edges(Pix* pix, // thresholded image
|
|
|
|
BLOCK *block) { // block to scan
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE_LIST outlines; // outlines in block
|
2007-03-08 04:03:40 +08:00
|
|
|
C_OUTLINE_IT out_it = &outlines;
|
|
|
|
|
2018-04-19 19:55:39 +08:00
|
|
|
block_edges(pix, &(block->pdblk), &out_it);
|
2010-11-24 02:34:14 +08:00
|
|
|
ICOORD bleft; // block box
|
|
|
|
ICOORD tright;
|
2018-04-19 19:55:39 +08:00
|
|
|
block->pdblk.bounding_box(bleft, tright);
|
2009-07-11 10:39:56 +08:00
|
|
|
// make blobs
|
2007-07-18 09:15:07 +08:00
|
|
|
outlines_to_blobs(block, bleft, tright, &outlines);
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name outlines_to_blobs
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Gather together outlines into blobs using the usual bucket sort.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
void outlines_to_blobs( // find blobs
|
|
|
|
BLOCK *block, // block to scan
|
|
|
|
ICOORD bleft,
|
2007-03-08 04:03:40 +08:00
|
|
|
ICOORD tright,
|
|
|
|
C_OUTLINE_LIST *outlines) {
|
2009-07-11 10:39:56 +08:00
|
|
|
// make buckets
|
2007-07-18 09:15:07 +08:00
|
|
|
OL_BUCKETS buckets(bleft, tright);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2007-07-18 09:15:07 +08:00
|
|
|
fill_buckets(outlines, &buckets);
|
|
|
|
empty_buckets(block, &buckets);
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name fill_buckets
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Run the edge detector over the block and return a list of blobs.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
void fill_buckets( // find blobs
|
|
|
|
C_OUTLINE_LIST *outlines, // outlines in block
|
|
|
|
OL_BUCKETS *buckets // output buckets
|
2007-03-08 04:03:40 +08:00
|
|
|
) {
|
2009-07-11 10:39:56 +08:00
|
|
|
TBOX ol_box; // outline box
|
|
|
|
C_OUTLINE_IT out_it = outlines; // iterator
|
|
|
|
C_OUTLINE_IT bucket_it; // iterator in bucket
|
|
|
|
C_OUTLINE *outline; // current outline
|
|
|
|
|
|
|
|
for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
|
|
|
|
outline = out_it.extract(); // take off list
|
|
|
|
// get box
|
|
|
|
ol_box = outline->bounding_box();
|
|
|
|
bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom()));
|
|
|
|
bucket_it.add_to_end(outline);
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name empty_buckets
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Run the edge detector over the block and return a list of blobs.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
void empty_buckets( // find blobs
|
|
|
|
BLOCK *block, // block to scan
|
|
|
|
OL_BUCKETS *buckets // output buckets
|
2007-03-08 04:03:40 +08:00
|
|
|
) {
|
2018-05-21 03:46:46 +08:00
|
|
|
bool good_blob; // healthy blob
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE_LIST outlines; // outlines in block
|
|
|
|
// iterator
|
2007-03-08 04:03:40 +08:00
|
|
|
C_OUTLINE_IT out_it = &outlines;
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE_IT bucket_it = buckets->start_scan();
|
|
|
|
C_OUTLINE_IT parent_it; // parent outline
|
|
|
|
C_BLOB_IT good_blobs = block->blob_list();
|
|
|
|
C_BLOB_IT junk_blobs = block->reject_blobs();
|
|
|
|
|
|
|
|
while (!bucket_it.empty()) {
|
|
|
|
out_it.set_to_list(&outlines);
|
2007-03-08 04:03:40 +08:00
|
|
|
do {
|
2009-07-11 10:39:56 +08:00
|
|
|
parent_it = bucket_it; // find outermost
|
2012-02-02 10:53:04 +08:00
|
|
|
do {
|
|
|
|
bucket_it.forward();
|
|
|
|
} while (!bucket_it.at_first() &&
|
|
|
|
!(*parent_it.data() < *bucket_it.data()));
|
|
|
|
} while (!bucket_it.at_first());
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
// move to new list
|
|
|
|
out_it.add_after_then_move(parent_it.extract());
|
|
|
|
good_blob = capture_children(buckets, &junk_blobs, &out_it);
|
2013-09-23 23:26:50 +08:00
|
|
|
C_BLOB::ConstructBlobsFromOutlines(good_blob, &outlines, &good_blobs,
|
|
|
|
&junk_blobs);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:39:56 +08:00
|
|
|
bucket_it.set_to_list(buckets->scan_next());
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-08-20 02:57:57 +08:00
|
|
|
/**
|
|
|
|
* @name capture_children
|
2007-03-08 04:03:40 +08:00
|
|
|
*
|
|
|
|
* Find all neighbouring outlines that are children of this outline
|
|
|
|
* and either move them to the output list or declare this outline
|
|
|
|
* illegal and return FALSE.
|
2010-08-20 02:57:57 +08:00
|
|
|
*/
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2018-05-21 03:46:46 +08:00
|
|
|
bool capture_children( // find children
|
|
|
|
OL_BUCKETS* buckets, // bucket sort clanss
|
|
|
|
C_BLOB_IT* reject_it, // dead grandchildren
|
|
|
|
C_OUTLINE_IT* blob_it // output outlines
|
|
|
|
) {
|
2009-07-11 10:39:56 +08:00
|
|
|
C_OUTLINE *outline; // master outline
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int32_t child_count; // no of children
|
2009-07-11 10:39:56 +08:00
|
|
|
|
|
|
|
outline = blob_it->data();
|
|
|
|
if (edges_use_new_outline_complexity)
|
|
|
|
child_count = buckets->outline_complexity(outline,
|
|
|
|
edges_children_count_limit,
|
|
|
|
0);
|
|
|
|
else
|
|
|
|
child_count = buckets->count_children(outline,
|
|
|
|
edges_children_count_limit);
|
2007-03-08 04:03:40 +08:00
|
|
|
if (child_count > edges_children_count_limit)
|
2018-05-21 03:46:46 +08:00
|
|
|
return false;
|
2009-07-11 10:39:56 +08:00
|
|
|
|
|
|
|
if (child_count > 0)
|
|
|
|
buckets->extract_children(outline, blob_it);
|
2018-05-21 03:46:46 +08:00
|
|
|
return true;
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|