2007-03-08 04:03:40 +08:00
|
|
|
/******************************************************************************
|
|
|
|
** Filename: features.h
|
|
|
|
** Purpose: Generic definition of a feature.
|
|
|
|
** Author: Dan Johnson
|
|
|
|
** History: Sun May 20 10:28:30 1990, DSJ, Created.
|
|
|
|
**
|
|
|
|
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
** you may not use this file except in compliance with the License.
|
|
|
|
** You may obtain a copy of the License at
|
|
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
** See the License for the specific language governing permissions and
|
|
|
|
** limitations under the License.
|
|
|
|
******************************************************************************/
|
|
|
|
#ifndef FEATURES_H
|
|
|
|
#define FEATURES_H
|
|
|
|
|
|
|
|
/**----------------------------------------------------------------------------
|
|
|
|
Include Files and Type Defines
|
|
|
|
----------------------------------------------------------------------------**/
|
2010-11-24 02:34:14 +08:00
|
|
|
#include "blobs.h"
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
// Forward declaration only; the full definition lives in another header.
// NOTE(review): none of the declarations visible in this header use DENORM --
// confirm whether includers rely on it before removing.
class DENORM;
|
2013-09-23 23:15:06 +08:00
|
|
|
// Forward declaration only; the full definition lives in another header.
// NOTE(review): none of the declarations visible in this header use it --
// confirm whether includers rely on it before removing.
struct INT_FX_RESULT_STRUCT;
|
2010-11-24 02:34:14 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
// PARAM_DESC has a member named Min and DefineParam takes a parameter named
// Min; presumably this removes a Min macro leaked by an earlier header so
// those names are not rewritten by the preprocessor.
#undef Min
|
|
|
|
// PARAM_DESC has a member named Max and DefineParam takes a parameter named
// Max; presumably this removes a Max macro leaked by an earlier header so
// those names are not rewritten by the preprocessor.
#undef Max
|
|
|
|
// Not referenced anywhere in this header; presumably the buffer size (in
// chars) used for feature names by the implementation -- TODO confirm.
#define FEAT_NAME_SIZE 80
|
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
// define trap errors which can be caused by this module
|
2007-03-08 04:03:40 +08:00
|
|
|
// Trap/error code for this module. By its name it reports an invalid feature
// parameter; where it is raised is not visible in this header.
#define ILLEGAL_FEATURE_PARAM 1000
|
|
|
|
// Trap/error code for this module. By its name it reports an invalid feature
// count; where it is raised is not visible in this header.
#define ILLEGAL_NUM_FEATURES 1001
|
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
// A character is described by multiple sets of extracted features. Each
|
|
|
|
// set contains a number of features of a particular type, for example, a
|
|
|
|
// set of bays, or a set of closures, or a set of microfeatures. Each
|
|
|
|
// feature consists of a number of parameters. All features within a
|
|
|
|
// feature set contain the same number of parameters. All circular
|
|
|
|
// parameters are required to be the first parameters in the feature.
|
|
|
|
|
|
|
|
struct PARAM_DESC {
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
int8_t Circular; // TRUE if dimension wraps around
|
|
|
|
int8_t NonEssential; // TRUE if dimension not used in searches
|
2010-11-24 02:34:14 +08:00
|
|
|
FLOAT32 Min; // low end of range for circular dimensions
|
|
|
|
FLOAT32 Max; // high end of range for circular dimensions
|
|
|
|
FLOAT32 Range; // Max - Min
|
|
|
|
FLOAT32 HalfRange; // (Max - Min)/2
|
|
|
|
FLOAT32 MidRange; // (Max + Min)/2
|
|
|
|
};
|
|
|
|
|
|
|
|
struct FEATURE_DESC_STRUCT {
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
uint16_t NumParams; // total # of params
|
2010-11-24 02:34:14 +08:00
|
|
|
const char *ShortName; // short name for feature
|
|
|
|
const PARAM_DESC *ParamDesc; // array - one per param
|
|
|
|
};
|
|
|
|
// Pointer alias: a FEATURE_DESC is a (non-const) FEATURE_DESC_STRUCT*.
typedef FEATURE_DESC_STRUCT *FEATURE_DESC;
|
|
|
|
|
|
|
|
struct FEATURE_STRUCT {
|
|
|
|
const FEATURE_DESC_STRUCT *Type; // points to description of feature type
|
|
|
|
FLOAT32 Params[1]; // variable size array - params for feature
|
|
|
|
};
|
2007-03-08 04:03:40 +08:00
|
|
|
// Pointer alias: a FEATURE is a FEATURE_STRUCT*.
typedef FEATURE_STRUCT *FEATURE;
|
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
struct FEATURE_SET_STRUCT {
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
uint16_t NumFeatures; // number of features in set
|
|
|
|
uint16_t MaxNumFeatures; // maximum size of feature set
|
2010-11-24 02:34:14 +08:00
|
|
|
FEATURE Features[1]; // variable size array of features
|
|
|
|
};
|
2007-03-08 04:03:40 +08:00
|
|
|
// Pointer alias: a FEATURE_SET is a FEATURE_SET_STRUCT*.
typedef FEATURE_SET_STRUCT *FEATURE_SET;
|
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
// A generic character description as a char pointer. In reality, it will be
|
|
|
|
// a pointer to some data structure. Paired feature extractors/matchers need
|
|
|
|
// to agree on the data structure to be used, however, the high level
|
|
|
|
// classifier does not need to know the details of this data structure.
|
|
|
|
// Opaque handle spelled as char*; the pointed-to structure is agreed between
// a feature extractor and its matcher, not defined here.
typedef char *CHAR_FEATURES;
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
/*----------------------------------------------------------------------
|
|
|
|
Macros for defining the parameters of a new feature
|
|
|
|
----------------------------------------------------------------------*/
|
|
|
|
// Opens the definition of a constant PARAM_DESC table called Name. Follow it
// with one DefineParam(...) per parameter and close it with EndParamDesc.
// The expansion deliberately ends in an unclosed '{'.
#define StartParamDesc(Name) \
  const PARAM_DESC Name[] = {
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
// Expands to one PARAM_DESC initializer (with a trailing comma) for use
// between StartParamDesc and EndParamDesc. The first four members are taken
// from the arguments as given; Range, HalfRange and MidRange are derived as
// Max - Min, (Max - Min)/2 and (Max + Min)/2. Min and Max are each evaluated
// several times, so pass side-effect-free expressions.
#define DefineParam(Circular, NonEssential, Min, Max) \
  {Circular, NonEssential, Min, Max, \
   (Max) - (Min), (((Max) - (Min))/2.0), (((Max) + (Min))/2.0)},
|
|
|
|
|
|
|
|
// Closes the array initializer opened by StartParamDesc; the expansion
// already contains the terminating semicolon.
#define EndParamDesc };
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------
|
|
|
|
Macro for describing a new feature. The parameters of the macro
|
|
|
|
are as follows:
|
|
|
|
|
2012-02-02 10:57:42 +08:00
|
|
|
DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName)
|
2007-03-08 04:03:40 +08:00
|
|
|
----------------------------------------------------------------------*/
|
2012-02-02 10:57:42 +08:00
|
|
|
// Defines a constant FEATURE_DESC_STRUCT called Name.
//   NL, NC - number of linear and circular parameters; only their sum is
//            stored (as NumParams).
//   SN     - short name for the feature (stored as ShortName).
//   PN     - parameter-description table (stored as ParamDesc).
// The expansion already contains the terminating semicolon.
#define DefineFeature(Name, NL, NC, SN, PN) \
  const FEATURE_DESC_STRUCT Name = { \
      ((NL) + (NC)), SN, PN};
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
/*----------------------------------------------------------------------
|
|
|
|
Generic routines that work for all feature types
|
|
|
|
----------------------------------------------------------------------*/
|
2008-04-22 08:23:04 +08:00
|
|
|
// Takes a feature set and a feature and returns a BOOL8 status. Presumably
// appends Feature to FeatureSet and reports whether there was room (cf.
// MaxNumFeatures) -- TODO confirm against the implementation.
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2008-04-22 08:23:04 +08:00
|
|
|
// Takes a single feature and returns nothing. Presumably releases the memory
// obtained from NewFeature -- TODO confirm against the implementation.
void FreeFeature(FEATURE Feature);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2008-04-22 08:23:04 +08:00
|
|
|
// Takes a feature set and returns nothing. Presumably releases the set and
// the features it holds -- TODO confirm ownership in the implementation.
void FreeFeatureSet(FEATURE_SET FeatureSet);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
// Takes a feature-type description and returns a FEATURE. Presumably
// allocates a feature with room for FeatureDesc->NumParams parameters --
// TODO confirm against the implementation.
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2008-04-22 08:23:04 +08:00
|
|
|
// Takes a feature count and returns a FEATURE_SET. Presumably allocates an
// empty set with capacity for NumFeatures features -- TODO confirm against
// the implementation.
FEATURE_SET NewFeatureSet(int NumFeatures);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
// Takes an open stdio stream and a feature-type description and returns a
// FEATURE. Presumably parses one feature of that type from File -- TODO
// confirm the expected format and who frees the result.
FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
// Takes an open stdio stream and a feature-type description and returns a
// FEATURE_SET. Presumably parses a whole set of features of that type from
// File -- TODO confirm the expected format and who frees the result.
FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2015-05-13 09:13:15 +08:00
|
|
|
// Takes a feature and a pointer to a STRING and returns nothing. Presumably
// appends a textual form of Feature to *str -- TODO confirm against the
// implementation.
void WriteFeature(FEATURE Feature, STRING* str);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2015-05-13 09:13:15 +08:00
|
|
|
// Takes a feature set and a pointer to a STRING and returns nothing.
// Presumably appends a textual form of the whole set to *str -- TODO confirm
// against the implementation.
void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str);
|
2010-11-24 02:34:14 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
#endif
|