mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 13:49:35 +08:00
4523ce9f7d
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20
83 lines
3.2 KiB
C++
83 lines
3.2 KiB
C++
/******************************************************************************
 **  Filename:    normfeat.c
 **  Purpose:     Definition of char normalization features.
 **  Author:      Dan Johnson
 **  History:     12/14/90, DSJ, Created.
 **
 **  (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
|
|
/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
|
|
#include "normfeat.h"
|
|
|
|
#include "intfx.h"
|
|
#include "featdefs.h"
|
|
#include "mfoutline.h"
|
|
|
|
/**----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------**/
|
|
|
|
// Return the length of the outline in baseline normalized form.
|
|
FLOAT32 ActualOutlineLength(FEATURE Feature) {
|
|
return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
|
|
}
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
// Return the character normalization feature for a blob.
|
|
//
|
|
// The features returned are in a scale where the x-height has been
|
|
// normalized to live in the region y = [-0.25 .. 0.25]. Example ranges
|
|
// for English below are based on the Linux font collection on 2009-12-04:
|
|
//
|
|
// Params[CharNormY]
|
|
// The y coordinate of the grapheme's centroid.
|
|
// English: [-0.27, 0.71]
|
|
//
|
|
// Params[CharNormLength]
|
|
// The length of the grapheme's outline (tiny segments discarded),
|
|
// divided by 10.0=LENGTH_COMPRESSION.
|
|
// English: [0.16, 0.85]
|
|
//
|
|
// Params[CharNormRx]
|
|
// The radius of gyration about the x axis, as measured from CharNormY.
|
|
// English: [0.011, 0.34]
|
|
//
|
|
// Params[CharNormRy]
|
|
// The radius of gyration about the y axis, as measured from
|
|
// the x center of the grapheme's bounding box.
|
|
// English: [0.011, 0.31]
|
|
//
|
|
FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& denorm) {
|
|
FEATURE_SET feature_set = NewFeatureSet(1);
|
|
FEATURE feature = NewFeature(&CharNormDesc);
|
|
|
|
INT_FEATURE_ARRAY blfeatures;
|
|
INT_FEATURE_ARRAY cnfeatures;
|
|
INT_FX_RESULT_STRUCT FXInfo;
|
|
|
|
ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &FXInfo);
|
|
|
|
feature->Params[CharNormY] =
|
|
MF_SCALE_FACTOR * (FXInfo.Ymean - BASELINE_OFFSET);
|
|
feature->Params[CharNormLength] =
|
|
MF_SCALE_FACTOR * FXInfo.Length / LENGTH_COMPRESSION;
|
|
feature->Params[CharNormRx] = MF_SCALE_FACTOR * FXInfo.Rx;
|
|
feature->Params[CharNormRy] = MF_SCALE_FACTOR * FXInfo.Ry;
|
|
|
|
AddFeature(feature_set, feature);
|
|
|
|
return feature_set;
|
|
} /* ExtractCharNormFeatures */
|