// tesseract/src/ccstruct/normalis.h
// (Code-viewer page residue: 320 lines, 14 KiB, C, Raw / Normal View / History.)

/**********************************************************************
* File: normalis.h (Formerly denorm.h)
* Description: Code for the DENORM class.
* Author: Ray Smith
* Created: Thu Apr 23 09:22:43 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef NORMALIS_H
#define NORMALIS_H
// (Blame timestamp from code viewer: 2021-01-01 05:18:36 +08:00)
#include <tesseract/export.h>
// (Blame timestamp from code viewer: 2020-12-31 22:58:36 +08:00)
// Forward declaration only: this header just stores and hands back Pix
// pointers (see DENORM::pix / DENORM::set_pix), so the full definition is
// not needed here.
struct Pix;
namespace tesseract {
// Dimensions of the baseline-normalized (BLN) coordinate space.
// Full height of the cell used for baseline normalization.
constexpr int kBlnCellHeight = 256;
// x-height used for baseline normalization.
constexpr int kBlnXHeight = 128;
// Offset used for baseline normalization.
constexpr int kBlnBaselineOffset = 64;
// Forward declarations. Every one of these types is used in this header
// only through a pointer or a reference, so no definition is required.
struct TBLOB;
struct TPOINT;
class BLOCK;
class FCOORD;
class TBOX;
class UNICHARSET;
template <typename T>
class GenericVector;
// The available normalization methods. The values are deliberately
// NEGATIVE, because they are also used as markers for the last
// sub-classifier.
enum NormalizationMode {
  // The original baseline (BL) normalization mode.
  NM_BASELINE = -3,
  // Character normalization, but isotropic.
  NM_CHAR_ISOTROPIC = -2,
  // The original character normalization (CN) mode.
  NM_CHAR_ANISOTROPIC = -1
};
class TESS_API DENORM {
public:
DENORM();
// Copying a DENORM is allowed.
DENORM(const DENORM &);
DENORM& operator=(const DENORM&);
~DENORM();
// Setup the normalization transformation parameters.
// The normalizations applied to a blob are as follows:
// 1. An optional block layout rotation that was applied during layout
// analysis to make the textlines horizontal.
// 2. A normalization transformation (LocalNormTransform):
// Subtract the "origin"
// Apply an x,y scaling.
// Apply an optional rotation.
// Add back a final translation.
// The origin is in the block-rotated space, and is usually something like
// the x-middle of the word at the baseline.
// 3. Zero or more further normalization transformations that are applied
// in sequence, with a similar pattern to the first normalization transform.
//
// A DENORM holds the parameters of a single normalization, and can execute
// both the LocalNormTransform (a forwards normalization), and the
// LocalDenormTransform which is an inverse transform or de-normalization.
// A DENORM may point to a predecessor DENORM, which is actually the earlier
// normalization, so the full normalization sequence involves executing all
// predecessors first and then the transform in "this".
// Let x be image coordinates and that we have normalization classes A, B, C
// where we first apply A then B then C to get normalized x':
// x' = CBAx
// Then the backwards (to original coordinates) would be:
// x = A^-1 B^-1 C^-1 x'
// and A = B->predecessor_ and B = C->predecessor_
// NormTransform executes all predecessors recursively, and then this.
// NormTransform would be used to transform an image-based feature to
// normalized space for use in a classifier
// DenormTransform inverts this and then all predecessors. It can be
// used to get back to the original image coordinates from normalized space.
// The LocalNormTransform member executes just the transformation
// in "this" without the layout rotation or any predecessors. It would be
// used to run each successive normalization, eg the word normalization,
// and later the character normalization.
// Arguments:
// block: if not nullptr, then this is the first transformation, and
// block->re_rotation() needs to be used after the Denorm
// transformation to get back to the image coords.
// rotation: if not nullptr, apply this rotation after translation to the
// origin and scaling. (Usually a classify rotation.)
// predecessor: if not nullptr, then predecessor has been applied to the
// input space and needs to be undone to complete the inverse.
// The above pointers are not owned by this DENORM and are assumed to live
// longer than this denorm, except rotation, which is deep copied on input.
//
// x_origin: The x origin which will be mapped to final_xshift in the result.
// y_origin: The y origin which will be mapped to final_yshift in the result.
// Added to result of row->baseline(x) if not nullptr.
//
// x_scale: scale factor for the x-coordinate.
// y_scale: scale factor for the y-coordinate. Ignored if segs is given.
// Note that these scale factors apply to the same x and y system as the
// x-origin and y-origin apply, ie after any block rotation, but before
// the rotation argument is applied.
//
// final_xshift: The x component of the final translation.
// final_yshift: The y component of the final translation.
//
// In theory, any of the commonly used normalizations can be setup here:
// * Traditional baseline normalization on a word:
// SetupNormalization(block, nullptr, nullptr,
// box.x_middle(), baseline,
// kBlnXHeight / x_height, kBlnXHeight / x_height,
// 0, kBlnBaselineOffset);
// * "Numeric mode" baseline normalization on a word, in which the blobs
// are positioned with the bottom as the baseline is achieved by making
// a separate DENORM for each blob.
// SetupNormalization(block, nullptr, nullptr,
// box.x_middle(), box.bottom(),
// kBlnXHeight / x_height, kBlnXHeight / x_height,
// 0, kBlnBaselineOffset);
// * Anisotropic character normalization used by IntFx.
// SetupNormalization(nullptr, nullptr, denorm,
// centroid_x, centroid_y,
// 51.2 / ry, 51.2 / rx, 128, 128);
// * Normalize blob height to x-height (current OSD):
// SetupNormalization(nullptr, &rotation, nullptr,
// box.rotational_x_middle(rotation),
// box.rotational_y_middle(rotation),
// kBlnXHeight / box.rotational_height(rotation),
// kBlnXHeight / box.rotational_height(rotation),
// 0, kBlnBaselineOffset);
// * Secondary normalization for classification rotation (current):
// FCOORD rotation = block->classify_rotation();
// float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
// SetupNormalization(nullptr, &rotation, denorm,
// box.rotational_x_middle(rotation),
// box.rotational_y_middle(rotation),
// target_height / box.rotational_height(rotation),
// target_height / box.rotational_height(rotation),
// 0, kBlnBaselineOffset);
// * Proposed new normalizations for CJK: Between them there is then
// no need for further normalization at all, and the character fills the cell.
// ** Replacement for baseline normalization on a word:
// Scales height and width independently so that modal height and pitch
// fill the cell respectively.
// float cap_height = x_height / CCStruct::kXHeightCapRatio;
// SetupNormalization(block, nullptr, nullptr,
// box.x_middle(), cap_height / 2.0f,
// kBlnCellHeight / fixed_pitch,
// kBlnCellHeight / cap_height,
// 0, 0);
// ** Secondary normalization for classification (with rotation) (proposed):
// Requires a simple translation to the center of the appropriate character
// cell, no further scaling and a simple rotation (or nothing) about the
// cell center.
// FCOORD rotation = block->classify_rotation();
// SetupNormalization(nullptr, &rotation, denorm,
// fixed_pitch_cell_center,
// 0.0f,
// 1.0f,
// 1.0f,
// 0, 0);
void SetupNormalization(const BLOCK* block,
const FCOORD* rotation,
const DENORM* predecessor,
float x_origin, float y_origin,
float x_scale, float y_scale,
float final_xshift, float final_yshift);
// Sets up the DENORM to execute a non-linear transformation based on
// preserving an even distribution of stroke edges. The transformation
// operates only within the given box, scaling input coords within the box
// non-linearly to a box of target_width by target_height, with all other
// coords being clipped to the box edge. As with SetupNormalization above,
// final_xshift and final_yshift are applied after scaling, and the bottom-
// left of box is used as a pre-scaling origin.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
// The second-level vectors must all be sorted in ascending order.
void SetupNonLinear(const DENORM* predecessor, const TBOX& box,
float target_width, float target_height,
float final_xshift, float final_yshift,
const GenericVector<GenericVector<int> >& x_coords,
const GenericVector<GenericVector<int> >& y_coords);
// Transforms the given coords one step forward to normalized space, without
// using any block rotation or predecessor.
void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const;
void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const;
// Transforms the given coords forward to normalized space using the
// full transformation sequence defined by the block rotation, the
// predecessors, deepest first, and finally this. If first_norm is not nullptr,
// then the first and deepest transformation used is first_norm, ending
// with this, and the block rotation will not be applied.
void NormTransform(const DENORM* first_norm, const TPOINT& pt,
TPOINT* transformed) const;
void NormTransform(const DENORM* first_norm, const FCOORD& pt,
FCOORD* transformed) const;
// Transforms the given coords one step back to source space, without
// using to any block rotation or predecessor.
void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
// Transforms the given coords all the way back to source image space using
// the full transformation sequence defined by this and its predecessors
// recursively, shallowest first, and finally any block re_rotation.
// If last_denorm is not nullptr, then the last transformation used will
// be last_denorm, and the block re_rotation will never be executed.
void DenormTransform(const DENORM* last_denorm, const TPOINT& pt,
TPOINT* original) const;
void DenormTransform(const DENORM* last_denorm, const FCOORD& pt,
FCOORD* original) const;
// Normalize a blob using blob transformations. Less accurate, but
// more accurately copies the old way.
void LocalNormBlob(TBLOB* blob) const;
// Fills in the x-height range accepted by the given unichar_id in blob
// coordinates, given its bounding box in the usual baseline-normalized
// coordinates, with some initial crude x-height estimate (such as word
// size) and this denoting the transformation that was used.
// Also returns the amount the character must have shifted up or down.
void XHeightRange(int unichar_id, const UNICHARSET& unicharset,
const TBOX& bbox,
float* min_xht,
float* max_xht,
float* yshift) const;
// Prints the content of the DENORM for debug purposes.
void Print() const;
Pix* pix() const {
return pix_;
}
void set_pix(Pix* pix) {
pix_ = pix;
}
bool inverse() const {
return inverse_;
}
void set_inverse(bool value) {
inverse_ = value;
}
const DENORM* RootDenorm() const {
if (predecessor_ != nullptr)
return predecessor_->RootDenorm();
return this;
}
const DENORM* predecessor() const {
return predecessor_;
}
// Accessors - perhaps should not be needed.
float x_scale() const {
return x_scale_;
}
float y_scale() const {
return y_scale_;
}
const BLOCK* block() const {
return block_;
}
void set_block(const BLOCK* block) {
block_ = block;
}
private:
// Free allocated memory and clear pointers.
void Clear();
// Setup default values.
void Init();
// Best available image.
Pix* pix_;
// True if the source image is white-on-black.
bool inverse_;
// Block the word came from. If not null, block->re_rotation() takes the
// "untransformed" coordinates even further back to the original image.
// Used only on the first DENORM in a chain.
const BLOCK* block_;
// Rotation to apply between translation to the origin and scaling.
const FCOORD* rotation_;
// Previous transformation in a chain.
const DENORM* predecessor_;
// Non-linear transformation maps directly from each integer offset from the
// origin to the corresponding x-coord. Owned by the DENORM.
GenericVector<float>* x_map_;
// Non-linear transformation maps directly from each integer offset from the
// origin to the corresponding y-coord. Owned by the DENORM.
GenericVector<float>* y_map_;
// x-coordinate to be mapped to final_xshift_ in the result.
float x_origin_;
// y-coordinate to be mapped to final_yshift_ in the result.
float y_origin_;
// Scale factors for x and y coords. Applied to pre-rotation system.
float x_scale_;
float y_scale_;
// Destination coords of the x_origin_ and y_origin_.
float final_xshift_;
float final_yshift_;
};
} // namespace tesseract
#endif