/********************************************************************** * File: normalis.h (Formerly denorm.h) * Description: Code for the DENORM class. * Author: Ray Smith * Created: Thu Apr 23 09:22:43 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #ifndef NORMALIS_H #define NORMALIS_H #include struct Pix; namespace tesseract { const int kBlnCellHeight = 256; // Full-height for baseline normalization. const int kBlnXHeight = 128; // x-height for baseline normalization. const int kBlnBaselineOffset = 64; // offset for baseline normalization. class BLOCK; class FCOORD; class TBOX; class UNICHARSET; struct TBLOB; struct TPOINT; template class GenericVector; // Possible normalization methods. Use NEGATIVE values as these also // double up as markers for the last sub-classifier. enum NormalizationMode { NM_BASELINE = -3, // The original BL normalization mode. NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic. NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode. }; class TESS_API DENORM { public: DENORM(); // Copying a DENORM is allowed. DENORM(const DENORM &); DENORM& operator=(const DENORM&); ~DENORM(); // Setup the normalization transformation parameters. // The normalizations applied to a blob are as follows: // 1. An optional block layout rotation that was applied during layout // analysis to make the textlines horizontal. // 2. A normalization transformation (LocalNormTransform): // Subtract the "origin" // Apply an x,y scaling. // Apply an optional rotation. // Add back a final translation. // The origin is in the block-rotated space, and is usually something like // the x-middle of the word at the baseline. // 3. Zero or more further normalization transformations that are applied // in sequence, with a similar pattern to the first normalization transform. // // A DENORM holds the parameters of a single normalization, and can execute // both the LocalNormTransform (a forwards normalization), and the // LocalDenormTransform which is an inverse transform or de-normalization. // A DENORM may point to a predecessor DENORM, which is actually the earlier // normalization, so the full normalization sequence involves executing all // predecessors first and then the transform in "this". // Let x be image coordinates and that we have normalization classes A, B, C // where we first apply A then B then C to get normalized x': // x' = CBAx // Then the backwards (to original coordinates) would be: // x = A^-1 B^-1 C^-1 x' // and A = B->predecessor_ and B = C->predecessor_ // NormTransform executes all predecessors recursively, and then this. // NormTransform would be used to transform an image-based feature to // normalized space for use in a classifier // DenormTransform inverts this and then all predecessors. It can be // used to get back to the original image coordinates from normalized space. // The LocalNormTransform member executes just the transformation // in "this" without the layout rotation or any predecessors. It would be // used to run each successive normalization, eg the word normalization, // and later the character normalization. // Arguments: // block: if not nullptr, then this is the first transformation, and // block->re_rotation() needs to be used after the Denorm // transformation to get back to the image coords. // rotation: if not nullptr, apply this rotation after translation to the // origin and scaling. (Usually a classify rotation.) // predecessor: if not nullptr, then predecessor has been applied to the // input space and needs to be undone to complete the inverse. // The above pointers are not owned by this DENORM and are assumed to live // longer than this denorm, except rotation, which is deep copied on input. // // x_origin: The x origin which will be mapped to final_xshift in the result. // y_origin: The y origin which will be mapped to final_yshift in the result. // Added to result of row->baseline(x) if not nullptr. // // x_scale: scale factor for the x-coordinate. // y_scale: scale factor for the y-coordinate. Ignored if segs is given. // Note that these scale factors apply to the same x and y system as the // x-origin and y-origin apply, ie after any block rotation, but before // the rotation argument is applied. // // final_xshift: The x component of the final translation. // final_yshift: The y component of the final translation. // // In theory, any of the commonly used normalizations can be setup here: // * Traditional baseline normalization on a word: // SetupNormalization(block, nullptr, nullptr, // box.x_middle(), baseline, // kBlnXHeight / x_height, kBlnXHeight / x_height, // 0, kBlnBaselineOffset); // * "Numeric mode" baseline normalization on a word, in which the blobs // are positioned with the bottom as the baseline is achieved by making // a separate DENORM for each blob. // SetupNormalization(block, nullptr, nullptr, // box.x_middle(), box.bottom(), // kBlnXHeight / x_height, kBlnXHeight / x_height, // 0, kBlnBaselineOffset); // * Anisotropic character normalization used by IntFx. // SetupNormalization(nullptr, nullptr, denorm, // centroid_x, centroid_y, // 51.2 / ry, 51.2 / rx, 128, 128); // * Normalize blob height to x-height (current OSD): // SetupNormalization(nullptr, &rotation, nullptr, // box.rotational_x_middle(rotation), // box.rotational_y_middle(rotation), // kBlnXHeight / box.rotational_height(rotation), // kBlnXHeight / box.rotational_height(rotation), // 0, kBlnBaselineOffset); // * Secondary normalization for classification rotation (current): // FCOORD rotation = block->classify_rotation(); // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio; // SetupNormalization(nullptr, &rotation, denorm, // box.rotational_x_middle(rotation), // box.rotational_y_middle(rotation), // target_height / box.rotational_height(rotation), // target_height / box.rotational_height(rotation), // 0, kBlnBaselineOffset); // * Proposed new normalizations for CJK: Between them there is then // no need for further normalization at all, and the character fills the cell. // ** Replacement for baseline normalization on a word: // Scales height and width independently so that modal height and pitch // fill the cell respectively. // float cap_height = x_height / CCStruct::kXHeightCapRatio; // SetupNormalization(block, nullptr, nullptr, // box.x_middle(), cap_height / 2.0f, // kBlnCellHeight / fixed_pitch, // kBlnCellHeight / cap_height, // 0, 0); // ** Secondary normalization for classification (with rotation) (proposed): // Requires a simple translation to the center of the appropriate character // cell, no further scaling and a simple rotation (or nothing) about the // cell center. // FCOORD rotation = block->classify_rotation(); // SetupNormalization(nullptr, &rotation, denorm, // fixed_pitch_cell_center, // 0.0f, // 1.0f, // 1.0f, // 0, 0); void SetupNormalization(const BLOCK* block, const FCOORD* rotation, const DENORM* predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift); // Sets up the DENORM to execute a non-linear transformation based on // preserving an even distribution of stroke edges. The transformation // operates only within the given box, scaling input coords within the box // non-linearly to a box of target_width by target_height, with all other // coords being clipped to the box edge. As with SetupNormalization above, // final_xshift and final_yshift are applied after scaling, and the bottom- // left of box is used as a pre-scaling origin. // x_coords is a collection of the x-coords of vertical edges for each // y-coord starting at box.bottom(). // y_coords is a collection of the y-coords of horizontal edges for each // x-coord starting at box.left(). // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. // The second-level vectors must all be sorted in ascending order. void SetupNonLinear(const DENORM* predecessor, const TBOX& box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector >& x_coords, const GenericVector >& y_coords); // Transforms the given coords one step forward to normalized space, without // using any block rotation or predecessor. void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const; void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const; // Transforms the given coords forward to normalized space using the // full transformation sequence defined by the block rotation, the // predecessors, deepest first, and finally this. If first_norm is not nullptr, // then the first and deepest transformation used is first_norm, ending // with this, and the block rotation will not be applied. void NormTransform(const DENORM* first_norm, const TPOINT& pt, TPOINT* transformed) const; void NormTransform(const DENORM* first_norm, const FCOORD& pt, FCOORD* transformed) const; // Transforms the given coords one step back to source space, without // using to any block rotation or predecessor. void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const; void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const; // Transforms the given coords all the way back to source image space using // the full transformation sequence defined by this and its predecessors // recursively, shallowest first, and finally any block re_rotation. // If last_denorm is not nullptr, then the last transformation used will // be last_denorm, and the block re_rotation will never be executed. void DenormTransform(const DENORM* last_denorm, const TPOINT& pt, TPOINT* original) const; void DenormTransform(const DENORM* last_denorm, const FCOORD& pt, FCOORD* original) const; // Normalize a blob using blob transformations. Less accurate, but // more accurately copies the old way. void LocalNormBlob(TBLOB* blob) const; // Fills in the x-height range accepted by the given unichar_id in blob // coordinates, given its bounding box in the usual baseline-normalized // coordinates, with some initial crude x-height estimate (such as word // size) and this denoting the transformation that was used. // Also returns the amount the character must have shifted up or down. void XHeightRange(int unichar_id, const UNICHARSET& unicharset, const TBOX& bbox, float* min_xht, float* max_xht, float* yshift) const; // Prints the content of the DENORM for debug purposes. void Print() const; Pix* pix() const { return pix_; } void set_pix(Pix* pix) { pix_ = pix; } bool inverse() const { return inverse_; } void set_inverse(bool value) { inverse_ = value; } const DENORM* RootDenorm() const { if (predecessor_ != nullptr) return predecessor_->RootDenorm(); return this; } const DENORM* predecessor() const { return predecessor_; } // Accessors - perhaps should not be needed. float x_scale() const { return x_scale_; } float y_scale() const { return y_scale_; } const BLOCK* block() const { return block_; } void set_block(const BLOCK* block) { block_ = block; } private: // Free allocated memory and clear pointers. void Clear(); // Setup default values. void Init(); // Best available image. Pix* pix_; // True if the source image is white-on-black. bool inverse_; // Block the word came from. If not null, block->re_rotation() takes the // "untransformed" coordinates even further back to the original image. // Used only on the first DENORM in a chain. const BLOCK* block_; // Rotation to apply between translation to the origin and scaling. const FCOORD* rotation_; // Previous transformation in a chain. const DENORM* predecessor_; // Non-linear transformation maps directly from each integer offset from the // origin to the corresponding x-coord. Owned by the DENORM. GenericVector* x_map_; // Non-linear transformation maps directly from each integer offset from the // origin to the corresponding y-coord. Owned by the DENORM. GenericVector* y_map_; // x-coordinate to be mapped to final_xshift_ in the result. float x_origin_; // y-coordinate to be mapped to final_yshift_ in the result. float y_origin_; // Scale factors for x and y coords. Applied to pre-rotation system. float x_scale_; float y_scale_; // Destination coords of the x_origin_ and y_origin_. float final_xshift_; float final_yshift_; }; } // namespace tesseract #endif