/* -*-C-*- ****************************************************************************** * File: matrix.h (Formerly matrix.h) * Description: Generic 2-d array/matrix and banded triangular matrix class. * Author: Ray Smith * TODO(rays) Separate from ratings matrix, which it also contains: * * Descrition: Ratings matrix class (specialization of banded matrix). * Segmentation search matrix of lists of BLOB_CHOICE. * Author: Mark Seaman, OCR Technology * Created: Wed May 16 13:22:06 1990 * Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt * Language: C * Package: N/A * Status: Experimental (Do Not Distribute) * * (c) Copyright 1990, Hewlett-Packard Company. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * *********************************************************************************/ #ifndef TESSERACT_CCSTRUCT_MATRIX_H_ #define TESSERACT_CCSTRUCT_MATRIX_H_ #include #include "kdpair.h" #include "points.h" #include "serialis.h" #include "unicharset.h" class BLOB_CHOICE; class BLOB_CHOICE_LIST; #define NOT_CLASSIFIED static_cast(0) // A generic class to hold a 2-D matrix with entries of type T, but can also // act as a base class for other implementations, such as a triangular or // banded matrix. template class GENERIC_2D_ARRAY { public: // Initializes the array size, and empty element, but cannot allocate memory // for the subclasses or initialize because calls to the num_elements // member will be routed to the base class implementation. Subclasses can // either pass the memory in, or allocate after by calling Resize(). GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array) : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) { size_allocated_ = dim1 * dim2; } // Original constructor for a full rectangular matrix DOES allocate memory // and initialize it to empty. GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty) : empty_(empty), dim1_(dim1), dim2_(dim2) { int new_size = dim1 * dim2; array_ = new T[new_size]; size_allocated_ = new_size; for (int i = 0; i < size_allocated_; ++i) array_[i] = empty_; } // Default constructor for array allocation. Use Resize to set the size. GENERIC_2D_ARRAY() : array_(NULL), empty_(static_cast(0)), dim1_(0), dim2_(0), size_allocated_(0) { } GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY& src) : array_(NULL), empty_(static_cast(0)), dim1_(0), dim2_(0), size_allocated_(0) { *this = src; } virtual ~GENERIC_2D_ARRAY() { delete[] array_; } void operator=(const GENERIC_2D_ARRAY& src) { ResizeNoInit(src.dim1(), src.dim2()); memcpy(array_, src.array_, num_elements() * sizeof(array_[0])); } // Reallocate the array to the given size. Does not keep old data, but does // not initialize the array either. void ResizeNoInit(int size1, int size2) { int new_size = size1 * size2; if (new_size > size_allocated_) { delete [] array_; array_ = new T[new_size]; size_allocated_ = new_size; } dim1_ = size1; dim2_ = size2; } // Reallocate the array to the given size. Does not keep old data. void Resize(int size1, int size2, const T& empty) { empty_ = empty; ResizeNoInit(size1, size2); Clear(); } // Reallocate the array to the given size, keeping old data. void ResizeWithCopy(int size1, int size2) { if (size1 != dim1_ || size2 != dim2_) { int new_size = size1 * size2; T* new_array = new T[new_size]; for (int col = 0; col < size1; ++col) { for (int row = 0; row < size2; ++row) { int old_index = col * dim2() + row; int new_index = col * size2 + row; if (col < dim1_ && row < dim2_) { new_array[new_index] = array_[old_index]; } else { new_array[new_index] = empty_; } } } delete[] array_; array_ = new_array; dim1_ = size1; dim2_ = size2; size_allocated_ = new_size; } } // Sets all the elements of the array to the empty value. void Clear() { int total_size = num_elements(); for (int i = 0; i < total_size; ++i) array_[i] = empty_; } // Writes to the given file. Returns false in case of error. // Only works with bitwise-serializeable types! bool Serialize(FILE* fp) const { if (!SerializeSize(fp)) return false; if (fwrite(&empty_, sizeof(empty_), 1, fp) != 1) return false; int size = num_elements(); if (fwrite(array_, sizeof(*array_), size, fp) != size) return false; return true; } bool Serialize(tesseract::TFile* fp) const { if (!SerializeSize(fp)) return false; if (fp->FWrite(&empty_, sizeof(empty_), 1) != 1) return false; int size = num_elements(); if (fp->FWrite(array_, sizeof(*array_), size) != size) return false; return true; } // Reads from the given file. Returns false in case of error. // Only works with bitwise-serializeable types! // If swap is true, assumes a big/little-endian swap is needed. bool DeSerialize(bool swap, FILE* fp) { if (!DeSerializeSize(swap, fp)) return false; if (fread(&empty_, sizeof(empty_), 1, fp) != 1) return false; if (swap) ReverseN(&empty_, sizeof(empty_)); int size = num_elements(); if (fread(array_, sizeof(*array_), size, fp) != size) return false; if (swap) { for (int i = 0; i < size; ++i) ReverseN(&array_[i], sizeof(array_[i])); } return true; } bool DeSerialize(tesseract::TFile* fp) { if (!DeSerializeSize(fp)) return false; if (fp->FReadEndian(&empty_, sizeof(empty_), 1) != 1) return false; int size = num_elements(); if (fp->FReadEndian(array_, sizeof(*array_), size) != size) return false; return true; } // Writes to the given file. Returns false in case of error. // Assumes a T::Serialize(FILE*) const function. bool SerializeClasses(FILE* fp) const { if (!SerializeSize(fp)) return false; if (!empty_.Serialize(fp)) return false; int size = num_elements(); for (int i = 0; i < size; ++i) { if (!array_[i].Serialize(fp)) return false; } return true; } // Reads from the given file. Returns false in case of error. // Assumes a T::DeSerialize(bool swap, FILE*) function. // If swap is true, assumes a big/little-endian swap is needed. bool DeSerializeClasses(bool swap, FILE* fp) { if (!DeSerializeSize(swap, fp)) return false; if (!empty_.DeSerialize(swap, fp)) return false; int size = num_elements(); for (int i = 0; i < size; ++i) { if (!array_[i].DeSerialize(swap, fp)) return false; } return true; } // Provide the dimensions of this rectangular matrix. int dim1() const { return dim1_; } int dim2() const { return dim2_; } // Returns the number of elements in the array. // Banded/triangular matrices may override. virtual int num_elements() const { return dim1_ * dim2_; } // Expression to select a specific location in the matrix. The matrix is // stored COLUMN-major, so the left-most index is the most significant. // This allows [][] access to use indices in the same order as (,). virtual int index(int column, int row) const { return (column * dim2_ + row); } // Put a list element into the matrix at a specific location. void put(ICOORD pos, const T& thing) { array_[this->index(pos.x(), pos.y())] = thing; } void put(int column, int row, const T& thing) { array_[this->index(column, row)] = thing; } // Get the item at a specified location from the matrix. T get(ICOORD pos) const { return array_[this->index(pos.x(), pos.y())]; } T get(int column, int row) const { return array_[this->index(column, row)]; } // Return a reference to the element at the specified location. const T& operator()(int column, int row) const { return array_[this->index(column, row)]; } T& operator()(int column, int row) { return array_[this->index(column, row)]; } // Allow access using array[column][row]. NOTE that the indices are // in the same left-to-right order as the () indexing. T* operator[](int column) { return &array_[this->index(column, 0)]; } const T* operator[](int column) const { return &array_[this->index(column, 0)]; } // Adds addend to *this, element-by-element. void operator+=(const GENERIC_2D_ARRAY& addend) { if (dim2_ == addend.dim2_) { // Faster if equal size in the major dimension. int size = MIN(num_elements(), addend.num_elements()); for (int i = 0; i < size; ++i) { array_[i] += addend.array_[i]; } } else { for (int x = 0; x < dim1_; x++) { for (int y = 0; y < dim2_; y++) { (*this)(x, y) += addend(x, y); } } } } // Subtracts minuend from *this, element-by-element. void operator-=(const GENERIC_2D_ARRAY& minuend) { if (dim2_ == minuend.dim2_) { // Faster if equal size in the major dimension. int size = MIN(num_elements(), minuend.num_elements()); for (int i = 0; i < size; ++i) { array_[i] -= minuend.array_[i]; } } else { for (int x = 0; x < dim1_; x++) { for (int y = 0; y < dim2_; y++) { (*this)(x, y) -= minuend(x, y); } } } } // Adds addend to all elements. void operator+=(const T& addend) { int size = num_elements(); for (int i = 0; i < size; ++i) { array_[i] += addend; } } // Multiplies *this by factor, element-by-element. void operator*=(const T& factor) { int size = num_elements(); for (int i = 0; i < size; ++i) { array_[i] *= factor; } } // Clips *this to the given range. void Clip(const T& rangemin, const T& rangemax) { int size = num_elements(); for (int i = 0; i < size; ++i) { array_[i] = ClipToRange(array_[i], rangemin, rangemax); } } // Returns true if all elements of *this are within the given range. // Only uses operator< bool WithinBounds(const T& rangemin, const T& rangemax) const { int size = num_elements(); for (int i = 0; i < size; ++i) { const T& value = array_[i]; if (value < rangemin || rangemax < value) return false; } return true; } // Normalize the whole array. double Normalize() { int size = num_elements(); if (size <= 0) return 0.0; // Compute the mean. double mean = 0.0; for (int i = 0; i < size; ++i) { mean += array_[i]; } mean /= size; // Subtract the mean and compute the standard deviation. double sd = 0.0; for (int i = 0; i < size; ++i) { double normed = array_[i] - mean; array_[i] = normed; sd += normed * normed; } sd = sqrt(sd / size); if (sd > 0.0) { // Divide by the sd. for (int i = 0; i < size; ++i) { array_[i] /= sd; } } return sd; } // Returns the maximum value of the array. T Max() const { int size = num_elements(); if (size <= 0) return empty_; // Compute the max. T max_value = array_[0]; for (int i = 1; i < size; ++i) { const T& value = array_[i]; if (value > max_value) max_value = value; } return max_value; } // Returns the maximum absolute value of the array. T MaxAbs() const { int size = num_elements(); if (size <= 0) return empty_; // Compute the max. T max_abs = static_cast(0); for (int i = 0; i < size; ++i) { T value = static_cast(fabs(array_[i])); if (value > max_abs) max_abs = value; } return max_abs; } // Accumulates the element-wise sums of squares of src into *this. void SumSquares(const GENERIC_2D_ARRAY& src, T decay_factor) { T update_factor = 1.0 - decay_factor; int size = num_elements(); for (int i = 0; i < size; ++i) { array_[i] = array_[i] * decay_factor + update_factor * src.array_[i] * src.array_[i]; } } // Scales each element using the adam algorithm, ie array_[i] by // sqrt(sqsum[i] + epsilon)). void AdamUpdate(const GENERIC_2D_ARRAY& sum, const GENERIC_2D_ARRAY& sqsum, T epsilon) { int size = num_elements(); for (int i = 0; i < size; ++i) { array_[i] += sum.array_[i] / (sqrt(sqsum.array_[i]) + epsilon); } } void AssertFinite() const { int size = num_elements(); for (int i = 0; i < size; ++i) { ASSERT_HOST(isfinite(array_[i])); } } // REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a // num_dims-dimensional array/tensor with dimensions given by dims, (ordered // from most significant to least significant, the same as standard C arrays) // and moves src_dim to dest_dim, with the initial dest_dim and any dimensions // in between shifted towards the hole left by src_dim. Example: // Current data content: array_=[0, 1, 2, ....119] // perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]... // but the current dimensions are irrelevant. // num_dims = 4, dims=[5, 4, 3, 2] // src_dim=3, dest_dim=1 // tensor=[[[[0, 1][2, 3][4, 5]] // [[6, 7][8, 9][10, 11]] // [[12, 13][14, 15][16, 17]] // [[18, 19][20, 21][22, 23]]] // [[[24, 25]... // output dims =[5, 2, 4, 3] // output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]] // [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]] // [[[24, 26, 28]... // which is stored in the array_ as: // [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...] // NOTE: the 2 stored matrix dimensions are simply copied from *this. To // change the dimensions after the transpose, use ResizeNoInit. // Higher dimensions above 2 are strictly the responsibility of the caller. void RotatingTranspose(const int* dims, int num_dims, int src_dim, int dest_dim, GENERIC_2D_ARRAY* result) const { int max_d = MAX(src_dim, dest_dim); int min_d = MIN(src_dim, dest_dim); // In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the // ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1] // being contiguous blocks of data that will move together, and // [d0, min_d -1] being replicas of the transpose operation. // num_replicas represents the large dimensions unchanged by the operation. // move_size represents the small dimensions unchanged by the operation. // src_step represents the stride in the src between each adjacent group // in the destination. int num_replicas = 1, move_size = 1, src_step = 1; for (int d = 0; d < min_d; ++d) num_replicas *= dims[d]; for (int d = max_d + 1; d < num_dims; ++d) move_size *= dims[d]; for (int d = src_dim + 1; d < num_dims; ++d) src_step *= dims[d]; if (src_dim > dest_dim) src_step *= dims[src_dim]; // wrap_size is the size of a single replica, being the amount that is // handled num_replicas times. int wrap_size = move_size; for (int d = min_d; d <= max_d; ++d) wrap_size *= dims[d]; result->ResizeNoInit(dim1_, dim2_); result->empty_ = empty_; const T* src = array_; T* dest = result->array_; for (int replica = 0; replica < num_replicas; ++replica) { for (int start = 0; start < src_step; start += move_size) { for (int pos = start; pos < wrap_size; pos += src_step) { memcpy(dest, src + pos, sizeof(*dest) * move_size); dest += move_size; } } src += wrap_size; } } // Delete objects pointed to by array_[i]. void delete_matrix_pointers() { int size = num_elements(); for (int i = 0; i < size; ++i) { T matrix_cell = array_[i]; if (matrix_cell != empty_) delete matrix_cell; } } protected: // Factored helper to serialize the size. bool SerializeSize(FILE* fp) const { inT32 size = dim1_; if (fwrite(&size, sizeof(size), 1, fp) != 1) return false; size = dim2_; if (fwrite(&size, sizeof(size), 1, fp) != 1) return false; return true; } bool SerializeSize(tesseract::TFile* fp) const { inT32 size = dim1_; if (fp->FWrite(&size, sizeof(size), 1) != 1) return false; size = dim2_; if (fp->FWrite(&size, sizeof(size), 1) != 1) return false; return true; } // Factored helper to deserialize the size. // If swap is true, assumes a big/little-endian swap is needed. bool DeSerializeSize(bool swap, FILE* fp) { inT32 size1, size2; if (fread(&size1, sizeof(size1), 1, fp) != 1) return false; if (fread(&size2, sizeof(size2), 1, fp) != 1) return false; if (swap) { ReverseN(&size1, sizeof(size1)); ReverseN(&size2, sizeof(size2)); } Resize(size1, size2, empty_); return true; } bool DeSerializeSize(tesseract::TFile* fp) { inT32 size1, size2; if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false; if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false; Resize(size1, size2, empty_); return true; } T* array_; T empty_; // The unused cell. int dim1_; // Size of the 1st dimension in indexing functions. int dim2_; // Size of the 2nd dimension in indexing functions. // The total size to which the array can be expanded before a realloc is // needed. If Resize is used, memory is retained so it can be re-expanded // without a further alloc, and this stores the allocated size. int size_allocated_; }; // A generic class to store a banded triangular matrix with entries of type T. // In this array, the nominally square matrix is dim1_ x dim1_, and dim2_ is // the number of bands, INCLUDING the diagonal. The storage is thus of size // dim1_ * dim2_ and index(col, row) = col * dim2_ + row - col, and an // assert will fail if row < col or row - col >= dim2. template class BandTriMatrix : public GENERIC_2D_ARRAY { public: // Allocate a piece of memory to hold a 2d-array of the given dimension. // Initialize all the elements of the array to empty instead of assuming // that a default constructor can be used. BandTriMatrix(int dim1, int dim2, const T& empty) : GENERIC_2D_ARRAY(dim1, dim2, empty) { } // The default destructor will do. // Provide the dimensions of this matrix. // dimension is the size of the nominally square matrix. int dimension() const { return this->dim1_; } // bandwidth is the number of bands in the matrix, INCLUDING the diagonal. int bandwidth() const { return this->dim2_; } // Expression to select a specific location in the matrix. The matrix is // stored COLUMN-major, so the left-most index is the most significant. // This allows [][] access to use indices in the same order as (,). virtual int index(int column, int row) const { ASSERT_HOST(row >= column); ASSERT_HOST(row - column < this->dim2_); return column * this->dim2_ + row - column; } // Appends array2 corner-to-corner to *this, making an array of dimension // equal to the sum of the individual dimensions. // array2 is not destroyed, but is left empty, as all elements are moved // to *this. void AttachOnCorner(BandTriMatrix* array2) { int new_dim1 = this->dim1_ + array2->dim1_; int new_dim2 = MAX(this->dim2_, array2->dim2_); T* new_array = new T[new_dim1 * new_dim2]; for (int col = 0; col < new_dim1; ++col) { for (int j = 0; j < new_dim2; ++j) { int new_index = col * new_dim2 + j; if (col < this->dim1_ && j < this->dim2_) { new_array[new_index] = this->get(col, col + j); } else if (col >= this->dim1_ && j < array2->dim2_) { new_array[new_index] = array2->get(col - this->dim1_, col - this->dim1_ + j); array2->put(col - this->dim1_, col - this->dim1_ + j, NULL); } else { new_array[new_index] = this->empty_; } } } delete[] this->array_; this->array_ = new_array; this->dim1_ = new_dim1; this->dim2_ = new_dim2; } }; class MATRIX : public BandTriMatrix { public: MATRIX(int dimension, int bandwidth) : BandTriMatrix(dimension, bandwidth, NOT_CLASSIFIED) {} // Returns true if there are any real classification results. bool Classified(int col, int row, int wildcard_id) const; // Expands the existing matrix in-place to make the band wider, without // losing any existing data. void IncreaseBandSize(int bandwidth); // Returns a bigger MATRIX with a new column and row in the matrix in order // to split the blob at the given (ind,ind) diagonal location. // Entries are relocated to the new MATRIX using the transformation defined // by MATRIX_COORD::MapForSplit. // Transfers the pointer data to the new MATRIX and deletes *this. MATRIX* ConsumeAndMakeBigger(int ind); // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs // on the lists, but not any LanguageModelState that may be attached to the // BLOB_CHOICEs. MATRIX* DeepCopy() const; // Print a shortened version of the contents of the matrix. void print(const UNICHARSET &unicharset) const; }; struct MATRIX_COORD { static void Delete(void *arg) { MATRIX_COORD *c = static_cast(arg); delete c; } // Default constructor required by GenericHeap. MATRIX_COORD() : col(0), row(0) {} MATRIX_COORD(int c, int r): col(c), row(r) {} ~MATRIX_COORD() {} bool Valid(const MATRIX &m) const { return 0 <= col && col < m.dimension() && col <= row && row < col + m.bandwidth() && row < m.dimension(); } // Remaps the col,row pair to split the blob at the given (ind,ind) diagonal // location. // Entries at (i,j) for i in [0,ind] and j in [ind,dim) move to (i,j+1), // making a new row at ind. // Entries at (i,j) for i in [ind+1,dim) and j in [i,dim) move to (i+i,j+1), // making a new column at ind+1. void MapForSplit(int ind) { ASSERT_HOST(row >= col); if (col > ind) ++col; if (row >= ind) ++row; ASSERT_HOST(row >= col); } int col; int row; }; // The MatrixCoordPair contains a MATRIX_COORD and its priority. typedef tesseract::KDPairInc MatrixCoordPair; #endif // TESSERACT_CCSTRUCT_MATRIX_H_