// tesseract/ccstruct/matrix.h
//
// 627 lines
// 22 KiB
// C
// Raw Normal View History

/* -*-C-*-
******************************************************************************
* File: matrix.h (Formerly matrix.h)
* Description: Generic 2-d array/matrix and banded triangular matrix class.
* Author: Ray Smith
* TODO(rays) Separate from ratings matrix, which it also contains:
*
* Description: Ratings matrix class (specialization of banded matrix).
* Segmentation search matrix of lists of BLOB_CHOICE.
* Author: Mark Seaman, OCR Technology
* Created: Wed May 16 13:22:06 1990
* Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
*
* (c) Copyright 1990, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
#ifndef TESSERACT_CCSTRUCT_MATRIX_H_
#define TESSERACT_CCSTRUCT_MATRIX_H_
#include <math.h>
#include "kdpair.h"
#include "points.h"
#include "serialis.h"
#include "unicharset.h"
// Forward declarations: this header only stores and compares pointers to
// these types, so their full definitions are not needed here.
class BLOB_CHOICE;
class BLOB_CHOICE_LIST;

// Value stored in a ratings matrix cell that has no BLOB_CHOICE_LIST: a null
// BLOB_CHOICE_LIST pointer.
#define NOT_CLASSIFIED static_cast<BLOB_CHOICE_LIST*>(0)
// A generic class to hold a 2-D matrix with entries of type T, but can also
// act as a base class for other implementations, such as a triangular or
// banded matrix.
template <class T>
class GENERIC_2D_ARRAY {
public:
// Initializes the array size, and empty element, but cannot allocate memory
// for the subclasses or initialize because calls to the num_elements
// member will be routed to the base class implementation. Subclasses can
// either pass the memory in, or allocate after by calling Resize().
GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array)
: empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) {
size_allocated_ = dim1 * dim2;
}
// Original constructor for a full rectangular matrix DOES allocate memory
// and initialize it to empty.
GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty)
: empty_(empty), dim1_(dim1), dim2_(dim2) {
int new_size = dim1 * dim2;
array_ = new T[new_size];
size_allocated_ = new_size;
for (int i = 0; i < size_allocated_; ++i)
array_[i] = empty_;
}
// Default constructor for array allocation. Use Resize to set the size.
GENERIC_2D_ARRAY()
: array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
size_allocated_(0) {
}
GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY<T>& src)
: array_(NULL), empty_(static_cast<T>(0)), dim1_(0), dim2_(0),
size_allocated_(0) {
*this = src;
}
virtual ~GENERIC_2D_ARRAY() { delete[] array_; }
void operator=(const GENERIC_2D_ARRAY<T>& src) {
ResizeNoInit(src.dim1(), src.dim2());
memcpy(array_, src.array_, num_elements() * sizeof(array_[0]));
}
// Reallocate the array to the given size. Does not keep old data, but does
// not initialize the array either.
void ResizeNoInit(int size1, int size2) {
int new_size = size1 * size2;
if (new_size > size_allocated_) {
delete [] array_;
array_ = new T[new_size];
size_allocated_ = new_size;
}
dim1_ = size1;
dim2_ = size2;
}
// Reallocate the array to the given size. Does not keep old data.
void Resize(int size1, int size2, const T& empty) {
empty_ = empty;
ResizeNoInit(size1, size2);
Clear();
}
// Reallocate the array to the given size, keeping old data.
void ResizeWithCopy(int size1, int size2) {
if (size1 != dim1_ || size2 != dim2_) {
int new_size = size1 * size2;
T* new_array = new T[new_size];
for (int col = 0; col < size1; ++col) {
for (int row = 0; row < size2; ++row) {
int old_index = col * dim2() + row;
int new_index = col * size2 + row;
if (col < dim1_ && row < dim2_) {
new_array[new_index] = array_[old_index];
} else {
new_array[new_index] = empty_;
}
}
}
delete[] array_;
array_ = new_array;
dim1_ = size1;
dim2_ = size2;
size_allocated_ = new_size;
}
}
// Sets all the elements of the array to the empty value.
void Clear() {
int total_size = num_elements();
for (int i = 0; i < total_size; ++i)
array_[i] = empty_;
}
// Writes to the given file. Returns false in case of error.
// Only works with bitwise-serializeable types!
bool Serialize(FILE* fp) const {
if (!SerializeSize(fp)) return false;
if (fwrite(&empty_, sizeof(empty_), 1, fp) != 1) return false;
int size = num_elements();
if (fwrite(array_, sizeof(*array_), size, fp) != size) return false;
return true;
}
bool Serialize(tesseract::TFile* fp) const {
if (!SerializeSize(fp)) return false;
if (fp->FWrite(&empty_, sizeof(empty_), 1) != 1) return false;
int size = num_elements();
if (fp->FWrite(array_, sizeof(*array_), size) != size) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// Only works with bitwise-serializeable types!
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp) {
if (!DeSerializeSize(swap, fp)) return false;
if (fread(&empty_, sizeof(empty_), 1, fp) != 1) return false;
if (swap) ReverseN(&empty_, sizeof(empty_));
int size = num_elements();
if (fread(array_, sizeof(*array_), size, fp) != size) return false;
if (swap) {
for (int i = 0; i < size; ++i)
ReverseN(&array_[i], sizeof(array_[i]));
}
return true;
}
bool DeSerialize(tesseract::TFile* fp) {
if (!DeSerializeSize(fp)) return false;
if (fp->FReadEndian(&empty_, sizeof(empty_), 1) != 1) return false;
int size = num_elements();
if (fp->FReadEndian(array_, sizeof(*array_), size) != size) return false;
return true;
}
// Writes to the given file. Returns false in case of error.
// Assumes a T::Serialize(FILE*) const function.
bool SerializeClasses(FILE* fp) const {
if (!SerializeSize(fp)) return false;
if (!empty_.Serialize(fp)) return false;
int size = num_elements();
for (int i = 0; i < size; ++i) {
if (!array_[i].Serialize(fp)) return false;
}
return true;
}
// Reads from the given file. Returns false in case of error.
// Assumes a T::DeSerialize(bool swap, FILE*) function.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeClasses(bool swap, FILE* fp) {
if (!DeSerializeSize(swap, fp)) return false;
if (!empty_.DeSerialize(swap, fp)) return false;
int size = num_elements();
for (int i = 0; i < size; ++i) {
if (!array_[i].DeSerialize(swap, fp)) return false;
}
return true;
}
// Provide the dimensions of this rectangular matrix.
int dim1() const { return dim1_; }
int dim2() const { return dim2_; }
// Returns the number of elements in the array.
// Banded/triangular matrices may override.
virtual int num_elements() const { return dim1_ * dim2_; }
// Expression to select a specific location in the matrix. The matrix is
// stored COLUMN-major, so the left-most index is the most significant.
// This allows [][] access to use indices in the same order as (,).
virtual int index(int column, int row) const {
return (column * dim2_ + row);
}
// Put a list element into the matrix at a specific location.
void put(ICOORD pos, const T& thing) {
array_[this->index(pos.x(), pos.y())] = thing;
}
void put(int column, int row, const T& thing) {
array_[this->index(column, row)] = thing;
}
// Get the item at a specified location from the matrix.
T get(ICOORD pos) const {
return array_[this->index(pos.x(), pos.y())];
}
T get(int column, int row) const {
return array_[this->index(column, row)];
}
// Return a reference to the element at the specified location.
const T& operator()(int column, int row) const {
return array_[this->index(column, row)];
}
T& operator()(int column, int row) {
return array_[this->index(column, row)];
}
// Allow access using array[column][row]. NOTE that the indices are
// in the same left-to-right order as the () indexing.
T* operator[](int column) {
return &array_[this->index(column, 0)];
}
const T* operator[](int column) const {
return &array_[this->index(column, 0)];
}
// Adds addend to *this, element-by-element.
void operator+=(const GENERIC_2D_ARRAY<T>& addend) {
if (dim2_ == addend.dim2_) {
// Faster if equal size in the major dimension.
int size = MIN(num_elements(), addend.num_elements());
for (int i = 0; i < size; ++i) {
array_[i] += addend.array_[i];
}
} else {
for (int x = 0; x < dim1_; x++) {
for (int y = 0; y < dim2_; y++) {
(*this)(x, y) += addend(x, y);
}
}
}
}
// Subtracts minuend from *this, element-by-element.
void operator-=(const GENERIC_2D_ARRAY<T>& minuend) {
if (dim2_ == minuend.dim2_) {
// Faster if equal size in the major dimension.
int size = MIN(num_elements(), minuend.num_elements());
for (int i = 0; i < size; ++i) {
array_[i] -= minuend.array_[i];
}
} else {
for (int x = 0; x < dim1_; x++) {
for (int y = 0; y < dim2_; y++) {
(*this)(x, y) -= minuend(x, y);
}
}
}
}
// Adds addend to all elements.
void operator+=(const T& addend) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] += addend;
}
}
// Multiplies *this by factor, element-by-element.
void operator*=(const T& factor) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] *= factor;
}
}
// Clips *this to the given range.
void Clip(const T& rangemin, const T& rangemax) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] = ClipToRange(array_[i], rangemin, rangemax);
}
}
// Returns true if all elements of *this are within the given range.
// Only uses operator<
bool WithinBounds(const T& rangemin, const T& rangemax) const {
int size = num_elements();
for (int i = 0; i < size; ++i) {
const T& value = array_[i];
if (value < rangemin || rangemax < value)
return false;
}
return true;
}
// Normalize the whole array.
double Normalize() {
int size = num_elements();
if (size <= 0) return 0.0;
// Compute the mean.
double mean = 0.0;
for (int i = 0; i < size; ++i) {
mean += array_[i];
}
mean /= size;
// Subtract the mean and compute the standard deviation.
double sd = 0.0;
for (int i = 0; i < size; ++i) {
double normed = array_[i] - mean;
array_[i] = normed;
sd += normed * normed;
}
sd = sqrt(sd / size);
if (sd > 0.0) {
// Divide by the sd.
for (int i = 0; i < size; ++i) {
array_[i] /= sd;
}
}
return sd;
}
// Returns the maximum value of the array.
T Max() const {
int size = num_elements();
if (size <= 0) return empty_;
// Compute the max.
T max_value = array_[0];
for (int i = 1; i < size; ++i) {
const T& value = array_[i];
if (value > max_value) max_value = value;
}
return max_value;
}
// Returns the maximum absolute value of the array.
T MaxAbs() const {
int size = num_elements();
if (size <= 0) return empty_;
// Compute the max.
T max_abs = static_cast<T>(0);
for (int i = 0; i < size; ++i) {
T value = static_cast<T>(fabs(array_[i]));
if (value > max_abs) max_abs = value;
}
return max_abs;
}
// Accumulates the element-wise sums of squares of src into *this.
void SumSquares(const GENERIC_2D_ARRAY<T>& src) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] += src.array_[i] * src.array_[i];
}
}
// Scales each element using the ada-grad algorithm, ie array_[i] by
// sqrt(num_samples/max(1,sqsum[i])).
void AdaGradScaling(const GENERIC_2D_ARRAY<T>& sqsum, int num_samples) {
int size = num_elements();
for (int i = 0; i < size; ++i) {
array_[i] *= sqrt(num_samples / MAX(1.0, sqsum.array_[i]));
}
}
void AssertFinite() const {
int size = num_elements();
for (int i = 0; i < size; ++i) {
ASSERT_HOST(isfinite(array_[i]));
}
}
// REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a
// num_dims-dimensional array/tensor with dimensions given by dims, (ordered
// from most significant to least significant, the same as standard C arrays)
// and moves src_dim to dest_dim, with the initial dest_dim and any dimensions
// in between shifted towards the hole left by src_dim. Example:
// Current data content: array_=[0, 1, 2, ....119]
// perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]...
// but the current dimensions are irrelevant.
// num_dims = 4, dims=[5, 4, 3, 2]
// src_dim=3, dest_dim=1
// tensor=[[[[0, 1][2, 3][4, 5]]
// [[6, 7][8, 9][10, 11]]
// [[12, 13][14, 15][16, 17]]
// [[18, 19][20, 21][22, 23]]]
// [[[24, 25]...
// output dims =[5, 2, 4, 3]
// output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]]
// [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]]
// [[[24, 26, 28]...
// which is stored in the array_ as:
// [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...]
// NOTE: the 2 stored matrix dimensions are simply copied from *this. To
// change the dimensions after the transpose, use ResizeNoInit.
// Higher dimensions above 2 are strictly the responsibility of the caller.
void RotatingTranspose(const int* dims, int num_dims, int src_dim,
int dest_dim, GENERIC_2D_ARRAY<T>* result) const {
int max_d = MAX(src_dim, dest_dim);
int min_d = MIN(src_dim, dest_dim);
// In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the
// ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1]
// being contiguous blocks of data that will move together, and
// [d0, min_d -1] being replicas of the transpose operation.
// num_replicas represents the large dimensions unchanged by the operation.
// move_size represents the small dimensions unchanged by the operation.
// src_step represents the stride in the src between each adjacent group
// in the destination.
int num_replicas = 1, move_size = 1, src_step = 1;
for (int d = 0; d < min_d; ++d) num_replicas *= dims[d];
for (int d = max_d + 1; d < num_dims; ++d) move_size *= dims[d];
for (int d = src_dim + 1; d < num_dims; ++d) src_step *= dims[d];
if (src_dim > dest_dim) src_step *= dims[src_dim];
// wrap_size is the size of a single replica, being the amount that is
// handled num_replicas times.
int wrap_size = move_size;
for (int d = min_d; d <= max_d; ++d) wrap_size *= dims[d];
result->ResizeNoInit(dim1_, dim2_);
result->empty_ = empty_;
const T* src = array_;
T* dest = result->array_;
for (int replica = 0; replica < num_replicas; ++replica) {
for (int start = 0; start < src_step; start += move_size) {
for (int pos = start; pos < wrap_size; pos += src_step) {
memcpy(dest, src + pos, sizeof(*dest) * move_size);
dest += move_size;
}
}
src += wrap_size;
}
}
// Delete objects pointed to by array_[i].
void delete_matrix_pointers() {
int size = num_elements();
for (int i = 0; i < size; ++i) {
T matrix_cell = array_[i];
if (matrix_cell != empty_)
delete matrix_cell;
}
}
protected:
// Factored helper to serialize the size.
bool SerializeSize(FILE* fp) const {
inT32 size = dim1_;
if (fwrite(&size, sizeof(size), 1, fp) != 1) return false;
size = dim2_;
if (fwrite(&size, sizeof(size), 1, fp) != 1) return false;
return true;
}
bool SerializeSize(tesseract::TFile* fp) const {
inT32 size = dim1_;
if (fp->FWrite(&size, sizeof(size), 1) != 1) return false;
size = dim2_;
if (fp->FWrite(&size, sizeof(size), 1) != 1) return false;
return true;
}
// Factored helper to deserialize the size.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeSize(bool swap, FILE* fp) {
inT32 size1, size2;
if (fread(&size1, sizeof(size1), 1, fp) != 1) return false;
if (fread(&size2, sizeof(size2), 1, fp) != 1) return false;
if (swap) {
ReverseN(&size1, sizeof(size1));
ReverseN(&size2, sizeof(size2));
}
Resize(size1, size2, empty_);
return true;
}
bool DeSerializeSize(tesseract::TFile* fp) {
inT32 size1, size2;
if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false;
if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false;
Resize(size1, size2, empty_);
return true;
}
T* array_;
T empty_; // The unused cell.
int dim1_; // Size of the 1st dimension in indexing functions.
int dim2_; // Size of the 2nd dimension in indexing functions.
// The total size to which the array can be expanded before a realloc is
// needed. If Resize is used, memory is retained so it can be re-expanded
// without a further alloc, and this stores the allocated size.
int size_allocated_;
};
// A generic class to store a banded triangular matrix with entries of type T.
// In this array, the nominally square matrix is dim1_ x dim1_, and dim2_ is
// the number of bands, INCLUDING the diagonal. The storage is thus of size
// dim1_ * dim2_ and index(col, row) = col * dim2_ + row - col, and an
// assert will fail if row < col or row - col >= dim2.
template <class T>
class BandTriMatrix : public GENERIC_2D_ARRAY<T> {
 public:
  // Allocate a piece of memory to hold a 2d-array of the given dimension.
  // Initialize all the elements of the array to empty instead of assuming
  // that a default constructor can be used.
  // dim1 is the size of the square matrix and dim2 the number of bands.
  BandTriMatrix(int dim1, int dim2, const T& empty)
      : GENERIC_2D_ARRAY<T>(dim1, dim2, empty) {
  }
  // The default destructor will do.

  // Provide the dimensions of this matrix.
  // dimension is the size of the nominally square matrix.
  int dimension() const { return this->dim1_; }
  // bandwidth is the number of bands in the matrix, INCLUDING the diagonal.
  int bandwidth() const { return this->dim2_; }

  // Expression to select a specific location in the matrix. The matrix is
  // stored COLUMN-major, so the left-most index is the most significant.
  // This allows [][] access to use indices in the same order as (,).
  // Asserts that (column, row) lies inside the stored band.
  virtual int index(int column, int row) const {
    ASSERT_HOST(row >= column);
    ASSERT_HOST(row - column < this->dim2_);
    return column * this->dim2_ + row - column;
  }

  // Appends array2 corner-to-corner to *this, making an array of dimension
  // equal to the sum of the individual dimensions.
  // array2 is not destroyed, but is left empty, as all elements are moved
  // to *this.
  // The new bandwidth is the larger of the two bandwidths; cells that exist
  // in neither input are set to the empty value of *this. The cells taken
  // from array2 are overwritten there with NULL, so T must be a pointer type.
  void AttachOnCorner(BandTriMatrix<T>* array2) {
    int new_dim1 = this->dim1_ + array2->dim1_;
    int new_dim2 = MAX(this->dim2_, array2->dim2_);
    int new_size = new_dim1 * new_dim2;
    T* new_array = new T[new_size];
    for (int col = 0; col < new_dim1; ++col) {
      for (int j = 0; j < new_dim2; ++j) {
        int new_index = col * new_dim2 + j;
        if (col < this->dim1_ && j < this->dim2_) {
          new_array[new_index] = this->get(col, col + j);
        } else if (col >= this->dim1_ && j < array2->dim2_) {
          new_array[new_index] = array2->get(col - this->dim1_,
                                             col - this->dim1_ + j);
          array2->put(col - this->dim1_, col - this->dim1_ + j, NULL);
        } else {
          new_array[new_index] = this->empty_;
        }
      }
    }
    delete[] this->array_;
    this->array_ = new_array;
    this->dim1_ = new_dim1;
    this->dim2_ = new_dim2;
    // Keep the recorded capacity in step with the new buffer. A stale, larger
    // value would let a later ResizeNoInit reuse a buffer that is too small.
    this->size_allocated_ = new_size;
  }
};
// The ratings matrix: a banded triangular matrix whose cells are pointers to
// BLOB_CHOICE_LIST, with NOT_CLASSIFIED marking a cell that has no list.
class MATRIX : public BandTriMatrix<BLOB_CHOICE_LIST*> {
 public:
  // Makes a dimension x dimension matrix that stores bandwidth bands
  // (including the diagonal), with every cell set to NOT_CLASSIFIED.
  MATRIX(int dimension, int bandwidth)
      : BandTriMatrix<BLOB_CHOICE_LIST*>(dimension, bandwidth,
                                         NOT_CLASSIFIED) {
  }

  // Returns true if there are any real classification results.
  bool Classified(int col, int row, int wildcard_id) const;

  // Widens the band of the existing matrix in-place, keeping all of the
  // data that it already holds.
  void IncreaseBandSize(int bandwidth);

  // Splits the blob at the (ind,ind) diagonal location by returning a bigger
  // MATRIX that has one more column and one more row.
  // Each entry moves to the position given by MATRIX_COORD::MapForSplit.
  // The pointer data is transferred to the new MATRIX and *this is deleted.
  MATRIX* ConsumeAndMakeBigger(int ind);

  // Returns a deep copy of *this. All the BLOB_CHOICEs on the lists are
  // copied, but any LanguageModelState attached to the BLOB_CHOICEs is not.
  MATRIX* DeepCopy() const;

  // Prints a shortened version of the contents of the matrix.
  void print(const UNICHARSET& unicharset) const;
};
// A (col, row) position in a MATRIX.
struct MATRIX_COORD {
  // Deletion callback taking an untyped pointer: arg is cast to
  // MATRIX_COORD* and deleted.
  static void Delete(void* arg) {
    delete static_cast<MATRIX_COORD*>(arg);
  }

  // Default constructor required by GenericHeap.
  MATRIX_COORD() : col(0), row(0) {}
  MATRIX_COORD(int c, int r) : col(c), row(r) {}
  ~MATRIX_COORD() {}

  // Returns true if this position is inside the stored band of m: col is in
  // [0, dimension), row is in [col, col + bandwidth) and row < dimension.
  bool Valid(const MATRIX& m) const {
    if (col < 0 || col >= m.dimension()) return false;
    if (row < col) return false;
    if (row >= col + m.bandwidth()) return false;
    return row < m.dimension();
  }

  // Remaps the col,row pair to split the blob at the given (ind,ind) diagonal
  // location.
  // Entries at (i,j) for i in [0,ind] and j in [ind,dim) move to (i,j+1),
  // making a new row at ind.
  // Entries at (i,j) for i in [ind+1,dim) and j in [i,dim) move to (i+1,j+1),
  // making a new column at ind+1.
  void MapForSplit(int ind) {
    ASSERT_HOST(row >= col);
    col += (col > ind) ? 1 : 0;
    row += (row >= ind) ? 1 : 0;
    ASSERT_HOST(row >= col);
  }

  int col;
  int row;
};
// The MatrixCoordPair contains a MATRIX_COORD and its priority.
// The float template argument is presumably the key (the priority) and the
// MATRIX_COORD the data it carries.
// NOTE(review): KDPairInc comes from kdpair.h, which is not visible here.
// Confirm its argument order and comparison direction before relying on them.
typedef tesseract::KDPairInc<float, MATRIX_COORD> MatrixCoordPair;
#endif // TESSERACT_CCSTRUCT_MATRIX_H_