tesseract/lstm/stridemap.h

138 lines
5.5 KiB
C++

///////////////////////////////////////////////////////////////////////
// File: stridemap.h
// Description: Indexing into a 4-d tensor held in a 2-d Array.
// Author: Ray Smith
// Created: Fri Sep 20 16:00:31 PST 2016
//
// (C) Copyright 2016, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_LSTM_STRIDEMAP_H_
#define TESSERACT_LSTM_STRIDEMAP_H_
#include <string.h>
#include <vector>
#include "tprintf.h"
namespace tesseract {
// Enum describing the dimensions of the 'Tensor' in a NetworkIO.
// A NetworkIO is analogous to a TF Tensor, except that the number of dimensions
// is fixed (4), and they always have the same meaning. The underlying
// representation is a 2-D array, for which the product batch*height*width
// is always dim1 and depth is always dim2. FlexDimensions is used only for
// batch, height, width with the StrideMap, and therefore represents the runtime
// shape. The build-time shape is defined by StaticShape.
enum FlexDimensions {
FD_BATCH, // Index of multiple images.
FD_HEIGHT, // y-coordinate in image.
FD_WIDTH, // x-coordinate in image.
FD_DIMSIZE, // Number of flexible non-depth dimensions.
};
// Encapsulation of information relating to the mapping from [batch][y][x] to
// the first index into the 2-d array underlying a NetworkIO.
class StrideMap {
public:
// Class holding the non-depth indices.
class Index {
public:
explicit Index(const StrideMap& stride_map) : stride_map_(&stride_map) {
InitToFirst();
}
Index(const StrideMap& stride_map, int batch, int y, int x)
: stride_map_(&stride_map) {
indices_[FD_BATCH] = batch;
indices_[FD_HEIGHT] = y;
indices_[FD_WIDTH] = x;
SetTFromIndices();
}
// Accesses the index to the underlying array.
int t() const { return t_; }
int index(FlexDimensions dimension) const { return indices_[dimension]; }
// Initializes the indices to the first valid location.
void InitToFirst() {
memset(indices_, 0, sizeof(indices_));
t_ = 0;
}
// Initializes the indices to the last valid location.
void InitToLast() { InitToLastOfBatch(MaxIndexOfDim(FD_BATCH)); }
// Returns true if *this is a valid index.
bool IsValid() const;
// Returns true if the index of the given dimension is the last.
bool IsLast(FlexDimensions dimension) const;
// Given that the dimensions up to and including dim-1 are valid, returns
// the maximum index for dimension dim.
int MaxIndexOfDim(FlexDimensions dim) const;
// Adds the given offset to the given dimension. Returns true if the result
// makes a valid index.
bool AddOffset(int offset, FlexDimensions dimension);
// Increments the index in some encapsulated way that guarantees to remain
// valid until it returns false, meaning that the iteration is complete.
bool Increment();
// Decrements the index in some encapsulated way that guarantees to remain
// valid until it returns false, meaning that the iteration (that started
// with InitToLast()) is complete.
bool Decrement();
private:
// Initializes the indices to the last valid location in the given batch
// index.
void InitToLastOfBatch(int batch);
// Computes and sets t_ from the current indices_.
void SetTFromIndices();
// Map into which *this is an index.
const StrideMap* stride_map_;
// Index to the first dimension of the underlying array.
int t_;
// Indices into the individual dimensions.
int indices_[FD_DIMSIZE];
};
StrideMap() {
memset(shape_, 0, sizeof(shape_));
memset(t_increments_, 0, sizeof(t_increments_));
}
// Default copy constructor and operator= are OK to use here!
// Sets up the stride for the given array of height, width pairs.
void SetStride(const std::vector<std::pair<int, int>>& h_w_pairs);
// Scales width and height dimensions by the given factors.
void ScaleXY(int x_factor, int y_factor);
// Reduces width to 1, across the batch, whatever the input size.
void ReduceWidthTo1();
// Transposes the width and height dimensions.
void TransposeXY();
// Returns the size of the given dimension.
int Size(FlexDimensions dimension) const { return shape_[dimension]; }
// Returns the total width required.
int Width() const { return t_increments_[FD_BATCH] * shape_[FD_BATCH]; }
private:
// Computes t_increments_ from shape_.
void ComputeTIncrements();
// The size of each non-depth dimension.
int shape_[FD_DIMSIZE];
// Precomputed 't' increments for each dimension. This is the value of
// the given dimension in the packed 3-d array that the shape_ represents.
int t_increments_[FD_DIMSIZE];
// Vector of size shape_[FD_BATCH] holds the height of each image in a batch.
std::vector<int> heights_;
// Vector of size shape_[FD_BATCH] holds the width of each image in a batch.
std::vector<int> widths_;
};
} // namespace tesseract
#endif // TESSERACT_LSTM_STRIDEMAP_H_