/////////////////////////////////////////////////////////////////////// // File: networkio.cpp // Description: Network input/output data, allowing float/int implementations. // Author: Ray Smith // Created: Thu Jun 19 13:01:31 PST 2014 // // (C) Copyright 2014, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /////////////////////////////////////////////////////////////////////// #include "networkio.h" #include "allheaders.h" #include "functions.h" #include "statistc.h" #include "tprintf.h" namespace tesseract { // Minimum value to output for certainty. const float kMinCertainty = -20.0f; // Probability corresponding to kMinCertainty. const float kMinProb = exp(kMinCertainty); // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim. void NetworkIO::Resize2d(bool int_mode, int width, int num_features) { stride_map_ = StrideMap(); int_mode_ = int_mode; if (int_mode_) { i_.ResizeNoInit(width, num_features); } else { f_.ResizeNoInit(width, num_features); } } // Resizes to a specific stride_map. void NetworkIO::ResizeToMap(bool int_mode, const StrideMap& stride_map, int num_features) { // If this assert fails, it most likely got here through an uninitialized // scratch element, ie call NetworkScratch::IO::Resizexxx() not // NetworkIO::Resizexxx()!! 
ASSERT_HOST(this != NULL); stride_map_ = stride_map; int_mode_ = int_mode; if (int_mode_) { i_.ResizeNoInit(stride_map.Width(), num_features); } else { f_.ResizeNoInit(stride_map.Width(), num_features); } ZeroInvalidElements(); } // Shrinks image size by x_scale,y_scale, and use given number of features. void NetworkIO::ResizeScaled(const NetworkIO& src, int x_scale, int y_scale, int num_features) { StrideMap stride_map = src.stride_map_; stride_map.ScaleXY(x_scale, y_scale); ResizeToMap(src.int_mode_, stride_map, num_features); } // Resizes to just 1 x-coord, whatever the input. void NetworkIO::ResizeXTo1(const NetworkIO& src, int num_features) { StrideMap stride_map = src.stride_map_; stride_map.ReduceWidthTo1(); ResizeToMap(src.int_mode_, stride_map, num_features); } // Initialize all the array to zero. void NetworkIO::Zero() { int width = Width(); // Zero out the everything. Column-by-column in case it is aligned. for (int t = 0; t < width; ++t) { ZeroTimeStep(t); } } // Initializes to zero all elements of the array that do not correspond to // valid image positions. (If a batch of different-sized images are packed // together, then there will be padding pixels.) void NetworkIO::ZeroInvalidElements() { int num_features = NumFeatures(); int full_width = stride_map_.Size(FD_WIDTH); int full_height = stride_map_.Size(FD_HEIGHT); StrideMap::Index b_index(stride_map_); do { int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1; if (end_x < full_width) { // The width is small, so fill for every valid y. StrideMap::Index y_index(b_index); int fill_size = num_features * (full_width - end_x); do { StrideMap::Index z_index(y_index); z_index.AddOffset(end_x, FD_WIDTH); if (int_mode_) { ZeroVector(fill_size, i_[z_index.t()]); } else { ZeroVector(fill_size, f_[z_index.t()]); } } while (y_index.AddOffset(1, FD_HEIGHT)); } int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1; if (end_y < full_height) { // The height is small, so fill in the space in one go. 
StrideMap::Index y_index(b_index); y_index.AddOffset(end_y, FD_HEIGHT); int fill_size = num_features * full_width * (full_height - end_y); if (int_mode_) { ZeroVector(fill_size, i_[y_index.t()]); } else { ZeroVector(fill_size, f_[y_index.t()]); } } } while (b_index.AddOffset(1, FD_BATCH)); } // Helper computes a black point and white point to contrast-enhance an image. // The computation is based on the assumption that the image is of a single line // of text, so a horizontal line through the middle of the image passes through // at least some of it, so local minima and maxima are a good proxy for black // and white pixel samples. static void ComputeBlackWhite(Pix* pix, float* black, float* white) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); STATS mins(0, 256), maxes(0, 256); if (width >= 3) { int y = height / 2; l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y; int prev = GET_DATA_BYTE(line, 0); int curr = GET_DATA_BYTE(line, 1); for (int x = 1; x + 1 < width; ++x) { int next = GET_DATA_BYTE(line, x + 1); if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) { // Local minimum. mins.add(curr, 1); } if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) { // Local maximum. maxes.add(curr, 1); } prev = curr; curr = next; } } if (mins.get_total() == 0) mins.add(0, 1); if (maxes.get_total() == 0) maxes.add(255, 1); *black = mins.ile(0.25); *white = maxes.ile(0.75); } // Sets up the array from the given image, using the currently set int_mode_. // If the image width doesn't match the shape, the image is truncated or padded // with noise to match. void NetworkIO::FromPix(const StaticShape& shape, const Pix* pix, TRand* randomizer) { std::vector pixes(1, pix); FromPixes(shape, pixes, randomizer); } // Sets up the array from the given set of images, using the currently set // int_mode_. If the image width doesn't match the shape, the images are // truncated or padded with noise to match. 
void NetworkIO::FromPixes(const StaticShape& shape, const std::vector& pixes, TRand* randomizer) { int target_height = shape.height(); int target_width = shape.width(); std::vector> h_w_pairs; for (auto pix : pixes) { Pix* var_pix = const_cast(pix); int width = pixGetWidth(var_pix); if (target_width != 0) width = target_width; int height = pixGetHeight(var_pix); if (target_height != 0) height = target_height; h_w_pairs.emplace_back(height, width); } stride_map_.SetStride(h_w_pairs); ResizeToMap(int_mode(), stride_map_, shape.depth()); // Iterate over the images again to copy the data. for (size_t b = 0; b < pixes.size(); ++b) { Pix* pix = const_cast(pixes[b]); float black = 0.0f, white = 255.0f; if (shape.depth() != 3) ComputeBlackWhite(pix, &black, &white); float contrast = (white - black) / 2.0f; if (contrast <= 0.0f) contrast = 1.0f; if (shape.height() == 1) { Copy1DGreyImage(b, pix, black, contrast, randomizer); } else { Copy2DImage(b, pix, black, contrast, randomizer); } } } // Copies the given pix to *this at the given batch index, stretching and // clipping the pixel values so that [black, black + 2*contrast] maps to the // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int. // This is a 2-d operation in the sense that the output depth is the number // of input channels, the height is the height of the image, and the width // is the width of the image, or truncated/padded with noise if the width // is a fixed size. 
void NetworkIO::Copy2DImage(int batch, Pix* pix, float black, float contrast, TRand* randomizer) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); int wpl = pixGetWpl(pix); StrideMap::Index index(stride_map_); index.AddOffset(batch, FD_BATCH); int t = index.t(); int target_height = stride_map_.Size(FD_HEIGHT); int target_width = stride_map_.Size(FD_WIDTH); int num_features = NumFeatures(); bool color = num_features == 3; if (width > target_width) width = target_width; uinT32* line = pixGetData(pix); for (int y = 0; y < target_height; ++y, line += wpl) { int x = 0; if (y < height) { for (x = 0; x < width; ++x, ++t) { if (color) { int f = 0; for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) { int pixel = GET_DATA_BYTE(line + x, c); SetPixel(t, f++, pixel, black, contrast); } } else { int pixel = GET_DATA_BYTE(line, x); SetPixel(t, 0, pixel, black, contrast); } } } for (; x < target_width; ++x) Randomize(t++, 0, num_features, randomizer); } } // Copies the given pix to *this at the given batch index, as Copy2DImage // above, except that the output depth is the height of the input image, the // output height is 1, and the output width as for Copy2DImage. // The image is thus treated as a 1-d set of vertical pixel strips. void NetworkIO::Copy1DGreyImage(int batch, Pix* pix, float black, float contrast, TRand* randomizer) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); ASSERT_HOST(height == NumFeatures()); int wpl = pixGetWpl(pix); StrideMap::Index index(stride_map_); index.AddOffset(batch, FD_BATCH); int t = index.t(); int target_width = stride_map_.Size(FD_WIDTH); if (width > target_width) width = target_width; int x; for (x = 0; x < width; ++x, ++t) { for (int y = 0; y < height; ++y) { uinT32* line = pixGetData(pix) + wpl * y; int pixel = GET_DATA_BYTE(line, x); SetPixel(t, y, pixel, black, contrast); } } for (; x < target_width; ++x) Randomize(t++, 0, height, randomizer); } // Helper stores the pixel value in i_ or f_ according to int_mode_. 
// t: is the index from the StrideMap corresponding to the current // [batch,y,x] position // f: is the index into the depth/channel // pixel: the value of the pixel from the image (in one channel) // black: the pixel value to map to the lowest of the range of *this // contrast: the range of pixel values to stretch to half the range of *this. void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) { float float_pixel = (pixel - black) / contrast - 1.0f; if (int_mode_) { i_[t][f] = ClipToRange(IntCastRounded((MAX_INT8 + 1) * float_pixel), -MAX_INT8, MAX_INT8); } else { f_[t][f] = float_pixel; } } // Converts the array to a Pix. Must be pixDestroyed after use. Pix* NetworkIO::ToPix() const { // Count the width of the image, and find the max multiplication factor. int im_width = stride_map_.Size(FD_WIDTH); int im_height = stride_map_.Size(FD_HEIGHT); int num_features = NumFeatures(); int feature_factor = 1; if (num_features == 3) { // Special hack for color. num_features = 1; feature_factor = 3; } Pix* pix = pixCreate(im_width, im_height * num_features, 32); StrideMap::Index index(stride_map_); do { int im_x = index.index(FD_WIDTH); int top_im_y = index.index(FD_HEIGHT); int im_y = top_im_y; int t = index.t(); if (int_mode_) { const inT8* features = i_[t]; for (int y = 0; y < num_features; ++y, im_y += im_height) { int pixel = features[y * feature_factor]; // 1 or 2 features use greyscale. int red = ClipToRange(pixel + 128, 0, 255); int green = red, blue = red; if (feature_factor == 3) { // With 3 features assume RGB color. green = ClipToRange(features[y * feature_factor + 1] + 128, 0, 255); blue = ClipToRange(features[y * feature_factor + 2] + 128, 0, 255); } else if (num_features > 3) { // More than 3 features use false yellow/blue color, assuming a signed // input in the range [-1,1]. 
red = abs(pixel) * 2; if (pixel >= 0) { green = red; blue = 0; } else { blue = red; green = red = 0; } } pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT)); } } else { const float* features = f_[t]; for (int y = 0; y < num_features; ++y, im_y += im_height) { float pixel = features[y * feature_factor]; // 1 or 2 features use greyscale. int red = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); int green = red, blue = red; if (feature_factor == 3) { // With 3 features assume RGB color. pixel = features[y * feature_factor + 1]; green = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); pixel = features[y * feature_factor + 2]; blue = ClipToRange(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); } else if (num_features > 3) { // More than 3 features use false yellow/blue color, assuming a signed // input in the range [-1,1]. red = ClipToRange(IntCastRounded(fabs(pixel) * 255), 0, 255); if (pixel >= 0) { green = red; blue = 0; } else { blue = red; green = red = 0; } } pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT)); } } } while (index.Increment()); return pix; } // Prints the first and last num timesteps of the array for each feature. void NetworkIO::Print(int num) const { int num_features = NumFeatures(); for (int y = 0; y < num_features; ++y) { for (int t = 0; t < Width(); ++t) { if (num == 0 || t < num || t + num >= Width()) { if (int_mode_) { tprintf(" %g", static_cast(i_[t][y]) / MAX_INT8); } else { tprintf(" %g", f_[t][y]); } } } tprintf("\n"); } } // Copies a single time step from src. void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t) { ASSERT_HOST(int_mode_ == src.int_mode_); if (int_mode_) { memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0])); } else { memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0])); } } // Copies a part of single time step from src. 
void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO& src, int src_t, int src_offset) { ASSERT_HOST(int_mode_ == src.int_mode_); if (int_mode_) { memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset, num_features * sizeof(i_[0][0])); } else { memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset, num_features * sizeof(f_[0][0])); } } // Zeroes a single time step. void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) { if (int_mode_) { ZeroVector(num_features, i_[t] + offset); } else { ZeroVector(num_features, f_[t] + offset); } } // Sets the given range to random values. void NetworkIO::Randomize(int t, int offset, int num_features, TRand* randomizer) { if (int_mode_) { inT8* line = i_[t] + offset; for (int i = 0; i < num_features; ++i) line[i] = IntCastRounded(randomizer->SignedRand(MAX_INT8)); } else { // float mode. float* line = f_[t] + offset; for (int i = 0; i < num_features; ++i) line[i] = randomizer->SignedRand(1.0); } } // Helper returns the label and score of the best choice over a range. int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float* rating, float* certainty) const { if (t_end <= t_start) return -1; int max_char = -1; float min_score = 0.0f; for (int c = 0; c < NumFeatures(); ++c) { if (c == not_this || c == null_ch) continue; ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty); if (max_char < 0 || *rating < min_score) { min_score = *rating; max_char = c; } } ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty); return max_char; } // Helper returns the rating and certainty of the choice over a range in output. 
void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch,
                                float* rating, float* certainty) const {
  ASSERT_HOST(!int_mode_);
  // Outputs default to 0 for an empty or non-positive range.
  *rating = 0.0f;
  *certainty = 0.0f;
  if (t_end <= t_start || t_end <= 0) return;
  // Three running paths are tracked, indexed 0..2:
  //   0: only null_ch has been output so far,
  //   1: currently outputting choice,
  //   2: outputting null_ch again after choice.
  // ratings[] accumulates the negated certainties along each path and certs[]
  // holds the minimum certainty seen on that path.
  float ratings[3] = {0.0f, 0.0f, 0.0f};
  float certs[3] = {0.0f, 0.0f, 0.0f};
  for (int t = t_start; t < t_end; ++t) {
    const float* line = f_[t];
    float score = ProbToCertainty(line[choice]);
    float zero = ProbToCertainty(line[null_ch]);
    if (t == t_start) {
      // Path 2 cannot exist yet; path 1 starts with choice at the first step.
      ratings[2] = MAX_FLOAT32;
      ratings[1] = -score;
      certs[1] = score;
    } else {
      // Allow each path to be entered from its predecessor where that is
      // cheaper. Going from high to low index uses the predecessor's value
      // from the previous timestep.
      for (int i = 2; i >= 1; --i) {
        if (ratings[i] > ratings[i - 1]) {
          ratings[i] = ratings[i - 1];
          certs[i] = certs[i - 1];
        }
      }
      ratings[2] -= zero;
      if (zero < certs[2]) certs[2] = zero;
      ratings[1] -= score;
      if (score < certs[1]) certs[1] = score;
    }
    ratings[0] -= zero;
    if (zero < certs[0]) certs[0] = zero;
  }
  // Only paths that include choice (1 or 2) are eligible as the result.
  int best_i = ratings[2] < ratings[1] ? 2 : 1;
  *rating = ratings[best_i] + t_end - t_start;
  *certainty = certs[best_i];
}

// Returns the index (label) of the best value at the given timestep,
// excluding not_this and not_that, and if not null, sets the score to the
// log of the corresponding value (as computed by ProbToCertainty).
// Returns -1 if no label qualifies.
int NetworkIO::BestLabel(int t, int not_this, int not_that,
                         float* score) const {
  ASSERT_HOST(!int_mode_);
  int best_index = -1;
  float best_score = -MAX_FLOAT32;
  const float* line = f_[t];
  for (int i = 0; i < f_.dim2(); ++i) {
    if (line[i] > best_score && i != not_this && i != not_that) {
      best_score = line[i];
      best_index = i;
    }
  }
  if (score != NULL) *score = ProbToCertainty(best_score);
  return best_index;
}

// Returns the best start position out of [start, end) (into which all labels
// must fit) to obtain the highest cumulative score for the given labels.
int NetworkIO::PositionOfBestMatch(const GenericVector& labels, int start, int end) const { int length = labels.size(); int last_start = end - length; int best_start = -1; double best_score = 0.0; for (int s = start; s <= last_start; ++s) { double score = ScoreOfLabels(labels, s); if (score > best_score || best_start < 0) { best_score = score; best_start = s; } } return best_start; } // Returns the cumulative score of the given labels starting at start, and // using one label per time-step. double NetworkIO::ScoreOfLabels(const GenericVector& labels, int start) const { int length = labels.size(); double score = 0.0; for (int i = 0; i < length; ++i) { score += f_(start + i, labels[i]); } return score; } // Helper function sets all the outputs for a single timestep, such that // label has value ok_score, and the other labels share 1 - ok_score. void NetworkIO::SetActivations(int t, int label, float ok_score) { ASSERT_HOST(!int_mode_); int num_classes = NumFeatures(); float bad_score = (1.0f - ok_score) / (num_classes - 1); float* targets = f_[t]; for (int i = 0; i < num_classes; ++i) targets[i] = bad_score; targets[label] = ok_score; } // Modifies the values, only if needed, so that the given label is // the winner at the given time step t. void NetworkIO::EnsureBestLabel(int t, int label) { ASSERT_HOST(!int_mode_); if (BestLabel(t, NULL) != label) { // Output value needs enhancing. Third all the other elements and add the // remainder to best_label. int num_classes = NumFeatures(); float* targets = f_[t]; for (int c = 0; c < num_classes; ++c) { if (c == label) { targets[c] += (1.0 - targets[c]) * (2 / 3.0); } else { targets[c] /= 3.0; } } } } // Helper function converts prob to certainty taking the minimum into account. /* static */ float NetworkIO::ProbToCertainty(float prob) { return prob > kMinProb ? log(prob) : kMinCertainty; } // Returns true if there is any bad value that is suspiciously like a GT // error. 
Assuming that *this is the difference(gradient) between target // and forward output, returns true if there is a large negative value // (correcting a very confident output) for which there is no corresponding // positive value in an adjacent timestep for the same feature index. This // allows the box-truthed samples to make fine adjustments to position while // stopping other disagreements of confident output with ground truth. bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const { int num_features = NumFeatures(); for (int t = 0; t < Width(); ++t) { const float* features = f_[t]; for (int y = 0; y < num_features; ++y) { float grad = features[y]; if (grad < -confidence_thr) { // Correcting strong output. Check for movement. if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) && (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) { return true; // No strong positive on either side. } } } } return false; } // Reads a single timestep to floats in the range [-1, 1]. void NetworkIO::ReadTimeStep(int t, double* output) const { if (int_mode_) { const inT8* line = i_[t]; for (int i = 0; i < i_.dim2(); ++i) { output[i] = static_cast(line[i]) / MAX_INT8; } } else { const float* line = f_[t]; for (int i = 0; i < f_.dim2(); ++i) { output[i] = static_cast(line[i]); } } } // Adds a single timestep to floats. void NetworkIO::AddTimeStep(int t, double* inout) const { int num_features = NumFeatures(); if (int_mode_) { const inT8* line = i_[t]; for (int i = 0; i < num_features; ++i) { inout[i] += static_cast(line[i]) / MAX_INT8; } } else { const float* line = f_[t]; for (int i = 0; i < num_features; ++i) { inout[i] += line[i]; } } } // Adds part of a single timestep to floats. 
void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, float* inout) const { if (int_mode_) { const inT8* line = i_[t] + offset; for (int i = 0; i < num_features; ++i) { inout[i] += static_cast(line[i]) / MAX_INT8; } } else { const float* line = f_[t] + offset; for (int i = 0; i < num_features; ++i) { inout[i] += line[i]; } } } // Writes a single timestep from floats in the range [-1, 1]. void NetworkIO::WriteTimeStep(int t, const double* input) { WriteTimeStepPart(t, 0, NumFeatures(), input); } // Writes a single timestep from floats in the range [-1, 1] writing only // num_features elements of input to (*this)[t], starting at offset. void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const double* input) { if (int_mode_) { inT8* line = i_[t] + offset; for (int i = 0; i < num_features; ++i) { line[i] = ClipToRange(IntCastRounded(input[i] * MAX_INT8), -MAX_INT8, MAX_INT8); } } else { float* line = f_[t] + offset; for (int i = 0; i < num_features; ++i) { line[i] = static_cast(input[i]); } } } // Maxpools a single time step from src. void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t, int* max_line) { ASSERT_HOST(int_mode_ == src.int_mode_); if (int_mode_) { int dim = i_.dim2(); inT8* dest_line = i_[dest_t]; const inT8* src_line = src.i_[src_t]; for (int i = 0; i < dim; ++i) { if (dest_line[i] < src_line[i]) { dest_line[i] = src_line[i]; max_line[i] = src_t; } } } else { int dim = f_.dim2(); float* dest_line = f_[dest_t]; const float* src_line = src.f_[src_t]; for (int i = 0; i < dim; ++i) { if (dest_line[i] < src_line[i]) { dest_line[i] = src_line[i]; max_line[i] = src_t; } } } } // Runs maxpool backward, using maxes to index timesteps in *this. 
void NetworkIO::MaxpoolBackward(const NetworkIO& fwd, const GENERIC_2D_ARRAY& maxes) { ASSERT_HOST(!int_mode_); Zero(); StrideMap::Index index(fwd.stride_map_); do { int t = index.t(); const int* max_line = maxes[t]; const float* fwd_line = fwd.f_[t]; int num_features = fwd.f_.dim2(); for (int i = 0; i < num_features; ++i) { f_[max_line[i]][i] = fwd_line[i]; } } while (index.Increment()); } // Returns the min over time of the maxes over features of the outputs. float NetworkIO::MinOfMaxes() const { float min_max = 0.0f; int width = Width(); int num_features = NumFeatures(); for (int t = 0; t < width; ++t) { float max_value = -MAX_FLOAT32; if (int_mode_) { const inT8* column = i_[t]; for (int i = 0; i < num_features; ++i) { if (column[i] > max_value) max_value = column[i]; } } else { const float* column = f_[t]; for (int i = 0; i < num_features; ++i) { if (column[i] > max_value) max_value = column[i]; } } if (t == 0 || max_value < min_max) min_max = max_value; } return min_max; } // Computes combined results for a combiner that chooses between an existing // input and itself, with an additional output to indicate the choice. void NetworkIO::CombineOutputs(const NetworkIO& base_output, const NetworkIO& combiner_output) { int no = base_output.NumFeatures(); ASSERT_HOST(combiner_output.NumFeatures() == no + 1); Resize(base_output, no); int width = Width(); if (int_mode_) { // Number of outputs from base and final result. 
for (int t = 0; t < width; ++t) { inT8* out_line = i_[t]; const inT8* base_line = base_output.i_[t]; const inT8* comb_line = combiner_output.i_[t]; float base_weight = static_cast(comb_line[no]) / MAX_INT8; float boost_weight = 1.0f - base_weight; for (int i = 0; i < no; ++i) { out_line[i] = IntCastRounded(base_line[i] * base_weight + comb_line[i] * boost_weight); } } } else { for (int t = 0; t < width; ++t) { float* out_line = f_[t]; const float* base_line = base_output.f_[t]; const float* comb_line = combiner_output.f_[t]; float base_weight = comb_line[no]; float boost_weight = 1.0f - base_weight; for (int i = 0; i < no; ++i) { out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight; } } } } // Computes deltas for a combiner that chooses between 2 sets of inputs. void NetworkIO::ComputeCombinerDeltas(const NetworkIO& fwd_deltas, const NetworkIO& base_output) { ASSERT_HOST(!int_mode_); // Compute the deltas for the combiner. int width = Width(); int no = NumFeatures() - 1; ASSERT_HOST(fwd_deltas.NumFeatures() == no); ASSERT_HOST(base_output.NumFeatures() == no); // Number of outputs from base and final result. for (int t = 0; t < width; ++t) { const float* delta_line = fwd_deltas.f_[t]; const float* base_line = base_output.f_[t]; float* comb_line = f_[t]; float base_weight = comb_line[no]; float boost_weight = 1.0f - base_weight; float max_base_delta = 0.0; for (int i = 0; i < no; ++i) { // What did the combiner actually produce? float output = base_line[i] * base_weight + comb_line[i] * boost_weight; // Reconstruct the target from the delta. float comb_target = delta_line[i] + output; comb_line[i] = comb_target - comb_line[i]; float base_delta = fabs(comb_target - base_line[i]); if (base_delta > max_base_delta) max_base_delta = base_delta; } if (max_base_delta >= 0.5) { // The base network got it wrong. The combiner should output the right // answer and 0 for the base network. 
comb_line[no] = 0.0 - base_weight; } else { // The base network was right. The combiner should flag that. for (int i = 0; i < no; ++i) { // All other targets are 0. if (comb_line[i] > 0.0) comb_line[i] -= 1.0; } comb_line[no] = 1.0 - base_weight; } } } // Copies the array checking that the types match. void NetworkIO::CopyAll(const NetworkIO& src) { ASSERT_HOST(src.int_mode_ == int_mode_); f_ = src.f_; } // Checks that both are floats and adds the src array to *this. void NetworkIO::AddAllToFloat(const NetworkIO& src) { ASSERT_HOST(!int_mode_); ASSERT_HOST(!src.int_mode_); f_ += src.f_; } // Subtracts the array from a float array. src must also be float. void NetworkIO::SubtractAllFromFloat(const NetworkIO& src) { ASSERT_HOST(!int_mode_); ASSERT_HOST(!src.int_mode_); f_ -= src.f_; } // Copies src to *this, with maxabs normalization to match scale. void NetworkIO::CopyWithNormalization(const NetworkIO& src, const NetworkIO& scale) { ASSERT_HOST(!int_mode_); ASSERT_HOST(!src.int_mode_); ASSERT_HOST(!scale.int_mode_); float src_max = src.f_.MaxAbs(); ASSERT_HOST(std::isfinite(src_max)); float scale_max = scale.f_.MaxAbs(); ASSERT_HOST(std::isfinite(scale_max)); if (src_max > 0.0f) { float factor = scale_max / src_max; for (int t = 0; t < src.Width(); ++t) { const float* src_ptr = src.f_[t]; float* dest_ptr = f_[t]; for (int i = 0; i < src.f_.dim2(); ++i) dest_ptr[i] = src_ptr[i] * factor; } } else { f_.Clear(); } } // Copies src to *this with independent reversal of the y dimension. 
void NetworkIO::CopyWithYReversal(const NetworkIO& src) {
  int num_features = src.NumFeatures();
  Resize(src, num_features);
  // b_index steps through the batch, one image per iteration.
  StrideMap::Index b_index(src.stride_map_);
  do {
    // Number of valid x positions in this batch element.
    int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
    // fwd_index walks the rows from the first one; rev_index starts on the
    // last valid row of the same image and walks backwards.
    StrideMap::Index fwd_index(b_index);
    StrideMap::Index rev_index(b_index);
    rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT);
    do {
      int fwd_t = fwd_index.t();
      int rev_t = rev_index.t();
      // Copy a whole row: width consecutive timesteps from each start index.
      for (int x = 0; x < width; ++x) CopyTimeStepFrom(rev_t++, src, fwd_t++);
    } while (fwd_index.AddOffset(1, FD_HEIGHT) &&
             rev_index.AddOffset(-1, FD_HEIGHT));
  } while (b_index.AddOffset(1, FD_BATCH));
}

// Copies src to *this with independent reversal of the x dimension.
void NetworkIO::CopyWithXReversal(const NetworkIO& src) {
  int num_features = src.NumFeatures();
  Resize(src, num_features);
  // b_index steps through the batch, y_index through the rows of one image.
  StrideMap::Index b_index(src.stride_map_);
  do {
    StrideMap::Index y_index(b_index);
    do {
      // fwd_index walks the row left to right; rev_index starts at the last
      // valid x of the same row and walks right to left.
      StrideMap::Index fwd_index(y_index);
      StrideMap::Index rev_index(y_index);
      rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH);
      do {
        CopyTimeStepFrom(rev_index.t(), src, fwd_index.t());
      } while (fwd_index.AddOffset(1, FD_WIDTH) &&
               rev_index.AddOffset(-1, FD_WIDTH));
    } while (y_index.AddOffset(1, FD_HEIGHT));
  } while (b_index.AddOffset(1, FD_BATCH));
}

// Copies src to *this with independent transpose of the x and y dimensions.
void NetworkIO::CopyWithXYTranspose(const NetworkIO& src) { int num_features = src.NumFeatures(); stride_map_ = src.stride_map_; stride_map_.TransposeXY(); ResizeToMap(src.int_mode(), stride_map_, num_features); StrideMap::Index src_b_index(src.stride_map_); StrideMap::Index dest_b_index(stride_map_); do { StrideMap::Index src_y_index(src_b_index); StrideMap::Index dest_x_index(dest_b_index); do { StrideMap::Index src_x_index(src_y_index); StrideMap::Index dest_y_index(dest_x_index); do { CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t()); } while (src_x_index.AddOffset(1, FD_WIDTH) && dest_y_index.AddOffset(1, FD_HEIGHT)); } while (src_y_index.AddOffset(1, FD_HEIGHT) && dest_x_index.AddOffset(1, FD_WIDTH)); } while (src_b_index.AddOffset(1, FD_BATCH) && dest_b_index.AddOffset(1, FD_BATCH)); } // Copies src to *this, at the given feature_offset, returning the total // feature offset after the copy. Multiple calls will stack outputs from // multiple sources in feature space. int NetworkIO::CopyPacking(const NetworkIO& src, int feature_offset) { ASSERT_HOST(int_mode_ == src.int_mode_); int width = src.Width(); ASSERT_HOST(width <= Width()); int num_features = src.NumFeatures(); ASSERT_HOST(num_features + feature_offset <= NumFeatures()); if (int_mode_) { for (int t = 0; t < width; ++t) { memcpy(i_[t] + feature_offset, src.i_[t], num_features * sizeof(i_[t][0])); } for (int t = width; t < i_.dim1(); ++t) { memset(i_[t], 0, num_features * sizeof(i_[t][0])); } } else { for (int t = 0; t < width; ++t) { memcpy(f_[t] + feature_offset, src.f_[t], num_features * sizeof(f_[t][0])); } for (int t = width; t < f_.dim1(); ++t) { memset(f_[t], 0, num_features * sizeof(f_[t][0])); } } return num_features + feature_offset; } // Opposite of CopyPacking, fills *this with a part of src, starting at // feature_offset, and picking num_features. 
void NetworkIO::CopyUnpacking(const NetworkIO& src, int feature_offset, int num_features) { Resize(src, num_features); int width = src.Width(); ASSERT_HOST(num_features + feature_offset <= src.NumFeatures()); if (int_mode_) { for (int t = 0; t < width; ++t) { memcpy(i_[t], src.i_[t] + feature_offset, num_features * sizeof(i_[t][0])); } } else { for (int t = 0; t < width; ++t) { memcpy(f_[t], src.f_[t] + feature_offset, num_features * sizeof(f_[t][0])); } } } // Transposes the float part of *this into dest. void NetworkIO::Transpose(TransposedArray* dest) const { int width = Width(); dest->ResizeNoInit(NumFeatures(), width); for (int t = 0; t < width; ++t) dest->WriteStrided(t, f_[t]); } // Clips the content of a single time-step to +/-range. void NetworkIO::ClipVector(int t, float range) { ASSERT_HOST(!int_mode_); float* v = f_[t]; int dim = f_.dim2(); for (int i = 0; i < dim; ++i) v[i] = ClipToRange(v[i], -range, range); } } // namespace tesseract.