mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-03 00:49:01 +08:00
144 lines
5.9 KiB
C++
144 lines
5.9 KiB
C++
///////////////////////////////////////////////////////////////////////
// File:        plumbing.h
// Description: Base class for networks that organize other networks,
//              e.g. series or parallel.
// Author:      Ray Smith
// Created:     Mon May 12 08:11:36 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef TESSERACT_LSTM_PLUMBING_H_
|
|
#define TESSERACT_LSTM_PLUMBING_H_
|
|
|
|
#include "genericvector.h"
|
|
#include "matrix.h"
|
|
#include "network.h"
|
|
|
|
namespace tesseract {
|
|
|
|
// Holds a collection of other networks and forwards calls to each of them.
|
|
class Plumbing : public Network {
|
|
public:
|
|
// ni_ and no_ will be set by AddToStack.
|
|
explicit Plumbing(const STRING& name);
|
|
virtual ~Plumbing();
|
|
|
|
// Returns the required shape input to the network.
|
|
virtual StaticShape InputShape() const { return stack_[0]->InputShape(); }
|
|
virtual STRING spec() const {
|
|
return "Sub-classes of Plumbing must implement spec()!";
|
|
}
|
|
|
|
// Returns true if the given type is derived from Plumbing, and thus contains
|
|
// multiple sub-networks that can have their own learning rate.
|
|
virtual bool IsPlumbingType() const { return true; }
|
|
|
|
// Suspends/Enables training by setting the training_ flag. Serialize and
|
|
// DeSerialize only operate on the run-time data if state is false.
|
|
virtual void SetEnableTraining(bool state);
|
|
|
|
// Sets flags that control the action of the network. See NetworkFlags enum
|
|
// for bit values.
|
|
virtual void SetNetworkFlags(uinT32 flags);
|
|
|
|
// Sets up the network for training. Initializes weights using weights of
|
|
// scale `range` picked according to the random number generator `randomizer`.
|
|
// Note that randomizer is a borrowed pointer that should outlive the network
|
|
// and should not be deleted by any of the networks.
|
|
// Returns the number of weights initialized.
|
|
virtual int InitWeights(float range, TRand* randomizer);
|
|
|
|
// Converts a float network to an int network.
|
|
virtual void ConvertToInt();
|
|
|
|
// Provides a pointer to a TRand for any networks that care to use it.
|
|
// Note that randomizer is a borrowed pointer that should outlive the network
|
|
// and should not be deleted by any of the networks.
|
|
virtual void SetRandomizer(TRand* randomizer);
|
|
|
|
// Adds the given network to the stack.
|
|
virtual void AddToStack(Network* network);
|
|
|
|
// Sets needs_to_backprop_ to needs_backprop and returns true if
|
|
// needs_backprop || any weights in this network so the next layer forward
|
|
// can be told to produce backprop for this layer if needed.
|
|
virtual bool SetupNeedsBackprop(bool needs_backprop);
|
|
|
|
// Returns an integer reduction factor that the network applies to the
|
|
// time sequence. Assumes that any 2-d is already eliminated. Used for
|
|
// scaling bounding boxes of truth data.
|
|
// WARNING: if GlobalMinimax is used to vary the scale, this will return
|
|
// the last used scale factor. Call it before any forward, and it will return
|
|
// the minimum scale factor of the paths through the GlobalMinimax.
|
|
virtual int XScaleFactor() const;
|
|
|
|
// Provides the (minimum) x scale factor to the network (of interest only to
|
|
// input units) so they can determine how to scale bounding boxes.
|
|
virtual void CacheXScaleFactor(int factor);
|
|
|
|
// Provides debug output on the weights.
|
|
virtual void DebugWeights();
|
|
|
|
// Returns the current stack.
|
|
const PointerVector<Network>& stack() const {
|
|
return stack_;
|
|
}
|
|
// Returns a set of strings representing the layer-ids of all layers below.
|
|
void EnumerateLayers(const STRING* prefix,
|
|
GenericVector<STRING>* layers) const;
|
|
// Returns a pointer to the network layer corresponding to the given id.
|
|
Network* GetLayer(const char* id) const;
|
|
// Returns the learning rate for a specific layer of the stack.
|
|
float LayerLearningRate(const char* id) const {
|
|
const float* lr_ptr = LayerLearningRatePtr(id);
|
|
ASSERT_HOST(lr_ptr != NULL);
|
|
return *lr_ptr;
|
|
}
|
|
// Scales the learning rate for a specific layer of the stack.
|
|
void ScaleLayerLearningRate(const char* id, double factor) {
|
|
float* lr_ptr = LayerLearningRatePtr(id);
|
|
ASSERT_HOST(lr_ptr != NULL);
|
|
*lr_ptr *= factor;
|
|
}
|
|
// Returns a pointer to the learning rate for the given layer id.
|
|
float* LayerLearningRatePtr(const char* id) const;
|
|
|
|
// Writes to the given file. Returns false in case of error.
|
|
virtual bool Serialize(TFile* fp) const;
|
|
// Reads from the given file. Returns false in case of error.
|
|
// If swap is true, assumes a big/little-endian swap is needed.
|
|
virtual bool DeSerialize(bool swap, TFile* fp);
|
|
|
|
// Updates the weights using the given learning rate and momentum.
|
|
// num_samples is the quotient to be used in the adagrad computation iff
|
|
// use_ada_grad_ is true.
|
|
virtual void Update(float learning_rate, float momentum, int num_samples);
|
|
// Sums the products of weight updates in *this and other, splitting into
|
|
// positive (same direction) in *same and negative (different direction) in
|
|
// *changed.
|
|
virtual void CountAlternators(const Network& other, double* same,
|
|
double* changed) const;
|
|
|
|
protected:
|
|
// The networks.
|
|
PointerVector<Network> stack_;
|
|
// Layer-specific learning rate iff network_flags_ & NF_LAYER_SPECIFIC_LR.
|
|
// One element for each element of stack_.
|
|
GenericVector<float> learning_rates_;
|
|
};
|
|
|
|
} // namespace tesseract.
|
|
|
|
#endif // TESSERACT_LSTM_PLUMBING_H_
|
|
|