Updated comments on RemapOutputs

This commit is contained in:
Ray Smith 2017-09-08 10:24:00 +01:00
parent 0382222d85
commit bf774382e8
13 changed files with 38 additions and 49 deletions

View File

@ -84,11 +84,9 @@ int FullyConnected::InitWeights(float range, TRand* randomizer) {
return num_weights_;
}
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int FullyConnected::RemapOutputs(int old_no, const std::vector<int>& code_map) {
if (type_ == NT_SOFTMAX && no_ == old_no) {
num_weights_ = weights_.RemapOutputs(code_map);

View File

@ -68,11 +68,8 @@ class FullyConnected : public Network {
// Sets up the network for training. Initializes weights using weights of
// scale `range` picked according to the random number generator `randomizer`.
virtual int InitWeights(float range, TRand* randomizer);
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
// Converts a float network to an int network.

View File

@ -140,11 +140,8 @@ int LSTM::InitWeights(float range, TRand* randomizer) {
return num_weights_;
}
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int LSTM::RemapOutputs(int old_no, const std::vector<int>& code_map) {
if (softmax_ != NULL) {
num_weights_ -= softmax_->num_weights();

View File

@ -76,11 +76,8 @@ class LSTM : public Network {
// Sets up the network for training. Initializes weights using weights of
// scale `range` picked according to the random number generator `randomizer`.
virtual int InitWeights(float range, TRand* randomizer);
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
// Converts a float network to an int network.

View File

@ -135,7 +135,7 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char* filename,
filename == old_traineddata) {
return true; // Normal checkpoint load complete.
}
tprintf("Code range changed from %d to %d!!\n", network_->NumOutputs(),
tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
recoder_.code_range());
if (old_traineddata == nullptr || *old_traineddata == '\0') {
tprintf("Must supply the old traineddata for code conversion!\n");

View File

@ -99,7 +99,7 @@ class LSTMTrainer : public LSTMRecognizer {
// Tries to deserialize a trainer from the given file and silently returns
// false in case of failure. If old_traineddata is not null, then it is
// assumed that the character set is to be re-mapped from old_traininddata to
// assumed that the character set is to be re-mapped from old_traineddata to
// the new, with consequent change in weight matrices etc.
bool TryLoadingCheckpoint(const char* filename, const char* old_traineddata);

View File

@ -172,11 +172,17 @@ class Network {
// and should not be deleted by any of the networks.
// Returns the number of weights initialized.
virtual int InitWeights(float range, TRand* randomizer);
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Changes the number of outputs to the outside world to the size of the given
// code_map. Recursively searches the entire network for Softmax layers that
// have exactly old_no outputs, and operates only on those, leaving all others
// unchanged. This enables networks with multiple output layers to get all
// their softmaxes updated, but if an internal layer uses one of those
// softmaxes for input, then the inputs will effectively be scrambled.
// TODO(rays) Fix this before any such network is implemented.
// The softmaxes are resized by copying the old weight matrix entries for each
// output from code_map[output] where non-negative, and by using the mean
// (over all outputs) of the existing weights for all outputs with negative
// code_map entries. Returns the new number of weights.
virtual int RemapOutputs(int old_no, const std::vector<int>& code_map) {
// Base-class default: ignores old_no and code_map, changes nothing, and
// reports 0 weights.
return 0;
}

View File

@ -57,11 +57,8 @@ int Plumbing::InitWeights(float range, TRand* randomizer) {
return num_weights_;
}
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int Plumbing::RemapOutputs(int old_no, const std::vector<int>& code_map) {
num_weights_ = 0;
for (int i = 0; i < stack_.size(); ++i) {

View File

@ -57,11 +57,8 @@ class Plumbing : public Network {
// and should not be deleted by any of the networks.
// Returns the number of weights initialized.
virtual int InitWeights(float range, TRand* randomizer);
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
// Converts a float network to an int network.

View File

@ -60,11 +60,8 @@ int Series::InitWeights(float range, TRand* randomizer) {
return num_weights_;
}
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int Series::RemapOutputs(int old_no, const std::vector<int>& code_map) {
num_weights_ = 0;
tprintf("Num (Extended) outputs,weights in Series:\n");

View File

@ -46,11 +46,8 @@ class Series : public Plumbing {
// scale `range` picked according to the random number generator `randomizer`.
// Returns the number of weights initialized.
virtual int InitWeights(float range, TRand* randomizer);
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights. Only operates on Softmax layers with old_no outputs.
// Recursively searches the network for softmaxes with old_no outputs,
// and remaps their outputs according to code_map. See network.h for details.
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
// Sets needs_to_backprop_ to needs_backprop and returns true if

View File

@ -61,7 +61,10 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam,
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights.
// weights. Can be used to change the character set addressed by an output
// softmax.
// TODO(rays) A RemapInputs would also be useful, so a change can be made
// in the middle of a network.
int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) {
GENERIC_2D_ARRAY<double> old_wf(wf_);
int old_no = wf_.dim1();

View File

@ -74,7 +74,10 @@ class WeightMatrix {
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and uses the mean (over all outputs) of the existing weights
// for all outputs with negative code_map entries. Returns the new number of
// weights.
// weights. Can be used to change the character set addressed by an output
// softmax.
// TODO(rays) A RemapInputs would also be useful, so a change can be made
// in the middle of a network.
int RemapOutputs(const std::vector<int>& code_map);
// Converts a float network to an int network. Each set of input weights that