Updated comments on RemapOutputs

2025-01-18 22:43:45 +08:00 · 2017-09-08 10:24:00 +01:00 · 2017-09-08 10:24:00 +01:00 · bf774382e8
commit bf774382e8
parent 0382222d85
13 changed files with 38 additions and 49 deletions
--- a/lstm/fullyconnected.cpp
+++ b/lstm/fullyconnected.cpp
@@ -84,11 +84,9 @@ int FullyConnected::InitWeights(float range, TRand* randomizer) {
  return num_weights_;
 }

-// Changes the number of outputs to the size of the given code_map, copying
-// the old weight matrix entries for each output from code_map[output] where
-// non-negative, and uses the mean (over all outputs) of the existing weights
-// for all outputs with negative code_map entries. Returns the new number of
-// weights. Only operates on Softmax layers with old_no outputs.
+// Recursively searches the network for softmaxes with old_no outputs,
+// and remaps their outputs according to code_map. See network.h for details.
+
 int FullyConnected::RemapOutputs(int old_no, const std::vector<int>& code_map) {
  if (type_ == NT_SOFTMAX && no_ == old_no) {
    num_weights_ = weights_.RemapOutputs(code_map);
--- a/lstm/fullyconnected.h
+++ b/lstm/fullyconnected.h
@@ -68,11 +68,8 @@ class FullyConnected : public Network {
  // Sets up the network for training. Initializes weights using weights of
  // scale `range` picked according to the random number generator `randomizer`.
  virtual int InitWeights(float range, TRand* randomizer);
-  // Changes the number of outputs to the size of the given code_map, copying
-  // the old weight matrix entries for each output from code_map[output] where
-  // non-negative, and uses the mean (over all outputs) of the existing weights
-  // for all outputs with negative code_map entries. Returns the new number of
-  // weights. Only operates on Softmax layers with old_no outputs.
+  // Recursively searches the network for softmaxes with old_no outputs,
+  // and remaps their outputs according to code_map. See network.h for details.
  int RemapOutputs(int old_no, const std::vector<int>& code_map) override;

  // Converts a float network to an int network.
--- a/lstm/lstm.cpp
+++ b/lstm/lstm.cpp
@@ -140,11 +140,8 @@ int LSTM::InitWeights(float range, TRand* randomizer) {
  return num_weights_;
 }

-// Changes the number of outputs to the size of the given code_map, copying
-// the old weight matrix entries for each output from code_map[output] where
-// non-negative, and uses the mean (over all outputs) of the existing weights
-// for all outputs with negative code_map entries. Returns the new number of
-// weights. Only operates on Softmax layers with old_no outputs.
+// Recursively searches the network for softmaxes with old_no outputs,
+// and remaps their outputs according to code_map. See network.h for details.
 int LSTM::RemapOutputs(int old_no, const std::vector<int>& code_map) {
  if (softmax_ != NULL) {
    num_weights_ -= softmax_->num_weights();
--- a/lstm/lstm.h
+++ b/lstm/lstm.h
@@ -76,11 +76,8 @@ class LSTM : public Network {
  // Sets up the network for training. Initializes weights using weights of
  // scale `range` picked according to the random number generator `randomizer`.
  virtual int InitWeights(float range, TRand* randomizer);
-  // Changes the number of outputs to the size of the given code_map, copying
-  // the old weight matrix entries for each output from code_map[output] where
-  // non-negative, and uses the mean (over all outputs) of the existing weights
-  // for all outputs with negative code_map entries. Returns the new number of
-  // weights. Only operates on Softmax layers with old_no outputs.
+  // Recursively searches the network for softmaxes with old_no outputs,
+  // and remaps their outputs according to code_map. See network.h for details.
  int RemapOutputs(int old_no, const std::vector<int>& code_map) override;

  // Converts a float network to an int network.
--- a/lstm/lstmtrainer.cpp
+++ b/lstm/lstmtrainer.cpp
@@ -135,7 +135,7 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char* filename,
      filename == old_traineddata) {
    return true;  // Normal checkpoint load complete.
  }
-  tprintf("Code range changed from %d to %d!!\n", network_->NumOutputs(),
+  tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
          recoder_.code_range());
  if (old_traineddata == nullptr || *old_traineddata == '\0') {
    tprintf("Must supply the old traineddata for code conversion!\n");
--- a/lstm/lstmtrainer.h
+++ b/lstm/lstmtrainer.h
@@ -99,7 +99,7 @@ class LSTMTrainer : public LSTMRecognizer {

  // Tries to deserialize a trainer from the given file and silently returns
  // false in case of failure. If old_traineddata is not null, then it is
-  // assumed that the character set is to be re-mapped from old_traininddata to
+  // assumed that the character set is to be re-mapped from old_traineddata to
  // the new, with consequent change in weight matrices etc.
  bool TryLoadingCheckpoint(const char* filename, const char* old_traineddata);

--- a/lstm/network.h
+++ b/lstm/network.h
@@ -172,11 +172,17 @@ class Network {
  // and should not be deleted by any of the networks.
  // Returns the number of weights initialized.
  virtual int InitWeights(float range, TRand* randomizer);
-  // Changes the number of outputs to the size of the given code_map, copying
-  // the old weight matrix entries for each output from code_map[output] where
-  // non-negative, and uses the mean (over all outputs) of the existing weights
-  // for all outputs with negative code_map entries. Returns the new number of
-  // weights. Only operates on Softmax layers with old_no outputs.
+  // Changes the number of outputs to the outside world to the size of the given
+  // code_map. Recursively searches the entire network for Softmax layers that
+  // have exactly old_no outputs, and operates only on those, leaving all others
+  // unchanged. This enables networks with multiple output layers to get all
+  // their softmaxes updated, but if an internal layer uses one of those
+  // softmaxes for input, then the inputs will effectively be scrambled.
+  // TODO(rays) Fix this before any such network is implemented.
+  // The softmaxes are resized by copying the old weight matrix entries for each
+  // output from code_map[output] where non-negative, and using the mean (over
+  // all outputs) of the existing weights for all outputs with negative code_map
+  // entries. Returns the new number of weights.
  virtual int RemapOutputs(int old_no, const std::vector<int>& code_map) {
    return 0;
  }
--- a/lstm/plumbing.cpp
+++ b/lstm/plumbing.cpp
@@ -57,11 +57,8 @@ int Plumbing::InitWeights(float range, TRand* randomizer) {
  return num_weights_;
 }

-// Changes the number of outputs to the size of the given code_map, copying
-// the old weight matrix entries for each output from code_map[output] where
-// non-negative, and uses the mean (over all outputs) of the existing weights
-// for all outputs with negative code_map entries. Returns the new number of
-// weights. Only operates on Softmax layers with old_no outputs.
+// Recursively searches the network for softmaxes with old_no outputs,
+// and remaps their outputs according to code_map. See network.h for details.
 int Plumbing::RemapOutputs(int old_no, const std::vector<int>& code_map) {
  num_weights_ = 0;
  for (int i = 0; i < stack_.size(); ++i) {
--- a/lstm/plumbing.h
+++ b/lstm/plumbing.h
@@ -57,11 +57,8 @@ class Plumbing : public Network {
  // and should not be deleted by any of the networks.
  // Returns the number of weights initialized.
  virtual int InitWeights(float range, TRand* randomizer);
-  // Changes the number of outputs to the size of the given code_map, copying
-  // the old weight matrix entries for each output from code_map[output] where
-  // non-negative, and uses the mean (over all outputs) of the existing weights
-  // for all outputs with negative code_map entries. Returns the new number of
-  // weights. Only operates on Softmax layers with old_no outputs.
+  // Recursively searches the network for softmaxes with old_no outputs,
+  // and remaps their outputs according to code_map. See network.h for details.
  int RemapOutputs(int old_no, const std::vector<int>& code_map) override;

  // Converts a float network to an int network.
--- a/lstm/series.cpp
+++ b/lstm/series.cpp
@@ -60,11 +60,8 @@ int Series::InitWeights(float range, TRand* randomizer) {
  return num_weights_;
 }

-// Changes the number of outputs to the size of the given code_map, copying
-// the old weight matrix entries for each output from code_map[output] where
-// non-negative, and uses the mean (over all outputs) of the existing weights
-// for all outputs with negative code_map entries. Returns the new number of
-// weights. Only operates on Softmax layers with old_no outputs.
+// Recursively searches the network for softmaxes with old_no outputs,
+// and remaps their outputs according to code_map. See network.h for details.
 int Series::RemapOutputs(int old_no, const std::vector<int>& code_map) {
  num_weights_ = 0;
  tprintf("Num (Extended) outputs,weights in Series:\n");
--- a/lstm/series.h
+++ b/lstm/series.h
@@ -46,11 +46,8 @@ class Series : public Plumbing {
  // scale `range` picked according to the random number generator `randomizer`.
  // Returns the number of weights initialized.
  virtual int InitWeights(float range, TRand* randomizer);
-  // Changes the number of outputs to the size of the given code_map, copying
-  // the old weight matrix entries for each output from code_map[output] where
-  // non-negative, and uses the mean (over all outputs) of the existing weights
-  // for all outputs with negative code_map entries. Returns the new number of
-  // weights. Only operates on Softmax layers with old_no outputs.
+  // Recursively searches the network for softmaxes with old_no outputs,
+  // and remaps their outputs according to code_map. See network.h for details.
  int RemapOutputs(int old_no, const std::vector<int>& code_map) override;

  // Sets needs_to_backprop_ to needs_backprop and returns true if
--- a/lstm/weightmatrix.cpp
+++ b/lstm/weightmatrix.cpp
@@ -61,7 +61,10 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam,
 // the old weight matrix entries for each output from code_map[output] where
 // non-negative, and uses the mean (over all outputs) of the existing weights
 // for all outputs with negative code_map entries. Returns the new number of
-// weights.
+// weights. Can be used to change the character set addressed by an output
+// softmax.
+// TODO(rays) A RemapInputs would also be useful, so a change can be made
+// in the middle of a network.
 int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) {
  GENERIC_2D_ARRAY<double> old_wf(wf_);
  int old_no = wf_.dim1();
--- a/lstm/weightmatrix.h
+++ b/lstm/weightmatrix.h
@@ -74,7 +74,10 @@ class WeightMatrix {
  // the old weight matrix entries for each output from code_map[output] where
  // non-negative, and uses the mean (over all outputs) of the existing weights
  // for all outputs with negative code_map entries. Returns the new number of
-  // weights.
+  // weights. Can be used to change the character set addressed by an output
+  // softmax.
+  // TODO(rays) A RemapInputs would also be useful, so a change can be made
+  // in the middle of a network.
  int RemapOutputs(const std::vector<int>& code_map);

  // Converts a float network to an int network. Each set of input weights that