// nnet-modify-learning-rates — Kaldi nnet2 command-line tool entry point.
//
// NOTE(review): this chunk is a damaged extraction of the tool's main():
// the original file's own line numbers (41, 43, 46, ...) have been fused
// into the statements, lines are split mid-token, and whole statements are
// missing (e.g. the `try {`, the `ParseOptions po(usage);` declaration,
// the declarations of average_learning_rate / first_layer_factor /
// last_layer_factor, the model Read() calls, and the closing braces).
// The code is therefore left byte-identical; the comments below describe
// only what the visible fragments establish and hedge everything else.
//
// Purpose (from the usage string below): modify per-layer learning rates so
// that the relative parameter change is equalized across layers, while
// keeping the geometric mean of the learning rates unchanged (or setting it
// to the value given by --average-learning-rate).
41 int main(
int argc,
char *argv[]) {
43 using namespace kaldi;
46 typedef kaldi::int64 int64;
// Usage/help text for the tool (original source lines 49-57, concatenated
// string literals with the original line numbers fused in by extraction).
49 "This program modifies the learning rates so as to equalize the\n" 50 "relative changes in parameters for each layer, while keeping their\n" 51 "geometric mean the same (or changing it to a value specified using\n" 52 "the --average-learning-rate option).\n" 54 "Usage: nnet-modify-learning-rates [options] <prev-model> \\\n" 55 " <cur-model> <modified-cur-model>\n" 56 "e.g.: nnet-modify-learning-rates --average-learning-rate=0.0002 \\\n" 57 " 5.mdl 6.mdl 6.mdl\n";
// Command-line option defaults.
59 bool binary_write =
true;
60 bool retroactive =
false;
// Option registration on a ParseOptions object `po` (its declaration is
// missing from this extraction).  The registered pointers
// &average_learning_rate, &first_layer_factor, &last_layer_factor are also
// declared in lines the extraction dropped.
66 po.
Register(
"binary", &binary_write,
"Write output in binary mode");
67 po.
Register(
"average-learning-rate", &average_learning_rate,
68 "If supplied, change learning rate geometric mean to the given " 70 po.
Register(
"first-layer-factor", &first_layer_factor,
"Factor that " 71 "reduces the target relative learning rate for first layer.");
72 po.
Register(
"last-layer-factor", &last_layer_factor,
"Factor that " 73 "reduces the target relative learning rate for last layer.");
74 po.
Register(
"retroactive", &retroactive,
"If true, scale the parameter " 75 "differences as well.");
// Positional arguments: <prev-model> <cur-model> and an optional third
// argument <modified-cur-model> (read with GetOptArg, so it may be empty).
86 std::string prev_nnet_rxfilename = po.
GetArg(1),
87 cur_nnet_rxfilename = po.
GetArg(2),
88 modified_cur_nnet_rxfilename = po.
GetOptArg(3);
// Open the previous and current acoustic models.  The Read() calls that
// consume these Input streams are missing from this extraction — presumably
// they fill am_prev_nnet / am_cur_nnet (TODO confirm against upstream).
91 AmNnet am_prev_nnet, am_cur_nnet;
94 Input ki(prev_nnet_rxfilename, &binary_read);
100 Input ki(cur_nnet_rxfilename, &binary_read);
// If the two models' parameter dims differ, only warn: equalization of
// per-layer relative changes is then impossible.
107 KALDI_WARN <<
"Parameter-dim mismatch, cannot equalize the relative " 108 <<
"changes in parameters for each layer.";
// Per-updatable-component dot product of the parameter difference with
// itself: relative_diff(i) measures how much layer i's parameters changed.
120 diff_nnet.ComponentDotProducts(diff_nnet, &relative_diff);
127 KALDI_LOG <<
// First pass over the updatable components: the visible body only tests
// relative_diff(i) == 0.0 — presumably it counts the zero entries into
// num_zero, which is used below (TODO confirm; the loop body was dropped).
"Relative parameter differences per layer are " 133 for (int32
i = 0;
i < num_updatable;
i++) {
134 if (relative_diff(
i) == 0.0) {
// Replace each zero entry of relative_diff with the mean of the non-zero
// entries, so later ratios/geometric means stay well-defined.
139 BaseFloat average_diff = relative_diff.Sum()
140 /
static_cast<BaseFloat>(num_updatable - num_zero);
141 for (int32
i = 0;
i < num_updatable;
i++) {
142 if (relative_diff(
i) == 0.0) {
143 relative_diff(
i) = average_diff;
146 KALDI_LOG <<
"Zeros detected in the relative parameter difference " 147 <<
"vector, updating the vector to " << relative_diff;
// Per-layer learning-rate vectors for the previous and current models
// (the prev_nnet_learning_rates declaration was dropped by extraction).
153 cur_nnet_learning_rates(num_updatable);
156 KALDI_LOG <<
"Learning rates for previous model per layer are " 157 << prev_nnet_learning_rates;
158 KALDI_LOG <<
"Learning rates for current model per layer are " 159 << cur_nnet_learning_rates;
// Target geometric mean of the new learning rates: the current model's
// geometric mean (exp of the mean log), unless --average-learning-rate
// supplied a non-zero override.
163 if (average_learning_rate == 0.0) {
164 target_geometric_mean =
Exp(cur_nnet_learning_rates.SumLog()
165 /
static_cast<BaseFloat>(num_updatable));
167 target_geometric_mean = average_learning_rate;
// Damp the first and last layers by their configured factors, then rescale
// the whole vector so its geometric mean equals target_geometric_mean
// (cur_geometric_mean's computation is partially missing here).
177 nnet_learning_rates(num_updatable - 1) *= last_layer_factor;
179 nnet_learning_rates(0) *= first_layer_factor;
181 /
static_cast<BaseFloat>(num_updatable));
182 nnet_learning_rates.
Scale(target_geometric_mean / cur_geometric_mean);
183 KALDI_LOG <<
"New learning rates for current model per layer are " 184 << nnet_learning_rates;
// Retroactive mode: element-wise ratio new-rate / previous-rate, used to
// scale the already-accumulated parameter differences.
189 scale_factors.
DivElements(prev_nnet_learning_rates);
193 KALDI_LOG <<
"Scale parameter difference retroactively. Scaling factors " 194 <<
"are " << scale_factors;
// Write the modified current model (binary per --binary).  NOTE(review):
// despite the `rxfilename` suffix, this name is used as an output (wxfilename).
202 Output ko(modified_cur_nnet_rxfilename, binary_write);
207 }
// Top-level handler: report any exception to stderr.  The closing braces
// and the return statements were dropped by the extraction.
catch(
const std::exception &e) {
208 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
const Component & GetComponent(int32 c) const
void AddNnet(const VectorBase< BaseFloat > &scales, const Nnet &other)
For each updatable component, adds to it the corresponding element of "other" times the appropriate...
int32 NumUpdatableComponents() const
Returns the number of updatable components.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
Abstract class, basic element of the network, it is a box with defined inputs, outputs, and tranformation functions interface.
void ComponentDotProducts(const Nnet &other, VectorBase< BaseFloat > *dot_prod) const
Real SumLog() const
Returns sum of the logs of the elements.
void Read(std::istream &is, bool binary)
int main(int argc, char *argv[])
virtual int32 GetParameterDim() const
void Register(const std::string &name, bool *ptr, const std::string &doc)
int32 NumComponents() const
Returns number of components — think of this as similar to # of layers, but e.g.
void SetMaxChange(BaseFloat max_change)
void GetLearningRates(VectorBase< BaseFloat > *learning_rates) const
Get all the learning rates in the neural net (the output must have dim equal to NumUpdatableComponent...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Read(std::istream &is, bool binary)
void Write(std::ostream &os, bool binary) const
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void Scale(Real alpha)
Multiplies all elements by this constant.
void SetMaxChange(BaseFloat max_change, Nnet *nnet)
void ScaleComponents(const VectorBase< BaseFloat > &scales)
Scales the parameters of each of the updatable components.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void Write(std::ostream &os, bool binary) const
A class representing a vector.
void SetLearningRates(BaseFloat learning_rates)
Set all the learning rates in the neural net to this value.
#define KALDI_ASSERT(cond)
void ApplyPow(Real power)
Take all elements of vector to a power.
void DivElements(const VectorBase< Real > &v)
Divide element-by-element by a vector.
const Nnet & GetNnet() const
std::string GetOptArg(int param) const