NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts,
                                   const fst::StdVectorFst &den_fst,
                                   Nnet *nnet):
    opts_(opts),
    den_graph_(den_fst, nnet->OutputDim("output")),
    nnet_(nnet),
    compiler_(*nnet, opts_.nnet_config.optimize_config,
              opts_.nnet_config.compiler_config),
    num_minibatches_processed_(0),
    max_change_stats_(*nnet),
    srand_seed_(RandInt(0, 100000)) {
  // ... (option checks and delta-nnet setup elided) ...
  if (opts.nnet_config.read_cache != "") {
    try {
      // ... (open opts.nnet_config.read_cache, then compiler_.ReadCache()) ...
    } catch (...) {
      KALDI_WARN << "Could not open cached computation. "
                    "Probably this is the first training iteration.";
    }
  }
}
void NnetChainTrainer::Train(const NnetChainExample &eg) {
  bool need_model_derivative = true;
  bool use_xent_regularization = (opts_.chain_config.xent_regularize != 0.0);
  ComputationRequest request;
  GetChainComputationRequest(*nnet_, eg, need_model_derivative,
                             opts_.nnet_config.store_component_stats,
                             use_xent_regularization, need_model_derivative,
                             &request);
  std::shared_ptr<const NnetComputation> computation =
      compiler_.Compile(request);
  if (opts_.nnet_config.backstitch_training_scale > 0.0 &&
      num_minibatches_processed_ % opts_.nnet_config.backstitch_training_interval
          == srand_seed_ % opts_.nnet_config.backstitch_training_interval) {
    // Two passes over this minibatch: a negative then a positive step.
    bool is_backstitch_step1 = true;
    TrainInternalBackstitch(eg, *computation, is_backstitch_step1);
    is_backstitch_step1 = false;
    TrainInternalBackstitch(eg, *computation, is_backstitch_step1);
  } else {  // conventional training
    TrainInternal(eg, *computation);
  }
  num_minibatches_processed_++;
}
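For orientation, the two backstitch passes above implement the update sketched below (notation is mine, not from the source: $\alpha$ is backstitch_training_scale and $\Delta(\theta)$ is the parameter step this minibatch yields at parameters $\theta$):

\theta' = \theta - \alpha\,\Delta(\theta), \qquad
\theta_{\mathrm{new}} = \theta' + (1 + \alpha)\,\Delta(\theta')

With $\alpha = 0$ this reduces to the conventional single update $\theta + \Delta(\theta)$, matching the else-branch.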
void NnetChainTrainer::TrainInternalBackstitch(const NnetChainExample &eg,
                                               const NnetComputation &computation,
                                               bool is_backstitch_step1) {
  NnetComputer computer(opts_.nnet_config.compute_config, computation,
                        nnet_, delta_nnet_);
  computer.AcceptInputs(*nnet_, eg.inputs);
  computer.Run();  // forward pass
  bool is_backstitch_step2 = !is_backstitch_step1;
  this->ProcessOutputs(is_backstitch_step2, eg, &computer);
  computer.Run();  // backward pass
  BaseFloat max_change_scale, scale_adding;
  if (is_backstitch_step1) {
    // Step 1: the delta is subtracted, scaled by backstitch_training_scale.
    max_change_scale = opts_.nnet_config.backstitch_training_scale;
    scale_adding = -opts_.nnet_config.backstitch_training_scale;
  } else {
    // Step 2: the delta is added, scaled by 1 + backstitch_training_scale.
    max_change_scale = 1.0 + opts_.nnet_config.backstitch_training_scale;
    scale_adding = 1.0 + opts_.nnet_config.backstitch_training_scale;
  }
  // ... (L2 regularization gradient added to delta_nnet_; see below) ...
  UpdateNnetWithMaxChange(*delta_nnet_, opts_.nnet_config.max_param_change,
                          max_change_scale, scale_adding, nnet_,
                          &max_change_stats_);
  if (is_backstitch_step1)
    ConstrainOrthonormal(nnet_);  // only on the first step, for efficiency
  if (!is_backstitch_step1)
    ScaleBatchnormStats(opts_.nnet_config.batchnorm_stats_scale, nnet_);
  ScaleNnet(0.0, delta_nnet_);  // zero the delta for the next minibatch
}
void NnetChainTrainer::ProcessOutputs(bool is_backstitch_step2,
                                      const NnetChainExample &eg,
                                      NnetComputer *computer) {
  // In backstitch training, stats from the second step get a suffix.
  const std::string suffix = (is_backstitch_step2 ? "_backstitch" : "");
  std::vector<NnetChainSupervision>::const_iterator iter = eg.outputs.begin(),
      end = eg.outputs.end();
  for (; iter != end; ++iter) {
    const NnetChainSupervision &sup = *iter;
    int32 node_index = nnet_->GetNodeIndex(sup.name);
    if (node_index < 0 || !nnet_->IsOutputNode(node_index))
      KALDI_ERR << "Network has no output named " << sup.name;
    const CuMatrixBase<BaseFloat> &nnet_output = computer->GetOutput(sup.name);
    CuMatrix<BaseFloat> nnet_output_deriv(nnet_output.NumRows(),
                                          nnet_output.NumCols(), kUndefined);
    bool use_xent = (opts_.chain_config.xent_regularize != 0.0);
    std::string xent_name = sup.name + "-xent";  // typically "output-xent"
    CuMatrix<BaseFloat> xent_deriv;
    BaseFloat tot_objf, tot_l2_term, tot_weight;
    ComputeChainObjfAndDeriv(opts_.chain_config, den_graph_, sup.supervision,
                             nnet_output, &tot_objf, &tot_l2_term, &tot_weight,
                             &nnet_output_deriv,
                             (use_xent ? &xent_deriv : NULL));
    if (use_xent) {
      // Cross-entropy objf: elementwise dot of log-probs with posteriors.
      BaseFloat xent_objf = TraceMatMat(computer->GetOutput(xent_name),
                                        xent_deriv, kTrans);
      objf_info_[xent_name + suffix].UpdateStats(xent_name + suffix,
          opts_.nnet_config.print_interval, num_minibatches_processed_,
          tot_weight, xent_objf);
    }
    if (opts_.apply_deriv_weights && sup.deriv_weights.Dim() != 0) {
      CuVector<BaseFloat> cu_deriv_weights(sup.deriv_weights);
      nnet_output_deriv.MulRowsVec(cu_deriv_weights);
    }
    computer->AcceptInput(sup.name, &nnet_output_deriv);
    objf_info_[sup.name + suffix].UpdateStats(sup.name + suffix,
        opts_.nnet_config.print_interval, num_minibatches_processed_,
        tot_weight, tot_objf, tot_l2_term);
    // ... (xent_deriv scaled and fed back if used) ...
  }
}
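A note on the cross-entropy branch above: TraceMatMat(A, B, kTrans) computes $\operatorname{tr}(AB^{\top}) = \sum_{i,j} A_{ij}B_{ij}$, so assuming the xent output $X$ holds log-probabilities and xent_deriv holds numerator posteriors $\Gamma$ at that point, the accumulated objective is

\mathrm{xent\_objf} \;=\; \operatorname{tr}\bigl(X\Gamma^{\top}\bigr)
\;=\; \sum_{t,i} \gamma_{t,i}\,\log p_{t,i}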
bool NnetChainTrainer::PrintTotalStats() const {
  unordered_map<std::string, ObjectiveFunctionInfo, StringHasher>::const_iterator
      iter = objf_info_.begin(), end = objf_info_.end();
  for (; iter != end; ++iter) {
    const std::string &name = iter->first;
    // ... (prints accumulated objective stats for each output) ...
  }
}
void TrainInternal(const NnetChainExample &eg, const NnetComputation &computation)
void ScaleNnet(BaseFloat scale, Nnet *nnet)
Scales the nnet parameters and stats by this scale.
Vector< BaseFloat > deriv_weights
This is a vector of per-frame weights, required to be between 0 and 1, that is applied to the derivat...
void TrainInternalBackstitch(const NnetChainExample &eg, const NnetComputation &computation, bool is_backstitch_step1)
chain::Supervision supervision
The supervision object, containing the FST.
std::vector< NnetIo > inputs
'inputs' contains the input to the network -- normally it has just one element called "input"...
void ScaleBatchnormStats(BaseFloat batchnorm_stats_scale, Nnet *nnet)
This function scales the batchnorm stats of any batchnorm components (components of type BatchNormComp...
const NnetChainTrainingOptions opts_
BaseFloat l2_regularize_factor
CuMatrix< BaseFloat >
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
fst::StdVectorFst StdVectorFst
chain::DenominatorGraph den_graph_
std::string name
the name of the output in the neural net; in simple setups it will just be "output".
int32 backstitch_training_interval
This file contains some miscellaneous functions dealing with class Nnet.
void ConstrainOrthonormal(Nnet *nnet)
This function, to be called after processing every minibatch, is responsible for enforcing the orthog...
void FreezeNaturalGradient(bool freeze, Nnet *nnet)
Controls if natural gradient will be updated.
int32 GetNumNvalues(const std::vector< NnetIo > &io_vec, bool exhaustive)
This utility function can be used to obtain the number of distinct 'n' values in a training example...
void AcceptInput(const std::string &node_name, CuMatrix< BaseFloat > *input)
e.g.
void ResetGenerators(Nnet *nnet)
This function calls 'ResetGenerator()' on all components in 'nnet' that inherit from class RandomComp...
const CuMatrixBase< BaseFloat > & GetOutput(const std::string &node_name)
bool IsOutputNode(int32 node) const
Returns true if this is an output node, meaning that it is of type kDescriptor and is not directly fo...
void ApplyL2Regularization(const Nnet &nnet, BaseFloat l2_regularize_scale, Nnet *delta_nnet)
This function is used as part of the regular training workflow, prior to UpdateNnetWithMaxChange().
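As a sketch of that ordering, mirroring the calls in TrainInternalBackstitch above (scale_adding, max_change_scale, eg and the trainer members are assumed in scope):

// Fold the L2 gradient into delta_nnet_ first, then apply the
// max-change-limited parameter update.
ApplyL2Regularization(*nnet_,
    scale_adding * GetNumNvalues(eg.inputs, false) *
        opts_.nnet_config.l2_regularize_factor,
    delta_nnet_);
UpdateNnetWithMaxChange(*delta_nnet_, opts_.nnet_config.max_param_change,
                        max_change_scale, scale_adding, nnet_,
                        &max_change_stats_);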
CachingOptimizingCompiler compiler_
bool store_component_stats
std::vector< NnetChainSupervision > outputs
'outputs' contains the chain output supervision.
MaxChangeStats max_change_stats_
void AcceptInputs(const Nnet &nnet, const std::vector< NnetIo > &io)
This convenience function calls AcceptInput() in turn on all the inputs in the training example...
int32 num_minibatches_processed_
NnetChainExample is like NnetExample, but specialized for lattice-free (chain) training.
NnetTrainerOptions nnet_config
void ZeroComponentStats(Nnet *nnet)
Zeroes the component stats in all nonlinear components in the nnet.
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
bool PrintTotalStats(const std::string &output_name) const
void Train(const NnetChainExample &eg)
BaseFloat max_param_change
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
void ReadCache(std::istream &is, bool binary)
CuMatrixBase< BaseFloat >
Matrix for CUDA computing.
void WriteCache(std::ostream &os, bool binary)
void ConsolidateMemory(Nnet *nnet)
This just calls ConsolidateMemory() on all the components of the nnet.
class NnetComputer is responsible for executing the computation described in the "computation" object...
#define KALDI_ASSERT(cond)
NnetChainTrainer(const NnetChainTrainingOptions &config, const fst::StdVectorFst &den_fst, Nnet *nnet)
bool PrintTotalStats() const
chain::ChainTrainingOptions chain_config
void Print(const Nnet &nnet) const
int32 GetNodeIndex(const std::string &node_name) const
returns index associated with this node name, or -1 if no such index.
unordered_map< std::string, ObjectiveFunctionInfo, StringHasher > objf_info_
void GetChainComputationRequest(const Nnet &nnet, const NnetChainExample &eg, bool need_model_derivative, bool store_component_stats, bool use_xent_regularization, bool use_xent_derivative, ComputationRequest *request)
This function takes a NnetChainExample and produces a ComputationRequest.
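A minimal usage sketch of this request/compile pairing, mirroring Train() above (nnet, eg and a CachingOptimizingCompiler named compiler are assumed to be in scope):

ComputationRequest request;
GetChainComputationRequest(nnet, eg,
                           /*need_model_derivative=*/true,
                           /*store_component_stats=*/true,
                           /*use_xent_regularization=*/true,
                           /*use_xent_derivative=*/true,
                           &request);
std::shared_ptr<const NnetComputation> computation = compiler.Compile(request);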
NnetComputeOptions compute_config
BaseFloat backstitch_training_scale
BaseFloat batchnorm_stats_scale
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
bool zero_component_stats
bool UpdateNnetWithMaxChange(const Nnet &delta_nnet, BaseFloat max_param_change, BaseFloat max_change_scale, BaseFloat scale, Nnet *nnet, std::vector< int32 > *num_max_change_per_component_applied, int32 *num_max_change_global_applied)
This function does the operation '*nnet += scale * delta_nnet', while respecting any max-parameter-ch...
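To illustrate just the global part of the max-change in isolation, a toy sketch (not Kaldi's implementation; Kaldi additionally enforces per-component limits): the proposed change scale * delta is clipped so its 2-norm never exceeds max_param_change before being added:

#include <cmath>
#include <cstddef>
#include <vector>

// Toy global max-change: scale the proposed update down if its 2-norm
// exceeds max_param_change, then add it to the parameters.
void AddWithMaxChange(const std::vector<float> &delta, float scale,
                      float max_param_change, std::vector<float> *params) {
  double norm = 0.0;
  for (float d : delta) norm += double(scale * d) * double(scale * d);
  norm = std::sqrt(norm);
  double factor = (norm > max_param_change) ? max_param_change / norm : 1.0;
  for (std::size_t i = 0; i < params->size(); ++i)
    (*params)[i] += float(factor) * scale * delta[i];
}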
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
void ProcessOutputs(bool is_backstitch_step2, const NnetChainExample &eg, NnetComputer *computer)
void Run()
This does either the forward or backward computation, depending when it is called (in a typical compu...