NnetTrainer::NnetTrainer(const NnetTrainerOptions &config,
                         Nnet *nnet):
    config_(config),
    nnet_(nnet),
    compiler_(*nnet, config_.optimize_config, config_.compiler_config),
    num_minibatches_processed_(0),
    max_change_stats_(*nnet),
    srand_seed_(RandInt(0, 100000)) {
  // ... (component-stats zeroing and delta_nnet_ setup elided; then, if
  // the computation cache named in the config cannot be opened for reading):
      KALDI_WARN << "Could not open cached computation. "
                    "Probably this is the first training iteration.";
  // ...
}
void NnetTrainer::Train(const NnetExample &eg) {
  bool need_model_derivative = true;
  ComputationRequest request;
  GetComputationRequest(*nnet_, eg, need_model_derivative,
                        config_.store_component_stats, &request);
  std::shared_ptr<const NnetComputation> computation =
      compiler_.Compile(request);

  if (config_.backstitch_training_scale > 0.0 &&
      num_minibatches_processed_ % config_.backstitch_training_interval ==
      srand_seed_ % config_.backstitch_training_interval) {
    bool is_backstitch_step1 = true;
    TrainInternalBackstitch(eg, *computation, is_backstitch_step1);
    // ... (un-freeze natural gradient, re-seed the RNG generators) ...
    is_backstitch_step1 = false;
    TrainInternalBackstitch(eg, *computation, is_backstitch_step1);
  } else {  // conventional training
    TrainInternal(eg, *computation);
  }
  num_minibatches_processed_++;
}
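The two TrainInternalBackstitch() calls implement the backstitch update of Wang et al. (Interspeech 2017). Writing \alpha for config_.backstitch_training_scale and \Delta(\theta) for the (max-change-limited, learning-rate-scaled) parameter delta computed on this minibatch at parameters \theta, the two sub-steps amount to (a summary of the code, not an excerpt):

\theta' = \theta - \alpha\,\Delta(\theta) \quad \text{(step 1)}, \qquad
\theta_{\mathrm{new}} = \theta' + (1+\alpha)\,\Delta(\theta') \quad \text{(step 2)},

i.e. a small negative step followed by an enlarged positive step from the displaced point; this matches the scale_adding values of -\alpha and 1 + \alpha in TrainInternalBackstitch() below.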
void NnetTrainer::TrainInternalBackstitch(const NnetExample &eg,
                                          const NnetComputation &computation,
                                          bool is_backstitch_step1) {
  // ... (forward/backward computation, as in TrainInternal) ...
  bool is_backstitch_step2 = !is_backstitch_step1;
  // ...
  BaseFloat max_change_scale, scale_adding;
  if (is_backstitch_step1) {
    max_change_scale = config_.backstitch_training_scale;
    scale_adding = -config_.backstitch_training_scale;  // negative step
  } else { /* both get 1.0 + config_.backstitch_training_scale */ }
  UpdateNnetWithMaxChange(*delta_nnet_, config_.max_param_change,
                          max_change_scale, scale_adding, nnet_,
                          &max_change_stats_);
  if (is_backstitch_step1) {
    ConstrainOrthonormal(nnet_);   // only on step 1, for efficiency
  }
  if (!is_backstitch_step1) {
    // scale down batchnorm stats after step 2, before the next minibatch
    ScaleBatchnormStats(config_.batchnorm_stats_scale, nnet_);
  }
}
void NnetTrainer::ProcessOutputs(bool is_backstitch_step2,
                                 const NnetExample &eg,
                                 NnetComputer *computer) {
  // In backstitch training, the output name with the "_backstitch" suffix
  // is the one computed after the first (backward) step of backstitch.
  const std::string suffix = (is_backstitch_step2 ? "_backstitch" : "");
  std::vector<NnetIo>::const_iterator iter = eg.io.begin(),
      end = eg.io.end();
  for (; iter != end; ++iter) {
    const NnetIo &io = *iter;
    // ... (io's that are not output nodes are skipped; obj_type is the
    //      output node's ObjectiveType) ...
    BaseFloat tot_weight, tot_objf;
    bool supply_deriv = true;
    ComputeObjectiveFunction(io.features, obj_type, io.name,
                             supply_deriv, computer,
                             &tot_weight, &tot_objf);
    objf_info_[io.name + suffix].UpdateStats(io.name + suffix,
                                             config_.print_interval,
                                             num_minibatches_processed_,
                                             tot_weight, tot_objf);
  }
}
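Stepping back, here is a minimal sketch of how a training binary drives NnetTrainer, modeled on Kaldi's nnet3-train; the rspecifier/rxfilename variables are placeholders:

Nnet nnet;
ReadKaldiObject(nnet_rxfilename, &nnet);   // load the raw nnet

NnetTrainer trainer(train_config, &nnet);
SequentialNnetExampleReader example_reader(examples_rspecifier);
for (; !example_reader.Done(); example_reader.Next())
  trainer.Train(example_reader.Value());   // one call per minibatch-example

bool ok = trainer.PrintTotalStats();       // log overall objective values
WriteKaldiObject(nnet, nnet_wxfilename, binary_write);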
bool NnetTrainer::PrintTotalStats() const {
  unordered_map<std::string, ObjectiveFunctionInfo, StringHasher>::const_iterator
      iter = objf_info_.begin(),
      end = objf_info_.end();
  std::vector<std::pair<std::string, const ObjectiveFunctionInfo*> > all_pairs;
  for (; iter != end; ++iter)
    all_pairs.push_back(std::pair<std::string, const ObjectiveFunctionInfo*>(
        iter->first, &(iter->second)));
  // ensure a deterministic order of the names (this matters when a script
  // greps the objective values out of the log).
  std::sort(all_pairs.begin(), all_pairs.end());
  bool ans = false;
  for (size_t i = 0; i < all_pairs.size(); i++) {
    const std::string &name = all_pairs[i].first;
    const ObjectiveFunctionInfo &info = *(all_pairs[i].second);
    ans = info.PrintTotalStats(name) || ans;
  }
  max_change_stats_.Print(*nnet_);
  return ans;
}
void ObjectiveFunctionInfo::UpdateStats(
    const std::string &output_name,
    int32 minibatches_per_phase,
    int32 minibatch_counter,
    BaseFloat this_minibatch_weight,
    BaseFloat this_minibatch_tot_objf,
    BaseFloat this_minibatch_tot_aux_objf) {
  int32 phase = minibatch_counter / minibatches_per_phase;
  if (phase != current_phase) {
    // a new phase has begun: print and reset the per-phase stats.
    PrintStatsForThisPhase(output_name, minibatches_per_phase, phase);
    current_phase = phase;
    tot_weight_this_phase = 0.0;
    tot_objf_this_phase = 0.0;
    tot_aux_objf_this_phase = 0.0;
    minibatches_this_phase = 0;
  }
  minibatches_this_phase++;
  tot_weight_this_phase += this_minibatch_weight;
  tot_objf_this_phase += this_minibatch_tot_objf;
  tot_aux_objf_this_phase += this_minibatch_tot_aux_objf;
  tot_weight += this_minibatch_weight;
  tot_objf += this_minibatch_tot_objf;
  tot_aux_objf += this_minibatch_tot_aux_objf;
}
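To make the phase arithmetic concrete: with minibatches_per_phase = 100 (the trainer's print interval), minibatch counters 0-99 fall in phase 0 and 100-199 in phase 1; the first UpdateStats() call of a new phase prints the finished phase's averages and zeroes the *_this_phase accumulators, while tot_weight, tot_objf and tot_aux_objf keep accumulating over the whole run for PrintTotalStats().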
void ObjectiveFunctionInfo::PrintStatsForThisPhase(
    const std::string &output_name,
    int32 minibatches_per_phase, int32 phase) const {
  int32 start_minibatch = current_phase * minibatches_per_phase,
      end_minibatch = phase * minibatches_per_phase - 1;

  if (tot_aux_objf_this_phase == 0.0) {
    if (minibatches_per_phase == minibatches_this_phase) {
      KALDI_LOG << "Average objective function for '" << output_name
                << "' for minibatches " << start_minibatch
                << '-' << end_minibatch << " is "
                << (tot_objf_this_phase / tot_weight_this_phase)
                << " over " << tot_weight_this_phase << " frames.";
    } else {
      KALDI_LOG << "Average objective function for '" << output_name
                << "' using " << minibatches_this_phase
                << " minibatches in minibatch range " << start_minibatch
                << '-' << end_minibatch << " is "
                << (tot_objf_this_phase / tot_weight_this_phase)
                << " over " << tot_weight_this_phase << " frames.";
    }
  } else {
    BaseFloat objf = (tot_objf_this_phase / tot_weight_this_phase),
        aux_objf = (tot_aux_objf_this_phase / tot_weight_this_phase),
        sum_objf = objf + aux_objf;
    if (minibatches_per_phase == minibatches_this_phase) {
      KALDI_LOG << "Average objective function for '" << output_name
                << "' for minibatches " << start_minibatch
                << '-' << end_minibatch << " is "
                << objf << " + " << aux_objf << " = " << sum_objf
                << " over " << tot_weight_this_phase << " frames.";
    } else {
      KALDI_LOG << "Average objective function for '" << output_name
                << "' using " << minibatches_this_phase
                << " minibatches in minibatch range " << start_minibatch
                << '-' << end_minibatch << " is "
                << objf << " + " << aux_objf << " = " << sum_objf
                << " over " << tot_weight_this_phase << " frames.";
    }
  }
}
bool ObjectiveFunctionInfo::PrintTotalStats(const std::string &name) const {
  BaseFloat objf = (tot_objf / tot_weight),
      aux_objf = (tot_aux_objf / tot_weight),
      sum_objf = objf + aux_objf;
  if (tot_aux_objf == 0.0) {
    KALDI_LOG << "Overall average objective function for '" << name
              << "' is " << (tot_objf / tot_weight)
              << " over " << tot_weight << " frames.";
  } else {
    KALDI_LOG << "Overall average objective function for '" << name
              << "' is " << objf << " + " << aux_objf << " = " << sum_objf
              << " over " << tot_weight << " frames.";
  }
  KALDI_LOG << "[this line is to be parsed by a script:] "
            << "log-prob-per-frame=" << objf;
  return (tot_weight != 0.0);
}
void ComputeObjectiveFunction(const GeneralMatrix &supervision,
                              ObjectiveType objective_type,
                              const std::string &output_name,
                              bool supply_deriv,
                              NnetComputer *computer,
                              BaseFloat *tot_weight,
                              BaseFloat *tot_objf) {
  const CuMatrixBase<BaseFloat> &output = computer->GetOutput(output_name);
  if (output.NumCols() != supervision.NumCols())
    KALDI_ERR << "Nnet versus example output dimension (num-classes) "
              << "mismatch for '" << output_name << "': " << output.NumCols()
              << " (nnet) vs. " << supervision.NumCols() << " (egs)\n";

  switch (objective_type) {
    case kLinear: {
      // objective is x * y.
      switch (supervision.Type()) {
        case kSparseMatrix: {
          // ... (copy the sparse supervision to the GPU as cu_post) ...
          *tot_weight = cu_post.Sum();
          // ... (objf via TraceMatSmat; derivative is cu_post itself) ...
          break;
        }
        case kFullMatrix: {
          // ... (copy the full supervision matrix to cu_post) ...
          *tot_weight = cu_post.Sum();
          // ... (objf via TraceMatMat) ...
          break;
        }
        case kCompressedMatrix: {
          // ... (uncompress, then swap into cu_post) ...
          *tot_weight = cu_post.Sum();
          // ... (objf via TraceMatMat) ...
          break;
        }
      }
      break;
    }
    case kQuadratic: {
      // objective is -0.5 (x - y)^2
      // ... (diff is initialized to a copy of the supervision y) ...
      diff.AddMat(-1.0, output);
      *tot_weight = diff.NumRows();
      // ... (objf via TraceMatMat(diff, diff, kTrans); derivative is diff) ...
      break;
    }
    default:
      KALDI_ERR << "Objective function type " << objective_type
                << " not handled.";
  }
}