NnetTrainer Class Reference

This class is for single-threaded training of neural nets using standard objective functions such as cross-entropy (implemented with logsoftmax nonlinearity and a linear objective function) and quadratic loss. More...

#include <nnet-training.h>

Collaboration diagram for NnetTrainer:

Public Member Functions

 NnetTrainer (const NnetTrainerOptions &config, Nnet *nnet)
 
void Train (const NnetExample &eg)
 
bool PrintTotalStats () const
 
 ~NnetTrainer ()
 

Private Member Functions

void TrainInternal (const NnetExample &eg, const NnetComputation &computation)
 
void TrainInternalBackstitch (const NnetExample &eg, const NnetComputation &computation, bool is_backstitch_step1)
 
void ProcessOutputs (bool is_backstitch_step2, const NnetExample &eg, NnetComputer *computer)
 

Private Attributes

const NnetTrainerOptions config_
 
Nnet * nnet_
 
Nnet * delta_nnet_
 
CachingOptimizingCompiler compiler_
 
int32 num_minibatches_processed_
 
MaxChangeStats max_change_stats_
 
unordered_map< std::string, ObjectiveFunctionInfo, StringHasher > objf_info_
 
int32 srand_seed_
 

Detailed Description

This class is for single-threaded training of neural nets using standard objective functions such as cross-entropy (implemented with logsoftmax nonlinearity and a linear objective function) and quadratic loss.

Something that we should do in the future is to make it possible to have two different threads, one for the compilation, and one for the computation. This would only improve efficiency in the cases where the structure of the input example was different each time, which isn't what we expect to see in speech-recognition training. (If the structure is the same each time, the CachingOptimizingCompiler notices this and uses the computation from last time).

Definition at line 180 of file nnet-training.h.

Constructor & Destructor Documentation

◆ NnetTrainer()

NnetTrainer ( const NnetTrainerOptions &  config,
Nnet *  nnet 
)

Definition at line 27 of file nnet-training.cc.

References NnetTrainerOptions::backstitch_training_interval, NnetTrainer::compiler_, NnetTrainer::config_, Nnet::Copy(), NnetTrainer::delta_nnet_, KALDI_ASSERT, KALDI_LOG, KALDI_WARN, NnetTrainerOptions::max_param_change, NnetTrainerOptions::momentum, NnetTrainer::nnet_, Input::Open(), NnetTrainerOptions::read_cache, CachingOptimizingCompiler::ReadCache(), kaldi::nnet3::ScaleNnet(), Input::Stream(), NnetTrainerOptions::zero_component_stats, and kaldi::nnet3::ZeroComponentStats().

NnetTrainer::NnetTrainer(const NnetTrainerOptions &config,
                         Nnet *nnet):
    config_(config),
    nnet_(nnet),
    compiler_(*nnet, config_.optimize_config, config_.compiler_config),
    num_minibatches_processed_(0),
    max_change_stats_(*nnet),
    srand_seed_(RandInt(0, 100000)) {
  if (config.zero_component_stats)
    ZeroComponentStats(nnet);
  KALDI_ASSERT(config.momentum >= 0.0 &&
               config.max_param_change >= 0.0 &&
               config.backstitch_training_interval > 0);
  delta_nnet_ = nnet_->Copy();
  ScaleNnet(0.0, delta_nnet_);

  if (config_.read_cache != "") {
    bool binary;
    Input ki;
    if (ki.Open(config_.read_cache, &binary)) {
      compiler_.ReadCache(ki.Stream(), binary);
      KALDI_LOG << "Read computation cache from " << config_.read_cache;
    } else {
      KALDI_WARN << "Could not open cached computation. "
                 "Probably this is the first training iteration.";
    }
  }
}

◆ ~NnetTrainer()

Member Function Documentation

◆ PrintTotalStats()

bool PrintTotalStats ( ) const

Definition at line 220 of file nnet-training.cc.

References rnnlm::i, NnetTrainer::max_change_stats_, NnetTrainer::nnet_, NnetTrainer::objf_info_, MaxChangeStats::Print(), and ObjectiveFunctionInfo::PrintTotalStats().

Referenced by main().

bool NnetTrainer::PrintTotalStats() const {
  unordered_map<std::string, ObjectiveFunctionInfo, StringHasher>::const_iterator
      iter = objf_info_.begin(),
      end = objf_info_.end();
  std::vector<std::pair<std::string, const ObjectiveFunctionInfo*> > all_pairs;
  for (; iter != end; ++iter)
    all_pairs.push_back(std::pair<std::string, const ObjectiveFunctionInfo*>(
        iter->first, &(iter->second)));
  // ensure deterministic order of these names (this will matter in situations
  // where a script greps for the objective from the log).
  std::sort(all_pairs.begin(), all_pairs.end());
  bool ans = false;
  for (size_t i = 0; i < all_pairs.size(); i++) {
    const std::string &name = all_pairs[i].first;
    const ObjectiveFunctionInfo &info = *(all_pairs[i].second);
    bool ok = info.PrintTotalStats(name);
    ans = ans || ok;
  }
  max_change_stats_.Print(*nnet_);
  return ans;
}

◆ ProcessOutputs()

void ProcessOutputs ( bool  is_backstitch_step2,
const NnetExample &  eg,
NnetComputer *  computer 
)
private

Definition at line 191 of file nnet-training.cc.

References kaldi::nnet3::ComputeObjectiveFunction(), NnetTrainer::config_, NnetIo::features, Nnet::GetNode(), Nnet::GetNodeIndex(), NnetExample::io, Nnet::IsOutputNode(), KALDI_ASSERT, NnetIo::name, NnetTrainer::nnet_, NnetTrainer::num_minibatches_processed_, NetworkNode::objective_type, NnetTrainer::objf_info_, NnetTrainerOptions::print_interval, and NetworkNode::u.

Referenced by NnetTrainer::TrainInternal(), and NnetTrainer::TrainInternalBackstitch().

void NnetTrainer::ProcessOutputs(bool is_backstitch_step2,
                                 const NnetExample &eg,
                                 NnetComputer *computer) {
  // normally the eg will have just one output named 'output', but
  // we don't assume this.
  // In backstitch training, the output-name with the "_backstitch" suffix is
  // the one computed after the first, backward step of backstitch.
  const std::string suffix = (is_backstitch_step2 ? "_backstitch" : "");
  std::vector<NnetIo>::const_iterator iter = eg.io.begin(),
      end = eg.io.end();
  for (; iter != end; ++iter) {
    const NnetIo &io = *iter;
    int32 node_index = nnet_->GetNodeIndex(io.name);
    KALDI_ASSERT(node_index >= 0);
    if (nnet_->IsOutputNode(node_index)) {
      ObjectiveType obj_type = nnet_->GetNode(node_index).u.objective_type;
      BaseFloat tot_weight, tot_objf;
      bool supply_deriv = true;
      ComputeObjectiveFunction(io.features, obj_type, io.name,
                               supply_deriv, computer,
                               &tot_weight, &tot_objf);
      objf_info_[io.name + suffix].UpdateStats(io.name + suffix,
                                               config_.print_interval,
                                               num_minibatches_processed_,
                                               tot_weight, tot_objf);
    }
  }
}

◆ Train()

void Train ( const NnetExample &  eg )

Definition at line 57 of file nnet-training.cc.

References NnetTrainerOptions::backstitch_training_interval, NnetTrainerOptions::backstitch_training_scale, CachingOptimizingCompiler::Compile(), NnetTrainer::compiler_, NnetTrainer::config_, kaldi::nnet3::ConsolidateMemory(), NnetTrainer::delta_nnet_, kaldi::nnet3::FreezeNaturalGradient(), kaldi::nnet3::GetComputationRequest(), KALDI_ASSERT, NnetTrainerOptions::momentum, NnetTrainer::nnet_, NnetTrainer::num_minibatches_processed_, kaldi::nnet3::ResetGenerators(), NnetTrainer::srand_seed_, NnetTrainerOptions::store_component_stats, NnetTrainer::TrainInternal(), and NnetTrainer::TrainInternalBackstitch().

Referenced by main().

void NnetTrainer::Train(const NnetExample &eg) {
  bool need_model_derivative = true;
  ComputationRequest request;
  GetComputationRequest(*nnet_, eg, need_model_derivative,
                        config_.store_component_stats,
                        &request);
  std::shared_ptr<const NnetComputation> computation =
      compiler_.Compile(request);

  if (config_.backstitch_training_scale > 0.0 &&
      num_minibatches_processed_ % config_.backstitch_training_interval ==
      srand_seed_ % config_.backstitch_training_interval) {
    // backstitch training is incompatible with momentum > 0
    KALDI_ASSERT(config_.momentum == 0.0);
    FreezeNaturalGradient(true, delta_nnet_);
    bool is_backstitch_step1 = true;
    srand(srand_seed_ + num_minibatches_processed_);
    ResetGenerators(nnet_);
    TrainInternalBackstitch(eg, *computation, is_backstitch_step1);
    FreezeNaturalGradient(false, delta_nnet_);  // un-freeze natural gradient
    is_backstitch_step1 = false;
    srand(srand_seed_ + num_minibatches_processed_);
    ResetGenerators(nnet_);
    TrainInternalBackstitch(eg, *computation, is_backstitch_step1);
  } else {  // conventional training
    TrainInternal(eg, *computation);
  }
  if (num_minibatches_processed_ == 0) {
    ConsolidateMemory(nnet_);
    ConsolidateMemory(delta_nnet_);
  }
  num_minibatches_processed_++;
}

◆ TrainInternal()

void TrainInternal ( const NnetExample &  eg,
const NnetComputation &  computation 
)
private

Definition at line 91 of file nnet-training.cc.

References NnetComputer::AcceptInputs(), kaldi::nnet3::ApplyL2Regularization(), NnetTrainerOptions::batchnorm_stats_scale, NnetTrainerOptions::compute_config, NnetTrainer::config_, kaldi::nnet3::ConstrainOrthonormal(), NnetTrainer::delta_nnet_, kaldi::nnet3::GetNumNvalues(), NnetExample::io, NnetTrainerOptions::l2_regularize_factor, NnetTrainer::max_change_stats_, NnetTrainerOptions::max_param_change, NnetTrainerOptions::momentum, NnetTrainer::nnet_, NnetTrainer::ProcessOutputs(), NnetComputer::Run(), kaldi::nnet3::ScaleBatchnormStats(), kaldi::nnet3::ScaleNnet(), and kaldi::nnet3::UpdateNnetWithMaxChange().

Referenced by NnetTrainer::Train().

void NnetTrainer::TrainInternal(const NnetExample &eg,
                                const NnetComputation &computation) {
  // note: because we give the 1st arg (nnet_) as a pointer to the
  // constructor of 'computer', it will use that copy of the nnet to
  // store stats.
  NnetComputer computer(config_.compute_config, computation,
                        nnet_, delta_nnet_);
  // give the inputs to the computer object.
  computer.AcceptInputs(*nnet_, eg.io);
  computer.Run();

  this->ProcessOutputs(false, eg, &computer);
  computer.Run();

  // If relevant, add in the part of the gradient that comes from L2
  // regularization.
  ApplyL2Regularization(*nnet_,
                        GetNumNvalues(eg.io, false) *
                        config_.l2_regularize_factor,
                        delta_nnet_);

  // Update the parameters of nnet
  bool success = UpdateNnetWithMaxChange(
      *delta_nnet_, config_.max_param_change,
      1.0, 1.0 - config_.momentum, nnet_, &max_change_stats_);

  // Scale down the batchnorm stats (keeps them fresh... this affects what
  // happens when we use the model with batchnorm test-mode set).
  ScaleBatchnormStats(config_.batchnorm_stats_scale, nnet_);

  // The following will only do something if we have a LinearComponent
  // or AffineComponent with orthonormal-constraint set to a nonzero value.
  ConstrainOrthonormal(nnet_);

  // Scale delta_nnet
  if (success)
    ScaleNnet(config_.momentum, delta_nnet_);
  else
    ScaleNnet(0.0, delta_nnet_);
}

◆ TrainInternalBackstitch()

void TrainInternalBackstitch ( const NnetExample &  eg,
const NnetComputation &  computation,
bool  is_backstitch_step1 
)
private

Definition at line 131 of file nnet-training.cc.

References NnetComputer::AcceptInputs(), kaldi::nnet3::ApplyL2Regularization(), NnetTrainerOptions::backstitch_training_scale, NnetTrainerOptions::batchnorm_stats_scale, NnetTrainerOptions::compute_config, NnetTrainer::config_, kaldi::nnet3::ConstrainOrthonormal(), NnetTrainer::delta_nnet_, kaldi::nnet3::GetNumNvalues(), NnetExample::io, NnetTrainerOptions::l2_regularize_factor, NnetTrainer::max_change_stats_, NnetTrainerOptions::max_param_change, NnetTrainer::nnet_, NnetTrainer::ProcessOutputs(), NnetComputer::Run(), kaldi::nnet3::ScaleBatchnormStats(), kaldi::nnet3::ScaleNnet(), and kaldi::nnet3::UpdateNnetWithMaxChange().

Referenced by NnetTrainer::Train().

void NnetTrainer::TrainInternalBackstitch(const NnetExample &eg,
                                          const NnetComputation &computation,
                                          bool is_backstitch_step1) {
  // note: because we give the 1st arg (nnet_) as a pointer to the
  // constructor of 'computer', it will use that copy of the nnet to
  // store stats.
  NnetComputer computer(config_.compute_config, computation,
                        nnet_, delta_nnet_);
  // give the inputs to the computer object.
  computer.AcceptInputs(*nnet_, eg.io);
  computer.Run();

  bool is_backstitch_step2 = !is_backstitch_step1;
  this->ProcessOutputs(is_backstitch_step2, eg, &computer);
  computer.Run();

  BaseFloat max_change_scale, scale_adding;
  if (is_backstitch_step1) {
    // max-change is scaled by backstitch_training_scale;
    // delta_nnet is scaled by -backstitch_training_scale when added to nnet;
    max_change_scale = config_.backstitch_training_scale;
    scale_adding = -config_.backstitch_training_scale;
  } else {
    // max-change is scaled by 1 + backstitch_training_scale;
    // delta_nnet is scaled by 1 + backstitch_training_scale when added to nnet;
    max_change_scale = 1.0 + config_.backstitch_training_scale;
    scale_adding = 1.0 + config_.backstitch_training_scale;
    // If relevant, add in the part of the gradient that comes from L2
    // regularization. It may not be optimally efficient to do it on both
    // passes of the backstitch, like we do here, but it probably minimizes
    // any harmful interactions with the max-change.
    ApplyL2Regularization(*nnet_,
                          1.0 / scale_adding * GetNumNvalues(eg.io, false) *
                          config_.l2_regularize_factor, delta_nnet_);
  }

  // Updates the parameters of nnet
  UpdateNnetWithMaxChange(*delta_nnet_, config_.max_param_change,
                          max_change_scale, scale_adding, nnet_,
                          &max_change_stats_);

  if (is_backstitch_step1) {
    // The following will only do something if we have a LinearComponent or
    // AffineComponent with orthonormal-constraint set to a nonzero value. We
    // choose to do this only on the 1st backstitch step, for efficiency.
    ConstrainOrthonormal(nnet_);
  }

  if (!is_backstitch_step1) {
    // Scale down the batchnorm stats (keeps them fresh... this affects what
    // happens when we use the model with batchnorm test-mode set). Do this
    // after backstitch step 2 so that the stats are scaled down before we start
    // the next minibatch.
    ScaleBatchnormStats(config_.batchnorm_stats_scale, nnet_);
  }

  ScaleNnet(0.0, delta_nnet_);
}

Member Data Documentation

◆ compiler_

◆ config_

◆ delta_nnet_

◆ max_change_stats_

◆ nnet_

◆ num_minibatches_processed_

int32 num_minibatches_processed_
private

Definition at line 217 of file nnet-training.h.

Referenced by NnetTrainer::ProcessOutputs(), and NnetTrainer::Train().

◆ objf_info_

unordered_map<std::string, ObjectiveFunctionInfo, StringHasher> objf_info_
private

Definition at line 222 of file nnet-training.h.

Referenced by NnetTrainer::PrintTotalStats(), and NnetTrainer::ProcessOutputs().

◆ srand_seed_

int32 srand_seed_
private

Definition at line 227 of file nnet-training.h.

Referenced by NnetTrainer::Train().


The documentation for this class was generated from the following files:

nnet-training.h
nnet-training.cc