Collaboration diagram for FastNnetCombiner:

[legend]

Public Member Functions
	FastNnetCombiner (const NnetCombineFastConfig &combine_config, const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets_in, Nnet *nnet_out)

Private Member Functions
int32	GetInitialModel (const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets) const
	Returns an integer saying which model to use: either 0 ... num-models - 1 for the best individual model, or (#models) for the average of all of them. More...

void	GetInitialParams ()

void	ComputePreconditioner ()

double	ComputeObjfAndGradient (Vector< double > *gradient, double *regularizer_objf)
	Computes objf at point "params_". More...

void	ComputeCurrentNnet (Nnet *dest, bool debug=false)

Static Private Member Functions
static void	CombineNnets (const Vector< double > &scale_params, const std::vector< Nnet > &nnets, Nnet *dest)

Private Attributes
TpMatrix< double >	C_

TpMatrix< double >	C_inv_

Vector< double >	params_

const NnetCombineFastConfig &	config_

const std::vector< NnetExample > &	egs_

const std::vector< Nnet > &	nnets_

Nnet *	nnet_out_

Detailed Description

Definition at line 102 of file combine-nnet-fast.cc.

Constructor & Destructor Documentation

◆ FastNnetCombiner()

FastNnetCombiner	(	const NnetCombineFastConfig &	combine_config,
		const std::vector< NnetExample > &	validation_set,
		const std::vector< Nnet > &	nnets_in,
		Nnet *	nnet_out
	)

inline

Definition at line 104 of file combine-nnet-fast.cc.

References kaldi::nnet2::CombineNnets(), kaldi::nnet2::ComputeObjfAndGradient(), OptimizeLbfgs< Real >::DoStep(), LbfgsOptions::first_step_impr, kaldi::nnet2::GetInitialModel(), OptimizeLbfgs< Real >::GetProposedValue(), OptimizeLbfgs< Real >::GetValue(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, LbfgsOptions::m, and LbfgsOptions::minimize.

                                   :
       config_(combine_config), egs_(validation_set),
       nnets_(nnets_in), nnet_out_(nnet_out) {
 
     GetInitialParams();
     ComputePreconditioner();
 
     int32 dim = params_.Dim();
     KALDI_ASSERT(dim > 0);
     Vector<double> gradient(dim);
 
     double regularizer_objf, initial_regularizer_objf; // for diagnostics
     double objf, initial_objf;
 
     LbfgsOptions lbfgs_options;
     lbfgs_options.minimize = false; // We're maximizing.
     lbfgs_options.m = std::min(dim, config_.max_lbfgs_dim);
     lbfgs_options.first_step_impr = config_.initial_impr;
 
     OptimizeLbfgs<double> lbfgs(params_,
                                 lbfgs_options);
 
     for (int32 i = 0; i < config_.num_lbfgs_iters; i++) {
       params_.CopyFromVec(lbfgs.GetProposedValue());
       objf = ComputeObjfAndGradient(&gradient, &regularizer_objf);
       // Note: there is debug printout in ComputeObjfAndGradient
       // (at verbose-level 2).
       if (i == 0) {
         initial_objf = objf;
         initial_regularizer_objf = regularizer_objf;
       }
       lbfgs.DoStep(objf, gradient);
     }
     params_ = lbfgs.GetValue(&objf);
 
     ComputeCurrentNnet(nnet_out_, true); // create the output neural net, and
                                          // print out the scaling factors.
     if (config_.regularizer != 0.0) {
       double initial_part = initial_objf - initial_regularizer_objf,
           part = objf - regularizer_objf;
       KALDI_LOG << "Combining nnets, objf/frame + regularizer changed from "
                 << initial_part << " + " << initial_regularizer_objf
                 << " = " << initial_objf << " to " << part << " + "
                 << regularizer_objf << " = " << objf;
     } else {
       KALDI_LOG << "Combining nnets, objf per frame changed from "
                 << initial_objf << " to " << objf;
     }
   }

Member Function Documentation

◆ CombineNnets()

void CombineNnets	(	const Vector< double > &	scale_params,
		const std::vector< Nnet > &	nnets,
		Nnet *	dest
	)

static private

Definition at line 201 of file combine-nnet-fast.cc.

References Nnet::AddNnet(), KALDI_ASSERT, rnnlm::n, kaldi::nnet3::NumUpdatableComponents(), and Nnet::ScaleComponents().

                                                 {
   int32 num_nnets = nnets.size();
   KALDI_ASSERT(num_nnets >= 1);
   int32 num_uc = nnets[0].NumUpdatableComponents();
   KALDI_ASSERT(nnets[0].NumUpdatableComponents() >= 1);
 
 
   *dest = nnets[0];
   SubVector<double> scale_params0(scale_params, 0, num_uc);
   dest->ScaleComponents(Vector<BaseFloat>(scale_params0));
   for (int32 n = 1; n < num_nnets; n++) {
     SubVector<double> scale_params_n(scale_params, n * num_uc, num_uc);
     dest->AddNnet(Vector<BaseFloat>(scale_params_n), nnets[n]);
   }
 }

◆ ComputeCurrentNnet()

void ComputeCurrentNnet	(	Nnet *	dest,
		bool	debug = `false`
	)

private

Definition at line 356 of file combine-nnet-fast.cc.

References VectorBase< Real >::AddTpVec(), kaldi::nnet2::CombineNnets(), MatrixBase< Real >::CopyRowsFromVec(), KALDI_ASSERT, KALDI_LOG, kaldi::kTrans, FisherComputationClass::nnets_, and kaldi::nnet3::NumUpdatableComponents().

                             {
   int32 num_nnets = nnets_.size();
   KALDI_ASSERT(num_nnets >= 1);
   KALDI_ASSERT(params_.Dim() == num_nnets * nnets_[0].NumUpdatableComponents());
   Vector<double> raw_params(params_.Dim()); // Weights in non-preconditioned space:
   // p = C^{-T} \hat{p}.  Here, raw_params is p, params_, is \hat{p}.
 
   if (C_inv_.NumRows() > 0)
     raw_params.AddTpVec(1.0, C_inv_, kTrans, params_, 0.0);
   else
     raw_params = params_; // C not set up yet: interpret params_ as raw parameters.
 
   if (debug) {
     Matrix<double> params_mat(num_nnets,
                               nnets_[0].NumUpdatableComponents());
     params_mat.CopyRowsFromVec(raw_params);
     KALDI_LOG << "Scale parameters are " << params_mat;
   }
   CombineNnets(raw_params, nnets_, dest);
 }

◆ ComputeObjfAndGradient()

double ComputeObjfAndGradient	(	Vector< double > *	gradient,
		double *	regularizer_objf
	)

private

Computes objf at point "params_".

Definition at line 299 of file combine-nnet-fast.cc.

References VectorBase< Real >::AddTpVec(), kaldi::nnet2::DoBackpropParallel(), UpdatableComponent::DotProduct(), FisherComputationClass::egs_, Nnet::GetComponent(), rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_VLOG, kaldi::kNoTrans, rnnlm::n, FisherComputationClass::nnets_, Nnet::NumComponents(), and Nnet::SetZero().

                                   {
   // Evaluates the objective (data term plus regularizer term) at the current
   // value of params_, writes the gradient in the preconditioned space to
   // *gradient, writes the regularizer term on its own to
   // *regularizer_objf_ptr, and returns the sum of the two terms.
   // NOTE(review): regularizer_objf_ptr is not declared in this listing;
   // presumably it is the second (double *) parameter of the function, which
   // the documented signature names regularizer_objf -- confirm.
   Nnet nnet;
   ComputeCurrentNnet(&nnet); // compute it at the value "params_".

   // nnet_gradient starts as a copy of nnet, is zeroed via SetZero(), and is
   // then passed to DoBackpropParallel as the network to be updated.
   Nnet nnet_gradient(nnet);
   bool is_gradient = true;
   nnet_gradient.SetZero(is_gradient);
   double tot_weight = 0.0;
   // Note the two different normalizers: the objective is divided by the
   // number of examples (egs_.size()), while the derivatives below are
   // divided by tot_weight (passed by pointer here; presumably filled in by
   // DoBackpropParallel).
   double objf = DoBackpropParallel(nnet, config_.minibatch_size, config_.num_threads,
                                    egs_, &tot_weight, &nnet_gradient) / egs_.size();

   // raw_gradient is gradient in non-preconditioned space.
   Vector<double> raw_gradient(params_.Dim());

   double regularizer_objf = 0.0; // sum of -0.5 * config_.regularizer * params-squared.
   int32 i = 0; // index into raw_gradient
   int32 num_nnets = nnets_.size();
   // raw_gradient is filled in the order: source network n (outer loop), then
   // each updatable component j of that network (inner loop).
   for (int32 n = 0; n < num_nnets; n++) {
     for (int32 j = 0; j < nnet.NumComponents(); j++) {
       // uc:          component j of the n'th source network;
       // uc_gradient: component j of the accumulated gradient network;
       // uc_params:   component j of the current combined network.
       // The casts yield NULL for components that are not UpdatableComponent;
       // only uc is tested below.
       const UpdatableComponent *uc =
           dynamic_cast<const UpdatableComponent*>(&(nnets_[n].GetComponent(j))),
           *uc_gradient =
           dynamic_cast<const UpdatableComponent*>(&(nnet_gradient.GetComponent(j))),
           *uc_params =
           dynamic_cast<const UpdatableComponent*>(&(nnet.GetComponent(j)));
       if (uc != NULL) {
         // This local "gradient" is a scalar and shadows the "gradient"
         // pointer that is written after the loops.
         double gradient = uc->DotProduct(*uc_gradient) / tot_weight;
         // "gradient" is the derivative of the objective function w.r.t. this
         // element of the parameters (i.e. this weight, which gets applied to
         // the j'th component of the n'th source neural net).
         if (config_.regularizer != 0.0) {
           // Derivative of the regularizer term with respect to this weight.
           gradient -= config_.regularizer * uc->DotProduct(*uc_params);
           if (n == 0) // only add this once...
             regularizer_objf +=
                 -0.5 * config_.regularizer * uc_params->DotProduct(*uc_params);
         }
         raw_gradient(i) = gradient;
         i++;
       }
     }
   }
   if (config_.regularizer != 0.0) {
     KALDI_VLOG(2) << "Objf is " << objf << " + regularizer " << regularizer_objf
                   << " = " << (objf + regularizer_objf) << ", raw gradient is "
                   << raw_gradient;
   } else {
     KALDI_VLOG(2) << "Objf is " << objf << ", raw gradient is " << raw_gradient;
   }
   // Every element of raw_gradient must have been written exactly once.
   KALDI_ASSERT(i == raw_gradient.Dim());
   // \hat{g} = C^{-1} g.
   gradient->AddTpVec(1.0, C_inv_, kNoTrans, raw_gradient, 0.0);
   *regularizer_objf_ptr = regularizer_objf;
   return objf + regularizer_objf;
 }

◆ ComputePreconditioner()

void ComputePreconditioner ( )

private

Definition at line 220 of file combine-nnet-fast.cc.

References FisherComputationClass::egs_, rnnlm::i, KALDI_ASSERT, kaldi::kTrans, FisherComputationClass::nnets_, PackedMatrix< Real >::NumRows(), SpMatrix< Real >::Resize(), PackedMatrix< Real >::Scale(), and SpMatrix< Real >::Trace().

                                                {
   // Computes the matrix F over the validation examples, conditions it
   // (rescale, floor and smooth the diagonal), sets C_ from a Cholesky
   // factorization of F and C_inv_ to its inverse, and finally re-expresses
   // params_ in the preconditioned space.
   SpMatrix<double> F; // Fisher matrix.
   Nnet nnet;
   ComputeCurrentNnet(&nnet); // will be at initial value of neural net.

   { // This block does the multi-threaded computation.
     // The next line just initializes an "example" object.
     FisherComputationClass fc(nnet, nnets_, egs_,
                               config_.fisher_minibatch_size,
                               &F);

     // Setting num_threads to zero if config_.num_threads == 1
     // is a signal to the MultiThreader class to run without creating
     // any extra threads in this case; it helps support GPUs.
     int32 num_threads = config_.num_threads == 1 ? 0 : config_.num_threads;
     // The work gets done in the initializer and destructor of
     // the class below.
     MultiThreader<FisherComputationClass> m(num_threads, fc);
   }

   // The scale of F is irrelevant but it might be quite
   // large at this point, so we just normalize it.
   KALDI_ASSERT(F.Trace() > 0);
   F.Scale(F.NumRows() / F.Trace()); // same scale as unit matrix.
   // Make zero diagonal elements of F non-zero.  Relates to updatable
   // components that have no effect, e.g. MixtureProbComponents that have
   // no real free parameters.
   KALDI_ASSERT(config_.fisher_floor > 0.0);
   // NOTE(review): the explicit BaseFloat template argument converts F(i, i)
   // to BaseFloat before the comparison; if BaseFloat is narrower than double
   // this rounds every diagonal element -- confirm that this is intended.
   for (int32 i = 0; i < F.NumRows(); i++)
     F(i, i) = std::max<BaseFloat>(F(i, i), config_.fisher_floor);
   // We next smooth the diagonal elements of F by a small amount.
   // This is mainly necessary in case the number of minibatches is
   // smaller than the dimension of F; we want to ensure F is full rank.
   for (int32 i = 0; i < F.NumRows(); i++)
     F(i, i) *= (1.0 + config_.alpha);

   // C_ is set from the Cholesky factorization of F; C_inv_ is a copy of C_
   // that is then inverted in place.
   C_.Resize(F.NumRows());
   C_.Cholesky(F);
   C_inv_ = C_;
   C_inv_.Invert();

   // Transform the params_ data-member to be in the preconditioned space.
   // (This is the inverse of the mapping p = C^{-T} \hat{p} that
   // ComputeCurrentNnet applies.)
   Vector<double> raw_params(params_);
   params_.AddTpVec(1.0, C_, kTrans, raw_params, 0.0);
 }

◆ GetInitialModel()

int32 GetInitialModel	(	const std::vector< NnetExample > &	validation_set,
		const std::vector< Nnet > &	nnets
	)		const

private

Returns an integer saying which model to use: either 0 ... num-models - 1 for the best individual model, or (#models) for the average of all of them.

Definition at line 381 of file combine-nnet-fast.cc.

References kaldi::nnet2::CombineNnets(), kaldi::nnet2::ComputeNnetObjfParallel(), KALDI_ASSERT, KALDI_LOG, rnnlm::n, and VectorBase< Real >::Set().

                                         {
   int32 num_nnets = static_cast<int32>(nnets.size());
   KALDI_ASSERT(!nnets.empty());
   int32 best_n = -1;
   double best_objf = -std::numeric_limits<double>::infinity();
   Vector<double> objfs(nnets.size());
   for (int32 n = 0; n < num_nnets; n++) {
     double num_frames;
     double objf = ComputeNnetObjfParallel(nnets[n], config_.minibatch_size,
                                           config_.num_threads, validation_set,
                                           &num_frames);
     KALDI_ASSERT(num_frames != 0);
     objf /= num_frames;
 
     if (n == 0 || objf > best_objf) {
       best_objf = objf;
       best_n = n;
     }
     objfs(n) = objf;
   }
   KALDI_LOG << "Objective functions for the source neural nets are " << objfs;
 
   int32 num_uc = nnets[0].NumUpdatableComponents();
 
   if (num_nnets > 1) { // Now try a version where all the neural nets have the
                        // same weight.  Don't do this if num_nnets == 1 as
                        // it would be a waste of time (identical to n == 0).
     Vector<double> scale_params(num_uc * num_nnets);
     scale_params.Set(1.0 / num_nnets);
     Nnet average_nnet;
     CombineNnets(scale_params, nnets, &average_nnet);
     double num_frames;
     double objf = ComputeNnetObjfParallel(average_nnet, config_.minibatch_size,
                                           config_.num_threads, validation_set,
                                           &num_frames);
     objf /= num_frames;
     KALDI_LOG << "Objf with all neural nets averaged is " << objf;
     if (objf > best_objf) {
       return num_nnets;
     } else {
       return best_n;
     }
   } else {
     return best_n;
   }
 }

◆ GetInitialParams()

void GetInitialParams ( )

private

Definition at line 268 of file combine-nnet-fast.cc.

References FisherComputationClass::egs_, kaldi::nnet2::GetInitialModel(), KALDI_ASSERT, KALDI_LOG, FisherComputationClass::nnets_, and VectorBase< Real >::Set().

                                         {
   int32 initial_model = config_.initial_model,
       num_nnets = static_cast<int32>(nnets_.size());
   if (initial_model > num_nnets)
     initial_model = num_nnets;
   if (initial_model < 0)
     initial_model = GetInitialModel(egs_, nnets_);
 
   KALDI_ASSERT(initial_model >= 0 && initial_model <= num_nnets);
   int32 num_uc = nnets_[0].NumUpdatableComponents();
 
   Vector<double> raw_params(num_uc * num_nnets); // parameters in
                                                  // non-preconditioned space.
   if (initial_model < num_nnets) {
     KALDI_LOG << "Initializing with neural net with index " << initial_model;
     // At this point we're using the best of the individual neural nets.
     raw_params.Set(0.0);
 
     // Set the block of parameters corresponding to the "best" of the
     // source neural nets to
     SubVector<double> best_block(raw_params, num_uc * initial_model, num_uc);
     best_block.Set(1.0);
   } else { // initial_model == num_nnets
     KALDI_LOG << "Initializing with all neural nets averaged.";
     raw_params.Set(1.0 / num_nnets);
   }
   KALDI_ASSERT(C_.NumRows() == 0); // Assume this not set up yet.
   params_ = raw_params; // this is in non-preconditioned space.
 }

Member Data Documentation

◆ C_

TpMatrix<double> C_

private

Definition at line 186 of file combine-nnet-fast.cc.

◆ C_inv_

TpMatrix<double> C_inv_

private

Definition at line 187 of file combine-nnet-fast.cc.

◆ config_

const NnetCombineFastConfig& config_

private

Definition at line 193 of file combine-nnet-fast.cc.

◆ egs_

const std::vector<NnetExample>& egs_

private

Definition at line 194 of file combine-nnet-fast.cc.

◆ nnet_out_

Nnet* nnet_out_

private

Definition at line 196 of file combine-nnet-fast.cc.

◆ nnets_

const std::vector<Nnet>& nnets_

private

Definition at line 195 of file combine-nnet-fast.cc.

◆ params_

Vector<double> params_

private

Definition at line 188 of file combine-nnet-fast.cc.

The documentation for this class was generated from the following file:

nnet2/combine-nnet-fast.cc

Public Member Functions

Private Member Functions

Static Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ FastNnetCombiner()

Member Function Documentation

◆ CombineNnets()

◆ ComputeCurrentNnet()

◆ ComputeObjfAndGradient()

◆ ComputePreconditioner()

◆ GetInitialModel()

◆ GetInitialParams()

Member Data Documentation

◆ C_

◆ C_inv_

◆ config_

◆ egs_

◆ nnet_out_

◆ nnets_

◆ params_