FastNnetCombiner Class Reference
Collaboration diagram for FastNnetCombiner:

Public Member Functions

 FastNnetCombiner (const NnetCombineFastConfig &combine_config, const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets_in, Nnet *nnet_out)
 

Private Member Functions

int32 GetInitialModel (const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets) const
 Returns an integer saying which model to use: either 0 ... num-models - 1 for the best individual model, or (#models) for the average of all of them.
 
void GetInitialParams ()
 
void ComputePreconditioner ()
 
double ComputeObjfAndGradient (Vector< double > *gradient, double *regularizer_objf)
 Computes objf at point "params_". More...
 
void ComputeCurrentNnet (Nnet *dest, bool debug=false)
 

Static Private Member Functions

static void CombineNnets (const Vector< double > &scale_params, const std::vector< Nnet > &nnets, Nnet *dest)
 

Private Attributes

TpMatrix< double > C_
 
TpMatrix< double > C_inv_
 
Vector< double > params_
 
const NnetCombineFastConfig & config_
 
const std::vector< NnetExample > & egs_
 
const std::vector< Nnet > & nnets_
 
Nnet * nnet_out_
 

Detailed Description

Definition at line 102 of file combine-nnet-fast.cc.

Constructor & Destructor Documentation

◆ FastNnetCombiner()

FastNnetCombiner ( const NnetCombineFastConfig &  combine_config,
const std::vector< NnetExample > &  validation_set,
const std::vector< Nnet > &  nnets_in,
Nnet *  nnet_out 
)
inline

Definition at line 104 of file combine-nnet-fast.cc.

References kaldi::nnet2::CombineNnets(), kaldi::nnet2::ComputeObjfAndGradient(), OptimizeLbfgs< Real >::DoStep(), LbfgsOptions::first_step_impr, kaldi::nnet2::GetInitialModel(), OptimizeLbfgs< Real >::GetProposedValue(), OptimizeLbfgs< Real >::GetValue(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, LbfgsOptions::m, and LbfgsOptions::minimize.

107  :
108  config_(combine_config), egs_(validation_set),
109  nnets_(nnets_in), nnet_out_(nnet_out) {
110 
111  GetInitialParams();
112  ComputePreconditioner();
113 
114  int32 dim = params_.Dim();
115  KALDI_ASSERT(dim > 0);
116  Vector<double> gradient(dim);
117 
118  double regularizer_objf, initial_regularizer_objf; // for diagnostics
119  double objf, initial_objf;
120 
121  LbfgsOptions lbfgs_options;
122  lbfgs_options.minimize = false; // We're maximizing.
123  lbfgs_options.m = std::min(dim, config_.max_lbfgs_dim);
124  lbfgs_options.first_step_impr = config_.initial_impr;
125 
126  OptimizeLbfgs<double> lbfgs(params_,
127  lbfgs_options);
128 
129  for (int32 i = 0; i < config_.num_lbfgs_iters; i++) {
130  params_.CopyFromVec(lbfgs.GetProposedValue());
131  objf = ComputeObjfAndGradient(&gradient, &regularizer_objf);
132  // Note: there is debug printout in ComputeObjfAndGradient
133  // (at verbose-level 2).
134  if (i == 0) {
135  initial_objf = objf;
136  initial_regularizer_objf = regularizer_objf;
137  }
138  lbfgs.DoStep(objf, gradient);
139  }
140  params_ = lbfgs.GetValue(&objf);
141 
142  ComputeCurrentNnet(nnet_out_, true); // create the output neural net, and
143  // print out the scaling factors.
144  if (config_.regularizer != 0.0) {
145  double initial_part = initial_objf - initial_regularizer_objf,
146  part = objf - regularizer_objf;
147  KALDI_LOG << "Combining nnets, objf/frame + regularizer changed from "
148  << initial_part << " + " << initial_regularizer_objf
149  << " = " << initial_objf << " to " << part << " + "
150  << regularizer_objf << " = " << objf;
151  } else {
152  KALDI_LOG << "Combining nnets, objf per frame changed from "
153  << initial_objf << " to " << objf;
154  }
155  }
kaldi::int32 int32
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
const std::vector< Nnet > & nnets_
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
double ComputeObjfAndGradient(Vector< double > *gradient, double *regularizer_objf)
Computes objf at point "params_".
#define KALDI_LOG
Definition: kaldi-error.h:153
const std::vector< NnetExample > & egs_
const NnetCombineFastConfig & config_
void ComputeCurrentNnet(Nnet *dest, bool debug=false)

Member Function Documentation

◆ CombineNnets()

void CombineNnets ( const Vector< double > &  scale_params,
const std::vector< Nnet > &  nnets,
Nnet *  dest 
)
static private

Definition at line 201 of file combine-nnet-fast.cc.

References Nnet::AddNnet(), KALDI_ASSERT, rnnlm::n, kaldi::nnet3::NumUpdatableComponents(), and Nnet::ScaleComponents().

203  {
204  int32 num_nnets = nnets.size();
205  KALDI_ASSERT(num_nnets >= 1);
206  int32 num_uc = nnets[0].NumUpdatableComponents();
207  KALDI_ASSERT(nnets[0].NumUpdatableComponents() >= 1);
208 
209 
210  *dest = nnets[0];
211  SubVector<double> scale_params0(scale_params, 0, num_uc);
212  dest->ScaleComponents(Vector<BaseFloat>(scale_params0));
213  for (int32 n = 1; n < num_nnets; n++) {
214  SubVector<double> scale_params_n(scale_params, n * num_uc, num_uc);
215  dest->AddNnet(Vector<BaseFloat>(scale_params_n), nnets[n]);
216  }
217 }
kaldi::int32 int32
struct rnnlm::@11::@12 n
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
int32 NumUpdatableComponents(const Nnet &dest)
Returns the number of updatable components in the nnet.
Definition: nnet-utils.cc:422

◆ ComputeCurrentNnet()

void ComputeCurrentNnet ( Nnet *  dest,
bool  debug = false 
)
private

Definition at line 356 of file combine-nnet-fast.cc.

References VectorBase< Real >::AddTpVec(), kaldi::nnet2::CombineNnets(), MatrixBase< Real >::CopyRowsFromVec(), KALDI_ASSERT, KALDI_LOG, kaldi::kTrans, FisherComputationClass::nnets_, and kaldi::nnet3::NumUpdatableComponents().

357  {
358  int32 num_nnets = nnets_.size();
359  KALDI_ASSERT(num_nnets >= 1);
360  KALDI_ASSERT(params_.Dim() == num_nnets * nnets_[0].NumUpdatableComponents());
361  Vector<double> raw_params(params_.Dim()); // Weights in non-preconditioned space:
362  // p = C^{-T} \hat{p}. Here, raw_params is p, params_, is \hat{p}.
363 
364  if (C_inv_.NumRows() > 0)
365  raw_params.AddTpVec(1.0, C_inv_, kTrans, params_, 0.0);
366  else
367  raw_params = params_; // C not set up yet: interpret params_ as raw parameters.
368 
369  if (debug) {
370  Matrix<double> params_mat(num_nnets,
371  nnets_[0].NumUpdatableComponents());
372  params_mat.CopyRowsFromVec(raw_params);
373  KALDI_LOG << "Scale parameters are " << params_mat;
374  }
375  CombineNnets(raw_params, nnets_, dest);
376 }
static void CombineNnets(const Vector< double > &scale_params, const std::vector< Nnet > &nnets, Nnet *dest)
kaldi::int32 int32
MatrixIndexT NumRows() const
const std::vector< Nnet > & nnets_
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153
int32 NumUpdatableComponents(const Nnet &dest)
Returns the number of updatable components in the nnet.
Definition: nnet-utils.cc:422

◆ ComputeObjfAndGradient()

double ComputeObjfAndGradient ( Vector< double > *  gradient,
double *  regularizer_objf 
)
private

Computes objf at point "params_".

Definition at line 299 of file combine-nnet-fast.cc.

References VectorBase< Real >::AddTpVec(), kaldi::nnet2::DoBackpropParallel(), UpdatableComponent::DotProduct(), FisherComputationClass::egs_, Nnet::GetComponent(), rnnlm::i, rnnlm::j, KALDI_ASSERT, KALDI_VLOG, kaldi::kNoTrans, rnnlm::n, FisherComputationClass::nnets_, Nnet::NumComponents(), and Nnet::SetZero().

301  {
302  Nnet nnet;
303  ComputeCurrentNnet(&nnet); // compute it at the value "params_".
304 
305  Nnet nnet_gradient(nnet);
306  bool is_gradient = true;
307  nnet_gradient.SetZero(is_gradient);
308  double tot_weight = 0.0;
309  double objf = DoBackpropParallel(nnet, config_.minibatch_size, config_.num_threads,
310  egs_, &tot_weight, &nnet_gradient) / egs_.size();
311 
312  // raw_gradient is gradient in non-preconditioned space.
313  Vector<double> raw_gradient(params_.Dim());
314 
315  double regularizer_objf = 0.0; // sum of -0.5 * config_.regularizer * params-squared.
316  int32 i = 0; // index into raw_gradient
317  int32 num_nnets = nnets_.size();
318  for (int32 n = 0; n < num_nnets; n++) {
319  for (int32 j = 0; j < nnet.NumComponents(); j++) {
320  const UpdatableComponent *uc =
321  dynamic_cast<const UpdatableComponent*>(&(nnets_[n].GetComponent(j))),
322  *uc_gradient =
323  dynamic_cast<const UpdatableComponent*>(&(nnet_gradient.GetComponent(j))),
324  *uc_params =
325  dynamic_cast<const UpdatableComponent*>(&(nnet.GetComponent(j)));
326  if (uc != NULL) {
327  double gradient = uc->DotProduct(*uc_gradient) / tot_weight;
328  // "gradient" is the derivative of the objective function w.r.t. this
329  // element of the parameters (i.e. this weight, which gets applied to
330  // the j'th component of the n'th source neural net).
331  if (config_.regularizer != 0.0) {
332  gradient -= config_.regularizer * uc->DotProduct(*uc_params);
333  if (n == 0) // only add this once...
334  regularizer_objf +=
335  -0.5 * config_.regularizer * uc_params->DotProduct(*uc_params);
336  }
337  raw_gradient(i) = gradient;
338  i++;
339  }
340  }
341  }
342  if (config_.regularizer != 0.0) {
343  KALDI_VLOG(2) << "Objf is " << objf << " + regularizer " << regularizer_objf
344  << " = " << (objf + regularizer_objf) << ", raw gradient is "
345  << raw_gradient;
346  } else {
347  KALDI_VLOG(2) << "Objf is " << objf << ", raw gradient is " << raw_gradient;
348  }
349  KALDI_ASSERT(i == raw_gradient.Dim());
350  // \hat{g} = C^{-1} g.
351  gradient->AddTpVec(1.0, C_inv_, kNoTrans, raw_gradient, 0.0);
352  *regularizer_objf_ptr = regularizer_objf;
353  return objf + regularizer_objf;
354 }
kaldi::int32 int32
const std::vector< Nnet > & nnets_
struct rnnlm::@11::@12 n
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
const std::vector< NnetExample > & egs_
double DoBackpropParallel(const Nnet &nnet, int32 minibatch_size, SequentialNnetExampleReader *examples_reader, double *tot_weight, Nnet *nnet_to_update)
This function is similar to "DoBackprop" in nnet-update.h This function computes the objective functi...
const NnetCombineFastConfig & config_
void ComputeCurrentNnet(Nnet *dest, bool debug=false)

◆ ComputePreconditioner()

void ComputePreconditioner ( )
private

Definition at line 220 of file combine-nnet-fast.cc.

References FisherComputationClass::egs_, rnnlm::i, KALDI_ASSERT, kaldi::kTrans, FisherComputationClass::nnets_, PackedMatrix< Real >::NumRows(), SpMatrix< Real >::Resize(), PackedMatrix< Real >::Scale(), and SpMatrix< Real >::Trace().

220  {
221  SpMatrix<double> F; // Fisher matrix.
222  Nnet nnet;
223  ComputeCurrentNnet(&nnet); // will be at initial value of neural net.
224 
225  { // This block does the multi-threaded computation.
226  // The next line just initializes an "example" object.
227  FisherComputationClass fc(nnet, nnets_, egs_,
229  &F);
230 
231  // Setting num_threads to zero if config_.num_threads == 1
232  // is a signal to the MultiThreader class to run without creating
233  // any extra threads in this case; it helps support GPUs.
234  int32 num_threads = config_.num_threads == 1 ? 0 : config_.num_threads;
235  // The work gets done in the initializer and destructor of
236  // the class below.
237  MultiThreader<FisherComputationClass> m(num_threads, fc);
238  }
239 
240  // The scale of F is irrelevant but it might be quite
241  // large at this point, so we just normalize it.
242  KALDI_ASSERT(F.Trace() > 0);
243  F.Scale(F.NumRows() / F.Trace()); // same scale as unit matrix.
244  // Make zero diagonal elements of F non-zero. Relates to updatable
245  // components that have no effect, e.g. MixtureProbComponents that have
246  // no real free parameters.
248  for (int32 i = 0; i < F.NumRows(); i++)
249  F(i, i) = std::max<BaseFloat>(F(i, i), config_.fisher_floor);
250  // We next smooth the diagonal elements of F by a small amount.
251  // This is mainly necessary in case the number of minibatches is
252  // smaller than the dimension of F; we want to ensure F is full rank.
253  for (int32 i = 0; i < F.NumRows(); i++)
254  F(i, i) *= (1.0 + config_.alpha);
255 
256  C_.Resize(F.NumRows());
257  C_.Cholesky(F);
258  C_inv_ = C_;
259  C_inv_.Invert();
260 
261  // Transform the params_ data-member to be in the preconditioned space.
262  Vector<double> raw_params(params_);
263  params_.AddTpVec(1.0, C_, kTrans, raw_params, 0.0);
264 }
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: tp-matrix.h:124
kaldi::int32 int32
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
const std::vector< Nnet > & nnets_
void AddTpVec(const Real alpha, const TpMatrix< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add triangular matrix times vector: this <– beta*this + alpha*M*v.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
const std::vector< NnetExample > & egs_
const NnetCombineFastConfig & config_
void ComputeCurrentNnet(Nnet *dest, bool debug=false)

◆ GetInitialModel()

int32 GetInitialModel ( const std::vector< NnetExample > &  validation_set,
const std::vector< Nnet > &  nnets 
) const
private

Returns an integer saying which model to use: either 0 ... num-models - 1 for the best individual model, or (#models) for the average of all of them.

Definition at line 381 of file combine-nnet-fast.cc.

References kaldi::nnet2::CombineNnets(), kaldi::nnet2::ComputeNnetObjfParallel(), KALDI_ASSERT, KALDI_LOG, rnnlm::n, and VectorBase< Real >::Set().

383  {
384  int32 num_nnets = static_cast<int32>(nnets.size());
385  KALDI_ASSERT(!nnets.empty());
386  int32 best_n = -1;
387  double best_objf = -std::numeric_limits<double>::infinity();
388  Vector<double> objfs(nnets.size());
389  for (int32 n = 0; n < num_nnets; n++) {
390  double num_frames;
391  double objf = ComputeNnetObjfParallel(nnets[n], config_.minibatch_size,
392  config_.num_threads, validation_set,
393  &num_frames);
394  KALDI_ASSERT(num_frames != 0);
395  objf /= num_frames;
396 
397  if (n == 0 || objf > best_objf) {
398  best_objf = objf;
399  best_n = n;
400  }
401  objfs(n) = objf;
402  }
403  KALDI_LOG << "Objective functions for the source neural nets are " << objfs;
404 
405  int32 num_uc = nnets[0].NumUpdatableComponents();
406 
407  if (num_nnets > 1) { // Now try a version where all the neural nets have the
408  // same weight. Don't do this if num_nnets == 1 as
409  // it would be a waste of time (identical to n == 0).
410  Vector<double> scale_params(num_uc * num_nnets);
411  scale_params.Set(1.0 / num_nnets);
412  Nnet average_nnet;
413  CombineNnets(scale_params, nnets, &average_nnet);
414  double num_frames;
415  double objf = ComputeNnetObjfParallel(average_nnet, config_.minibatch_size,
416  config_.num_threads, validation_set,
417  &num_frames);
418  objf /= num_frames;
419  KALDI_LOG << "Objf with all neural nets averaged is " << objf;
420  if (objf > best_objf) {
421  return num_nnets;
422  } else {
423  return best_n;
424  }
425  } else {
426  return best_n;
427  }
428 }
static void CombineNnets(const Vector< double > &scale_params, const std::vector< Nnet > &nnets, Nnet *dest)
kaldi::int32 int32
struct rnnlm::@11::@12 n
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
double ComputeNnetObjfParallel(const Nnet &nnet, int32 minibatch_size, int32 num_threads, const std::vector< NnetExample > &examples, double *num_frames)
This is basically to clarify the fact that DoBackpropParallel will also work with nnet_to_update == N...
#define KALDI_LOG
Definition: kaldi-error.h:153
const NnetCombineFastConfig & config_

◆ GetInitialParams()

void GetInitialParams ( )
private

Definition at line 268 of file combine-nnet-fast.cc.

References FisherComputationClass::egs_, kaldi::nnet2::GetInitialModel(), KALDI_ASSERT, KALDI_LOG, FisherComputationClass::nnets_, and VectorBase< Real >::Set().

268  {
269  int32 initial_model = config_.initial_model,
270  num_nnets = static_cast<int32>(nnets_.size());
271  if (initial_model > num_nnets)
272  initial_model = num_nnets;
273  if (initial_model < 0)
274  initial_model = GetInitialModel(egs_, nnets_);
275 
276  KALDI_ASSERT(initial_model >= 0 && initial_model <= num_nnets);
277  int32 num_uc = nnets_[0].NumUpdatableComponents();
278 
279  Vector<double> raw_params(num_uc * num_nnets); // parameters in
280  // non-preconditioned space.
281  if (initial_model < num_nnets) {
282  KALDI_LOG << "Initializing with neural net with index " << initial_model;
283  // At this point we're using the best of the individual neural nets.
284  raw_params.Set(0.0);
285 
286  // Set the block of parameters corresponding to the "best" of the
287  // source neural nets to
288  SubVector<double> best_block(raw_params, num_uc * initial_model, num_uc);
289  best_block.Set(1.0);
290  } else { // initial_model == num_nnets
291  KALDI_LOG << "Initializing with all neural nets averaged.";
292  raw_params.Set(1.0 / num_nnets);
293  }
294  KALDI_ASSERT(C_.NumRows() == 0); // Assume this not set up yet.
295  params_ = raw_params; // this is in non-preconditioned space.
296 }
kaldi::int32 int32
MatrixIndexT NumRows() const
int32 GetInitialModel(const std::vector< NnetExample > &validation_set, const std::vector< Nnet > &nnets) const
Returns an integer saying which model to use: either 0 ...
const std::vector< Nnet > & nnets_
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153
const std::vector< NnetExample > & egs_
const NnetCombineFastConfig & config_

Member Data Documentation

◆ C_

TpMatrix<double> C_
private

Definition at line 186 of file combine-nnet-fast.cc.

◆ C_inv_

TpMatrix<double> C_inv_
private

Definition at line 187 of file combine-nnet-fast.cc.

◆ config_

const NnetCombineFastConfig& config_
private

Definition at line 193 of file combine-nnet-fast.cc.

◆ egs_

const std::vector<NnetExample>& egs_
private

Definition at line 194 of file combine-nnet-fast.cc.

◆ nnet_out_

Nnet* nnet_out_
private

Definition at line 196 of file combine-nnet-fast.cc.

◆ nnets_

const std::vector<Nnet>& nnets_
private

Definition at line 195 of file combine-nnet-fast.cc.

◆ params_

Vector<double> params_
private

Definition at line 188 of file combine-nnet-fast.cc.


The documentation for this class was generated from the following file: