Matrix_optimization

Classes

struct  LinearCgdOptions
 
struct  LbfgsOptions
 This is an implementation of L-BFGS.
 
class  OptimizeLbfgs< Real >
 

Functions

template<class Real >
int32 LinearCgd (const LinearCgdOptions &opts, const SpMatrix< Real > &A, const VectorBase< Real > &b, VectorBase< Real > *x)
 

Function Documentation

◆ LinearCgd()

template<class Real >
int32 LinearCgd ( const LinearCgdOptions &  opts,
                  const SpMatrix< Real > &  A,
                  const VectorBase< Real > &  b,
                  VectorBase< Real > *  x 
                )

Definition at line 453 of file optimization.cc.

References VectorBase< Real >::AddVec(), VectorBase< Real >::CopyFromVec(), KALDI_ASSERT, KALDI_VLOG, KALDI_WARN, kaldi::LinearCgd< double >(), kaldi::LinearCgd< float >(), OptimizeLbfgs< Real >::M(), LinearCgdOptions::max_error, LinearCgdOptions::max_iters, PackedMatrix< Real >::NumCols(), PackedMatrix< Real >::NumRows(), LinearCgdOptions::recompute_residual_factor, kaldi::SolveQuadraticProblem(), and kaldi::VecVec().

Referenced by OnlineIvectorEstimationStats::GetIvector(), LinearCgdOptions::LinearCgdOptions(), and kaldi::UnitTestLinearCgd().
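
As a quick orientation before the source listing, here is a minimal usage sketch (not taken from the Kaldi sources) showing how one might call LinearCgd() to solve A x = b for a symmetric positive-definite A. The option fields max_iters, max_error and recompute_residual_factor are the ones referenced above; the values assigned to them here are illustrative assumptions, as is the helper function name.

    #include "matrix/optimization.h"

    using namespace kaldi;

    // SolveSpdExample is a hypothetical helper, for illustration only.
    // It solves A x = b by linear CGD, starting from x = 0.
    Vector<double> SolveSpdExample(const SpMatrix<double> &A,
                                   const Vector<double> &b) {
      LinearCgdOptions opts;
      opts.max_iters = 100;                   // illustrative iteration cap
      opts.max_error = 1.0e-05;               // stop once ||A x - b|| <= max_error
      opts.recompute_residual_factor = 0.01;  // controls residual recomputation

      Vector<double> x(b.Dim());  // Kaldi vectors are zero-initialized, so x_0 = 0.
      int32 iters = LinearCgd(opts, A, b, &x);
      KALDI_LOG << "Linear CGD returned after " << iters << " iterations.";
      return x;
    }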

template<class Real>
int32 LinearCgd(const LinearCgdOptions &opts,
                const SpMatrix<Real> &A,
                const VectorBase<Real> &b,
                VectorBase<Real> *x) {
  // Initialize the variables.
  int32 M = A.NumCols();

  Matrix<Real> storage(4, M);
  SubVector<Real> r(storage, 0), p(storage, 1), Ap(storage, 2),
      x_orig(storage, 3);
  p.CopyFromVec(b);
  p.AddSpVec(-1.0, A, *x, 1.0);  // p_0 = b - A x_0
  r.AddVec(-1.0, p);             // r_0 = -p_0 = A x_0 - b
  x_orig.CopyFromVec(*x);        // Keep the original x, in case of failure.

  Real r_cur_norm_sq = VecVec(r, r),
      r_initial_norm_sq = r_cur_norm_sq,
      r_recompute_norm_sq = r_cur_norm_sq;

  KALDI_VLOG(5) << "In linear CG: initial norm-square of residual = "
                << r_initial_norm_sq;

  KALDI_ASSERT(opts.recompute_residual_factor <= 1.0);
  Real max_error_sq = std::max<Real>(opts.max_error * opts.max_error,
                                     std::numeric_limits<Real>::min()),
      residual_factor = opts.recompute_residual_factor *
                        opts.recompute_residual_factor,
      inv_residual_factor = 1.0 / residual_factor;

  // Note: although from a mathematical point of view the method should
  // converge after M iterations, in practice (due to roundoff) it does not
  // always converge to good precision in that many iterations, so we let the
  // maximum be M + 5 instead.
  int32 k = 0;
  for (; k < M + 5 && k != opts.max_iters; k++) {
    // Note: we'll break from this loop if we converge sooner, due to
    // max_error.
    Ap.AddSpVec(1.0, A, p, 0.0);  // Ap = A p

    // Below is how the code used to look:
    //   // next line: \alpha_k = (r_k^T r_k) / (p_k^T A p_k)
    //   Real alpha = r_cur_norm_sq / VecVec(p, Ap);
    //
    // We changed r_cur_norm_sq below to -VecVec(p, r). Although this is
    // slightly less efficient, it seems to make the algorithm dramatically
    // more robust. Note that -p^T r is the mathematically more natural
    // quantity to use here, since it corresponds to minimizing along that
    // direction; r^T r is recommended in Nocedal and Wright only as a kind
    // of optimization, as it is supposed to equal -p^T r and we already have
    // it computed.
    Real alpha = -VecVec(p, r) / VecVec(p, Ap);

    // next line: x_{k+1} = x_k + \alpha_k p_k
    x->AddVec(alpha, p);
    // next line: r_{k+1} = r_k + \alpha_k A p_k
    r.AddVec(alpha, Ap);
    Real r_next_norm_sq = VecVec(r, r);

    if (r_next_norm_sq < residual_factor * r_recompute_norm_sq ||
        r_next_norm_sq > inv_residual_factor * r_recompute_norm_sq) {
      // Recompute the residual from scratch if the residual norm has
      // decreased a lot; this costs an extra matrix-vector multiply, but
      // helps to keep the residual accurate.
      // Also do the same if the residual norm has increased a lot since the
      // last time we recomputed... this shouldn't happen often, but it can
      // indicate that bad stuff is happening.

      // r_{k+1} = A x_{k+1} - b
      r.AddSpVec(1.0, A, *x, 0.0);
      r.AddVec(-1.0, b);
      r_next_norm_sq = VecVec(r, r);
      r_recompute_norm_sq = r_next_norm_sq;

      KALDI_VLOG(5) << "In linear CG: recomputing residual.";
    }
    KALDI_VLOG(5) << "In linear CG: k = " << k
                  << ", r_next_norm_sq = " << r_next_norm_sq;
    // Check whether we have converged.
    if (r_next_norm_sq <= max_error_sq)
      break;

    // next line: \beta_{k+1} = (r_{k+1}^T r_{k+1}) / (r_k^T r_k)
    Real beta_next = r_next_norm_sq / r_cur_norm_sq;
    // next lines: p_{k+1} = -r_{k+1} + \beta_{k+1} p_k
    p.Scale(beta_next);
    p.AddVec(-1.0, r);
    r_cur_norm_sq = r_next_norm_sq;
  }

  // Note: the first clause of the && is only there to save compute.
  // The residual r is A x - b, and r_cur_norm_sq and r_initial_norm_sq are
  // of the form r^T r, so b^T b has the right dimension to compare with the
  // residual.
  if (r_cur_norm_sq > r_initial_norm_sq &&
      r_cur_norm_sq > r_initial_norm_sq + 1.0e-10 * VecVec(b, b)) {
    KALDI_WARN << "Doing linear CGD in dimension " << A.NumRows()
               << ", after " << k
               << " iterations the squared residual has got worse, "
               << r_cur_norm_sq << " > " << r_initial_norm_sq
               << ". Will do an exact optimization.";
    SolverOptions opts("called-from-linearCGD");
    x->CopyFromVec(x_orig);
    SolveQuadraticProblem(A, b, opts, x);
  }
  return k;
}
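
For reference, here are the update rules the loop above implements, written out as equations. Under the convention used in this code the residual is $r_k = A x_k - b$ and the first search direction is $p_0 = -r_0$:

    \begin{aligned}
    \alpha_k    &= \frac{-p_k^\top r_k}{p_k^\top A p_k}, \\
    x_{k+1}     &= x_k + \alpha_k p_k, \\
    r_{k+1}     &= r_k + \alpha_k A p_k, \\
    \beta_{k+1} &= \frac{r_{k+1}^\top r_{k+1}}{r_k^\top r_k}, \\
    p_{k+1}     &= -r_{k+1} + \beta_{k+1} p_k.
    \end{aligned}

In exact arithmetic $-p_k^\top r_k = r_k^\top r_k$, so this matches the standard form in Nocedal and Wright; the code uses $-p_k^\top r_k$ only because, as its comments note, that quantity is more robust under roundoff.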