# Matrix optimization

## Classes

struct  LinearCgdOptions

struct  LbfgsOptions
This is an implementation of L-BFGS. More...

class  OptimizeLbfgs< Real >

## Enumerations

enum  ComputationState { kBeforeStep, kWithinStep }
"compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4). More...

enum  { kWolfeI, kWolfeII, kNone }

## Functions

template<class Real >
int32 LinearCgd (const LinearCgdOptions &opts, const SpMatrix< Real > &A, const VectorBase< Real > &b, VectorBase< Real > *x)

## Enumeration Type Documentation

 anonymous enum
private
Enumerator
kWolfeI
kWolfeII
kNone

Definition at line 223 of file optimization.h.

 enum ComputationState
private

"compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4).

Enumerator
kBeforeStep
kWithinStep

Definition at line 173 of file optimization.h.

173  {
174  kBeforeStep,
175  kWithinStep, // This means we're within the step-size computation, and
176  // have not yet done the 1st function evaluation.
177  };

## Function Documentation

 int32 LinearCgd ( const LinearCgdOptions & opts, const SpMatrix< Real > & A, const VectorBase< Real > & b, VectorBase< Real > * x )

Definition at line 453 of file optimization.cc.

Referenced by OnlineIvectorEstimationStats::GetIvector(), and kaldi::UnitTestLinearCgd().

456  {
457  // Initialize the variables
458  //
459  int32 M = A.NumCols();
460
461  Matrix<Real> storage(4, M);
462  SubVector<Real> r(storage, 0), p(storage, 1), Ap(storage, 2), x_orig(storage, 3);
463  p.CopyFromVec(b);
464  p.AddSpVec(-1.0, A, *x, 1.0); // p_0 = b - A x_0
465  r.AddVec(-1.0, p); // r_0 = - p_0
466  x_orig.CopyFromVec(*x); // in case of failure.
467
468  Real r_cur_norm_sq = VecVec(r, r),
469  r_initial_norm_sq = r_cur_norm_sq,
470  r_recompute_norm_sq = r_cur_norm_sq;
471
472  KALDI_VLOG(5) << "In linear CG: initial norm-square of residual = "
473  << r_initial_norm_sq;
474
475  KALDI_ASSERT(opts.recompute_residual_factor <= 1.0);
476  Real max_error_sq = std::max<Real>(opts.max_error * opts.max_error,
477  std::numeric_limits<Real>::min()),
478  residual_factor = opts.recompute_residual_factor *
479  opts.recompute_residual_factor,
480  inv_residual_factor = 1.0 / residual_factor;
481
482  // Note: although from a mathematical point of view the method should converge
483  // after M iterations, in practice (due to roundoff) it does not always
484  // converge to good precision after that many iterations so we let the maximum
485  // be M + 5 instead.
486  int32 k = 0;
487  for (; k < M + 5 && k != opts.max_iters; k++) {
488  // Note: we'll break from this loop if we converge sooner due to
489  // max_error.
490  Ap.AddSpVec(1.0, A, p, 0.0); // Ap = A p
491
492  // Below is how the code used to look.
493  // // next line: \alpha_k = (r_k^T r_k) / (p_k^T A p_k)
494  // Real alpha = r_cur_norm_sq / VecVec(p, Ap);
495  //
496  // We changed r_cur_norm_sq below to -VecVec(p, r). Although this is
497  // slightly less efficient, it seems to make the algorithm dramatically more
498  // robust. Note that -p^T r is the mathematically more natural quantity to
499  // use here, that corresponds to minimizing along that direction... r^T r is
500  // recommended in Nocedal and Wright only as a kind of optimization as it is
501  // supposed to be the same as -p^T r and we already have it computed.
502  Real alpha = -VecVec(p, r) / VecVec(p, Ap);
503
504  // next line: x_{k+1} = x_k + \alpha_k p_k;
506  // next line: r_{k+1} = r_k + \alpha_k A p_k
508  Real r_next_norm_sq = VecVec(r, r);
509
510  if (r_next_norm_sq < residual_factor * r_recompute_norm_sq ||
511  r_next_norm_sq > inv_residual_factor * r_recompute_norm_sq) {
512
513  // Recompute the residual from scratch if the residual norm has decreased
514  // a lot; this costs an extra matrix-vector multiply, but helps keep the
515  // residual accurate.
516  // Also do the same if the residual norm has increased a lot since
517  // the last time we recomputed... this shouldn't happen often, but
518  // it can indicate bad stuff is happening.
519
520  // r_{k+1} = A x_{k+1} - b
523  r_next_norm_sq = VecVec(r, r);
524  r_recompute_norm_sq = r_next_norm_sq;
525
526  KALDI_VLOG(5) << "In linear CG: recomputing residual.";
527  }
528  KALDI_VLOG(5) << "In linear CG: k = " << k
529  << ", r_next_norm_sq = " << r_next_norm_sq;
530  // Check if converged.
531  if (r_next_norm_sq <= max_error_sq)
532  break;
533
534  // next line: \beta_{k+1} = \frac{r_{k+1}^T r_{k+1}}{r_k^T r_k}
535  Real beta_next = r_next_norm_sq / r_cur_norm_sq;
536  // next lines: p_{k+1} = -r_{k+1} + \beta_{k+1} p_k
537  Vector<Real> p_old(p);
538  p.Scale(beta_next);
540  r_cur_norm_sq = r_next_norm_sq;
541  }
542
543  // note: the first element of the && is only there to save compute.
544  // the residual r is A x - b, and r_cur_norm_sq and r_initial_norm_sq are
545  // of the form r * r, so it's clear that b * b has the right dimension to
546  // compare with the residual.
547  if (r_cur_norm_sq > r_initial_norm_sq &&
548  r_cur_norm_sq > r_initial_norm_sq + 1.0e-10 * VecVec(b, b)) {
549  KALDI_WARN << "Doing linear CGD in dimension " << A.NumRows() << ", after " << k
550  << " iterations the squared residual has got worse, "
551  << r_cur_norm_sq << " > " << r_initial_norm_sq
552  << ". Will do an exact optimization.";
553  SolverOptions opts("called-from-linearCGD");
554  x->CopyFromVec(x_orig);
556  }
557  return k;
558 }
double SolveQuadraticProblem(const SpMatrix< double > &H, const VectorBase< double > &g, const SolverOptions &opts, VectorBase< double > *x)
Definition: sp-matrix.cc:635
#define KALDI_WARN
Definition: kaldi-error.h:130
KALDI_ASSERT & A
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
#define KALDI_VLOG(v)
Definition: kaldi-error.h:136
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37