# Matrix optimization

## Classes

struct  LinearCgdOptions

struct  LbfgsOptions
This is an implementation of L-BFGS. More...

class  OptimizeLbfgs< Real >

## Enumerations

enum  ComputationState { kBeforeStep, kWithinStep }
"compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4). More...

enum  { kWolfeI, kWolfeII, kNone }

## Functions

template<class Real >
int32 LinearCgd (const LinearCgdOptions &opts, const SpMatrix< Real > &A, const VectorBase< Real > &b, VectorBase< Real > *x)

## Enumeration Type Documentation

 anonymous enum
private
Enumerator
kWolfeI
kWolfeII
kNone

Definition at line 223 of file optimization.h.

 enum ComputationState
private

"compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4).

Enumerator
kBeforeStep
kWithinStep

Definition at line 173 of file optimization.h.

173  {
174  kBeforeStep,
175  kWithinStep, // This means we're within the step-size computation, and
176  // have not yet done the 1st function evaluation.
177  };

## Function Documentation

 int32 LinearCgd ( const LinearCgdOptions & opts, const SpMatrix< Real > & A, const VectorBase< Real > & b, VectorBase< Real > * x )

Definition at line 453 of file optimization.cc.

Referenced by OnlineIvectorEstimationStats::GetIvector(), and kaldi::UnitTestLinearCgd().

456  {
457  // Initialize the variables
458  //
459  int32 M = A.NumCols();
460
461  Matrix<Real> storage(4, M);
462  SubVector<Real> r(storage, 0), p(storage, 1), Ap(storage, 2), x_orig(storage, 3);
463  p.CopyFromVec(b);
464  p.AddSpVec(-1.0, A, *x, 1.0); // p_0 = b - A x_0
465  r.AddVec(-1.0, p); // r_0 = - p_0
466  x_orig.CopyFromVec(*x); // in case of failure.
467
468  Real r_cur_norm_sq = VecVec(r, r),
469  r_initial_norm_sq = r_cur_norm_sq,
470  r_recompute_norm_sq = r_cur_norm_sq;
471
472  KALDI_VLOG(5) << "In linear CG: initial norm-square of residual = "
473  << r_initial_norm_sq;
474
475  KALDI_ASSERT(opts.recompute_residual_factor <= 1.0);
476  Real max_error_sq = std::max<Real>(opts.max_error * opts.max_error,
477  std::numeric_limits<Real>::min()),
478  residual_factor = opts.recompute_residual_factor *
479  opts.recompute_residual_factor,
480  inv_residual_factor = 1.0 / residual_factor;
481
482  // Note: although from a mathematical point of view the method should converge
483  // after M iterations, in practice (due to roundoff) it does not always
484  // converge to good precision after that many iterations so we let the maximum
485  // be M + 5 instead.
486  int32 k = 0;
487  for (; k < M + 5 && k != opts.max_iters; k++) {
488  // Note: we'll break from this loop if we converge sooner due to
489  // max_error.
490  Ap.AddSpVec(1.0, A, p, 0.0); // Ap = A p
491
492  // Below is how the code used to look.
493  // // next line: \alpha_k = (r_k^T r_k) / (p_k^T A p_k)
494  // Real alpha = r_cur_norm_sq / VecVec(p, Ap);
495  //
496  // We changed r_cur_norm_sq below to -VecVec(p, r). Although this is
497  // slightly less efficient, it seems to make the algorithm dramatically more
498  // robust. Note that -p^T r is the mathematically more natural quantity to
499  // use here, that corresponds to minimizing along that direction... r^T r is
500  // recommended in Nocedal and Wright only as a kind of optimization as it is
501  // supposed to be the same as -p^T r and we already have it computed.
502  Real alpha = -VecVec(p, r) / VecVec(p, Ap);
503
504  // next line: x_{k+1} = x_k + \alpha_k p_k;
506  // next line: r_{k+1} = r_k + \alpha_k A p_k
508  Real r_next_norm_sq = VecVec(r, r);
509
510  if (r_next_norm_sq < residual_factor * r_recompute_norm_sq ||
511  r_next_norm_sq > inv_residual_factor * r_recompute_norm_sq) {
512
513  // Recompute the residual from scratch if the residual norm has decreased
514  // a lot; this costs an extra matrix-vector multiply, but helps keep the
515  // residual accurate.
516  // Also do the same if the residual norm has increased a lot since
517  // the last time we recomputed... this shouldn't happen often, but
518  // it can indicate bad stuff is happening.
519
520  // r_{k+1} = A x_{k+1} - b
523  r_next_norm_sq = VecVec(r, r);
524  r_recompute_norm_sq = r_next_norm_sq;
525
526  KALDI_VLOG(5) << "In linear CG: recomputing residual.";
527  }
528  KALDI_VLOG(5) << "In linear CG: k = " << k
529  << ", r_next_norm_sq = " << r_next_norm_sq;
530  // Check if converged.
531  if (r_next_norm_sq <= max_error_sq)
532  break;
533
534  // next line: \beta_{k+1} = \frac{r_{k+1}^T r_{k+1}}{r_k^T r_k}
535  Real beta_next = r_next_norm_sq / r_cur_norm_sq;
536  // next lines: p_{k+1} = -r_{k+1} + \beta_{k+1} p_k
537  Vector<Real> p_old(p);
538  p.Scale(beta_next);
540  r_cur_norm_sq = r_next_norm_sq;
541  }
542
543  // note: the first element of the && is only there to save compute.
544  // the residual r is A x - b, and r_cur_norm_sq and r_initial_norm_sq are
545  // of the form r * r, so it's clear that b * b has the right dimension to
546  // compare with the residual.
547  if (r_cur_norm_sq > r_initial_norm_sq &&
548  r_cur_norm_sq > r_initial_norm_sq + 1.0e-10 * VecVec(b, b)) {
549  KALDI_WARN << "Doing linear CGD in dimension " << A.NumRows() << ", after " << k
550  << " iterations the squared residual has got worse, "
551  << r_cur_norm_sq << " > " << r_initial_norm_sq
552  << ". Will do an exact optimization.";
553  SolverOptions opts("called-from-linearCGD");
554  x->CopyFromVec(x_orig);
556  }
557  return k;
558 }
double SolveQuadraticProblem(const SpMatrix< double > &H, const VectorBase< double > &g, const SolverOptions &opts, VectorBase< double > *x)
Definition: sp-matrix.cc:635
#define KALDI_WARN
Definition: kaldi-error.h:130
KALDI_ASSERT & A
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
#define KALDI_VLOG(v)
Definition: kaldi-error.h:136
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37