34 template<
typename Real>
37 opts_(opts), k_(0), computation_state_(kBeforeStep), H_was_set_(false) {
45 data_.Resize(2 * opts.
m, dim);
48 f_ = (opts.
minimize ? 1 : -1 ) * std::numeric_limits<Real>::infinity();
54 template<
typename Real>
57 if (n == 0)
return std::numeric_limits<Real>::infinity();
63 for (
size_t i = 0;
i <
n;
i++)
69 template<
typename Real>
78 Real gradient_length = gradient.
Norm(2.0);
79 learning_rate = (gradient_length > 0.0 ?
83 Real gradient_length = gradient.
Norm(2.0);
84 learning_rate = (gradient_length > 0.0 ?
102 KALDI_WARN <<
"NaN encountered in L-BFGS (already converged?)";
113 template<
typename Real>
127 q.CopyFromVec(gradient);
131 i >= std::max(k - m, static_cast<SignedMatrixIndexT>(0));
134 q.AddVec(-alpha(
i % m),
Y(
i));
147 Real dot =
VecVec(gradient, r);
149 KALDI_WARN <<
"Step direction has the wrong sign! Routine will fail.";
162 deriv_.CopyFromVec(gradient);
172 template<
typename Real>
179 y.CopyFromVec(gradient);
188 Real len = s.
Norm(2.0);
194 KALDI_VLOG(3) <<
"Accepted step; length was " << len
195 <<
", prod was " << prod;
206 template<
typename Real>
214 template<
typename Real>
237 template<
typename Real>
240 KALDI_VLOG(3) <<
"In step size iteration, function value changed " 241 <<
f_ <<
" to " << function_value;
261 else wolfe_i_ok = (function_value >= temp);
274 else wolfe_ii_ok = (p2f <=
opts_.
c2 * pf);
276 enum { kDecrease, kNoChange } d_action;
278 enum { kAccept, kDecreaseStep, kIncreaseStep, kRestart } iteration_action;
287 d_action = kNoChange;
289 if (wolfe_i_ok && wolfe_ii_ok) {
290 iteration_action = kAccept;
291 d_action = kNoChange;
292 }
else if (!wolfe_i_ok) {
297 d_action = kDecrease;
299 iteration_action = kDecreaseStep;
302 }
else if (!wolfe_ii_ok) {
305 d_action = kDecrease;
306 iteration_action = kIncreaseStep;
316 KALDI_VLOG(2) <<
"Too many steps in line search -> restarting.";
317 iteration_action = kRestart;
320 if (d_action == kDecrease)
323 KALDI_VLOG(3) <<
"d = " <<
d_ <<
", iter = " <<
k_ <<
", action = " 324 << (iteration_action == kAccept ?
"accept" :
325 (iteration_action == kDecreaseStep ?
"decrease" :
326 (iteration_action == kIncreaseStep ?
"increase" :
331 if (iteration_action == kAccept) {
337 KALDI_VLOG(2) <<
"Restarting L-BFGS computation; problem found while " 338 <<
"accepting step.";
339 iteration_action = kRestart;
342 if (iteration_action == kDecreaseStep || iteration_action == kIncreaseStep) {
343 Real scale = (iteration_action == kDecreaseStep ? 1.0 /
d_ :
d_);
349 KALDI_VLOG(3) <<
"Value of x did not change, when taking step; " 350 <<
"will restart computation.";
351 iteration_action = kRestart;
354 std::abs(
f_ - function_value) < 1.0e-08 *
355 std::abs(
f_) && iteration_action == kDecreaseStep) {
357 KALDI_VLOG(3) <<
"We appear to be backtracking while we are extremely " 358 <<
"close to the old value; restarting.";
359 iteration_action = kRestart;
362 if (iteration_action == kDecreaseStep) {
370 if (iteration_action == kRestart) {
375 else use_newx = (function_value >
f_);
382 template<
typename Real>
386 best_f_ = function_value;
395 template<
typename Real>
400 best_f_ = function_value;
409 H_.CopyFromVec(diag_approx_2nd_deriv);
411 DoStep(function_value, gradient);
414 template<
typename Real>
417 if (objf_value != NULL) *objf_value =
best_f_;
462 SubVector<Real> r(storage, 0), p(storage, 1), Ap(storage, 2), x_orig(storage, 3);
464 p.AddSpVec(-1.0, A, *x, 1.0);
468 Real r_cur_norm_sq =
VecVec(r, r),
469 r_initial_norm_sq = r_cur_norm_sq,
470 r_recompute_norm_sq = r_cur_norm_sq;
472 KALDI_VLOG(5) <<
"In linear CG: initial norm-square of residual = " 473 << r_initial_norm_sq;
477 std::numeric_limits<Real>::min()),
480 inv_residual_factor = 1.0 / residual_factor;
487 for (; k < M + 5 && k != opts.
max_iters; k++) {
490 Ap.AddSpVec(1.0, A, p, 0.0);
508 Real r_next_norm_sq =
VecVec(r, r);
510 if (r_next_norm_sq < residual_factor * r_recompute_norm_sq ||
511 r_next_norm_sq > inv_residual_factor * r_recompute_norm_sq) {
521 r.AddSpVec(1.0, A, *x, 0.0);
523 r_next_norm_sq =
VecVec(r, r);
524 r_recompute_norm_sq = r_next_norm_sq;
526 KALDI_VLOG(5) <<
"In linear CG: recomputing residual.";
529 <<
", r_next_norm_sq = " << r_next_norm_sq;
531 if (r_next_norm_sq <= max_error_sq)
535 Real beta_next = r_next_norm_sq / r_cur_norm_sq;
540 r_cur_norm_sq = r_next_norm_sq;
547 if (r_cur_norm_sq > r_initial_norm_sq &&
548 r_cur_norm_sq > r_initial_norm_sq + 1.0e-10 *
VecVec(b, b)) {
550 <<
" iterations the squared residual has got worse, " 551 << r_cur_norm_sq <<
" > " << r_initial_norm_sq
552 <<
". Will do an exact optimization.";
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void DoStep(Real function_value, const VectorBase< Real > &gradient)
The user calls this function to provide the class with the function and gradient info at the point Ge...
Packed symmetric matrix class.
This class describes the options for maximizing various quadratic objective functions.
std::vector< Real > step_lengths_
double SolveQuadraticProblem(const SpMatrix< double > &H, const VectorBase< double > &g, const SolverOptions &opts, VectorBase< double > *x)
ComputationState computation_state_
void Restart(const VectorBase< Real > &x, Real function_value, const VectorBase< Real > &gradient)
int max_line_search_iters
float first_step_learning_rate
const VectorBase< Real > & GetValue(Real *objf_value=NULL) const
This returns the value of the variable x that has the best objective function so far, and the corresponding objective function value if requested.
A class for storing matrices.
BaseFloat recompute_residual_factor
Real Min() const
Returns the minimum value of any element, or +infinity for the empty vector.
MatrixIndexT NumRows() const
Real Norm(Real p) const
Compute the p-th norm of the vector.
template int32 LinearCgd< float >(const LinearCgdOptions &opts, const SpMatrix< float > &A, const VectorBase< float > &b, VectorBase< float > *x)
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors: *this(i) = alpha * v(i) * r(i) + beta * (*this)(i).
OptimizeLbfgs(const VectorBase< Real > &x, const LbfgsOptions &opts)
Initializer takes the starting value of x.
enum kaldi::OptimizeLbfgs::@0 last_failure_type_
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
MatrixIndexT NumCols() const
SubVector< Real > S(MatrixIndexT i)
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector.
int num_wolfe_ii_failures_
void StepSizeIteration(Real function_value, const VectorBase< Real > &gradient)
MatrixIndexT Dim() const
Returns the dimension of the vector.
void RecordStepLength(Real s)
Real RecentStepLength() const
Returns the average magnitude of the last n steps (but not more than the number we have stored)...
int num_wolfe_i_failures_
template int32 LinearCgd< double >(const LinearCgdOptions &opts, const SpMatrix< double > &A, const VectorBase< double > &b, VectorBase< double > *x)
void ComputeHifNeeded(const VectorBase< Real > &gradient)
SubVector< Real > Y(MatrixIndexT i)
A class representing a vector.
#define KALDI_ASSERT(cond)
void ComputeNewDirection(Real function_value, const VectorBase< Real > &gradient)
int32 LinearCgd(const LinearCgdOptions &opts, const SpMatrix< Real > &A, const VectorBase< Real > &b, VectorBase< Real > *x)
This is an implementation of L-BFGS.
Provides a vector abstraction class.
void SetZero()
Set vector to all zeros.
bool AcceptStep(Real function_value, const VectorBase< Real > &gradient)
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between a and b.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector: *this = *this + alpha * v (with casting between floats and doubles) ...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...