doc/nnet-precondition-online_8h_source.html

 // nnet2/nnet-precondition-online.h

 // Copyright 2013-2015   Johns Hopkins University (author: Daniel Povey)
 //                2015   Xiaohui Zhang

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #ifndef KALDI_NNET2_NNET_PRECONDITION_ONLINE_H_
 #define KALDI_NNET2_NNET_PRECONDITION_ONLINE_H_

 #include <iostream>
 #include <mutex>
 #include "base/kaldi-common.h"
 #include "matrix/matrix-lib.h"
 #include "cudamatrix/cu-matrix-lib.h"

 namespace kaldi {
 namespace nnet2 {


 // Preconditions the rows of a matrix of directions using an estimate that is
 // updated online as batches are presented (see PreconditionDirections()).
 // Per the member comments below, the estimate is described as a Fisher-matrix
 // estimate made of a scaled unit matrix (scaling factor rho_t) plus a
 // correction of rank rank_.
 //
 // All non-inline member functions are defined in the corresponding .cc file,
 // which is not part of this listing; notes below that go beyond what the
 // declarations and the original comments state are marked NOTE(review).
 class OnlinePreconditioner {
  public:
   // Default constructor; the initial configuration values are set in the .cc
   // file.
   OnlinePreconditioner();

   // Configuration setters.  They are defined in the .cc file, so any
   // validation they do (and whether they lock) is not visible here.
   void SetRank(int32 rank);
   void SetUpdatePeriod(int32 update_period);
   // num_samples_history is a time-constant (in samples) that determines eta.
   void SetNumSamplesHistory(BaseFloat num_samples_history);
   void SetAlpha(BaseFloat alpha);
   // Switches on self_debug_ (see SelfTest()); there is no way to turn it off
   // again through this interface.
   void TurnOnDebug() { self_debug_ = true; }
   // Accessors for the configuration values.  These return the members
   // directly and do not take either of the mutexes declared below.
   BaseFloat GetNumSamplesHistory() const { return num_samples_history_; }
   BaseFloat GetAlpha() const { return alpha_; }
   int32 GetRank() const { return rank_; }
   int32 GetUpdatePeriod() const { return update_period_; }

   // The "R" pointer is both the input (R in the comment) and the output (P in
   // the comment; equal to the preconditioned directions before scaling by
   // gamma).  If the pointer "row_prod" is supplied, it's set to the inner product
   // of each row of the preconditioned directions P, at output, with itself.
   // You would need to apply "scale" to R and "scale * scale" to row_prod, to
   // get the preconditioned directions; we don't do this ourselves, in order to
   // save CUDA calls.
   //
   //  R         in/out: the directions, one per row; overwritten with P.
   //  row_prod  optional output (see above).
   //  scale     output: the factor the caller still has to apply.
   // NOTE(review): "the comment" refers to a longer explanatory comment that
   // is not part of this listing.
   void PreconditionDirections(CuMatrixBase<BaseFloat> *R,
                               CuVectorBase<BaseFloat> *row_prod,
                               BaseFloat *scale);

   // Copy constructor.  Because it is declared 'explicit', copies must be
   // written as direct-initialization, e.g. "OnlinePreconditioner b(a);";
   // copy-initialization ("OnlinePreconditioner b = a;") and passing or
   // returning an existing object by value will not compile.
   explicit OnlinePreconditioner(const OnlinePreconditioner &other);
   // Assignment operator.  (std::mutex members are not copyable, so the
   // compiler-generated copy operations would be deleted; copy construction
   // and assignment therefore have to be user-provided.)
   OnlinePreconditioner &operator = (const OnlinePreconditioner &other);
  private:

   // This does the work of PreconditionDirections (the top-level
   // function handles some multithreading issues and then calls this function).
   // Note: WJKL_t (dimension 2*R by D + R) is [ W_t L_t; J_t K_t ].
   // NOTE(review): in that dimension, R presumably denotes the rank and D the
   // dimension of the rows being preconditioned -- confirm against the .cc
   // file.  t, rho_t and d_t are passed in by value / const reference rather
   // than read from the members t_, rho_t_ and d_t_.
   void PreconditionDirectionsInternal(const int32 t,
                                       const BaseFloat rho_t,
                                       const Vector<BaseFloat> &d_t,
                                       CuMatrixBase<BaseFloat> *WJKL_t,
                                       CuMatrixBase<BaseFloat> *X_t,
                                       CuVectorBase<BaseFloat> *row_prod,
                                       BaseFloat *scale);

   // NOTE(review): undocumented in the original.  From the signature, d_t and
   // beta_t are inputs and e_t, sqrt_e_t and inv_sqrt_e_t are outputs
   // (presumably e_t together with its elementwise square root and inverse
   // square root) -- confirm against the definition in the .cc file.
   void ComputeEt(const VectorBase<BaseFloat> &d_t,
                  BaseFloat beta_t,
                  VectorBase<BaseFloat> *e_t,
                  VectorBase<BaseFloat> *sqrt_e_t,
                  VectorBase<BaseFloat> *inv_sqrt_e_t) const;

   // NOTE(review): undocumented in the original.  From the signature, the only
   // output is Z_t, an SpMatrix<double>, while all the inputs are BaseFloat
   // quantities -- confirm the details against the definition in the .cc file.
   void ComputeZt(int32 N,
                  BaseFloat rho_t,
                  const VectorBase<BaseFloat> &d_t,
                  const VectorBase<BaseFloat> &inv_sqrt_e_t,
                  const MatrixBase<BaseFloat> &K_t,
                  const MatrixBase<BaseFloat> &L_t,
                  SpMatrix<double> *Z_t) const;
   // Computes W_{t+1}.  Overwrites J_t.
   // The result is written to W_t1; W_t itself is only read (const reference).
   void ComputeWt1(int32 N,
                   const VectorBase<BaseFloat> &d_t,
                   const VectorBase<BaseFloat> &d_t1,
                   BaseFloat rho_t,
                   BaseFloat rho_t1,
                   const MatrixBase<BaseFloat> &U_t,
                   const VectorBase<BaseFloat> &sqrt_c_t,
                   const VectorBase<BaseFloat> &inv_sqrt_e_t,
                   const CuMatrixBase<BaseFloat> &W_t,
                   CuMatrixBase<BaseFloat> *J_t,
                   CuMatrixBase<BaseFloat> *W_t1) const;

   // This function is called if C_t has high condition number; it makes sure
   // that R_{t+1} is orthogonal.  See the section in the extended comment above
   // on "keeping R_t orthogonal".
   // NOTE(review): that extended comment is not part of this listing.  W_t1 is
   // modified in place; temp_W and temp_O are presumably caller-provided
   // workspace -- confirm against the .cc file.
   void ReorthogonalizeXt1(const VectorBase<BaseFloat> &d_t1,
                           BaseFloat rho_t1,
                           CuMatrixBase<BaseFloat> *W_t1,
                           CuMatrixBase<BaseFloat> *temp_W,
                           CuMatrixBase<BaseFloat> *temp_O);

   // Initializes the state from the first batch of data, R0: calls
   // InitDefault() and then does a few iterations of update with R0 (see the
   // comment on InitDefault() below).
   void Init(const CuMatrixBase<BaseFloat> &R0);

   // Initialize to some small 'default' values, called from Init().  Init() then
   // does a few iterations of update with the first batch's data to give more
   // reasonable values.
   void InitDefault(int32 D);

   // initializes R, which is assumed to have at least as many columns as rows,
   // to a specially designed matrix with orthonormal rows, that has no zero rows
   // or columns.
   static void InitOrthonormalSpecial(CuMatrixBase<BaseFloat> *R);

   // Returns the learning rate eta as the function of the number of samples
   // (actually, N is the number of vectors we're preconditioning, which due to
   // context is not always exactly the same as the number of samples).  The
   // value returned depends on num_samples_history_.
   BaseFloat Eta(int32 N) const;

   // called if self_debug_ = true, makes sure the members satisfy certain
   // properties.
   void SelfTest() const;

   // Configuration values:

   // The rank of the correction to the unit matrix (e.g. 20).
   int32 rank_;

   // After a few initial iterations of updating whenever we can, we start only
   // updating the Fisher-matrix parameters every "update_period_" minibatches;
   // this saves time.
   int32 update_period_;

   // num_samples_history_ determines the value of eta, which in turn affects how
   // fast we update our estimate of the covariance matrix.  We've done it this
   // way in order to make it easy to have a single configuration value that
   // doesn't have to be changed when we change the minibatch size.
   BaseFloat num_samples_history_;

   // alpha controls how much we smooth the Fisher matrix with the unit matrix.
   // e.g. alpha = 4.0.
   BaseFloat alpha_;

   // epsilon is an absolute floor on the unit-matrix scaling factor rho_t in our
   // Fisher estimate, which we set to 1.0e-10.  We don't actually make this
   // configurable from the command line.  It's needed to avoid crashes on
   // all-zero inputs.
   BaseFloat epsilon_;

   // delta is a relative floor on the unit-matrix scaling factor rho_t in our
   // Fisher estimate, which we set to 1.0e-05: this is relative to the largest
   // value of D_t.  It's needed to control roundoff error.  We apply the same
   // floor to the eigenvalues in D_t.
   BaseFloat delta_;

   // t is a counter that measures how many updates we've done.
   int32 t_;

   // This keeps track of how many minibatches we've skipped updating the parameters,
   // since the most recent update; it's used in enforcing "update_period_", which
   // is a mechanism to avoid spending too much time updating the subspace (which can
   // be wasteful).
   int32 num_updates_skipped_;

   // If true, activates certain checks.
   bool self_debug_;

   // State of the estimate that is carried from one call to the next.
   // NOTE(review): presumably these are the W_t, rho_t and d_t that appear in
   // the method signatures above (rho_t being the unit-matrix scaling factor
   // mentioned in the comments on epsilon_ and delta_) -- confirm against the
   // .cc file.
   CuMatrix<BaseFloat> W_t_;
   BaseFloat rho_t_;
   Vector<BaseFloat> d_t_;


   // Used to prevent parameters being read or written in an inconsistent state.
   std::mutex read_write_mutex_;

   // This mutex is used to control which thread gets to update the
   // parameters, in multi-threaded code.
   std::mutex update_mutex_;
 };

 } // namespace nnet2
 } // namespace kaldi


 #endif
kaldi::nnet2::OnlinePreconditioner::OnlinePreconditioner
OnlinePreconditioner()
Definition: nnet-precondition-online.cc:27

kaldi::nnet2::OnlinePreconditioner::read_write_mutex_
std::mutex read_write_mutex_
Definition: nnet-precondition-online.h:563

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::SpMatrix< double >

matrix-lib.h

kaldi::nnet2::OnlinePreconditioner::ComputeWt1
void ComputeWt1(int32 N, const VectorBase< BaseFloat > &d_t, const VectorBase< BaseFloat > &d_t1, BaseFloat rho_t, BaseFloat rho_t1, const MatrixBase< BaseFloat > &U_t, const VectorBase< BaseFloat > &sqrt_c_t, const VectorBase< BaseFloat > &inv_sqrt_e_t, const CuMatrixBase< BaseFloat > &W_t, CuMatrixBase< BaseFloat > *J_t, CuMatrixBase< BaseFloat > *W_t1) const
Definition: nnet-precondition-online.cc:501

kaldi::nnet2::OnlinePreconditioner::rank_
int32 rank_
Definition: nnet-precondition-online.h:516

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::nnet2::OnlinePreconditioner::d_t_
Vector< BaseFloat > d_t_
Definition: nnet-precondition-online.h:559

kaldi::nnet2::OnlinePreconditioner::t_
int32 t_
Definition: nnet-precondition-online.h:546

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::nnet2::OnlinePreconditioner::SetRank
void SetRank(int32 rank)
Definition: nnet-precondition-online.cc:621

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::nnet2::OnlinePreconditioner::SelfTest
void SelfTest() const
Definition: nnet-precondition-online.cc:255

kaldi::nnet2::OnlinePreconditioner::InitDefault
void InitDefault(int32 D)
Definition: nnet-precondition-online.cc:75

kaldi::nnet2::OnlinePreconditioner::InitOrthonormalSpecial
static void InitOrthonormalSpecial(CuMatrixBase< BaseFloat > *R)
This function creates a matrix with orthonormal rows that is like the following matrix, except with each row normalized to have unit 2-norm: [ 1.1 0 1 0 1 0 ; 0 1.1 0 1 0 1 ] The reason why the first element in each row is 1.1 and not 1, is for symmetry-breaking...
Definition: nnet-precondition-online.cc:45

kaldi::nnet2::OnlinePreconditioner::delta_
BaseFloat delta_
Definition: nnet-precondition-online.h:543

kaldi::nnet2::OnlinePreconditioner::ComputeEt
void ComputeEt(const VectorBase< BaseFloat > &d_t, BaseFloat beta_t, VectorBase< BaseFloat > *e_t, VectorBase< BaseFloat > *sqrt_e_t, VectorBase< BaseFloat > *inv_sqrt_e_t) const
Definition: nnet-precondition-online.cc:577

kaldi::nnet2::OnlinePreconditioner::ComputeZt
void ComputeZt(int32 N, BaseFloat rho_t, const VectorBase< BaseFloat > &d_t, const VectorBase< BaseFloat > &inv_sqrt_e_t, const MatrixBase< BaseFloat > &K_t, const MatrixBase< BaseFloat > &L_t, SpMatrix< double > *Z_t) const
Definition: nnet-precondition-online.cc:546

kaldi::nnet2::OnlinePreconditioner::SetAlpha
void SetAlpha(BaseFloat alpha)
Definition: nnet-precondition-online.cc:634

kaldi::nnet2::OnlinePreconditioner::Eta
BaseFloat Eta(int32 N) const
Definition: nnet-precondition-online.cc:492

kaldi::nnet2::OnlinePreconditioner::W_t_
CuMatrix< BaseFloat > W_t_
Definition: nnet-precondition-online.h:557

kaldi::nnet2::OnlinePreconditioner::PreconditionDirections
void PreconditionDirections(CuMatrixBase< BaseFloat > *R, CuVectorBase< BaseFloat > *row_prod, BaseFloat *scale)
Definition: nnet-precondition-online.cc:145

kaldi::nnet2::OnlinePreconditioner::update_mutex_
std::mutex update_mutex_
Definition: nnet-precondition-online.h:567

float

kaldi::nnet2::OnlinePreconditioner::SetNumSamplesHistory
void SetNumSamplesHistory(BaseFloat num_samples_history)
Definition: nnet-precondition-online.cc:629

kaldi::nnet2::OnlinePreconditioner::TurnOnDebug
void TurnOnDebug()
Definition: nnet-precondition-online.h:422

kaldi::nnet2::OnlinePreconditioner::num_samples_history_
BaseFloat num_samples_history_
Definition: nnet-precondition-online.h:527

kaldi::nnet2::OnlinePreconditioner::GetUpdatePeriod
int32 GetUpdatePeriod() const
Definition: nnet-precondition-online.h:426

kaldi::nnet2::OnlinePreconditioner::GetNumSamplesHistory
BaseFloat GetNumSamplesHistory() const
Definition: nnet-precondition-online.h:423

kaldi::nnet2::OnlinePreconditioner::update_period_
int32 update_period_
Definition: nnet-precondition-online.h:521

kaldi::nnet2::OnlinePreconditioner::epsilon_
BaseFloat epsilon_
Definition: nnet-precondition-online.h:537

kaldi::nnet2::OnlinePreconditioner::GetRank
int32 GetRank() const
Definition: nnet-precondition-online.h:425

kaldi::nnet2::OnlinePreconditioner::ReorthogonalizeXt1
void ReorthogonalizeXt1(const VectorBase< BaseFloat > &d_t1, BaseFloat rho_t1, CuMatrixBase< BaseFloat > *W_t1, CuMatrixBase< BaseFloat > *temp_W, CuMatrixBase< BaseFloat > *temp_O)
Definition: nnet-precondition-online.cc:184

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

cu-matrix-lib.h

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::nnet2::OnlinePreconditioner::operator=
OnlinePreconditioner & operator=(const OnlinePreconditioner &other)
Definition: nnet-precondition-online.cc:605

kaldi::nnet2::OnlinePreconditioner::GetAlpha
BaseFloat GetAlpha() const
Definition: nnet-precondition-online.h:424

kaldi::nnet2::OnlinePreconditioner::rho_t_
BaseFloat rho_t_
Definition: nnet-precondition-online.h:558

kaldi::nnet2::OnlinePreconditioner::Init
void Init(const CuMatrixBase< BaseFloat > &R0)
Definition: nnet-precondition-online.cc:123

kaldi::nnet2::OnlinePreconditioner::SetUpdatePeriod
void SetUpdatePeriod(int32 update_period)
Definition: nnet-precondition-online.cc:625

kaldi::nnet2::OnlinePreconditioner
Keywords for search: natural gradient, naturalgradient, NG-SGD.
Definition: nnet-precondition-online.h:413

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::nnet2::OnlinePreconditioner::self_debug_
bool self_debug_
Definition: nnet-precondition-online.h:555

kaldi::nnet2::OnlinePreconditioner::num_updates_skipped_
int32 num_updates_skipped_
Definition: nnet-precondition-online.h:552

kaldi-common.h

kaldi::nnet2::OnlinePreconditioner::alpha_
BaseFloat alpha_
Definition: nnet-precondition-online.h:531

kaldi::nnet2::OnlinePreconditioner::PreconditionDirectionsInternal
void PreconditionDirectionsInternal(const int32 t, const BaseFloat rho_t, const Vector< BaseFloat > &d_t, CuMatrixBase< BaseFloat > *WJKL_t, CuMatrixBase< BaseFloat > *X_t, CuVectorBase< BaseFloat > *row_prod, BaseFloat *scale)
Definition: nnet-precondition-online.cc:300

kaldi::CuVectorBase
Vector for CUDA computing.
Definition: matrix-common.h:72