OnlinePreconditioner::OnlinePreconditioner():
    rank_(40), update_period_(1), num_samples_history_(2000.0), alpha_(4.0),
    epsilon_(1.0e-10), delta_(5.0e-04), t_(-1),
    num_updates_skipped_(0), self_debug_(false) { }
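// Roughly what these defaults mean (the authoritative description is the class
// comment in the corresponding header): rank_ is R, the rank of the low-rank
// part of the Fisher-matrix approximation; update_period_ is how many calls go
// by between updates of the low-rank factor (after the first few);
// num_samples_history_ is the time constant, in samples, behind the forgetting
// factor eta; alpha_ controls how much of the identity is smoothed into the
// Fisher estimate; epsilon_ and delta_ are small numerical floors.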
  // From InitOrthonormalSpecial(): build a sparse matrix whose rows are
  // orthonormal.  Row r has its nonzeros at columns r, r + num_rows,
  // r + 2 * num_rows, ...; the first of these is 1.1 (for symmetry-breaking)
  // and the rest are 1.0, and each row is then normalized to unit 2-norm.
  std::vector<MatrixElement<BaseFloat> > elems;
  elems.reserve(num_cols);
  BaseFloat first_elem = 1.1;
  for (int32 r = 0; r < num_rows; r++) {
    std::vector<int32> cols;  // columns of this row that are nonzero.
    for (int32 c = r; c < num_cols; c += num_rows)
      cols.push_back(c);
    BaseFloat normalizer = 1.0 / sqrt(first_elem * first_elem +
                                      cols.size() - 1);
    for (size_t i = 0; i < cols.size(); i++) {
      int32 c = cols[i];
      MatrixElement<BaseFloat> e = { r, c,
                                     normalizer * (i == 0 ? first_elem :
                                                   BaseFloat(1.0)) };
      elems.push_back(e);
    }
  }
  R->AddElements(1.0, elems);
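// For example (following the description of InitOrthonormalSpecial()), with
// num_rows = 2 and num_cols = 6 the rows are, before normalization,
//   [ 1.1  0   1   0   1   0 ]
//   [ 0   1.1  0   1   0   1 ]
// and each row is then scaled to unit 2-norm; the 1.1 exists only for
// symmetry-breaking.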
    KALDI_WARN << "Rank " << rank_ << " of online preconditioner is >= dim " << D
               << ", setting it to " << (D - 1)
               << " (but this is probably still too high)";
  // In InitDefault(): W_t <- sqrt(E_tii) * R_t.
  W_t_.Scale(sqrt(E_tii));
  // In Init(): iterate a few times on the same initial data, just to get a
  // better starting point for the low-rank factor.
  int32 num_init_iters = 3;
  for (int32 i = 0; i < num_init_iters; i++) {
    R0_copy.CopyFromMat(R0);
  if (row_prod == NULL) {
  // Copy W_t into the top-left R x D block of the workspace matrix WJKL_t.
  WJKL_t.Range(0, R, 0, D).CopyFromMat(W_t_);
  // In ReorthogonalizeXt1(): recompute E_{t+1} and its square roots from d_{t+1}.
  ComputeEt(d_t1, beta_t1, &e_t1, &sqrt_e_t1, &inv_sqrt_e_t1);
  // Scale O = W_{t+1} W_{t+1}^T by E_{t+1}^{-0.5} on both sides; if W_{t+1}
  // still has orthogonal rows (up to the scaling by E), O is close to the
  // identity.
  for (int32 i = 0; i < R; i++) {
    BaseFloat i_factor = inv_sqrt_e_t1(i);
    for (int32 j = 0; j <= i; j++) {
      BaseFloat j_factor = inv_sqrt_e_t1(j);
      O(i, j) *= i_factor * j_factor;
    }
  }
  if (O.IsUnit(threshold)) {
    KALDI_WARN << "Not reorthogonalizing since already orthogonal: " << O;
    // Written as !(... < 100.0) rather than (... >= 100.0) so that NaN also
    // triggers the error path.
    if (!(C.Max() < 100.0))
      KALDI_ERR << "Cholesky out of expected range, "
                << "reorthogonalizing with Gram-Schmidt";
    KALDI_WARN << "Cholesky or Invert() failed while re-orthogonalizing R_t. "
               << "Re-orthogonalizing on CPU.";
      C(i, j) *= i_factor * j_factor;
  // In SelfTest(): form O = E_t^{-0.5} (W_t W_t^T) E_t^{-0.5}, which should be
  // close to the identity.
      O(i, j) *= i_factor * j_factor;
  // The comparison "O(0, 0) != O(0, 0)" is a NaN check.
  if (!O.IsUnit(1.0e-04) || O(0, 0) != O(0, 0)) {
    BaseFloat worst_error = 0.0;
    int32 worst_i = 0, worst_j = 0;
    for (int32 i = 0; i < R; i++) {
      for (int32 j = 0; j < R; j++) {
        BaseFloat elem = O(i, j);
        BaseFloat error = fabs(elem - (i == j ? 1.0 : 0.0));
        if (error > worst_error || error != error) {  // also catches NaN.
          worst_error = error;
          worst_i = i;
          worst_j = j;
        }
      }
    }
    if (worst_error > 1.0e-02 || worst_error != worst_error) {
      KALDI_WARN << "Failed to verify W_t (worst error: O[" << worst_i << ','
                 << worst_j << "] = " << O(worst_i, worst_j)
                 << ", d_t = " << d_t_;
    }
  }
  CuSubMatrix<BaseFloat> W_t(*WJKL_t, 0, R, 0, D),
      J_t(*WJKL_t, R, R, 0, D),
      L_t(*WJKL_t, 0, R, D, R),
      K_t(*WJKL_t, R, R, D, R),
      WJ_t(*WJKL_t, 0, 2 * R, 0, D),
      LK_t(*WJKL_t, 0, 2 * R, D, R);
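// Layout of the (2R) x (D + R) workspace implied by the sub-matrices above
// (WJ_t and LK_t are the full left and right column blocks, respectively):
//
//                   cols 0 .. D-1    cols D .. D+R-1
//   rows 0 .. R-1        W_t              L_t
//   rows R .. 2R-1       J_t              K_t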
    // Always do the first 10 updates, before any are skipped due to update_period_.
    const int num_initial_updates = 10;
    if (t_ > t || (num_updates_skipped_ < update_period_ - 1 &&
                   t_ >= num_initial_updates)) {
    BaseFloat gamma_t = (tr_Xhat_XhatT == 0.0 ? 1.0 :
                         sqrt(tr_Xt_XtT / tr_Xhat_XhatT));
  // Whether to compute L_t and K_t together with one matrix product (done
  // when N > D).
  bool compute_lk_together = (N > D);
  if (compute_lk_together) {
    // ...
  } else {
    K_t.SymAddMat2(1.0, J_t, kNoTrans, 0.0);  // K_t = J_t J_t^T
    L_t.SymAddMat2(1.0, H_t, kTrans, 0.0);    // L_t = H_t^T H_t
  }
  // Copy L_t and K_t to the CPU, as blocks of a single matrix.
  Matrix<BaseFloat> LK_cpu(LK_t);
  SubMatrix<BaseFloat> L_t_cpu(LK_cpu, 0, R, 0, R),
      K_t_cpu(LK_cpu, R, R, 0, R);
  if (!compute_lk_together) {
    // SymAddMat2 only set the lower triangle and diagonal; symmetrize.
    L_t_cpu.CopyLowerToUpper();
    K_t_cpu.CopyLowerToUpper();
  }
  ComputeEt(d_t, beta_t, &e_t, &sqrt_e_t, &inv_sqrt_e_t);
  // ...
  ComputeZt(N, rho_t, d_t, inv_sqrt_e_t, K_t_cpu, L_t_cpu, &Z_t_double);
  // Scale Z_t down before the symmetric eigendecomposition (its entries can be
  // very large), and scale the eigenvalues back up afterwards.
  Z_t_double.Scale(1.0 / z_t_scale);
  // ...
  Z_t_scaled.Eig(&c_t, &U_t);
  // ...
  c_t.Scale(z_t_scale);
  const BaseFloat condition_threshold = 1.0e+06;
  bool must_reorthogonalize = (c_t(0) > condition_threshold * c_t(R - 1));

  BaseFloat c_t_floor = pow(rho_t * (1 - eta), 2);
  MatrixIndexT nf;
  c_t.ApplyFloor(c_t_floor, &nf);
  if (nf > 0)
    must_reorthogonalize = true;
  if (nf > 0 && self_debug_) {
    KALDI_WARN << "Floored " << nf << " elements of C_t.";
  }
  // Recover tr(X_t X_t^T) from tr(X_hat_t X_hat_t^T):
  //   tr(X_t X_t^T) = tr(X_hat_t X_hat_t^T) + sum_i L_t(i,i) * (2 - e_t(i)),
  // using X_hat_t = X_t - H_t W_t and W_t W_t^T = E_t.
  double tr_Xt_XtT = tr_Xhat_XhatT;
  for (int32 i = 0; i < R; i++)
    tr_Xt_XtT += L_t_cpu(i, i) * (2.0 - e_t(i));
  BaseFloat gamma_t = (tr_Xhat_XhatT == 0.0 ? 1.0 :
                       sqrt(tr_Xt_XtT / tr_Xhat_XhatT));
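// That is, gamma_t = sqrt( tr(X_t X_t^T) / tr(X_hat_t X_hat_t^T) ), or 1.0 for
// all-zero input: the returned *scale keeps the preconditioned directions at
// roughly the same overall magnitude as the input X_t.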
  BaseFloat rho_t1 = 1.0 / (D - R) * (eta / N * tr_Xt_XtT
                                      + (1 - eta) * (D * rho_t + d_t.Sum())
                                      - d_t1.Sum());
  // ...
  if (rho_t1 < floor_val)
    rho_t1 = floor_val;
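// Written out (a restatement in the notation of the NG-SGD write-up, with D_t
// the diagonal matrix with entries d_t and D the dimension), the update above
// is, before flooring:
//   rho_{t+1} = 1/(D - R) * ( eta/N * tr(X_t X_t^T)
//                             + (1 - eta) * (D * rho_t + tr(D_t))
//                             - tr(D_{t+1}) ).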
  ComputeWt1(N, d_t, d_t1, rho_t, rho_t1, U_t, sqrt_c_t, inv_sqrt_e_t,
             W_t, &J_t, &W_t1);
  // ...
  if (must_reorthogonalize) {
  // Commit the updated d_{t+1} into the class member.
  d_t_.CopyFromVec(d_t1);
  // In Eta(): don't let the forgetting factor get too close to 1.
  if (ans > 0.9) ans = 0.9;
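// The value being capped above is the forgetting factor computed by Eta();
// assuming the definition documented for this class,
//   eta = 1 - exp(-N / num_samples_history_),
// where N is the number of rows (samples) in the minibatch.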
  // In ComputeWt1(): compute E_{t+1} and its square roots from d_{t+1}.
  ComputeEt(d_t1, beta_t1, &e_t1, &sqrt_e_t1, &inv_sqrt_e_t1);
  // w_t_coeff holds the diagonal of (1-eta)/(eta/N) * (D_t + rho_t I), used to
  // form B_t = J_t + (1-eta)/(eta/N) (D_t + rho_t I) W_t.
  for (int32 i = 0; i < R; i++)
    w_t_coeff(i) = (1.0 - eta) / (eta / N) * (d_t(i) + rho_t);
  // ...
  // A_t = (eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5}, formed by scaling
  // the rows and columns of U_t^T.
  for (int32 i = 0; i < R; i++) {
    BaseFloat i_factor = (eta / N) * sqrt_e_t1(i) * inv_sqrt_c_t(i);
    for (int32 j = 0; j < R; j++) {
      BaseFloat j_factor = inv_sqrt_e_t(j);
      A_t(i, j) *= i_factor * j_factor;
    }
  }
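// Summary of the remaining step of ComputeWt1() (NG-SGD write-up notation; the
// exact statements are elided above): B_t = J_t + (1-eta)/(eta/N) *
// (D_t + rho_t I) W_t is accumulated in place into *J_t, and then
//   W_{t+1} = A_t B_t.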
  int32 R = d_t.Dim();
  Vector<BaseFloat> d_t_rho_t(d_t);
  d_t_rho_t.Add(rho_t);  // now contains the diagonal of (D_t + rho_t I).
  double etaN = eta / N, eta1 = 1.0 - eta,
      etaN_sq = etaN * etaN, eta1_sq = eta1 * eta1,
      etaN_eta1 = etaN * eta1;
  for (int32 i = 0; i < R; i++) {
    double inv_sqrt_e_t_i = inv_sqrt_e_t(i), d_t_rho_t_i = d_t_rho_t(i);
    for (int32 j = 0; j <= i; j++) {
      // Average K_t and L_t with their transposes, since their two halves may
      // not have been filled in identically.
      double inv_sqrt_e_t_j = inv_sqrt_e_t(j), d_t_rho_t_j = d_t_rho_t(j),
          L_t_i_j = 0.5 * (L_t(i, j) + L_t(j, i)),
          K_t_i_j = 0.5 * (K_t(i, j) + K_t(j, i));
      (*Z_t)(i, j) = etaN_sq * inv_sqrt_e_t_i * K_t_i_j * inv_sqrt_e_t_j
          + etaN_eta1 * inv_sqrt_e_t_i * L_t_i_j * inv_sqrt_e_t_j * d_t_rho_t_j
          + etaN_eta1 * d_t_rho_t_i * inv_sqrt_e_t_i * L_t_i_j * inv_sqrt_e_t_j
          + (i == j ? eta1_sq * d_t_rho_t_i * d_t_rho_t_i : 0.0);
    }
  }
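// A restatement of the expression above (assuming the write-up's notation,
// with hat{K}_t = E_t^{-1/2} K_t E_t^{-1/2} and hat{L}_t = E_t^{-1/2} L_t E_t^{-1/2}):
//   Z_t = (eta/N)^2 hat{K}_t
//         + (eta/N)(1-eta) * ( hat{L}_t (D_t + rho_t I) + (D_t + rho_t I) hat{L}_t )
//         + (1-eta)^2 (D_t + rho_t I)^2.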
    e[i] = 1.0 / (beta_t / d[i] + 1);
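// ComputeEt() fills e_t elementwise as e_t(i) = 1 / (beta_t / d_t(i) + 1),
// then derives sqrt_e_t and inv_sqrt_e_t from it (CopyFromVec, ApplyPow(0.5),
// InvertElements).  beta_t is assumed here to be the usual smoothed trace term,
//   beta_t = rho_t * (1 + alpha) + alpha * tr(D_t) / D.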
  KALDI_ASSERT(num_samples_history > 0.0 &&
               num_samples_history < 1.0e+6);
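// A minimal usage sketch (illustrative only; the exact training-loop
// integration in Kaldi differs): precondition a minibatch of per-sample
// parameter derivatives in place.  The function name and configuration values
// below are hypothetical.
static void PreconditionMinibatch(CuMatrixBase<BaseFloat> *derivs) {
  OnlinePreconditioner preconditioner;          // defaults from the constructor above.
  preconditioner.SetNumSamplesHistory(2000.0);  // forgetting-factor time constant.
  preconditioner.SetUpdatePeriod(4);            // update the low-rank factor every 4 calls.
  preconditioner.SetAlpha(4.0);                 // identity-smoothing constant.

  CuVector<BaseFloat> row_products(derivs->NumRows());
  BaseFloat scale;
  // Multiplies *derivs in place by an approximate inverse-Fisher factor;
  // row_products receives per-row inner products of the result, and scale
  // compensates for the overall magnitude change (gamma_t above).
  preconditioner.PreconditionDirections(derivs, &row_products, &scale);
  derivs->Scale(scale);
}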