basis-fmllr-diag-gmm.cc
Go to the documentation of this file.
1 // transform/basis-fmllr-diag-gmm.cc
2 
3 // Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
4 // 2014 Johns Hopkins University (author: Daniel Povey)
5 // 2014 IMSL, PKU-HKUST (Author: Wei Shi)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #include <algorithm>
23 #include <utility>
24 #include <vector>
25 using std::vector;
26 #include <string>
27 using std::string;
28 
30 #include "gmm/am-diag-gmm.h"
31 #include "gmm/mle-diag-gmm.h"
32 #include "gmm/mle-am-diag-gmm.h"
34 
35 namespace kaldi {
36 
37 
// Parameter list of the forward declaration of the step-size optimizer that
// ComputeTransform() calls as CalBasisFmllrStepSize(); the same parameters
// appear on the definition at the end of this file.
// NOTE(review): the opening line of this declaration (return type and name)
// is absent from this listing -- confirm it against the original source.
//   spk_stats       - speaker statistics; the definition reads beta_ and dim_
//   spk_stats_tmp_K - single-precision copy of the K statistics
//   spk_stats_tmp_G - single-precision copies of the per-row G statistics
//   delta           - search direction for the transform, dim x (dim+1)
//   A               - square (first dim columns) part of the current transform
//   S               - rows s_d = G_d w_d of the current transform
//   max_iters       - number of Newton iterations to run
44  const AffineXformStats &spk_stats,
45  const Matrix<BaseFloat> &spk_stats_tmp_K,
46  const std::vector<SpMatrix<BaseFloat> > &spk_stats_tmp_G,
47  const Matrix<BaseFloat> &delta,
48  const Matrix<BaseFloat> &A,
49  const Matrix<BaseFloat> &S,
50  int32 max_iters);
51 
52 
53 void BasisFmllrAccus::Write(std::ostream &os, bool binary) const {
54 
55  WriteToken(os, binary, "<BASISFMLLRACCUS>");
56  WriteToken(os, binary, "<BETA>");
57  WriteBasicType(os, binary, beta_);
58  if (!binary) os << '\n';
59  if (grad_scatter_.NumCols() != 0) {
60  WriteToken(os, binary, "<GRADSCATTER>");
61  grad_scatter_.Write(os, binary);
62  }
63  WriteToken(os, binary, "</BASISFMLLRACCUS>");
64 }
65 
66 void BasisFmllrAccus::Read(std::istream &is, bool binary,
67  bool add) {
68  ExpectToken(is, binary, "<BASISFMLLRACCUS>");
69  ExpectToken(is, binary, "<BETA>");
70  double tmp_beta = 0;
71  ReadBasicType(is, binary, &tmp_beta);
72  if (add) {
73  beta_ += tmp_beta;
74  } else {
75  beta_ = tmp_beta;
76  }
77  ExpectToken(is, binary, "<GRADSCATTER>");
78  grad_scatter_.Read(is, binary, add);
79  ExpectToken(is, binary, "</BASISFMLLRACCUS>");
80 }
81 
// Body of the routine that allocates the gradient scatter accumulator.
// NOTE(review): the line that opens this definition is absent from this
// listing; 'dim' is presumably the feature dimension argument -- confirm.
// A non-positive 'dim' is reported through KALDI_ERR; otherwise
// grad_scatter_ is resized to (dim + 1) * dim rows and zeroed, matching the
// length of the row-stacked gradient vector accumulated into it.
83  if (dim <= 0) {
84  KALDI_ERR << "Invalid feature dimension " << dim; // dim=0 is not allowed
85  } else {
86  // 'kSetZero' may not be necessary, but makes computation safe
87  grad_scatter_.Resize((dim + 1) * dim, kSetZero);
88  }
89 }
90 
// Accumulates the gradient scatter contribution of one speaker.
// NOTE(review): the line that opens this definition is absent from this
// listing -- confirm the name/class against the original source.
//   spk_stats - fMLLR statistics of the speaker (beta_, K_ and G_ are read)
// Speakers whose occupancy count beta_ is not positive contribute nothing.
92  const AffineXformStats &spk_stats) {
93 
94  // Gradient of auxf w.r.t. xform_spk
95  // Eq. (33)
// Built as beta * [I 0] + K, then row d has row d of G_d subtracted, i.e.
// the gradient is evaluated with the transform set to [I 0].
96  Matrix<double> grad_mat(dim_, dim_ + 1);
97  grad_mat.SetUnit();
98  grad_mat.Scale(spk_stats.beta_);
99  grad_mat.AddMat(1.0, spk_stats.K_);
100  for (int d = 0; d < dim_; ++d) {
// The packed symmetric G_d is expanded to a full matrix to take its row d.
101  Matrix<double> G_d_mat(spk_stats.G_[d]);
102  grad_mat.Row(d).AddVec(-1.0, G_d_mat.Row(d));
103  }
104  // Row stack of gradient matrix
// The double-precision gradient is stored into a BaseFloat vector here.
105  Vector<BaseFloat> grad_vec((dim_+1) * dim_);
106  grad_vec.CopyRowsFromMat(grad_mat);
107  // The amount of data beta_ is likely to be ZERO, especially
108  // when silence-weight is set to be 0 and we are using the
109  // per-utt mode.
110  if (spk_stats.beta_ > 0) {
111  beta_ += spk_stats.beta_;
// Second-order update of the scatter with grad_vec, weighted by 1 / beta.
112  grad_scatter_.AddVec2(BaseFloat(1.0 / spk_stats.beta_), grad_vec);
113  }
114 }
115 
116 void BasisFmllrEstimate::Write(std::ostream &os, bool binary) const {
117  uint32 tmp_uint32;
118 
119  WriteToken(os, binary, "<BASISFMLLRPARAM>");
120 
121  WriteToken(os, binary, "<NUMBASIS>");
122  tmp_uint32 = static_cast<uint32>(basis_size_);
123  WriteBasicType(os, binary, tmp_uint32);
124  if (fmllr_basis_.size() != 0) {
125  WriteToken(os, binary, "<BASIS>");
126  for (int32 n = 0; n < basis_size_; ++n) {
127  fmllr_basis_[n].Write(os, binary);
128  }
129  }
130  WriteToken(os, binary, "</BASISFMLLRPARAM>");
131 }
132 
133 void BasisFmllrEstimate::Read(std::istream &is, bool binary) {
134  uint32 tmp_uint32;
135  string token;
136 
137  ExpectToken(is, binary, "<BASISFMLLRPARAM>");
138 
139  ExpectToken(is, binary, "<NUMBASIS>");
140  ReadBasicType(is, binary, &tmp_uint32);
141  basis_size_ = static_cast<int32>(tmp_uint32);
142  KALDI_ASSERT(basis_size_ > 0);
143  ExpectToken(is, binary, "<BASIS>");
144  fmllr_basis_.resize(basis_size_);
145  for (int32 n = 0; n < basis_size_; ++n) {
146  fmllr_basis_[n].Read(is, binary);
147  if (n == 0)
148  dim_ = fmllr_basis_[n].NumRows();
149  else {
150  KALDI_ASSERT(dim_ == fmllr_basis_[n].NumRows());
151  }
152  }
153  ExpectToken(is, binary, "</BASISFMLLRPARAM>");
154 }
155 
// Computes the preconditioner matrix H used before estimating the bases.
// NOTE(review): the line that opens this definition (carrying the 'am_gmm'
// parameter) is absent from this listing -- confirm against the original.
//   am_gmm   - acoustic model; its dimension must equal dim_
//   pre_cond - output, resized to (dim_ + 1) * dim_ rows if needed and
//              overwritten with the lower triangle of H
// Raises KALDI_ERR if the assembled H is not symmetric.
157  SpMatrix<double> *pre_cond) {
158  KALDI_ASSERT(am_gmm.Dim() == dim_);
159  if (pre_cond->NumRows() != (dim_ + 1) * dim_)
160  pre_cond->Resize((dim_ + 1) * dim_, kSetZero);
161 
162  int32 num_pdf = am_gmm.NumPdfs();
163  Matrix<double> H_mat((dim_ + 1) * dim_, (dim_ + 1) * dim_);
164  // expected values of fMLLR G statistics
165  vector< SpMatrix<double> > G_hat(dim_);
166  for (int32 d = 0; d < dim_; ++d)
167  G_hat[d].Resize(dim_ + 1, kSetZero);
168 
169  // extend mean vectors with 1 [mule_jm 1]
170  Vector<double> extend_mean(dim_ + 1);
171  // extend covariance matrix with a row and column of 0
172  Vector<double> extend_var(dim_ + 1);
173  for (int32 j = 0; j < num_pdf; ++j) {
174  const DiagGmm &diag_gmm = am_gmm.GetPdf(j);
175  int32 num_comp = diag_gmm.NumGauss();
176  // means, covariance and mixture weights for this diagonal GMM
177  Matrix<double> means(num_comp, dim_);
178  Matrix<double> vars(num_comp, dim_);
179  diag_gmm.GetMeans(&means); diag_gmm.GetVars(&vars);
180  Vector<BaseFloat> weights(diag_gmm.weights());
181 
182  for (int32 m = 0; m < num_comp; ++m) {
183  extend_mean.Range(0, dim_).CopyFromVec(means.Row(m));
184  extend_mean(dim_) = 1.0;
185  extend_var.Range(0, dim_).CopyFromVec(vars.Row(m));
186  extend_var(dim_) = 0;
187  // loop over feature dimension
188  // Eq. (28): G_hat {d} = \sum_{j, m} P_{j}{m} Inv_Sigma{j, m, d}
189  // (mule_extend mule_extend^T + Sigma_extend)
190  // where P_{j}{m} = P_{j} c_{j}{m}
191  for (int32 d = 0; d < dim_; ++d) {
// Every pdf gets the same prior 1 / num_pdf; the weight is that prior times
// the mixture weight, divided by the variance of dimension d.
192  double alpha = (1.0 / num_pdf) * weights(m) * (1.0 / vars.Row(m)(d));
193  G_hat[d].AddVec2(alpha, extend_mean);
194  // add vector to the diagonal elements of the matrix
195  // not work for full covariance matrices
196  G_hat[d].AddDiagVec(alpha, extend_var);
197  } // loop over dimension
198  } // loop over Gaussians
199  } // loop over states
200 
201  // fill H_ with G_hat[i]; build the block diagonal structure
202  // Eq. (31)
203  for (int32 d = 0; d < dim_; d++) {
204  H_mat.Range(d * (dim_ + 1), (dim_ + 1), d * (dim_ + 1), (dim_ + 1))
205  .CopyFromSp(G_hat[d]);
206  }
207 
208  // add the extra H(1) elements
209  // Eq. (30) and Footnote 1 (0-based index)
// Adds 1 at (row-stacked index of (i, j), row-stacked index of (j, i)).
210  for (int32 i = 0; i < dim_; ++i)
211  for (int32 j = 0; j < dim_; ++j)
212  H_mat(i * (dim_ + 1) + j, j * (dim_ + 1) + i) += 1;
213  // the final H should be symmetric
214  if (!H_mat.IsSymmetric())
215  KALDI_ERR << "Preconditioner matrix H = H(1) + H(2) is not symmetric";
216  pre_cond->CopyFromMat(H_mat, kTakeLower);
217 }
218 
// Estimates the basis matrices from the accumulated gradient scatter.
// NOTE(review): the line that opens this definition is absent from this
// listing -- confirm the name/class against the original source.
//   am_gmm      - acoustic model used to build the preconditioner
//   basis_accus - accumulated statistics (grad_scatter_ and beta_ are read)
// On return fmllr_basis_ holds basis_size_ matrices of size dim_ x (dim_+1),
// ordered by decreasing eigenvalue, and the per-frame eigenvalues are logged.
220  const AmDiagGmm &am_gmm,
221  const BasisFmllrAccus &basis_accus) {
222  // Compute the preconditioner
223  SpMatrix<double> precond_mat((dim_ + 1) * dim_);
224  ComputeAmDiagPrecond(am_gmm, &precond_mat);
225  // H = C C^T
226  TpMatrix<double> C((dim_+1) * dim_);
227  C.Cholesky(precond_mat);
228  TpMatrix<double> C_inv(C);
229  C_inv.InvertDouble();
230  // From TpMatrix to Matrix
231  Matrix<double> C_inv_full((dim_ + 1) * dim_, (dim_ + 1) * dim_);
232  C_inv_full.CopyFromTp(C_inv);
233 
234  // Convert to the preconditioned coordinates
235  // Eq. (35) M_hat = C^{-1} grad_scatter C^{-T}
236  SpMatrix<double> M_hat((dim_ + 1) * dim_);
237  {
// The scatter is accumulated as BaseFloat; copy it to double first.
238  SpMatrix<double> grad_scatter_d(basis_accus.grad_scatter_);
239  M_hat.AddMat2Sp(1.0, C_inv_full, kNoTrans, grad_scatter_d, 0.0);
240  }
241  Vector<double> Lvec((dim_ + 1) * dim_);
242  Matrix<double> U((dim_ + 1) * dim_, (dim_ + 1) * dim_);
243  // SVD of M_hat; sort eigenvalues from greatest to smallest
244  M_hat.SymPosSemiDefEig(&Lvec, &U);
245  SortSvd(&Lvec, &U);
246  // After transpose, each row is one base
247  U.Transpose();
248 
// Only the first basis_size_ (largest-eigenvalue) directions are kept.
249  fmllr_basis_.resize(basis_size_);
250  for (int32 n = 0; n < basis_size_; ++n) {
251  fmllr_basis_[n].Resize(dim_, dim_ + 1, kSetZero);
252  Vector<double> basis_vec((dim_ + 1) * dim_);
253  // Convert eigenvectors back to unnormalized space
// basis_vec = C^{-T} u_n
254  basis_vec.AddMatVec(1.0, C_inv_full, kTrans, U.Row(n), 0.0);
255  // Convert stacked vectors to matrix
256  fmllr_basis_[n].CopyRowsFromVec(basis_vec);
257  }
258  // Output the eigenvalues of the gradient scatter matrix
259  // The eigenvalues are divided by twice the number of frames
260  // in the training data, to get the per-frame values.
261  Vector<double> Lvec_scaled(Lvec);
262  Lvec_scaled.Scale(1.0 / (2 * basis_accus.beta_));
263  KALDI_LOG << "The [per-frame] eigenvalues sorted from largest to smallest: " << Lvec_scaled;
266  KALDI_LOG << "Sum of the [per-frame] eigenvalues, that is"
267  " the log-likelihood improvement, is " << Lvec_scaled.Sum();
268 }
269 
// Computes the speaker transform as a weighted combination of the bases by
// gradient ascent on the fMLLR auxiliary function.
// NOTE(review): the line that opens this definition is absent from this
// listing -- confirm the name/class/return type against the original source.
//   spk_stats   - speaker statistics; spk_stats.dim_ must equal dim_
//   out_xform   - in/out transform; resized (and zeroed) to dim_ x (dim_+1)
//                 if it has another shape.  An all-zero matrix means "start
//                 from [I;0]", otherwise its content is the starting point.
//   coefficient - output basis weights; may be NULL, in which case a
//                 temporary is used.  Resized to 0 when the count is too low.
//   options     - min_count, size_scale, num_iters, step_size_iters are read
// Returns the summed change of the auxiliary function over all iterations,
// or 0.0 (leaving out_xform untouched) when spk_stats.beta_ < min_count.
271  const AffineXformStats &spk_stats,
272  Matrix<BaseFloat> *out_xform,
273  Vector<BaseFloat> *coefficient,
274  BasisFmllrOptions options) const {
275  if (coefficient == NULL) {
276  Vector<BaseFloat> tmp;
277  return ComputeTransform(spk_stats, out_xform, &tmp, options);
278  }
279  KALDI_ASSERT(dim_ == spk_stats.dim_);
280  if (spk_stats.beta_ < options.min_count) {
281  KALDI_WARN << "Not updating fMLLR since count is below min-count: "
282  << spk_stats.beta_;
283  coefficient->Resize(0);
284  return 0.0;
285  } else {
286  if (out_xform->NumRows() != dim_ || out_xform->NumCols() != (dim_ +1)) {
287  out_xform->Resize(dim_, dim_ + 1, kSetZero);
288  }
289  // Initialized either as [I;0] or as the current transform
290  Matrix<BaseFloat> W_mat(dim_, dim_ + 1);
291  if (out_xform->IsZero()) {
292  W_mat.SetUnit();
293  } else {
294  W_mat.CopyFromMat(*out_xform);
295  }
296 
297  // Create temporary K and G quantities. Add for efficiency,
298  // avoid repetitions of converting the stats from double
299  // precision to single precision
300  Matrix<BaseFloat> stats_tmp_K(spk_stats.K_);
301  std::vector<SpMatrix<BaseFloat> > stats_tmp_G(dim_);
302  for (int32 d = 0; d < dim_; d++)
303  stats_tmp_G[d] = SpMatrix<BaseFloat>(spk_stats.G_[d]);
304 
305  // Number of bases for this speaker, according to the available
306  // adaptation data
// NOTE(review): the conversion to int32 truncates, so this is 0 whenever
// size_scale * beta_ < 1; delta_W below then stays all-zero -- confirm the
// step-size routine copes with a zero search direction.
307  int32 basis_size = int32 (std::min( double(basis_size_),
308  options.size_scale * spk_stats.beta_));
309 
310  coefficient->Resize(basis_size, kSetZero);
311 
312  BaseFloat impr_spk = 0;
313  for (int32 iter = 1; iter <= options.num_iters; ++iter) {
314  // Auxf computation based on FmllrAuxFuncDiagGmm from fmllr-diag-gmm.cc
315  BaseFloat start_obj = FmllrAuxFuncDiagGmm(W_mat, spk_stats);
316 
317  // Contribution of quadratic terms to derivative
318  // Eq. (37) s_{d} = G_{d} w_{d}
319  Matrix<BaseFloat> S(dim_, dim_ + 1);
320  for (int32 d = 0; d < dim_; ++d)
321  S.Row(d).AddSpVec(1.0, stats_tmp_G[d], W_mat.Row(d), 0.0);
322 
323 
324  // W_mat = [A; b]
325  Matrix<BaseFloat> A(dim_, dim_);
326  A.CopyFromMat(W_mat.Range(0, dim_, 0, dim_));
327  Matrix<BaseFloat> A_inv(A);
328  A_inv.InvertDouble();
329  Matrix<BaseFloat> A_inv_trans(A_inv);
330  A_inv_trans.Transpose();
331  // Compute gradient of auxf w.r.t. W_mat
332  // Eq. (38) P = beta [A^{-T}; 0] + K - S
333  Matrix<BaseFloat> P(dim_, dim_ + 1);
334  P.SetZero();
335  P.Range(0, dim_, 0, dim_).CopyFromMat(A_inv_trans);
336  P.Scale(spk_stats.beta_);
337  P.AddMat(1.0, stats_tmp_K);
338  P.AddMat(-1.0, S);
339 
340  // Compute directional gradient restricted by bases. Here we only use
341  // the simple gradient method, rather than conjugate gradient. Finding
342  // the optimal transformation W_mat is equivalent to optimizing weights
343  // d_{1,2,...,N}.
344  // Eq. (39) delta(W) = \sum_n tr(\fmllr_basis_{n}^T \P) \fmllr_basis_{n}
345  // delta(d_{n}) = tr(\fmllr_basis_{n}^T \P)
346  Matrix<BaseFloat> delta_W(dim_, dim_ + 1);
347  Vector<BaseFloat> delta_d(basis_size);
348  for (int32 n = 0; n < basis_size; ++n) {
349  delta_d(n) = TraceMatMat(fmllr_basis_[n], P, kTrans);
350  delta_W.AddMat(delta_d(n), fmllr_basis_[n]);
351  }
352 
// The same step size scales both the transform update and the weights.
353  BaseFloat step_size = CalBasisFmllrStepSize(spk_stats, stats_tmp_K,
354  stats_tmp_G, delta_W, A, S, options.step_size_iters);
355  W_mat.AddMat(step_size, delta_W, kNoTrans);
356  coefficient->AddVec(step_size, delta_d);
357  // Check auxiliary function
358  BaseFloat end_obj = FmllrAuxFuncDiagGmm(W_mat, spk_stats);
359 
360  KALDI_VLOG(4) << "Objective function (iter=" << iter << "): "
361  << start_obj / spk_stats.beta_ << " -> "
362  << (end_obj / spk_stats.beta_) << " over "
363  << spk_stats.beta_ << " frames";
364 
365  impr_spk += (end_obj - start_obj);
366  } // loop over iters
367 
368  out_xform->CopyFromMat(W_mat, kNoTrans);
369  return impr_spk;
370  }
371 }
372 
373 // static
375  const Matrix<BaseFloat> &spk_stats_tmp_K,
376  const std::vector<SpMatrix<BaseFloat> > &spk_stats_tmp_G,
377  const Matrix<BaseFloat> &delta,
378  const Matrix<BaseFloat> &A,
379  const Matrix<BaseFloat> &S,
380  int32 max_iters) {
381 
382  int32 dim = spk_stats.dim_;
383  KALDI_ASSERT(dim == delta.NumRows() && dim == S.NumRows());
384  // The first D columns of delta_W
385  SubMatrix<BaseFloat> delta_Dim(delta, 0, dim, 0, dim);
386  // Eq. (46): b = tr(delta K^T) - tr(delta S^T)
387  BaseFloat b = TraceMatMat(delta, spk_stats_tmp_K, kTrans)
388  - TraceMatMat(delta, S, kTrans);
389  // Eq. (47): c = sum_d tr(delta_{d} G_{d} delta_{d})
390  BaseFloat c = 0;
391  Vector<BaseFloat> G_row_delta(dim + 1);
392  for (int32 d = 0; d < dim; ++d) {
393  G_row_delta.AddSpVec(1.0, spk_stats_tmp_G[d], delta.Row(d), 0.0);
394  c += VecVec(G_row_delta, delta.Row(d));
395  }
396 
397  // Sometimes, the change of step size, d1/d2, may get tiny
398  // Due to numerical precision, we compute everything in double
399  BaseFloat step_size = 0.0;
400  BaseFloat obj_old, obj_new = 0.0;
401  Matrix<BaseFloat> N(dim, dim);
402  for (int32 iter_step = 1; iter_step <= max_iters; ++iter_step) {
403  if (iter_step == 1) {
404  // k = 0, auxf = beta logdet(A)
405  obj_old = spk_stats.beta_ * A.LogDet();
406  } else {
407  obj_old = obj_new;
408  }
409 
410  // Eq. (49): N = (A + k * delta_Dim)^{-1} delta_Dim
411  // In case of bad condition, careful preconditioning should be done. Maybe safer
412  // to use SolveQuadraticMatrixProblem. Future work for Yajie.
413  Matrix<BaseFloat> tmp_A(A);
414  tmp_A.AddMat(step_size, delta_Dim, kNoTrans);
415  tmp_A.InvertDouble();
416  N.AddMatMat(1.0, tmp_A, kNoTrans, delta_Dim, kNoTrans, 0.0);
417  // first-order derivative w.r.t. k
418  // Eq. (50): d1 = beta * trace(N) + b - k * c
419  BaseFloat d1 = spk_stats.beta_ * TraceMat(N) + b - step_size * c;
420  // second-order derivative w.r.t. k
421  // Eq. (51): d2 = -beta * tr(N N) - c
422  BaseFloat d2 = -c - spk_stats.beta_ * TraceMatMat(N, N, kNoTrans);
423  d2 = std::min((double)d2, -c / 10.0);
424  // convergence judgment from fmllr-sgmm.cc
425  // it seems to work well, though not sure whether 1e-06 is appropriate
426  // note from Dan: commenting this out after someone complained it was
427  // causing a test to behave weirdly. This doesn't dominate computation
428  // anyway, I don't think.
429  // if (std::fabs(d1 / d2) < 0.000001) { break; }
430 
431  // Eq. (52): update step_size
432  BaseFloat step_size_change = -(d1 / d2);
433  step_size += step_size_change;
434 
435  // Repeatedly check auxiliary function; halve step size change if auxf decreases.
436  // According to the paper, we should limit the number of repetitions. The
437  // following implementation seems to work well. But the termination condition/judgment
438  // should be optimized later.
439  do {
440  // Eq. (48): auxf = beta * logdet(A + k * delta_Dim) + kb - 0.5 * k * k * c
441  tmp_A.CopyFromMat(A);
442  tmp_A.AddMat(step_size, delta_Dim, kNoTrans);
443  obj_new = spk_stats.beta_ * tmp_A.LogDet() + step_size * b -
444  0.5 * step_size * step_size * c;
445 
446  if (obj_new - obj_old < -1.0e-04 * spk_stats.beta_) { // deal with numerical issues
447  KALDI_WARN << "Objective function decreased (" << obj_old << "->"
448  << obj_new << "). Halving step size change ( step size "
449  << step_size << " -> " << (step_size - (step_size_change/2))
450  << ")";
451  step_size_change /= 2;
452  step_size -= step_size_change;
453  }
454  } while (obj_new - obj_old < -1.0e-04 * spk_stats.beta_ && step_size_change > 1e-05);
455  }
456  return step_size;
457 }
458 
459 } // namespace kaldi
double beta_
Occupancy count.
void Write(std::ostream &out_stream, bool binary) const
Routines for reading and writing fMLLR basis matrices.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void InvertDouble(Real *LogDet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse [double].
Matrix< double > K_
K_ is the summed outer product of [mean times inverse variance] with [extended data], scaled by the occupation counts; dimension is dim by (dim+1)
void AccuGradientScatter(const AffineXformStats &spk_stats)
Accumulate gradient scatter for one (training) speaker.
void Transpose()
Transpose the matrix.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
double TraceMat(const MatrixBase< Real > &A)
Returns trace of matrix.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void Read(std::istream &in_stream, bool binary)
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
static BaseFloat CalBasisFmllrStepSize(const AffineXformStats &spk_stats, const Matrix< BaseFloat > &spk_stats_tmp_K, const std::vector< SpMatrix< BaseFloat > > &spk_stats_tmp_G, const Matrix< BaseFloat > &delta, const Matrix< BaseFloat > &A, const Matrix< BaseFloat > &S, int32 max_iters)
This function takes the step direction (delta) of fMLLR matrix as argument, and optimize step size us...
kaldi::int32 int32
void Write(std::ostream &out_stream, bool binary) const
Routines for reading and writing stats.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <-- beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT NumRows() const
int32 dim_
dim_ is the feature dimension.
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
double ComputeTransform(const AffineXformStats &spk_stats, Matrix< BaseFloat > *out_xform, Vector< BaseFloat > *coefficients, BasisFmllrOptions options) const
This function performs speaker adaptation, computing the fMLLR matrix based on speaker statistics...
void GetVars(Matrix< Real > *v) const
Accessor for covariances.
Definition: diag-gmm-inl.h:115
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector : *this = *this + alpha * rv^2 [element-wise squaring].
void ComputeAmDiagPrecond(const AmDiagGmm &am_gmm, SpMatrix< double > *pre_cond)
This function computes the preconditioner matrix, prior to base matrices estimation.
bool IsZero(Real cutoff=1.0e-05) const
Returns true if matrix is all zeros.
bool IsSymmetric(Real cutoff=1.0e-05) const
Returns true if matrix is Symmetric.
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
float BaseFloat
Definition: kaldi-types.h:29
Stats for fMLLR subspace estimation.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void Scale(Real alpha)
Multiply each element with a scalar value.
void Read(std::istream &in_stream, bool binary, bool add=false)
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
struct rnnlm::@11::@12 n
void GetMeans(Matrix< Real > *m) const
Accessor for means.
Definition: diag-gmm-inl.h:123
void CopyFromTp(const TpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given tpmatrix. (no resize is done).
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
const Vector< BaseFloat > & weights() const
Definition: diag-gmm.h:178
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
void SetZero()
Sets matrix to zero.
int32 dim_
Feature dimension.
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <-- beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
SpMatrix< BaseFloat > grad_scatter_
Gradient scatter. Dim is [(D+1)*D] [(D+1)*D].
std::vector< SpMatrix< double > > G_
G_ is the outer product of extended-data, scaled by inverse variance, for each dimension.
int32 Dim() const
Definition: am-diag-gmm.h:79
int32 NumPdfs() const
Definition: am-diag-gmm.h:82
void CopyFromMat(const MatrixBase< Real > &orig, SpCopyType copy_type=kTakeMean)
Definition: sp-matrix.cc:112
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
Definition: am-diag-gmm.h:119
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <-- beta*this + alpha * M * A * M^T.
Definition: sp-matrix.cc:982
float FmllrAuxFuncDiagGmm(const MatrixBase< float > &xform, const AffineXformStats &stats)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void EstimateFmllrBasis(const AmDiagGmm &am_gmm, const BasisFmllrAccus &basis_accus)
Estimate the base matrices efficiently in a Maximum Likelihood manner.
void InvertDouble()
Definition: tp-matrix.h:83
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81
void SymPosSemiDefEig(VectorBase< Real > *s, MatrixBase< Real > *P, Real tolerance=0.001) const
This is the version of SVD that we implement for symmetric positive definite matrices.
Definition: sp-matrix.cc:57
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
double beta_
beta_ is the occupation count.
Sub-matrix representation.
Definition: kaldi-matrix.h:988
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94