fmllr-sgmm2.cc
Go to the documentation of this file.
1 // sgmm2/fmllr-sgmm2.cc
2 
3 // Copyright 2009-2012 Saarland University (author: Arnab Ghoshal)
4 // Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include <algorithm>
22 #include <string>
23 #include <vector>
24 using std::vector;
25 
26 #include "sgmm2/fmllr-sgmm2.h"
27 #include "util/parse-options.h"
28 
29 namespace kaldi {
30 
32  const Matrix<BaseFloat> &gradient_in,
33  Matrix<BaseFloat> *gradient_out) {
34  // Eq. (B.14): P' = A_{inv}^T P {W_{pre}^+}^T
35  int32 dim = gradient_in.NumRows();
36  Matrix<BaseFloat> Wpre_plus(dim + 1, dim + 1, kSetZero);
37  Wpre_plus.Range(0, dim, 0, dim + 1).CopyFromMat(globals.pre_xform_);
38  Wpre_plus(dim, dim) = 1;
39  SubMatrix<BaseFloat> Ainv(globals.inv_xform_, 0, dim, 0, dim);
40  Matrix<BaseFloat> AinvP(dim, dim + 1, kUndefined);
41  AinvP.AddMatMat(1.0, Ainv, kTrans, gradient_in, kNoTrans, 0.0);
42  gradient_out->AddMatMat(1.0, AinvP, kNoTrans, Wpre_plus, kTrans, 0.0);
43 }
44 
46  const Matrix<BaseFloat> &delta_in,
47  Matrix<BaseFloat> *delta_out) {
48  // Eq. (B.25): \Delta = A_{inv} \Delta' W_{pre}^+
49  int32 dim = delta_in.NumRows();
50  Matrix<BaseFloat> Wpre_plus(dim + 1, dim + 1, kSetZero);
51  Wpre_plus.Range(0, dim, 0, dim + 1).CopyFromMat(globals.pre_xform_);
52  Wpre_plus(dim, dim) = 1;
53  SubMatrix<BaseFloat> Ainv(globals.inv_xform_, 0, dim, 0, dim);
54  Matrix<BaseFloat> AinvD(dim, dim + 1, kUndefined);
55  AinvD.AddMatMat(1.0, Ainv, kNoTrans, delta_in, kNoTrans, 0.0);
56  delta_out->AddMatMat(1.0, AinvD, kNoTrans, Wpre_plus, kNoTrans, 0.0);
57 }
58 
60  const Matrix<BaseFloat> &gradient_in,
61  Matrix<BaseFloat> *gradient_out) {
62  int32 dim = gradient_in.NumRows();
63  const Vector<BaseFloat> &D = globals.mean_scatter_;
64  if (D.Min() <= 0.0)
65  KALDI_ERR << "Cannot estimate FMLLR: mean scatter has 0 eigenvalues.";
66  for (int32 r = 0; r < dim; r++) {
67  for (int32 c = 0; c < r; c++) {
68  // Eq. (B.15)
69  (*gradient_out)(r, c) = gradient_in(r, c) / std::sqrt(1 + D(c));
70  // Eq. (B.16)
71  (*gradient_out)(c, r) = gradient_in(c, r) / std::sqrt(1 + D(r) -
72  1 / (1 + D(c))) - gradient_in(r, c) / ((1 + D(c)) *
73  std::sqrt(1 + D(r) - 1 / (1 + D(c))));
74  }
75  // Eq. (B.17) & (B.18)
76  (*gradient_out)(r, r) = gradient_in(r, r) / std::sqrt(2 + D(r));
77  (*gradient_out)(r, dim) = gradient_in(r, dim);
78  }
79 }
80 
82  const Matrix<BaseFloat> &delta_in,
83  Matrix<BaseFloat> *delta_out) {
84  int32 dim = delta_in.NumRows();
85  const Vector<BaseFloat> &D = globals.mean_scatter_;
86  if (D.Min() <= 0.0)
87  KALDI_ERR << "Cannot estimate FMLLR: mean scatter has 0 eigenvalues.";
88  for (int32 r = 0; r < dim; r++) {
89  for (int32 c = 0; c < r; c++) {
90  // Eq. (B.21)
91  (*delta_out)(r, c) = delta_in(r, c) / std::sqrt(1 + D(c)) -
92  delta_in(c, r) / ((1 + D(c)) * std::sqrt(1 + D(r) - 1 / (1 + D(c))));
93  // Eq. (B.22)
94  (*delta_out)(c, r) = delta_in(c, r) / std::sqrt(1 + D(r) - 1/ (1 + D(c)));
95  }
96  // Eq. (B.23) & (B.24)
97  (*delta_out)(r, r) = delta_in(r, r) / std::sqrt(2 + D(r));
98  (*delta_out)(r, dim) = delta_in(r, dim);
99  }
100 }
101 
102 
103 void Sgmm2FmllrGlobalParams::Write(std::ostream &out, bool binary) const {
104  WriteToken(out, binary, "<SGMM_FMLLR_GLOBAL_PARAMS>");
105  WriteToken(out, binary, "<PRE_XFORM>");
106  pre_xform_.Write(out, binary);
107  WriteToken(out, binary, "<INV_XFORM>");
108  inv_xform_.Write(out, binary);
109  WriteToken(out, binary, "<MEAN_SCATTER>");
110  mean_scatter_.Write(out, binary);
111  if (fmllr_bases_.size() != 0) {
112  WriteToken(out, binary, "<FMLLR_BASIS>");
113  uint32 tmp = static_cast<uint32>(fmllr_bases_.size());
114  WriteBasicType(out, binary, tmp);
115  for (uint32 i = 0; i < tmp; i++) {
116  fmllr_bases_[i].Write(out, binary);
117  }
118  }
119  WriteToken(out, binary, "</SGMM_FMLLR_GLOBAL_PARAMS>");
120 }
121 
122 void Sgmm2FmllrGlobalParams::Read(std::istream &in, bool binary) {
123  ExpectToken(in, binary, "<SGMM_FMLLR_GLOBAL_PARAMS>");
124  ExpectToken(in, binary, "<PRE_XFORM>");
125  pre_xform_.Read(in, binary);
126  ExpectToken(in, binary, "<INV_XFORM>");
127  inv_xform_.Read(in, binary);
128  ExpectToken(in, binary, "<MEAN_SCATTER>");
129  mean_scatter_.Read(in, binary);
130  std::string token;
131  ReadToken(in, binary, &token);
132  if (token == "<FMLLR_BASIS>") {
133  uint32 tmp;
134  ReadBasicType(in, binary, &tmp);
135  fmllr_bases_.resize(tmp);
136  for (uint32 i = 0; i < tmp; i++) {
137  fmllr_bases_[i].Read(in, binary);
138  }
139  } else {
140  if (token != "</SGMM_FMLLR_GLOBAL_PARAMS>")
141  KALDI_ERR << "Unexpected token '" << token << "' found.";
142  }
143 }
144 
145 
146 void FmllrSgmm2Accs::Init(int32 dim, int32 num_gaussians) {
147  if (dim == 0) { // empty stats
148  dim_ = 0; // non-zero dimension is meaningless in empty stats
149  stats_.Init(0, 0); // clear the stats
150  } else {
151  dim_ = dim;
152  stats_.Init(dim, num_gaussians);
153  }
154 }
155 
157  const VectorBase<BaseFloat> &data,
158  const Sgmm2PerFrameDerivedVars &frame_vars,
159  int32 pdf_index, BaseFloat weight,
160  Sgmm2PerSpkDerivedVars *spk) {
161  // Calulate Gaussian posteriors and collect statistics
162  Matrix<BaseFloat> posteriors;
163  BaseFloat log_like = model.ComponentPosteriors(frame_vars, pdf_index,
164  spk, &posteriors);
165  posteriors.Scale(weight);
166  AccumulateFromPosteriors(model, *spk, data, frame_vars.gselect, posteriors,
167  pdf_index);
168  return log_like;
169 }
170 
172  const AmSgmm2 &model,
173  const Sgmm2PerSpkDerivedVars &spk,
174  const VectorBase<BaseFloat> &data,
175  const vector<int32> &gselect,
176  const Matrix<BaseFloat> &posteriors,
177  int32 j2) {
178  Vector<double> var_scaled_mean(dim_), extended_data(dim_+1);
179  extended_data.Range(0, dim_).CopyFromVec(data);
180  extended_data(dim_) = 1.0;
181  SpMatrix<double> scatter(dim_+1, kSetZero);
182  scatter.AddVec2(1.0, extended_data);
183  int32 j1 = model.Pdf2Group(j2);
184  for (int32 ki = 0, ki_max = gselect.size(); ki < ki_max; ki++) {
185  int32 i = gselect[ki];
186 
187  for (int32 m = 0; m < model.NumSubstatesForGroup(j1); m++) {
188  // posterior gamma_{jkmi}(t) eq.(39)
189  BaseFloat gammat_jmi = posteriors(ki, m);
190 
191  // Accumulate statistics for non-zero gaussian posterior
192  if (gammat_jmi > 0.0) {
193  stats_.beta_ += gammat_jmi;
194  model.GetVarScaledSubstateSpeakerMean(j1, m, i, spk,
195  &var_scaled_mean);
196  // Eq. (52): K += \gamma_{jmi} \Sigma_{i}^{-1} \mu_{jmi}^{(s)} x^{+T}
197  stats_.K_.AddVecVec(gammat_jmi, var_scaled_mean, extended_data);
198  // Eq. (53): G_{i} += \gamma_{jmi} x^{+} x^{+T}
199  stats_.G_[i].AddSp(gammat_jmi, scatter);
200  } // non-zero posteriors
201  } // loop over substates
202  } // loop over selected Gaussians
203 }
204 
206  const Sgmm2FmllrGlobalParams &globals, SpMatrix<double> *grad_scatter) {
207  if (stats_.beta_ <= 0.0) {
208  KALDI_WARN << "Not committing any stats since no stats accumulated.";
209  return;
210  }
211  int32 dim = sgmm.FeatureDim();
212  Matrix<BaseFloat> xform(dim, dim + 1, kUndefined);
213  xform.SetUnit();
214  Matrix<BaseFloat> grad(dim, dim + 1, kSetZero);
215  this->FmllrObjGradient(sgmm, xform, &grad, NULL);
216  Matrix<BaseFloat> pre_xformed_grad(dim, dim + 1, kSetZero);
217  ApplyPreXformToGradient(globals, grad, &pre_xformed_grad);
218  Matrix<BaseFloat> hess_xformed_grad(dim, dim + 1, kSetZero);
219  ApplyHessianXformToGradient(globals, pre_xformed_grad, &hess_xformed_grad);
220  Vector<double> grad_vec(dim * (dim + 1));
221  grad_vec.CopyRowsFromMat(hess_xformed_grad);
222  grad_vec.Scale(1 / std::sqrt(stats_.beta_));
223  grad_scatter->AddVec2(1.0, grad_vec);
224  KALDI_LOG << "Frame counts for when committing fMLLR subspace stats are "
225  << stats_.beta_;
226 }
227 
228 
230  const Matrix<BaseFloat> &xform,
231  Matrix<BaseFloat> *grad_out,
232  Matrix<BaseFloat> *G_out) const {
233  int32 dim = sgmm.FeatureDim(),
234  num_gauss = sgmm.NumGauss();
235  KALDI_ASSERT(stats_.G_.size() == static_cast<size_t>(num_gauss));
236  Matrix<double> xform_d(xform);
237  SubMatrix<double> A(xform_d, 0, dim, 0, dim);
238  Matrix<double> xform_g(dim, dim + 1), total_g(dim, dim + 1);
239  SpMatrix<double> inv_covar(dim);
240  double obj = stats_.beta_ * A.LogDet() +
241  TraceMatMat(xform_d, stats_.K_, kTrans);
242  for (int32 i = 0; i < num_gauss; i++) {
243  sgmm.GetInvCovars(i, &inv_covar);
244  xform_g.AddMatSp(1.0, xform_d, kNoTrans, stats_.G_[i], 0.0);
245  total_g.AddSpMat(1.0, inv_covar, xform_g, kNoTrans, 1.0);
246  }
247  obj -= 0.5 * TraceMatMat(xform_d, total_g, kTrans);
248  if (G_out != NULL) G_out->CopyFromMat(total_g);
249 
250  // Compute the gradient: P = \beta [(A^{-1})^{T} , 0] + K - S
251  if (grad_out != NULL) {
252  Matrix<double> grad_d(dim, dim + 1, kSetZero);
253  grad_d.Range(0, dim, 0, dim).CopyFromMat(A);
254  grad_d.Range(0, dim, 0, dim).InvertDouble();
255  grad_d.Range(0, dim, 0, dim).Transpose();
256  grad_d.Scale(stats_.beta_);
257  grad_d.AddMat(-1.0, total_g, kNoTrans);
258  grad_d.AddMat(1.0, stats_.K_, kNoTrans);
259  grad_out->CopyFromMat(grad_d);
260  }
261 
262  return obj;
263 }
264 
265 
266 void FmllrSgmm2Accs::Write(std::ostream &out, bool binary) const {
267  WriteToken(out, binary, "<FMLLRACCS>");
268  WriteToken(out, binary, "<DIMENSION>");
269  WriteBasicType(out, binary, dim_);
270  WriteToken(out, binary, "<STATS>");
271  stats_.Write(out, binary);
272  WriteToken(out, binary, "</FMLLRACCS>");
273 }
274 
275 void FmllrSgmm2Accs::Read(std::istream &in, bool binary, bool add) {
276  ExpectToken(in, binary, "<FMLLRACCS>");
277  ExpectToken(in, binary, "<DIMENSION>");
278  ReadBasicType(in, binary, &dim_);
279  KALDI_ASSERT(dim_ > 0);
280  ExpectToken(in, binary, "<STATS>");
281  stats_.Read(in, binary, add);
282  ExpectToken(in, binary, "</FMLLRACCS>");
283 }
284 
285 
287  const AmSgmm2 &sgmm,
288  const MatrixBase<BaseFloat> &Delta,
289  const MatrixBase<BaseFloat> &A,
290  const Matrix<BaseFloat> &G,
291  int32 max_iters) {
292  int32 dim = sgmm.FeatureDim();
293  Matrix<double> Delta_d(Delta);
294  Matrix<double> G_d(G);
295  SubMatrix<double> Delta_C(Delta_d, 0, dim, 0, dim);
296 
297  // Eq. (B.28): m = tr(\Delta K^T) - tr(\Delta S^T)
298  BaseFloat m = TraceMatMat(Delta_d, stats.K_, kTrans)
299  - TraceMatMat(Delta_d, G_d, kTrans);
300  // Eq. (B.29): n = \sum_i tr(\Delta \Sigma_{i}^{-1} \Delta S_{i})
301  BaseFloat n = 0;
302  SpMatrix<double> inv_covar;
303  for (int32 i = 0, num_gauss = sgmm.NumGauss(); i < num_gauss; i++) {
304  sgmm.GetInvCovars(i, &inv_covar);
305  n += TraceMatSpMatSp(Delta_d, kTrans, inv_covar, Delta_d, kNoTrans,
306  stats.G_[i]);
307  }
308 
309  BaseFloat step_size = 0.0;
310  // initialize just to get rid of compile errors.
311  BaseFloat obj_step_old, obj_step_new = 0.0;
312  Matrix<double> new_A(dim, dim);
313  Matrix<double> B(dim, dim);
314  for (int32 iter_step = 0; iter_step < max_iters; iter_step++) {
315  if (iter_step == 0) {
316  obj_step_old = stats.beta_ * A.LogDet(); // Q_0 = \beta * log det(A)
317  } else {
318  obj_step_old = obj_step_new;
319  }
320 
321  // Eq. (B.30); B = (A + k\Delta^{-C})^{-1} \Delta^{-C}
322  new_A.CopyFromMat(A);
323  new_A.AddMat(step_size, Delta_C, kNoTrans);
324  new_A.InvertDouble();
325  B.AddMatMat(1.0, new_A, kNoTrans, Delta_C, kNoTrans, 0.0);
326 
327  BaseFloat d = m - step_size * n + stats.beta_ * TraceMat(B);
328  BaseFloat d2 = -n - stats.beta_ * TraceMatMat(B, B, kNoTrans);
329  if (std::fabs(d / d2) < 0.000001) { break; } // converged
330 
331  BaseFloat step_size_change = -(d / d2);
332  step_size += step_size_change; // Eq. (B.33)
333 
334  // Halve step size when the auxiliary function decreases.
335  do {
336  new_A.CopyFromMat(A);
337  new_A.AddMat(step_size, Delta_C, kNoTrans);
338  BaseFloat logdet = new_A.LogDet();
339  obj_step_new = stats.beta_ * logdet + step_size * m -
340  0.5 * step_size * step_size * n;
341 
342  if (obj_step_new - obj_step_old < -0.001) {
343  KALDI_WARN << "Objective function decreased (" << obj_step_old << "->"
344  << obj_step_new << "). Halving step size change ("
345  << step_size << " -> " << (step_size - (step_size_change/2))
346  << ")";
347  step_size_change /= 2;
348  step_size -= step_size_change; // take away half of our step
349  } // Facing numeric precision issues. Compute in double?
350  } while (obj_step_new - obj_step_old < -0.001 && step_size_change > 1e-05);
351  }
352  return step_size;
353 }
354 
355 
357  const Sgmm2FmllrGlobalParams &globals,
358  const Sgmm2FmllrConfig &opts,
359  Matrix<BaseFloat> *out_xform,
360  BaseFloat *frame_count, BaseFloat *auxf_out) const {
361  BaseFloat auxf_improv = 0.0, logdet = 0.0;
362  KALDI_ASSERT(out_xform->NumRows() == dim_ && out_xform->NumCols() == dim_+1);
363  BaseFloat mincount = (globals.HasBasis() ?
364  std::min(opts.fmllr_min_count_basis, opts.fmllr_min_count_full) :
365  opts.fmllr_min_count);
366  bool using_subspace = (globals.HasBasis() ?
367  (stats_.beta_ < opts.fmllr_min_count_full) : false);
368 
369  if (globals.IsEmpty())
370  KALDI_ERR << "Must set up pre-transforms before estimating FMLLR.";
371 
372  KALDI_VLOG(1) << "Mincount = " << mincount << "; Basis: "
373  << std::string(globals.HasBasis()? "yes; " : "no; ")
374  << "Using subspace: " << std::string(using_subspace? "yes; "
375  : "no; ");
376 
377  int32 num_bases = 0;
378  if (using_subspace) {
379  KALDI_ASSERT(globals.fmllr_bases_.size() != 0);
380  int32 max_bases = std::min(static_cast<int32>(globals.fmllr_bases_.size()),
381  opts.num_fmllr_bases);
382  num_bases = (opts.bases_occ_scale <= 0.0)? max_bases :
383  std::min(max_bases, static_cast<int32>(std::floor(opts.bases_occ_scale
384  * stats_.beta_)));
385  KALDI_VLOG(1) << "Have " << stats_.beta_ << " frames for speaker: Using "
386  << num_bases << " fMLLR bases.";
387  }
388 
389  // initialization just to get rid of compile errors.
390  BaseFloat auxf_old = 0, auxf_new = 0;
391  if (frame_count != NULL) *frame_count = stats_.beta_;
392 
393  // If occupancy is greater than the min count, update the transform
394  if (stats_.beta_ >= mincount) {
395  for (int32 iter = 0; iter < opts.fmllr_iters; iter++) {
396  Matrix<BaseFloat> grad(dim_, dim_ + 1, kSetZero);
397  Matrix<BaseFloat> G(dim_, dim_ + 1, kSetZero);
398  auxf_new = this->FmllrObjGradient(sgmm, *out_xform, &grad, &G);
399 
400  // For diagnostic purposes
401  KALDI_VLOG(3) << "Iter " << iter << ": Auxiliary function = "
402  << (auxf_new / stats_.beta_) << " per frame over " << stats_.beta_
403  << " frames";
404 
405  if (iter > 0) {
406  // For diagnostic purposes
407  KALDI_VLOG(2) << "Iter " << iter << ": Auxiliary function improvement: "
408  << ((auxf_new - auxf_old) / stats_.beta_) << " per frame over "
409  << (stats_.beta_) << " frames";
410  auxf_improv += auxf_new - auxf_old;
411  }
412 
413  Matrix<BaseFloat> pre_xformed_grad(dim_, dim_ + 1, kSetZero);
414  ApplyPreXformToGradient(globals, grad, &pre_xformed_grad);
415 // std::cout << "Pre-X Grad = " << pre_xformed_grad << std::endl;
416 
417  // Transform P_sk with the Hessian
418  Matrix<BaseFloat> hess_xformed_grad(dim_, dim_ + 1, kSetZero);
419  ApplyHessianXformToGradient(globals, pre_xformed_grad,
420  &hess_xformed_grad);
421 // std::cout << "Hess-X Grad = " << hess_xformed_grad << std::endl;
422 
423  // Update the actual FMLLR transform matrices
424  Matrix<BaseFloat> hess_xformed_delta(dim_, dim_ + 1, kUndefined);
425  if (using_subspace) {
426  // Note that in this case we can simply store the speaker-specific
427  // coefficients for each of the basis matrices. The current
428  // implementation stores the computed transform to simplify the code!
429  hess_xformed_delta.SetZero();
430  for (int32 b = 0; b < num_bases; b++) { // Eq (B.20)
431  hess_xformed_delta.AddMat(TraceMatMat(globals.fmllr_bases_[b],
432  hess_xformed_grad, kTrans),
433  globals.fmllr_bases_[b], kNoTrans);
434  }
435  hess_xformed_delta.Scale(1 / stats_.beta_);
436  } else {
437  hess_xformed_delta.CopyFromMat(hess_xformed_grad);
438  hess_xformed_delta.Scale(1 / stats_.beta_); // Eq. (B.19)
439  }
440 
441 // std::cout << "Hess-X Delta = " << hess_xformed_delta << std::endl;
442 
443  // Transform Delta with the Hessian
444  Matrix<BaseFloat> pre_xformed_delta(dim_, dim_ + 1, kSetZero);
445  ApplyInvHessianXformToChange(globals, hess_xformed_delta,
446  &pre_xformed_delta);
447 
448  // Apply inverse pre-transform to Delta
449  Matrix<BaseFloat> delta(dim_, dim_ + 1, kSetZero);
450  ApplyInvPreXformToChange(globals, pre_xformed_delta, &delta);
451 
452 #ifdef KALDI_PARANOID
453  // Check whether co-ordinate transformation is correct.
454  {
455  BaseFloat tr1 = TraceMatMat(delta, grad, kTrans);
456  BaseFloat tr2 = TraceMatMat(pre_xformed_delta, pre_xformed_grad,
457  kTrans);
458  BaseFloat tr3 = TraceMatMat(hess_xformed_delta, hess_xformed_grad,
459  kTrans);
460  AssertEqual(tr1, tr2, 1e-5);
461  AssertEqual(tr2, tr3, 1e-5);
462  }
463 #endif
464 
465  // Calculate the optimal step size
466  SubMatrix<BaseFloat> A(*out_xform, 0, dim_, 0, dim_);
467  BaseFloat step_size = CalcFmllrStepSize(stats_, sgmm, delta, A, G,
468  opts.fmllr_iters);
469 
470  // Update: W <-- W + k \Delta Eq. (B.34)
471  out_xform->AddMat(step_size, delta, kNoTrans);
472  auxf_old = auxf_new;
473 
474  // Check the objective function change for the last iteration
475  if (iter == opts.fmllr_iters - 1) {
476  auxf_new = this->FmllrObjGradient(sgmm, *out_xform, NULL, NULL);
477  logdet = A.LogDet();
478  // SubMatrix A points to the memory location of out_xform, and so will
479  // contain the updated value
480 
481  KALDI_VLOG(2) << "Iter " << iter << ": Auxiliary function improvement: "
482  << ((auxf_new - auxf_old) / stats_.beta_) << " per frame over "
483  << (stats_.beta_) << " frames";
484  auxf_improv += auxf_new - auxf_old;
485  }
486  }
487  if (auxf_out != NULL) *auxf_out = auxf_improv;
488  auxf_improv /= (stats_.beta_ + 1.0e-10);
489 
490  KALDI_LOG << "Auxiliary function improvement for FMLLR = " << auxf_improv
491  << " per frame over " << stats_.beta_ << " frames. Log-determinant = "
492  << logdet;
493  return true;
494  } else {
495  KALDI_ASSERT(stats_.beta_ < mincount);
496 // std::cerr.precision(10);
497 // std::cerr.setf(std::ios::fixed,std::ios::floatfield);
498  KALDI_WARN << "Not updating FMLLR because count is " << stats_.beta_
499  << " < " << (mincount);
500  if (auxf_out != NULL) *auxf_out = 0.0;
501  return false;
502  } // Do not use the transform if it does not have enough counts
503  KALDI_ASSERT(false); // Should never be reached.
504 }
505 
506 void EstimateSgmm2FmllrSubspace(const SpMatrix<double> &fmllr_grad_scatter,
507  int32 num_fmllr_bases, int32 feat_dim,
508  Sgmm2FmllrGlobalParams *globals, double min_eig) {
509  KALDI_ASSERT(num_fmllr_bases > 0 && feat_dim > 0);
510  if (num_fmllr_bases > feat_dim * (feat_dim + 1)) {
511  num_fmllr_bases = feat_dim * (feat_dim + 1);
512  KALDI_WARN << "Limiting number of fMLLR bases to be the same as transform "
513  << "dimension.";
514  }
515 
516  vector< Matrix<BaseFloat> > &fmllr_bases(globals->fmllr_bases_);
517 
518  Vector<double> s(fmllr_grad_scatter.NumRows());
519  Matrix<double> U(fmllr_grad_scatter.NumRows(),
520  fmllr_grad_scatter.NumRows());
521  try {
522  fmllr_grad_scatter.Eig(&s, &U);
523  SortSvd(&s, &U); // in case was not exactly sorted.
524  KALDI_VLOG(1) << "Eigenvalues (max 200) of CMLLR scatter are: "
525  << (SubVector<double>(s, 0,
526  std::min(static_cast<MatrixIndexT>(200),
527  s.Dim())));
528 
529 
530 // for (int32 b = 2; b < num_fmllr_bases; b++) {
531 // if (s(b) < min_eig) {
532 // num_fmllr_bases = b;
533 // KALDI_WARN << "Limiting number of fMLLR bases to " << num_fmllr_bases
534 // << " because of small eigenvalues.";
535 // break;
536 // }
537 // }
538 
539  U.Transpose(); // Now the rows of U correspond to the basis vectors.
540  fmllr_bases.resize(num_fmllr_bases);
541  for (int32 b = 0; b < num_fmllr_bases; b++) {
542  fmllr_bases[b].Resize(feat_dim, feat_dim + 1, kSetZero);
543  fmllr_bases[b].CopyRowsFromVec(U.Row(b));
544  }
545  KALDI_LOG << "Estimated " << num_fmllr_bases << " fMLLR basis matrices.";
546  } catch(const std::exception &e) {
547  KALDI_WARN << "Not estimating FMLLR bases because of a thrown exception:\n"
548  << e.what();
549  fmllr_bases.resize(0);
550  }
551 } // End of EstimateSgmm2FmllrSubspace
552 
553 
554 } // namespace kaldi
555 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void InvertDouble(Real *LogDet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse [double].
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231
void Write(std::ostream &out, bool binary) const
write to stream.
Matrix< double > K_
K_ is the summed outer product of [mean times inverse variance] with [extended data], scaled by the occupation counts; dimension is dim by (dim+1)
BaseFloat ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2PerSpkDerivedVars *spk_vars, Matrix< BaseFloat > *post) const
Similar to LogLikelihood() function above, but also computes the posterior probabilities for the pre-...
Definition: am-sgmm2.cc:574
BaseFloat FmllrObjGradient(const AmSgmm2 &sgmm, const Matrix< BaseFloat > &xform, Matrix< BaseFloat > *grad_out, Matrix< BaseFloat > *G_out) const
Definition: fmllr-sgmm2.cc:229
static void ApplyPreXformToGradient(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &gradient_in, Matrix< BaseFloat > *gradient_out)
Definition: fmllr-sgmm2.cc:31
void Write(std::ostream &out_stream, bool binary) const
Definition: fmllr-sgmm2.cc:103
Matrix< BaseFloat > pre_xform_
Pre-transform matrix. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:103
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
double TraceMat(const MatrixBase< Real > &A)
Returns trace of matrix.
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void GetVarScaledSubstateSpeakerMean(int32 j1, int32 substate, int32 gauss, const Sgmm2PerSpkDerivedVars &spk, VectorBase< Real > *mean_out) const
Definition: am-sgmm2.h:541
BaseFloat fmllr_min_count_full
Minimum occupancy count to stop using FMLLR bases and switch to regular FMLLR estimation.
Definition: fmllr-sgmm2.h:49
void GetInvCovars(int32 gauss_index, SpMatrix< Real > *out) const
Templated accessors (used to accumulate in different precision)
Definition: am-sgmm2.h:511
void AccumulateFromPosteriors(const AmSgmm2 &sgmm, const Sgmm2PerSpkDerivedVars &spk, const VectorBase< BaseFloat > &data, const std::vector< int32 > &gauss_select, const Matrix< BaseFloat > &posteriors, int32 state_index)
Definition: fmllr-sgmm2.cc:171
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
void Eig(VectorBase< Real > *s, MatrixBase< Real > *P=NULL) const
Solves the symmetric eigenvalue problem: at end we should have (*this) = P * diag(s) * P^T...
Definition: qr.cc:433
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real Min() const
Returns the minimum value of any element, or +infinity for the empty vector.
MatrixIndexT NumRows() const
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
void EstimateSgmm2FmllrSubspace(const SpMatrix< double > &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *globals, double min_eig)
Computes the fMLLR basis matrices given the scatter of the vectorized gradients (eq: B...
Definition: fmllr-sgmm2.cc:506
void AccumulateForFmllrSubspace(const AmSgmm2 &sgmm, const Sgmm2FmllrGlobalParams &fmllr_globals, SpMatrix< double > *grad_scatter)
Definition: fmllr-sgmm2.cc:205
BaseFloat bases_occ_scale
Scale per-speaker count to determine number of CMLLR bases.
Definition: fmllr-sgmm2.h:54
int32 fmllr_iters
Number of iterations in FMLLR estimation.
Definition: fmllr-sgmm2.h:41
bool Update(const AmSgmm2 &model, const Sgmm2FmllrGlobalParams &fmllr_globals, const Sgmm2FmllrConfig &opts, Matrix< BaseFloat > *out_xform, BaseFloat *frame_count, BaseFloat *auxf_improv) const
Computes the FMLLR transform from the accumulated stats, using the pre-transforms in fmllr_globals...
Definition: fmllr-sgmm2.cc:356
static void ApplyInvPreXformToChange(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &delta_in, Matrix< BaseFloat > *delta_out)
Definition: fmllr-sgmm2.cc:45
int32 FeatureDim() const
Definition: am-sgmm2.h:363
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v'
Definition: sp-matrix.cc:946
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Read(std::istream &in_stream, bool binary, bool add)
Definition: fmllr-sgmm2.cc:275
void Read(std::istream &in_stream, bool binary)
Definition: fmllr-sgmm2.cc:122
static BaseFloat CalcFmllrStepSize(const AffineXformStats &stats, const AmSgmm2 &sgmm, const MatrixBase< BaseFloat > &Delta, const MatrixBase< BaseFloat > &A, const Matrix< BaseFloat > &G, int32 max_iters)
Definition: fmllr-sgmm2.cc:286
Matrix< BaseFloat > inv_xform_
Inverse of pre-transform. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:105
void Scale(Real alpha)
Multiply each element with a scalar value.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
struct rnnlm::@11::@12 n
int32 Pdf2Group(int32 j2) const
Definition: am-sgmm2.cc:196
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
static void ApplyInvHessianXformToChange(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &delta_in, Matrix< BaseFloat > *delta_out)
Definition: fmllr-sgmm2.cc:81
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void SetZero()
Sets matrix to zero.
void Scale(Real alpha)
Multiplies all elements by this constant.
Configuration variables needed in the estimation of FMLLR for SGMMs.
Definition: fmllr-sgmm2.h:40
std::vector< SpMatrix< double > > G_
G_ is the outer product of extended-data, scaled by inverse variance, for each dimension.
int32 NumGauss() const
Definition: am-sgmm2.h:360
Real TraceMatSpMatSp(const MatrixBase< Real > &A, MatrixTransposeType transA, const SpMatrix< Real > &B, const MatrixBase< Real > &C, MatrixTransposeType transC, const SpMatrix< Real > &D)
Returns tr (A B C D) (A and C may be transposed as specified by transA and transC).
Definition: sp-matrix.cc:438
std::vector< int32 > gselect
Definition: am-sgmm2.h:143
int32 num_fmllr_bases
Number of basis matrices to use for FMLLR estimation.
Definition: fmllr-sgmm2.h:52
A class representing a vector.
Definition: kaldi-vector.h:406
BaseFloat Accumulate(const AmSgmm2 &sgmm, const VectorBase< BaseFloat > &data, const Sgmm2PerFrameDerivedVars &frame_vars, int32 state_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk)
Accumulation routine that computes the Gaussian posteriors and calls the AccumulateFromPosteriors fun...
Definition: fmllr-sgmm2.cc:156
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Write(std::ostream &out_stream, bool binary) const
Definition: fmllr-sgmm2.cc:266
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
BaseFloat fmllr_min_count_basis
Minimum occupancy count to estimate FMLLR using basis matrices.
Definition: fmllr-sgmm2.h:44
static void ApplyHessianXformToGradient(const Sgmm2FmllrGlobalParams &globals, const Matrix< BaseFloat > &gradient_in, Matrix< BaseFloat > *gradient_out)
Definition: fmllr-sgmm2.cc:59
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
Global adaptation parameters.
Definition: fmllr-sgmm2.h:91
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
Vector< BaseFloat > mean_scatter_
Diagonal of mean-scatter matrix. Dim is [D].
Definition: fmllr-sgmm2.h:107
std::vector< Matrix< BaseFloat > > fmllr_bases_
{W}_b. [b][d][d], dim is [B][D][D+1].
Definition: fmllr-sgmm2.h:109
#define KALDI_LOG
Definition: kaldi-error.h:153
void Init(int32 dim, int32 num_gaussians)
Definition: fmllr-sgmm2.cc:146
double beta_
beta_ is the occupation count.
void AddSpMat(const Real alpha, const SpMatrix< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*SpA*B.
Definition: kaldi-matrix.h:692
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142
int32 NumSubstatesForGroup(int32 j1) const
Definition: am-sgmm2.h:357
BaseFloat fmllr_min_count
Minimum occupancy count to estimate FMLLR without basis matrices.
Definition: fmllr-sgmm2.h:46
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94