fmllr-diag-gmm.cc
Go to the documentation of this file.
1 // transform/fmllr-diag-gmm.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation; Saarland University;
4 // Georg Stemmer
5 // 2013 Johns Hopkins University (author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #include <utility>
23 #include <vector>
24 using std::vector;
25 
27 
28 namespace kaldi {
29 
// NOTE(review): this is an extracted listing. The opening signature line and
// listing lines 36-37 and 39 are missing here; the parameter list matches
// AccumulateFromPosteriors -- confirm against the real source file.
//
// Accumulates single-frame statistics for one diagonal GMM from supplied
// per-component posteriors: the total posterior mass goes into stats.count,
// and the posterior-weighted sums of the rows of means_invvars() and
// inv_vars() are added to stats.a and stats.b.
31  const DiagGmm &pdf,
32  const VectorBase<BaseFloat> &data,
33  const VectorBase<BaseFloat> &posterior) {
34 
// Taken when DataHasChanged(data) is true; the body of this branch is not
// present in this listing.
35  if (this->DataHasChanged(data)) {
38  }
// `stats` is declared on a line not shown here (presumably a reference to
// single_frame_stats_ -- TODO confirm).
40  stats.count += posterior.Sum();
// kTrans: the matrix is used transposed, so each call adds
// (matrix^T * posterior) to the existing vector (final argument 1.0).
41  stats.a.AddMatVec(1.0, pdf.means_invvars(), kTrans, posterior, 1.0);
42  stats.b.AddMatVec(1.0, pdf.inv_vars(), kTrans, posterior, 1.0);
43 }
44 
// NOTE(review): extracted listing; the opening signature line and listing
// lines 52-53 and 55 are missing. The parameter list matches
// AccumulateFromPosteriorsPreselect -- confirm against the real source file.
//
// Same accumulation as the non-preselect version, but only over the Gaussians
// listed in `gselect`; posterior(i) is the posterior of Gaussian gselect[i].
46  const DiagGmm &pdf,
47  const std::vector<int32> &gselect,
48  const VectorBase<BaseFloat> &data,
49  const VectorBase<BaseFloat> &posterior) {
50 
// Taken when DataHasChanged(data) is true; the body of this branch is not
// present in this listing.
51  if (this->DataHasChanged(data)) {
54  }
// `stats` is declared on a line not shown here (presumably a reference to
// single_frame_stats_ -- TODO confirm).
56  stats.count += posterior.Sum();
57 
58  const Matrix<BaseFloat> &means_invvars = pdf.means_invvars(),
59  &inv_vars = pdf.inv_vars();
// One posterior entry is required per selected Gaussian.
60  KALDI_ASSERT(static_cast<int32>(gselect.size()) == posterior.Dim());
61  for (size_t i = 0; i < gselect.size(); i++) {
// Add posterior(i) times the row of the selected Gaussian.
62  stats.a.AddVec(posterior(i), means_invvars.Row(gselect[i]));
63  stats.b.AddVec(posterior(i), inv_vars.Row(gselect[i]));
64  }
65 }
66 
// NOTE(review): extracted listing; the first signature line (listing line 67)
// and the line after the colon (listing line 69, presumably the member
// initializer list) are missing. This appears to be the FmllrDiagGmmAccs
// constructor taking a DiagGmm `gmm` and full-GMM accumulators -- confirm.
//
// Builds fMLLR statistics (beta_, K_, G_) from already-accumulated full-GMM
// statistics instead of from individual frames. For each Gaussian with
// non-zero occupancy, the mean and covariance accumulators are extended by one
// dimension (as if accumulated on x^+ = [x; 1]) and added into the stats.
68  const AccumFullGmm &fgmm_accs):
70  KALDI_ASSERT(gmm.NumGauss() == fgmm_accs.NumGauss()
71  && gmm.Dim() == fgmm_accs.Dim());
72  Init(gmm.Dim());
73  int32 dim = gmm.Dim(), num_gauss = gmm.NumGauss();
74  for (int32 g = 0; g < num_gauss; g++) {
75  double this_occ = fgmm_accs.occupancy()(g);
// Gaussians with zero occupancy contribute nothing.
76  if (this_occ == 0) continue;
77  SubVector<BaseFloat> this_mean_invvar(gmm.means_invvars(), g);
78  SubVector<BaseFloat> this_invvar(gmm.inv_vars(), g);
79  SubVector<double> this_mean_acc(fgmm_accs.mean_accumulator(), g);
// Double-precision copy so it can be combined with the double-precision accs.
80  Vector<double> this_mean_invvar_dbl(this_mean_invvar);
// Extended first-order acc: [mean_acc; occupancy].
81  Vector<double> this_extended_mean_acc(dim+1);
82  this_extended_mean_acc.Range(0, dim).CopyFromVec(this_mean_acc);
83  this_extended_mean_acc(dim) = this_occ; // acc of x^+
84  Matrix<double> this_cov_acc(fgmm_accs.covariance_accumulator()[g]); // copy to
85  // regular Matrix.
86  Matrix<double> this_extended_cov_acc(dim+1, dim+1); // make as if accumulated
87  // using x^+, not x.
// Top-left block is the covariance acc; the last row and the last column are
// both the extended mean acc (so the corner element is the occupancy).
88  this_extended_cov_acc.Range(0, dim, 0, dim).CopyFromMat(this_cov_acc);
89  this_extended_cov_acc.Row(dim).CopyFromVec(this_extended_mean_acc);
90  this_extended_cov_acc.CopyColFromVec(this_extended_mean_acc, dim); // since
91  // there is no Col() function, use a member-function of the matrix class.
92  SpMatrix<double> this_extended_cov_acc_sp(this_extended_cov_acc);
93  beta_ += this_occ;
// K_ += (mean * inverse variance) * (extended mean acc)^T.
94  K_.AddVecVec(1.0, this_mean_invvar_dbl, this_extended_mean_acc);
// G_[d] += inverse variance of dimension d * extended covariance acc.
95  for (int32 d = 0; d < dim; d++)
96  G_[d].AddSp(this_invvar(d), this_extended_cov_acc_sp);
97  }
98 }
99 
100 
// NOTE(review): extracted listing; the opening signature line (listing line
// 101, which declares `pdf`) is missing. The parameters match
// AccumulateForGmm -- confirm against the real source file.
//
// Computes the component posteriors of `pdf` for `data`, scales them by
// `weight`, accumulates them via AccumulateFromPosteriors, and returns the
// value returned by ComponentPosteriors (stored in `loglike`).
102  const VectorBase<BaseFloat> &data,
103  BaseFloat weight) {
104  int32 num_comp = pdf.NumGauss();
105  Vector<BaseFloat> posterior(num_comp);
106  BaseFloat loglike;
107 
108  loglike = pdf.ComponentPosteriors(data, &posterior);
// The returned log-likelihood is not scaled by weight; only the posteriors are.
109  posterior.Scale(weight);
110  AccumulateFromPosteriors(pdf, data, posterior);
111  return loglike;
112 }
113 
// NOTE(review): extracted listing; the opening signature line (listing line
// 114) is missing. The parameters match AccumulateForGmmPreselect -- confirm.
//
// Like AccumulateForGmm, but restricted to the Gaussians listed in `gselect`.
// Returns the value produced by ApplySoftMax() on the preselected
// log-likelihoods; `weight` scales only the posteriors that are accumulated.
115  const DiagGmm &pdf,
116  const std::vector<int32> &gselect,
117  const VectorBase<BaseFloat> &data,
118  BaseFloat weight) {
119  KALDI_ASSERT(!gselect.empty() && "Empty gselect information");
120  Vector<BaseFloat> loglikes;
121  pdf.LogLikelihoodsPreselect(data, gselect, &loglikes);
122 
// After this call `loglikes` is reused to hold posteriors (see original
// comment on the same line).
123  BaseFloat loglike = loglikes.ApplySoftMax(); // they are now posteriors.
124  loglikes.Scale(weight);
125  AccumulateFromPosteriorsPreselect(pdf, gselect, data, loglikes);
126  return loglike;
127 }
128 
129 
130 
// NOTE(review): extracted listing; the opening signature line (listing line
// 131, which declares `opts`) and listing line 136 are missing. The parameters
// match FmllrDiagGmmAccs::Update -- confirm against the real source file.
//
// Re-estimates the fMLLR matrix in *fmllr_mat from the accumulated stats.
//   fmllr_mat  in/out; must be non-NULL and not all zeros on entry.
//   objf_impr  optional output; receives the objective-function change
//              (0.0 when no update is performed).
//   count      optional output; receives beta_.
// The update is performed only when beta_ exceeds opts.min_count.
132  MatrixBase<BaseFloat> *fmllr_mat,
133  BaseFloat *objf_impr,
134  BaseFloat *count) {
135  KALDI_ASSERT(fmllr_mat != NULL);
137  if (fmllr_mat->IsZero())
138  KALDI_ERR << "You must initialize the fMLLR matrix to a non-singular value "
139  "(so we can report objective function changes); e.g. call SetUnit()";
// A "full" update is rejected when the stats were accumulated with another
// update type in this->opts_.
140  if (opts.update_type == "full" && this->opts_.update_type != "full") {
141  KALDI_ERR << "You are requesting a full-fMLLR update but you accumulated "
142  << "stats for more limited update type.";
143  }
144  if (beta_ > opts.min_count) {
// Work on copies so *fmllr_mat is written exactly once, below.
145  Matrix<BaseFloat> tmp_old(*fmllr_mat), tmp_new(*fmllr_mat);
146  BaseFloat objf_change;
// Dispatch on the requested update type; "none" leaves tmp_new equal to the
// input transform.
147  if (opts.update_type == "full")
148  objf_change = ComputeFmllrMatrixDiagGmmFull(tmp_old, *this, opts.num_iters, &tmp_new);
149  else if (opts.update_type == "diag")
150  objf_change = ComputeFmllrMatrixDiagGmmDiagonal(tmp_old, *this, &tmp_new);
151  else if (opts.update_type == "offset")
152  objf_change = ComputeFmllrMatrixDiagGmmOffset(tmp_old, *this, &tmp_new);
153  else if (opts.update_type == "none")
154  objf_change = 0.0;
155  else
156  KALDI_ERR << "Unknown fMLLR update type " << opts.update_type
157  << ", fmllr-update-type must be one of \"full\"|\"diag\"|\"offset\"|\"none\"";
158  fmllr_mat->CopyFromMat(tmp_new);
159  if (objf_impr) *objf_impr = objf_change;
160  if (count) *count = beta_;
161  } else { // Not changing matrix.
162  KALDI_WARN << "Not updating fMLLR since below min-count: count is " << beta_;
163  if (objf_impr) *objf_impr = 0.0;
164  if (count) *count = beta_;
165  }
166 }
167 
168 
// NOTE(review): extracted listing; the opening signature line (listing line
// 169, which declares `in_xform`) is missing. The parameters match
// ComputeFmllrMatrixDiagGmm -- confirm against the real source file.
//
// Dispatches to the Full / Diagonal / Offset estimation routine according to
// `fmllr_type` and returns that routine's result. For "none", *out_xform is
// set to the unit transform and 0.0 is returned. Any other value is reported
// through KALDI_ERR.
170  const AffineXformStats &stats,
171  std::string fmllr_type, // "none", "offset", "diag", "full"
172  int32 num_iters,
173  MatrixBase<BaseFloat> *out_xform) {
174  if (fmllr_type == "full") {
175  return ComputeFmllrMatrixDiagGmmFull(in_xform, stats, num_iters, out_xform);
176  } else if (fmllr_type == "diag") {
177  return ComputeFmllrMatrixDiagGmmDiagonal(in_xform, stats, out_xform);
178  } else if (fmllr_type == "offset") {
179  return ComputeFmllrMatrixDiagGmmOffset(in_xform, stats, out_xform);
180  } else if (fmllr_type == "none") {
// Only warns; the output is set to unit regardless of the input transform.
181  if (!in_xform.IsUnit())
182  KALDI_WARN << "You set fMLLR type to \"none\" but your starting transform "
183  "is not unit [this is strange, and diagnostics will be wrong].";
184  out_xform->SetUnit();
185  return 0.0;
186  } else
187  KALDI_ERR << "Unknown fMLLR update type " << fmllr_type
188  << ", must be one of \"full\"|\"diag\"|\"offset\"|\"none\"";
// Presumably only reached if KALDI_ERR returns control -- TODO confirm.
189  return 0.0;
190 }
191 
192 
// NOTE(review): extracted listing; the first signature lines (listing lines
// 193-194, which declare `inv_G` and `k`) are missing. The parameters match
// FmllrInnerUpdate -- confirm against the real source file.
//
// Performs one row of the row-by-row fMLLR update: row `row` of *transform
// (a dim x (dim+1) matrix) is overwritten with (alpha * cofactor_row + k)
// multiplied by inv_G, where alpha is the root of a quadratic that gives the
// larger auxiliary-function value.
195  double beta,
196  int32 row,
197  MatrixBase<double> *transform) {
198  int32 dim = transform->NumRows();
199  KALDI_ASSERT(transform->NumCols() == dim + 1);
200  KALDI_ASSERT(row >= 0 && row < dim);
201 
202  double logdet;
203  // Calculating the matrix of cofactors (transpose of adjugate)
// The inverse of the transposed square part is used in place of the cofactor
// matrix; the determinant scaling was deliberately dropped (see below).
204  Matrix<double> cofact_mat(dim, dim);
205  cofact_mat.CopyFromMat(transform->Range(0, dim, 0, dim), kTrans);
206  cofact_mat.Invert(&logdet);
207  // Removed this step because it's not necessary and could lead to
208  // under/overflow [Dan]
209  // cofact_mat.Scale(exp(logdet));
210 
211  // The extended cofactor vector for the current row
// The extra (offset) element is zero.
212  Vector<double> cofact_row(dim + 1);
213  cofact_row.Range(0, dim).CopyRowFromMat(cofact_mat, row);
214  cofact_row(dim) = 0;
215  Vector<double> cofact_row_invg(dim + 1);
216  cofact_row_invg.AddSpVec(1.0, inv_G, cofact_row, 0.0);
217 
218  // Solve the quadratic equation for step size
// e1 = c^T inv_G c and e2 = c^T inv_G k, where c is the extended cofactor row.
219  double e1 = VecVec(cofact_row_invg, cofact_row);
220  double e2 = VecVec(cofact_row_invg, k);
221  double discr = std::sqrt(e2 * e2 + 4 * e1 * beta);
222  double alpha1 = (-e2 + discr) / (2 * e1);
223  double alpha2 = (-e2 - discr) / (2 * e1);
// Evaluate the auxiliary function at both roots and keep the better one.
224  double auxf1 = beta * Log(std::abs(alpha1 * e1 + e2)) -
225  0.5 * alpha1 * alpha1 * e1;
226  double auxf2 = beta * Log(std::abs(alpha2 * e1 + e2)) -
227  0.5 * alpha2 * alpha2 * e1;
228  double alpha = (auxf1 > auxf2) ? alpha1 : alpha2;
229 
230  // Update transform row: w_d = (\alpha cofact_d + k_d) G_d^{-1}
// cofact_row is reused in place as (alpha * cofact_row + k).
231  cofact_row.Scale(alpha);
232  cofact_row.AddVec(1.0, k);
233  transform->Row(row).AddSpVec(1.0, inv_G, cofact_row, 0.0);
234 }
235 
// NOTE(review): extracted listing; the opening signature line (listing line
// 236, which declares `in_xform`) is missing. The parameters match
// ComputeFmllrMatrixDiagGmmFull -- confirm against the real source file.
//
// Full-matrix fMLLR estimation: starting from in_xform, runs `num_iters`
// sweeps of the row-by-row update (FmllrInnerUpdate) over all rows and returns
// the auxiliary-function improvement. If the objective decreased (beyond the
// ApproxEqual tolerance) the new transform is rejected and 0.0 is returned.
237  const AffineXformStats &stats,
238  int32 num_iters,
239  MatrixBase<BaseFloat> *out_xform) {
240  int32 dim = static_cast<int32>(stats.G_.size());
241 
242  // Compute the inverse matrices of second-order statistics
243  vector< SpMatrix<double> > inv_g(dim);
244  for (int32 d = 0; d < dim; d++) {
245  inv_g[d].Resize(dim + 1);
246  inv_g[d].CopyFromSp(stats.G_[d]);
247  inv_g[d].Invert();
248  }
249 
// Double-precision working copies of the input transform.
250  Matrix<double> old_xform(in_xform), new_xform(in_xform);
251  BaseFloat old_objf = FmllrAuxFuncDiagGmm(old_xform, stats);
252 
253  for (int32 iter = 0; iter < num_iters; ++iter) {
254  for (int32 d = 0; d < dim; d++) {
// Row d of K_ is the linear statistic for row d of the transform.
255  SubVector<double> k_d(stats.K_, d);
256  FmllrInnerUpdate(inv_g[d], k_d, stats.beta_, d, &new_xform);
257  } // end of looping over rows
258  } // end of iterations
259 
260  BaseFloat new_objf = FmllrAuxFuncDiagGmm(new_xform, stats),
261  objf_improvement = new_objf - old_objf;
// The small constant avoids division by zero when beta_ is 0.
262  KALDI_LOG << "fMLLR objf improvement is "
263  << (objf_improvement / (stats.beta_ + 1.0e-10))
264  << " per frame over " << stats.beta_ << " frames.";
// NOTE(review): on this rejection path *out_xform is not written at all, so
// the caller must have initialized it; the Diagonal and Offset variants copy
// in_xform into *out_xform up front.
265  if (objf_improvement < 0.0 && !ApproxEqual(new_objf, old_objf)) {
266  KALDI_WARN << "No applying fMLLR transform change because objective "
267  << "function did not increase.";
268  return 0.0;
269  } else {
270  out_xform->CopyFromMat(new_xform, kNoTrans);
271  return objf_improvement;
272  }
273 }
274 
// NOTE(review): extracted listing; the opening signature line (listing line
// 275, which declares `in_xform`) is missing. The parameters match
// ComputeFmllrMatrixDiagGmmDiagonal -- confirm against the real source file.
//
// Diagonal fMLLR estimation: for each dimension i solves in closed form for a
// scale s = out_xform(i, i) and an offset o = out_xform(i, dim), and returns
// the change in the auxiliary function. With zero count the input transform is
// copied to the output and 0.0 is returned.
276  const AffineXformStats &stats,
277  MatrixBase<BaseFloat> *out_xform) {
278  // The "Diagonal" here means a diagonal fMLLR matrix, i.e. like W = [ A; b] where
279  // A is diagonal.
280  // We re-derived the math (see exponential transform paper) to get a simpler
281  // update rule.
282 
283  /*
284  Write out_xform as D, which is a d x d+1 matrix (where d is the feature dimension).
285  We are solving for s == d_{i,i}, and o == d_{i,d} [assuming zero-based indexing];
286  s is a scale, o is an offset.
287  The stats are K (dimension d x d+1) and G_i for i=0..d-1 (dimension: d+1 x d+1),
288  and the count beta.
289 
290  The auxf for the i'th row of the transform is (assuming zero-based indexing):
291 
292  s k_{i,i} + o k_{i,d}
293  - \frac{1}{2} s^2 g_{i,i,i} - \frac{1}{2} o^2 g_{i,d,d} - s o g_{i,d,i}
294  + \beta \log |s|
295 
296  Suppose we know s, we can solve for o:
297  o = (k_{i,d} - s g_{i,d,i}) / g_{i,d,d}
298  Substituting this expression for o into the auxf (and ignoring
299  terms that don't vary with s), we have the auxf:
300 
301  \frac{1}{2} s^2 ( g_{i,d,i}^2 / g_{i,d,d} - g_{i,i,i} )
302  + s ( k_{i,i} - g_{i,d,i} k_{i,d} / g_{i,d,d} )
303  + \beta \log |s|
304 
305  Differentiating w.r.t. s and assuming s is positive, we have
306  a s + b + c/s = 0
307  where
308  a = ( g_{i,d,i}^2 / g_{i,d,d} - g_{i,i,i} ),
309  b = ( k_{i,i} - g_{i,d,i} k_{i,d} / g_{i,d,d} )
310  c = beta
311  Multiplying by s, we have the equation
312  a s^2 + b s + c = 0, where we assume s > 0.
313  We solve it with:
314  s = (-b - \sqrt{b^2 - 4ac}) / 2a
315  [take the negative root because we know a is negative, and this gives
316  the more positive solution for s; the other one would be negative].
317  We then solve for o with the equation above, i.e.:
318  o = (k_{i,d} - s g_{i,d,i}) / g_{i,d,d}
319  */
320 
321  int32 dim = stats.G_.size();
322  double beta = stats.beta_;
// The output starts as a copy of the input; only the diagonal and the offset
// column are overwritten below.
323  out_xform->CopyFromMat(in_xform);
324  if (beta == 0.0) {
325  KALDI_WARN << "Computing diagonal fMLLR matrix: no stats [using original transform]";
326  return 0.0;
327  }
328  BaseFloat old_obj = FmllrAuxFuncDiagGmm(*out_xform, stats);
329  KALDI_ASSERT(out_xform->Range(0, dim, 0, dim).IsDiagonal()); // orig transform
330  // must be diagonal.
331  for(int32 i = 0; i < dim; i++) {
// Only these five statistics are needed for row i.
332  double k_ii = stats.K_(i, i), k_id = stats.K_(i, dim),
333  g_iii = stats.G_[i](i, i), g_idd = stats.G_[i](dim, dim),
334  g_idi = stats.G_[i](dim, i);
// Coefficients of a s^2 + b s + c = 0, as derived in the comment above.
335  double a = g_idi*g_idi/g_idd - g_iii,
336  b = k_ii - g_idi*k_id/g_idd,
337  c = beta;
338  double s = (-b - std::sqrt(b*b - 4*a*c)) / (2*a);
339  KALDI_ASSERT(s > 0.0);
340  double o = (k_id - s*g_idi) / g_idd;
341  (*out_xform)(i, i) = s;
342  (*out_xform)(i, dim) = o;
343  }
344  BaseFloat new_obj = FmllrAuxFuncDiagGmm(*out_xform, stats);
345  KALDI_VLOG(2) << "fMLLR objective function improvement = "
346  << (new_obj - old_obj);
347  return new_obj - old_obj;
348 }
349 
// NOTE(review): extracted listing; the opening signature line (listing line
// 350, which declares `in_xform`) is missing. The parameters match
// ComputeFmllrMatrixDiagGmmOffset -- confirm against the real source file.
//
// Offset-only fMLLR estimation: the square part of in_xform must be the unit
// matrix; only the last column (the offset) of *out_xform is re-estimated.
// Returns the summed per-dimension change of the offset auxiliary function.
351  const AffineXformStats &stats,
352  MatrixBase<BaseFloat> *out_xform) {
353  int32 dim = stats.G_.size();
354  KALDI_ASSERT(in_xform.NumRows() == dim && in_xform.NumCols() == dim+1);
355  SubMatrix<BaseFloat> square_part(in_xform, 0, dim, 0, dim);
356  KALDI_ASSERT(square_part.IsUnit());
357  BaseFloat objf_impr = 0.0;
358  out_xform->CopyFromMat(in_xform);
359  for (int32 i = 0; i < dim; i++) {
360  // auxf in this offset b_i is:
361  // -0.5 b_i^2 G_i(dim, dim) - b_i G_i(i, dim)*1.0 + b_i K(i, dim) (1)
362  // answer is:
363  // b_i = [K(i, dim) - G_i(i, dim)] / G_i(dim, dim)
364  // objf change is given by (1)
// Evaluate (1) at the incoming offset, then at the re-estimated offset.
365  BaseFloat b_i = (*out_xform)(i, dim);
366  BaseFloat objf_before = -0.5 * b_i * b_i * stats.G_[i](dim, dim)
367  - b_i * stats.G_[i](i, dim) + b_i * stats.K_(i, dim);
368  b_i = (stats.K_(i, dim) - stats.G_[i](i, dim)) / stats.G_[i](dim, dim);
369  (*out_xform)(i, dim) = b_i;
370  BaseFloat objf_after = -0.5 * b_i * b_i * stats.G_[i](dim, dim)
371  - b_i * stats.G_[i](i, dim) + b_i * stats.K_(i, dim);
// A decrease is only reported; the new offset is kept either way.
372  if (objf_after < objf_before)
373  KALDI_WARN << "Objf decrease in offset estimation:"
374  << objf_after << " < " << objf_before;
375  objf_impr += objf_after - objf_before;
376  }
377  return objf_impr;
378 }
379 
380 
// NOTE(review): extracted listing; the opening signature line (listing line
// 381, which declares `xform`) is missing. The parameters match
// ApplyFeatureTransformToStats -- confirm against the real source file.
//
// Rewrites *stats in place as if the features had been transformed by `xform`
// before accumulation. `xform` may be dim x dim (linear), dim x (dim+1)
// (affine), or (dim+1) x (dim+1) (affine with a last row of 0 ... 0 1).
// Each G_[i] becomes X G_[i] X^T and K_ becomes K_ X^T, where X is the
// (dim+1) x (dim+1) form of the transform.
382  AffineXformStats *stats) {
383  KALDI_ASSERT(stats != NULL && stats->Dim() != 0);
384  int32 dim = stats->Dim();
385  // make sure the stats are of the standard diagonal kind.
386  KALDI_ASSERT(stats->G_.size() == static_cast<size_t>(dim));
387  KALDI_ASSERT( (xform.NumRows() == dim && xform.NumCols() == dim) // linear
388  || (xform.NumRows() == dim && xform.NumCols() == dim+1) // affine
389  || (xform.NumRows() == dim+1 && xform.NumCols() == dim+1)); // affine w/ extra row.
390  if (xform.NumRows() == dim+1) { // check last row of input
391  // has correct value. 0 0 0 .. 0 1.
392  for (int32 i = 0; i < dim; i++)
393  KALDI_ASSERT(xform(dim, i) == 0.0);
394  KALDI_ASSERT(xform(dim, dim) == 1.0);
395  }
396 
397  // Get the transform into the (dim+1 x dim+1) format, with
398  // 0 0 0 .. 0 1 as the last row.
399  SubMatrix<BaseFloat> xform_square(xform, 0, dim, 0, dim);
400  Matrix<double> xform_full(dim+1, dim+1);
401  SubMatrix<double> xform_full_square(xform_full, 0, dim, 0, dim);
402  xform_full_square.CopyFromMat(xform_square);
// For a purely linear input no offset is copied here.
403  if (xform.NumCols() == dim+1) // copy offset.
404  for (int32 i = 0; i < dim; i++)
405  xform_full(i, dim) = xform(i, dim);
406 
407  xform_full(dim, dim) = 1.0;
408 
// Gtmp is a scratch matrix reused for every dimension.
409  SpMatrix<double> Gtmp(dim+1);
410  for (int32 i = 0; i < dim; i++) {
411  // Gtmp <-- xform_full * stats->G_[i] * xform_full^T
412  Gtmp.AddMat2Sp(1.0, xform_full, kNoTrans, stats->G_[i], 0.0);
413  stats->G_[i].CopyFromSp(Gtmp);
414  }
415  Matrix<double> Ktmp(dim, dim+1);
416  // Ktmp <-- stats->K_ * xform_full^T
417  Ktmp.AddMatMat(1.0, stats->K_, kNoTrans, xform_full, kTrans, 0.0);
418  stats->K_.CopyFromMat(Ktmp);
419 }
420 
// NOTE(review): extracted listing; the opening signature line (listing line
// 421, which declares `xform`) is missing. The parameters match
// ApplyModelTransformToStats -- confirm against the real source file.
//
// Rewrites *stats in place as if they had been accumulated with a model whose
// means and variances were offset/scaled by the diagonal transform `xform`
// (dim x (dim+1), square part diagonal). K_ is updated first, using the
// not-yet-scaled G_; then each G_[i] is scaled by d_i^2.
422  AffineXformStats *stats) {
// NOTE(review): Dim() is compared with the floating-point literal 0.0 here,
// whereas ApplyFeatureTransformToStats compares it with the integer 0.
423  KALDI_ASSERT(stats != NULL && stats->Dim() != 0.0);
424  int32 dim = stats->Dim();
425  KALDI_ASSERT(xform.NumRows() == dim && xform.NumCols() == dim+1);
426  {
427  SubMatrix<BaseFloat> xform_square(xform, 0, dim, 0, dim);
428  // Only works with diagonal transforms.
429  KALDI_ASSERT(xform_square.IsDiagonal());
430  }
431 
432  // Working out rules for transforming fMLLR statistics under diagonal
433  // model-space transformations.
434  //
435  // We work out what the stats would be if we had accumulated
436  // with offset/scaled means and vars. Let T be the transform
437  // T = [ D; b ],
438  // where D is diagonal, d_i is the i'th diagonal of D, and b_i
439  // is the i'th offset element. This is equivalent to the transform
440  // x_i -> y_i = d_i x_i + b_i,
441  // so d_i is the diagonal and b_i is the offset term. We work out the
442  // reverse feature transform (from general to speaker-specific space),
443  // which is
444  // y_i -> x_i = (y_i - b_i) / d_i
445  // the corresponding mean transform to speaker-space is the same:
446  // mu_i -> (mu_i - b_i) / d_i
447  // and the transform on the variances is:
448  // sigma_i^2 -> sigma_i^2 / d_i^2,
449  // so on inverse variance this becomes:
450  // (1/sigma_i^2) -> (1/sigma_i^2) * d_i^2.
451  //
452  // Now, we work out the change in K and G_i from these effects on the
453  // means and variances.
454  //
455  // Now, k_{ij} is \sum_{m, t} \gamma_m (1/\sigma^2_{m, i}) \mu_{m, i} x^+_j .
456  //
457  // If we are transforming to K', we want:
458  //
459  // k'_{ij} = \sum_{m, t} \gamma_m (d_i^2/\sigma^2_{m, i}) ((\mu_{m, i} - b_i)/d_i) x^+_j .
460  // = d_i k_{i, j} - \sum_{m, t} \gamma_m (1/\sigma^2_{m, i}) d_i b_i x^+_j .
461  // = d_i k_{i, j} - d_i b_i g_{i, d, j},
462  // where g_{i, d, j} is the {d, j}'th element of G_i. (in zero-based indexing).
463  //
464  //
465  // G_i only depends on the variances and features, so the only change
466  // in G_i is G_i -> d_i^2 G_i (this comes from the change in 1/sigma_i^2).
467  // This is done after the change in K.
468 
// First pass: update K_ using the original (unscaled) G_.
469  for (int32 i = 0; i < dim; i++) {
470  BaseFloat d_i = xform(i, i), b_i = xform(i, dim);
471  for (int32 j = 0; j <= dim; j++) {
472  stats->K_(i, j) = d_i * stats->K_(i, j) - d_i * b_i * stats->G_[i](dim, j);
473  }
474  }
// Second pass: scale each G_[i] by d_i^2.
475  for (int32 i = 0; i < dim; i++) {
476  BaseFloat d_i = xform(i, i);
477  stats->G_[i].Scale(d_i * d_i);
478  }
479 }
480 
// NOTE(review): extracted listing; the opening signature line (listing line
// 481, which declares `xform`) is missing. This looks like the
// FmllrAuxFuncDiagGmm variant whose input is copied to double precision --
// confirm against the real source file.
//
// Returns the fMLLR auxiliary function
//   beta * logdet(A) + trace(W K^T) - 0.5 * sum_d w_d G_d w_d^T,
// where W is the transform, A its square part, and w_d its d'th row.
482  const AffineXformStats &stats) {
483  int32 dim = static_cast<int32>(stats.G_.size());
// Double-precision copy of the input transform.
484  Matrix<double> xform_d(xform);
485  Vector<double> xform_row_g(dim + 1);
486  SubMatrix<double> A(xform_d, 0, dim, 0, dim);
487  double obj = stats.beta_ * A.LogDet() +
488  TraceMatMat(xform_d, stats.K_, kTrans);
489  for (int32 d = 0; d < dim; d++) {
// xform_row_g = G_d * w_d (overwritten each iteration, beta argument 0.0).
490  xform_row_g.AddSpVec(1.0, stats.G_[d], xform_d.Row(d), 0.0);
491  obj -= 0.5 * VecVec(xform_row_g, xform_d.Row(d));
492  }
493  return obj;
494 }
495 
// NOTE(review): extracted listing; the opening signature line (listing line
// 496, which declares `xform`) is missing. This looks like the
// FmllrAuxFuncDiagGmm variant operating directly on a double-precision
// transform -- confirm against the real source file.
//
// Returns the fMLLR auxiliary function
//   beta * logdet(A) + trace(W K^T) - 0.5 * sum_d w_d G_d w_d^T,
// where W is `xform`, A its square part, and w_d its d'th row.
497  const AffineXformStats &stats) {
498  int32 dim = static_cast<int32>(stats.G_.size());
499  Vector<double> xform_row_g(dim + 1);
500  SubMatrix<double> A(xform, 0, dim, 0, dim);
501  double obj = stats.beta_ * A.LogDet() +
502  TraceMatMat(xform, stats.K_, kTrans);
503  for (int32 d = 0; d < dim; d++) {
// xform_row_g = G_d * w_d (overwritten each iteration, beta argument 0.0).
504  xform_row_g.AddSpVec(1.0, stats.G_[d], xform.Row(d), 0.0);
505  obj -= 0.5 * VecVec(xform_row_g, xform.Row(d));
506  }
507  return obj;
508 }
509 
// NOTE(review): extracted listing; the opening signature line (listing line
// 510, which declares `xform`) is missing. The parameters match
// FmllrAuxfGradient -- confirm against the real source file.
//
// Computes the fMLLR auxiliary function value (returned) and writes its
// gradient with respect to the transform into *grad_out, which is not resized
// here and so must already be dim x (dim+1).
511  // if this is changed back to Matrix<double>
512  // un-comment the Resize() below.
513  const AffineXformStats &stats,
514  MatrixBase<BaseFloat> *grad_out) {
515  int32 dim = static_cast<int32>(stats.G_.size());
516  Matrix<double> xform_d(xform);
517  Vector<double> xform_row_g(dim + 1);
518  SubMatrix<double> A(xform_d, 0, dim, 0, dim);
519  double obj = stats.beta_ * A.LogDet() +
520  TraceMatMat(xform_d, stats.K_, kTrans);
// Row d of S holds G_d * w_d; it is the quadratic term's gradient.
521  Matrix<double> S(dim, dim + 1);
522  for (int32 d = 0; d < dim; d++) {
523  xform_row_g.AddSpVec(1.0, stats.G_[d], xform_d.Row(d), 0.0);
524  obj -= 0.5 * VecVec(xform_row_g, xform_d.Row(d));
525  S.CopyRowFromVec(xform_row_g, d);
526  }
527 
528  // Compute the gradient: P = \beta [(A^{-1})^{T} , 0] + K - S
529  // grad_out->Resize(dim, dim + 1);
// The last column of tmp_grad starts at zero, giving the "0" block above.
530  Matrix<double> tmp_grad(dim, dim + 1);
531  tmp_grad.Range(0, dim, 0, dim).CopyFromMat(A);
532  tmp_grad.Range(0, dim, 0, dim).Invert();
533  tmp_grad.Range(0, dim, 0, dim).Transpose();
534  tmp_grad.Scale(stats.beta_);
535  tmp_grad.AddMat(-1.0, S, kNoTrans);
536  tmp_grad.AddMat(1.0, stats.K_, kNoTrans);
537  grad_out->CopyFromMat(tmp_grad, kNoTrans);
538 
539  return obj;
540 }
541 
// NOTE(review): extracted listing; the signature line (listing line 542) is
// missing. The body matches DataHasChanged(data) -- confirm.
//
// Returns true when `data` differs from the frame stored in
// single_frame_stats_.x. ApproxEqual is called with tolerance 0.0, so any
// non-zero difference counts as a change.
543  KALDI_ASSERT(data.Dim() == this->Dim());
544  return !data.ApproxEqual(single_frame_stats_.x, 0.0);
545 }
546 
// NOTE(review): extracted listing; the signature line (listing line 547,
// which declares `dim`) is missing. This is presumably the Init method of the
// single-frame stats struct -- confirm against the real source file.
//
// Resizes the vectors x, a and b to `dim` and resets count to zero.
548  x.Resize(dim);
549  a.Resize(dim);
550  b.Resize(dim);
551  count = 0.0;
552 }
553 
// NOTE(review): extracted listing; the signature line and the declaration of
// `stats` (listing lines 554-555) are missing. The body matches
// InitSingleFrameStats(data) -- confirm against the real source file.
//
// Starts a new frame: stores `data` as the current frame and clears the
// per-frame count and the a/b accumulators.
556  stats.x.CopyFromVec(data);
557  stats.count = 0.0;
558  stats.a.SetZero();
559  stats.b.SetZero();
560 }
561 
// NOTE(review): extracted listing; the signature line (listing line 562) and
// the declaration of `stats` (listing line 565) are missing. This is
// presumably CommitSingleFrameStats -- confirm against the real source file.
563  // Commit the stats for this frame (in SingleFrameStats).
// Folds the per-frame accumulators (count, a, b) into beta_, K_ and G_ using
// the extended feature x^+ = [x; 1], then clears the per-frame accumulators.
564  int32 dim = Dim();
// Nothing was accumulated for this frame.
566  if (stats.count == 0.0) return;
567 
// xplus is the stored frame with a trailing 1.0 appended.
568  Vector<double> xplus(dim+1);
569  xplus.Range(0, dim).CopyFromVec(stats.x);
570  xplus(dim) = 1.0;
571 
572  this->beta_ += stats.count;
// K_ += a * xplus^T (a is converted to double precision first).
573  this->K_.AddVecVec(1.0, Vector<double>(stats.a), xplus);
574 
575 
576  if (opts_.update_type == "full") {
// Full update: every G_[i] receives b(i) * xplus * xplus^T.
577  SpMatrix<double> scatter(dim+1);
578  scatter.AddVec2(1.0, xplus);
579 
580  KALDI_ASSERT(static_cast<size_t>(dim) == this->G_.size());
581  for (int32 i = 0; i < dim; i++)
582  this->G_[i].AddSp(stats.b(i), scatter);
583  } else {
584  // We only need some elements of these stats, so just update those elements.
// Only elements (i, i), (dim, i) and (dim, dim) of G_[i] are updated.
585  for (int32 i = 0; i < dim; i++) {
586  BaseFloat scale = stats.b(i), x_i = xplus(i);
587  this->G_[i](i, i) += scale * x_i * x_i;
588  this->G_[i](dim, i) += scale * 1.0 * x_i;
589  this->G_[i](dim, dim) += scale * 1.0 * 1.0;
590  }
591  }
592 
// Reset the per-frame accumulators; stats.x is left unchanged.
593  stats.count = 0.0;
594  stats.a.SetZero();
595  stats.b.SetZero();
596 }
597 
598 
599 
600 
601 } // namespace kaldi
bool ApproxEqual(const VectorBase< Real > &other, float tol=0.01) const
Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
void ApplyModelTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
ApplyModelTransformToStats takes a transform "xform", which must be diagonal (i.e.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
Matrix< double > K_
K_ is the summed outer product of [mean times inverse variance] with [extended data], scaled by the occupation counts; dimension is dim by (dim+1)
void CopyColFromVec(const VectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
bool IsDiagonal(Real cutoff=1.0e-05) const
Returns true if matrix is Diagonal.
int32 NumGauss() const
Returns the number of mixture components.
Definition: mle-full-gmm.h:97
const std::vector< SpMatrix< double > > & covariance_accumulator() const
Definition: mle-full-gmm.h:129
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition: diag-gmm.cc:566
BaseFloat ComputeFmllrMatrixDiagGmmFull(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
Updates the FMLLR matrix using Mark Gales' row-by-row update.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179
void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Accumulate stats for a GMM, given supplied posteriors.
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
SingleFrameStats single_frame_stats_
std::string update_type
"full", "diag", "offset", "none"
int32 Dim() const
Returns the dimensionality of the feature vectors.
Definition: mle-full-gmm.h:99
kaldi::int32 int32
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <-- beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
const Vector< double > & occupancy() const
Definition: mle-full-gmm.h:127
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
BaseFloat FmllrAuxfGradient(const MatrixBase< BaseFloat > &xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *grad_out)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
void ApplyFeatureTransformToStats(const MatrixBase< BaseFloat > &xform, AffineXformStats *stats)
This function applies a feature-level transform to stats (useful for certain techniques based on fMLL...
void InitSingleFrameStats(const VectorBase< BaseFloat > &data)
BaseFloat ComputeFmllrMatrixDiagGmmOffset(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does offset-only fMLLR, i.e. it only estimates an offset.
bool IsZero(Real cutoff=1.0e-05) const
Returns true if matrix is all zeros.
const size_t count
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha v v'
Definition: sp-matrix.cc:946
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
Definition: diag-gmm.cc:601
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
double Log(double x)
Definition: kaldi-math.h:100
FmllrDiagGmmAccs(const FmllrOptions &opts=FmllrOptions())
void Scale(Real alpha)
Multiply each element with a scalar value.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
Class for computing the maximum-likelihood estimates of the parameters of a Gaussian mixture model...
Definition: mle-full-gmm.h:74
#define KALDI_WARN
Definition: kaldi-error.h:150
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
const Matrix< double > & mean_accumulator() const
Definition: mle-full-gmm.h:128
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, MatrixBase< BaseFloat > *out_xform)
This does diagonal fMLLR (i.e.
std::vector< SpMatrix< double > > G_
G_ is the outer product of extended-data, scaled by inverse variance, for each dimension.
Real Sum() const
Returns sum of the elements.
BaseFloat AccumulateForGmmPreselect(const DiagGmm &gmm, const std::vector< int32 > &gselect, const VectorBase< BaseFloat > &data, BaseFloat weight)
This is like AccumulateForGmm but when you have gselect (Gaussian selection) information.
BaseFloat ComputeFmllrMatrixDiagGmm(const MatrixBase< BaseFloat > &in_xform, const AffineXformStats &stats, std::string fmllr_type, int32 num_iters, MatrixBase< BaseFloat > *out_xform)
This function internally calls ComputeFmllrMatrixDiagGmm{Full, Diagonal, Offset}, depending on "fmllr...
void FmllrInnerUpdate(SpMatrix< double > &inv_G, VectorBase< double > &k, double beta, int32 row, MatrixBase< double > *transform)
This function does one row of the inner-loop fMLLR transform update.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <-- beta*this + alpha * M * A * M^T.
Definition: sp-matrix.cc:982
float FmllrAuxFuncDiagGmm(const MatrixBase< float > &xform, const AffineXformStats &stats)
Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform and the stats...
Real LogDet(Real *det_sign=NULL) const
Returns logdet of matrix.
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
void CopyRowFromVec(const VectorBase< Real > &v, const MatrixIndexT row)
Copy vector into specific row of matrix.
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void Init(size_t dim)
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void AccumulateFromPosteriorsPreselect(const DiagGmm &gmm, const std::vector< int32 > &gselect, const VectorBase< BaseFloat > &data, const VectorBase< BaseFloat > &posteriors)
Accumulate stats for a GMM, given supplied posteriors.
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: kaldi-matrix.cc:38
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
bool IsUnit(Real cutoff=1.0e-05) const
Returns true if the matrix is all zeros, except for ones on diagonal.
bool DataHasChanged(const VectorBase< BaseFloat > &data) const
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
double beta_
beta_ is the occupation count.
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
BaseFloat AccumulateForGmm(const DiagGmm &gmm, const VectorBase< BaseFloat > &data, BaseFloat weight)
Accumulate stats for a single GMM in the model; returns log likelihood.
void Update(const FmllrOptions &opts, MatrixBase< BaseFloat > *fmllr_mat, BaseFloat *objf_impr, BaseFloat *count)
Update.
const Matrix< BaseFloat > & inv_vars() const
Definition: diag-gmm.h:180
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94