// ivector/ivector-extractor.cc

// Copyright 2013     Daniel Povey
//           2015     David Snyder

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <vector>

#include "ivector/ivector-extractor.h"
#include "util/kaldi-thread.h"

namespace kaldi {

int32 IvectorExtractor::FeatDim() const {
  KALDI_ASSERT(!M_.empty());
  return M_[0].NumRows();
}

int32 IvectorExtractor::IvectorDim() const {
  if (M_.empty()) { return 0; }
  else { return M_[0].NumCols(); }
}

int32 IvectorExtractor::NumGauss() const {
  return static_cast<int32>(M_.size());
}


// This function basically inverts the input and puts it in the output, but it's
// smart numerically.  It uses the prior knowledge that the input "inverse_var"
// can have no eigenvalues less than one, so it applies that floor (in double
// precision) before inverting.  This avoids certain numerical problems that can
// otherwise occur.
// static
void IvectorExtractor::InvertWithFlooring(const SpMatrix<double> &inverse_var,
                                          SpMatrix<double> *var) {
  SpMatrix<double> dbl_var(inverse_var);
  int32 dim = inverse_var.NumRows();
  Vector<double> s(dim);
  Matrix<double> P(dim, dim);
  // Solve the symmetric eigenvalue problem, inverse_var = P diag(s) P^T.
  inverse_var.Eig(&s, &P);
  s.ApplyFloor(1.0);
  s.InvertElements();
  var->AddMat2Vec(1.0, P, kNoTrans, s, 0.0);  // var = P diag(1/s) P^T.
}
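
// Added editorial note (not in the original source), spelling out the
// identity used above: if inverse_var = P diag(s) P^T with every s_j
// floored so that s_j >= 1, then
//   var = P diag(1/s) P^T,
// which is exactly what s.InvertElements() followed by
// AddMat2Vec(1.0, P, kNoTrans, s, 0.0) computes.  Flooring the eigenvalues
// at 1.0 before inverting keeps the eigenvalues of the result in (0, 1],
// so the inverse cannot blow up for nearly-singular inputs.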


void IvectorExtractor::GetIvectorDistribution(
    const IvectorExtractorUtteranceStats &utt_stats,
    VectorBase<double> *mean,
    SpMatrix<double> *var) const {
  if (!IvectorDependentWeights()) {
    Vector<double> linear(IvectorDim());
    SpMatrix<double> quadratic(IvectorDim());
    GetIvectorDistMean(utt_stats, &linear, &quadratic);
    GetIvectorDistPrior(utt_stats, &linear, &quadratic);
    if (var != NULL) {
      var->CopyFromSp(quadratic);
      var->Invert();  // now it's a variance.

      // mean of distribution = quadratic^{-1} * linear...
      mean->AddSpVec(1.0, *var, linear, 0.0);
    } else {
      quadratic.Invert();
      mean->AddSpVec(1.0, quadratic, linear, 0.0);
    }
  } else {
    Vector<double> linear(IvectorDim());
    SpMatrix<double> quadratic(IvectorDim());
    GetIvectorDistMean(utt_stats, &linear, &quadratic);
    GetIvectorDistPrior(utt_stats, &linear, &quadratic);
    // At this point, "linear" and "quadratic" contain
    // the mean and prior-related terms, and we avoid
    // recomputing those.

    Vector<double> cur_mean(IvectorDim());

    SpMatrix<double> quadratic_inv(IvectorDim());
    InvertWithFlooring(quadratic, &quadratic_inv);
    cur_mean.AddSpVec(1.0, quadratic_inv, linear, 0.0);

    KALDI_VLOG(3) << "Trace of quadratic is " << quadratic.Trace()
                  << ", condition is " << quadratic.Cond();
    KALDI_VLOG(3) << "Trace of quadratic_inv is " << quadratic_inv.Trace()
                  << ", condition is " << quadratic_inv.Cond();

    // The loop is finding successively better approximation points
    // for the quadratic expansion of the weights.
    int32 num_iters = 4;
    double change_threshold = 0.1;  // If the iVector changes by less than
                                    // this (in 2-norm), we abort early.
    for (int32 iter = 0; iter < num_iters; iter++) {
      if (GetVerboseLevel() >= 3) {
        KALDI_VLOG(3) << "Auxf on iter " << iter << " is "
                      << GetAuxf(utt_stats, cur_mean, &quadratic_inv);
        int32 show_dim = 5;
        if (show_dim > cur_mean.Dim()) show_dim = cur_mean.Dim();
        KALDI_VLOG(3) << "Current distribution mean is "
                      << cur_mean.Range(0, show_dim) << "... "
                      << ", var trace is " << quadratic_inv.Trace();
      }
      Vector<double> this_linear(linear);
      SpMatrix<double> this_quadratic(quadratic);
      GetIvectorDistWeight(utt_stats, cur_mean,
                           &this_linear, &this_quadratic);
      InvertWithFlooring(this_quadratic, &quadratic_inv);
      Vector<double> mean_diff(cur_mean);
      cur_mean.AddSpVec(1.0, quadratic_inv, this_linear, 0.0);
      mean_diff.AddVec(-1.0, cur_mean);
      double change = mean_diff.Norm(2.0);
      KALDI_VLOG(2) << "On iter " << iter << ", iVector changed by " << change;
      if (change < change_threshold)
        break;
    }
    mean->CopyFromVec(cur_mean);
    if (var != NULL)
      var->CopyFromSp(quadratic_inv);
  }
}
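
// Added editorial summary (not in the original source): in the first branch
// above (iVector-independent weights) the posterior over the iVector x is
// Gaussian in closed form:
//   quadratic = I + \sum_i \gamma_i U_i,  with U_i = M_i^T \Sigma_i^{-1} M_i,
//   linear    = prior_offset_ e_0 + \sum_i M_i^T \Sigma_i^{-1} (\gamma_i m_i),
//   var = quadratic^{-1},   mean = var * linear,
// where e_0 = [ 1 0 0 ... ]^T; the identity and e_0 terms come from the prior
// (see GetIvectorDistPrior() below).  The iterative branch re-linearizes the
// weight term around the current mean on each iteration.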


IvectorExtractor::IvectorExtractor(
    const IvectorExtractorOptions &opts,
    const FullGmm &fgmm) {
  KALDI_ASSERT(opts.ivector_dim > 0);
  Sigma_inv_.resize(fgmm.NumGauss());
  for (int32 i = 0; i < fgmm.NumGauss(); i++) {
    const SpMatrix<BaseFloat> &inv_var = fgmm.inv_covars()[i];
    Sigma_inv_[i].Resize(inv_var.NumRows());
    Sigma_inv_[i].CopyFromSp(inv_var);
  }
  Matrix<double> gmm_means;
  fgmm.GetMeans(&gmm_means);
  KALDI_ASSERT(!Sigma_inv_.empty());
  int32 feature_dim = Sigma_inv_[0].NumRows(),
      num_gauss = Sigma_inv_.size();

  prior_offset_ = 100.0;  // hardwired for now.  Must be nonzero.
  gmm_means.Scale(1.0 / prior_offset_);

  M_.resize(num_gauss);
  for (int32 i = 0; i < num_gauss; i++) {
    M_[i].Resize(feature_dim, opts.ivector_dim);
    M_[i].SetRandn();
    M_[i].CopyColFromVec(gmm_means.Row(i), 0);
  }
  if (opts.use_weights) {  // will regress the log-weights on the iVector.
    w_.Resize(num_gauss, opts.ivector_dim);
  } else {
    w_vec_.Resize(fgmm.NumGauss());
    w_vec_.CopyFromVec(fgmm.weights());
  }
  ComputeDerivedVars();
}
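
// Illustrative usage sketch (added; not part of this file).  It only uses
// the options fields referenced above (ivector_dim, use_weights); "fgmm"
// stands for an already-trained FullGmm:
//
//   IvectorExtractorOptions opts;
//   opts.ivector_dim = 100;      // dimension of the iVectors
//   opts.use_weights = false;    // don't regress log-weights on the iVector
//   IvectorExtractor extractor(opts, fgmm);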

class IvectorExtractorComputeDerivedVarsClass {
 public:
  IvectorExtractorComputeDerivedVarsClass(IvectorExtractor *extractor,
                                          int32 i):
      extractor_(extractor), i_(i) { }
  void operator () () { extractor_->ComputeDerivedVars(i_); }
 private:
  IvectorExtractor *extractor_;
  int32 i_;
};

void IvectorExtractor::ComputeDerivedVars() {
  KALDI_LOG << "Computing derived variables for iVector extractor";
  gconsts_.Resize(NumGauss());
  for (int32 i = 0; i < NumGauss(); i++) {
    double var_logdet = -Sigma_inv_[i].LogPosDefDet();
    gconsts_(i) = -0.5 * (var_logdet + FeatDim() * M_LOG_2PI);
    // the gconsts don't contain any weight-related terms.
  }
  U_.Resize(NumGauss(), IvectorDim() * (IvectorDim() + 1) / 2);
  Sigma_inv_M_.resize(NumGauss());

  // Note, we could have used RunMultiThreaded for this and similar tasks we
  // have here, but we found that we don't get as complete CPU utilization as
  // we could, because some tasks finish before others.
  {
    TaskSequencerConfig sequencer_opts;
    sequencer_opts.num_threads = g_num_threads;
    TaskSequencer<IvectorExtractorComputeDerivedVarsClass> sequencer(
        sequencer_opts);
    for (int32 i = 0; i < NumGauss(); i++)
      sequencer.Run(new IvectorExtractorComputeDerivedVarsClass(this, i));
  }
  KALDI_LOG << "Done.";
}


void IvectorExtractor::ComputeDerivedVars(int32 i) {
  SpMatrix<double> temp_U(IvectorDim());
  // temp_U = M_i^T Sigma_i^{-1} M_i
  temp_U.AddMat2Sp(1.0, M_[i], kTrans, Sigma_inv_[i], 0.0);
  SubVector<double> temp_U_vec(temp_U.Data(),
                               IvectorDim() * (IvectorDim() + 1) / 2);
  U_.Row(i).CopyFromVec(temp_U_vec);

  Sigma_inv_M_[i].Resize(FeatDim(), IvectorDim());
  Sigma_inv_M_[i].AddSpMat(1.0, Sigma_inv_[i], M_[i], kNoTrans, 0.0);
}


void IvectorExtractor::GetIvectorDistWeight(
    const IvectorExtractorUtteranceStats &utt_stats,
    const VectorBase<double> &mean,
    VectorBase<double> *linear,
    SpMatrix<double> *quadratic) const {
  // If there is no w_, then the weights do not depend on the iVector
  // and the weights contribute nothing to the distribution.
  if (!IvectorDependentWeights())
    return;

  Vector<double> logw_unnorm(NumGauss());
  logw_unnorm.AddMatVec(1.0, w_, kNoTrans, mean, 0.0);

  Vector<double> w(logw_unnorm);
  w.ApplySoftMax();  // now w is the weights.

  // See eq.58 in the SGMM paper,
  // http://www.sciencedirect.com/science/article/pii/S088523081000063X
  // linear_coeff(i) = \gamma_{jmi} - \gamma_{jm} \hat{w}_{jmi} + \max(\gamma_{jmi}, \gamma_{jm} \hat{w}_{jmi}) \hat{\w}_i^T \v_{jm};
  // here \v_{jm} corresponds to the iVector.  Ignore the j,m indices.
  Vector<double> linear_coeff(NumGauss());
  Vector<double> quadratic_coeff(NumGauss());
  double gamma = utt_stats.gamma_.Sum();
  for (int32 i = 0; i < NumGauss(); i++) {
    double gamma_i = utt_stats.gamma_(i);
    double max_term = std::max(gamma_i, gamma * w(i));
    linear_coeff(i) = gamma_i - gamma * w(i) + max_term * logw_unnorm(i);
    quadratic_coeff(i) = max_term;
  }
  linear->AddMatVec(1.0, w_, kTrans, linear_coeff, 1.0);

  // *quadratic += \sum_i quadratic_coeff(i) w_i w_i^T, where w_i is
  // the i'th row of w_.
  quadratic->AddMat2Vec(1.0, w_, kTrans, quadratic_coeff, 1.0);
}

void IvectorExtractor::GetIvectorDistMean(
    const IvectorExtractorUtteranceStats &utt_stats,
    VectorBase<double> *linear,
    SpMatrix<double> *quadratic) const {
  int32 I = NumGauss();
  for (int32 i = 0; i < I; i++) {
    double gamma = utt_stats.gamma_(i);
    if (gamma != 0.0) {
      SubVector<double> x(utt_stats.X_, i);  // == \gamma(i) \m_i
      // next line: linear += \gamma_i \M_i^T \Sigma_i^{-1} \m_i
      linear->AddMatVec(1.0, Sigma_inv_M_[i], kTrans, x, 1.0);
    }
  }
  SubVector<double> q_vec(quadratic->Data(), IvectorDim()*(IvectorDim()+1)/2);
  q_vec.AddMatVec(1.0, U_, kTrans, utt_stats.gamma_, 1.0);
}

void IvectorExtractor::GetIvectorDistPrior(
    const IvectorExtractorUtteranceStats &utt_stats,
    VectorBase<double> *linear,
    SpMatrix<double> *quadratic) const {

  (*linear)(0) += prior_offset_;  // the zero'th dimension has an offset mean.
  // The inverse-variance of the prior is the unit matrix.
  quadratic->AddToDiag(1.0);
}
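
// Added editorial note (not in the original source): the prior on the
// iVector is a Gaussian with mean prior_offset_ * e_0 (e_0 = [ 1 0 0 ... ]^T)
// and unit variance, so it contributes prior_offset_ to dimension zero of
// the linear term and an identity matrix (the AddToDiag(1.0) above) to the
// quadratic term.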


double IvectorExtractor::GetAcousticAuxfWeight(
    const IvectorExtractorUtteranceStats &utt_stats,
    const VectorBase<double> &mean,
    const SpMatrix<double> *var) const {
  if (!IvectorDependentWeights()) {  // Not using the weight-projection matrices.
    Vector<double> log_w_vec(w_vec_);
    log_w_vec.ApplyLog();
    return VecVec(log_w_vec, utt_stats.gamma_);
  } else {
    Vector<double> w(NumGauss());
    w.AddMatVec(1.0, w_, kNoTrans, mean, 0.0);  // now w is the unnormalized
                                                // log-weights.

    double lse = w.LogSumExp();
    w.Add(-lse);  // Normalize so the weights sum to one.

    // "ans" below is the point-value of the weight auxf, without
    // considering the variance.  At the moment, "w" contains
    // the normalized log-weights.
    double ans = VecVec(w, utt_stats.gamma_);

    w.ApplyExp();  // now w is the weights.

    if (var == NULL) {
      return ans;
    } else {
      // Below, "Jacobian" will be the derivative d(log_w) / d(ivector)
      // = (I - w w^T) W, where W (w_ in the code) is the projection matrix
      // from iVector space to unnormalized log-weights, and w is the normalized
      // weight values at the current point.
      Matrix<double> Jacobian(w_);
      Vector<double> WTw(IvectorDim());  // W^T w
      WTw.AddMatVec(1.0, w_, kTrans, w, 0.0);
      Jacobian.AddVecVec(-1.0, w, WTw);  // Jacobian -= w (W^T w)^T, i.e.
                                         // Jacobian = (I - w w^T) W.

      // the matrix S is the negated 2nd derivative of the objf w.r.t. the iVector \x.
      SpMatrix<double> S(IvectorDim());
      S.AddMat2Vec(1.0, Jacobian, kTrans, Vector<double>(utt_stats.gamma_), 0.0);
      ans += -0.5 * TraceSpSp(S, *var);
      return ans;
    }
  }
}



double IvectorExtractor::GetAuxf(const IvectorExtractorUtteranceStats &utt_stats,
                                 const VectorBase<double> &mean,
                                 const SpMatrix<double> *var) const {

  double acoustic_auxf = GetAcousticAuxf(utt_stats, mean, var),
      prior_auxf = GetPriorAuxf(mean, var), num_frames = utt_stats.gamma_.Sum();
  KALDI_VLOG(3) << "Acoustic auxf is " << (acoustic_auxf/num_frames) << "/frame over "
                << num_frames << " frames, prior auxf is " << prior_auxf
                << " = " << (prior_auxf/num_frames) << " per frame.";
  return acoustic_auxf + prior_auxf;
}

// Gets the logdet of a matrix while suppressing exceptions; always returns a
// finite value even if there was a problem.
static double GetLogDetNoFailure(const SpMatrix<double> &var) {
  try {
    return var.LogPosDefDet();
  } catch (...) {
    Vector<double> eigs(var.NumRows());
    var.Eig(&eigs);
    int32 floored;
    eigs.ApplyFloor(1.0e-20, &floored);
    if (floored > 0)
      KALDI_WARN << "Floored " << floored << " eigenvalues of variance.";
    eigs.ApplyLog();
    return eigs.Sum();
  }
}

/*
  Gets the prior-related part of the auxiliary function.  Suppose
  the ivector is x, the prior distribution is p(x), the likelihood
  of the data given x (itself an auxiliary function) is r(x) and
  the prior is q(x).

  In the case where we just have a point ivector x and no p(x),
  the prior-related term we return will just be log q(x).

  If we have a distribution over x, we define an auxiliary function
  t(x) = \int p(x) log(q(x)r(x) / p(x)) dx.
  We separate this into data-dependent and prior parts, where the prior
  part is
  \int p(x) log(q(x) / p(x)) dx
  (which this function returns), and the acoustic part is
  \int p(x) log(r(x)) dx.
  Note that the fact that we divide by p(x) in the prior part and not
  the acoustic part is a bit arbitrary, at least the way we have written
  it down here; it doesn't matter where we do it, but this way seems
  more natural.
*/
double IvectorExtractor::GetPriorAuxf(
    const VectorBase<double> &mean,
    const SpMatrix<double> *var) const {
  KALDI_ASSERT(mean.Dim() == IvectorDim());

  Vector<double> offset(mean);
  offset(0) -= prior_offset_;  // The mean of the prior distribution
  // may only be nonzero in the first dimension.  Now, "offset" is the
  // offset of the ivector from the prior's mean.


  if (var == NULL) {
    // The variance of the prior distribution is the unit matrix, so its
    // log-determinant is zero.
    return -0.5 * (VecVec(offset, offset) + IvectorDim()*M_LOG_2PI);
  } else {
    // The mean-related part of the answer will be
    // -0.5 * VecVec(offset, offset), just like above.
    // The variance-related part will be
    // \int p(x) . -0.5 (x^T I x - x^T var^{-1} x + logdet(I) - logdet(var)) dx
    // and using the fact that x is distributed with variance "var", this is:
    // = -0.5 ( trace(var I) - trace(var^{-1} var) + 0.0 - logdet(var))
    // = -0.5 ( trace(var) - dim(var) - logdet(var))

    KALDI_ASSERT(var->NumRows() == IvectorDim());
    return -0.5 * (VecVec(offset, offset) + var->Trace() -
                   IvectorDim() - GetLogDetNoFailure(*var));
  }
}

/* Gets the acoustic-related part of the auxf.
   If the distribution over the ivector given by "mean" and
   "var" is p(x), and the acoustic auxiliary-function given
   x is r(x), this function returns
     \int p(x) log r(x) dx
*/
double IvectorExtractor::GetAcousticAuxf(
    const IvectorExtractorUtteranceStats &utt_stats,
    const VectorBase<double> &mean,
    const SpMatrix<double> *var) const {
  double weight_auxf = GetAcousticAuxfWeight(utt_stats, mean, var),
      gconst_auxf = GetAcousticAuxfGconst(utt_stats),
      mean_auxf = GetAcousticAuxfMean(utt_stats, mean, var),
      var_auxf = GetAcousticAuxfVariance(utt_stats),
      T = utt_stats.gamma_.Sum();
  KALDI_VLOG(3) << "Per frame, auxf is: weight " << (weight_auxf/T) << ", gconst "
                << (gconst_auxf/T) << ", mean " << (mean_auxf/T) << ", var "
                << (var_auxf/T) << ", over " << T << " frames.";
  return weight_auxf + gconst_auxf + mean_auxf + var_auxf;
}

/* This is the part of the auxf that involves the data
   means, and we also involve the gconsts here.
   Let \m_i be the observed data mean vector for the
   i'th Gaussian (which we get from the utterance-specific
   stats).  Let \mu_i(\x) be the mean of the i'th Gaussian,
   written as a function of the iVector \x.

   \int_\x p(\x) (\sum_i \gamma_i -0.5 (\mu_i(\x) - \m_i)^T \Sigma_i^{-1} (\mu_i(\x) - \m_i)) d\x

   To compute this integral we'll first write out the summation as a function of \x.

     \sum_i -0.5 \gamma_i \m_i^T \Sigma_i^{-1} \m_i
          + \gamma_i \mu_i(\x)^T \Sigma_i^{-1} \m_i
          - 0.5 \gamma_i \mu_i(\x)^T \Sigma_i^{-1} \mu_i(\x)
   = \sum_i -0.5 \gamma_i \m_i^T \Sigma_i^{-1} \m_i
          + \gamma_i \x^T \M_i^T \Sigma_i^{-1} \m_i
          - 0.5 \gamma_i \x^T \M_i^T \Sigma_i^{-1} \M_i \x
   = K + \x^T \a - 0.5 \x^T \B \x,
   where K = \sum_i -0.5 \gamma_i \m_i^T \Sigma_i^{-1} \m_i
        \a = \sum_i \gamma_i \M_i^T \Sigma_i^{-1} \m_i
        \B = \sum_i \gamma_i \U_i (where \U_i = \M_i^T \Sigma_i^{-1} \M_i
             has been stored previously).
   Note: the matrix X_ in the stats actually contains \gamma_i \m_i,
   i.e. the mean times the count, so we need to modify the definitions
   above accordingly.
*/
double IvectorExtractor::GetAcousticAuxfMean(
    const IvectorExtractorUtteranceStats &utt_stats,
    const VectorBase<double> &mean,
    const SpMatrix<double> *var) const {
  double K = 0.0;
  Vector<double> a(IvectorDim()), temp(FeatDim());

  int32 I = NumGauss();
  for (int32 i = 0; i < I; i++) {
    double gamma = utt_stats.gamma_(i);
    if (gamma != 0.0) {
      Vector<double> x(utt_stats.X_.Row(i));  // == \gamma(i) \m_i
      temp.AddSpVec(1.0 / gamma, Sigma_inv_[i], x, 0.0);
      // now temp = \Sigma_i^{-1} \m_i.
      // next line: K += -0.5 \gamma_i \m_i^T \Sigma_i^{-1} \m_i
      K += -0.5 * VecVec(x, temp);
      // next line: a += \gamma_i \M_i^T \Sigma_i^{-1} \m_i
      a.AddMatVec(gamma, M_[i], kTrans, temp, 1.0);
    }
  }
  SpMatrix<double> B(IvectorDim());
  SubVector<double> B_vec(B.Data(), IvectorDim()*(IvectorDim()+1)/2);
  B_vec.AddMatVec(1.0, U_, kTrans, Vector<double>(utt_stats.gamma_), 0.0);

  double ans = K + VecVec(mean, a) - 0.5 * VecSpVec(mean, B, mean);
  if (var != NULL)
    ans -= 0.5 * TraceSpSp(*var, B);
  return ans;
}

double IvectorExtractor::GetAcousticAuxfGconst(
    const IvectorExtractorUtteranceStats &utt_stats) const {
  return VecVec(Vector<double>(utt_stats.gamma_),
                gconsts_);
}


double IvectorExtractor::GetAcousticAuxfVariance(
    const IvectorExtractorUtteranceStats &utt_stats) const {
  if (utt_stats.S_.empty()) {
    // we did not store the variance, so assume it's as predicted
    // by the model itself.
    // for each Gaussian i, we have a term -0.5 * gamma(i) * trace(Sigma[i] * Sigma[i]^{-1})
    // = -0.5 * gamma(i) * FeatDim().
    return -0.5 * utt_stats.gamma_.Sum() * FeatDim();
  } else {
    int32 I = NumGauss();
    double ans = 0.0;
    for (int32 i = 0; i < I; i++) {
      double gamma = utt_stats.gamma_(i);
      if (gamma != 0.0) {
        SpMatrix<double> var(utt_stats.S_[i]);
        var.Scale(1.0 / gamma);
        Vector<double> mean(utt_stats.X_.Row(i));
        mean.Scale(1.0 / gamma);
        var.AddVec2(-1.0, mean);  // get the centered covariance.
        ans += -0.5 * gamma * TraceSpSp(var, Sigma_inv_[i]);
      }
    }
    return ans;
  }
}

void IvectorExtractor::TransformIvectors(const MatrixBase<double> &T,
                                         double new_prior_offset) {
  Matrix<double> Tinv(T);
  Tinv.Invert();
  // w <-- w Tinv.  (construct a temporary copy with Matrix<double>(w_))
  if (IvectorDependentWeights())
    w_.AddMatMat(1.0, Matrix<double>(w_), kNoTrans, Tinv, kNoTrans, 0.0);
  // next: M_i <-- M_i Tinv.  (construct a temporary copy with Matrix<double>(M_[i]))
  for (int32 i = 0; i < NumGauss(); i++)
    M_[i].AddMatMat(1.0, Matrix<double>(M_[i]), kNoTrans, Tinv, kNoTrans, 0.0);
  KALDI_LOG << "Setting iVector prior offset to " << new_prior_offset;
  prior_offset_ = new_prior_offset;
}
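// Added editorial note (not in the original source): if iVectors transform
// as x -> T x, then replacing M_i <-- M_i T^{-1} (and likewise w <-- w T^{-1})
// leaves all products with the transformed iVectors unchanged, since
// (M_i T^{-1}) (T x) = M_i x.  This is what lets the prior-update code
// re-standardize the iVector space without changing the model's likelihoods.
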
void OnlineIvectorEstimationStats::AccStats(
    const IvectorExtractor &extractor,
    const VectorBase<BaseFloat> &feature,
    const std::vector<std::pair<int32, BaseFloat> > &gauss_post) {
  KALDI_ASSERT(extractor.IvectorDim() == this->IvectorDim());
  KALDI_ASSERT(!extractor.IvectorDependentWeights());

  Vector<double> feature_dbl(feature);
  double tot_weight = 0.0;
  int32 ivector_dim = this->IvectorDim(),
      quadratic_term_dim = (ivector_dim * (ivector_dim + 1)) / 2;
  SubVector<double> quadratic_term_vec(quadratic_term_.Data(),
                                       quadratic_term_dim);

  for (size_t idx = 0; idx < gauss_post.size(); idx++) {
    int32 g = gauss_post[idx].first;
    double weight = gauss_post[idx].second;
    // allow negative weights; they are needed in online iVector extraction
    // with speech-silence detection based on decoder traceback (we subtract
    // stuff we previously added if the traceback changes).
    if (weight == 0.0)
      continue;
    linear_term_.AddMatVec(weight, extractor.Sigma_inv_M_[g], kTrans,
                           feature_dbl, 1.0);
    SubVector<double> U_g(extractor.U_, g);
    quadratic_term_vec.AddVec(weight, U_g);
    tot_weight += weight;
  }
  if (max_count_ > 0.0) {
    // See comments in the header RE max_count for explanation.  It relates to
    // prior scaling when the count exceeds max_count_.
    double old_num_frames = num_frames_,
        new_num_frames = num_frames_ + tot_weight;
    double old_prior_scale = std::max(old_num_frames, max_count_) / max_count_,
        new_prior_scale = std::max(new_num_frames, max_count_) / max_count_;
    // The prior_scales are the inverses of the scales we would put on the stats
    // if we were implementing this by scaling the stats.  Instead we
    // scale the prior term.
    double prior_scale_change = new_prior_scale - old_prior_scale;
    if (prior_scale_change != 0.0) {
      linear_term_(0) += prior_offset_ * prior_scale_change;
      quadratic_term_.AddToDiag(prior_scale_change);
    }
  }
  num_frames_ += tot_weight;
}
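
// Added editorial note (not in the original source), illustrating the
// max-count arithmetic above: with max_count_ = 100 and num_frames_ going
// from 90 to 120 in one call, old_prior_scale = max(90, 100)/100 = 1.0 and
// new_prior_scale = max(120, 100)/100 = 1.2, so 0.2 * prior_offset_ is added
// to linear_term_(0) and 0.2 to the diagonal of quadratic_term_.  This is
// equivalent to scaling the data stats down by 100/120 while keeping the
// prior at full strength, which limits how far the iVector can move from
// the prior once the count exceeds max_count_.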


// This is used in OnlineIvectorEstimationStats::AccStats().
struct GaussInfo {
  // total weight for this Gaussian.
  BaseFloat tot_weight;
  // vector of pairs of (frame-index, weight for this Gaussian)
  std::vector<std::pair<int32, BaseFloat> > frame_weights;
  GaussInfo(): tot_weight(0.0) { }
};

static void ConvertPostToGaussInfo(
    const std::vector<std::vector<std::pair<int32, BaseFloat> > > &gauss_post,
    std::unordered_map<int32, GaussInfo> *gauss_info) {
  int32 num_frames = gauss_post.size();
  for (int32 t = 0; t < num_frames; t++) {
    const std::vector<std::pair<int32, BaseFloat> > &this_post = gauss_post[t];
    auto iter = this_post.begin(), end = this_post.end();
    for (; iter != end; ++iter) {
      int32 gauss_idx = iter->first;
      GaussInfo &info = (*gauss_info)[gauss_idx];
      BaseFloat weight = iter->second;
      info.tot_weight += weight;
      info.frame_weights.push_back(std::pair<int32, BaseFloat>(t, weight));
    }
  }
}

void OnlineIvectorEstimationStats::AccStats(
    const IvectorExtractor &extractor,
    const MatrixBase<BaseFloat> &features,
    const std::vector<std::vector<std::pair<int32, BaseFloat> > > &gauss_post) {
  KALDI_ASSERT(extractor.IvectorDim() == this->IvectorDim());
  KALDI_ASSERT(!extractor.IvectorDependentWeights());

  int32 feat_dim = features.NumCols();
  std::unordered_map<int32, GaussInfo> gauss_info;
  ConvertPostToGaussInfo(gauss_post, &gauss_info);

  Vector<double> weighted_feats(feat_dim, kUndefined);
  double tot_weight = 0.0;
  int32 ivector_dim = this->IvectorDim(),
      quadratic_term_dim = (ivector_dim * (ivector_dim + 1)) / 2;
  SubVector<double> quadratic_term_vec(quadratic_term_.Data(),
                                       quadratic_term_dim);

  std::unordered_map<int32, GaussInfo>::const_iterator
      iter = gauss_info.begin(), end = gauss_info.end();
  for (; iter != end; ++iter) {
    int32 gauss_idx = iter->first;
    const GaussInfo &info = iter->second;

    weighted_feats.SetZero();
    std::vector<std::pair<int32, BaseFloat> >::const_iterator
        f_iter = info.frame_weights.begin(), f_end = info.frame_weights.end();
    for (; f_iter != f_end; ++f_iter) {
      int32 t = f_iter->first;
      BaseFloat weight = f_iter->second;
      weighted_feats.AddVec(weight, features.Row(t));
    }
    BaseFloat this_tot_weight = info.tot_weight;

    linear_term_.AddMatVec(1.0, extractor.Sigma_inv_M_[gauss_idx], kTrans,
                           weighted_feats, 1.0);
    SubVector<double> U_g(extractor.U_, gauss_idx);
    quadratic_term_vec.AddVec(this_tot_weight, U_g);
    tot_weight += this_tot_weight;
  }
  if (max_count_ > 0.0) {
    // See comments in the header RE max_count for explanation.  It relates to
    // prior scaling when the count exceeds max_count_.
    double old_num_frames = num_frames_,
        new_num_frames = num_frames_ + tot_weight;
    double old_prior_scale = std::max(old_num_frames, max_count_) / max_count_,
        new_prior_scale = std::max(new_num_frames, max_count_) / max_count_;
    // The prior_scales are the inverses of the scales we would put on the stats
    // if we were implementing this by scaling the stats.  Instead we
    // scale the prior term.
    double prior_scale_change = new_prior_scale - old_prior_scale;
    if (prior_scale_change != 0.0) {
      linear_term_(0) += prior_offset_ * prior_scale_change;
      quadratic_term_.AddToDiag(prior_scale_change);
    }
  }
  num_frames_ += tot_weight;
}


void OnlineIvectorEstimationStats::Scale(double scale) {
  KALDI_ASSERT(scale >= 0.0 && scale <= 1.0);
  double old_num_frames = num_frames_;
  num_frames_ *= scale;
  quadratic_term_.Scale(scale);
  linear_term_.Scale(scale);

  // Scale back up the prior term, by adding in whatever we scaled down.
  if (max_count_ == 0.0) {
    linear_term_(0) += prior_offset_ * (1.0 - scale);
    quadratic_term_.AddToDiag(1.0 - scale);
  } else {
    double new_num_frames = num_frames_;
    double old_prior_scale =
        scale * std::max(old_num_frames, max_count_) / max_count_,
        new_prior_scale = std::max(new_num_frames, max_count_) / max_count_;
    // old_prior_scale is the scale the prior term currently has in the stats,
    // i.e. the previous scale times "scale", as we just scaled the stats.
    // new_prior_scale is the scale we want the prior term to have.
    linear_term_(0) += prior_offset_ * (new_prior_scale - old_prior_scale);
    quadratic_term_.AddToDiag(new_prior_scale - old_prior_scale);
  }
}

void OnlineIvectorEstimationStats::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<OnlineIvectorEstimationStats>");
  WriteToken(os, binary, "<PriorOffset>");
  WriteBasicType(os, binary, prior_offset_);
  WriteToken(os, binary, "<MaxCount>");
  WriteBasicType(os, binary, max_count_);
  WriteToken(os, binary, "<NumFrames>");
  WriteBasicType(os, binary, num_frames_);
  WriteToken(os, binary, "<QuadraticTerm>");
  quadratic_term_.Write(os, binary);
  WriteToken(os, binary, "<LinearTerm>");
  linear_term_.Write(os, binary);
  WriteToken(os, binary, "</OnlineIvectorEstimationStats>");
}

void OnlineIvectorEstimationStats::Read(std::istream &is, bool binary) {
  ExpectToken(is, binary, "<OnlineIvectorEstimationStats>");
  ExpectToken(is, binary, "<PriorOffset>");
  ReadBasicType(is, binary, &prior_offset_);
  std::string tok;
  ReadToken(is, binary, &tok);
  if (tok == "<MaxCount>") {
    ReadBasicType(is, binary, &max_count_);
    ExpectToken(is, binary, "<NumFrames>");
    ReadBasicType(is, binary, &num_frames_);
  } else {
    KALDI_ASSERT(tok == "<NumFrames>");
    max_count_ = 0.0;
    ReadBasicType(is, binary, &num_frames_);
  }
  ExpectToken(is, binary, "<QuadraticTerm>");
  quadratic_term_.Read(is, binary);
  ExpectToken(is, binary, "<LinearTerm>");
  linear_term_.Read(is, binary);
  ExpectToken(is, binary, "</OnlineIvectorEstimationStats>");
}

void OnlineIvectorEstimationStats::GetIvector(
    int32 num_cg_iters,
    VectorBase<double> *ivector) const {
  KALDI_ASSERT(ivector != NULL && ivector->Dim() ==
               this->IvectorDim());

  if (num_frames_ > 0.0) {
    // The estimate could be obtained exactly as follows:
    // SpMatrix<double> quadratic_inv(quadratic_term_);
    // quadratic_inv.Invert();
    // ivector->AddSpVec(1.0, quadratic_inv, linear_term_, 0.0);
    // Instead, we use linear conjugate gradients, which is cheaper.
    if ((*ivector)(0) == 0.0)
      (*ivector)(0) = prior_offset_;  // better initial guess.
    LinearCgdOptions opts;
    opts.max_iters = num_cg_iters;
    LinearCgd(opts, quadratic_term_, linear_term_, ivector);
  } else {
    // Use the 'default' value.
    ivector->SetZero();
    (*ivector)(0) = prior_offset_;
  }
  KALDI_VLOG(4) << "Objective function improvement from estimating the "
                << "iVector (vs. the default value) is "
                << ObjfChange(*ivector);
}
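
// Illustrative usage sketch (added; not part of this file), for estimating
// an iVector online.  "extractor", "frame" and "gauss_post" stand for an
// existing IvectorExtractor, one feature frame, and its Gaussian-level
// posteriors; the PriorOffset() accessor is assumed from the header:
//
//   OnlineIvectorEstimationStats stats(extractor.IvectorDim(),
//                                      extractor.PriorOffset(),
//                                      0.0 /* max_count; 0 = disabled */);
//   stats.AccStats(extractor, frame, gauss_post);  // repeat per frame
//   Vector<double> ivector(extractor.IvectorDim());
//   stats.GetIvector(15 /* num_cg_iters */, &ivector);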

double OnlineIvectorEstimationStats::ObjfChange(
    const VectorBase<double> &ivector) const {
  double ans = Objf(ivector) - DefaultObjf();
  KALDI_ASSERT(!KALDI_ISNAN(ans));
  return ans;
}

double OnlineIvectorEstimationStats::Objf(
    const VectorBase<double> &ivector) const {
  if (num_frames_ == 0.0) {
    return 0.0;
  } else {
    return (1.0 / num_frames_) * (-0.5 * VecSpVec(ivector, quadratic_term_,
                                                  ivector)
                                  + VecVec(ivector, linear_term_));
  }
}

double OnlineIvectorEstimationStats::DefaultObjf() const {
  if (num_frames_ == 0.0) {
    return 0.0;
  } else {
    double x = prior_offset_;
    return (1.0 / num_frames_) * (-0.5 * quadratic_term_(0, 0) * x * x
                                  + x * linear_term_(0));
  }
}

OnlineIvectorEstimationStats::OnlineIvectorEstimationStats(int32 ivector_dim,
                                                           BaseFloat prior_offset,
                                                           BaseFloat max_count):
    prior_offset_(prior_offset), max_count_(max_count), num_frames_(0.0),
    quadratic_term_(ivector_dim), linear_term_(ivector_dim) {
  if (ivector_dim != 0) {
    linear_term_(0) += prior_offset;
    quadratic_term_.AddToDiag(1.0);
  }
}

OnlineIvectorEstimationStats::OnlineIvectorEstimationStats(
    const OnlineIvectorEstimationStats &other):
    prior_offset_(other.prior_offset_),
    max_count_(other.max_count_),
    num_frames_(other.num_frames_),
    quadratic_term_(other.quadratic_term_),
    linear_term_(other.linear_term_) { }


void IvectorExtractor::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<IvectorExtractor>");
  WriteToken(os, binary, "<w>");
  w_.Write(os, binary);
  WriteToken(os, binary, "<w_vec>");
  w_vec_.Write(os, binary);
  WriteToken(os, binary, "<M>");
  int32 size = M_.size();
  WriteBasicType(os, binary, size);
  for (int32 i = 0; i < size; i++)
    M_[i].Write(os, binary);
  WriteToken(os, binary, "<SigmaInv>");
  KALDI_ASSERT(size == static_cast<int32>(Sigma_inv_.size()));
  for (int32 i = 0; i < size; i++)
    Sigma_inv_[i].Write(os, binary);
  WriteToken(os, binary, "<IvectorOffset>");
  WriteBasicType(os, binary, prior_offset_);
  WriteToken(os, binary, "</IvectorExtractor>");
}


void IvectorExtractor::Read(std::istream &is, bool binary) {
  ExpectToken(is, binary, "<IvectorExtractor>");
  ExpectToken(is, binary, "<w>");
  w_.Read(is, binary);
  ExpectToken(is, binary, "<w_vec>");
  w_vec_.Read(is, binary);
  ExpectToken(is, binary, "<M>");
  int32 size;
  ReadBasicType(is, binary, &size);
  KALDI_ASSERT(size > 0);
  M_.resize(size);
  for (int32 i = 0; i < size; i++)
    M_[i].Read(is, binary);
  ExpectToken(is, binary, "<SigmaInv>");
  Sigma_inv_.resize(size);
  for (int32 i = 0; i < size; i++)
    Sigma_inv_[i].Read(is, binary);
  ExpectToken(is, binary, "<IvectorOffset>");
  ReadBasicType(is, binary, &prior_offset_);
  ExpectToken(is, binary, "</IvectorExtractor>");
  ComputeDerivedVars();
}

void IvectorExtractorUtteranceStats::AccStats(
    const MatrixBase<BaseFloat> &feats,
    const Posterior &post) {
  typedef std::vector<std::pair<int32, BaseFloat> > VecType;
  int32 num_frames = feats.NumRows(),
      num_gauss = X_.NumRows(),
      feat_dim = feats.NumCols();
  KALDI_ASSERT(X_.NumCols() == feat_dim);
  KALDI_ASSERT(feats.NumRows() == static_cast<int32>(post.size()));
  bool update_variance = (!S_.empty());
  SpMatrix<double> outer_prod(feat_dim);
  for (int32 t = 0; t < num_frames; t++) {
    SubVector<BaseFloat> frame(feats, t);
    const VecType &this_post(post[t]);
    if (update_variance) {
      outer_prod.SetZero();
      outer_prod.AddVec2(1.0, frame);
    }
    for (VecType::const_iterator iter = this_post.begin();
         iter != this_post.end(); ++iter) {
      int32 i = iter->first;  // Gaussian index.
      KALDI_ASSERT(i >= 0 && i < num_gauss &&
                   "Out-of-range Gaussian (mismatched posteriors?)");
      double weight = iter->second;
      gamma_(i) += weight;
      X_.Row(i).AddVec(weight, frame);
      if (update_variance)
        S_[i].AddSp(weight, outer_prod);
    }
  }
}

void IvectorExtractorUtteranceStats::Scale(double scale) {
  gamma_.Scale(scale);
  X_.Scale(scale);
  for (size_t i = 0; i < S_.size(); i++)
    S_[i].Scale(scale);
}

IvectorExtractorStats::IvectorExtractorStats(
    const IvectorExtractor &extractor,
    const IvectorExtractorStatsOptions &stats_opts):
    config_(stats_opts) {
  int32 S = extractor.IvectorDim(), D = extractor.FeatDim(),
      I = extractor.NumGauss();
  KALDI_ASSERT(config_.num_samples_for_weights > 1);
  tot_auxf_ = 0.0;
  gamma_.Resize(I);
  Y_.resize(I);
  for (int32 i = 0; i < I; i++)
    Y_[i].Resize(D, S);
  R_.Resize(I, S * (S + 1) / 2);
  R_num_cached_ = 0;
  KALDI_ASSERT(stats_opts.cache_size > 0 && "--cache-size=0 not allowed");

  R_gamma_cache_.Resize(stats_opts.cache_size, I);
  R_ivec_scatter_cache_.Resize(stats_opts.cache_size, S*(S+1)/2);

  if (extractor.IvectorDependentWeights()) {
    Q_.Resize(I, S * (S + 1) / 2);
    G_.Resize(I, S);
  }
  if (stats_opts.update_variances) {
    S_.resize(I);
    for (int32 i = 0; i < I; i++)
      S_[i].Resize(D);
  }
  num_ivectors_ = 0;
  ivector_sum_.Resize(S);
  ivector_scatter_.Resize(S);
}

void IvectorExtractorStats::CommitStatsForM(
    const IvectorExtractor &extractor,
    const IvectorExtractorUtteranceStats &utt_stats,
    const VectorBase<double> &ivec_mean,
    const SpMatrix<double> &ivec_var) {

  gamma_Y_lock_.lock();

  // We do the occupation stats here also.
  gamma_.AddVec(1.0, utt_stats.gamma_);

  // Stats for the linear term in M:
  for (int32 i = 0; i < extractor.NumGauss(); i++) {
    Y_[i].AddVecVec(1.0, utt_stats.X_.Row(i),
                    Vector<double>(ivec_mean));
  }
  gamma_Y_lock_.unlock();

  SpMatrix<double> ivec_scatter(ivec_var);
  ivec_scatter.AddVec2(1.0, ivec_mean);

  R_cache_lock_.lock();
  while (R_num_cached_ == R_gamma_cache_.NumRows()) {
    // Cache full.  The "while" statement is in case of certain race conditions.
    R_cache_lock_.unlock();
    FlushCache();
    R_cache_lock_.lock();
  }
  R_gamma_cache_.Row(R_num_cached_).CopyFromVec(utt_stats.gamma_);
  int32 ivector_dim = ivec_mean.Dim();
  SubVector<double> ivec_scatter_vec(ivec_scatter.Data(),
                                     ivector_dim * (ivector_dim + 1) / 2);
  R_ivec_scatter_cache_.Row(R_num_cached_).CopyFromVec(ivec_scatter_vec);
  R_num_cached_++;
  R_cache_lock_.unlock();
}

void IvectorExtractorStats::FlushCache() {
  R_cache_lock_.lock();
  if (R_num_cached_ > 0) {
    KALDI_VLOG(1) << "Flushing cache for IvectorExtractorStats";
    // Store these quantities as copies in memory so other threads can use the
    // cache while we update R_ from the cache.
    Matrix<double> R_gamma_cache(
        R_gamma_cache_.Range(0, R_num_cached_,
                             0, R_gamma_cache_.NumCols()));
    Matrix<double> R_ivec_scatter_cache(
        R_ivec_scatter_cache_.Range(0, R_num_cached_,
                                    0, R_ivec_scatter_cache_.NumCols()));
    R_num_cached_ = 0;  // As far as other threads are concerned, the cache is
                        // cleared and they may write to it.
    R_cache_lock_.unlock();
    R_lock_.lock();
    R_.AddMatMat(1.0, R_gamma_cache, kTrans,
                 R_ivec_scatter_cache, kNoTrans, 1.0);
    R_lock_.unlock();
  } else {
    R_cache_lock_.unlock();
  }
}
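
// Added editorial note (not in the original source): the point of the cache
// is that the accumulation R_ += \sum_utts gamma * ivec_scatter_vec^T, which
// would otherwise be one rank-1 update per utterance, is instead done as a
// single matrix-matrix multiply (the AddMatMat above) over many cached
// utterances, which is much more efficient with optimized BLAS.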


void IvectorExtractorStats::CommitStatsForSigma(
    const IvectorExtractor &extractor,
    const IvectorExtractorUtteranceStats &utt_stats) {
  variance_stats_lock_.lock();
  // Storing the raw scatter statistics per Gaussian.  In the update phase we'll
  // take into account some other terms relating to the model means and their
  // correlation with the data.
  for (int32 i = 0; i < extractor.NumGauss(); i++)
    S_[i].AddSp(1.0, utt_stats.S_[i]);
  variance_stats_lock_.unlock();
}


// This function commits stats for a single sample of the ivector,
// to update the weight projection w_.
void IvectorExtractorStats::CommitStatsForWPoint(
    const IvectorExtractor &extractor,
    const IvectorExtractorUtteranceStats &utt_stats,
    const VectorBase<double> &ivector,
    double weight) {
  int32 num_gauss = extractor.NumGauss();
  // Compare this function with GetIvectorDistWeight(), from which it
  // was derived.
  Vector<double> logw_unnorm(num_gauss);
  logw_unnorm.AddMatVec(1.0, extractor.w_, kNoTrans, ivector, 0.0);

  Vector<double> w(logw_unnorm);
  w.ApplySoftMax();  // now w is the weights.

  Vector<double> linear_coeff(num_gauss);
  Vector<double> quadratic_coeff(num_gauss);
  double gamma = utt_stats.gamma_.Sum();
  for (int32 i = 0; i < num_gauss; i++) {
    double gamma_i = utt_stats.gamma_(i);
    double max_term = std::max(gamma_i, gamma * w(i));
    linear_coeff(i) = gamma_i - gamma * w(i) + max_term * logw_unnorm(i);
    quadratic_coeff(i) = max_term;
  }
  weight_stats_lock_.lock();
  G_.AddVecVec(weight, linear_coeff, Vector<double>(ivector));

  int32 ivector_dim = extractor.IvectorDim();
  SpMatrix<double> outer_prod(ivector_dim);
  outer_prod.AddVec2(1.0, ivector);
  SubVector<double> outer_prod_vec(outer_prod.Data(),
                                   ivector_dim * (ivector_dim + 1) / 2);
  Q_.AddVecVec(weight, quadratic_coeff, outer_prod_vec);
  weight_stats_lock_.unlock();
}

void IvectorExtractorStats::CommitStatsForW(
    const IvectorExtractor &extractor,
    const IvectorExtractorUtteranceStats &utt_stats,
    const VectorBase<double> &ivec_mean,
    const SpMatrix<double> &ivec_var) {
  KALDI_ASSERT(config_.num_samples_for_weights > 1);

  Matrix<double> rand(config_.num_samples_for_weights, extractor.IvectorDim());
  rand.SetRandn();
  TpMatrix<double> ivec_stddev(extractor.IvectorDim());
  ivec_stddev.Cholesky(ivec_var);
  Matrix<double> ivecs(config_.num_samples_for_weights, extractor.IvectorDim());
  ivecs.AddMatTp(1.0, rand, kNoTrans, ivec_stddev, kTrans, 0.0);
  // Now make the ivecs zero-mean
  Vector<double> avg_ivec(extractor.IvectorDim());
  avg_ivec.AddRowSumMat(1.0 / config_.num_samples_for_weights, ivecs);
  ivecs.AddVecToRows(-1.0, avg_ivec);
  // Correct the variance for what we just did, so the expected
  // variance still has the correct value.
  ivecs.Scale(sqrt(config_.num_samples_for_weights / (config_.num_samples_for_weights - 1.0)));
  // Add the mean of the distribution to "ivecs".
  ivecs.AddVecToRows(1.0, ivec_mean);
  // "ivecs" is now a sample from the iVector distribution.
  for (int32 samp = 0; samp < config_.num_samples_for_weights; samp++)
    CommitStatsForWPoint(extractor, utt_stats,
                         ivecs.Row(samp),
                         1.0 / config_.num_samples_for_weights);
}
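
// Added editorial note (not in the original source): after subtracting the
// sample mean of N draws, the expected sample covariance is (N-1)/N times
// the true covariance, so scaling by sqrt(N/(N-1)) above restores the
// expected value (here N = config_.num_samples_for_weights).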

void IvectorExtractorStats::CommitStatsForPrior(
    const VectorBase<double> &ivec_mean,
    const SpMatrix<double> &ivec_var) {
  SpMatrix<double> ivec_scatter(ivec_var);
  ivec_scatter.AddVec2(1.0, ivec_mean);
  prior_stats_lock_.lock();
  num_ivectors_ += 1.0;
  ivector_sum_.AddVec(1.0, ivec_mean);
  ivector_scatter_.AddSp(1.0, ivec_scatter);
  prior_stats_lock_.unlock();
}


void IvectorExtractorStats::CommitStatsForUtterance(
    const IvectorExtractor &extractor,
    const IvectorExtractorUtteranceStats &utt_stats) {

  int32 ivector_dim = extractor.IvectorDim();
  Vector<double> ivec_mean(ivector_dim);
  SpMatrix<double> ivec_var(ivector_dim);

  extractor.GetIvectorDistribution(utt_stats,
                                   &ivec_mean,
                                   &ivec_var);

  if (config_.compute_auxf)
    tot_auxf_ += extractor.GetAuxf(utt_stats, ivec_mean, &ivec_var);

  CommitStatsForM(extractor, utt_stats, ivec_mean, ivec_var);
  if (extractor.IvectorDependentWeights())
    CommitStatsForW(extractor, utt_stats, ivec_mean, ivec_var);
  CommitStatsForPrior(ivec_mean, ivec_var);
  if (!S_.empty())
    CommitStatsForSigma(extractor, utt_stats);
}


void IvectorExtractorStats::CheckDims(const IvectorExtractor &extractor) const {
  int32 S = extractor.IvectorDim(), D = extractor.FeatDim(),
      I = extractor.NumGauss();
  KALDI_ASSERT(config_.num_samples_for_weights > 0);
  KALDI_ASSERT(gamma_.Dim() == I);
  KALDI_ASSERT(static_cast<int32>(Y_.size()) == I);
  for (int32 i = 0; i < I; i++)
    KALDI_ASSERT(Y_[i].NumRows() == D && Y_[i].NumCols() == S);
  KALDI_ASSERT(R_.NumRows() == I && R_.NumCols() == S*(S+1)/2);
  if (extractor.IvectorDependentWeights()) {
    KALDI_ASSERT(Q_.NumRows() == I && Q_.NumCols() == S*(S+1)/2);
    KALDI_ASSERT(G_.NumRows() == I && G_.NumCols() == S);
  } else {
    KALDI_ASSERT(Q_.NumRows() == 0);
    KALDI_ASSERT(G_.NumRows() == 0);
  }
  // S_ may be empty or not, depending on whether update_variances == true in
  // the options.
  if (!S_.empty()) {
    KALDI_ASSERT(static_cast<int32>(S_.size()) == I);
    for (int32 i = 0; i < I; i++)
      KALDI_ASSERT(S_[i].NumRows() == D);
  }
  KALDI_ASSERT(num_ivectors_ >= 0);
  KALDI_ASSERT(ivector_sum_.Dim() == S);
  KALDI_ASSERT(ivector_scatter_.NumRows() == S);
}

void IvectorExtractorStats::AccStatsForUtterance(
    const IvectorExtractor &extractor,
    const MatrixBase<BaseFloat> &feats,
    const Posterior &post) {
  typedef std::vector<std::pair<int32, BaseFloat> > VecType;

  CheckDims(extractor);

  int32 num_gauss = extractor.NumGauss(), feat_dim = extractor.FeatDim();

  if (feat_dim != feats.NumCols()) {
    KALDI_ERR << "Feature dimension mismatch, expected " << feat_dim
              << ", got " << feats.NumCols();
  }
  KALDI_ASSERT(static_cast<int32>(post.size()) == feats.NumRows());

  bool update_variance = (!S_.empty());

  // The zeroth-order and 1st-order stats are in "utt_stats".
  IvectorExtractorUtteranceStats utt_stats(num_gauss, feat_dim,
                                           update_variance);

  utt_stats.AccStats(feats, post);

  CommitStatsForUtterance(extractor, utt_stats);
}

double IvectorExtractorStats::AccStatsForUtterance(
    const IvectorExtractor &extractor,
    const MatrixBase<BaseFloat> &feats,
    const FullGmm &fgmm) {
  int32 num_frames = feats.NumRows();
  Posterior post(num_frames);

  double tot_log_like = 0.0;
  for (int32 t = 0; t < num_frames; t++) {
    SubVector<BaseFloat> frame(feats, t);
    Vector<BaseFloat> posterior(fgmm.NumGauss(), kUndefined);
    tot_log_like += fgmm.ComponentPosteriors(frame, &posterior);
    for (int32 i = 0; i < posterior.Dim(); i++)
      post[t].push_back(std::make_pair(i, posterior(i)));
  }
  AccStatsForUtterance(extractor, feats, post);
  return tot_log_like;
}

void IvectorExtractorStats::Add(const IvectorExtractorStats &other) {
  KALDI_ASSERT(config_.num_samples_for_weights ==
               other.config_.num_samples_for_weights);
  double weight = 1.0;  // will later make this configurable if needed.
  tot_auxf_ += weight * other.tot_auxf_;
  gamma_.AddVec(weight, other.gamma_);
  KALDI_ASSERT(Y_.size() == other.Y_.size());
  for (size_t i = 0; i < Y_.size(); i++)
    Y_[i].AddMat(weight, other.Y_[i]);
  R_.AddMat(weight, other.R_);
  Q_.AddMat(weight, other.Q_);
  G_.AddMat(weight, other.G_);
  KALDI_ASSERT(S_.size() == other.S_.size());
  for (size_t i = 0; i < S_.size(); i++)
    S_[i].AddSp(weight, other.S_[i]);
  num_ivectors_ += weight * other.num_ivectors_;
  ivector_sum_.AddVec(weight, other.ivector_sum_);
  ivector_scatter_.AddSp(weight, other.ivector_scatter_);
}


void IvectorExtractorStats::Write(std::ostream &os, bool binary) {
  FlushCache();  // for the R stats.
  ((const IvectorExtractorStats&)(*this)).Write(os, binary);  // call the const version.
}


void IvectorExtractorStats::Write(std::ostream &os, bool binary) const {
  KALDI_ASSERT(R_num_cached_ == 0 && "Please use the non-const Write().");
  WriteToken(os, binary, "<IvectorExtractorStats>");
  WriteToken(os, binary, "<TotAuxf>");
  WriteBasicType(os, binary, tot_auxf_);
  WriteToken(os, binary, "<gamma>");
  gamma_.Write(os, binary);
  WriteToken(os, binary, "<Y>");
  int32 size = Y_.size();
  WriteBasicType(os, binary, size);
  for (int32 i = 0; i < size; i++)
    Y_[i].Write(os, binary);
  WriteToken(os, binary, "<R>");
  Matrix<BaseFloat> R_float(R_);
  R_float.Write(os, binary);
  WriteToken(os, binary, "<Q>");
  Matrix<BaseFloat> Q_float(Q_);
  Q_float.Write(os, binary);
  WriteToken(os, binary, "<G>");
  G_.Write(os, binary);
  WriteToken(os, binary, "<S>");
  size = S_.size();
  WriteBasicType(os, binary, size);
  for (int32 i = 0; i < size; i++)
    S_[i].Write(os, binary);
  WriteToken(os, binary, "<NumIvectors>");
  WriteBasicType(os, binary, num_ivectors_);
  WriteToken(os, binary, "<IvectorSum>");
  ivector_sum_.Write(os, binary);
  WriteToken(os, binary, "<IvectorScatter>");
  ivector_scatter_.Write(os, binary);
  WriteToken(os, binary, "</IvectorExtractorStats>");
}


void IvectorExtractorStats::Read(std::istream &is, bool binary, bool add) {
  ExpectToken(is, binary, "<IvectorExtractorStats>");
  ExpectToken(is, binary, "<TotAuxf>");
  ReadBasicType(is, binary, &tot_auxf_, add);
  ExpectToken(is, binary, "<gamma>");
  gamma_.Read(is, binary, add);
  ExpectToken(is, binary, "<Y>");
  int32 size;
  ReadBasicType(is, binary, &size);
  Y_.resize(size);
  for (int32 i = 0; i < size; i++)
    Y_[i].Read(is, binary, add);
  ExpectToken(is, binary, "<R>");
  R_.Read(is, binary, add);
  ExpectToken(is, binary, "<Q>");
  Q_.Read(is, binary, add);
  ExpectToken(is, binary, "<G>");
  G_.Read(is, binary, add);
  ExpectToken(is, binary, "<S>");
  ReadBasicType(is, binary, &size);
  S_.resize(size);
  for (int32 i = 0; i < size; i++)
    S_[i].Read(is, binary, add);
  ExpectToken(is, binary, "<NumIvectors>");
  ReadBasicType(is, binary, &num_ivectors_, add);
  ExpectToken(is, binary, "<IvectorSum>");
  ivector_sum_.Read(is, binary, add);
  ExpectToken(is, binary, "<IvectorScatter>");
  ivector_scatter_.Read(is, binary, add);
  ExpectToken(is, binary, "</IvectorExtractorStats>");
}

double IvectorExtractorStats::Update(
    const IvectorExtractorEstimationOptions &opts,
    IvectorExtractor *extractor) const {
  CheckDims(*extractor);
  if (tot_auxf_ != 0.0) {
    KALDI_LOG << "Overall auxf/frame on training data was "
              << (tot_auxf_/gamma_.Sum()) << " per frame over "
              << gamma_.Sum() << " frames.";
  }

  double ans = 0.0;
  ans += UpdateProjections(opts, extractor);
  if (extractor->IvectorDependentWeights())
    ans += UpdateWeights(opts, extractor);
  if (!S_.empty())
    ans += UpdateVariances(opts, extractor);
  ans += UpdatePrior(opts, extractor);  // This will also transform the ivector
                                        // space.  Note: this must be done as the
                                        // last stage, because it will make the
                                        // stats invalid for that model.
  KALDI_LOG << "Overall objective-function improvement per frame was " << ans;
  extractor->ComputeDerivedVars();
  return ans;
}
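
// Illustrative training-loop sketch (added; not part of this file): one EM
// iteration, accumulating stats over utterances and then updating the model.
// "feats" and "post" stand for per-utterance features and posteriors:
//
//   IvectorExtractorStatsOptions stats_opts;
//   IvectorExtractorStats stats(extractor, stats_opts);
//   // for each utterance:
//   //   stats.AccStatsForUtterance(extractor, feats, post);
//   IvectorExtractorEstimationOptions est_opts;
//   double impr_per_frame = stats.Update(est_opts, &extractor);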

void IvectorExtractorStats::IvectorVarianceDiagnostic(
    const IvectorExtractor &extractor) {

  // W is an estimate of the total residual variance explained by the
  // speaker-adapted model.  B is an estimate of the total variance
  // explained by the iVector subspace.
  SpMatrix<double> W(extractor.Sigma_inv_[0].NumRows()),
      B(extractor.M_[0].NumRows());
  Vector<double> w(gamma_);
  w.Scale(1.0 / gamma_.Sum());
  for (int32 i = 0; i < extractor.NumGauss(); i++) {
    SpMatrix<double> Sigma_i(extractor.FeatDim());
    extractor.InvertWithFlooring(extractor.Sigma_inv_[i], &Sigma_i);
    W.AddSp(w(i), Sigma_i);
    B.AddMat2(w(i), extractor.M_[i], kNoTrans, 1.0);
  }
  double trace_W = W.Trace(),
      trace_B = B.Trace();
  KALDI_LOG << "The proportion of within-Gaussian variance explained by "
            << "the iVectors is " << trace_B / (trace_B + trace_W) << ".";
}

double IvectorExtractorStats::UpdateProjection(
    const IvectorExtractorEstimationOptions &opts,
    int32 i,
    IvectorExtractor *extractor) const {
  int32 I = extractor->NumGauss(), S = extractor->IvectorDim();
  KALDI_ASSERT(i >= 0 && i < I);
  /*
    For Gaussian index i, maximize the auxiliary function
    Q_i(x) = tr(M_i^T Sigma_i^{-1} Y_i) - 0.5 tr(Sigma_i^{-1} M_i R_i M_i^T)
  */
  if (gamma_(i) < opts.gaussian_min_count) {
    KALDI_WARN << "Skipping Gaussian index " << i << " because count "
               << gamma_(i) << " is below min-count.";
    return 0.0;
  }
  SpMatrix<double> R(S, kUndefined), SigmaInv(extractor->Sigma_inv_[i]);
  SubVector<double> R_vec(R_, i);  // i'th row of R; vectorized form of SpMatrix.
  SubVector<double> R_sp(R.Data(), S * (S+1) / 2);
  R_sp.CopyFromVec(R_vec);  // copy to the SpMatrix's memory.

  Matrix<double> M(extractor->M_[i]);
  SolverOptions solver_opts;
  solver_opts.name = "M";
  solver_opts.diagonal_precondition = true;
  double impr = SolveQuadraticMatrixProblem(R, Y_[i], SigmaInv, solver_opts, &M),
      gamma = gamma_(i);
  if (i < 4) {
    KALDI_VLOG(1) << "Objf impr for M for Gaussian index " << i << " is "
                  << (impr / gamma) << " per frame over " << gamma << " frames.";
  }
  extractor->M_[i].CopyFromMat(M);
  return impr;
}

void IvectorExtractorStats::GetOrthogonalIvectorTransform(
    const SubMatrix<double> &T,
    IvectorExtractor *extractor,
    Matrix<double> *A) const {
  extractor->ComputeDerivedVars();  // Update the extractor->U_ matrix.
  int32 ivector_dim = extractor->IvectorDim(),
      num_gauss = extractor->NumGauss();
  int32 quad_dim = ivector_dim*(ivector_dim + 1)/2;

  // Each row of extractor->U_ is an SpMatrix.  We can compute the weighted
  // avg of these rows in a SubVector that updates the data of the SpMatrix
  // Uavg.
  SpMatrix<double> Uavg(ivector_dim), Vavg(ivector_dim - 1);
  SubVector<double> uavg_vec(Uavg.Data(), quad_dim);
  if (extractor->IvectorDependentWeights()) {
    Vector<double> w_uniform(num_gauss);
    for (int32 i = 0; i < num_gauss; i++) w_uniform(i) = 1.0;
    uavg_vec.AddMatVec(1.0/num_gauss, extractor->U_, kTrans, w_uniform, 0.0);
  } else {
    uavg_vec.AddMatVec(1.0, extractor->U_, kTrans, extractor->w_vec_, 0.0);
  }

  Matrix<double> Tinv(T);
  Tinv.Invert();
  Matrix<double> Vavg_temp(Vavg), Uavg_temp(Uavg);

  Vavg_temp.AddMatMatMat(1.0, Tinv, kTrans,
                         SubMatrix<double>(Uavg_temp,
                                           1, ivector_dim-1, 1, ivector_dim-1),
                         kNoTrans, Tinv, kNoTrans, 0.0);
  Vavg.CopyFromMat(Vavg_temp);

  Vector<double> s(ivector_dim-1);
  Matrix<double> P(ivector_dim-1, ivector_dim-1);
  Vavg.Eig(&s, &P);
  SortSvd(&s, &P);
  A->Resize(P.NumCols(), P.NumRows());
  A->SetZero();
  A->AddMat(1.0, P, kTrans);
  KALDI_LOG << "Eigenvalues of Vavg: " << s;
}

class IvectorExtractorUpdateProjectionClass {
 public:
  IvectorExtractorUpdateProjectionClass(const IvectorExtractorStats &stats,
                                        const IvectorExtractorEstimationOptions &opts,
                                        int32 i,
                                        IvectorExtractor *extractor,
                                        double *tot_impr):
      stats_(stats), opts_(opts), i_(i), extractor_(extractor),
      tot_impr_(tot_impr), impr_(0.0) { }
  void operator () () {
    impr_ = stats_.UpdateProjection(opts_, i_, extractor_);
  }
  ~IvectorExtractorUpdateProjectionClass() { *tot_impr_ += impr_; }
 private:
  const IvectorExtractorStats &stats_;
  const IvectorExtractorEstimationOptions &opts_;
  int32 i_;
  IvectorExtractor *extractor_;
  double *tot_impr_;
  double impr_;
};

double IvectorExtractorStats::UpdateProjections(
    const IvectorExtractorEstimationOptions &opts,
    IvectorExtractor *extractor) const {
  int32 I = extractor->NumGauss();
  double tot_impr = 0.0;
  {
    TaskSequencerConfig sequencer_opts;
    sequencer_opts.num_threads = g_num_threads;
    TaskSequencer<IvectorExtractorUpdateProjectionClass> sequencer(
        sequencer_opts);
    for (int32 i = 0; i < I; i++)
      sequencer.Run(new IvectorExtractorUpdateProjectionClass(
          *this, opts, i, extractor, &tot_impr));
  }
  double count = gamma_.Sum();
  KALDI_LOG << "Overall objective function improvement for M (mean projections) "
            << "was " << (tot_impr / count) << " per frame over "
            << count << " frames.";
  return tot_impr / count;
}

double IvectorExtractorStats::UpdateVariances(
    const IvectorExtractorEstimationOptions &opts,
    IvectorExtractor *extractor) const {
  int32 num_gauss = extractor->NumGauss(),
      feat_dim = extractor->FeatDim(),
      ivector_dim = extractor->IvectorDim();
  KALDI_ASSERT(!S_.empty());
  double tot_objf_impr = 0.0;

  // "raw_variances" will be the variances directly from
  // the stats, without any flooring.
  std::vector<SpMatrix<double> > raw_variances(num_gauss);
  SpMatrix<double> var_floor(feat_dim);
  double var_floor_count = 0.0;

  for (int32 i = 0; i < num_gauss; i++) {
    if (gamma_(i) < opts.gaussian_min_count) continue;  // warned in UpdateProjections
    SpMatrix<double> &S(raw_variances[i]);
    S = S_[i];  // Set it to the raw scatter statistics.

    // The equations for estimating the variance are similar to
    // those used in SGMMs.  We need to convert it to a centered
    // covariance, and for this we can use a combination of other
    // stats and the model parameters.

    Matrix<double> M(extractor->M_[i]);
    // Y * M^T.
    Matrix<double> YM(feat_dim, feat_dim);
    YM.AddMatMat(1.0, Y_[i], kNoTrans, M, kTrans, 0.0);
    Matrix<double> YMMY(YM, kTrans);
    YMMY.AddMat(1.0, YM);
    // Now, YMMY = Y * M^T + M * Y^T.  This is a kind of cross-term
    // between the mean and the data, which we subtract.
    SpMatrix<double> YMMY_sp(YMMY, kTakeMeanAndCheck);
    S.AddSp(-1.0, YMMY_sp);

    // Add in a mean-squared term.
    SpMatrix<double> R(ivector_dim);  // will be the scatter of iVectors, weighted
                                      // by count for this Gaussian.
    SubVector<double> R_vec(R.Data(),
                            ivector_dim * (ivector_dim + 1) / 2);
    R_vec.CopyFromVec(R_.Row(i));  // copy from the i'th row of R_.

    S.AddMat2Sp(1.0, M, kNoTrans, R, 1.0);

    var_floor.AddSp(1.0, S);
    var_floor_count += gamma_(i);
    S.Scale(1.0 / gamma_(i));
  }
  KALDI_ASSERT(var_floor_count > 0.0);
  KALDI_ASSERT(opts.variance_floor_factor > 0.0 &&
               opts.variance_floor_factor <= 1.0);

  var_floor.Scale(opts.variance_floor_factor / var_floor_count);

  // var_floor should not be singular in any normal case, but previously
  // we've had situations where Cholesky on it failed (perhaps due to
  // people using linearly dependent features).  So we floor its
  // singular values.
  int eig_floored = var_floor.ApplyFloor(var_floor.MaxAbsEig() * 1.0e-04);
  if (eig_floored > 0) {
    KALDI_WARN << "Floored " << eig_floored << " eigenvalues of the "
               << "variance floor matrix.  This is not expected.  Maybe your "
               << "feature data is linearly dependent.";
  }

  int32 tot_num_floored = 0;
  for (int32 i = 0; i < num_gauss; i++) {
    SpMatrix<double> &S(raw_variances[i]);  // un-floored variance.
    if (S.NumRows() == 0) continue;  // due to low count.
    SpMatrix<double> floored_var(S);
    SpMatrix<double> old_inv_var(extractor->Sigma_inv_[i]);

    int32 num_floored = floored_var.ApplyFloor(var_floor);
    tot_num_floored += num_floored;
    if (num_floored > 0)
      KALDI_LOG << "For Gaussian index " << i << ", floored "
                << num_floored << " eigenvalues of variance.";
    // this objf is per frame;
    double old_objf = -0.5 * (TraceSpSp(S, old_inv_var) -
                              old_inv_var.LogPosDefDet());

    SpMatrix<double> new_inv_var(floored_var);
    new_inv_var.Invert();

    double new_objf = -0.5 * (TraceSpSp(S, new_inv_var) -
                              new_inv_var.LogPosDefDet());
    if (i < 4) {
      KALDI_VLOG(1) << "Objf impr/frame for variance for Gaussian index "
                    << i << " was " << (new_objf - old_objf);
    }
    tot_objf_impr += gamma_(i) * (new_objf - old_objf);
    extractor->Sigma_inv_[i].CopyFromSp(new_inv_var);
  }
  double floored_percent = tot_num_floored * 100.0 / (num_gauss * feat_dim);
  KALDI_LOG << "Floored " << floored_percent << "% of all Gaussian eigenvalues.";

  KALDI_LOG << "Overall objf impr/frame for variances was "
            << (tot_objf_impr / gamma_.Sum()) << " over "
            << gamma_.Sum() << " frames.";
  return tot_objf_impr / gamma_.Sum();
}

double IvectorExtractorStats::UpdateWeight(
    const IvectorExtractorEstimationOptions &opts,
    int32 i,
    IvectorExtractor *extractor) const {

  int32 num_gauss = extractor->NumGauss(),
      ivector_dim = extractor->IvectorDim();
  KALDI_ASSERT(i >= 0 && i < num_gauss);

  SolverOptions solver_opts;
  solver_opts.diagonal_precondition = true;
  solver_opts.name = "w";

  SubVector<double> w_i(extractor->w_, i);
  SubVector<double> g_i(G_, i);
  SpMatrix<double> Q(ivector_dim);
  SubVector<double> Q_vec(Q.Data(), ivector_dim * (ivector_dim + 1) / 2);
  Q_vec.CopyFromVec(Q_.Row(i));
  double objf_impr = SolveQuadraticProblem(Q, g_i, solver_opts, &w_i);
  if (i < 4 && gamma_(i) != 0.0) {
    KALDI_VLOG(1) << "Auxf impr/frame for Gaussian index " << i
                  << " for weights is " << (objf_impr / gamma_(i))
                  << " over " << gamma_(i) << " frames.";
  }
  return objf_impr;
}

class IvectorExtractorUpdateWeightClass {
 public:
  IvectorExtractorUpdateWeightClass(const IvectorExtractorStats &stats,
                                    const IvectorExtractorEstimationOptions &opts,
                                    int32 i,
                                    IvectorExtractor *extractor,
                                    double *tot_impr):
      stats_(stats), opts_(opts), i_(i), extractor_(extractor),
      tot_impr_(tot_impr), impr_(0.0) { }
  void operator () () {
    impr_ = stats_.UpdateWeight(opts_, i_, extractor_);
  }
  ~IvectorExtractorUpdateWeightClass() { *tot_impr_ += impr_; }
 private:
  const IvectorExtractorStats &stats_;
  const IvectorExtractorEstimationOptions &opts_;
  int32 i_;
  IvectorExtractor *extractor_;
  double *tot_impr_;
  double impr_;
};

double IvectorExtractorStats::UpdateWeights(
    const IvectorExtractorEstimationOptions &opts,
    IvectorExtractor *extractor) const {

  int32 I = extractor->NumGauss();
  double tot_impr = 0.0;
  {
    TaskSequencerConfig sequencer_opts;
    sequencer_opts.num_threads = g_num_threads;
    TaskSequencer<IvectorExtractorUpdateWeightClass> sequencer(
        sequencer_opts);
    for (int32 i = 0; i < I; i++)
      sequencer.Run(new IvectorExtractorUpdateWeightClass(
          *this, opts, i, extractor, &tot_impr));
  }

  double num_frames = gamma_.Sum();
  KALDI_LOG << "Overall auxf impr/frame from weight update is "
            << (tot_impr / num_frames) << " over "
            << num_frames << " frames.";
  return tot_impr / num_frames;
}


1613 double IvectorExtractorStats::PriorDiagnostics(double old_prior_offset) const {
1614  // The iVectors had a centered covariance "covar"; we want to figure out
1615  // the objective-function change from rescaling. It's as if we were
1616  // formerly modeling "covar" with the unit matrix, and we're now modeling
1617  // it with "covar" itself. This is ignoring flooring issues. Of course,
1618  // we implement it through rescaling the space, but it has the same effect.
1619  // We also need to take into account that before the rescaling etc., the
1620  // old mean might have been wrong.
1621 
1622  int32 ivector_dim = ivector_sum_.Dim();
1623  Vector<double> sum(ivector_sum_);
1624  sum.Scale(1.0 / num_ivectors_);
1625  SpMatrix<double> covar(ivector_scatter_);
1626  covar.Scale(1.0 / num_ivectors_);
1627  covar.AddVec2(-1.0, sum); // Get the centered covariance.
1628 
1629  // Now work out the offset from the old prior's mean.
1630  Vector<double> mean_offset(sum);
1631  mean_offset(0) -= old_prior_offset;
1632 
1633  SpMatrix<double> old_covar(covar); // the covariance around the old mean.
1634  old_covar.AddVec2(1.0, mean_offset);
1635  // old likelihood = -0.5 * (Trace(I old_covar) + logdet(I) + [ignored])
1636  double old_like = -0.5 * old_covar.Trace();
1637  // new likelihood is if we updated the variance to equal "covar"... this isn't
1638  // how we did it (we use rescaling of the ivectors) but it has the same
1639  // effect. -0.5 * (Trace(covar^{-1} covar) + logdet(covar))
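  // (Trace(covar^{-1} covar) is just ivector_dim, hence the term below.)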
1640  double new_like = -0.5 * (ivector_dim + covar.LogPosDefDet()),
1641  like_change = new_like - old_like,
1642  like_change_per_frame = like_change * num_ivectors_ / gamma_.Sum();
1643 
1644  KALDI_LOG << "Overall auxf improvement from prior is " << like_change_per_frame
1645  << " per frame, or " << like_change << " per iVector.";
1646  return like_change_per_frame; // we'll be adding this to other per-frame
1647  // quantities.
1648 }
1649 
1650 
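// Re-estimates the prior distribution over iVectors: computes the mean and
// centered covariance of the training iVectors, then transforms the iVector
// space (via TransformIvectors below) so that the covariance becomes unit
// and the mean lies along the first axis, where it becomes the new
// prior offset.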
1651 double IvectorExtractorStats::UpdatePrior(
1652  const IvectorExtractorEstimationOptions &opts,
1653  IvectorExtractor *extractor) const {
1654 
1655  KALDI_ASSERT(num_ivectors_ > 0.0);
1656  Vector<double> sum(ivector_sum_);
1657  sum.Scale(1.0 / num_ivectors_);
1658  SpMatrix<double> covar(ivector_scatter_);
1659  covar.Scale(1.0 / num_ivectors_);
1660  covar.AddVec2(-1.0, sum); // Get the centered covariance.
1661 
1662  int32 ivector_dim = extractor->IvectorDim();
1663  Vector<double> s(ivector_dim);
1664  Matrix<double> P(ivector_dim, ivector_dim);
1665  // decompose covar = P diag(s) P^T:
1666  covar.Eig(&s, &P);
1667  KALDI_LOG << "Eigenvalues of iVector covariance range from "
1668  << s.Min() << " to " << s.Max();
1669  int32 num_floored;
1670  s.ApplyFloor(1.0e-07, &num_floored);
1671  if (num_floored > 0)
1672  KALDI_WARN << "Floored " << num_floored << " eigenvalues of covar "
1673  << "of iVectors.";
1674 
1675  Matrix<double> T(P, kTrans);
1676  { // set T to a transformation that makes covar unit
1677  // (modulo floored eigenvalues).
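  // Since covar = P diag(s) P^T, setting T = diag(s)^{-0.5} P^T gives
  // T covar T^T = I (exactly so when no eigenvalues were floored).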
1678  Vector<double> scales(s);
1679  scales.ApplyPow(-0.5);
1680  T.MulRowsVec(scales);
1681  if (num_floored == 0) { // sanity check
1682  SpMatrix<double> Tproj(ivector_dim);
1683  Tproj.AddMat2Sp(1.0, T, kNoTrans, covar, 0.0);
1684  KALDI_ASSERT(Tproj.IsUnit(1.0e-06));
1685  }
1686  }
1687 
1688  Vector<double> sum_proj(ivector_dim);
1689  sum_proj.AddMatVec(1.0, T, kNoTrans, sum, 0.0);
1690 
1691  KALDI_ASSERT(sum_proj.Norm(2.0) != 0.0);
1692 
1693  // We need a projection that (like T) makes "covar" unit,
1694  // but also that sends "sum" to a multiple of the vector e0 = [ 1 0 0 0 .. ].
1695  // We'll do this by a transform that follows T, of the form
1696  // (I - 2 a a^T), where a is a unit vector [i.e. a Householder reflection].
1697  // First, let x equal sum_proj normalized to unit length.
1698  // We'll let a = alpha x + beta e0, for suitable coefficients alpha and beta.
1699  // To project sum_proj (or equivalently, x) to a multiple of e0, we need the
1700  // x term in
1701  //   (I - 2 (alpha x + beta e0)(alpha x + beta e0)^T) x
1702  // to equal zero, i.e. 1 - 2 alpha (alpha x^T x + beta e0^T x) == 0; since
1703  // x^T x = 1 and e0^T x = x0, this gives (1 - 2 alpha^2 - 2 alpha beta x0) = 0.
1704  // To ensure that a is unit, we require that
1705  // (alpha x + beta e0).(alpha x + beta e0) = 1, i.e.
1706  // alpha^2 + beta^2 + 2 alpha beta x0 = 1.
1707  // At wolframalpha.com,
1708  //   Solve[ {a^2 + b^2 + 2 a b x = 1}, {1 - 2 a^2 - 2 a b x = 0}, {a, b} ]
1709  // gives several solutions, but the one that keeps the offset positive
1710  // after projection is:
1711  //   alpha = 1/(sqrt(2) sqrt(1 - x0)), beta = -alpha.
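  // Quick check of that algebra: with beta = -alpha we have a = alpha (x - e0)
  // and alpha^2 = 1 / (2 (1 - x0)), so a^T x = alpha (1 - x0) and
  // 2 a (a^T x) = 2 alpha^2 (1 - x0) (x - e0) = x - e0; hence
  // (I - 2 a a^T) x = e0, i.e. the projected sum lands on the first axis
  // with a positive coefficient, as intended.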
1712 
1713  Matrix<double> U(ivector_dim, ivector_dim);
1714  U.SetUnit();
1715  Vector<double> x(sum_proj);
1716  x.Scale(1.0 / x.Norm(2.0));
1717  double x0 = x(0), alpha, beta;
1718  alpha = 1.0 / (M_SQRT2 * sqrt(1.0 - x0));
1719  beta = -alpha;
1720  Vector<double> a(x);
1721  a.Scale(alpha);
1722  a(0) += beta;
1723  U.AddVecVec(-2.0, a, a);
1724 
1725  Matrix<double> V(ivector_dim, ivector_dim);
1726  V.AddMatMat(1.0, U, kNoTrans, T, kNoTrans, 0.0);
1727 
1728  // Optionally replace transform V with V' such that V' makes the
1729  // covariance unit and additionally diagonalizes the quadratic
1730  // term.
1731  if (opts.diagonalize) {
1732 
1733  SubMatrix<double> Vsub(V, 1, V.NumRows()-1, 0, V.NumCols());
1734  Matrix<double> Vtemp(SubMatrix<double>(V, 1, V.NumRows()-1,
1735  0, V.NumCols())),
1736  A;
1737  GetOrthogonalIvectorTransform(SubMatrix<double>(Vtemp, 0,
1738  Vtemp.NumRows(), 1, Vtemp.NumCols()-1),
1739  extractor, &A);
1740 
1741  // It is necessary to exclude the first row of V in this transformation
1742  // so that the sum_vproj has the form [ x 0 0 0 .. ], where x > 0.
1743  Vsub.AddMatMat(1.0, A, kNoTrans, Vtemp, kNoTrans, 0.0);
1744  }
1745 
1746  if (num_floored == 0) { // sanity check
1747  SpMatrix<double> Vproj(ivector_dim);
1748  Vproj.AddMat2Sp(1.0, V, kNoTrans, covar, 0.0);
1749  KALDI_ASSERT(Vproj.IsUnit(1.0e-04));
1750  }
1751 
1752 
1753  Vector<double> sum_vproj(ivector_dim);
1754  sum_vproj.AddMatVec(1.0, V, kNoTrans, sum, 0.0);
1755  // Make sure sum_vproj is of the form [ x 0 0 0 .. ] with x > 0.
1756  // (the x > 0 part isn't really necessary, it's just nice to know.)
1757  KALDI_ASSERT(ApproxEqual(sum_vproj(0), sum_vproj.Norm(2.0)));
1758 
1759  double ans = PriorDiagnostics(extractor->prior_offset_);
1760 
1761  extractor->TransformIvectors(V, sum_vproj(0));
1762 
1763  return ans;
1764 }
1765 
1766 IvectorExtractorStats::IvectorExtractorStats(
1767  const IvectorExtractorStats &other):
1768  config_(other.config_), tot_auxf_(other.tot_auxf_), gamma_(other.gamma_),
1769  Y_(other.Y_), R_(other.R_), R_num_cached_(other.R_num_cached_),
1770  R_gamma_cache_(other.R_gamma_cache_),
1771  R_ivec_scatter_cache_(other.R_ivec_scatter_cache_),
1772  Q_(other.Q_), G_(other.G_), S_(other.S_), num_ivectors_(other.num_ivectors_),
1773  ivector_sum_(other.ivector_sum_), ivector_scatter_(other.ivector_scatter_) {
1774 }
1775 
1776 
1777 
1778 double EstimateIvectorsOnline(
1779  const Matrix<BaseFloat> &feats,
1780  const Posterior &post,
1781  const IvectorExtractor &extractor,
1782  int32 ivector_period,
1783  int32 num_cg_iters,
1784  BaseFloat max_count,
1785  Matrix<BaseFloat> *ivectors) {
1786 
1787  KALDI_ASSERT(ivector_period > 0);
1788  KALDI_ASSERT(static_cast<int32>(post.size()) == feats.NumRows());
1789  int32 num_frames = feats.NumRows(),
1790  num_ivectors = (num_frames + ivector_period - 1) / ivector_period;
1791 
1792  ivectors->Resize(num_ivectors, extractor.IvectorDim());
1793 
1794  OnlineIvectorEstimationStats online_stats(extractor.IvectorDim(),
1795  extractor.PriorOffset(),
1796  max_count);
1797 
1798  double ans = 0.0;
1799 
1800  Vector<double> cur_ivector(extractor.IvectorDim());
1801  cur_ivector(0) = extractor.PriorOffset();
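  // Accumulate stats frame by frame; at the start of each ivector_period
  // block, re-estimate the iVector with num_cg_iters conjugate-gradient
  // iterations (starting from the previous estimate) and write it to the
  // output row for that block.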
1802  for (int32 frame = 0; frame < num_frames; frame++) {
1803  online_stats.AccStats(extractor,
1804  feats.Row(frame),
1805  post[frame]);
1806  if (frame % ivector_period == 0) {
1807  online_stats.GetIvector(num_cg_iters, &cur_ivector);
1808  int32 ivector_index = frame / ivector_period;
1809  ivectors->Row(ivector_index).CopyFromVec(cur_ivector);
1810  if (ivector_index == num_ivectors - 1) // last iVector
1811  ans = online_stats.ObjfChange(cur_ivector);
1812  }
1813  }
1814  return ans;
1815 }
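// A minimal usage sketch for EstimateIvectorsOnline (hypothetical setup;
// the extractor, features and posteriors would come from the usual
// pipeline, and the parameter values below are only illustrative):
//
//   IvectorExtractor extractor;   // e.g. read from disk with extractor.Read()
//   Matrix<BaseFloat> feats;      // features for one utterance
//   Posterior post;               // per-frame Gaussian-level posteriors
//   Matrix<BaseFloat> ivectors;   // output: one row per ivector_period block
//   double objf_impr = EstimateIvectorsOnline(feats, post, extractor,
//                                             10,   // ivector_period
//                                             15,   // num_cg_iters
//                                             0.0,  // max_count (0: no limit)
//                                             &ivectors);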
1816 
1817 
1818 
1819 
1820 } // namespace kaldi