estimate-am-sgmm2.cc
Go to the documentation of this file.
1 // sgmm2/estimate-am-sgmm2.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation; Lukas Burget;
4 // Saarland University (Author: Arnab Ghoshal);
5 // Ondrej Glembek; Yanmin Qian;
6 // Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
7 // Liang Lu; Arnab Ghoshal
8 
9 // See ../../COPYING for clarification regarding multiple authors
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
19 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
20 // MERCHANTABILITY OR NON-INFRINGEMENT.
21 // See the Apache 2 License for the specific language governing permissions and
22 // limitations under the License.
23 
24 
25 #include "sgmm2/am-sgmm2.h"
27 #include "util/kaldi-thread.h"
28 
29 namespace kaldi {
30 
31 using std::string;
32 using std::vector;
33 
34 void MleAmSgmm2Accs::Write(std::ostream &out_stream, bool binary) const {
35 
36  WriteToken(out_stream, binary, "<SGMMACCS>");
37  WriteToken(out_stream, binary, "<NUMPDFS>");
38  WriteBasicType(out_stream, binary, num_pdfs_);
39  WriteToken(out_stream, binary, "<NUMGROUPS>");
40  WriteBasicType(out_stream, binary, num_groups_);
41  WriteToken(out_stream, binary, "<NUMGaussians>");
42  WriteBasicType(out_stream, binary, num_gaussians_);
43  WriteToken(out_stream, binary, "<FEATUREDIM>");
44  WriteBasicType(out_stream, binary, feature_dim_);
45  WriteToken(out_stream, binary, "<PHONESPACEDIM>");
46  WriteBasicType(out_stream, binary, phn_space_dim_);
47  WriteToken(out_stream, binary, "<SPKSPACEDIM>");
48  WriteBasicType(out_stream, binary, spk_space_dim_);
49  if (!binary) out_stream << "\n";
50 
51  if (Y_.size() != 0) {
52  KALDI_ASSERT(gamma_.size() != 0);
53  WriteToken(out_stream, binary, "<Y>");
54  for (int32 i = 0; i < num_gaussians_; i++) {
55  Matrix<BaseFloat>(Y_[i]).Write(out_stream, binary);
56  }
57  }
58  if (Z_.size() != 0) {
59  KALDI_ASSERT(R_.size() != 0);
60  WriteToken(out_stream, binary, "<Z>");
61  for (int32 i = 0; i < num_gaussians_; i++) {
62  Matrix<BaseFloat>(Z_[i]).Write(out_stream, binary);
63  }
64  WriteToken(out_stream, binary, "<R>");
65  for (int32 i = 0; i < num_gaussians_; i++) {
66  SpMatrix<BaseFloat>(R_[i]).Write(out_stream, binary);
67  }
68  }
69  if (S_.size() != 0) {
70  KALDI_ASSERT(gamma_.size() != 0);
71  WriteToken(out_stream, binary, "<S>");
72  for (int32 i = 0; i < num_gaussians_; i++) {
73  SpMatrix<BaseFloat>(S_[i]).Write(out_stream, binary);
74  }
75  }
76  if (y_.size() != 0) {
77  KALDI_ASSERT(gamma_.size() != 0);
78  WriteToken(out_stream, binary, "<y>");
79  for (int32 j1 = 0; j1 < num_groups_; j1++) {
80  Matrix<BaseFloat>(y_[j1]).Write(out_stream, binary);
81  }
82  }
83  if (gamma_.size() != 0) { // These stats are large
84  // -> write as single precision.
85  WriteToken(out_stream, binary, "<gamma>");
86  for (int32 j1 = 0; j1 < num_groups_; j1++) {
87  Matrix<BaseFloat> gamma_j1(gamma_[j1]);
88  gamma_j1.Write(out_stream, binary);
89  }
90  }
91  if (t_.NumRows() != 0) {
92  WriteToken(out_stream, binary, "<t>");
93  Matrix<BaseFloat>(t_).Write(out_stream, binary);
94  }
95  if (U_.size() != 0) {
96  WriteToken(out_stream, binary, "<U>");
97  for (int32 i = 0; i < num_gaussians_; i++) {
98  SpMatrix<BaseFloat>(U_[i]).Write(out_stream, binary);
99  }
100  }
101  if (gamma_c_.size() != 0) {
102  WriteToken(out_stream, binary, "<gamma_c>");
103  for (int32 j2 = 0; j2 < num_pdfs_; j2++) {
104  Vector<BaseFloat>(gamma_c_[j2]).Write(out_stream, binary);
105  }
106  }
107  if (a_.size() != 0) {
108  WriteToken(out_stream, binary, "<a>");
109  for (int32 j1 = 0; j1 < num_groups_; j1++) {
110  Matrix<BaseFloat>(a_[j1]).Write(out_stream, binary);
111  }
112  }
113  WriteToken(out_stream, binary, "<total_like>");
114  WriteBasicType(out_stream, binary, total_like_);
115 
116  WriteToken(out_stream, binary, "<total_frames>");
117  WriteBasicType(out_stream, binary, total_frames_);
118 
119  WriteToken(out_stream, binary, "</SGMMACCS>");
120 }
121 
// Reads accumulators previously produced by Write().  The dimension header
// is mandatory; after it, stat tokens may appear in any order until
// "</SGMMACCS>".  If add == true, stats are added to the existing contents
// (dimensions must then already match); otherwise they overwrite.
void MleAmSgmm2Accs::Read(std::istream &in_stream, bool binary,
                          bool add) {
  ExpectToken(in_stream, binary, "<SGMMACCS>");
  ExpectToken(in_stream, binary, "<NUMPDFS>");
  ReadBasicType(in_stream, binary, &num_pdfs_);
  ExpectToken(in_stream, binary, "<NUMGROUPS>");
  ReadBasicType(in_stream, binary, &num_groups_);
  ExpectToken(in_stream, binary, "<NUMGaussians>");
  ReadBasicType(in_stream, binary, &num_gaussians_);
  ExpectToken(in_stream, binary, "<FEATUREDIM>");
  ReadBasicType(in_stream, binary, &feature_dim_);
  ExpectToken(in_stream, binary, "<PHONESPACEDIM>");
  ReadBasicType(in_stream, binary, &phn_space_dim_);
  ExpectToken(in_stream, binary, "<SPKSPACEDIM>");
  ReadBasicType(in_stream, binary, &spk_space_dim_);

  string token;
  ReadToken(in_stream, binary, &token);

  // Token-dispatch loop: each recognized token reads the corresponding stats.
  while (token != "</SGMMACCS>") {
    if (token == "<Y>") {
      Y_.resize(num_gaussians_);
      for (size_t i = 0; i < Y_.size(); i++) {
        Y_[i].Read(in_stream, binary, add);
      }
    } else if (token == "<Z>") {
      Z_.resize(num_gaussians_);
      for (size_t i = 0; i < Z_.size(); i++) {
        Z_[i].Read(in_stream, binary, add);
      }
    } else if (token == "<R>") {
      R_.resize(num_gaussians_);
      // NOTE(review): one source line is missing here in this extract
      // (Doxygen line 154) — restore from the upstream kaldi file.
      for (size_t i = 0; i < R_.size(); i++) {
        R_[i].Read(in_stream, binary, add);
      }
    } else if (token == "<S>") {
      S_.resize(num_gaussians_);
      for (size_t i = 0; i < S_.size(); i++) {
        S_[i].Read(in_stream, binary, add);
      }
    } else if (token == "<y>") {
      y_.resize(num_groups_);
      for (int32 j1 = 0; j1 < num_groups_; j1++) {
        y_[j1].Read(in_stream, binary, add);
      }
    } else if (token == "<gamma>") {
      gamma_.resize(num_groups_);
      for (int32 j1 = 0; j1 < num_groups_; j1++) {
        gamma_[j1].Read(in_stream, binary, add);
      }
      // Don't read gamma_s, it's just a temporary variable and
      // not part of the permanent (non-speaker-specific) accs.
    } else if (token == "<a>") {
      a_.resize(num_groups_);
      for (int32 j1 = 0; j1 < num_groups_; j1++) {
        a_[j1].Read(in_stream, binary, add);
      }
    } else if (token == "<gamma_c>") {
      gamma_c_.resize(num_pdfs_);
      for (int32 j2 = 0; j2 < num_pdfs_; j2++) {
        gamma_c_[j2].Read(in_stream, binary, add);
      }
    } else if (token == "<t>") {
      t_.Read(in_stream, binary, add);
    } else if (token == "<U>") {
      U_.resize(num_gaussians_);
      for (int32 i = 0; i < num_gaussians_; i++) {
        U_[i].Read(in_stream, binary, add);
      }
    } else if (token == "<total_like>") {
      double total_like;
      ReadBasicType(in_stream, binary, &total_like);
      if (add)
        total_like_ += total_like;
      else
        total_like_ = total_like;
    } else if (token == "<total_frames>") {
      double total_frames;
      ReadBasicType(in_stream, binary, &total_frames);
      if (add)
        total_frames_ += total_frames;
      else
        total_frames_ = total_frames;
    } else {
      KALDI_ERR << "Unexpected token '" << token << "' in model file ";
    }
    ReadToken(in_stream, binary, &token);
  }
}
212 
// Sanity-checks the accumulators against 'model': verifies that every stat
// that is present has the dimensions implied by the model, and (when
// show_properties) logs which stats exist and whether they are nonzero.
void MleAmSgmm2Accs::Check(const AmSgmm2 &model,
                           bool show_properties) const {
  if (show_properties)
    KALDI_LOG << "Sgmm2PdfModel: J1 = " << num_groups_ << ", J2 = "
              << num_pdfs_ << ", D = " << feature_dim_ << ", S = "
              << phn_space_dim_ << ", T = " << spk_space_dim_ << ", I = "
              << num_gaussians_;

  KALDI_ASSERT(num_pdfs_ == model.NumPdfs() && num_pdfs_ > 0);
  KALDI_ASSERT(num_groups_ == model.NumGroups() && num_groups_ > 0);
  KALDI_ASSERT(num_gaussians_ == model.NumGauss() && num_gaussians_ > 0);
  // NOTE(review): Doxygen lines 224-226 are missing from this extract
  // (presumably the remaining dimension asserts) — restore from upstream.

  std::ostringstream debug_str;

  if (Y_.size() == 0) {
    debug_str << "Y: no. ";
  } else {
    KALDI_ASSERT(gamma_.size() != 0);  // Y_ is only valid alongside gamma_.
    KALDI_ASSERT(Y_.size() == static_cast<size_t>(num_gaussians_));
    bool nz = false;
    for (int32 i = 0; i < num_gaussians_; i++) {
      KALDI_ASSERT(Y_[i].NumRows() == feature_dim_ &&
                   Y_[i].NumCols() == phn_space_dim_);
      // Cheap nonzero probe: only element (0,0) of each matrix is inspected.
      if (!nz && Y_[i](0, 0) != 0) { nz = true; }
    }
    debug_str << "Y: yes, " << string(nz ? "nonzero. " : "zero. ");
  }

  if (Z_.size() == 0) {
    KALDI_ASSERT(R_.size() == 0);  // Z_ and R_ are set up together.
    debug_str << "Z, R: no. ";
  } else {
    // NOTE(review): Doxygen line 248 is missing from this extract —
    // restore from upstream.
    KALDI_ASSERT(Z_.size() == static_cast<size_t>(num_gaussians_));
    KALDI_ASSERT(R_.size() == static_cast<size_t>(num_gaussians_));
    bool Z_nz = false, R_nz = false;
    for (int32 i = 0; i < num_gaussians_; i++) {
      KALDI_ASSERT(Z_[i].NumRows() == feature_dim_ &&
                   Z_[i].NumCols() == spk_space_dim_);
      KALDI_ASSERT(R_[i].NumRows() == spk_space_dim_);
      if (!Z_nz && Z_[i](0, 0) != 0) { Z_nz = true; }
      if (!R_nz && R_[i](0, 0) != 0) { R_nz = true; }
    }
    bool gamma_s_nz = !gamma_s_.IsZero();
    debug_str << "Z: yes, " << string(Z_nz ? "nonzero. " : "zero. ");
    debug_str << "R: yes, " << string(R_nz ? "nonzero. " : "zero. ");
    debug_str << "gamma_s: yes, " << string(gamma_s_nz ? "nonzero. " : "zero. ");
  }

  if (S_.size() == 0) {
    debug_str << "S: no. ";
  } else {
    KALDI_ASSERT(gamma_.size() != 0);
    bool S_nz = false;
    KALDI_ASSERT(S_.size() == static_cast<size_t>(num_gaussians_));
    for (int32 i = 0; i < num_gaussians_; i++) {
      KALDI_ASSERT(S_[i].NumRows() == feature_dim_);
      if (!S_nz && S_[i](0, 0) != 0) { S_nz = true; }
    }
    debug_str << "S: yes, " << string(S_nz ? "nonzero. " : "zero. ");
  }

  if (y_.size() == 0) {
    debug_str << "y: no. ";
  } else {
    KALDI_ASSERT(gamma_.size() != 0);
    bool nz = false;
    KALDI_ASSERT(y_.size() == static_cast<size_t>(num_groups_));
    for (int32 j1 = 0; j1 < num_groups_; j1++) {
      KALDI_ASSERT(y_[j1].NumRows() == model.NumSubstatesForGroup(j1));
      KALDI_ASSERT(y_[j1].NumCols() == phn_space_dim_);
      if (!nz && y_[j1](0, 0) != 0) { nz = true; }
    }
    debug_str << "y: yes, " << string(nz ? "nonzero. " : "zero. ");
  }

  if (a_.size() == 0) {
    debug_str << "a: no. ";
  } else {
    debug_str << "a: yes. ";
    bool nz = false;
    KALDI_ASSERT(a_.size() == static_cast<size_t>(num_groups_));
    for (int32 j1 = 0; j1 < num_groups_; j1++) {
      KALDI_ASSERT(a_[j1].NumRows() == model.NumSubstatesForGroup(j1) &&
                   a_[j1].NumCols() == num_gaussians_);
      if (!nz && a_[j1].Sum() != 0) nz = true;
    }
    debug_str << "a: yes, " << string(nz ? "nonzero. " : "zero. "); // TODO: take out "string"
  }

  double tot_gamma = 0.0;
  if (gamma_.size() == 0) {
    debug_str << "gamma: no. ";
  } else {
    debug_str << "gamma: yes. ";
    KALDI_ASSERT(gamma_.size() == static_cast<size_t>(num_groups_));
    for (int32 j1 = 0; j1 < num_groups_; j1++) {
      KALDI_ASSERT(gamma_[j1].NumRows() == model.NumSubstatesForGroup(j1) &&
                   gamma_[j1].NumCols() == num_gaussians_);
      tot_gamma += gamma_[j1].Sum();
    }
    bool nz = (tot_gamma != 0.0);
    KALDI_ASSERT(gamma_c_.size() == num_pdfs_ && "gamma_ set up but not gamma_c_.");
    debug_str << "gamma: yes, " << string(nz ? "nonzero. " : "zero. ");
  }

  if (gamma_c_.size() == 0) {
    KALDI_ERR << "gamma_c_ not set up."; // required for all accs.
  } else {
    KALDI_ASSERT(gamma_c_.size() == num_pdfs_);
    double tot_gamma_c = 0.0;
    for (int32 j2 = 0; j2 < num_pdfs_; j2++) {
      KALDI_ASSERT(gamma_c_[j2].Dim() == model.NumSubstatesForPdf(j2));
      tot_gamma_c += gamma_c_[j2].Sum();
    }
    bool nz = (tot_gamma_c != 0.0);
    debug_str << "gamma_c: yes, " << string(nz ? "nonzero. " : "zero. ");
    // The two count totals should agree up to roundoff; warn, don't crash.
    if (!gamma_.empty() && !ApproxEqual(tot_gamma_c, tot_gamma))
      KALDI_WARN << "Counts from gamma and gamma_c differ "
                 << tot_gamma << " vs. " << tot_gamma_c;
  }

  if (t_.NumRows() == 0) {
    debug_str << "t: no. ";
  } else {
    KALDI_ASSERT(t_.NumRows() == num_gaussians_ &&
                 t_.NumCols() == spk_space_dim_);
    KALDI_ASSERT(!U_.empty()); // t and U are used together.
    bool nz = (t_.FrobeniusNorm() != 0);
    debug_str << "t: yes, " << string(nz ? "nonzero. " : "zero. ");
  }

  if (U_.size() == 0) {
    debug_str << "U: no. ";
  } else {
    bool nz = false;
    KALDI_ASSERT(U_.size() == num_gaussians_);
    for (int32 i = 0; i < num_gaussians_; i++) {
      if (!nz && U_[i].FrobeniusNorm() != 0) nz = true;
      KALDI_ASSERT(U_[i].NumRows() == spk_space_dim_);
    }
    KALDI_ASSERT(t_.NumRows() != 0); // t and U are used together.
    // NOTE(review): the label below looks like a copy-paste slip —
    // it should presumably read "U: yes, ". Fix upstream.
    debug_str << "t: yes, " << string(nz ? "nonzero. " : "zero. ");
  }

  if (show_properties)
    KALDI_LOG << "Subspace GMM model properties: " << debug_str.str();
}
364 
// Allocates (or clears) each accumulator according to 'flags'; stats not
// requested are cleared so Check()/Write() can tell what was accumulated.
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Accs::ResizeAccumulators(const AmSgmm2 &model," in the
// upstream file, Doxygen line 365) is missing from this extract; the
// fragment below is kept verbatim with the remaining gaps marked.
                                         SgmmUpdateFlagsType flags,
                                         bool have_spk_vecs) {
  // Cache the model's dimensions; all allocations below use these.
  num_pdfs_ = model.NumPdfs();
  num_groups_ = model.NumGroups();
  num_gaussians_ = model.NumGauss();
  feature_dim_ = model.FeatureDim();
  phn_space_dim_ = model.PhoneSpaceDim();
  spk_space_dim_ = model.SpkSpaceDim();
  // NOTE(review): Doxygen lines 374 and 376 are missing here (likely a
  // reset of totals and the "if (flags & ...)" guard for Y_) — restore
  // from upstream.
    Y_.resize(num_gaussians_);
    for (int32 i = 0; i < num_gaussians_; i++) {
      Y_[i].Resize(feature_dim_, phn_space_dim_);
    }
  } else {
    Y_.clear();
  }

  // NOTE(review): Doxygen lines 385-386 are missing here (the condition
  // and allocation for gamma_s_) — restore from upstream.
  } else {
    gamma_s_.Resize(0);
  }

  if (flags & kSgmmSpeakerProjections) {
    if (spk_space_dim_ == 0) {
      KALDI_ERR << "Cannot set up accumulators for speaker projections "
                << "because speaker subspace has not been set up";
    }
    Z_.resize(num_gaussians_);
    R_.resize(num_gaussians_);
    for (int32 i = 0; i < num_gaussians_; i++) {
      Z_[i].Resize(feature_dim_, spk_space_dim_);
      R_[i].Resize(spk_space_dim_);
    }
  } else {
    Z_.clear();
    R_.clear();
  }

  if (flags & kSgmmCovarianceMatrix) {
    S_.resize(num_gaussians_);
    for (int32 i = 0; i < num_gaussians_; i++) {
      S_[i].Resize(feature_dim_);
    }
  } else {
    S_.clear();
  }

  // NOTE(review): Doxygen line 416 is missing here (the first half of the
  // gamma_ condition, an "if (flags & (... |") — restore from upstream.
               kSgmmCovarianceMatrix | kSgmmPhoneProjections)) {
    gamma_.resize(num_groups_);
    for (int32 j1 = 0; j1 < num_groups_; j1++) {
      gamma_[j1].Resize(model.NumSubstatesForGroup(j1), num_gaussians_);
    }
  } else {
    gamma_.clear();
  }

  // NOTE(review): Doxygen line 426 is missing here (the first half of the
  // SSGMM "a_" condition) — restore from upstream.
      && model.HasSpeakerDependentWeights() && have_spk_vecs) { // SSGMM code.
    a_.resize(num_groups_);
    for (int32 j1 = 0; j1 < num_groups_; j1++) {
      a_[j1].Resize(model.NumSubstatesForGroup(j1),
      // NOTE(review): Doxygen line 431 (the closing argument of this
      // Resize call, presumably "num_gaussians_);") is missing here.
    }
  } else {
    a_.clear();
  }

  if (flags & kSgmmSpeakerWeightProjections) {
    // NOTE(review): Doxygen line 438 is missing here (the start of a
    // KALDI_ASSERT whose message continues below) — restore from upstream.
               "remove the flag \"u\" if you don't have u set up.");
    // NOTE(review): Doxygen lines 440-441 are missing here (presumably
    // allocations of a_s_ and t_) — restore from upstream.
    U_.resize(num_gaussians_);
    for (int32 i = 0; i < num_gaussians_; i++)
      U_[i].Resize(spk_space_dim_);
  } else {
    a_s_.Resize(0);
    t_.Resize(0, 0);
    U_.resize(0);
  }

  if (true) { // always set up gamma_c_; it's nominally for
    // estimation of substate weights, but it's also required when
    // GetStateOccupancies() is called.
    gamma_c_.resize(num_pdfs_);
    for (int32 j2 = 0; j2 < num_pdfs_; j2++) {
      gamma_c_[j2].Resize(model.NumSubstatesForPdf(j2));
    }
  }

  if (flags & kSgmmPhoneVectors) {
    y_.resize(num_groups_);
    for (int32 j1 = 0; j1 < num_groups_; j1++) {
      y_[j1].Resize(model.NumSubstatesForGroup(j1), phn_space_dim_);
    }
  } else {
    y_.clear();
  }
}
470 
// Accumulates stats for one frame and pdf j2: computes component posteriors
// from the model, scales them by 'weight', and delegates the actual stats
// accumulation to AccumulateFromPosteriors().  Returns the frame log-like.
// NOTE(review): the opening line of this definition (presumably
// "BaseFloat MleAmSgmm2Accs::Accumulate(const AmSgmm2 &model," in the
// upstream file, Doxygen line 471) is missing from this extract.
                                     const Sgmm2PerFrameDerivedVars &frame_vars,
                                     int32 j2,
                                     BaseFloat weight,
                                     Sgmm2PerSpkDerivedVars *spk_vars) {
  // Calculate Gaussian posteriors and collect statistics
  Matrix<BaseFloat> posteriors;
  BaseFloat log_like = model.ComponentPosteriors(frame_vars, j2, spk_vars, &posteriors);
  posteriors.Scale(weight);
  BaseFloat count = AccumulateFromPosteriors(model, frame_vars, posteriors,
                                             j2, spk_vars);
  // Note: total_frames_ is incremented in AccumulateFromPosteriors().
  total_like_ += count * log_like;
  return log_like;
}
486 
// Core accumulation routine: given per-(substate, selected-Gaussian)
// posteriors for pdf j2, adds the weighted outer-product / count stats to
// whichever accumulators are allocated (gamma_, y_, Y_, Z_, S_, a_, ...).
// Returns the total posterior mass (soft frame count) accumulated.
// NOTE(review): the opening line of this definition (presumably
// "BaseFloat MleAmSgmm2Accs::AccumulateFromPosteriors(" in the upstream
// file, Doxygen line 487) is missing from this extract.
    const AmSgmm2 &model,
    const Sgmm2PerFrameDerivedVars &frame_vars,
    const Matrix<BaseFloat> &posteriors,
    int32 j2,
    Sgmm2PerSpkDerivedVars *spk_vars) {
  double tot_count = 0.0;
  const vector<int32> &gselect = frame_vars.gselect;
  // Intermediate variables
  Vector<BaseFloat> gammat(gselect.size()), // sum of gammas over mix-weight.
      a_is_part(gselect.size()); //
      // NOTE(review): Doxygen line 498 is missing here (more declarations
      // in this statement — mu_jmi and xt_jmi are used below but not
      // visibly declared) — restore from upstream.
      zt_jmi(spk_space_dim_);

  int32 j1 = model.Pdf2Group(j2);
  int32 num_substates = model.NumSubstatesForGroup(j1);

  for (int32 m = 0; m < num_substates; m++) {
    BaseFloat d_jms = model.GetDjms(j1, m, spk_vars);
    BaseFloat gammat_jm = 0.0;
    for (int32 ki = 0; ki < static_cast<int32>(gselect.size()); ki++) {
      int32 i = gselect[ki];

      // Eq. (39): gamma_{jmi}(t) = p (j, m, i|t)
      // RandPrune randomizes tiny posteriors to zero to save work,
      // preserving expected values.
      BaseFloat gammat_jmi = RandPrune(posteriors(ki, m), rand_prune_);
      if (gammat_jmi == 0.0) continue;
      gammat(ki) += gammat_jmi;
      if (gamma_s_.Dim() != 0)
        gamma_s_(i) += gammat_jmi;
      gammat_jm += gammat_jmi;

      // Accumulate statistics for non-zero gaussian posteriors
      tot_count += gammat_jmi;
      if (!gamma_.empty()) {
        // Eq. (40): gamma_{jmi} = \sum_t gamma_{jmi}(t)
        gamma_[j1](m, i) += gammat_jmi;
      }
      if (!y_.empty()) {
        // Eq. (41): y_{jm} = \sum_{t, i} \gamma_{jmi}(t) z_{i}(t)
        // Suggestion: move this out of the loop over m
        y_[j1].Row(m).AddVec(gammat_jmi, frame_vars.zti.Row(ki));
      }
      if (!Y_.empty()) {
        // Eq. (42): Y_{i} = \sum_{t, j, m} \gamma_{jmi}(t) x_{i}(t) v_{jm}^T
        Y_[i].AddVecVec(gammat_jmi, frame_vars.xti.Row(ki),
                        model.v_[j1].Row(m));
      }
      // Accumulate for speaker projections
      if (!Z_.empty()) {
        // NOTE(review): Doxygen line 536 is missing here — restore from
        // upstream.
        // Eq. (43): x_{jmi}(t) = x_k(t) - M{i} v_{jm}
        model.GetSubstateMean(j1, m, i, &mu_jmi);
        xt_jmi.CopyFromVec(frame_vars.xt);
        xt_jmi.AddVec(-1.0, mu_jmi);
        // Eq. (44): Z_{i} = \sum_{t, j, m} \gamma_{jmi}(t) x_{jmi}(t) v^{s}'
        if (spk_vars->v_s.Dim() != 0) // interpret empty v_s as zero.
          Z_[i].AddVecVec(gammat_jmi, xt_jmi, spk_vars->v_s);
        // Eq. (49): \gamma_{i}^{(s)} = \sum_{t\in\Tau(s), j, m} gamma_{jmi}
        // Will be used when you call CommitStatsForSpk(), to update R_.
      }
    } // loop over selected Gaussians
    if (gammat_jm != 0.0) {
      if (!a_.empty()) { // SSGMM code.
        KALDI_ASSERT(d_jms > 0);
        // below is eq. 40 in the MSR techreport. Caution: there
        // was an error in the original techreport. The index i
        // in the summation and the quantity \gamma_{jmi}^{(t)}
        // should be differently named, e.g. i'.
        a_[j1].Row(m).AddVec(gammat_jm / d_jms, spk_vars->b_is);
      }
      if (a_s_.Dim() != 0) { // [SSGMM]
        KALDI_ASSERT(d_jms > 0);
        KALDI_ASSERT(!model.w_jmi_.empty());
        a_s_.AddVec(gammat_jm / d_jms, model.w_jmi_[j1].Row(m));
      }
      if (!gamma_c_.empty())
        gamma_c_[j2](m) += gammat_jm;
    }
  } // loop over substates

  if (!S_.empty()) {
    for (int32 ki = 0; ki < static_cast<int32>(gselect.size()); ki++) {
      // Eq. (47): S_{i} = \sum_{t, j, m} \gamma_{jmi}(t) x_{i}(t) x_{i}(t)^T
      if (gammat(ki) != 0.0) {
        int32 i = gselect[ki];
        S_[i].AddVec2(gammat(ki), frame_vars.xti.Row(ki));
      }
    }
  }
  total_frames_ += tot_count;
  return tot_count;
}
579 
// Commits the per-speaker temporaries (gamma_s_, a_s_) into the permanent
// stats R_, t_ and U_ once a speaker's data is done, then zeroes them for
// the next speaker.
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Accs::CommitStatsForSpk(const AmSgmm2 &model," in the
// upstream file, Doxygen line 580) is missing from this extract.
                                       const Sgmm2PerSpkDerivedVars &spk_vars) {
  const VectorBase<BaseFloat> &v_s = spk_vars.v_s;
  if (v_s.Dim() != 0 && !v_s.IsZero() && !R_.empty()) {
    for (int32 i = 0; i < num_gaussians_; i++)
      // Accumulate Statistics R_{ki}
      if (gamma_s_(i) != 0.0)
        R_[i].AddVec2(gamma_s_(i),
                      Vector<double>(v_s));
  }
  if (a_s_.Dim() != 0) {
    // NOTE(review): Doxygen line 591 is missing here (the declaration of
    // 'tmp', used just below) — restore from upstream.
    // tmp(i) = gamma_s^{(i)} - a_i^{(s)} b_i^{(s)}.
    tmp.AddVecVec(-1.0, Vector<BaseFloat>(a_s_), spk_vars.b_is, 1.0);
    t_.AddVecVec(1.0, tmp, v_s); // eq. 53 of techreport.
    for (int32 i = 0; i < num_gaussians_; i++) {
      U_[i].AddVec2(a_s_(i) * spk_vars.b_is(i),
                    Vector<double>(v_s)); // eq. 54 of techreport.
    }
  }
  // Reset the per-speaker temporaries for the next speaker.
  gamma_s_.SetZero();
  a_s_.SetZero();
}
603 
// Fills *occs with one total occupancy per pdf: the sum of that pdf's
// substate counts accumulated in gamma_c_.
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Accs::GetStateOccupancies(Vector<BaseFloat> *occs) const {"
// in the upstream file, Doxygen line 604) is missing from this extract.
  int32 J2 = gamma_c_.size();
  occs->Resize(J2);
  for (int32 j2 = 0; j2 < J2; j2++) {
    (*occs)(j2) = gamma_c_[j2].Sum();
  }
}
611 
// Top-level M-step: precomputes the shared quadratic terms, then runs each
// parameter update selected by 'flags', logging the total auxiliary-function
// improvement and the overall data likelihood.
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Updater::Update(const MleAmSgmm2Accs &accs," in the
// upstream file, Doxygen line 612) is missing from this extract.
                               AmSgmm2 *model,
                               SgmmUpdateFlagsType flags) {
  // Q_{i}, quadratic term for phonetic subspace estimation. Dim is [I][S][S]
  std::vector< SpMatrix<double> > Q;

  // Eq (74): S_{i}^{(means)}, scatter of substate mean vectors for estimating
  // the shared covariance matrices. [Actually this variable contains also the
  // term -(Y_i M_i^T + M_i Y_I^T).] Dimension is [I][D][D].
  std::vector< SpMatrix<double> > S_means;
  std::vector<Matrix<double> > log_a;

  // gamma_i(i) = total count for Gaussian i, summed over groups/substates.
  Vector<double> gamma_i(accs.num_gaussians_);
  for (int32 j1 = 0; j1 < accs.num_groups_; j1++)
    gamma_i.AddRowSumMat(1.0, accs.gamma_[j1]); // add sum of rows of
  // accs.gamma_[j1], to gamma_i.

  if (flags & kSgmmPhoneProjections)
    ComputeQ(accs, *model, &Q);
  if (flags & kSgmmCovarianceMatrix)
    ComputeSMeans(accs, *model, &S_means);
  if (!accs.a_.empty())
    ComputeLogA(accs, &log_a);

  // quantities used in both vector and weights updates...
  vector< SpMatrix<double> > H;
  // "smoothing" matrices, weighted sums of above.
  SpMatrix<double> H_sm; // weighted sum of H. Used e.g. in renormalizing phonetic space.
  // NOTE(review): Doxygen line 640 is missing here (the start of the
  // condition that continues below) — restore from upstream.
      || options_.renormalize_V)
    model->ComputeH(&H);

  BaseFloat tot_impr = 0.0;

  // Each update returns its auxf improvement; accumulate for the summary log.
  if (flags & kSgmmPhoneVectors)
    tot_impr += UpdatePhoneVectors(accs, H, log_a, model);
  if (flags & kSgmmPhoneProjections) {
    if (options_.tau_map_M > 0.0)
      tot_impr += MapUpdateM(accs, Q, gamma_i, model); // MAP adaptation of M
    else
      tot_impr += UpdateM(accs, Q, gamma_i, model);
  }
  if (flags & kSgmmPhoneWeightProjections)
    tot_impr += UpdateW(accs, log_a, gamma_i, model);
  if (flags & kSgmmCovarianceMatrix)
    tot_impr += UpdateVars(accs, S_means, gamma_i, model);
  if (flags & kSgmmSubstateWeights)
    tot_impr += UpdateSubstateWeights(accs, model);
  if (flags & kSgmmSpeakerProjections)
    tot_impr += UpdateN(accs, gamma_i, model);
  if (flags & kSgmmSpeakerWeightProjections)
    tot_impr += UpdateU(accs, gamma_i, model);

  if ((flags & kSgmmSpeakerProjections) && (options_.renormalize_N))
    RenormalizeN(accs, gamma_i, model); // if you renormalize N you have to
  // alter any speaker vectors you're keeping around, as well.
  // So be careful with this option.

  if (options_.renormalize_V)
    RenormalizeV(accs, model, gamma_i, H);

  KALDI_LOG << "*Overall auxf improvement, combining all parameters, is "
            << tot_impr;

  KALDI_LOG << "***Overall data likelihood is "
            << (accs.total_like_/accs.total_frames_)
            << " over " << accs.total_frames_ << " frames.";

  // Cached derived quantities depend on v/w and must be recomputed.
  model->n_.clear(); // has become invalid.
  model->w_jmi_.clear(); // has become invalid.
  // we updated the v or w quantities.
}
684 
// Compute the Q_{i} (Eq. 64)
// Q_i = sum over (j1, m) of gamma_{jmi} * v_{jm} v_{jm}^T; the quadratic
// term used when estimating the phonetic-subspace projections M_i.
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Updater::ComputeQ(const MleAmSgmm2Accs &accs," in the
// upstream file, Doxygen line 686) is missing from this extract.
                                 const AmSgmm2 &model,
                                 std::vector< SpMatrix<double> > *Q) {
  Q->resize(accs.num_gaussians_);
  for (int32 i = 0; i < accs.num_gaussians_; i++) {
    (*Q)[i].Resize(accs.phn_space_dim_);
    for (int32 j1 = 0; j1 < accs.num_groups_; j1++) {
      for (int32 m = 0; m < model.NumSubstatesForGroup(j1); m++) {
        // Skip zero-count substates; they contribute nothing.
        if (accs.gamma_[j1](m, i) > 0.0) {
          (*Q)[i].AddVec2(static_cast<BaseFloat>(accs.gamma_[j1](m, i)),
                          model.v_[j1].Row(m));
        }
      }
    }
  }
}
702 
// Compute the S_i^{(means)} quantities (Eq. 74).
// Note: we seem to have also included in this variable
// the term - (Y_i M_I^T + M_i Y_i^T).
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Updater::ComputeSMeans(const MleAmSgmm2Accs &accs," in
// the upstream file, Doxygen line 706) is missing from this extract.
                                      const AmSgmm2 &model,
                                      std::vector< SpMatrix<double> > *S_means) {
  S_means->resize(accs.num_gaussians_);
  Matrix<double> YM_MY(accs.feature_dim_, accs.feature_dim_);
  Vector<BaseFloat> mu_jmi(accs.feature_dim_);
  for (int32 i = 0; i < accs.num_gaussians_; i++) {
    // YM_MY = - (Y_{i} M_{i}^T)
    YM_MY.AddMatMat(-1.0, accs.Y_[i], kNoTrans,
                    Matrix<double>(model.M_[i]), kTrans, 0.0);
    // Add its own transpose: YM_MY = - (Y_{i} M_{i}^T + M_{i} Y_{i}^T)
    {
      Matrix<double> M(YM_MY, kTrans);
      YM_MY.AddMat(1.0, M);
    }
    (*S_means)[i].Resize(accs.feature_dim_, kUndefined);
    (*S_means)[i].CopyFromMat(YM_MY); // Sigma_{i} = -(YM' + MY')

    for (int32 j1 = 0; j1 < accs.num_groups_; j1++) {
      for (int32 m = 0; m < model.NumSubstatesForGroup(j1); m++) {
        if (accs.gamma_[j1](m, i) != 0.0) {
          // Sigma_{i} += gamma_{jmi} * mu_{jmi}*mu_{jmi}^T
          mu_jmi.AddMatVec(1.0, model.M_[i], kNoTrans, model.v_[j1].Row(m), 0.0);
          (*S_means)[i].AddVec2(static_cast<BaseFloat>(accs.gamma_[j1](m, i)), mu_jmi);
        }
      }
    }
    // Guard against NaN/inf having crept into the accumulated matrix.
    KALDI_ASSERT(1.0 / (*S_means)[i](0, 0) != 0.0);
  }
}
736 
737 
// Thread functor for the phone-vector (v_jm) update: each worker processes a
// contiguous slice of the groups via UpdatePhoneVectorsInternal(), and its
// local auxf improvement is folded into the shared total on destruction.
class UpdatePhoneVectorsClass: public MultiThreadable { // For multi-threaded.
 public:
  // NOTE(review): the constructor's first line (presumably
  // "UpdatePhoneVectorsClass(const MleAmSgmm2Updater &updater," in the
  // upstream file, Doxygen line 740) is missing from this extract.
                          const MleAmSgmm2Accs &accs,
                          const std::vector<SpMatrix<double> > &H,
                          const std::vector<Matrix<double> > &log_a,
                          AmSgmm2 *model,
                          double *auxf_impr):
      updater_(updater), accs_(accs), model_(model),
      H_(H), log_a_(log_a), auxf_impr_ptr_(auxf_impr),
      auxf_impr_(0.0) { }

  // Copy constructor used by the threading framework to clone workers;
  // each clone starts with a zero local auxf improvement.
  // NOTE(review): its first line (Doxygen line 750) is missing from this
  // extract.
      MultiThreadable(other),
      updater_(other.updater_), accs_(other.accs_), model_(other.model_),
      H_(other.H_), log_a_(other.log_a_), auxf_impr_ptr_(other.auxf_impr_ptr_),
      auxf_impr_(0.0) { }

  // Destructor: propagate this worker's local sum into the shared total.
  // NOTE(review): its first line (Doxygen line 756) is missing from this
  // extract.
    *auxf_impr_ptr_ += auxf_impr_;
  }

  inline void operator() () {
    // Note: give them local copy of the sums we're computing,
    // which will be propagated to the total sums in the destructor.
    updater_.UpdatePhoneVectorsInternal(accs_, H_, log_a_, model_,
                                        &auxf_impr_, num_threads_, thread_id_);
  }
 private:
  // NOTE(review): Doxygen lines 767-769 are missing from this extract
  // (presumably the updater_/accs_/model_ member declarations) — restore
  // from upstream.
  const std::vector<SpMatrix<double> > &H_;
  const std::vector<Matrix<double> > &log_a_;
  double *auxf_impr_ptr_;
  double auxf_impr_;
};
775 
// Multi-threaded driver for the phone-vector update: spawns
// UpdatePhoneVectorsClass workers over the groups and returns the
// auxf improvement per frame.
// NOTE(review): the opening lines of this definition (Doxygen lines
// 776-782, presumably a comment block plus
// "double MleAmSgmm2Updater::UpdatePhoneVectors(") are missing from this
// extract.
    const MleAmSgmm2Accs &accs,
    const vector< SpMatrix<double> > &H,
    const vector< Matrix<double> > &log_a,
    AmSgmm2 *model) const {

  KALDI_LOG << "Updating phone vectors";

  double count = 0.0, auxf_impr = 0.0; // sum over all states

  for (int32 j1 = 0; j1 < accs.num_groups_; j1++)
    count += accs.gamma_[j1].Sum();

  UpdatePhoneVectorsClass c(*this, accs, H, log_a, model, &auxf_impr);
  RunMultiThreaded(c);

  // Guard against division by zero when there are no frames.
  double auxf_per_frame = auxf_impr / (count + 1.0e-20);

  KALDI_LOG << "**Overall auxf impr for v is " << auxf_per_frame << " over "
            << count << " frames";
  return auxf_per_frame;
}
804 
//static
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Updater::ComputeLogA(const MleAmSgmm2Accs &accs," in the
// upstream file, Doxygen line 806) is missing from this extract.
                                    std::vector<Matrix<double> > *log_a) {
  // This computes the logarithm of the statistics a_{jmi} defined
  // in Eq. 40 of the SSGMM techreport. Although the log of a_{jmi} never
  // explicitly appears in the techreport, it happens to be more convenient
  // in the code to use the log of it.
  // Note: because of the way a is computed, for each (j,m) the
  // entries over i should always be all zero or all nonzero.
  int32 num_zeros = 0;
  KALDI_ASSERT(accs.a_.size() == accs.num_groups_);
  log_a->resize(accs.num_groups_);
  for (int32 j1 = 0; j1 < accs.num_groups_; j1++) {
    int32 num_substates = accs.a_[j1].NumRows();
    KALDI_ASSERT(num_substates > 0);
    (*log_a)[j1].Resize(num_substates, accs.num_gaussians_);
    for (int32 m = 0; m < num_substates; m++) {
      // Checking element (m, 0) suffices: rows are all-zero or all-nonzero.
      if (accs.a_[j1](m, 0) == 0.0) { // Zero accs.
        num_zeros++;
        if (accs.gamma_[j1].Row(m).Sum() != 0.0)
          KALDI_WARN << "Inconsistency between a and gamma stats. [BAD!]";
        // leave the row zero. This means the sub-state saw no stats.
      } else {
        (*log_a)[j1].Row(m).CopyFromVec(accs.a_[j1].Row(m));
        (*log_a)[j1].Row(m).ApplyLog();
      }
    }
  }
  if (num_zeros != 0)
    KALDI_WARN << num_zeros
               << " sub-states with zero \"a\" (and presumably gamma) stats.";
}
837 
// Per-thread worker for the phone-vector update: processes groups
// [j1_start, j1_end) determined by thread_id, solving a quadratic auxiliary
// function for each substate vector v_jm with backtracking, and writes the
// summed exact auxf improvement to *auxf_impr_ptr.
// NOTE(review): the opening line of this definition (presumably
// "void MleAmSgmm2Updater::UpdatePhoneVectorsInternal(" in the upstream
// file, Doxygen line 838) is missing from this extract.
    const MleAmSgmm2Accs &accs,
    const vector< SpMatrix<double> > &H,
    const vector< Matrix<double> > &log_a,
    AmSgmm2 *model,
    double *auxf_impr_ptr,
    int32 num_threads,
    int32 thread_id) const {

  // Static partition of the groups across threads, rounded up.
  int32 J1 = accs.num_groups_, block_size = (J1 + (num_threads-1)) / num_threads,
      j1_start = block_size * thread_id,
      j1_end = std::min(accs.num_groups_, j1_start + block_size);

  double tot_auxf_impr = 0.0;

  for (int32 j1 = j1_start; j1 < j1_end; j1++) {
    for (int32 m = 0; m < model->NumSubstatesForGroup(j1); m++) {
      double gamma_jm = accs.gamma_[j1].Row(m).Sum();
      SpMatrix<double> X_jm(accs.phn_space_dim_); // = \sum_i \gamma_{jmi} H_i

      for (int32 i = 0; i < accs.num_gaussians_; i++) {
        double gamma_jmi = accs.gamma_[j1](m, i);
        if (gamma_jmi != 0.0)
          X_jm.AddSp(gamma_jmi, H[i]);
      }

      Vector<double> v_jm_orig(model->v_[j1].Row(m)),
          v_jm(v_jm_orig);

      double exact_auxf_start = 0.0, exact_auxf = 0.0, approx_auxf_impr = 0.0;
      int32 backtrack_iter, max_backtrack = 10;
      for (backtrack_iter = 0; backtrack_iter < max_backtrack; backtrack_iter++) {
        // Note: the 1st time we go through this loop we have not yet updated
        // v_jm and it has the old value; the 2nd time, it has the updated value
        // and we will typically break at this point, after verifying that
        // the auxf has improved.

        // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7)
        Vector<double> w_jm(accs.num_gaussians_);
        w_jm.AddMatVec(1.0, Matrix<double>(model->w_), kNoTrans,
                       v_jm, 0.0);
        if (!log_a.empty()) w_jm.AddVec(1.0, log_a[j1].Row(m)); // SSGMM techreport eq. 42
        w_jm.Add(-w_jm.LogSumExp()); // it is now log w_jm


        // Exact auxiliary function value for the current v_jm.
        exact_auxf = VecVec(w_jm, accs.gamma_[j1].Row(m))
            + VecVec(v_jm, accs.y_[j1].Row(m))
            -0.5 * VecSpVec(v_jm, X_jm, v_jm);

        if (backtrack_iter == 0) {
          exact_auxf_start = exact_auxf;
        } else {
          if (exact_auxf >= exact_auxf_start) {
            break; // terminate backtracking.
          } else {
            KALDI_LOG << "Backtracking computation of v_jm for j = " << j1
                      << " and m = " << m << " because auxf changed by "
                      << (exact_auxf-exact_auxf_start) << " [vs. predicted:] "
                      << approx_auxf_impr;
            // Halve the step: move v_jm to the midpoint with the original.
            v_jm.AddVec(1.0, v_jm_orig);
            v_jm.Scale(0.5);
          }
        }

        if (backtrack_iter == 0) { // computing updated value.
          w_jm.ApplyExp(); // it is now w_jm
          SpMatrix<double> H_jm(X_jm);
          Vector<double> g_jm(accs.y_[j1].Row(m));
          for (int32 i = 0; i < accs.num_gaussians_; i++) {
            double gamma_jmi = accs.gamma_[j1](m, i);
            // max() keeps the quadratic term large enough for a safe
            // (convex) approximation.
            double quadratic_term = std::max(gamma_jmi, gamma_jm * w_jm(i));
            double scalar = gamma_jmi - gamma_jm * w_jm(i) + quadratic_term
                * VecVec(model->w_.Row(i), model->v_[j1].Row(m));
            g_jm.AddVec(scalar, model->w_.Row(i));
            if (quadratic_term > 1.0e-10) {
              H_jm.AddVec2(static_cast<BaseFloat>(quadratic_term), model->w_.Row(i));
            }
          }

          SolverOptions opts;
          opts.name = "v";
          opts.K = options_.max_cond;
          opts.eps = options_.epsilon;

          approx_auxf_impr = SolveQuadraticProblem(H_jm, g_jm, opts, &v_jm);
        }
      }
      double exact_auxf_impr = exact_auxf - exact_auxf_start;
      tot_auxf_impr += exact_auxf_impr;
      if (backtrack_iter == max_backtrack) {
        KALDI_WARN << "Backtracked " << max_backtrack << " times [not updating]";
      } else {
        model->v_[j1].Row(m).CopyFromVec(v_jm);
      }

      // Log a few example improvements for diagnostics.
      if (j1 < 3 && m < 3) {
        KALDI_LOG << "Auxf impr for j = " << j1 << " m = " << m << " is "
                  << (exact_auxf_impr/gamma_jm+1.0e-20) << " per frame over "
                  << gamma_jm << " frames.";
      }
    }
  }
  *auxf_impr_ptr = tot_auxf_impr;
}
942 
943 
945  AmSgmm2 *model,
946  const Vector<double> &gamma_i,
947  const vector<SpMatrix<double> > &H) {
948  // Compute H^{(sm)}, the "smoothing" matrix-- average of H's.
949  SpMatrix<double> H_sm(accs.phn_space_dim_);
950  for (int32 i = 0; i < accs.num_gaussians_; i++)
951  H_sm.AddSp(gamma_i(i), H[i]);
952  KALDI_ASSERT(gamma_i.Sum() > 0.0);
953  H_sm.Scale(1.0 / gamma_i.Sum());
954 
955  SpMatrix<double> Sigma(accs.phn_space_dim_);
956  int32 count = 0;
957  for (int32 j1 = 0; j1 < accs.num_groups_; j1++) {
958  for (int32 m = 0; m < model->NumSubstatesForGroup(j1); m++) {
959  count++;
960  Sigma.AddVec2(static_cast<BaseFloat>(1.0), model->v_[j1].Row(m));
961  }
962  }
963  if (!Sigma.IsPosDef()) {
964  KALDI_LOG << "Not renormalizing v because scatter is not positive definite"
965  << " -- maybe first iter?";
966  return;
967  }
968  Sigma.Scale(1.0 / count);
969  KALDI_LOG << "Scatter of vectors v is : ";
970  Sigma.PrintEigs("Sigma");
971 
972  // Want to make variance of v unit and H_sm (like precision matrix) diagonal.
974  L.Cholesky(Sigma);
975  TpMatrix<double> LInv(L);
976  LInv.Invert();
977 
979  tmpL.CopyFromTp(L);
980 
981  SpMatrix<double> H_sm_proj(accs.phn_space_dim_);
982  H_sm_proj.AddMat2Sp(1.0, tmpL, kTrans, H_sm, 0.0);
983  // H_sm_proj := L^{T} * H_sm * L.
984  // This is right because we would transform the vectors themselves
985  // by L^{-1}, and H_sm is like the inverse of the vectors,
986  // so it's {L^{-1}}^{-T} = L^T.
987 
989  Vector<double> eigs(accs.phn_space_dim_);
990  H_sm_proj.SymPosSemiDefEig(&eigs, &U, 1.0); // 1.0 means no checking +ve def -> faster
991  KALDI_LOG << "Note on the next diagnostic: the first number is generally not "
992  << "that meaningful as it relates to the static offset";
993  H_sm_proj.PrintEigs("H_sm_proj (Significance of dims in vector space.. note)");
994 
995  // Transform on vectors is U^T L^{-1}.
996  // Why? Because transform on H_sm is T =U^T L^T
997  // and we want T^{-T} by normal rules of vector/covector and we
998  // have (U^T L^T)^{-T} = (L U)^{-1} = U^T L^{-1}.
999  Matrix<double> Trans(accs.phn_space_dim_, accs.phn_space_dim_); // T^{-T}
1000  Matrix<double> tmpLInv(accs.phn_space_dim_, accs.phn_space_dim_);
1001  tmpLInv.CopyFromTp(LInv);
1002  Trans.AddMatMat(1.0, U, kTrans, tmpLInv, kNoTrans, 0.0);
1003  Matrix<double> TransInv(Trans);
1004  TransInv.Invert(); // T in above...
1005 
1006 #ifdef KALDI_PARANOID
1007  {
1008  SpMatrix<double> H_sm_tmp(accs.phn_space_dim_);
1009  H_sm_tmp.AddMat2Sp(1.0, TransInv, kTrans, H_sm, 0.0);
1010  KALDI_ASSERT(H_sm_tmp.IsDiagonal(0.1));
1011  }
1012  {
1013  SpMatrix<double> Sigma_tmp(accs.phn_space_dim_);
1014  Sigma_tmp.AddMat2Sp(1.0, Trans, kNoTrans, Sigma, 0.0);
1015  KALDI_ASSERT(Sigma_tmp.IsUnit(0.1));
1016  }
1017 #endif
1018 
1019  for (int32 j1 = 0; j1 < accs.num_groups_; j1++) {
1020  for (int32 m = 0; m < model->NumSubstatesForGroup(j1); m++) {
1021  Vector<double> tmp(accs.phn_space_dim_);
1022  tmp.AddMatVec(1.0, Trans, kNoTrans, Vector<double>(model->v_[j1].Row(m)), 0.0);
1023  model->v_[j1].Row(m).CopyFromVec(tmp);
1024  }
1025  }
1026  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1027  Vector<double> tmp(accs.phn_space_dim_);
1028  tmp.AddMatVec(1.0, TransInv, kTrans, Vector<double>(model->w_.Row(i)), 0.0);
1029  model->w_.Row(i).CopyFromVec(tmp);
1030 
1031  Matrix<double> tmpM(accs.feature_dim_, accs.phn_space_dim_);
1032  // Multiplying on right not left so must not transpose TransInv.
1033  tmpM.AddMatMat(1.0, Matrix<double>(model->M_[i]), kNoTrans,
1034  TransInv, kNoTrans, 0.0);
1035  model->M_[i].CopyFromMat(tmpM);
1036  }
1037  KALDI_LOG << "Renormalized subspace.";
1038 }
1039 
1041  const std::vector< SpMatrix<double> > &Q,
1042  const Vector<double> &gamma_i,
1043  AmSgmm2 *model) {
1044  double tot_count = 0.0, tot_like_impr = 0.0;
1045  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1046  if (gamma_i(i) < accs.feature_dim_) {
1047  KALDI_WARN << "For component " << i << ": not updating M due to very "
1048  << "small count (=" << gamma_i(i) << ").";
1049  continue;
1050  }
1051 
1052  SolverOptions opts;
1053  opts.name = "M";
1054  opts.K = options_.max_cond;
1055  opts.eps = options_.epsilon;
1056 
1057  Matrix<double> Mi(model->M_[i]);
1058  double impr =
1059  SolveQuadraticMatrixProblem(Q[i], accs.Y_[i],
1060  SpMatrix<double>(model->SigmaInv_[i]),
1061  opts, &Mi);
1062 
1063  model->M_[i].CopyFromMat(Mi);
1064 
1065  if (i < 10) {
1066  KALDI_VLOG(2) << "Objf impr for projection M for i = " << i << ", is "
1067  << (impr/(gamma_i(i) + 1.0e-20)) << " over " << gamma_i(i)
1068  << " frames";
1069  }
1070  tot_count += gamma_i(i);
1071  tot_like_impr += impr;
1072  }
1073  tot_like_impr /= (tot_count + 1.0e-20);
1074  KALDI_LOG << "Overall objective function improvement for model projections "
1075  << "M is " << tot_like_impr << " over " << tot_count << " frames";
1076  return tot_like_impr;
1077 }
1078 
1079 
1080 // Estimate the parameters of a Gaussian prior over the M matrices. There are
1081 // as many mean matrices as UBM size and two covariance matrices for the rows
1082 // of M and columns of M. The prior means M_i are fixed to the unadapted values.
1083 // This is what was done in Lu, et al. "Maximum a posteriori adaptation of
1084 // subspace Gaussian mixture models for cross-lingual speech recognition",
1085 // ICASSP 2012.
1087  KALDI_ASSERT(options_.map_M_prior_iters > 0);
1088  int32 Ddim = model->FeatureDim();
1089  int32 Sdim = model->PhoneSpaceDim();
1090  int32 nGaussians = model->NumGauss();
1091 
1092  // inverse variance of the columns of M: dim is # of rows
1093  model->col_cov_inv_.Resize(Ddim);
1094  // inverse covariance of the rows of M: dim is # of columns
1095  model->row_cov_inv_.Resize(Sdim);
1096 
1097  model->col_cov_inv_.SetUnit();
1098  model->row_cov_inv_.SetUnit();
1099 
1100  if (model->M_prior_.size() == 0) {
1101  model->M_prior_.resize(nGaussians);
1102  for (int32 i = 0; i < nGaussians; i++) {
1103  model->M_prior_[i].Resize(Ddim, Sdim);
1104  model->M_prior_[i].CopyFromMat(model->M_[i]); // We initialize Mpri as this
1105  }
1106  }
1107 
1108  if (options_.full_col_cov || options_.full_row_cov) {
1109  Matrix<double> avg_M(Ddim, Sdim); // average of the Gaussian prior means
1110  for (int32 i = 0; i < nGaussians; i++)
1111  avg_M.AddMat(1.0, Matrix<double>(model->M_prior_[i]));
1112  avg_M.Scale(1.0 / nGaussians);
1113 
1114  Matrix<double> MDiff(Ddim, Sdim);
1115  for (int32 iter = 0; iter < options_.map_M_prior_iters; iter++) {
1116  { // diagnostic block.
1117  double prior_like = -0.5 * nGaussians * (Ddim * Sdim * Log(2 * M_PI)
1118  + Sdim * (-model->row_cov_inv_.LogPosDefDet())
1119  + Ddim * (-model->col_cov_inv_.LogPosDefDet()));
1120  for (int32 i = 0; i < nGaussians; i++) {
1121  MDiff.CopyFromMat(Matrix<double>(model->M_prior_[i]));
1122  MDiff.AddMat(-1.0, avg_M); // MDiff = M_{i} - avg(M)
1123  SpMatrix<double> tmp(Ddim);
1124  // tmp = MDiff.Omega_r^{-1}*MDiff^T.
1125  tmp.AddMat2Sp(1.0, MDiff, kNoTrans,
1126  SpMatrix<double>(model->row_cov_inv_), 0.0);
1127  prior_like -= 0.5 * TraceSpSp(tmp, SpMatrix<double>(model->col_cov_inv_));
1128  }
1129  KALDI_LOG << "Before iteration " << iter
1130  << " of updating prior over M, log like per dimension modeled is "
1131  << prior_like / (nGaussians * Ddim * Sdim);
1132  }
1133 
1134  // First estimate the column covariances (\Omega_r in paper)
1135  if (options_.full_col_cov) {
1136  size_t limited;
1137  model->col_cov_inv_.SetZero();
1138  for (int32 i = 0; i < nGaussians; i++) {
1139  MDiff.CopyFromMat(Matrix<double>(model->M_prior_[i]));
1140  MDiff.AddMat(-1.0, avg_M); // MDiff = M_{i} - avg(M)
1141  // Omega_r += 1/(D*I) * Mdiff * Omega_c^{-1} * Mdiff^T
1142  model->col_cov_inv_.AddMat2Sp(1.0 / (Ddim * nGaussians),
1143  Matrix<BaseFloat>(MDiff), kNoTrans,
1144  model->row_cov_inv_, 1.0);
1145  }
1146  model->col_cov_inv_.PrintEigs("col_cov");
1147  limited = model->col_cov_inv_.LimitCond(options_.max_cond,
1148  true /*invert the matrix*/);
1149  if (limited != 0) {
1150  KALDI_LOG << "Computing column covariances for M: limited " << limited
1151  << " singular values, max condition is "
1152  << options_.max_cond;
1153  }
1154  }
1155 
1156  // Now estimate the row covariances (\Omega_c in paper)
1157  if (options_.full_row_cov) {
1158  size_t limited;
1159  model->row_cov_inv_.SetZero();
1160  for (int32 i = 0; i < nGaussians; i++) {
1161  MDiff.CopyFromMat(Matrix<double>(model->M_prior_[i]));
1162  MDiff.AddMat(-1.0, avg_M); // MDiff = M_{i} - avg(M)
1163  // Omega_c += 1/(S*I) * Mdiff^T * Omega_r^{-1} * Mdiff.
1164  model->row_cov_inv_.AddMat2Sp(1.0 / (Sdim * nGaussians),
1165  Matrix<BaseFloat>(MDiff), kTrans,
1166  model->col_cov_inv_, 1.0);
1167  }
1168  model->row_cov_inv_.PrintEigs("row_cov");
1169  limited = model->row_cov_inv_.LimitCond(options_.max_cond,
1170  true /*invert the matrix*/);
1171  if (limited != 0) {
1172  KALDI_LOG << "Computing row covariances for M: limited " << limited
1173  << " singular values, max condition is "
1174  << options_.max_cond;
1175  }
1176  }
1177  } // end iterations
1178  }
1179 }
1180 
1181 
1182 // MAP adaptation of M with a matrix-variate Gaussian prior
1184  const std::vector< SpMatrix<double> > &Q,
1185  const Vector<double> &gamma_i,
1186  AmSgmm2 *model) {
1187  int32 Ddim = model->FeatureDim();
1188  int32 Sdim = model->PhoneSpaceDim();
1189  int32 nGaussians = model->NumGauss();
1190 
1191  KALDI_LOG << "Prior smoothing parameter: Tau = " << options_.tau_map_M;
1192  if (model->M_prior_.size() == 0 || model->col_cov_inv_.NumRows() == 0
1193  || model->row_cov_inv_.NumRows() == 0) {
1194  KALDI_LOG << "Computing the prior first";
1195  ComputeMPrior(model);
1196  }
1197 
1198  Matrix<double> G(Ddim, Sdim);
1199  // \tau \Omega_c^{-1} avg(M) \Omega_r^{-1}, depends on Gaussian index
1200  Matrix<double> prior_term_i(Ddim, Sdim);
1201  SpMatrix<double> P2(model->col_cov_inv_);
1202  SpMatrix<double> Q2(model->row_cov_inv_);
1203  Q2.Scale(options_.tau_map_M);
1204 
1205  double totcount = 0.0, tot_like_impr = 0.0;
1206  for (int32 i = 0; i < nGaussians; ++i) {
1207  if (gamma_i(i) < accs.feature_dim_) {
1208  KALDI_WARN << "For component " << i << ": not updating M due to very "
1209  << "small count (=" << gamma_i(i) << ").";
1210  continue;
1211  }
1212 
1213  Matrix<double> tmp(Ddim, Sdim, kSetZero);
1214  tmp.AddSpMat(1.0, SpMatrix<double>(model->col_cov_inv_),
1215  Matrix<double>(model->M_prior_[i]), kNoTrans, 0.0);
1216  prior_term_i.AddMatSp(options_.tau_map_M, tmp, kNoTrans,
1217  SpMatrix<double>(model->row_cov_inv_), 0.0);
1218 
1219  Matrix<double> SigmaY(Ddim, Sdim, kSetZero);
1220  SigmaY.AddSpMat(1.0, SpMatrix<double>(model->SigmaInv_[i]), accs.Y_[i],
1221  kNoTrans, 0.0);
1222  G.CopyFromMat(SigmaY); // G = \Sigma_{i}^{-1} Y_{i}
1223  G.AddMat(1.0, prior_term_i); // G += \tau \Omega_c^{-1} avg(M) \Omega_r^{-1}
1224  SpMatrix<double> P1(model->SigmaInv_[i]);
1225  Matrix<double> Mi(model->M_[i]);
1226 
1227  SolverOptions opts;
1228  opts.name = "M";
1229  opts.K = options_.max_cond;
1230  opts.eps = options_.epsilon;
1231  double impr =
1232  SolveDoubleQuadraticMatrixProblem(G, P1, P2, Q[i], Q2, opts, &Mi);
1233  model->M_[i].CopyFromMat(Mi);
1234  if (i < 10) {
1235  KALDI_LOG << "Objf impr for projection M for i = " << i << ", is "
1236  << (impr / (gamma_i(i) + 1.0e-20)) << " over " << gamma_i(i)
1237  << " frames";
1238  }
1239  totcount += gamma_i(i);
1240  tot_like_impr += impr;
1241  }
1242  tot_like_impr /= (totcount + 1.0e-20);
1243  KALDI_LOG << "Overall objective function improvement for model projections "
1244  << "M is " << tot_like_impr << " over " << totcount << " frames";
1245  return tot_like_impr;
1246 }
1247 
1248 
1256 
1257 // static
1259  const AmSgmm2 &model,
1260  const Matrix<double> &w,
1261  const std::vector<Matrix<double> > &log_a,
1262  Matrix<double> *F_i,
1263  Matrix<double> *g_i,
1264  double *tot_like,
1265  int32 num_threads,
1266  int32 thread_id) {
1267 
1268  // Accumulate stats from a block of states (this gets called in parallel).
1269  int32 block_size = (accs.num_groups_ + (num_threads-1)) / num_threads,
1270  j1_start = block_size * thread_id,
1271  j1_end = std::min(accs.num_groups_, j1_start + block_size);
1272 
1273  // Unlike in the report the inner most loop is over Gaussians, where
1274  // per-gaussian statistics are accumulated. This is more memory demanding
1275  // but more computationally efficient, as outer product v_{jvm} v_{jvm}^T
1276  // is computed only once for all gaussians.
1277 
1278  SpMatrix<double> v_vT(accs.phn_space_dim_);
1279 
1280  for (int32 j1 = j1_start; j1 < j1_end; j1++) {
1281  int32 num_substates = model.NumSubstatesForGroup(j1);
1282  Matrix<double> w_j(num_substates, accs.num_gaussians_);
1283  // The linear term and quadratic term for each Gaussian-- two scalars
1284  // for each Gaussian, they appear in the accumulation formulas.
1285  Matrix<double> linear_term(num_substates, accs.num_gaussians_);
1286  Matrix<double> quadratic_term(num_substates, accs.num_gaussians_);
1287  Matrix<double> v_vT_m(num_substates,
1288  (accs.phn_space_dim_*(accs.phn_space_dim_+1))/2);
1289 
1290  // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7)
1291  Matrix<double> v_j_double(model.v_[j1]);
1292  w_j.AddMatMat(1.0, v_j_double, kNoTrans, w, kTrans, 0.0);
1293  if (!log_a.empty()) w_j.AddMat(1.0, log_a[j1]); // SSGMM techreport eq. 42
1294 
1295  for (int32 m = 0; m < model.NumSubstatesForGroup(j1); m++) {
1296  SubVector<double> w_jm(w_j, m);
1297  double gamma_jm = accs.gamma_[j1].Row(m).Sum();
1298  w_jm.Add(-1.0 * w_jm.LogSumExp());
1299  *tot_like += VecVec(w_jm, accs.gamma_[j1].Row(m));
1300  w_jm.ApplyExp();
1301  v_vT.SetZero();
1302  // v_vT := v_{jkm} v_{jkm}^T
1303  v_vT.AddVec2(static_cast<BaseFloat>(1.0), v_j_double.Row(m));
1304  v_vT_m.Row(m).CopyFromPacked(v_vT); // a bit wasteful, but does not dominate.
1305 
1306  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1307  // Suggestion: g_jkm can be computed more efficiently
1308  // using the Vector/Matrix routines for all i at once
1309  // linear term around cur value.
1310  linear_term(m, i) = accs.gamma_[j1](m, i) - gamma_jm * w_jm(i);
1311  quadratic_term(m, i) = std::max(accs.gamma_[j1](m, i),
1312  gamma_jm * w_jm(i));
1313  }
1314  } // loop over substates
1315  g_i->AddMatMat(1.0, linear_term, kTrans, v_j_double, kNoTrans, 1.0);
1316  F_i->AddMatMat(1.0, quadratic_term, kTrans, v_vT_m, kNoTrans, 1.0);
1317  } // loop over states
1318 }
1319 
1320 // The parallel weight update, in the paper.
1322  const std::vector<Matrix<double> > &log_a,
1323  const Vector<double> &gamma_i,
1324  AmSgmm2 *model) {
1325  KALDI_LOG << "Updating weight projections";
1326 
1327  // tot_like_{after, before} are totals over multiple iterations,
1328  // not valid likelihoods. but difference is valid (when divided by tot_count).
1329  double tot_predicted_like_impr = 0.0, tot_like_before = 0.0,
1330  tot_like_after = 0.0;
1331 
1333  // View F_i as a vector of SpMatrix.
1334  Matrix<double> F_i(accs.num_gaussians_,
1335  (accs.phn_space_dim_*(accs.phn_space_dim_+1))/2);
1336 
1337  Matrix<double> w(model->w_);
1338  double tot_count = gamma_i.Sum();
1339 
1340  for (int iter = 0; iter < options_.weight_projections_iters; iter++) {
1341  F_i.SetZero();
1342  g_i.SetZero();
1343  double k_like_before = 0.0;
1344 
1345  UpdateWClass c(accs, *model, w, log_a, &F_i, &g_i, &k_like_before);
1346  RunMultiThreaded(c);
1347 
1348  Matrix<double> w_orig(w);
1349  double k_predicted_like_impr = 0.0, k_like_after = 0.0;
1350  double min_step = 0.001, step_size;
1351 
1352  SolverOptions opts;
1353  opts.name = "w";
1354  opts.K = options_.max_cond;
1355  opts.eps = options_.epsilon;
1356 
1357  for (step_size = 1.0; step_size >= min_step; step_size /= 2) {
1358  k_predicted_like_impr = 0.0;
1359  k_like_after = 0.0;
1360 
1361  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1362  // auxf is formulated in terms of change in w.
1363  Vector<double> delta_w(accs.phn_space_dim_);
1364  // returns objf impr with step_size = 1,
1365  // but it may not be 1 so we recalculate it.
1366  SpMatrix<double> this_F_i(accs.phn_space_dim_);
1367  this_F_i.CopyFromVec(F_i.Row(i));
1368  SolveQuadraticProblem(this_F_i, g_i.Row(i), opts, &delta_w);
1369 
1370  delta_w.Scale(step_size);
1371  double predicted_impr = VecVec(delta_w, g_i.Row(i)) -
1372  0.5 * VecSpVec(delta_w, this_F_i, delta_w);
1373 
1374  // should never be negative because
1375  // we checked inside SolveQuadraticProblem.
1376  KALDI_ASSERT(predicted_impr >= -1.0e-05);
1377 
1378  if (i < 10)
1379  KALDI_LOG << "Predicted objf impr for w, iter = " << iter
1380  << ", i = " << i << " is "
1381  << (predicted_impr/gamma_i(i)+1.0e-20)
1382  << " per frame over " << gamma_i(i) << " frames.";
1383  k_predicted_like_impr += predicted_impr;
1384  w.Row(i).AddVec(1.0, delta_w);
1385  }
1386  for (int32 j1 = 0; j1 < accs.num_groups_; j1++) {
1387  int32 M = model->NumSubstatesForGroup(j1);
1388  Matrix<double> w_j(M, accs.num_gaussians_);
1389  w_j.AddMatMat(1.0, Matrix<double>(model->v_[j1]), kNoTrans,
1390  w, kTrans, 0.0);
1391  if (!log_a.empty()) w_j.AddMat(1.0, log_a[j1]); // SSGMM techreport eq. 42
1392  for (int32 m = 0; m < M; m++) {
1393  SubVector<double> w_jm(w_j, m);
1394  w_jm.Add(-1.0 * w_jm.LogSumExp());
1395  }
1396  k_like_after += TraceMatMat(w_j, accs.gamma_[j1], kTrans);
1397  }
1398  KALDI_VLOG(2) << "For iteration " << iter << ", updating w gives "
1399  << "predicted per-frame like impr "
1400  << (k_predicted_like_impr / tot_count) << ", actual "
1401  << ((k_like_after - k_like_before) / tot_count) << ", over "
1402  << tot_count << " frames";
1403  if (k_like_after < k_like_before) {
1404  w.CopyFromMat(w_orig); // Undo what we computed.
1405  if (fabs(k_like_after - k_like_before) / tot_count < 1.0e-05) {
1406  k_like_after = k_like_before;
1407  KALDI_WARN << "Not updating weights as not increasing auxf and "
1408  << "probably due to numerical issues (since small change).";
1409  break;
1410  } else {
1411  KALDI_WARN << "Halving step size for weights as likelihood did "
1412  << "not increase";
1413  }
1414  } else {
1415  break;
1416  }
1417  }
1418  if (step_size < min_step) {
1419  // Undo any step as we have no confidence that this is right.
1420  w.CopyFromMat(w_orig);
1421  } else {
1422  tot_predicted_like_impr += k_predicted_like_impr;
1423  tot_like_after += k_like_after;
1424  tot_like_before += k_like_before;
1425  }
1426  }
1427 
1428  model->w_.CopyFromMat(w);
1429  model->w_jmi_.clear(); // invalidated.
1430 
1431  tot_predicted_like_impr /= tot_count;
1432  tot_like_after = (tot_like_after - tot_like_before) / tot_count;
1433  KALDI_LOG << "**Overall objf impr for w is " << tot_predicted_like_impr
1434  << ", actual " << tot_like_after << ", over "
1435  << tot_count << " frames";
1436  return tot_like_after;
1437 }
1438 
1440  const Vector<double> &gamma_i,
1441  AmSgmm2 *model) {
1442  double tot_impr = 0.0;
1443  SolverOptions opts;
1444  opts.name = "u";
1445  opts.K = options_.max_cond;
1446  opts.eps = options_.epsilon;
1447 
1448  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1449  if (gamma_i(i) < 200.0) {
1450  KALDI_LOG << "Count is small " << gamma_i(i) << " for gaussian "
1451  << i << ", not updating u_i.";
1452  continue;
1453  }
1454  Vector<double> u_i(model->u_.Row(i));
1455  Vector<double> delta_u(accs.spk_space_dim_);
1456  double impr =
1457  SolveQuadraticProblem(accs.U_[i], accs.t_.Row(i), opts, &delta_u);
1458  double impr_per_frame = impr / gamma_i(i);
1459  if (impr_per_frame > options_.max_impr_u) {
1460  KALDI_WARN << "Updating speaker weight projections u, for Gaussian index "
1461  << i << ", impr/frame is " << impr_per_frame << " over "
1462  << gamma_i(i) << " frames, scaling back to not exceed "
1463  << options_.max_impr_u;
1464  double scale = options_.max_impr_u / impr_per_frame;
1465  impr *= scale;
1466  delta_u.Scale(scale);
1467  // Note: a linear scaling of "impr" with "scale" is not quite accurate
1468  // in depicting how the quadratic auxiliary function varies as we change
1469  // the scale on "delta", but this does not really matter-- the goal is
1470  // to limit the auxiliary-function change to not be too large.
1471  }
1472  if (i < 10) {
1473  KALDI_LOG << "Objf impr for spk weight-projection u for i = " << (i)
1474  << ", is " << (impr / (gamma_i(i) + 1.0e-20)) << " over "
1475  << gamma_i(i) << " frames";
1476  }
1477  u_i.AddVec(1.0, delta_u);
1478  model->u_.Row(i).CopyFromVec(u_i);
1479  tot_impr += impr;
1480  }
1481  KALDI_LOG << "**Overall objf impr for u is " << (tot_impr/gamma_i.Sum())
1482  << ", over " << gamma_i.Sum() << " frames";
1483  return tot_impr / gamma_i.Sum();
1484 }
1485 
1487  const Vector<double> &gamma_i,
1488  AmSgmm2 *model) {
1489  double tot_count = 0.0, tot_like_impr = 0.0;
1490  if (accs.spk_space_dim_ == 0 || accs.R_.size() == 0 || accs.Z_.size() == 0) {
1491  KALDI_ERR << "Speaker subspace dim is zero or no stats accumulated";
1492  }
1493  SolverOptions opts;
1494  opts.name = "N";
1495  opts.K = options_.max_cond;
1496  opts.eps = options_.epsilon;
1497 
1498 
1499  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1500  if (gamma_i(i) < 2 * accs.spk_space_dim_) {
1501  KALDI_WARN << "Not updating speaker basis for i = " << (i)
1502  << " because count is too small " << (gamma_i(i));
1503  continue;
1504  }
1505  Matrix<double> Ni(model->N_[i]);
1506  double impr =
1507  SolveQuadraticMatrixProblem(accs.R_[i], accs.Z_[i],
1508  SpMatrix<double>(model->SigmaInv_[i]),
1509  opts, &Ni);
1510  model->N_[i].CopyFromMat(Ni);
1511  if (i < 10) {
1512  KALDI_LOG << "Objf impr for spk projection N for i = " << (i)
1513  << ", is " << (impr / (gamma_i(i) + 1.0e-20)) << " over "
1514  << gamma_i(i) << " frames";
1515  }
1516  tot_count += gamma_i(i);
1517  tot_like_impr += impr;
1518  }
1519 
1520  KALDI_LOG << "**Overall objf impr for N is " << (tot_like_impr/tot_count)
1521  << " over " << tot_count << " frames";
1522  return (tot_like_impr/tot_count);
1523 }
1524 
1526  const Vector<double> &gamma_i,
1527  AmSgmm2 *model) {
1528  KALDI_ASSERT(accs.R_.size() != 0);
1529  double tot_count = gamma_i.Sum();
1530  if (tot_count == 0) {
1531  KALDI_WARN << "Not renormalizing N, since there are no counts.";
1532  return;
1533  }
1534 
1535  SpMatrix<double> RTot(accs.spk_space_dim_);
1536  // for (int32 i = 0; i < accs.num_gaussians_; i++) {
1537  // RTot.AddSp(1.0, accs.R_[i]);
1538  // }
1539  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1540  RTot.AddSp(gamma_i(i), accs.R_[i]);
1541  }
1542  RTot.Scale(1.0 / tot_count);
1544  Vector<double> eigs(accs.spk_space_dim_);
1545  RTot.SymPosSemiDefEig(&eigs, &U);
1546  KALDI_LOG << "Renormalizing N, eigs are: " << (eigs);
1547  Vector<double> sqrteigs(accs.spk_space_dim_);
1548  for (int32 t = 0; t < accs.spk_space_dim_; t++) {
1549  sqrteigs(t) = sqrt(eigs(t));
1550  }
1551  // e.g. diag(eigs)^{-0.5} * U' * RTot * U * diag(eigs)^{-0.5} = 1
1552  // But inverse transpose of this transformation needs to take place on R,
1553  // i.e. not (on left: diag(eigs)^{-0.5} * U')
1554  // but: (inverse it: U . diag(eigs)^{0.5},
1555  // transpose it: diag(eigs)^{0.5} U^T. Need to do this on the right to N
1556  // (because N has the spk vecs on the right), so N := N U diag(eigs)^{0.5}
1557  U.MulColsVec(sqrteigs);
1558  Matrix<double> Ntmp(accs.feature_dim_, accs.spk_space_dim_);
1559  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1560  Ntmp.AddMatMat(1.0, Matrix<double>(model->N_[i]), kNoTrans, U, kNoTrans, 0.0);
1561  model->N_[i].CopyFromMat(Ntmp);
1562  }
1563 }
1564 
1565 
1567  const std::vector< SpMatrix<double> > &S_means,
1568  const Vector<double> &gamma_i,
1569  AmSgmm2 *model) {
1570  KALDI_ASSERT(S_means.size() == static_cast<size_t>(accs.num_gaussians_));
1571 
1572  SpMatrix<double> Sigma_i(accs.feature_dim_), Sigma_i_ml(accs.feature_dim_);
1573  double tot_objf_impr = 0.0, tot_t = 0.0;
1574  SpMatrix<double> covfloor(accs.feature_dim_);
1575  Vector<double> objf_improv(accs.num_gaussians_);
1576 
1577  // First pass over all (shared) Gaussian components to calculate the
1578  // ML estimate of the covariances, and the total covariance for flooring.
1579  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1580  // Eq. (75): Sigma_{i}^{ml} = 1/gamma_{i} [S_{i} + S_{i}^{(means)} - ...
1581  // Y_{i} M_{i}^T - M_{i} Y_{i}^T]
1582  // Note the S_means already contains the Y_{i} M_{i}^T terms.
1583  Sigma_i_ml.CopyFromSp(S_means[i]);
1584  Sigma_i_ml.AddSp(1.0, accs.S_[i]);
1585 
1586  covfloor.AddSp(1.0, Sigma_i_ml);
1587  // inverting small values e.g. 4.41745328e-40 seems to generate inf,
1588  // although would be fixed up later.
1589  if (gamma_i(i) > 1.0e-20) {
1590  Sigma_i_ml.Scale(1 / (gamma_i(i) + 1.0e-20));
1591  } else {
1592  Sigma_i_ml.SetUnit();
1593  }
1594  KALDI_ASSERT(1.0 / Sigma_i_ml(0, 0) != 0.0);
1595  // Eq. (76): Compute the objective function with the old parameter values
1596  objf_improv(i) = model->SigmaInv_[i].LogPosDefDet() -
1597  TraceSpSp(SpMatrix<double>(model->SigmaInv_[i]), Sigma_i_ml);
1598 
1599  model->SigmaInv_[i].CopyFromSp(Sigma_i_ml); // inverted in the next loop.
1600  }
1601 
1602  // Compute the covariance floor.
1603  if (gamma_i.Sum() == 0) { // If no count, use identity.
1604  KALDI_WARN << "Updating variances: zero counts. Setting floor to unit.";
1605  covfloor.SetUnit();
1606  } else { // else, use the global average covariance.
1607  covfloor.Scale(options_.cov_floor / gamma_i.Sum());
1608  int32 tmp;
1609  if ((tmp = covfloor.LimitCondDouble(options_.max_cond)) != 0) {
1610  KALDI_WARN << "Covariance flooring matrix is poorly conditioned. Fixed "
1611  << "up " << tmp << " eigenvalues.";
1612  }
1613  }
1614 
1615  if (options_.cov_diag_ratio > 1000) {
1616  KALDI_LOG << "Assuming you want to build a diagonal system since "
1617  << "cov_diag_ratio is large: making diagonal covFloor.";
1618  for (int32 i = 0; i < covfloor.NumRows(); i++)
1619  for (int32 j = 0; j < i; j++)
1620  covfloor(i, j) = 0.0;
1621  }
1622 
1623  // Second pass over all (shared) Gaussian components to calculate the
1624  // floored estimate of the covariances, and update the model.
1625  for (int32 i = 0; i < accs.num_gaussians_; i++) {
1626  Sigma_i.CopyFromSp(model->SigmaInv_[i]);
1627  Sigma_i_ml.CopyFromSp(Sigma_i);
1628  // In case of insufficient counts, make the covariance matrix diagonal.
1629  // cov_diag_ratio is 2 by default, set to very large to always get diag-cov
1630  if (gamma_i(i) < options_.cov_diag_ratio * accs.feature_dim_) {
1631  KALDI_WARN << "For Gaussian component " << i << ": Too low count "
1632  << gamma_i(i) << " for covariance matrix estimation. Setting to "
1633  << "diagonal";
1634  for (int32 d = 0; d < accs.feature_dim_; d++)
1635  for (int32 e = 0; e < d; e++)
1636  Sigma_i(d, e) = 0.0; // SpMatrix, can only set lower triangular part
1637 
1638  int floored = Sigma_i.ApplyFloor(covfloor);
1639  if (floored > 0) {
1640  KALDI_WARN << "For Gaussian component " << i << ": Floored " << floored
1641  << " covariance eigenvalues.";
1642  }
1643  model->SigmaInv_[i].CopyFromSp(Sigma_i);
1644  model->SigmaInv_[i].InvertDouble();
1645  } else { // Updating the full covariance matrix.
1646  try {
1647  int floored = Sigma_i.ApplyFloor(covfloor);
1648  if (floored > 0) {
1649  KALDI_WARN << "For Gaussian component " << i << ": Floored "
1650  << floored << " covariance eigenvalues.";
1651  }
1652  model->SigmaInv_[i].CopyFromSp(Sigma_i);
1653  model->SigmaInv_[i].InvertDouble();
1654 
1655  objf_improv(i) += Sigma_i.LogPosDefDet() +
1656  TraceSpSp(SpMatrix<double>(model->SigmaInv_[i]), Sigma_i_ml);
1657  objf_improv(i) *= (-0.5 * gamma_i(i)); // Eq. (76)
1658 
1659  tot_objf_impr += objf_improv(i);
1660  tot_t += gamma_i(i);
1661  if (i < 5) {
1662  KALDI_VLOG(2) << "objf impr from variance update =" << objf_improv(i)
1663  / (gamma_i(i) + 1.0e-20) << " over " << (gamma_i(i))
1664  << " frames for i = " << (i);
1665  }
1666  } catch(...) {
1667  KALDI_WARN << "Updating within-class covariance matrix i = " << (i)
1668  << ", numerical problem";
1669  // This is a catch-all thing in case of unanticipated errors, but
1670  // flooring should prevent this occurring for the most part.
1671  model->SigmaInv_[i].SetUnit(); // Set to unit.
1672  }
1673  }
1674  }
1675  KALDI_LOG << "**Overall objf impr for variance update = "
1676  << (tot_objf_impr / (tot_t+ 1.0e-20))
1677  << " over " << tot_t << " frames";
1678  return tot_objf_impr / (tot_t + 1.0e-20);
1679 }
1680 
1681 
1683  const MleAmSgmm2Accs &accs, AmSgmm2 *model) {
1684  KALDI_LOG << "Updating substate mixture weights";
1685  // Also set the vector gamma_j which is a cache of the state occupancies.
1686 
1687  double tot_gamma = 0.0, objf_impr = 0.0;
1688  for (int32 j2 = 0; j2 < accs.num_pdfs_; j2++) {
1689  double gamma_j_sm = 0.0;
1690  int32 num_substates = model->NumSubstatesForPdf(j2);
1691  const Vector<double> &occs(accs.gamma_c_[j2]);
1692  Vector<double> smoothed_occs(occs);
1693  smoothed_occs.Add(options_.tau_c);
1694  gamma_j_sm += smoothed_occs.Sum();
1695  tot_gamma += occs.Sum();
1696 
1697  for (int32 m = 0; m < num_substates; m++) {
1698  double cur_weight = model->c_[j2](m);
1699  if (cur_weight <= 0) {
1700  KALDI_WARN << "Zero or negative weight, flooring";
1701  cur_weight = 1.0e-10; // future work(arnab): remove magic numbers
1702  }
1703  model->c_[j2](m) = smoothed_occs(m) / gamma_j_sm;
1704  objf_impr += Log(model->c_[j2](m) / cur_weight) * occs(m);
1705  }
1706  }
1707  KALDI_LOG << "**Overall objf impr for c is " << (objf_impr/tot_gamma)
1708  << ", over " << tot_gamma << " frames.";
1709  return (objf_impr/tot_gamma);
1710 }
1711 
1712 
1714  BaseFloat prune)
1715  : rand_prune_(prune) {
1716  KALDI_ASSERT(model.SpkSpaceDim() != 0);
1717  H_spk_.resize(model.NumGauss());
1718  for (int32 i = 0; i < model.NumGauss(); i++) {
1719  // Eq. (82): H_{i}^{spk} = N_{i}^T \Sigma_{i}^{-1} N_{i}
1720  H_spk_[i].Resize(model.SpkSpaceDim());
1721  H_spk_[i].AddMat2Sp(1.0, Matrix<double>(model.N_[i]),
1722  kTrans, SpMatrix<double>(model.SigmaInv_[i]), 0.0);
1723  }
1724 
1726 
1727  gamma_s_.Resize(model.NumGauss());
1728  y_s_.Resize(model.SpkSpaceDim());
1729  if (model.HasSpeakerDependentWeights())
1730  a_s_.Resize(model.NumGauss());
1731 }
1732 
1734  y_s_.SetZero();
1735  gamma_s_.SetZero();
1736  if (a_s_.Dim() != 0) a_s_.SetZero();
1737 }
1738 
1739 BaseFloat
1741  const Sgmm2PerFrameDerivedVars &frame_vars,
1742  int32 j2,
1743  BaseFloat weight,
1744  Sgmm2PerSpkDerivedVars *spk_vars) {
1745  // Calculate Gaussian posteriors and collect statistics
1746  Matrix<BaseFloat> posteriors;
1747  BaseFloat log_like = model.ComponentPosteriors(frame_vars, j2, spk_vars,
1748  &posteriors);
1749  posteriors.Scale(weight);
1750  AccumulateFromPosteriors(model, frame_vars, posteriors, j2, spk_vars);
1751  return log_like;
1752 }
1753 
1754 BaseFloat
1756  const Sgmm2PerFrameDerivedVars &frame_vars,
1757  const Matrix<BaseFloat> &posteriors,
1758  int32 j2,
1759  Sgmm2PerSpkDerivedVars *spk_vars) {
1760  double tot_count = 0.0;
1761  int32 feature_dim = model.FeatureDim(),
1762  spk_space_dim = model.SpkSpaceDim();
1763  KALDI_ASSERT(spk_space_dim != 0);
1764  const vector<int32> &gselect = frame_vars.gselect;
1765 
1766  // Intermediate variables
1767  Vector<double> xt_jmi(feature_dim), mu_jmi(feature_dim),
1768  zt_jmi(spk_space_dim);
1769  int32 num_substates = model.NumSubstatesForPdf(j2),
1770  j1 = model.Pdf2Group(j2);
1771  bool have_spk_dep_weights = (a_s_.Dim() != 0);
1772 
1773  for (int32 m = 0; m < num_substates; m++) {
1774  BaseFloat gammat_jm = 0.0;
1775  for (int32 ki = 0; ki < static_cast<int32>(gselect.size()); ki++) {
1776  int32 i = gselect[ki];
1777  // Eq. (39): gamma_{jmi}(t) = p (j, m, i|t)
1778  BaseFloat gammat_jmi = RandPrune(posteriors(ki, m), rand_prune_);
1779  if (gammat_jmi != 0.0) {
1780  gammat_jm += gammat_jmi;
1781  tot_count += gammat_jmi;
1782  model.GetSubstateMean(j1, m, i, &mu_jmi);
1783  xt_jmi.CopyFromVec(frame_vars.xt);
1784  xt_jmi.AddVec(-1.0, mu_jmi);
1785  // Eq. (48): z{jmi}(t) = N_{i}^{T} \Sigma_{i}^{-1} x_{jmi}(t)
1786  zt_jmi.AddMatVec(1.0, NtransSigmaInv_[i], kNoTrans, xt_jmi, 0.0);
1787  // Eq. (49): \gamma_{i}^{(s)} = \sum_{t\in\Tau(s), j, m} gamma_{jmi}
1788  gamma_s_(i) += gammat_jmi;
1789  // Eq. (50): y^{(s)} = \sum_{t, j, m, i} gamma_{jmi}(t) z_{jmi}(t)
1790  y_s_.AddVec(gammat_jmi, zt_jmi);
1791  }
1792  }
1793  if (have_spk_dep_weights) {
1794  KALDI_ASSERT(!model.w_jmi_.empty());
1795  BaseFloat d_jms = model.GetDjms(j1, m, spk_vars);
1796  if (d_jms == -1.0) d_jms = 1.0; // Explanation: d_jms is set to -1 when we didn't have
1797  // speaker vectors in training. We treat this the same as the speaker vector being
1798  // zero, and d_jms becomes 1 in this case.
1799  a_s_.AddVec(gammat_jm/d_jms, model.w_jmi_[j1].Row(m));
1800  }
1801  }
1802  return tot_count;
1803 }
1804 
1806  BaseFloat min_count,
1807  Vector<BaseFloat> *v_s,
1808  BaseFloat *objf_impr_out,
1809  BaseFloat *count_out) {
1810  double tot_gamma = gamma_s_.Sum();
1811  if (tot_gamma < min_count) {
1812  KALDI_WARN << "Updating speaker vectors, count is " << tot_gamma
1813  << " < " << min_count << "not updating.";
1814  if (objf_impr_out) *objf_impr_out = 0.0;
1815  if (count_out) *count_out = 0.0;
1816  return;
1817  }
1818  if (a_s_.Dim() == 0) // No speaker-dependent weights...
1819  UpdateNoU(v_s, objf_impr_out, count_out);
1820  else
1821  UpdateWithU(model, v_s, objf_impr_out, count_out);
1822 }
1823 
1824 
1825 // Basic update, no SSGMM.
1827  BaseFloat *objf_impr_out,
1828  BaseFloat *count_out) {
1829  double tot_gamma = gamma_s_.Sum();
1830  KALDI_ASSERT(y_s_.Dim() != 0);
1831  int32 T = y_s_.Dim(); // speaker-subspace dim.
1832  int32 num_gauss = gamma_s_.Dim();
1833  if (v_s->Dim() != T) v_s->Resize(T); // will set it to zero.
1834 
1835  // Eq. (84): H^{(s)} = \sum_{i} \gamma_{i}(s) H_{i}^{spk}
1836  SpMatrix<double> H_s(T);
1837 
1838  for (int32 i = 0; i < num_gauss; i++)
1839  H_s.AddSp(gamma_s_(i), H_spk_[i]);
1840 
1841  // Don't make these options to SolveQuadraticProblem configurable...
1842  // they really don't make a difference at all unless the matrix in
1843  // question is singular, which wouldn't happen in this case.
1844  Vector<double> v_s_dbl(*v_s);
1845  double tot_objf_impr =
1846  SolveQuadraticProblem(H_s, y_s_, SolverOptions("v_s"), &v_s_dbl);
1847 
1848  v_s->CopyFromVec(v_s_dbl);
1849 
1850  KALDI_LOG << "*Objf impr for speaker vector is " << (tot_objf_impr / tot_gamma)
1851  << " over " << tot_gamma << " frames.";
1852 
1853  if (objf_impr_out) *objf_impr_out = tot_objf_impr;
1854  if (count_out) *count_out = tot_gamma;
1855 }
1856 
1857 // Basic update, no SSGMM.
1859  Vector<BaseFloat> *v_s_ptr,
1860  BaseFloat *objf_impr_out,
1861  BaseFloat *count_out) {
1862  double tot_gamma = gamma_s_.Sum();
1863  KALDI_ASSERT(y_s_.Dim() != 0);
1864  int32 T = y_s_.Dim(); // speaker-subspace dim.
1865  int32 num_gauss = gamma_s_.Dim();
1866  if (v_s_ptr->Dim() != T) v_s_ptr->Resize(T); // will set it to zero.
1867 
1868  // Eq. (84): H^{(s)} = \sum_{i} \gamma_{i}(s) H_{i}^{spk}
1869  SpMatrix<double> H_s(T);
1870 
1871  for (int32 i = 0; i < num_gauss; i++)
1872  H_s.AddSp(gamma_s_(i), H_spk_[i]);
1873 
1874  Vector<double> v_s(*v_s_ptr);
1875  int32 num_iters = 5, // don't set this to 1, as we discard last iter.
1876  num_backtracks = 0,
1877  max_backtracks = 10;
1878  Vector<double> auxf(num_iters);
1879  Matrix<double> v_s_per_iter(num_iters, T);
1880  // The update for v^{(s)} is the one described in the technical report
1881  // section 5.1 (eq. 33 and below).
1882 
1883  for (int32 iter = 0; iter < num_iters; iter++) { // converges very fast,
1884  // and each iteration is fast, so don't need to make this configurable.
1885  v_s_per_iter.Row(iter).CopyFromVec(v_s);
1886 
1887  SpMatrix<double> F(H_s); // the 2nd-order quadratic term on this iteration...
1888  // F^{(p)} in the techerport.
1889  Vector<double> g(y_s_); // g^{(p)} in the techreport.
1890  g.AddSpVec(-1.0, H_s, v_s, 1.0);
1891  Vector<double> log_b_is(num_gauss); // b_i^{(s)}, indexed by i.
1892  log_b_is.AddMatVec(1.0, Matrix<double>(model.u_), kNoTrans, v_s, 0.0);
1893  Vector<double> tilde_w_is(log_b_is);
1894  Vector<double> log_a_s_(a_s_);
1895  log_a_s_.ApplyLog();
1896  tilde_w_is.AddVec(1.0, log_a_s_);
1897  tilde_w_is.Add(-1.0 * tilde_w_is.LogSumExp()); // normalize.
1898  // currently tilde_w_is is in log form.
1899  auxf(iter) = VecVec(v_s, y_s_) - 0.5 * VecSpVec(v_s, H_s, v_s)
1900  + VecVec(gamma_s_, tilde_w_is); // "new" term (weights)
1901 
1902  if (iter > 0 && auxf(iter) < auxf(iter-1) &&
1903  !ApproxEqual(auxf(iter), auxf(iter-1))) { // auxf did not improve.
1904  // backtrack halfway, and do this iteration again.
1905  KALDI_WARN << "Backtracking in speaker vector update, on iter "
1906  << iter << ", auxfs are " << auxf(iter-1) << " -> "
1907  << auxf(iter);
1908  v_s.Scale(0.5);
1909  v_s.AddVec(0.5, v_s_per_iter.Row(iter-1));
1910  if (++num_backtracks >= max_backtracks) {
1911  KALDI_WARN << "Backtracked " << max_backtracks
1912  << " times in speaker-vector update.";
1913  // backtrack all the way, and terminate:
1914  v_s_per_iter.Row(num_iters-1).CopyFromVec(v_s_per_iter.Row(iter-1));
1915  // the following statement ensures we will get
1916  // the appropriate auxiliary-function.
1917  auxf(num_iters-1) = auxf(iter-1);
1918  break;
1919  }
1920  iter--;
1921  }
1922  tilde_w_is.ApplyExp();
1923  for (int32 i = 0; i < num_gauss; i++) {
1924  g.AddVec(gamma_s_(i) - tot_gamma * tilde_w_is(i), model.u_.Row(i));
1925  F.AddVec2(tot_gamma * tilde_w_is(i), model.u_.Row(i));
1926  }
1927  Vector<double> delta(v_s.Dim());
1928  SolveQuadraticProblem(F, g, SolverOptions("v_s"), &delta);
1929  v_s.AddVec(1.0, delta);
1930  }
1931  // so that we only accept things where the auxf has been checked, we
1932  // actually take the penultimate speaker-vector. --> don't set
1933  // num-iters = 1.
1934  v_s_ptr->CopyFromVec(v_s_per_iter.Row(num_iters-1));
1935 
1936  double auxf_change = auxf(num_iters-1) - auxf(0);
1937  KALDI_LOG << "*Objf impr for speaker vector is " << (auxf_change / tot_gamma)
1938  << " per frame, over " << tot_gamma << " frames.";
1939 
1940  if (objf_impr_out) *objf_impr_out = auxf_change;
1941  if (count_out) *count_out = tot_gamma;
1942 }
1943 
1944 
1946  if (gamma_s_.Sum() != 0.0)
1947  KALDI_ERR << "In destructor of MleAmSgmm2Accs: detected that you forgot to "
1948  "call CommitStatsForSpk()";
1949 }
1950 
1951 
1952 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Matrix< double > t_
[SSGMM] each row is one of the t_i quantities in the less-exact version of the SSGMM update for the s...
std::vector< Vector< double > > gamma_c_
Sub-state occupancies gamma_{jm}^{(c)} for each sub-state.
bool IsUnit(Real cutoff=1.0e-05) const
Definition: sp-matrix.cc:480
void CopyFromVec(const SubVector< OtherReal > &orig)
CopyFromVec just interprets the vector as having the same layout as the packed matrix.
Matrix< BaseFloat > u_
[SSGMM] Speaker-subspace weight projection vectors. Dimension is [I][T]
Definition: am-sgmm2.h:431
bool IsDiagonal(Real cutoff=1.0e-05) const
Definition: sp-matrix.cc:465
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231
void UpdateNoU(Vector< BaseFloat > *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out)
Packed symmetric matrix class.
Definition: matrix-common.h:62
void Write(std::ostream &out, bool binary) const
write to stream.
void ApplyExp()
Apply exponential to each value in vector.
BaseFloat Accumulate(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, int32 pdf_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk_vars)
Returns likelihood.
BaseFloat Accumulate(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, int32 pdf_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk_vars)
Accumulate statistics. Returns per-frame log-likelihood.
MleSgmm2SpeakerAccs(const AmSgmm2 &model, BaseFloat rand_prune_=1.0e-05)
Initialize the object. Error if speaker subspace not set up.
void RenormalizeV(const MleAmSgmm2Accs &accs, AmSgmm2 *model, const Vector< double > &gamma_i, const std::vector< SpMatrix< double > > &H)
BaseFloat ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2PerSpkDerivedVars *spk_vars, Matrix< BaseFloat > *post) const
Similar to LogLikelihood() function above, but also computes the posterior probabilities for the pre-...
Definition: am-sgmm2.cc:574
static void ComputeQ(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, std::vector< SpMatrix< double > > *Q)
Compute the Q_i quantities (Eq. 64).
This class describes the options for maximizing various quadratic objective functions.
Definition: sp-matrix.h:443
void Scale(Real c)
Vector< BaseFloat > xt
x'(t), FMLLR-adapted, dim = [D], eq.(33)
Definition: am-sgmm2.h:144
double SolveQuadraticProblem(const SpMatrix< double > &H, const VectorBase< double > &g, const SolverOptions &opts, VectorBase< double > *x)
Definition: sp-matrix.cc:635
void GetSubstateMean(int32 j1, int32 m, int32 i, VectorBase< Real > *mean_out) const
Definition: am-sgmm2.h:519
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
bool IsZero(Real cutoff=1.0e-06) const
Returns true if matrix is all zeros.
std::vector< Vector< BaseFloat > > c_
c_{jm}, mixture weights. Dimension is [J2][#mix]
Definition: am-sgmm2.h:438
#define M_PI
Definition: kaldi-math.h:44
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Real SolveQuadraticMatrixProblem(const SpMatrix< Real > &Q, const MatrixBase< Real > &Y, const SpMatrix< Real > &SigmaInv, const SolverOptions &opts, MatrixBase< Real > *M)
Maximizes the auxiliary function : Like a numerically stable version of .
Definition: sp-matrix.cc:729
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
Matrix< BaseFloat > w_
Phonetic-subspace weight projection vectors. Dimension is [I][S].
Definition: am-sgmm2.h:429
Vector< BaseFloat > v_s
Speaker adaptation vector v^{(s)}. Dim is [T].
Definition: am-sgmm2.h:187
static void UpdateWGetStats(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, const Matrix< double > &w, const std::vector< Matrix< double > > &log_a, Matrix< double > *F_i, Matrix< double > *g_i, double *tot_like, int32 num_threads, int32 thread_id)
Called, multithreaded, inside UpdateW.
void GetStateOccupancies(Vector< BaseFloat > *occs) const
Accessors.
void Read(std::istream &in_stream, bool binary, bool add)
Float RandPrune(Float post, BaseFloat prune_thresh, struct RandomState *state=NULL)
Definition: kaldi-math.h:174
double UpdateM(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &Q, const Vector< double > &gamma_i, AmSgmm2 *model)
double MapUpdateM(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &Q, const Vector< double > &gamma_i, AmSgmm2 *model)
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
Vector< double > a_s_
a_i^{(s)}. For SSGMM.
kaldi::int32 int32
std::vector< Matrix< BaseFloat > > n_
n_{jim}, per-Gaussian normalizer. Dimension is [J1][I][#mix]
Definition: am-sgmm2.h:440
std::vector< Matrix< BaseFloat > > N_
Speaker-subspace projections. Dimension is [I][D][T].
Definition: am-sgmm2.h:427
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
double UpdateW(const MleAmSgmm2Accs &accs, const std::vector< Matrix< double > > &log_a, const Vector< double > &gamma_i, AmSgmm2 *model)
BaseFloat rand_prune_
small constant to randomly prune tiny posteriors
bool IsPosDef() const
Definition: sp-matrix.cc:75
static void ComputeSMeans(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, std::vector< SpMatrix< double > > *S_means)
Compute the S_means quantities, minus sum: (Y_i M_i^T + M_i Y_I^T).
const MleAmSgmm2Updater & updater_
double UpdatePhoneVectors(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &H, const std::vector< Matrix< double > > &log_a, AmSgmm2 *model) const
In this update, smoothing terms are not supported.
std::vector< SpMatrix< double > > S_
S_{i}^{-}, scatter of adapted feature vectors x_{i}(t). Dim is [I][D][D].
SpMatrix< BaseFloat > col_cov_inv_
Definition: am-sgmm2.h:451
void SetUnit()
< Set to zero
void ApplyLog()
Apply natural log to all elements.
std::vector< Matrix< double > > gamma_
Gaussian occupancies gamma_{jmi} for each substate and Gaussian index, pooled over groups...
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <– beta*this + alpha*M*v.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow If prune > 0.0, ignores terms less than the max - prune...
MatrixIndexT NumRows() const
void RenormalizeN(const MleAmSgmm2Accs &accs, const Vector< double > &gamma_i, AmSgmm2 *model)
int32 PhoneSpaceDim() const
Definition: am-sgmm2.h:361
double UpdateVars(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &S_means, const Vector< double > &gamma_i, AmSgmm2 *model)
Real SolveDoubleQuadraticMatrixProblem(const MatrixBase< Real > &G, const SpMatrix< Real > &P1, const SpMatrix< Real > &P2, const SpMatrix< Real > &Q1, const SpMatrix< Real > &Q2, const SolverOptions &opts, MatrixBase< Real > *M)
Maximizes the auxiliary function : Encountered in matrix update with a prior.
Definition: sp-matrix.cc:827
std::vector< Matrix< BaseFloat > > v_
The parameters in a particular SGMM state.
Definition: am-sgmm2.h:436
void CopyFromSp(const SpMatrix< Real > &other)
Definition: sp-matrix.h:85
int32 num_groups_
Other model specifications.
void Update(const MleAmSgmm2Accs &accs, AmSgmm2 *model, SgmmUpdateFlagsType flags)
std::vector< SpMatrix< double > > U_
the U_i quantities from the less-exact version of the SSGMM update for the speaker weight projections...
void UpdatePhoneVectorsInternal(const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &H, const std::vector< Matrix< double > > &log_a, AmSgmm2 *model, double *auxf_impr, int32 num_threads, int32 thread_id) const
Matrix< BaseFloat > zti
z_{i}(t), dim = [I][S], eq.(35)
Definition: am-sgmm2.h:146
BaseFloat GetDjms(int32 j1, int32 m, Sgmm2PerSpkDerivedVars *spk_vars) const
Definition: am-sgmm2.cc:948
Vector< BaseFloat > b_is
Definition: am-sgmm2.h:189
std::vector< Matrix< BaseFloat > > M_
Phonetic-subspace projections. Dimension is [I][D][S].
Definition: am-sgmm2.h:425
void PrintEigs(const char *name)
Definition: sp-matrix.h:203
void CommitStatsForSpk(const AmSgmm2 &model, const Sgmm2PerSpkDerivedVars &spk_vars)
Accumulates global stats for the current speaker (if applicable).
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors:
int32 FeatureDim() const
Definition: am-sgmm2.h:363
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
Definition: kaldi-vector.h:149
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
const size_t count
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha v v'
Definition: sp-matrix.cc:946
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
int32 NumSubstatesForPdf(int32 j2) const
Definition: am-sgmm2.h:354
int32 NumGroups() const
Definition: am-sgmm2.h:351
t .. not really part of SGMM.
Definition: model-common.h:55
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
std::vector< Matrix< double > > Y_
The stats which are not tied to any state.
BaseFloat AccumulateFromPosteriors(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, const Matrix< BaseFloat > &posteriors, int32 pdf_index, Sgmm2PerSpkDerivedVars *spk_vars)
Accumulate statistics, given posteriors.
uint16 SgmmUpdateFlagsType
Bitwise OR of the above flags.
Definition: model-common.h:59
Matrix< BaseFloat > xti
x_{i}(t) = x&#39;(t) - o_i(s): dim = [I][D], eq.(34)
Definition: am-sgmm2.h:145
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
std::vector< SpMatrix< BaseFloat > > SigmaInv_
Globally shared parameters of the subspace GMM.
Definition: am-sgmm2.h:423
double Log(double x)
Definition: kaldi-math.h:100
std::vector< Matrix< double > > y_
The SGMM state specific stats.
void Scale(Real alpha)
Multiply each element with a scalar value.
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
Definition: sp-matrix.h:211
The letters correspond to the variable names.
Definition: model-common.h:48
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
std::vector< Matrix< BaseFloat > > M_prior_
Definition: am-sgmm2.h:449
int32 Pdf2Group(int32 j2) const
Definition: am-sgmm2.cc:196
void RunMultiThreaded(const C &c_in)
Here, class C should inherit from MultiThreadable.
Definition: kaldi-thread.h:151
void AddMatSp(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const SpMatrix< Real > &B, const Real beta)
this <– beta*this + alpha*A*B.
Definition: kaldi-matrix.h:708
void CopyFromTp(const TpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given tpmatrix. (no resize is done).
void Update(const AmSgmm2 &model, BaseFloat min_count, Vector< BaseFloat > *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out)
Update speaker vector.
int32 NumPdfs() const
Various model dimensions.
Definition: am-sgmm2.h:350
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
std::string name
Definition: sp-matrix.h:446
UpdatePhoneVectorsClass(const UpdatePhoneVectorsClass &other)
#define KALDI_ERR
Definition: kaldi-error.h:147
void ComputeH(std::vector< SpMatrix< Real > > *H_i) const
Computes quantities H = M_i Sigma_i^{-1} M_i^T.
Definition: am-sgmm2.cc:1107
double TraceSpSp(const SpMatrix< double > &A, const SpMatrix< double > &B)
Definition: sp-matrix.cc:326
std::vector< Matrix< double > > NtransSigmaInv_
N_i^T \Sigma_{i}^{-1}. Needed for y^{(s)}.
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
Definition: sp-matrix.cc:964
const std::vector< Matrix< double > > & log_a_
#define KALDI_WARN
Definition: kaldi-error.h:150
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void SetZero()
Sets matrix to zero.
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
void Clear()
Clear the statistics.
BaseFloat AccumulateFromPosteriors(const AmSgmm2 &model, const Sgmm2PerFrameDerivedVars &frame_vars, const Matrix< BaseFloat > &posteriors, int32 pdf_index, Sgmm2PerSpkDerivedVars *spk_vars)
Returns count accumulated (may differ from posteriors.Sum() due to weight pruning).
int32 NumGauss() const
Definition: am-sgmm2.h:360
Real Sum() const
Returns sum of the elements.
std::vector< int32 > gselect
Definition: am-sgmm2.h:143
SpMatrix< BaseFloat > row_cov_inv_
Definition: am-sgmm2.h:450
void MulColsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = (*this) * diag(scale).
void Write(std::ostream &out_stream, bool binary) const
std::vector< Matrix< double > > a_
[SSGMM] These a_{jmi} quantities are dimensionally the same as the gamma quantities.
Vector< double > y_s_
Statistics for speaker adaptation (vectors), stored per-speaker.
std::vector< SpMatrix< double > > H_spk_
The following variable does not change per speaker, it just relates to the speaker subspace...
UpdatePhoneVectorsClass(const MleAmSgmm2Updater &updater, const MleAmSgmm2Accs &accs, const std::vector< SpMatrix< double > > &H, const std::vector< Matrix< double > > &log_a, AmSgmm2 *model, double *auxf_impr)
Vector< double > a_s_
[SSGMM], this is a per-speaker variable storing the a_i^{(s)} quantities that we will use in order to...
void GetNtransSigmaInv(std::vector< Matrix< Real > > *out) const
Definition: am-sgmm2.cc:1084
A class representing a vector.
Definition: kaldi-vector.h:406
std::vector< SpMatrix< double > > R_
R_{i}, quadratic term for speaker subspace estimation. Dim is [I][T][T].
double UpdateSubstateWeights(const MleAmSgmm2Accs &accs, AmSgmm2 *model)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void UpdateWithU(const AmSgmm2 &model, Vector< BaseFloat > *v_s, BaseFloat *objf_impr_out, BaseFloat *count_out)
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <– beta*this + alpha * M * A * M^T.
Definition: sp-matrix.cc:982
Real FrobeniusNorm() const
Frobenius norm, which is the sqrt of sum of square elements.
int32 feature_dim_
Dimensionality of various subspaces.
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
const std::vector< SpMatrix< double > > & H_
std::vector< Matrix< double > > Z_
Stats Z_{i} for speaker-subspace projections N. Dim is [I][D][T].
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void Check(const AmSgmm2 &model, bool show_properties=true) const
Checks the various accumulators for correct sizes given a model.
double UpdateU(const MleAmSgmm2Accs &accs, const Vector< double > &gamma_i, AmSgmm2 *model)
bool HasSpeakerDependentWeights() const
True if doing SSGMM.
Definition: am-sgmm2.h:366
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: kaldi-matrix.cc:38
void SymPosSemiDefEig(VectorBase< Real > *s, MatrixBase< Real > *P, Real tolerance=0.001) const
This is the version of SVD that we implement for symmetric positive definite matrices.
Definition: sp-matrix.cc:57
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void Add(Real c)
Add a constant to each element of a vector.
Class for the accumulators associated with the phonetic-subspace model parameters.
void SetZero()
Set vector to all zeros.
Vector< double > gamma_s_
gamma_{i}^{(s)}. Per-speaker counts for each Gaussian. Dimension is [I]
int32 SpkSpaceDim() const
Definition: am-sgmm2.h:362
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
void AddSpMat(const Real alpha, const SpMatrix< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*SpA*B.
Definition: kaldi-matrix.h:692
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142
int32 NumSubstatesForGroup(int32 j1) const
Definition: am-sgmm2.h:357
void ComputeMPrior(AmSgmm2 *model)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
Vector< double > gamma_s_
gamma_{i}^{(s)}.
void ResizeAccumulators(const AmSgmm2 &model, SgmmUpdateFlagsType flags, bool have_spk_vecs)
Resizes the accumulators to the correct sizes given the model.
std::vector< Matrix< BaseFloat > > w_jmi_
[SSGMM] w_{jmi}, dimension is [J1][#mix][I]. Computed from w_ and v_.
Definition: am-sgmm2.h:442
MatrixIndexT LimitCondDouble(Real maxCond=1.0e+5, bool invert=false)
Definition: sp-matrix.h:333
double UpdateN(const MleAmSgmm2Accs &accs, const Vector< double > &gamma_i, AmSgmm2 *model)
static void ComputeLogA(const MleAmSgmm2Accs &accs, std::vector< Matrix< double > > *log_a)