am-sgmm2.cc
1 // sgmm2/am-sgmm2.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation; Lukas Burget;
4 // Saarland University (Author: Arnab Ghoshal);
5 // Ondrej Glembek; Yanmin Qian;
6 // Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
7 // Liang Lu; Arnab Ghoshal
8 
9 // See ../../COPYING for clarification regarding multiple authors
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
19 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
20 // MERCHANTABILITY OR NON-INFRINGEMENT.
21 // See the Apache 2 License for the specific language governing permissions and
22 // limitations under the License.
23 
24 #include <functional>
25 
26 #include "sgmm2/am-sgmm2.h"
27 #include "util/kaldi-thread.h"
28 
29 namespace kaldi {
30 using std::vector;
31 
32 // This wrapper function is needed because std::generate complains about
33 // RandGauss(), which takes an optional argument.
34 static inline float _RandGauss()
35 {
36  return RandGauss();
37 }
38 
39 void Sgmm2LikelihoodCache::NextFrame() {
40  t++;
41  if (t == 0) {
42  t++; // skip over zero; zero is used to invalidate frames.
43  for (size_t i = 0; i < substate_cache.size(); i++)
44  substate_cache[i].t = 0;
45  for (size_t i = 0; i < pdf_cache.size(); i++)
46  pdf_cache[i].t = 0;
47  }
48 }
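
// Usage sketch (editorial addition, not part of the original file): the
// likelihood cache is frame-stamped, so per-utterance code is expected to call
// NextFrame() once per frame so that cached per-substate and per-pdf values
// from the previous frame are treated as invalid. This assumes the
// (num_groups, num_pdfs) constructor declared in am-sgmm2.h; "sgmm" and
// "features" (one row per frame) are placeholders.
//
//   Sgmm2LikelihoodCache cache(sgmm.NumGroups(), sgmm.NumPdfs());
//   for (int32 t = 0; t < features.NumRows(); t++) {
//     cache.NextFrame();  // invalidates entries stamped with the previous t.
//     // ... compute per-frame vars, then call sgmm.LogLikelihood(...) ...
//   }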
49 
50 void AmSgmm2::ComputeGammaI(const Vector<BaseFloat> &state_occupancies,
51  Vector<BaseFloat> *gamma_i) const {
52  KALDI_ASSERT(state_occupancies.Dim() == NumPdfs());
53  Vector<BaseFloat> w_jm(NumGauss());
54  gamma_i->Resize(NumGauss());
55  for (int32 j1 = 0; j1 < NumGroups(); j1++) {
56  int32 M = NumSubstatesForGroup(j1);
57  const std::vector<int32> &pdfs = group2pdf_[j1];
58  Vector<BaseFloat> substate_weight(M); // total weight for each substate.
59  for (size_t i = 0; i < pdfs.size(); i++) {
60  int32 j2 = pdfs[i];
61  substate_weight.AddVec(state_occupancies(j2), c_[j2]);
62  }
63  for (int32 m = 0; m < M; m++) {
64  w_jm.AddMatVec(1.0, w_, kNoTrans, v_[j1].Row(m), 0.0);
65  w_jm.ApplySoftMax();
66  gamma_i->AddVec(substate_weight(m), w_jm);
67  }
68  }
69 }
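
// Editorial note (not part of the original file), restating what the loop
// above computes: with substate weights c_{j2}(m) and
// w_{jm} = Softmax(w * v_{j1,m}) as in eq. (7), the expected per-Gaussian
// occupancy is
//   gamma_i = sum_{j2} occ(j2) * sum_m c_{j2}(m) * w_{j1(j2), m, i},
// i.e. each pdf's occupancy is shared out over its substates and then over
// the Gaussians via the softmax weights.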
70 
71 
72 void AmSgmm2::ComputePdfMappings() {
73  if (pdf2group_.empty()) {
74  KALDI_WARN << "ComputePdfMappings(): no pdf2group_ map, assuming you "
75  "are reading in old model.";
76  KALDI_ASSERT(v_.size() != 0);
77  pdf2group_.resize(v_.size());
78  for (int32 j2 = 0; j2 < static_cast<int32>(pdf2group_.size()); j2++)
79  pdf2group_[j2] = j2;
80  }
81  group2pdf_.clear();
82  for (int32 j2 = 0; j2 < static_cast<int32>(pdf2group_.size()); j2++) {
83  int32 j1 = pdf2group_[j2];
84  if (group2pdf_.size() <= j1) group2pdf_.resize(j1+1);
85  group2pdf_[j1].push_back(j2);
86  }
87 }
88 
89 void AmSgmm2::Read(std::istream &in_stream, bool binary) {
90  { // We want this to work even if the object was previously
91  // populated, so we clear the items that are more likely
92  // to cause problems.
93  pdf2group_.clear();
94  group2pdf_.clear();
95  u_.Resize(0,0);
96  w_jmi_.clear();
97  v_.clear();
98  }
99  // removing anything that was in the object before.
100  int32 num_pdfs = -1, feat_dim, num_gauss;
101  std::string token;
102 
103  ExpectToken(in_stream, binary, "<SGMM>");
104  ExpectToken(in_stream, binary, "<NUMSTATES>");
105  ReadBasicType(in_stream, binary, &num_pdfs);
106  ExpectToken(in_stream, binary, "<DIMENSION>");
107  ReadBasicType(in_stream, binary, &feat_dim);
108  ExpectToken(in_stream, binary, "<NUMGAUSS>");
109  ReadBasicType(in_stream, binary, &num_gauss);
110 
111  KALDI_ASSERT(num_pdfs > 0 && feat_dim > 0);
112 
113  ReadToken(in_stream, binary, &token);
114 
115  while (token != "</SGMM>") {
116  if (token == "<PDF2GROUP>") {
117  ReadIntegerVector(in_stream, binary, &pdf2group_);
118  ComputePdfMappings();
119  } else if (token == "<WEIGHTIDX2GAUSS>") { // TEMP! Will remove.
120  std::vector<int32> garbage;
121  ReadIntegerVector(in_stream, binary, &garbage);
122  } else if (token == "<DIAG_UBM>") {
123  diag_ubm_.Read(in_stream, binary);
124  } else if (token == "<FULL_UBM>") {
125  full_ubm_.Read(in_stream, binary);
126  } else if (token == "<SigmaInv>") {
127  SigmaInv_.resize(num_gauss);
128  for (int32 i = 0; i < num_gauss; i++) {
129  SigmaInv_[i].Read(in_stream, binary);
130  }
131  } else if (token == "<M>") {
132  M_.resize(num_gauss);
133  for (int32 i = 0; i < num_gauss; i++) {
134  M_[i].Read(in_stream, binary);
135  }
136  } else if (token == "<N>") {
137  N_.resize(num_gauss);
138  for (int32 i = 0; i < num_gauss; i++) {
139  N_[i].Read(in_stream, binary);
140  }
141  } else if (token == "<w>") {
142  w_.Read(in_stream, binary);
143  } else if (token == "<u>") {
144  u_.Read(in_stream, binary);
145  } else if (token == "<v>") {
146  int32 num_groups = group2pdf_.size();
147  if (num_groups == 0) {
148  KALDI_WARN << "Reading old model with new code (should still work)";
149  num_groups = num_pdfs;
150  }
151  v_.resize(num_groups);
152  for (int32 j1 = 0; j1 < num_groups; j1++) {
153  v_[j1].Read(in_stream, binary);
154  }
155  } else if (token == "<c>") {
156  c_.resize(num_pdfs);
157  for (int32 j2 = 0; j2 < num_pdfs; j2++) {
158  c_[j2].Read(in_stream, binary);
159  }
160  } else if (token == "<n>") {
161  int32 num_groups = group2pdf_.size();
162  if (num_groups == 0) num_groups = num_pdfs;
163  n_.resize(num_groups);
164  for (int32 j1 = 0; j1 < num_groups; j1++) {
165  n_[j1].Read(in_stream, binary);
166  }
167  // The following are the Gaussian prior parameters for MAP adaptation of M.
168  // They may be moved elsewhere eventually.
169  } else if (token == "<M_Prior>") {
170  ExpectToken(in_stream, binary, "<NUMGaussians>");
171  ReadBasicType(in_stream, binary, &num_gauss);
172  M_prior_.resize(num_gauss);
173  for (int32 i = 0; i < num_gauss; i++) {
174  M_prior_[i].Read(in_stream, binary);
175  }
176  } else if (token == "<Row_Cov_Inv>") {
177  row_cov_inv_.Read(in_stream, binary);
178  } else if (token == "<Col_Cov_Inv>") {
179  col_cov_inv_.Read(in_stream, binary);
180  } else {
181  KALDI_ERR << "Unexpected token '" << token << "' in model file ";
182  }
183  ReadToken(in_stream, binary, &token);
184  }
185 
186  if (pdf2group_.empty())
187  ComputePdfMappings(); // sets up group2pdf_, and pdf2group_ if reading
188  // old model.
189 
190  if (n_.empty())
191  ComputeNormalizers();
192  if (HasSpeakerDependentWeights())
193  ComputeWeights();
194 }
195 
196 int32 AmSgmm2::Pdf2Group(int32 j2) const {
197  KALDI_ASSERT(static_cast<size_t>(j2) < pdf2group_.size());
198  int32 j1 = pdf2group_[j2];
199  return j1;
200 }
201 
202 
203 void AmSgmm2::Write(std::ostream &out_stream,
204  bool binary,
205  SgmmWriteFlagsType write_params) const {
206  int32 num_pdfs = NumPdfs(),
207  feat_dim = FeatureDim(),
208  num_gauss = NumGauss();
209 
210  WriteToken(out_stream, binary, "<SGMM>");
211  if (!binary) out_stream << "\n";
212  WriteToken(out_stream, binary, "<NUMSTATES>");
213  WriteBasicType(out_stream, binary, num_pdfs);
214  WriteToken(out_stream, binary, "<DIMENSION>");
215  WriteBasicType(out_stream, binary, feat_dim);
216  WriteToken(out_stream, binary, "<NUMGAUSS>");
217  WriteBasicType(out_stream, binary, num_gauss);
218  if (!binary) out_stream << "\n";
219 
220  if (write_params & kSgmmBackgroundGmms) {
221  WriteToken(out_stream, binary, "<DIAG_UBM>");
222  diag_ubm_.Write(out_stream, binary);
223  WriteToken(out_stream, binary, "<FULL_UBM>");
224  full_ubm_.Write(out_stream, binary);
225  }
226 
227  if (write_params & kSgmmGlobalParams) {
228  WriteToken(out_stream, binary, "<SigmaInv>");
229  if (!binary) out_stream << "\n";
230  for (int32 i = 0; i < num_gauss; i++) {
231  SigmaInv_[i].Write(out_stream, binary);
232  }
233  WriteToken(out_stream, binary, "<M>");
234  if (!binary) out_stream << "\n";
235  for (int32 i = 0; i < num_gauss; i++) {
236  M_[i].Write(out_stream, binary);
237  }
238  if (N_.size() != 0) {
239  WriteToken(out_stream, binary, "<N>");
240  if (!binary) out_stream << "\n";
241  for (int32 i = 0; i < num_gauss; i++) {
242  N_[i].Write(out_stream, binary);
243  }
244  }
245  WriteToken(out_stream, binary, "<w>");
246  w_.Write(out_stream, binary);
247  WriteToken(out_stream, binary, "<u>");
248  u_.Write(out_stream, binary);
249  }
250 
251  if (write_params & kSgmmStateParams) {
252  WriteToken(out_stream, binary, "<PDF2GROUP>");
253  WriteIntegerVector(out_stream, binary, pdf2group_);
254  WriteToken(out_stream, binary, "<v>");
255  for (int32 j1 = 0; j1 < NumGroups(); j1++) {
256  v_[j1].Write(out_stream, binary);
257  }
258  WriteToken(out_stream, binary, "<c>");
259  for (int32 j2 = 0; j2 < num_pdfs; j2++) {
260  c_[j2].Write(out_stream, binary);
261  }
262  }
263 
264  if (write_params & kSgmmNormalizers) {
265  WriteToken(out_stream, binary, "<n>");
266  if (n_.empty())
267  KALDI_WARN << "Not writing normalizers since they are not present.";
268  else
269  for (int32 j1 = 0; j1 < NumGroups(); j1++)
270  n_[j1].Write(out_stream, binary);
271  }
272  WriteToken(out_stream, binary, "</SGMM>");
273 }
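
// Usage sketch (editorial addition, not part of the original file): typical
// serialization round-trip with Kaldi streams; "am_sgmm", "model_wxfilename"
// and "model_rxfilename" are placeholders, and kSgmmWriteAll is assumed to be
// the "write everything" flag from model-common.h. SGMM2 binaries normally
// write a TransitionModel to the same stream first.
//
//   bool binary_write = true;
//   {
//     Output ko(model_wxfilename, binary_write);
//     am_sgmm.Write(ko.Stream(), binary_write, kSgmmWriteAll);
//   }
//   {
//     bool binary_read;
//     Input ki(model_rxfilename, &binary_read);
//     am_sgmm.Read(ki.Stream(), binary_read);
//   }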
274 
275 
276 void AmSgmm2::Check(bool show_properties) {
277  int32 J1 = NumGroups(),
278  J2 = NumPdfs(),
279  num_gauss = NumGauss(),
280  feat_dim = FeatureDim(),
281  phn_dim = PhoneSpaceDim(),
282  spk_dim = SpkSpaceDim();
283 
284  if (show_properties)
285  KALDI_LOG << "AmSgmm2: #pdfs = " << J2 << ", #pdf-groups = "
286  << J1 << ", #Gaussians = "
287  << num_gauss << ", feature dim = " << feat_dim
288  << ", phone-space dim = " << phn_dim
289  << ", speaker-space dim = " << spk_dim;
290  KALDI_ASSERT(J1 > 0 && num_gauss > 0 && feat_dim > 0 && phn_dim > 0
291  && J2 > 0 && J2 >= J1);
292 
293  std::ostringstream debug_str;
294 
295  // First check the diagonal-covariance UBM.
296  KALDI_ASSERT(diag_ubm_.NumGauss() == num_gauss);
297  KALDI_ASSERT(diag_ubm_.Dim() == feat_dim);
298 
299  // Check the full-covariance UBM.
300  KALDI_ASSERT(full_ubm_.NumGauss() == num_gauss);
301  KALDI_ASSERT(full_ubm_.Dim() == feat_dim);
302 
303  // Check the globally-shared covariance matrices.
304  KALDI_ASSERT(SigmaInv_.size() == static_cast<size_t>(num_gauss));
305  for (int32 i = 0; i < num_gauss; i++) {
306  KALDI_ASSERT(SigmaInv_[i].NumRows() == feat_dim &&
307  SigmaInv_[i](0, 0) > 0.0); // or it wouldn't be +ve definite.
308  }
309 
310  if (spk_dim != 0) {
311  KALDI_ASSERT(N_.size() == static_cast<size_t>(num_gauss));
312  for (int32 i = 0; i < num_gauss; i++)
313  KALDI_ASSERT(N_[i].NumRows() == feat_dim && N_[i].NumCols() == spk_dim);
314  if (u_.NumRows() == 0) {
315  debug_str << "Speaker-weight projections: no.";
316  } else {
317  KALDI_ASSERT(u_.NumRows() == num_gauss && u_.NumCols() == spk_dim);
318  debug_str << "Speaker-weight projections: yes.";
319  }
320  } else {
321  KALDI_ASSERT(N_.size() == 0 && u_.NumRows() == 0);
322  }
323 
324  KALDI_ASSERT(M_.size() == static_cast<size_t>(num_gauss));
325  for (int32 i = 0; i < num_gauss; i++) {
326  KALDI_ASSERT(M_[i].NumRows() == feat_dim && M_[i].NumCols() == phn_dim);
327  }
328 
329  KALDI_ASSERT(w_.NumRows() == num_gauss && w_.NumCols() == phn_dim);
330 
331  { // check v, c.
332  KALDI_ASSERT(v_.size() == static_cast<size_t>(J1) &&
333  c_.size() == static_cast<size_t>(J2));
334  int32 nSubstatesTot = 0;
335  for (int32 j1 = 0; j1 < J1; j1++) {
336  int32 M_j = NumSubstatesForGroup(j1);
337  nSubstatesTot += M_j;
338  KALDI_ASSERT(M_j > 0 && v_[j1].NumRows() == M_j &&
339  v_[j1].NumCols() == phn_dim);
340  }
341  debug_str << "Substates: "<< (nSubstatesTot) << ". ";
342  int32 nSubstateWeights = 0;
343  for (int32 j2 = 0; j2 < J2; j2++) {
344  int32 j1 = Pdf2Group(j2);
345  int32 M = NumSubstatesForPdf(j2);
346  KALDI_ASSERT(M == NumSubstatesForGroup(j1));
347  nSubstateWeights += M;
348  }
349  KALDI_ASSERT(nSubstateWeights >= nSubstatesTot);
350  debug_str << "SubstateWeights: "<< (nSubstateWeights) << ". ";
351  }
352 
353  // check normalizers.
354  if (n_.size() == 0) {
355  debug_str << "Normalizers: no. ";
356  } else {
357  debug_str << "Normalizers: yes. ";
358  KALDI_ASSERT(n_.size() == static_cast<size_t>(J1));
359  for (int32 j1 = 0; j1 < J1; j1++) {
360  KALDI_ASSERT(n_[j1].NumRows() == num_gauss &&
361  n_[j1].NumCols() == NumSubstatesForGroup(j1));
362  }
363  }
364 
365  // check w_jmi_.
366  if (w_jmi_.size() == 0) {
367  debug_str << "Computed weights: no. ";
368  } else {
369  debug_str << "Computed weights: yes. ";
370  KALDI_ASSERT(w_jmi_.size() == static_cast<size_t>(J1));
371  for (int32 j1 = 0; j1 < J1; j1++) {
372  KALDI_ASSERT(w_jmi_[j1].NumRows() == NumSubstatesForGroup(j1) &&
373  w_jmi_[j1].NumCols() == num_gauss);
374  }
375  }
376 
377  if (show_properties)
378  KALDI_LOG << "Subspace GMM model properties: " << debug_str.str();
379 }
380 
381 void AmSgmm2::InitializeFromFullGmm(const FullGmm &full_gmm,
382  const std::vector<int32> &pdf2group,
383  int32 phn_subspace_dim,
384  int32 spk_subspace_dim,
385  bool speaker_dependent_weights,
386  BaseFloat self_weight) {
387  pdf2group_ = pdf2group;
388  ComputePdfMappings();
389  full_ubm_.CopyFromFullGmm(full_gmm);
390  diag_ubm_.CopyFromFullGmm(full_gmm);
391  if (phn_subspace_dim < 1 || phn_subspace_dim > full_gmm.Dim() + 1) {
392  KALDI_WARN << "Initial phone-subspace dimension must be >= 1, value is "
393  << phn_subspace_dim << "; setting to " << full_gmm.Dim() + 1;
394  phn_subspace_dim = full_gmm.Dim() + 1;
395  }
396  KALDI_ASSERT(spk_subspace_dim >= 0);
397 
398  w_.Resize(0, 0);
399  N_.clear();
400  c_.clear();
401  v_.clear();
402  SigmaInv_.clear();
403 
404  KALDI_LOG << "Initializing model";
405  Matrix<BaseFloat> norm_xform;
406  ComputeFeatureNormalizingTransform(full_gmm, &norm_xform);
407  InitializeMw(phn_subspace_dim, norm_xform);
408  if (spk_subspace_dim > 0)
409  InitializeNu(spk_subspace_dim, norm_xform, speaker_dependent_weights);
410  InitializeVecsAndSubstateWeights(self_weight);
411  KALDI_LOG << "Initializing variances";
412  InitializeCovars();
413 }
414 
415 void AmSgmm2::CopyFromSgmm2(const AmSgmm2 &other,
416  bool copy_normalizers,
417  bool copy_weights) {
418  KALDI_LOG << "Copying AmSgmm2";
419  pdf2group_ = other.pdf2group_;
420  group2pdf_ = other.group2pdf_;
421 
422  // Copy background GMMs
423  diag_ubm_.CopyFromDiagGmm(other.diag_ubm_);
424  full_ubm_.CopyFromFullGmm(other.full_ubm_);
425 
426  // Copy global params
427  SigmaInv_ = other.SigmaInv_;
428  M_ = other.M_;
429  w_ = other.w_;
430  N_ = other.N_;
431  u_ = other.u_;
432 
433  // Copy state-specific params, but only copy normalizers if requested.
434  v_ = other.v_;
435  c_ = other.c_;
436  if (copy_normalizers) n_ = other.n_;
437  if (copy_weights) w_jmi_ = other.w_jmi_;
438 
439  KALDI_LOG << "Done.";
440 }
441 
442 void AmSgmm2::ComputePerFrameVars(const VectorBase<BaseFloat> &data,
443  const std::vector<int32> &gselect,
444  const Sgmm2PerSpkDerivedVars &spk_vars,
445  Sgmm2PerFrameDerivedVars *per_frame_vars) const {
446  KALDI_ASSERT(!n_.empty() && "ComputeNormalizers() must be called.");
447 
448  per_frame_vars->Resize(gselect.size(), FeatureDim(), PhoneSpaceDim());
449 
450  per_frame_vars->gselect = gselect;
451  per_frame_vars->xt.CopyFromVec(data);
452 
453  for (int32 ki = 0, last = gselect.size(); ki < last; ki++) {
454  int32 i = gselect[ki];
455  per_frame_vars->xti.Row(ki).CopyFromVec(per_frame_vars->xt);
456  if (spk_vars.v_s.Dim() != 0)
457  per_frame_vars->xti.Row(ki).AddVec(-1.0, spk_vars.o_s.Row(i));
458  }
459  Vector<BaseFloat> SigmaInv_xt(FeatureDim());
460 
461  bool speaker_dep_weights =
462  (spk_vars.v_s.Dim() != 0 && HasSpeakerDependentWeights());
463  for (int32 ki = 0, last = gselect.size(); ki < last; ki++) {
464  int32 i = gselect[ki];
465  BaseFloat ssgmm_term = (speaker_dep_weights ? spk_vars.log_b_is(i) : 0.0);
466  SigmaInv_xt.AddSpVec(1.0, SigmaInv_[i], per_frame_vars->xti.Row(ki), 0.0);
467  // Eq (35): z_{i}(t) = M_{i}^{T} \Sigma_{i}^{-1} x_{i}(t)
468  per_frame_vars->zti.Row(ki).AddMatVec(1.0, M_[i], kTrans, SigmaInv_xt, 0.0);
469  // Eq.(36): n_{i}(t) = -0.5 x_{i}^{T} \Sigma_{i}^{-1} x_{i}(t)
470  per_frame_vars->nti(ki) = -0.5 * VecVec(per_frame_vars->xti.Row(ki),
471  SigmaInv_xt) + ssgmm_term;
472  }
473 }
474 
475 // inline
476 void AmSgmm2::ComponentLogLikes(const Sgmm2PerFrameDerivedVars &per_frame_vars,
477  int32 j1,
478  Sgmm2PerSpkDerivedVars *spk_vars,
479  Matrix<BaseFloat> *loglikes) const {
480  const vector<int32> &gselect = per_frame_vars.gselect;
481  int32 num_gselect = gselect.size(), num_substates = v_[j1].NumRows();
482 
483  // Eq.(37): log p(x(t), m, i|j) [indexed by j, ki]
484  // Although the extra memory allocation of storing this as a
485  // matrix might seem unnecessary, we save time in the LogSumExp()
486  // via more effective pruning.
487  loglikes->Resize(num_gselect, num_substates);
488  bool speaker_dep_weights =
489  (spk_vars->v_s.Dim() != 0 && HasSpeakerDependentWeights());
490  if (speaker_dep_weights) {
491  KALDI_ASSERT(static_cast<int32>(spk_vars->log_d_jms.size()) == NumGroups());
492  KALDI_ASSERT(static_cast<int32>(w_jmi_.size()) == NumGroups() ||
493  "You need to call ComputeWeights().");
494  }
495  for (int32 ki = 0; ki < num_gselect; ki++) {
496  SubVector<BaseFloat> logp_xi(*loglikes, ki);
497  int32 i = gselect[ki];
498  // for all substates, compute z_{i}^T v_{jm}
499  logp_xi.AddMatVec(1.0, v_[j1], kNoTrans, per_frame_vars.zti.Row(ki), 0.0);
500  logp_xi.AddVec(1.0, n_[j1].Row(i)); // for all substates, add n_{jim}
501  logp_xi.Add(per_frame_vars.nti(ki)); // for all substates, add n_{i}(t)
502  }
503  if (speaker_dep_weights) { // [SSGMM]
504  Vector<BaseFloat> &log_d = spk_vars->log_d_jms[j1];
505  if (log_d.Dim() == 0) { // have not yet cached this quantity.
506  log_d.Resize(num_substates);
507  log_d.AddMatVec(1.0, w_jmi_[j1], kNoTrans, spk_vars->b_is, 0.0);
508  log_d.ApplyLog();
509  }
510  loglikes->AddVecToRows(-1.0, log_d); // [SSGMM] this is the term
511  // - log d_{jm}^{(s)} in the likelihood function [eq. 25 in
512  // the techreport]
513  }
514 }
515 
516 
517 BaseFloat AmSgmm2::LogLikelihood(const Sgmm2PerFrameDerivedVars &per_frame_vars,
518  int32 j2,
519  Sgmm2LikelihoodCache *cache,
520  Sgmm2PerSpkDerivedVars *spk_vars,
521  BaseFloat log_prune) const {
522  int32 t = cache->t; // not a real time; used to uniquely identify frames.
523  // Forgo asserts here, as this is frequently called.
524  // We'll probably get a segfault if an error is made.
525  Sgmm2LikelihoodCache::PdfCacheElement &pdf_cache =
526  cache->pdf_cache[j2];
527 #ifdef KALDI_PARANOID
528  bool random_test = (Rand() % 1000 == 1); // to check that the user is
529  // calling NextFrame() on the cache, as they should.
530 #else
531  bool random_test = false; // compiler will ignore test branches.
532 #endif
533  if (pdf_cache.t == t) {
534  if (!random_test) return pdf_cache.log_like;
535  } else {
536  random_test = false;
537  }
538  // if random_test == true at this point, it was already cached, and we will
539  // verify that we return the same value as the cached one.
540  pdf_cache.t = t;
541 
542  int32 j1 = pdf2group_[j2];
543  Sgmm2LikelihoodCache::SubstateCacheElement &substate_cache =
544  cache->substate_cache[j1];
545  if (substate_cache.t != t) { // Need to compute sub-state likelihoods.
546  substate_cache.t = t;
547  Matrix<BaseFloat> loglikes; // indexed [gselect-index][substate-index]
548  ComponentLogLikes(per_frame_vars, j1, spk_vars, &loglikes);
549  BaseFloat max = loglikes.Max(); // use this to keep things in good numerical range.
550  loglikes.Add(-max);
551  loglikes.ApplyExp();
552  substate_cache.remaining_log_like = max;
553  int32 num_substates = loglikes.NumCols();
554  substate_cache.likes.Resize(num_substates); // zeroes it.
555  substate_cache.likes.AddRowSumMat(1.0, loglikes); // add likelihoods [not in log!] for
556  // each column [i.e. summing over the rows], so we get the sum for
557  // each substate index. You have to multiply by exp(remaining_log_like)
558  // to get a real likelihood.
559  }
560 
561  BaseFloat log_like = substate_cache.remaining_log_like
562  + Log(VecVec(substate_cache.likes, c_[j2]));
563 
564  if (random_test)
565  KALDI_ASSERT(ApproxEqual(pdf_cache.log_like, log_like));
566 
567  pdf_cache.log_like = log_like;
568  KALDI_ASSERT(log_like == log_like && log_like - log_like == 0); // check
569  // that it's not NaN or infinity.
570  return log_like;
571 }
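
// Usage sketch (editorial addition, not part of the original file): hedged
// outline of the per-frame evaluation path for one pdf-id "j2", tying together
// GaussianSelection() (defined further down), ComputePerSpkDerivedVars(),
// ComputePerFrameVars() and the cached LogLikelihood() above; "sgmm", "frame",
// "gselect_config" and "j2" are placeholders.
//
//   Sgmm2PerSpkDerivedVars spk_vars;          // leave v_s empty if unadapted
//   sgmm.ComputePerSpkDerivedVars(&spk_vars);
//   Sgmm2LikelihoodCache cache(sgmm.NumGroups(), sgmm.NumPdfs());
//   Sgmm2PerFrameDerivedVars per_frame_vars;
//   std::vector<int32> gselect;
//   cache.NextFrame();
//   sgmm.GaussianSelection(gselect_config, frame, &gselect);
//   sgmm.ComputePerFrameVars(frame, gselect, spk_vars, &per_frame_vars);
//   BaseFloat loglike =
//       sgmm.LogLikelihood(per_frame_vars, j2, &cache, &spk_vars, 0.0);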
572 
573 BaseFloat
574 AmSgmm2::ComponentPosteriors(const Sgmm2PerFrameDerivedVars &per_frame_vars,
575  int32 j2,
576  Sgmm2PerSpkDerivedVars *spk_vars,
577  Matrix<BaseFloat> *post) const {
578  KALDI_ASSERT(j2 < NumPdfs() && post != NULL);
579  int32 j1 = pdf2group_[j2];
580  ComponentLogLikes(per_frame_vars, j1, spk_vars, post); // now
581  // post is a matrix of log-likelihoods indexed by [gaussian-selection index]
582  // [sub-state index]. It doesn't include the sub-state weights,
583  // though.
584  BaseFloat loglike = post->Max();
585  post->Add(-loglike); // get it to nicer numeric range.
586  post->ApplyExp(); // so we're dealing with likelihoods (with an arbitrary offset
587  // "loglike" removed to make it in a nice numeric range)
588  post->MulColsVec(c_[j2]); // include the sub-state weights.
589 
590  BaseFloat tot_like = post->Sum();
591  KALDI_ASSERT(tot_like != 0.0); // note: not valid to have zero weights.
592  loglike += Log(tot_like);
593  post->Scale(1.0 / tot_like); // so "post" now sums to one, and "loglike"
594  // contains the correct log-likelihood of the data given the pdf.
595 
596  return loglike;
597 }
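
// Usage note (editorial addition, not part of the original file), continuing
// the sketch after LogLikelihood() above: "post" is indexed
// [gaussian-selection index][substate index], sums to 1.0 over all entries,
// and the return value is the per-frame log-likelihood for pdf j2.
//
//   Matrix<BaseFloat> post;
//   BaseFloat loglike =
//       sgmm.ComponentPosteriors(per_frame_vars, j2, &spk_vars, &post);
//   // post(ki, m) is the posterior of (gselect[ki], substate m) given pdf j2.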
598 
599 void AmSgmm2::SplitSubstatesInGroup(const Vector<BaseFloat> &pdf_occupancies,
600  const Sgmm2SplitSubstatesConfig &opts,
601  const SpMatrix<BaseFloat> &sqrt_H_sm,
602  int32 j1,
603  int32 tgt_M) {
604  const std::vector<int32> &pdfs = group2pdf_[j1];
605  int32 phn_dim = PhoneSpaceDim(), cur_M = NumSubstatesForGroup(j1),
606  num_pdfs_for_group = pdfs.size();
607  Vector<BaseFloat> rand_vec(phn_dim), v_shift(phn_dim);
608 
609  KALDI_ASSERT(tgt_M >= cur_M);
610  if (cur_M == tgt_M) return;
611  // Resize v[j1] to fit new substates
612  {
613  Matrix<BaseFloat> tmp_v_j(v_[j1]);
614  v_[j1].Resize(tgt_M, phn_dim);
615  v_[j1].Range(0, cur_M, 0, phn_dim).CopyFromMat(tmp_v_j);
616  }
617 
618  // we'll use a temporary matrix for the c quantities.
619  Matrix<BaseFloat> c_j(num_pdfs_for_group, tgt_M);
620  for (int32 i = 0; i < num_pdfs_for_group; i++) {
621  int32 j2 = pdfs[i];
622  c_j.Row(i).Range(0, cur_M).CopyFromVec(c_[j2]);
623  }
624 
625  // Keep splitting substates until obtaining the desired number
626  for (; cur_M < tgt_M; cur_M++) {
627  int32 split_m; // substate to split.
628  {
629  Vector<BaseFloat> substate_count(tgt_M);
630  substate_count.AddRowSumMat(1.0, c_j);
631  BaseFloat *data = substate_count.Data();
632  split_m = std::max_element(data, data+cur_M) - data;
633  }
634  for (int32 i = 0; i < num_pdfs_for_group; i++) { // divide count of split
635  // substate. [extended for SCTM]
636  // c_{jkm} := c_{jmk}' := c_{jkm} / 2
637  c_j(i, split_m) = c_j(i, cur_M) = c_j(i, split_m) / 2;
638  }
639  // v_{jkm} := +/- split_perturb * H_k^{(sm)}^{-0.5} * rand_vec
640  std::generate(rand_vec.Data(), rand_vec.Data() + rand_vec.Dim(),
641  _RandGauss);
642  v_shift.AddSpVec(opts.perturb_factor, sqrt_H_sm, rand_vec, 0.0);
643  v_[j1].Row(cur_M).CopyFromVec(v_[j1].Row(split_m));
644  v_[j1].Row(cur_M).AddVec(1.0, v_shift);
645  v_[j1].Row(split_m).AddVec(-1.0, v_shift);
646  }
647  // copy the temporary matrix for the c_ (sub-state weight)
648  // quantities back to the place it belongs.
649  for (int32 i = 0; i < num_pdfs_for_group; i++) {
650  int32 j2 = pdfs[i];
651  c_[j2].Resize(tgt_M);
652  c_[j2].CopyFromVec(c_j.Row(i));
653  }
654 }
655 
656 
657 void AmSgmm2::SplitSubstates(const Vector<BaseFloat> &pdf_occupancies,
658  const Sgmm2SplitSubstatesConfig &opts) {
659  KALDI_ASSERT(pdf_occupancies.Dim() == NumPdfs());
660  int32 J1 = NumGroups(), J2 = NumPdfs();
661  Vector<BaseFloat> group_occupancies(J1);
662  for (int32 j2 = 0; j2 < J2; j2++)
663  group_occupancies(Pdf2Group(j2)) += pdf_occupancies(j2);
664 
665  vector<int32> tgt_num_substates;
666 
667  GetSplitTargets(group_occupancies, opts.split_substates,
668  opts.power, opts.min_count, &tgt_num_substates);
669 
670  int32 tot_num_substates_old = 0, tot_num_substates_new = 0;
671  vector< SpMatrix<BaseFloat> > H_i;
672  SpMatrix<BaseFloat> sqrt_H_sm;
673 
674  ComputeH(&H_i); // set up that array.
675  ComputeHsmFromModel(H_i, pdf_occupancies, &sqrt_H_sm, opts.max_cond);
676  H_i.clear();
677  sqrt_H_sm.ApplyPow(-0.5);
678 
679  for (int32 j1 = 0; j1 < J1; j1++) {
680  int32 cur_M = NumSubstatesForGroup(j1),
681  tgt_M = tgt_num_substates[j1];
682  tot_num_substates_old += cur_M;
683  tot_num_substates_new += std::max(cur_M, tgt_M);
684  if (cur_M < tgt_M)
685  SplitSubstatesInGroup(pdf_occupancies, opts, sqrt_H_sm, j1, tgt_M);
686  }
687  if (tot_num_substates_old == tot_num_substates_new) {
688  KALDI_LOG << "Not splitting substates; current #substates is "
689  << tot_num_substates_old << " and target is "
690  << opts.split_substates;
691  } else {
692  KALDI_LOG << "Getting rid of normalizers as they will no longer be valid";
693  n_.clear();
694  KALDI_LOG << "Split " << tot_num_substates_old << " substates to "
695  << tot_num_substates_new;
696  }
697 }
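
// Usage sketch (editorial addition, not part of the original file): splitting
// is driven by Sgmm2SplitSubstatesConfig (opts.split_substates is the target
// total number of substates); since the normalizers n_ are cleared above, they
// have to be recomputed before the model is used again. "sgmm" and "pdf_occs"
// (per-pdf occupation counts) are placeholders, and 10000 is only illustrative.
//
//   Sgmm2SplitSubstatesConfig split_opts;
//   split_opts.split_substates = 10000;
//   sgmm.SplitSubstates(pdf_occs, split_opts);
//   sgmm.ComputeNormalizers();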
698 
699 void AmSgmm2::IncreasePhoneSpaceDim(int32 target_dim,
700  const Matrix<BaseFloat> &norm_xform) {
701  KALDI_ASSERT(!M_.empty());
702  int32 initial_dim = PhoneSpaceDim(),
703  feat_dim = FeatureDim();
704  KALDI_ASSERT(norm_xform.NumRows() == feat_dim);
705 
706  if (target_dim < initial_dim)
707  KALDI_ERR << "You asked to increase phn dim to a value lower than the "
708  << " current dimension, " << target_dim << " < " << initial_dim;
709 
710  if (target_dim > initial_dim + feat_dim) {
711  KALDI_WARN << "Cannot increase phone subspace dimensionality from "
712  << initial_dim << " to " << target_dim << ", increasing to "
713  << initial_dim + feat_dim;
714  target_dim = initial_dim + feat_dim;
715  }
716 
717  if (initial_dim < target_dim) {
718  Matrix<BaseFloat> tmp_M(feat_dim, initial_dim);
719  for (int32 i = 0; i < NumGauss(); i++) {
720  tmp_M.CopyFromMat(M_[i]);
721  M_[i].Resize(feat_dim, target_dim);
722  M_[i].Range(0, feat_dim, 0, tmp_M.NumCols()).CopyFromMat(tmp_M);
723  M_[i].Range(0, feat_dim, tmp_M.NumCols(),
724  target_dim - tmp_M.NumCols()).CopyFromMat(norm_xform.Range(0,
725  feat_dim, 0, target_dim-tmp_M.NumCols()));
726  }
727  Matrix<BaseFloat> tmp_w = w_;
728  w_.Resize(tmp_w.NumRows(), target_dim);
729  w_.Range(0, tmp_w.NumRows(), 0, tmp_w.NumCols()).CopyFromMat(tmp_w);
730 
731  for (int32 j1 = 0; j1 < NumGroups(); j1++) {
732  // Resize phonetic-subspace vectors.
733  Matrix<BaseFloat> tmp_v_j = v_[j1];
734  v_[j1].Resize(tmp_v_j.NumRows(), target_dim);
735  v_[j1].Range(0, tmp_v_j.NumRows(), 0, tmp_v_j.NumCols()).CopyFromMat(
736  tmp_v_j);
737  }
738  KALDI_LOG << "Phone subspace dimensionality increased from " <<
739  initial_dim << " to " << target_dim;
740  } else {
741  KALDI_LOG << "Phone subspace dimensionality unchanged, since target " <<
742  "dimension (" << target_dim << ") <= initial dimension (" <<
743  initial_dim << ")";
744  }
745 }
746 
747 void AmSgmm2::IncreaseSpkSpaceDim(int32 target_dim,
748  const Matrix<BaseFloat> &norm_xform,
749  bool speaker_dependent_weights) {
750  int32 initial_dim = SpkSpaceDim(),
751  feat_dim = FeatureDim();
752  KALDI_ASSERT(norm_xform.NumRows() == feat_dim);
753 
754  if (N_.size() == 0)
755  N_.resize(NumGauss());
756 
757  if (target_dim < initial_dim)
758  KALDI_ERR << "You asked to increase spk dim to a value lower than the "
759  << " current dimension, " << target_dim << " < " << initial_dim;
760 
761  if (target_dim > initial_dim + feat_dim) {
762  KALDI_WARN << "Cannot increase speaker subspace dimensionality from "
763  << initial_dim << " to " << target_dim << ", increasing to "
764  << initial_dim + feat_dim;
765  target_dim = initial_dim + feat_dim;
766  }
767 
768  if (initial_dim < target_dim) {
769  int32 dim_change = target_dim - initial_dim;
770  Matrix<BaseFloat> tmp_N((initial_dim != 0) ? feat_dim : 0,
771  initial_dim);
772  for (int32 i = 0; i < NumGauss(); i++) {
773  if (initial_dim != 0) tmp_N.CopyFromMat(N_[i]);
774  N_[i].Resize(feat_dim, target_dim);
775  if (initial_dim != 0) {
776  N_[i].Range(0, feat_dim, 0, tmp_N.NumCols()).CopyFromMat(tmp_N);
777  }
778  N_[i].Range(0, feat_dim, tmp_N.NumCols(), dim_change).CopyFromMat(
779  norm_xform.Range(0, feat_dim, 0, dim_change));
780  }
781  // if we already have speaker-dependent weights or we are increasing
782  // spk-dim from zero and are asked to add them...
783  if (u_.NumRows() != 0 || (initial_dim == 0 && speaker_dependent_weights))
784  u_.Resize(NumGauss(), target_dim, kCopyData); // extend dim of u_i's
785  KALDI_LOG << "Speaker subspace dimensionality increased from " <<
786  initial_dim << " to " << target_dim;
787  if (initial_dim == 0 && speaker_dependent_weights)
788  KALDI_LOG << "Added parameters u for speaker-dependent weights.";
789  } else {
790  KALDI_LOG << "Speaker subspace dimensionality unchanged, since target " <<
791  "dimension (" << target_dim << ") <= initial dimension (" <<
792  initial_dim << ")";
793  }
794 }
795 
796 void AmSgmm2::ComputeWeights() {
797  int32 J1 = NumGroups();
798  w_jmi_.resize(J1);
799  int32 i = NumGauss();
800  for (int32 j1 = 0; j1 < J1; j1++) {
801  int32 M = NumSubstatesForGroup(j1);
802  w_jmi_[j1].Resize(M, i);
803  w_jmi_[j1].AddMatMat(1.0, v_[j1], kNoTrans, w_, kTrans, 0.0);
804  // now w_jmi_ contains un-normalized log weights.
805  for (int32 m = 0; m < M; m++)
806  w_jmi_[j1].Row(m).ApplySoftMax(); // get the actual weights.
807  }
808 }
809 
810 void AmSgmm2::ComputeDerivedVars() {
811  if (n_.empty()) ComputeNormalizers();
812  if (diag_ubm_.NumGauss() != full_ubm_.NumGauss()
813  || diag_ubm_.Dim() != full_ubm_.Dim()) {
814  diag_ubm_.CopyFromFullGmm(full_ubm_);
815  }
816  if (w_jmi_.empty() && HasSpeakerDependentWeights())
817  ComputeWeights();
818 }
819 
820 class ComputeNormalizersClass: public MultiThreadable { // For multi-threaded.
821  public:
822  ComputeNormalizersClass(AmSgmm2 *am_sgmm,
823  int32 *entropy_count_ptr,
824  double *entropy_sum_ptr):
825  am_sgmm_(am_sgmm), entropy_count_ptr_(entropy_count_ptr),
826  entropy_sum_ptr_(entropy_sum_ptr), entropy_count_(0),
827  entropy_sum_(0.0) { }
828 
829  ComputeNormalizersClass(const ComputeNormalizersClass &other):
830  MultiThreadable(other),
831  am_sgmm_(other.am_sgmm_), entropy_count_ptr_(other.entropy_count_ptr_),
832  entropy_sum_ptr_(other.entropy_sum_ptr_), entropy_count_(0),
833  entropy_sum_(0.0) { }
834 
835  ~ComputeNormalizersClass() {
836  *entropy_count_ptr_ += entropy_count_;
837  *entropy_sum_ptr_ += entropy_sum_;
838  }
839 
840  inline void operator() () {
841  // Note: operator() works on local copies of the sums we're computing,
842  // which are propagated to the original pointers in the destructor.
843  am_sgmm_->ComputeNormalizersInternal(num_threads_, thread_id_,
844  &entropy_count_,
845  &entropy_sum_);
846  }
847  private:
848  ComputeNormalizersClass() { } // Disallow empty constructor.
849  AmSgmm2 *am_sgmm_;
850  int32 *entropy_count_ptr_;
851  double *entropy_sum_ptr_;
852  int32 entropy_count_;
853  double entropy_sum_;
854 
855 };
856 
857 void AmSgmm2::ComputeNormalizers() {
858  KALDI_LOG << "Computing normalizers";
859  n_.resize(NumPdfs());
860  int32 entropy_count = 0;
861  double entropy_sum = 0.0;
862  ComputeNormalizersClass c(this, &entropy_count, &entropy_sum);
863  RunMultiThreaded(c);
864 
865  KALDI_LOG << "Entropy of weights in substates is "
866  << (entropy_sum / entropy_count) << " over " << entropy_count
867  << " substates, equivalent to perplexity of "
868  << (Exp(entropy_sum /entropy_count));
869  KALDI_LOG << "Done computing normalizers";
870 }
871 
872 
873 void AmSgmm2::ComputeNormalizersInternal(int32 num_threads, int32 thread,
874  int32 *entropy_count,
875  double *entropy_sum) {
876 
877  BaseFloat DLog2pi = FeatureDim() * Log(2 * M_PI);
878  Vector<BaseFloat> log_det_Sigma(NumGauss());
879 
880  for (int32 i = 0; i < NumGauss(); i++) {
881  try {
882  log_det_Sigma(i) = - SigmaInv_[i].LogPosDefDet();
883  } catch(...) {
884  if (thread == 0) // just for one thread, print errors [else, duplicates]
885  KALDI_WARN << "Covariance is not positive definite, setting to unit";
886  SigmaInv_[i].SetUnit();
887  log_det_Sigma(i) = 0.0;
888  }
889  }
890 
891  int32 J1 = NumGroups();
892 
893  int block_size = (NumPdfs() + num_threads-1) / num_threads;
894  int j_start = thread * block_size, j_end = std::min(J1, j_start + block_size);
895 
896  int32 I = NumGauss();
897  for (int32 j1 = j_start; j1 < j_end; j1++) {
898  int32 M = NumSubstatesForGroup(j1);
899  Matrix<BaseFloat> log_w_jm(M, I);
900  n_[j1].Resize(I, M);
901  Matrix<BaseFloat> mu_jmi(M, FeatureDim());
902  Matrix<BaseFloat> SigmaInv_mu(M, FeatureDim());
903 
904  // (in logs): w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7)
905  log_w_jm.AddMatMat(1.0, v_[j1], kNoTrans, w_, kTrans, 0.0);
906  for (int32 m = 0; m < M; m++) {
907  log_w_jm.Row(m).Add(-1.0 * log_w_jm.Row(m).LogSumExp());
908  { // DIAGNOSTIC CODE
909  (*entropy_count)++;
910  for (int32 i = 0; i < NumGauss(); i++) {
911  (*entropy_sum) -= log_w_jm(m, i) * Exp(log_w_jm(m, i));
912  }
913  }
914  }
915 
916  for (int32 i = 0; i < I; i++) {
917  // mu_jmi = M_{i} * v_{jm}
918  mu_jmi.AddMatMat(1.0, v_[j1], kNoTrans, M_[i], kTrans, 0.0);
919  SigmaInv_mu.AddMatSp(1.0, mu_jmi, kNoTrans, SigmaInv_[i], 0.0);
920 
921  for (int32 m = 0; m < M; m++) {
922  // mu_{jmi} * \Sigma_{i}^{-1} * mu_{jmi}
923  BaseFloat mu_SigmaInv_mu = VecVec(mu_jmi.Row(m), SigmaInv_mu.Row(m));
924  // Previously had:
925  // BaseFloat logc = log(c_[j](m));
926  // but because of the SCTM aspect, we can't include the sub-state mixture
927  // weights at this point [they are included later on].
928 
929  // eq.(31)
930  n_[j1](i, m) = log_w_jm(m, i) - 0.5 * (log_det_Sigma(i) + DLog2pi
931  + mu_SigmaInv_mu);
932  { // Mainly diagnostic code. Not necessary.
933  BaseFloat tmp = n_[j1](i, m);
934  if (!KALDI_ISFINITE(tmp)) { // NaN or inf
935  KALDI_LOG << "Warning: normalizer for j1 = " << j1 << ", m = " << m
936  << ", i = " << i << " is infinite or NaN " << tmp << "= "
937  << log_w_jm(m, i) << "+"
938  << (-0.5 * log_det_Sigma(i)) << "+" << (-0.5 * DLog2pi)
939  << "+" << (mu_SigmaInv_mu) << ", setting to finite.";
940  n_[j1](i, m) = -1.0e+40; // future work(arnab): get rid of magic number
941  }
942  }
943  }
944  }
945  }
946 }
947 
948 BaseFloat AmSgmm2::GetDjms(int32 j1, int32 m,
949  Sgmm2PerSpkDerivedVars *spk_vars) const {
950  // This relates to SSGMMs (speaker-dependent weights).
951  if (spk_vars->log_d_jms.empty()) return -1; // this would be
952  // because we don't have speaker-dependent weights ("u" not set up).
953 
954  KALDI_ASSERT(!w_jmi_.empty() && "You need to call ComputeWeights() on SGMM.");
955  Vector<BaseFloat> &log_d = spk_vars->log_d_jms[j1];
956  if (log_d.Dim() == 0) {
957  log_d.Resize(NumSubstatesForGroup(j1));
958  log_d.AddMatVec(1.0, w_jmi_[j1], kNoTrans, spk_vars->b_is, 0.0);
959  log_d.ApplyLog();
960  }
961  return Exp(log_d(m));
962 }
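
// Editorial note (not part of the original file): in equation form, GetDjms()
// returns d_{jm}^{(s)} = sum_i w_{jmi} * b_i^{(s)}, where
// b_i^{(s)} = exp(u_i^T v^{(s)}) is computed in ComputePerSpkDerivedVars();
// log d_{jm}^{(s)} is cached per group in spk_vars->log_d_jms.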
963 
964 
965 void AmSgmm2::ComputePreXform(const Vector<BaseFloat> &state_occs,
966  Matrix<BaseFloat> *xform,
967  Matrix<BaseFloat> *inv_xform,
968  Vector<BaseFloat> *diag_mean_scatter) const {
969  int32 num_pdfs = NumPdfs(),
970  num_gauss = NumGauss(),
971  dim = FeatureDim();
972  KALDI_ASSERT(state_occs.Dim() == num_pdfs);
973 
974  BaseFloat total_occ = state_occs.Sum();
975 
976  // Degenerate case: unlikely to ever happen.
977  if (total_occ == 0) {
978  KALDI_WARN << "Zero probability (computing transform). Using unit "
979  << "pre-transform";
980  xform->Resize(dim, dim + 1, kUndefined);
981  xform->SetUnit();
982  inv_xform->Resize(dim, dim + 1, kUndefined);
983  inv_xform->SetUnit();
984  diag_mean_scatter->Resize(dim, kSetZero);
985  return;
986  }
987 
988  // Convert state occupancies to posteriors; Eq. (B.1)
989  Vector<BaseFloat> state_posteriors(state_occs);
990  state_posteriors.Scale(1/total_occ);
991 
992  Vector<BaseFloat> mu_jmi(dim), global_mean(dim);
993  SpMatrix<BaseFloat> within_class_covar(dim), between_class_covar(dim);
994  Vector<BaseFloat> gauss_weight(num_gauss); // weights for within-class vars.
995  Vector<BaseFloat> w_jm(num_gauss);
996  for (int32 j1 = 0; j1 < NumGroups(); j1++) {
997  const std::vector<int32> &pdfs = group2pdf_[j1];
998  int32 M = NumSubstatesForGroup(j1);
999  Vector<BaseFloat> substate_weight(M); // total weight for each substate.
1000  for (size_t i = 0; i < pdfs.size(); i++) {
1001  int32 j2 = pdfs[i];
1002  substate_weight.AddVec(state_posteriors(j2), c_[j2]);
1003  }
1004  for (int32 m = 0; m < M; m++) {
1005  BaseFloat this_substate_weight = substate_weight(m);
1006  // Eq. (7): w_jm = softmax([w_{1}^T ... w_{D}^T] * v_{jm})
1007  w_jm.AddMatVec(1.0, w_, kNoTrans, v_[j1].Row(m), 0.0);
1008  w_jm.ApplySoftMax();
1009 
1010  for (int32 i = 0; i < num_gauss; i++) {
1011  BaseFloat weight = this_substate_weight * w_jm(i);
1012  mu_jmi.AddMatVec(1.0, M_[i], kNoTrans, v_[j1].Row(m), 0.0); // Eq. (6)
1013  // Eq. (B.3): \mu_avg = \sum_{jmi} p(j) c_{jm} w_{jmi} \mu_{jmi}
1014  global_mean.AddVec(weight, mu_jmi);
1015  // \Sigma_B = \sum_{jmi} p(j) c_{jm} w_{jmi} \mu_{jmi} \mu_{jmi}^T
1016  between_class_covar.AddVec2(weight, mu_jmi); // Eq. (B.4)
1017  gauss_weight(i) += weight;
1018  }
1019  }
1020  }
1021  between_class_covar.AddVec2(-1.0, global_mean); // Eq. (B.4)
1022 
1023  for (int32 i = 0; i < num_gauss; i++) {
1024  SpMatrix<BaseFloat> Sigma(SigmaInv_[i]);
1025  Sigma.InvertDouble();
1026  // Eq. (B.2): \Sigma_W = \sum_{jmi} p(j) c_{jm} w_{jmi} \Sigma_i
1027  within_class_covar.AddSp(gauss_weight(i), Sigma);
1028  }
1029 
1030  TpMatrix<BaseFloat> tmpL(dim);
1031  Matrix<BaseFloat> tmpLInvFull(dim, dim);
1032  tmpL.Cholesky(within_class_covar); // \Sigma_W = L L^T
1033  tmpL.InvertDouble(); // L^{-1}
1034  tmpLInvFull.CopyFromTp(tmpL); // get as full matrix.
1035 
1036  // B := L^{-1} * \Sigma_B * L^{-T}
1037  SpMatrix<BaseFloat> tmpB(dim);
1038  tmpB.AddMat2Sp(1.0, tmpLInvFull, kNoTrans, between_class_covar, 0.0);
1039 
1040  Matrix<BaseFloat> U(dim, dim);
1041  diag_mean_scatter->Resize(dim);
1042  xform->Resize(dim, dim + 1);
1043  inv_xform->Resize(dim, dim + 1);
1044 
1045  tmpB.Eig(diag_mean_scatter, &U); // Eq. (B.5): B = U D V^T
1046 
1047  int32 n;
1048  diag_mean_scatter->ApplyFloor(1.0e-04, &n);
1049  if (n != 0)
1050  KALDI_WARN << "Floored " << n << " elements of the mean-scatter matrix.";
1051 
1052  // Eq. (B.6): A_{pre} = U^T * L^{-1}
1053  SubMatrix<BaseFloat> Apre(*xform, 0, dim, 0, dim);
1054  Apre.AddMatMat(1.0, U, kTrans, tmpLInvFull, kNoTrans, 0.0);
1055 
1056 #ifdef KALDI_PARANOID
1057  {
1058  SpMatrix<BaseFloat> tmp(dim);
1059  tmp.AddMat2Sp(1.0, Apre, kNoTrans, within_class_covar, 0.0);
1060  KALDI_ASSERT(tmp.IsUnit(0.01));
1061  }
1062  {
1063  SpMatrix<BaseFloat> tmp(dim);
1064  tmp.AddMat2Sp(1.0, Apre, kNoTrans, between_class_covar, 0.0);
1065  KALDI_ASSERT(tmp.IsDiagonal(0.01));
1066  }
1067 #endif
1068 
1069  // Eq. (B.7): b_{pre} = - A_{pre} \mu_{avg}
1070  Vector<BaseFloat> b_pre(dim);
1071  b_pre.AddMatVec(-1.0, Apre, kNoTrans, global_mean, 0.0);
1072  for (int32 r = 0; r < dim; r++) {
1073  xform->Row(r)(dim) = b_pre(r); // W_{pre} = [ A_{pre}, b_{pre} ]
1074  }
1075 
1076  // Eq. (B.8) & (B.9): W_{inv} = [ A_{pre}^{-1}, \mu_{avg} ]
1077  inv_xform->CopyFromMat(*xform);
1078  inv_xform->Range(0, dim, 0, dim).InvertDouble();
1079  for (int32 r = 0; r < dim; r++)
1080  inv_xform->Row(r)(dim) = global_mean(r);
1081 } // End of ComputePreXform()
1082 
1083 template<typename Real>
1084 void AmSgmm2::GetNtransSigmaInv(vector< Matrix<Real> > *out) const {
1085  KALDI_ASSERT(SpkSpaceDim() > 0 &&
1086  "Cannot compute N^{T} \\Sigma_{i}^{-1} without speaker projections.");
1087  out->resize(NumGauss());
1088  Matrix<Real> tmpcov(FeatureDim(), FeatureDim());
1089  Matrix<Real> tmp_n(FeatureDim(), SpkSpaceDim());
1090  for (int32 i = 0; i < NumGauss(); i++) {
1091  tmpcov.CopyFromSp(SigmaInv_[i]);
1092  tmp_n.CopyFromMat(N_[i]);
1093  (*out)[i].Resize(SpkSpaceDim(), FeatureDim());
1094  (*out)[i].AddMatMat(1.0, tmp_n, kTrans, tmpcov, kNoTrans, 0.0);
1095  }
1096 }
1097 
1098 // Instantiate the above template.
1099 template
1100 void AmSgmm2::GetNtransSigmaInv(vector< Matrix<float> > *out) const;
1101 template
1102 void AmSgmm2::GetNtransSigmaInv(vector< Matrix<double> > *out) const;
1103 
1105 
1106 template<class Real>
1107 void AmSgmm2::ComputeH(std::vector< SpMatrix<Real> > *H_i) const {
1108  KALDI_ASSERT(NumGauss() != 0);
1109  (*H_i).resize(NumGauss());
1110  SpMatrix<BaseFloat> H_i_tmp(PhoneSpaceDim());
1111  for (int32 i = 0; i < NumGauss(); i++) {
1112  (*H_i)[i].Resize(PhoneSpaceDim());
1113  H_i_tmp.AddMat2Sp(1.0, M_[i], kTrans, SigmaInv_[i], 0.0);
1114  (*H_i)[i].CopyFromSp(H_i_tmp);
1115  }
1116 }
1117 
1118 // Instantiate the template.
1119 template
1120 void AmSgmm2::ComputeH(std::vector< SpMatrix<float> > *H_i) const;
1121 template
1122 void AmSgmm2::ComputeH(std::vector< SpMatrix<double> > *H_i) const;
1123 
1124 
1125 // Initializes the matrices M_{i} and w_i
1126 void AmSgmm2::InitializeMw(int32 phn_subspace_dim,
1127  const Matrix<BaseFloat> &norm_xform) {
1128  int32 ddim = full_ubm_.Dim();
1129  KALDI_ASSERT(phn_subspace_dim <= ddim + 1);
1130  KALDI_ASSERT(phn_subspace_dim <= norm_xform.NumCols() + 1);
1131  KALDI_ASSERT(ddim <= norm_xform.NumRows());
1132 
1133  Vector<BaseFloat> mean(ddim);
1134  int32 num_gauss = full_ubm_.NumGauss();
1135  w_.Resize(num_gauss, phn_subspace_dim);
1136  M_.resize(num_gauss);
1137  for (int32 i = 0; i < num_gauss; i++) {
1138  full_ubm_.GetComponentMean(i, &mean);
1139  Matrix<BaseFloat> &thisM(M_[i]);
1140  thisM.Resize(ddim, phn_subspace_dim);
1141  // Eq. (27): M_{i} = [ \bar{\mu}_{i} (J)_{1:D, 1:(S-1)}]
1142  thisM.CopyColFromVec(mean, 0);
1143  int32 nonrandom_dim = std::min(phn_subspace_dim - 1, ddim),
1144  random_dim = phn_subspace_dim - 1 - nonrandom_dim;
1145  thisM.Range(0, ddim, 1, nonrandom_dim).CopyFromMat(
1146  norm_xform.Range(0, ddim, 0, nonrandom_dim), kNoTrans);
1147  // The following extension to the original paper allows us to
1148  // initialize the model with a larger dimension of phone-subspace vector.
1149  if (random_dim > 0)
1150  thisM.Range(0, ddim, nonrandom_dim + 1, random_dim).SetRandn();
1151  }
1152 }
1153 
1154 // Initializes the matrices N_i, and [if speaker_dependent_weights==true] u_i.
1155 void AmSgmm2::InitializeNu(int32 spk_subspace_dim,
1156  const Matrix<BaseFloat> &norm_xform,
1157  bool speaker_dependent_weights) {
1158  int32 ddim = full_ubm_.Dim();
1159 
1160  int32 num_gauss = full_ubm_.NumGauss();
1161  N_.resize(num_gauss);
1162  for (int32 i = 0; i < num_gauss; i++) {
1163  N_[i].Resize(ddim, spk_subspace_dim);
1164  // Eq. (28): N_{i} = [ (J)_{1:D, 1:T} ]
1165 
1166  int32 nonrandom_dim = std::min(spk_subspace_dim, ddim),
1167  random_dim = spk_subspace_dim - nonrandom_dim;
1168 
1169  N_[i].Range(0, ddim, 0, nonrandom_dim).
1170  CopyFromMat(norm_xform.Range(0, ddim, 0, nonrandom_dim), kNoTrans);
1171  // The following extension to the original paper allows us to
1172  // initialize the model with a larger dimension of speaker-subspace vector.
1173  if (random_dim > 0)
1174  N_[i].Range(0, ddim, nonrandom_dim, random_dim).SetRandn();
1175  }
1176  if (speaker_dependent_weights) {
1177  u_.Resize(num_gauss, spk_subspace_dim); // will set to zero.
1178  } else {
1179  u_.Resize(0, 0);
1180  }
1181 }
1182 
1183 void AmSgmm2::CopyGlobalsInitVecs(const AmSgmm2 &other,
1184  const std::vector<int32> &pdf2group,
1185  BaseFloat self_weight) {
1186  KALDI_LOG << "Initializing model";
1187  pdf2group_ = pdf2group;
1188  ComputePdfMappings();
1189 
1190  // Copy background GMMs
1191  diag_ubm_.CopyFromDiagGmm(other.diag_ubm_);
1192  full_ubm_.CopyFromFullGmm(other.full_ubm_);
1193 
1194  // Copy global params
1195  SigmaInv_ = other.SigmaInv_;
1196 
1197  M_ = other.M_;
1198  w_ = other.w_;
1199  u_ = other.u_;
1200  N_ = other.N_;
1201 
1202  InitializeVecsAndSubstateWeights(self_weight);
1203 }
1204 
1205 
1206 // Initializes the vectors v_{j1,m} and substate weights c_{j2,m}.
1207 void AmSgmm2::InitializeVecsAndSubstateWeights(BaseFloat self_weight) {
1208  int32 J1 = NumGroups(), J2 = NumPdfs();
1209  KALDI_ASSERT(J1 > 0 && J2 >= J1);
1210  int32 phn_subspace_dim = PhoneSpaceDim();
1211  KALDI_ASSERT(phn_subspace_dim > 0 && "Initialize M and w first.");
1212 
1213  v_.resize(J1);
1214  if (self_weight == 1.0) {
1215  for (int32 j1 = 0; j1 < J1; j1++) {
1216  v_[j1].Resize(1, phn_subspace_dim);
1217  v_[j1](0, 0) = 1.0; // Eq. (26): v_{j1} = [1 0 0 ... 0]
1218  }
1219  c_.resize(J2);
1220  for (int32 j2 = 0; j2 < J2; j2++) {
1221  c_[j2].Resize(1);
1222  c_[j2](0) = 1.0; // Eq. (25): c_{j1} = 1.0
1223  }
1224  } else {
1225  for (int32 j1 = 0; j1 < J1; j1++) {
1226  int32 npdfs = group2pdf_[j1].size();
1227  v_[j1].Resize(npdfs, phn_subspace_dim);
1228  for (int32 m = 0; m < npdfs; m++)
1229  v_[j1](m, 0) = 1.0; // Eq. (26): v_{j1} = [1 0 0 ... 0]
1230  }
1231  c_.resize(J2);
1232  for (int32 j2 = 0; j2 < J2; j2++) {
1233  int32 j1 = pdf2group_[j2], npdfs = group2pdf_[j1].size();
1234  c_[j2].Resize(npdfs);
1235  if (npdfs == 1) c_[j2].Set(1.0);
1236  else {
1237  // note: the std::max just avoids division by zero when npdfs == 1; the value won't matter then.
1238  double other_weight = (1.0 - self_weight) / std::max(npdfs - 1, 1);
1239  c_[j2].Set(other_weight);
1240  for (int32 k = 0; k < npdfs; k++)
1241  if(group2pdf_[j1][k] == j2) c_[j2](k) = self_weight;
1242  }
1243  }
1244  }
1245 }
1246 
1247 // Initializes the within-class vars Sigma_{ki}
1248 void AmSgmm2::InitializeCovars() {
1249  std::vector< SpMatrix<BaseFloat> > &inv_covars(full_ubm_.inv_covars());
1250  int32 num_gauss = full_ubm_.NumGauss();
1251  int32 dim = full_ubm_.Dim();
1252  SigmaInv_.resize(num_gauss);
1253  for (int32 i = 0; i < num_gauss; i++) {
1254  SigmaInv_[i].Resize(dim);
1255  SigmaInv_[i].CopyFromSp(inv_covars[i]);
1256  }
1257 }
1258 
1259 // Compute the "smoothing" matrix H^{(sm)} from expected counts given the model.
1260 void AmSgmm2::ComputeHsmFromModel(
1261  const std::vector< SpMatrix<BaseFloat> > &H,
1262  const Vector<BaseFloat> &state_occupancies,
1263  SpMatrix<BaseFloat> *H_sm,
1264  BaseFloat max_cond) const {
1265  int32 num_gauss = NumGauss();
1266  BaseFloat tot_sum = 0.0;
1267  KALDI_ASSERT(state_occupancies.Dim() == NumPdfs());
1268  Vector<BaseFloat> w_jm(num_gauss);
1269  H_sm->Resize(PhoneSpaceDim());
1270  H_sm->SetZero();
1271  Vector<BaseFloat> gamma_i;
1272  ComputeGammaI(state_occupancies, &gamma_i);
1273 
1274  BaseFloat sum = 0.0;
1275  for (int32 i = 0; i < num_gauss; i++) {
1276  if (gamma_i(i) > 0) {
1277  H_sm->AddSp(gamma_i(i), H[i]);
1278  sum += gamma_i(i);
1279  }
1280  }
1281  if (sum == 0.0) {
1282  KALDI_WARN << "Sum of counts is zero. ";
1283  // Set to the unit matrix (an arbitrary non-singular matrix); it won't ever matter.
1284  H_sm->SetUnit();
1285  } else {
1286  H_sm->Scale(1.0 / sum);
1287  int32 tmp = H_sm->LimitCondDouble(max_cond);
1288  if (tmp > 0) {
1289  KALDI_WARN << "Limited " << (tmp) << " eigenvalues of H_sm";
1290  }
1291  }
1292  tot_sum += sum;
1293 
1294  KALDI_LOG << "total count is " << tot_sum;
1295 }
1296 
1297 void ComputeFeatureNormalizingTransform(const FullGmm &gmm, Matrix<BaseFloat> *xform) {
1298  int32 dim = gmm.Dim();
1299  int32 num_gauss = gmm.NumGauss();
1300  SpMatrix<BaseFloat> within_class_covar(dim);
1301  SpMatrix<BaseFloat> between_class_covar(dim);
1302  Vector<BaseFloat> global_mean(dim);
1303 
1304  // Accumulate LDA statistics from the GMM parameters.
1305  {
1306  BaseFloat total_weight = 0.0;
1307  Vector<BaseFloat> tmp_weight(num_gauss);
1308  Matrix<BaseFloat> tmp_means;
1309  std::vector< SpMatrix<BaseFloat> > tmp_covars;
1310  tmp_weight.CopyFromVec(gmm.weights());
1311  gmm.GetCovarsAndMeans(&tmp_covars, &tmp_means);
1312  for (int32 i = 0; i < num_gauss; i++) {
1313  BaseFloat w_i = tmp_weight(i);
1314  total_weight += w_i;
1315  within_class_covar.AddSp(w_i, tmp_covars[i]);
1316  between_class_covar.AddVec2(w_i, tmp_means.Row(i));
1317  global_mean.AddVec(w_i, tmp_means.Row(i));
1318  }
1319  KALDI_ASSERT(total_weight > 0);
1320  if (fabs(total_weight - 1.0) > 0.001) {
1321  KALDI_WARN << "Total weight across the GMMs is " << (total_weight)
1322  << ", renormalizing.";
1323  global_mean.Scale(1.0 / total_weight);
1324  within_class_covar.Scale(1.0 / total_weight);
1325  between_class_covar.Scale(1.0 / total_weight);
1326  }
1327  between_class_covar.AddVec2(-1.0, global_mean);
1328  }
1329 
1330  TpMatrix<BaseFloat> chol(dim);
1331  chol.Cholesky(within_class_covar); // Sigma_W = L L^T
1332  TpMatrix<BaseFloat> chol_inv(chol);
1333  chol_inv.InvertDouble();
1334  Matrix<BaseFloat> chol_full(dim, dim);
1335  chol_full.CopyFromTp(chol_inv);
1336  SpMatrix<BaseFloat> LBL(dim);
1337  // LBL = L^{-1} \Sigma_B L^{-T}
1338  LBL.AddMat2Sp(1.0, chol_full, kNoTrans, between_class_covar, 0.0);
1339  Vector<BaseFloat> Dvec(dim);
1340  Matrix<BaseFloat> U(dim, dim);
1341  LBL.Eig(&Dvec, &U);
1342  SortSvd(&Dvec, &U);
1343 
1344  xform->Resize(dim, dim);
1345  chol_full.CopyFromTp(chol);
1346  // T := L U, eq (23)
1347  xform->AddMatMat(1.0, chol_full, kNoTrans, U, kNoTrans, 0.0);
1348 
1349 #ifdef KALDI_PARANOID
1350  Matrix<BaseFloat> inv_xform(*xform);
1351  inv_xform.InvertDouble();
1352  { // Check that T*within_class_covar*T' = I.
1353  Matrix<BaseFloat> wc_covar_full(dim, dim), tmp(dim, dim);
1354  wc_covar_full.CopyFromSp(within_class_covar);
1355  tmp.AddMatMat(1.0, inv_xform, kNoTrans, wc_covar_full, kNoTrans, 0.0);
1356  wc_covar_full.AddMatMat(1.0, tmp, kNoTrans, inv_xform, kTrans, 0.0);
1357  KALDI_ASSERT(wc_covar_full.IsUnit(0.01));
1358  }
1359  { // Check that T*between_class_covar*T' = diagonal.
1360  Matrix<BaseFloat> bc_covar_full(dim, dim), tmp(dim, dim);
1361  bc_covar_full.CopyFromSp(between_class_covar);
1362  tmp.AddMatMat(1.0, inv_xform, kNoTrans, bc_covar_full, kNoTrans, 0.0);
1363  bc_covar_full.AddMatMat(1.0, tmp, kNoTrans, inv_xform, kTrans, 0.0);
1364  KALDI_ASSERT(bc_covar_full.IsDiagonal(0.01));
1365  }
1366 #endif
1367 }
1368 
1369 void AmSgmm2::ComputePerSpkDerivedVars(Sgmm2PerSpkDerivedVars *vars) const {
1370  KALDI_ASSERT(vars != NULL);
1371  if (vars->v_s.Dim() != 0) {
1372  KALDI_ASSERT(vars->v_s.Dim() == SpkSpaceDim());
1373  vars->o_s.Resize(NumGauss(), FeatureDim());
1374  int32 num_gauss = NumGauss();
1375  // first compute the o_i^{(s)} quantities.
1376  for (int32 i = 0; i < num_gauss; i++) {
1377  // Eqn. (32): o_i^{(s)} = N_i v^{(s)}
1378  vars->o_s.Row(i).AddMatVec(1.0, N_[i], kNoTrans, vars->v_s, 0.0);
1379  }
1380  // the rest relates to the SSGMM. We only need to do this
1381  // if we're using speaker-dependent weights.
1382  if (HasSpeakerDependentWeights()) {
1383  vars->log_d_jms.clear();
1384  vars->log_d_jms.resize(NumGroups());
1385  vars->log_b_is.Resize(NumGauss());
1386  vars->log_b_is.AddMatVec(1.0, u_, kNoTrans, vars->v_s, 0.0);
1387  vars->b_is.Resize(NumGauss());
1388  vars->b_is.CopyFromVec(vars->log_b_is);
1389  vars->b_is.ApplyExp();
1390  for (int32 i = 0; i < vars->b_is.Dim(); i++) {
1391  if (vars->b_is(i) - vars->b_is(i) != 0.0) { // NaN.
1392  vars->b_is(i) = 1.0;
1393  KALDI_WARN << "Set NaN in b_is to 1.0";
1394  }
1395  }
1396  } else {
1397  vars->b_is.Resize(0);
1398  vars->log_b_is.Resize(0);
1399  vars->log_d_jms.resize(0);
1400  }
1401  } else {
1402  vars->Clear(); // make sure everything is cleared.
1403  }
1404 }
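
// Usage sketch (editorial addition, not part of the original file): for a
// speaker-adapted pass, v_s is filled with the estimated speaker vector before
// calling this function; "sgmm" and "speaker_vector" (dim = SpkSpaceDim()) are
// placeholders.
//
//   Sgmm2PerSpkDerivedVars spk_vars;
//   spk_vars.v_s.Resize(sgmm.SpkSpaceDim());
//   spk_vars.v_s.CopyFromVec(speaker_vector);
//   sgmm.ComputePerSpkDerivedVars(&spk_vars);  // fills o_s (and b_is for SSGMM)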
1405 
1406 BaseFloat AmSgmm2::GaussianSelection(const Sgmm2GselectConfig &config,
1407  const VectorBase<BaseFloat> &data,
1408  std::vector<int32> *gselect) const {
1409  KALDI_ASSERT(diag_ubm_.NumGauss() != 0 &&
1410  diag_ubm_.NumGauss() == full_ubm_.NumGauss() &&
1411  diag_ubm_.Dim() == data.Dim());
1412  KALDI_ASSERT(config.diag_gmm_nbest > 0 && config.full_gmm_nbest > 0 &&
1413  config.full_gmm_nbest < config.diag_gmm_nbest);
1414  int32 num_gauss = diag_ubm_.NumGauss();
1415 
1416  std::vector< std::pair<BaseFloat, int32> > pruned_pairs;
1417  if (config.diag_gmm_nbest < num_gauss) { Vector<BaseFloat> loglikes(num_gauss);
1418  diag_ubm_.LogLikelihoods(data, &loglikes);
1419  Vector<BaseFloat> loglikes_copy(loglikes);
1420  BaseFloat *ptr = loglikes_copy.Data();
1421  std::nth_element(ptr, ptr+num_gauss-config.diag_gmm_nbest, ptr+num_gauss);
1422  BaseFloat thresh = ptr[num_gauss-config.diag_gmm_nbest];
1423  for (int32 g = 0; g < num_gauss; g++)
1424  if (loglikes(g) >= thresh) // met threshold for diagonal phase.
1425  pruned_pairs.push_back(
1426  std::make_pair(full_ubm_.ComponentLogLikelihood(data, g), g));
1427  } else {
1428  Vector<BaseFloat> loglikes(num_gauss);
1429  full_ubm_.LogLikelihoods(data, &loglikes);
1430  for (int32 g = 0; g < num_gauss; g++)
1431  pruned_pairs.push_back(std::make_pair(loglikes(g), g));
1432  }
1433  KALDI_ASSERT(!pruned_pairs.empty());
1434  if (pruned_pairs.size() > static_cast<size_t>(config.full_gmm_nbest)) {
1435  std::nth_element(pruned_pairs.begin(),
1436  pruned_pairs.end() - config.full_gmm_nbest,
1437  pruned_pairs.end());
1438  pruned_pairs.erase(pruned_pairs.begin(),
1439  pruned_pairs.end() - config.full_gmm_nbest);
1440  }
1441  Vector<BaseFloat> loglikes_tmp(pruned_pairs.size()); // for return value.
1442  KALDI_ASSERT(gselect != NULL);
1443  gselect->resize(pruned_pairs.size());
1444  // Make sure pruned Gaussians appear from best to worst.
1445  std::sort(pruned_pairs.begin(), pruned_pairs.end(),
1446  std::greater< std::pair<BaseFloat, int32> >());
1447  for (size_t i = 0; i < pruned_pairs.size(); i++) {
1448  loglikes_tmp(i) = pruned_pairs[i].first;
1449  (*gselect)[i] = pruned_pairs[i].second;
1450  }
1451  return loglikes_tmp.LogSumExp();
1452 }
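
// Usage sketch (editorial addition, not part of the original file): Gaussian
// selection is two-stage pruning, diag_gmm_nbest candidates scored with the
// diagonal UBM, then full_gmm_nbest survivors rescored with the full-covariance
// UBM; the return value is the log-sum of the selected full-UBM log-likelihoods.
// "sgmm" and "frame" are placeholders and the nbest values are only illustrative.
//
//   Sgmm2GselectConfig gsel_config;
//   gsel_config.diag_gmm_nbest = 50;
//   gsel_config.full_gmm_nbest = 15;
//   std::vector<int32> gselect;
//   BaseFloat tot = sgmm.GaussianSelection(gsel_config, frame, &gselect);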
1453 
1454 void Sgmm2GauPost::Write(std::ostream &os, bool binary) const {
1455  WriteToken(os, binary, "<Sgmm2GauPost>");
1456  int32 T = this->size();
1457  WriteBasicType(os, binary, T);
1458  for (int32 t = 0; t < T; t++) {
1459  WriteToken(os, binary, "<gselect>");
1460  WriteIntegerVector(os, binary, (*this)[t].gselect);
1461  WriteToken(os, binary, "<tids>");
1462  WriteIntegerVector(os, binary, (*this)[t].tids);
1463  KALDI_ASSERT((*this)[t].tids.size() == (*this)[t].posteriors.size());
1464  for (size_t i = 0; i < (*this)[t].posteriors.size(); i++) {
1465  (*this)[t].posteriors[i].Write(os, binary);
1466  }
1467  }
1468  WriteToken(os, binary, "</Sgmm2GauPost>");
1469 }
1470 
1471 
1472 void Sgmm2GauPost::Read(std::istream &is, bool binary) {
1473  ExpectToken(is, binary, "<Sgmm2GauPost>");
1474  int32 T;
1475  ReadBasicType(is, binary, &T);
1476  KALDI_ASSERT(T >= 0);
1477  this->resize(T);
1478  for (int32 t = 0; t < T; t++) {
1479  ExpectToken(is, binary, "<gselect>");
1480  ReadIntegerVector(is, binary, &((*this)[t].gselect));
1481  ExpectToken(is, binary, "<tids>");
1482  ReadIntegerVector(is, binary, &((*this)[t].tids));
1483  size_t sz = (*this)[t].tids.size();
1484  (*this)[t].posteriors.resize(sz);
1485  for (size_t i = 0; i < sz; i++)
1486  (*this)[t].posteriors[i].Read(is, binary);
1487  }
1488  ExpectToken(is, binary, "</Sgmm2GauPost>");
1489 }
1490 
1491 
1492 
1493 } // namespace kaldi
Vector< BaseFloat > b_is
Definition: am-sgmm2.h:189
std::vector< Matrix< BaseFloat > > M_
Phonetic-subspace projections. Dimension is [I][D][S].
Definition: am-sgmm2.h:425
void CopyFromSp(const SpMatrix< OtherReal > &M)
Copy given spmatrix. (no resize is done).
void GetSplitTargets(const Vector< BaseFloat > &state_occs, int32 target_components, BaseFloat power, BaseFloat min_count, std::vector< int32 > *targets)
Get Gaussian-mixture or substate-mixture splitting targets, according to a power rule (e...
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=nullptr)
Applies floor to all elements.
Definition: kaldi-vector.h:149
void Read(std::istream &is, bool binary)
Definition: am-sgmm2.cc:1472
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
std::vector< Vector< BaseFloat > > log_d_jms
< [SSGMM] log of the above (more efficient to store both).
Definition: am-sgmm2.h:191
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v&#39;
Definition: sp-matrix.cc:946
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
BaseFloat LogLikelihood(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2LikelihoodCache *cache, Sgmm2PerSpkDerivedVars *spk_vars, BaseFloat log_prune=0.0) const
This does a likelihood computation for a given state using the pre-selected Gaussian components (in p...
Definition: am-sgmm2.cc:517
Matrix< BaseFloat > xti
x_{i}(t) = x&#39;(t) - o_i(s): dim = [I][D], eq.(34)
Definition: am-sgmm2.h:145
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232
std::vector< SpMatrix< BaseFloat > > SigmaInv_
Globally shared parameters of the subspace GMM.
Definition: am-sgmm2.h:423
double Log(double x)
Definition: kaldi-math.h:100
void ComponentLogLikes(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j1, Sgmm2PerSpkDerivedVars *spk_vars, Matrix< BaseFloat > *loglikes) const
The code below is called internally from LogLikelihood() and ComponentPosteriors().
Definition: am-sgmm2.cc:476
void SplitSubstates(const Vector< BaseFloat > &state_occupancies, const Sgmm2SplitSubstatesConfig &config)
Increases the total number of substates based on the state occupancies.
Definition: am-sgmm2.cc:657
void Scale(Real alpha)
Multiply each element with a scalar value.
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
Definition: sp-matrix.h:211
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void AddVecToRows(const Real alpha, const VectorBase< OtherReal > &v)
[each row of *this] += alpha * v
struct rnnlm::@11::@12 n
int32 Pdf2Group(int32 j2) const
Definition: am-sgmm2.cc:196
void Check(bool show_properties=true)
Checks the various components for correct sizes.
Definition: am-sgmm2.cc:276
FullGmm full_ubm_
Definition: am-sgmm2.h:414
void RunMultiThreaded(const C &c_in)
Here, class C should inherit from MultiThreadable.
Definition: kaldi-thread.h:151
void AddMatSp(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const SpMatrix< Real > &B, const Real beta)
this <– beta*this + alpha*A*B.
Definition: kaldi-matrix.h:708
Vector< BaseFloat > nti
n_{i}(t), dim = [I], eq.
Definition: am-sgmm2.h:147
std::vector< PdfCacheElement > pdf_cache
Definition: am-sgmm2.h:223
BaseFloat GaussianSelection(const Sgmm2GselectConfig &config, const VectorBase< BaseFloat > &data, std::vector< int32 > *gselect) const
Computes the top-scoring Gaussian indices (used for pruning of later stages of computation).
Definition: am-sgmm2.cc:1406
void CopyFromTp(const TpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given tpmatrix. (no resize is done).
void SplitSubstatesInGroup(const Vector< BaseFloat > &pdf_occupancies, const Sgmm2SplitSubstatesConfig &opts, const SpMatrix< BaseFloat > &sqrt_H_sm, int32 j1, int32 M)
Called inside SplitSubstates(); splits substates of one group.
Definition: am-sgmm2.cc:599
void CopyGlobalsInitVecs(const AmSgmm2 &other, const std::vector< int32 > &pdf2group, BaseFloat self_weight)
Copies the global parameters from the supplied model, but sets the state vectors to zero...
Definition: am-sgmm2.cc:1183
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
int32 full_gmm_nbest
Number of highest-scoring full-covariance Gaussians per frame.
Definition: am-sgmm2.h:120
#define KALDI_ERR
Definition: kaldi-error.h:147
void ComputeH(std::vector< SpMatrix< Real > > *H_i) const
Computes quantities H = M_i Sigma_i^{-1} M_i^T.
Definition: am-sgmm2.cc:1107
void ComputePerFrameVars(const VectorBase< BaseFloat > &data, const std::vector< int32 > &gselect, const Sgmm2PerSpkDerivedVars &spk_vars, Sgmm2PerFrameDerivedVars *per_frame_vars) const
This needs to be called with each new frame of data, prior to accumulation or likelihood evaluation: ...
Definition: am-sgmm2.cc:442
Packed symetric matrix class.
Definition: matrix-common.h:63
#define KALDI_WARN
Definition: kaldi-error.h:150
void Resize(int32 ngauss, int32 feat_dim, int32 phn_dim)
Definition: am-sgmm2.h:151
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: kaldi-vector.h:70
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: full-gmm.h:58
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
void InitializeNu(int32 spk_subspace_dim, const Matrix< BaseFloat > &norm_xform, bool speaker_dependent_weights)
Initializes the matrices N_ and [if speaker_dependent_weights==true] u_.
Definition: am-sgmm2.cc:1155
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
Real Sum() const
Returns sum of all elements in matrix.
void Scale(Real alpha)
Multiplies all elements by this constant.
const Vector< BaseFloat > & weights() const
Definition: full-gmm.h:144
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
void ComputePdfMappings()
Definition: am-sgmm2.cc:72
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
Real Sum() const
Returns sum of the elements.
std::vector< int32 > gselect
Definition: am-sgmm2.h:143
void MulColsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = (*this) * diag(scale).
ComputeNormalizersClass(const ComputeNormalizersClass &other)
Definition: am-sgmm2.cc:829
int32 diag_gmm_nbest
Number of highest-scoring diagonal-covariance Gaussians per frame.
Definition: am-sgmm2.h:122
std::vector< std::vector< int32 > > group2pdf_
Definition: am-sgmm2.h:410
void GetNtransSigmaInv(std::vector< Matrix< Real > > *out) const
Definition: am-sgmm2.cc:1084
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <– beta*this + alpha * M * A * M^T.
Definition: sp-matrix.cc:982
void ComputeDerivedVars()
Computes (and initializes if necessary) derived vars...
Definition: am-sgmm2.cc:810
void ComputeNormalizers()
Computes the data-independent terms in the log-likelihood computation for each Gaussian component and...
Definition: am-sgmm2.cc:857
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
Sgmm2LikelihoodCache caches SGMM likelihoods at two levels: the final pdf likelihoods, and the sub-state level likelihoods, which means that with the SCTM system we can avoid redundant computation.
Definition: am-sgmm2.h:199
std::vector< SubstateCacheElement > substate_cache
Definition: am-sgmm2.h:222
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void Write(std::ostream &os, bool binary) const
Definition: am-sgmm2.cc:1454
void InvertDouble()
Definition: tp-matrix.h:83
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void Add(Real c)
Add a constant to each element of a vector.
void ComputeGammaI(const Vector< BaseFloat > &state_occupancies, Vector< BaseFloat > *gamma_i) const
Computes quasi-occupancies gamma_i from the state-level occupancies, assuming model correctness...
Definition: am-sgmm2.cc:50
void ComputeFeatureNormalizingTransform(const FullGmm &gmm, Matrix< BaseFloat > *xform)
Computes the inverse of an LDA transform (without dimensionality reduction) The computed transform is...
Definition: am-sgmm2.cc:1297
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Vector< BaseFloat > log_b_is
< [SSGMM]: Eq. (22) in techreport, b_i^{(s)} = (^T ^{(s)})
Definition: am-sgmm2.h:190
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
void InitializeMw(int32 phn_subspace_dim, const Matrix< BaseFloat > &norm_xform)
Initializes the matrices M_ and w_.
Definition: am-sgmm2.cc:1126
void GetCovarsAndMeans(std::vector< SpMatrix< Real > > *covars, Matrix< Real > *means) const
Accessor for covariances and means.
Definition: full-gmm-inl.h:132
std::vector< Matrix< BaseFloat > > w_jmi_
[SSGMM] w_{jmi}, dimension is [J1][#mix][I]. Computed from w_ and v_.
Definition: am-sgmm2.h:442
MatrixIndexT LimitCondDouble(Real maxCond=1.0e+5, bool invert=false)
Definition: sp-matrix.h:333
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
ComputeNormalizersClass(AmSgmm2 *am_sgmm, int32 *entropy_count_ptr, double *entropy_sum_ptr)
Definition: am-sgmm2.cc:822
Matrix< BaseFloat > o_s
Per-speaker offsets o_{i}. Dimension is [I][D].
Definition: am-sgmm2.h:188
void ComputeFmllrPreXform(const Vector< BaseFloat > &pdf_occs, Matrix< BaseFloat > *xform, Matrix< BaseFloat > *inv_xform, Vector< BaseFloat > *diag_mean_scatter) const
Computes the LDA-like pre-transform and its inverse as well as the eigenvalues of the scatter of the ...
Definition: am-sgmm2.cc:965