diag-gmm.cc
Go to the documentation of this file.
1 // gmm/diag-gmm.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation;
4 // Saarland University (Author: Arnab Ghoshal);
5 // Georg Stemmer; Jan Silovsky
6 // 2012 Arnab Ghoshal
7 // 2013-2014 Johns Hopkins University (author: Daniel Povey)
8 
9 // See ../../COPYING for clarification regarding multiple authors
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
19 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
20 // MERCHANTABLITY OR NON-INFRINGEMENT.
21 // See the Apache 2 License for the specific language governing permissions and
22 // limitations under the License.
23 
24 #include <algorithm>
25 #include <functional>
26 #include <limits>
27 #include <string>
28 #include <vector>
29 
30 #include "gmm/diag-gmm.h"
31 #include "gmm/diag-gmm-normal.h"
32 #include "gmm/full-gmm.h"
33 #include "gmm/full-gmm-normal.h"
35 
36 namespace kaldi {
37 
38 // Constructor that allows us to merge GMMs.
39 DiagGmm::DiagGmm(const std::vector<std::pair<BaseFloat, const DiagGmm*> > &gmms)
40  : valid_gconsts_(false) {
41  if (gmms.empty()) {
42  return; // GMM will be empty.
43  } else {
44  int32 num_gauss = 0, dim = gmms[0].second->Dim();
45  for (size_t i = 0; i < gmms.size(); i++)
46  num_gauss += gmms[i].second->NumGauss();
47  Resize(num_gauss, dim);
48  int32 cur_gauss = 0;
49  for (size_t i = 0; i < gmms.size(); i++) {
50  BaseFloat weight = gmms[i].first;
51  KALDI_ASSERT(weight > 0.0);
52  const DiagGmm &gmm = *(gmms[i].second);
53  for (int32 g = 0; g < gmm.NumGauss(); g++, cur_gauss++) {
54  means_invvars_.Row(cur_gauss).CopyFromVec(gmm.means_invvars().Row(g));
55  inv_vars_.Row(cur_gauss).CopyFromVec(gmm.inv_vars().Row(g));
56  weights_(cur_gauss) = weight * gmm.weights()(g);
57  }
58  }
59  KALDI_ASSERT(cur_gauss == NumGauss());
61  }
62 }
63 
64 
65 
66 void DiagGmm::Resize(int32 nmix, int32 dim) {
67  KALDI_ASSERT(nmix > 0 && dim > 0);
68  if (gconsts_.Dim() != nmix) gconsts_.Resize(nmix);
69  if (weights_.Dim() != nmix) weights_.Resize(nmix);
70  if (inv_vars_.NumRows() != nmix ||
71  inv_vars_.NumCols() != dim) {
72  inv_vars_.Resize(nmix, dim);
73  inv_vars_.Set(1.0);
74  // must be initialized to unit for case of calling SetMeans while having
75  // covars/invcovars that are not set yet (i.e. zero)
76  }
77  if (means_invvars_.NumRows() != nmix ||
78  means_invvars_.NumCols() != dim)
79  means_invvars_.Resize(nmix, dim);
80  valid_gconsts_ = false;
81 }
82 
83 void DiagGmm::CopyFromDiagGmm(const DiagGmm &diaggmm) {
84  Resize(diaggmm.weights_.Dim(), diaggmm.means_invvars_.NumCols());
85  gconsts_.CopyFromVec(diaggmm.gconsts_);
86  weights_.CopyFromVec(diaggmm.weights_);
90 }
91 
92 void DiagGmm::CopyFromFullGmm(const FullGmm &fullgmm) {
93  int32 num_comp = fullgmm.NumGauss(), dim = fullgmm.Dim();
94  Resize(num_comp, dim);
95  gconsts_.CopyFromVec(fullgmm.gconsts());
96  weights_.CopyFromVec(fullgmm.weights());
97  Matrix<BaseFloat> means(num_comp, dim);
98  fullgmm.GetMeans(&means);
99  int32 ncomp = NumGauss();
100  for (int32 mix = 0; mix < ncomp; mix++) {
101  SpMatrix<double> covar(dim);
102  covar.CopyFromSp(fullgmm.inv_covars()[mix]);
103  covar.Invert();
104  Vector<double> diag(dim);
105  diag.CopyDiagFromPacked(covar);
106  diag.InvertElements();
107  inv_vars_.Row(mix).CopyFromVec(diag);
108  }
111  ComputeGconsts();
112 }
113 
115  int32 num_mix = NumGauss();
116  int32 dim = Dim();
117  BaseFloat offset = -0.5 * M_LOG_2PI * dim; // constant term in gconst.
118  int32 num_bad = 0;
119 
120  // Resize if Gaussians have been removed during Update()
121  if (num_mix != static_cast<int32>(gconsts_.Dim()))
122  gconsts_.Resize(num_mix);
123 
124  for (int32 mix = 0; mix < num_mix; mix++) {
125  KALDI_ASSERT(weights_(mix) >= 0); // Cannot have negative weights.
126  BaseFloat gc = Log(weights_(mix)) + offset; // May be -inf if weights == 0
127  for (int32 d = 0; d < dim; d++) {
128  gc += 0.5 * Log(inv_vars_(mix, d)) - 0.5 * means_invvars_(mix, d)
129  * means_invvars_(mix, d) / inv_vars_(mix, d);
130  }
131  // Change sign for logdet because var is inverted. Also, note that
132  // mean_invvars(mix, d)*mean_invvars(mix, d)/inv_vars(mix, d) is the
133  // mean-squared times inverse variance, since mean_invvars(mix, d) contains
134  // the mean times inverse variance.
135  // So gc is the likelihood at zero feature value.
136 
137  if (KALDI_ISNAN(gc)) { // negative infinity is OK but NaN is not acceptable
138  KALDI_ERR << "At component " << mix
139  << ", not a number in gconst computation";
140  }
141  if (KALDI_ISINF(gc)) {
142  num_bad++;
143  // If positive infinity, make it negative infinity.
144  // Want to make sure the answer becomes -inf in the end, not NaN.
145  if (gc > 0) gc = -gc;
146  }
147  gconsts_(mix) = gc;
148  }
149 
150  valid_gconsts_ = true;
151  return num_bad;
152 }
153 
154 void DiagGmm::Split(int32 target_components, float perturb_factor,
155  std::vector<int32> *history) {
156  if (target_components < NumGauss() || NumGauss() == 0) {
157  KALDI_ERR << "Cannot split from " << NumGauss() << " to "
158  << target_components << " components";
159  }
160  if (target_components == NumGauss()) {
161  KALDI_WARN << "Already have the target # of Gaussians. Doing nothing.";
162  return;
163  }
164 
165  int32 current_components = NumGauss(), dim = Dim();
166  DiagGmm *tmp = new DiagGmm;
167  tmp->CopyFromDiagGmm(*this); // so we have copies of matrices
168  // First do the resize:
169  weights_.Resize(target_components);
170  weights_.Range(0, current_components).CopyFromVec(tmp->weights_);
171  means_invvars_.Resize(target_components, dim);
172  means_invvars_.Range(0, current_components, 0, dim).CopyFromMat(
173  tmp->means_invvars_);
174  inv_vars_.Resize(target_components, dim);
175  inv_vars_.Range(0, current_components, 0, dim).CopyFromMat(tmp->inv_vars_);
176  gconsts_.Resize(target_components);
177 
178  delete tmp;
179 
180  // future work(arnab): Use a priority queue instead?
181  while (current_components < target_components) {
182  BaseFloat max_weight = weights_(0);
183  int32 max_idx = 0;
184  for (int32 i = 1; i < current_components; i++) {
185  if (weights_(i) > max_weight) {
186  max_weight = weights_(i);
187  max_idx = i;
188  }
189  }
190 
191  // remember what component was split
192  if (history != NULL)
193  history->push_back(max_idx);
194 
195  weights_(max_idx) /= 2;
196  weights_(current_components) = weights_(max_idx);
197  Vector<BaseFloat> rand_vec(dim);
198  for (int32 i = 0; i < dim; i++) {
199  rand_vec(i) = RandGauss() * std::sqrt(inv_vars_(max_idx, i));
200  // note, this looks wrong but is really right because it's the
201  // means_invvars we're multiplying and they have the dimension
202  // of an inverse standard variance. [dan]
203  }
204  inv_vars_.Row(current_components).CopyFromVec(inv_vars_.Row(max_idx));
205  means_invvars_.Row(current_components).CopyFromVec(means_invvars_.Row(
206  max_idx));
207  means_invvars_.Row(current_components).AddVec(perturb_factor, rand_vec);
208  means_invvars_.Row(max_idx).AddVec(-perturb_factor, rand_vec);
209  current_components++;
210  }
211  ComputeGconsts();
212 }
213 
214 
215 void DiagGmm::Perturb(float perturb_factor) {
216  int32 num_comps = NumGauss(),
217  dim = Dim();
218  Matrix<BaseFloat> rand_mat(num_comps, dim);
219  for (int32 i = 0; i < num_comps; i++) {
220  for (int32 d = 0; d < dim; d++) {
221  rand_mat(i, d) = RandGauss() * std::sqrt(inv_vars_(i, d));
222  // as in DiagGmm::Split, we perturb the means_invvars using a random
223  // fraction of inv_vars_
224  }
225  }
226  means_invvars_.AddMat(perturb_factor, rand_mat, kNoTrans);
227  ComputeGconsts();
228 }
229 
230 
231 void DiagGmm::MergeKmeans(int32 target_components,
232  ClusterKMeansOptions cfg) {
233  if (target_components <= 0 || NumGauss() < target_components) {
234  KALDI_ERR << "Invalid argument for target number of Gaussians (="
235  << target_components << "), #Gauss = " << NumGauss();
236  }
237  if (NumGauss() == target_components) {
238  KALDI_VLOG(2) << "No components merged, as target (" << target_components
239  << ") = total.";
240  return; // Nothing to do.
241  }
242  double min_var = 1.0e-10;
243  std::vector<Clusterable*> clusterable_vec;
244  for (int32 g = 0; g < NumGauss(); g++) {
245  if (weights_(g) == 0) {
246  KALDI_WARN << "Not using zero-weight Gaussians in clustering.";
247  continue;
248  }
249  Vector<BaseFloat> x_stats(Dim()),
250  x2_stats(Dim());
251  BaseFloat count = weights_(g);
252 
253  SubVector<BaseFloat> inv_var(inv_vars_, g),
254  mean_invvar(means_invvars_, g);
255  x_stats.AddVecDivVec(1.0, mean_invvar, inv_var, count); // x_stats is now mean.
256  x2_stats.CopyFromVec(inv_var);
257  x2_stats.InvertElements(); // x2_stats is now var.
258  x2_stats.AddVec2(1.0, x_stats); // x2_stats is now var + mean^2
259  x_stats.Scale(count); // x_stats is now scaled by count.
260  x2_stats.Scale(count); // x2_stats is now scaled by count.
261  clusterable_vec.push_back(new GaussClusterable(x_stats, x2_stats, min_var,
262  count));
263  }
264  if (clusterable_vec.size() <= target_components) {
265  KALDI_WARN << "Not doing clustering phase since lost too many Gaussians "
266  << "due to zero weight. Warning: zero-weight Gaussians are "
267  << "still there.";
268  DeletePointers(&clusterable_vec);
269  return;
270  } else {
271  std::vector<Clusterable*> clusters;
272  ClusterKMeans(clusterable_vec,
273  target_components,
274  &clusters, NULL, cfg);
275  Resize(clusters.size(), Dim());
276  for (int32 g = 0; g < static_cast<int32>(clusters.size()); g++) {
277  GaussClusterable *gc = static_cast<GaussClusterable*>(clusters[g]);
278  weights_(g) = gc->count();
279  SubVector<BaseFloat> inv_var(inv_vars_, g),
280  mean_invvar(means_invvars_, g);
281  inv_var.CopyFromVec(gc->x2_stats());
282  inv_var.Scale(1.0 / gc->count()); // inv_var is now the var + mean^2
283  mean_invvar.CopyFromVec(gc->x_stats());
284  mean_invvar.Scale(1.0 / gc->count()); // mean_invvar is now the mean.
285  inv_var.AddVec2(-1.0, mean_invvar); // subtract mean^2; inv_var is now the var
286  inv_var.InvertElements(); // inv_var is now the inverse var.
287  mean_invvar.MulElements(inv_var); // mean_invvar is now mean * inverse var.
288  }
289  ComputeGconsts();
290  DeletePointers(&clusterable_vec);
291  DeletePointers(&clusters);
292  }
293 }
294 
295 void DiagGmm::Merge(int32 target_components, std::vector<int32> *history) {
296  if (target_components <= 0 || NumGauss() < target_components) {
297  KALDI_ERR << "Invalid argument for target number of Gaussians (="
298  << target_components << "), #Gauss = " << NumGauss();
299  }
300  if (NumGauss() == target_components) {
301  KALDI_VLOG(2) << "No components merged, as target (" << target_components
302  << ") = total.";
303  return; // Nothing to do.
304  }
305 
306  int32 num_comp = NumGauss(), dim = Dim();
307 
308  if (target_components == 1) { // global mean and variance
310  // Undo variance inversion and multiplication of mean by inv var.
313  vars.InvertElements();
314  means.MulElements(vars);
315  // add means square to variances; get second-order stats
316  for (int32 i = 0; i < num_comp; i++) {
317  vars.Row(i).AddVec2(1.0, means.Row(i));
318  }
319 
320  // Slightly more efficient than calling this->Resize(1, dim)
321  gconsts_.Resize(1);
322  weights_.Resize(1);
323  means_invvars_.Resize(1, dim);
324  inv_vars_.Resize(1, dim);
325 
326  for (int32 i = 0; i < num_comp; i++) {
327  weights_(0) += weights(i);
328  means_invvars_.Row(0).AddVec(weights(i), means.Row(i));
329  inv_vars_.Row(0).AddVec(weights(i), vars.Row(i));
330  }
331  if (!ApproxEqual(weights_(0), 1.0, 1e-6)) {
332  KALDI_WARN << "Weights sum to " << weights_(0) << ": rescaling.";
335  weights_(0) = 1.0;
336  }
337  inv_vars_.Row(0).AddVec2(-1.0, means_invvars_.Row(0));
340  ComputeGconsts();
341  return;
342  }
343 
344  // If more than 1 merged component is required, use the hierarchical
345  // clustering of components that lead to the smallest decrease in likelihood.
346  std::vector<bool> discarded_component(num_comp);
347  Vector<BaseFloat> logdet(num_comp); // logdet for each component
348  for (int32 i = 0; i < num_comp; i++) {
349  discarded_component[i] = false;
350  for (int32 d = 0; d < dim; d++) {
351  logdet(i) += 0.5 * Log(inv_vars_(i, d)); // +0.5 because var is inverted
352  }
353  }
354 
355  // Undo variance inversion and multiplication of mean by this
356  // Makes copy of means and vars for all components - memory inefficient?
359  vars.InvertElements();
360  means.MulElements(vars);
361 
362  // add means square to variances; get second-order stats
363  // (normalized by zero-order stats)
364  for (int32 i = 0; i < num_comp; i++) {
365  vars.Row(i).AddVec2(1.0, means.Row(i));
366  }
367 
368  // compute change of likelihood for all combinations of components
369  SpMatrix<BaseFloat> delta_like(num_comp);
370  for (int32 i = 0; i < num_comp; i++) {
371  for (int32 j = 0; j < i; j++) {
372  BaseFloat w1 = weights_(i), w2 = weights_(j), w_sum = w1 + w2;
373  BaseFloat merged_logdet = merged_components_logdet(w1, w2,
374  means.Row(i), means.Row(j), vars.Row(i), vars.Row(j));
375  delta_like(i, j) = w_sum * merged_logdet
376  - w1 * logdet(i) - w2 * logdet(j);
377  }
378  }
379 
380  // Merge components with smallest impact on the loglike
381  for (int32 removed = 0; removed < num_comp - target_components; removed++) {
382  // Search for the least significant change in likelihood
383  // (maximum of negative delta_likes)
384  BaseFloat max_delta_like = -std::numeric_limits<BaseFloat>::max();
385  int32 max_i = -1, max_j = -1;
386  for (int32 i = 0; i < NumGauss(); i++) {
387  if (discarded_component[i]) continue;
388  for (int32 j = 0; j < i; j++) {
389  if (discarded_component[j]) continue;
390  if (delta_like(i, j) > max_delta_like) {
391  max_delta_like = delta_like(i, j);
392  max_i = i;
393  max_j = j;
394  }
395  }
396  }
397 
398  // make sure that different components will be merged
399  KALDI_ASSERT(max_i != max_j && max_i != -1 && max_j != -1);
400 
401  // remember the merge candidates
402  if (history != NULL) {
403  history->push_back(max_i);
404  history->push_back(max_j);
405  }
406 
407  // Merge components
408  BaseFloat w1 = weights_(max_i), w2 = weights_(max_j);
409  BaseFloat w_sum = w1 + w2;
410  // merge means
411  means.Row(max_i).AddVec(w2/w1, means.Row(max_j));
412  means.Row(max_i).Scale(w1/w_sum);
413  // merge vars
414  vars.Row(max_i).AddVec(w2/w1, vars.Row(max_j));
415  vars.Row(max_i).Scale(w1/w_sum);
416  // merge weights
417  weights_(max_i) = w_sum;
418 
419  // Update gmm for merged component
420  // copy second-order stats (normalized by zero-order stats)
421  inv_vars_.Row(max_i).CopyFromVec(vars.Row(max_i));
422  // centralize
423  inv_vars_.Row(max_i).AddVec2(-1.0, means.Row(max_i));
424  // invert
425  inv_vars_.Row(max_i).InvertElements();
426  // copy first-order stats (normalized by zero-order stats)
427  means_invvars_.Row(max_i).CopyFromVec(means.Row(max_i));
428  // multiply by inv_vars
429  means_invvars_.Row(max_i).MulElements(inv_vars_.Row(max_i));
430 
431  // Update logdet for merged component
432  logdet(max_i) = 0.0;
433  for (int32 d = 0; d < dim; d++) {
434  logdet(max_i) += 0.5 * Log(inv_vars_(max_i, d));
435  // +0.5 because var is inverted
436  }
437 
438  // Label the removed component as discarded
439  discarded_component[max_j] = true;
440 
441  // Update delta_like for merged component
442  for (int32 j = 0; j < num_comp; j++) {
443  if ((j == max_i) || (discarded_component[j])) continue;
444  BaseFloat w1 = weights_(max_i),
445  w2 = weights_(j),
446  w_sum = w1 + w2;
447  BaseFloat merged_logdet = merged_components_logdet(w1, w2,
448  means.Row(max_i), means.Row(j), vars.Row(max_i), vars.Row(j));
449  delta_like(max_i, j) = w_sum * merged_logdet - w1 * logdet(max_i)
450  - w2 * logdet(j);
451  // doesn't respect lower triangular indeces,
452  // relies on implicitly performed swap of coordinates if necessary
453  }
454  }
455 
456  // Remove the consumed components
457  int32 m = 0;
458  for (int32 i = 0; i < num_comp; i++) {
459  if (discarded_component[i]) {
460  weights_.RemoveElement(m);
462  inv_vars_.RemoveRow(m);
463  } else {
464  ++m;
465  }
466  }
467 
468  ComputeGconsts();
469 }
470 
472  const VectorBase<BaseFloat> &f1,
473  const VectorBase<BaseFloat> &f2,
474  const VectorBase<BaseFloat> &s1,
475  const VectorBase<BaseFloat> &s2)
476  const {
477  int32 dim = f1.Dim();
478  Vector<BaseFloat> tmp_mean(dim);
479  Vector<BaseFloat> tmp_var(dim);
480 
481  BaseFloat w_sum = w1 + w2;
482  tmp_mean.CopyFromVec(f1);
483  tmp_mean.AddVec(w2/w1, f2);
484  tmp_mean.Scale(w1/w_sum);
485  tmp_var.CopyFromVec(s1);
486  tmp_var.AddVec(w2/w1, s2);
487  tmp_var.Scale(w1/w_sum);
488  tmp_var.AddVec2(-1.0, tmp_mean);
489  BaseFloat merged_logdet = 0.0;
490  for (int32 d = 0; d < dim; d++) {
491  merged_logdet -= 0.5 * Log(tmp_var(d));
492  // -0.5 because var is not inverted
493  }
494  return merged_logdet;
495 }
496 
498  int32 comp_id) const {
499  if (!valid_gconsts_)
500  KALDI_ERR << "Must call ComputeGconsts() before computing likelihood";
501  if (static_cast<int32>(data.Dim()) != Dim()) {
502  KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
503  << "mismatch " << (data.Dim()) << " vs. "<< (Dim());
504  }
505  BaseFloat loglike;
506  Vector<BaseFloat> data_sq(data);
507  data_sq.ApplyPow(2.0);
508 
509  // loglike = means * inv(vars) * data.
510  loglike = VecVec(means_invvars_.Row(comp_id), data);
511  // loglike += -0.5 * inv(vars) * data_sq.
512  loglike -= 0.5 * VecVec(inv_vars_.Row(comp_id), data_sq);
513  return loglike + gconsts_(comp_id);
514 }
515 
516 // Gets likelihood of data given this.
518  if (!valid_gconsts_)
519  KALDI_ERR << "Must call ComputeGconsts() before computing likelihood";
520  Vector<BaseFloat> loglikes;
521  LogLikelihoods(data, &loglikes);
522  BaseFloat log_sum = loglikes.LogSumExp();
523  if (KALDI_ISNAN(log_sum) || KALDI_ISINF(log_sum))
524  KALDI_ERR << "Invalid answer (overflow or invalid variances/features?)";
525  return log_sum;
526 }
527 
529  Vector<BaseFloat> *loglikes) const {
530  loglikes->Resize(gconsts_.Dim(), kUndefined);
531  loglikes->CopyFromVec(gconsts_);
532  if (data.Dim() != Dim()) {
533  KALDI_ERR << "DiagGmm::LogLikelihoods, dimension "
534  << "mismatch " << data.Dim() << " vs. "<< Dim();
535  }
536  Vector<BaseFloat> data_sq(data);
537  data_sq.ApplyPow(2.0);
538 
539  // loglikes += means * inv(vars) * data.
540  loglikes->AddMatVec(1.0, means_invvars_, kNoTrans, data, 1.0);
541  // loglikes += -0.5 * inv(vars) * data_sq.
542  loglikes->AddMatVec(-0.5, inv_vars_, kNoTrans, data_sq, 1.0);
543 }
544 
545 
547  Matrix<BaseFloat> *loglikes) const {
548  KALDI_ASSERT(data.NumRows() != 0);
549  loglikes->Resize(data.NumRows(), gconsts_.Dim(), kUndefined);
550  loglikes->CopyRowsFromVec(gconsts_);
551  if (data.NumCols() != Dim()) {
552  KALDI_ERR << "DiagGmm::LogLikelihoods, dimension "
553  << "mismatch " << data.NumCols() << " vs. "<< Dim();
554  }
555  Matrix<BaseFloat> data_sq(data);
556  data_sq.ApplyPow(2.0);
557 
558  // loglikes += means * inv(vars) * data.
559  loglikes->AddMatMat(1.0, data, kNoTrans, means_invvars_, kTrans, 1.0);
560  // loglikes += -0.5 * inv(vars) * data_sq.
561  loglikes->AddMatMat(-0.5, data_sq, kNoTrans, inv_vars_, kTrans, 1.0);
562 }
563 
564 
565 
567  const std::vector<int32> &indices,
568  Vector<BaseFloat> *loglikes) const {
569  KALDI_ASSERT(data.Dim() == Dim());
570  Vector<BaseFloat> data_sq(data);
571  data_sq.ApplyPow(2.0);
572 
573  int32 num_indices = static_cast<int32>(indices.size());
574  loglikes->Resize(num_indices, kUndefined);
575  if (indices.back() + 1 - indices.front() == num_indices) {
576  // A special (but common) case when the indices form a contiguous range.
577  int32 start_idx = indices.front();
578  loglikes->CopyFromVec(SubVector<BaseFloat>(gconsts_, start_idx, num_indices));
579  // loglikes += means * inv(vars) * data.
580  SubMatrix<BaseFloat> means_invvars_sub(means_invvars_, start_idx, num_indices,
581  0, Dim());
582  loglikes->AddMatVec(1.0, means_invvars_sub, kNoTrans, data, 1.0);
583  SubMatrix<BaseFloat> inv_vars_sub(inv_vars_, start_idx, num_indices,
584  0, Dim());
585  // loglikes += -0.5 * inv(vars) * data_sq.
586  loglikes->AddMatVec(-0.5, inv_vars_sub, kNoTrans, data_sq, 1.0);
587  } else {
588  for (int32 i = 0; i < num_indices; i++) {
589  int32 idx = indices[i]; // The Gaussian index.
590  BaseFloat this_loglike =
591  gconsts_(idx) + VecVec(means_invvars_.Row(idx), data)
592  - 0.5*VecVec(inv_vars_.Row(idx), data_sq);
593  (*loglikes)(i) = this_loglike;
594  }
595  }
596 }
597 
598 
599 
600 // Gets likelihood of data given this. Also provides per-Gaussian posteriors.
602  Vector<BaseFloat> *posterior) const {
603  if (!valid_gconsts_)
604  KALDI_ERR << "Must call ComputeGconsts() before computing likelihood";
605  if (posterior == NULL) KALDI_ERR << "NULL pointer passed as return argument.";
606  Vector<BaseFloat> loglikes;
607  LogLikelihoods(data, &loglikes);
608  BaseFloat log_sum = loglikes.ApplySoftMax();
609  if (KALDI_ISNAN(log_sum) || KALDI_ISINF(log_sum))
610  KALDI_ERR << "Invalid answer (overflow or invalid variances/features?)";
611  if (posterior->Dim() != loglikes.Dim())
612  posterior->Resize(loglikes.Dim());
613  posterior->CopyFromVec(loglikes);
614  return log_sum;
615 }
616 
617 void DiagGmm::RemoveComponent(int32 gauss, bool renorm_weights) {
618  KALDI_ASSERT(gauss < NumGauss());
619  if (NumGauss() == 1)
620  KALDI_ERR << "Attempting to remove the only remaining component.";
621  weights_.RemoveElement(gauss);
622  gconsts_.RemoveElement(gauss);
623  means_invvars_.RemoveRow(gauss);
624  inv_vars_.RemoveRow(gauss);
625  BaseFloat sum_weights = weights_.Sum();
626  if (renorm_weights) {
627  weights_.Scale(1.0/sum_weights);
628  valid_gconsts_ = false;
629  }
630 }
631 
632 void DiagGmm::RemoveComponents(const std::vector<int32> &gauss_in,
633  bool renorm_weights) {
634  std::vector<int32> gauss(gauss_in);
635  std::sort(gauss.begin(), gauss.end());
637  // If efficiency is later an issue, will code this specially (unlikely).
638  for (size_t i = 0; i < gauss.size(); i++) {
639  RemoveComponent(gauss[i], renorm_weights);
640  for (size_t j = i + 1; j < gauss.size(); j++)
641  gauss[j]--;
642  }
643 }
644 
645 void DiagGmm::Interpolate(BaseFloat rho, const DiagGmm &source,
646  GmmFlagsType flags) {
647  KALDI_ASSERT(NumGauss() == source.NumGauss());
648  KALDI_ASSERT(Dim() == source.Dim());
649 
650  DiagGmmNormal us(*this);
651  DiagGmmNormal them(source);
652 
653  if (flags & kGmmWeights) {
654  us.weights_.Scale(1.0 - rho);
655  us.weights_.AddVec(rho, them.weights_);
656  us.weights_.Scale(1.0 / us.weights_.Sum());
657  }
658 
659  if (flags & kGmmMeans) {
660  us.means_.Scale(1.0 - rho);
661  us.means_.AddMat(rho, them.means_);
662  }
663 
664  if (flags & kGmmVariances) {
665  us.vars_.Scale(1.0 - rho);
666  us.vars_.AddMat(rho, them.vars_);
667  }
668 
669  us.CopyToDiagGmm(this);
670  ComputeGconsts();
671 }
672 
673 void DiagGmm::Interpolate(BaseFloat rho, const FullGmm &source,
674  GmmFlagsType flags) {
675  KALDI_ASSERT(NumGauss() == source.NumGauss());
676  KALDI_ASSERT(Dim() == source.Dim());
677  DiagGmmNormal us(*this);
678  FullGmmNormal them(source);
679 
680  if (flags & kGmmWeights) {
681  us.weights_.Scale(1.0 - rho);
682  us.weights_.AddVec(rho, them.weights_);
683  us.weights_.Scale(1.0 / us.weights_.Sum());
684  }
685 
686  if (flags & kGmmMeans) {
687  us.means_.Scale(1.0 - rho);
688  us.means_.AddMat(rho, them.means_);
689  }
690 
691  if (flags & kGmmVariances) {
692  for (int32 i = 0; i < NumGauss(); i++) {
693  us.vars_.Scale(1. - rho);
695  for (int32 j = 0; j < Dim(); j++)
696  diag(j) = them.vars_[i](j, j);
697  us.vars_.Row(i).AddVec(rho, diag);
698  }
699  }
700 
701  us.CopyToDiagGmm(this);
702  ComputeGconsts();
703 }
704 
705 void DiagGmm::Write(std::ostream &out_stream, bool binary) const {
706  if (!valid_gconsts_)
707  KALDI_ERR << "Must call ComputeGconsts() before writing the model.";
708  WriteToken(out_stream, binary, "<DiagGMM>");
709  if (!binary) out_stream << "\n";
710  WriteToken(out_stream, binary, "<GCONSTS>");
711  gconsts_.Write(out_stream, binary);
712  WriteToken(out_stream, binary, "<WEIGHTS>");
713  weights_.Write(out_stream, binary);
714  WriteToken(out_stream, binary, "<MEANS_INVVARS>");
715  means_invvars_.Write(out_stream, binary);
716  WriteToken(out_stream, binary, "<INV_VARS>");
717  inv_vars_.Write(out_stream, binary);
718  WriteToken(out_stream, binary, "</DiagGMM>");
719  if (!binary) out_stream << "\n";
720 }
721 
// Stream-output operator: writes the GMM in text (non-binary) form.
std::ostream & operator <<(std::ostream & os,
                           const kaldi::DiagGmm &gmm) {
  gmm.Write(os, false);
  return os;
}
727 
// Reads a model written by Write().  Accepts the obsolete
// <DiagGMMBegin>/<DiagGMMEnd> markers for backward compatibility, and treats
// the <GCONSTS> section as optional; gconsts are recomputed at the end
// rather than trusted from the stream.
void DiagGmm::Read(std::istream &is, bool binary) {
//  ExpectToken(is, binary, "<DiagGMMBegin>");
  std::string token;
  ReadToken(is, binary, &token);
  // <DiagGMMBegin> is for compatibility. Will be deleted later
  if (token != "<DiagGMMBegin>" && token != "<DiagGMM>")
    KALDI_ERR << "Expected <DiagGMM>, got " << token;
  ReadToken(is, binary, &token);
  if (token == "<GCONSTS>") {  // The gconsts are optional.
    gconsts_.Read(is, binary);
    ExpectToken(is, binary, "<WEIGHTS>");
  } else {
    if (token != "<WEIGHTS>")
      KALDI_ERR << "DiagGmm::Read, expected <WEIGHTS> or <GCONSTS>, got "
                << token;
  }
  weights_.Read(is, binary);
  ExpectToken(is, binary, "<MEANS_INVVARS>");
  means_invvars_.Read(is, binary);
  ExpectToken(is, binary, "<INV_VARS>");
  inv_vars_.Read(is, binary);
//  ExpectToken(is, binary, "<DiagGMMEnd>");
  ReadToken(is, binary, &token);
  // <DiagGMMEnd> is for compatibility. Will be deleted later
  if (token != "<DiagGMMEnd>" && token != "</DiagGMM>")
    KALDI_ERR << "Expected </DiagGMM>, got " << token;

  ComputeGconsts();  // safer option than trusting the read gconsts
}
757 
// Stream-input operator: reads a GMM in text (non-binary) form.
std::istream & operator >>(std::istream &is, kaldi::DiagGmm &gmm) {
  gmm.Read(is, false);  // false == non-binary.
  return is;
}
762 
763 
766  int32 num_gselect,
767  std::vector<int32> *output) const {
768  int32 num_gauss = NumGauss();
769  Vector<BaseFloat> loglikes(num_gauss, kUndefined);
770  output->clear();
771  this->LogLikelihoods(data, &loglikes);
772 
773  BaseFloat thresh;
774  if (num_gselect < num_gauss) {
775  Vector<BaseFloat> loglikes_copy(loglikes);
776  BaseFloat *ptr = loglikes_copy.Data();
777  std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
778  thresh = ptr[num_gauss-num_gselect];
779  } else {
780  thresh = -std::numeric_limits<BaseFloat>::infinity();
781  }
782  BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
783  std::vector<std::pair<BaseFloat, int32> > pairs;
784  for (int32 p = 0; p < num_gauss; p++) {
785  if (loglikes(p) >= thresh) {
786  pairs.push_back(std::make_pair(loglikes(p), p));
787  }
788  }
789  std::sort(pairs.begin(), pairs.end(),
790  std::greater<std::pair<BaseFloat, int32> >());
791  for (int32 j = 0;
792  j < num_gselect && j < static_cast<int32>(pairs.size());
793  j++) {
794  output->push_back(pairs[j].second);
795  tot_loglike = LogAdd(tot_loglike, pairs[j].first);
796  }
797  KALDI_ASSERT(!output->empty());
798  return tot_loglike;
799 }
800 
802  int32 num_gselect,
803  std::vector<std::vector<int32> > *output) const {
804  double ans = 0.0;
805  int32 num_frames = data.NumRows(), num_gauss = NumGauss();
806 
807  int32 max_mem = 10000000; // Don't devote more than 10Mb to loglikes_mat;
808  // break up the utterance if needed.
809  int32 mem_needed = num_frames * num_gauss * sizeof(BaseFloat);
810  if (mem_needed > max_mem) {
811  // Break into parts and recurse, we don't want to consume too
812  // much memory.
813  int32 num_parts = (mem_needed + max_mem - 1) / max_mem;
814  int32 part_frames = (data.NumRows() + num_parts - 1) / num_parts;
815  double tot_ans = 0.0;
816  std::vector<std::vector<int32> > part_output;
817  output->clear();
818  output->resize(num_frames);
819  for (int32 p = 0; p < num_parts; p++) {
820  int32 start_frame = p * part_frames,
821  this_num_frames = std::min(num_frames - start_frame, part_frames);
822  SubMatrix<BaseFloat> data_part(data, start_frame, this_num_frames,
823  0, data.NumCols());
824  tot_ans += GaussianSelection(data_part, num_gselect, &part_output);
825  for (int32 t = 0; t < this_num_frames; t++)
826  (*output)[start_frame + t].swap(part_output[t]);
827  }
828  KALDI_ASSERT(!output->back().empty());
829  return tot_ans;
830  }
831 
832  KALDI_ASSERT(num_frames != 0);
833  Matrix<BaseFloat> loglikes_mat(num_frames, num_gauss, kUndefined);
834  this->LogLikelihoods(data, &loglikes_mat);
835 
836  output->clear();
837  output->resize(num_frames);
838 
839  for (int32 i = 0; i < num_frames; i++) {
840  SubVector<BaseFloat> loglikes(loglikes_mat, i);
841 
842  BaseFloat thresh;
843  if (num_gselect < num_gauss) {
844  Vector<BaseFloat> loglikes_copy(loglikes);
845  BaseFloat *ptr = loglikes_copy.Data();
846  std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
847  thresh = ptr[num_gauss-num_gselect];
848  } else {
849  thresh = -std::numeric_limits<BaseFloat>::infinity();
850  }
851  BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
852  std::vector<std::pair<BaseFloat, int32> > pairs;
853  for (int32 p = 0; p < num_gauss; p++) {
854  if (loglikes(p) >= thresh) {
855  pairs.push_back(std::make_pair(loglikes(p), p));
856  }
857  }
858  std::sort(pairs.begin(), pairs.end(),
859  std::greater<std::pair<BaseFloat, int32> >());
860  std::vector<int32> &this_output = (*output)[i];
861  for (int32 j = 0;
862  j < num_gselect && j < static_cast<int32>(pairs.size());
863  j++) {
864  this_output.push_back(pairs[j].second);
865  tot_loglike = LogAdd(tot_loglike, pairs[j].first);
866  }
867  KALDI_ASSERT(!this_output.empty());
868  ans += tot_loglike;
869  }
870  return ans;
871 }
872 
873 
874 
876  const VectorBase<BaseFloat> &data,
877  const std::vector<int32> &preselect,
878  int32 num_gselect,
879  std::vector<int32> *output) const {
880  static bool warned_size = false;
881  int32 preselect_sz = preselect.size();
882  int32 this_num_gselect = std::min(num_gselect, preselect_sz);
883  if (preselect_sz <= num_gselect && !warned_size) {
884  warned_size = true;
885  KALDI_WARN << "Preselect size is less or equal to than final size, "
886  << "doing nothing: " << preselect_sz << " < " << num_gselect
887  << " [won't warn again]";
888  }
889  Vector<BaseFloat> loglikes(preselect_sz);
890  LogLikelihoodsPreselect(data, preselect, &loglikes);
891 
892  Vector<BaseFloat> loglikes_copy(loglikes);
893  BaseFloat *ptr = loglikes_copy.Data();
894  std::nth_element(ptr, ptr+preselect_sz-this_num_gselect,
895  ptr+preselect_sz);
896  BaseFloat thresh = ptr[preselect_sz-this_num_gselect];
897 
898  BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
899  // we want the output sorted from best likelihood to worse
900  // (so we can prune further without the model)...
901  std::vector<std::pair<BaseFloat, int32> > pairs;
902  for (int32 p = 0; p < preselect_sz; p++)
903  if (loglikes(p) >= thresh)
904  pairs.push_back(std::make_pair(loglikes(p), preselect[p]));
905  std::sort(pairs.begin(), pairs.end(),
906  std::greater<std::pair<BaseFloat, int32> >());
907  output->clear();
908  for (int32 j = 0;
909  j < this_num_gselect && j < static_cast<int32>(pairs.size());
910  j++) {
911  output->push_back(pairs[j].second);
912  tot_loglike = LogAdd(tot_loglike, pairs[j].first);
913  }
914  KALDI_ASSERT(!output->empty());
915  return tot_loglike;
916 }
917 
// Converts from the "normal" (weights/means/vars) representation by
// delegating to DiagGmmNormal::CopyToDiagGmm.
void DiagGmm::CopyFromNormal(const DiagGmmNormal &diag_gmm_normal) {
  diag_gmm_normal.CopyToDiagGmm(this);
}
921 
923  KALDI_ASSERT(static_cast<int32>(output->Dim()) == Dim());
924  BaseFloat tot = weights_.Sum();
925  KALDI_ASSERT(tot > 0.0);
926  double r = tot * RandUniform() * 0.99999;
927  int32 i = 0;
928  double sum = 0.0;
929  while (sum + weights_(i) < r) {
930  sum += weights_(i);
931  i++;
932  KALDI_ASSERT(i < static_cast<int32>(weights_.Dim()));
933  }
934  // now i is the index of the Gaussian we chose.
935  SubVector<BaseFloat> inv_var(inv_vars_, i),
936  mean_invvar(means_invvars_, i);
937  for (int32 d = 0; d < inv_var.Dim(); d++) {
938  BaseFloat stddev = 1.0 / sqrt(inv_var(d)),
939  mean = mean_invvar(d) / inv_var(d);
940  (*output)(d) = mean + RandGauss() * stddev;
941  }
942 }
943 
945  BaseFloat var_floor): valid_gconsts_(false) {
946  Vector<BaseFloat> x (gc.x_stats());
947  Vector<BaseFloat> x2 (gc.x2_stats());
948  BaseFloat count = gc.count();
949  KALDI_ASSERT(count > 0.0);
950  this->Resize(1, x.Dim());
951  x.Scale(1.0/count);
952  x2.Scale(1.0/count);
953  x2.AddVec2(-1.0, x); // subtract mean^2.
954  x2.ApplyFloor(var_floor);
955  x2.InvertElements(); // get inv-var.
956  KALDI_ASSERT(x2.Min() > 0);
957  Matrix<BaseFloat> mean(1, x.Dim());
958  mean.Row(0).CopyFromVec(x);
959  Matrix<BaseFloat> inv_var(1, x.Dim());
960  inv_var.Row(0).CopyFromVec(x2);
961  this->SetInvVarsAndMeans(inv_var, mean);
963  weights(0) = 1.0;
964  this->SetWeights(weights);
965  this->ComputeGconsts();
966 }
967 
968 } // End namespace kaldi
std::ostream & operator<<(std::ostream &os, const MatrixBase< Real > &M)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
void CopyFromDiagGmm(const DiagGmm &diaggmm)
Copies from given DiagGmm.
Definition: diag-gmm.cc:83
void Write(std::ostream &out, bool binary) const
write to stream.
void Perturb(float perturb_factor)
Perturbs the component means with a random vector multiplied by the perturb factor.
Definition: diag-gmm.cc:215
void Interpolate(BaseFloat rho, const DiagGmm &source, GmmFlagsType flags=kGmmAll)
this = rho x source + (1-rho) x this
Definition: diag-gmm.cc:645
const std::vector< SpMatrix< BaseFloat > > & inv_covars() const
Definition: full-gmm.h:146
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184
#define M_LOG_2PI
Definition: kaldi-math.h:60
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
Definition: diag-gmm-inl.h:63
void Write(std::ostream &os, bool binary) const
Definition: diag-gmm.cc:705
void Merge(int32 target_components, std::vector< int32 > *history=NULL)
Merge the components and remember the order in which the components were merged (flat list of pairs) ...
Definition: diag-gmm.cc:295
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition: diag-gmm.cc:566
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: full-gmm.h:60
void Split(int32 target_components, float perturb_factor, std::vector< int32 > *history=NULL)
Split the components and remember the order in which the components were split.
Definition: diag-gmm.cc:154
Definition for Gaussian Mixture Model with full covariances in normal mode: where the parameters are ...
Definition for Gaussian Mixture Model with diagonal covariances in normal mode: where the parameters ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40
#define KALDI_ISINF
Definition: kaldi-math.h:73
BaseFloat ClusterKMeans(const std::vector< Clusterable *> &points, int32 num_clust, std::vector< Clusterable *> *clusters_out, std::vector< int32 > *assignments_out, ClusterKMeansOptions cfg)
ClusterKMeans is a K-means-like clustering algorithm.
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: diag-gmm.cc:66
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
int32 ComputeGconsts()
Sets the gconsts.
Definition: diag-gmm.cc:114
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
uint16 GmmFlagsType
Bitwise OR of the above flags.
Definition: model-common.h:35
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow If prune > 0.0, ignores terms less than the max - prune...
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
const Vector< BaseFloat > & gconsts() const
Const accessors.
Definition: full-gmm.h:143
void CopyFromSp(const SpMatrix< Real > &other)
Definition: sp-matrix.h:85
std::vector< SpMatrix< double > > vars_
covariances
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector : *this = *this + alpha * rv^2 [element-wise squaring].
BaseFloat ComponentLogLikelihood(const VectorBase< BaseFloat > &data, int32 comp_id) const
Computes the log-likelihood of a data point given a single Gaussian component.
Definition: diag-gmm.cc:497
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
const size_t count
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void CopyDiagFromPacked(const PackedMatrix< Real > &M)
Extracts the diagonal of a packed matrix M; works for Sp or Tp.
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
Definition: diag-gmm.cc:601
float BaseFloat
Definition: kaldi-types.h:29
Vector< double > weights_
weights (not log).
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
double Log(double x)
Definition: kaldi-math.h:100
void RemoveComponent(int32 gauss, bool renorm_weights)
Removes single component from model.
Definition: diag-gmm.cc:617
void Scale(Real alpha)
Multiply each element with a scalar value.
void MulElements(const VectorBase< Real > &v)
Multiply element-by-element by another vector.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
BaseFloat LogLikelihood(const VectorBase< BaseFloat > &data) const
Returns the log-likelihood of a data point (vector) given the GMM.
Definition: diag-gmm.cc:517
void RemoveComponents(const std::vector< int32 > &gauss, bool renorm_weights)
Removes multiple components from model; "gauss" must not have dups.
Definition: diag-gmm.cc:632
bool valid_gconsts_
Recompute gconsts_ if false.
Definition: diag-gmm.h:233
Matrix< BaseFloat > inv_vars_
Inverted (diagonal) variances.
Definition: diag-gmm.h:235
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: full-gmm.h:58
const Vector< BaseFloat > & weights() const
Definition: diag-gmm.h:178
Matrix< double > vars_
diagonal variance
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
const Vector< BaseFloat > & weights() const
Definition: full-gmm.h:144
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <-- beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
void LogLikelihoods(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods.
Definition: diag-gmm.cc:528
void MulElements(const MatrixBase< Real > &A)
Element by element multiplication with a given matrix.
BaseFloat GaussianSelectionPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &preselect, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: diag-gmm.cc:875
void MergeKmeans(int32 target_components, ClusterKMeansOptions cfg=ClusterKMeansOptions())
Definition: diag-gmm.cc:231
double LogAdd(double x, double y)
Definition: kaldi-math.h:184
void GetMeans(Matrix< Real > *m) const
Accessor for means.
Definition: full-gmm-inl.h:118
std::istream & operator>>(std::istream &is, Matrix< Real > &M)
void CopyFromFullGmm(const FullGmm &fullgmm)
Copies from given FullGmm.
Definition: diag-gmm.cc:92
Matrix< double > means_
Means.
void InvertElements()
Inverts all the elements of the matrix.
void Read(std::istream &in, bool binary)
Definition: diag-gmm.cc:728
Vector< BaseFloat > weights_
weights (not log).
Definition: diag-gmm.h:234
void InvertElements()
Invert all elements.
#define KALDI_ISNAN
Definition: kaldi-math.h:72
SubVector< double > x2_stats() const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
Vector< BaseFloat > gconsts_
Equals log(weight) - 0.5 * (log det(var) + mean*mean*inv(var))
Definition: diag-gmm.h:232
void ApplyPow(Real power)
Take all elements of vector to a power.
Definition: kaldi-vector.h:179
BaseFloat GaussianSelection(const VectorBase< BaseFloat > &data, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: diag-gmm.cc:765
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void Generate(VectorBase< BaseFloat > *output)
Generates a random data-point from this distribution.
Definition: diag-gmm.cc:922
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void RemoveRow(MatrixIndexT i)
Remove a specified row.
DiagGmm()
Empty constructor.
Definition: diag-gmm.h:48
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
Definition: diag-gmm-inl.h:28
Vector< double > weights_
weights (not log).
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
GaussClusterable wraps Gaussian statistics in a form accessible to generic clustering algorithms...
void CopyFromNormal(const DiagGmmNormal &diag_gmm_normal)
Copies from DiagGmmNormal; does not resize.
Definition: diag-gmm.cc:918
void ApplyPow(Real power)
Definition: kaldi-matrix.h:341
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
Matrix< double > means_
Means.
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
void Invert(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: sp-matrix.cc:219
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
SubVector< double > x_stats() const
Sub-matrix representation.
Definition: kaldi-matrix.h:988
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
Matrix< BaseFloat > means_invvars_
Means times inverted variance.
Definition: diag-gmm.h:236
void Set(Real)
Sets all elements to a specific value.
const Matrix< BaseFloat > & inv_vars() const
Definition: diag-gmm.h:180
void CopyToDiagGmm(DiagGmm *diaggmm, GmmFlagsType flags=kGmmAll) const
Copies to DiagGmm the requested parameters.
BaseFloat merged_components_logdet(BaseFloat w1, BaseFloat w2, const VectorBase< BaseFloat > &f1, const VectorBase< BaseFloat > &f2, const VectorBase< BaseFloat > &s1, const VectorBase< BaseFloat > &s2) const
Definition: diag-gmm.cc:471