full-gmm.cc
Go to the documentation of this file.
1 // gmm/full-gmm.cc
2 
3 // Copyright 2009-2011 Jan Silovsky;
4 // Saarland University (Author: Arnab Ghoshal);
5 // Microsoft Corporation
6 // Copyright 2012 Arnab Ghoshal
7 // Copyright 2012-2013 Johns Hopkins University (author: Daniel Povey);
8 
9 // See ../../COPYING for clarification regarding multiple authors
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
19 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
20 // MERCHANTABILITY OR NON-INFRINGEMENT.
21 // See the Apache 2 License for the specific language governing permissions and
22 // limitations under the License.
23 
24 #include <algorithm>
25 #include <functional>
26 #include <limits>
27 #include <string>
28 #include <queue>
29 #include <utility>
30 using std::pair;
31 #include <vector>
32 using std::vector;
33 
34 #include "gmm/full-gmm.h"
35 #include "gmm/full-gmm-normal.h"
36 #include "gmm/diag-gmm.h"
37 #include "util/stl-utils.h"
38 
39 namespace kaldi {
40 
41 void FullGmm::Resize(int32 nmix, int32 dim) {
42  KALDI_ASSERT(nmix > 0 && dim > 0);
43  if (gconsts_.Dim() != nmix) gconsts_.Resize(nmix);
44  if (weights_.Dim() != nmix) weights_.Resize(nmix);
45  if (means_invcovars_.NumRows() != nmix
46  || means_invcovars_.NumCols() != dim)
47  means_invcovars_.Resize(nmix, dim);
48  ResizeInvCovars(nmix, dim);
49 }
50 
52  KALDI_ASSERT(nmix > 0 && dim > 0);
53  if (inv_covars_.size() != static_cast<size_t>(nmix))
54  inv_covars_.resize(nmix);
55  for (int32 i = 0; i < nmix; i++) {
56  if (inv_covars_[i].NumRows() != dim) {
57  inv_covars_[i].Resize(dim);
58  inv_covars_[i].SetUnit();
59  // must be initialized to unit for case of calling SetMeans while having
60  // covars/invcovars that are not set yet (i.e. zero)
61  }
62  }
63 }
64 
65 void FullGmm::CopyFromFullGmm(const FullGmm &fullgmm) {
66  Resize(fullgmm.NumGauss(), fullgmm.Dim());
67  gconsts_.CopyFromVec(fullgmm.gconsts_);
68  weights_.CopyFromVec(fullgmm.weights_);
70  int32 ncomp = NumGauss();
71  for (int32 mix = 0; mix < ncomp; mix++) {
72  inv_covars_[mix].CopyFromSp(fullgmm.inv_covars_[mix]);
73  }
75 }
76 
77 void FullGmm::CopyFromDiagGmm(const DiagGmm &diaggmm) {
78  Resize(diaggmm.NumGauss(), diaggmm.Dim());
79  gconsts_.CopyFromVec(diaggmm.gconsts());
80  weights_.CopyFromVec(diaggmm.weights());
82  int32 ncomp = NumGauss(), dim = Dim();
83  for (int32 mix = 0; mix < ncomp; mix++) {
84  inv_covars_[mix].SetZero();
85  for (int32 d = 0; d < dim; d++) {
86  inv_covars_[mix](d, d) = diaggmm.inv_vars()(mix, d);
87  }
88  }
90 }
91 
93  int32 num_mix = NumGauss(),
94  dim = Dim();
95  KALDI_ASSERT(num_mix > 0 && dim > 0);
96  BaseFloat offset = -0.5 * M_LOG_2PI * dim; // constant term in gconst.
97  int32 num_bad = 0;
98 
99  // Resize if Gaussians have been removed during Update()
100  if (num_mix != gconsts_.Dim()) gconsts_.Resize(num_mix);
101 
102  for (int32 mix = 0; mix < num_mix; mix++) {
103  KALDI_ASSERT(weights_(mix) >= 0); // Cannot have negative weights.
104  BaseFloat gc = Log(weights_(mix)) + offset; // May be -inf if weights == 0
105  SpMatrix<BaseFloat> covar(inv_covars_[mix]);
106  covar.InvertDouble();
107  BaseFloat logdet = covar.LogPosDefDet();
108  gc -= 0.5 * (logdet + VecSpVec(means_invcovars_.Row(mix),
109  covar, means_invcovars_.Row(mix)));
110  // Note that mean_invcovars(mix)' * covar(mix) * mean_invcovars(mix, d) is
111  // really mean' * inv(covar) * mean, since mean_invcovars(mix, d) contains
112  // the inverse covariance times mean.
113  // So gc is the likelihood at zero feature value.
114 
115  if (KALDI_ISNAN(gc)) { // negative infinity is OK but NaN is not acceptable
116  KALDI_ERR << "At component " << mix
117  << ", not a number in gconst computation";
118  }
119  if (KALDI_ISINF(gc)) {
120  num_bad++;
121  // If positive infinity, make it negative infinity.
122  // Want to make sure the answer becomes -inf in the end, not NaN.
123  if (gc > 0) gc = -gc;
124  }
125  gconsts_(mix) = gc;
126  }
127 
128  valid_gconsts_ = true;
129  return num_bad;
130 }
131 
132 void FullGmm::Split(int32 target_components, float perturb_factor,
133  vector<int32> *history) {
134  if (target_components <= NumGauss() || NumGauss() == 0) {
135  KALDI_WARN << "Cannot split from " << NumGauss() << " to "
136  << target_components << " components";
137  return;
138  }
139  int32 current_components = NumGauss(), dim = Dim();
140  FullGmm *tmp = new FullGmm();
141  tmp->CopyFromFullGmm(*this); // so we have copies of matrices...
142  // First do the resize:
143  weights_.Resize(target_components);
144  weights_.Range(0, current_components).CopyFromVec(tmp->weights_);
145  means_invcovars_.Resize(target_components, dim);
146  means_invcovars_.Range(0, current_components, 0,
147  dim).CopyFromMat(tmp->means_invcovars_);
148  ResizeInvCovars(target_components, dim);
149  for (int32 mix = 0; mix < current_components; mix++) {
150  inv_covars_[mix].CopyFromSp(tmp->inv_covars_[mix]);
151  }
152  for (int32 mix = current_components; mix < target_components; mix++) {
153  inv_covars_[mix].SetZero();
154  }
155  gconsts_.Resize(target_components);
156 
157  delete tmp;
158 
159  // future work(arnab): Use a priority queue instead?
160  while (current_components < target_components) {
161  BaseFloat max_weight = weights_(0);
162  int32 max_idx = 0;
163  for (int32 i = 1; i < current_components; i++) {
164  if (weights_(i) > max_weight) {
165  max_weight = weights_(i);
166  max_idx = i;
167  }
168  }
169 
170  // remember history
171  if (history != NULL)
172  history->push_back(max_idx);
173 
174  weights_(max_idx) /= 2;
175  weights_(current_components) = weights_(max_idx);
176  Vector<BaseFloat> rand_vec(dim);
177  rand_vec.SetRandn();
178  TpMatrix<BaseFloat> invcovar_l(dim);
179  invcovar_l.Cholesky(inv_covars_[max_idx]);
180  rand_vec.MulTp(invcovar_l, kTrans);
181  inv_covars_[current_components].CopyFromSp(inv_covars_[max_idx]);
182  means_invcovars_.Row(current_components).CopyFromVec(means_invcovars_.Row(
183  max_idx));
184  means_invcovars_.Row(current_components).AddVec(perturb_factor, rand_vec);
185  means_invcovars_.Row(max_idx).AddVec(-perturb_factor, rand_vec);
186  current_components++;
187  }
188  ComputeGconsts();
189 }
190 
191 void FullGmm::Perturb(float perturb_factor) {
192  int32 num_comps = NumGauss(),
193  dim = Dim();
194  Vector<BaseFloat> rand_vec(dim);
195  for (int32 i = 0; i < num_comps; i++) {
196  rand_vec.SetRandn();
197  TpMatrix<BaseFloat> invcovar_l(dim);
198  invcovar_l.Cholesky(inv_covars_[i]);
199  rand_vec.MulTp(invcovar_l, kTrans);
200  means_invcovars_.Row(i).AddVec(perturb_factor, rand_vec);
201  }
202  ComputeGconsts();
203 }
204 
205 
206 void FullGmm::Merge(int32 target_components, vector<int32> *history) {
207  if (target_components <= 0 || NumGauss() < target_components) {
208  KALDI_ERR << "Invalid argument for target number of Gaussians (="
209  << target_components << ")";
210  }
211  if (NumGauss() == target_components) {
212  KALDI_WARN << "No components merged, as target = total.";
213  return;
214  }
215 
216  int32 num_comp = NumGauss(), dim = Dim();
217 
218  if (target_components == 1) { // global mean and variance
220  // Undo variance inversion and multiplication of mean by this
221  vector<SpMatrix<BaseFloat> > covars(num_comp);
222  Matrix<BaseFloat> means(num_comp, dim);
223  for (int32 i = 0; i < num_comp; i++) {
224  covars[i].Resize(dim);
225  covars[i].CopyFromSp(inv_covars_[i]);
226  covars[i].InvertDouble();
227  means.Row(i).AddSpVec(1.0, covars[i], means_invcovars_.Row(i), 0.0);
228  covars[i].AddVec2(1.0, means.Row(i));
229  }
230 
231  // Slightly more efficient than calling this->Resize(1, dim)
232  gconsts_.Resize(1);
233  weights_.Resize(1);
234  means_invcovars_.Resize(1, dim);
235  inv_covars_.resize(1);
236  inv_covars_[0].Resize(dim);
237  Vector<BaseFloat> tmp_mean(dim);
238 
239  for (int32 i = 0; i < num_comp; i++) {
240  weights_(0) += weights(i);
241  tmp_mean.AddVec(weights(i), means.Row(i));
242  inv_covars_[0].AddSp(weights(i), covars[i]);
243  }
244  if (!ApproxEqual(weights_(0), 1.0, 1e-6)) {
245  KALDI_WARN << "Weights sum to " << weights_(0) << ": rescaling.";
246  tmp_mean.Scale(weights_(0));
247  inv_covars_[0].Scale(weights_(0));
248  weights_(0) = 1.0;
249  }
250  inv_covars_[0].AddVec2(-1.0, tmp_mean);
251  inv_covars_[0].InvertDouble();
252  means_invcovars_.Row(0).AddSpVec(1.0, inv_covars_[0], tmp_mean, 0.0);
253  ComputeGconsts();
254  return;
255  }
256 
257  // If more than 1 merged component is required, do greedy bottom-up
258  // clustering, always picking the pair of components that lead to the smallest
259  // decrease in likelihood.
260  vector<bool> discarded_component(num_comp);
261  Vector<BaseFloat> logdet(num_comp); // logdet for each component
262  logdet.SetZero();
263  for (int32 i = 0; i < num_comp; i++) {
264  discarded_component[i] = false;
265  logdet(i) += 0.5 * inv_covars_[i].LogPosDefDet();
266  // +0.5 because var is inverted
267  }
268 
269  // Undo variance inversion and multiplication of mean by this
270  // Makes copy of means and vars for all components.
271  vector<SpMatrix<BaseFloat> > vars(num_comp);
272  Matrix<BaseFloat> means(num_comp, dim);
273  for (int32 i = 0; i < num_comp; i++) {
274  vars[i].Resize(dim);
275  vars[i].CopyFromSp(inv_covars_[i]);
276  vars[i].InvertDouble();
277  means.Row(i).AddSpVec(1.0, vars[i], means_invcovars_.Row(i), 0.0);
278 
279  // add means square to variances; get second-order stats
280  // (normalized by zero-order stats)
281  vars[i].AddVec2(1.0, means.Row(i));
282  }
283 
284  // compute change of likelihood for all combinations of components
285  SpMatrix<BaseFloat> delta_like(num_comp);
286  for (int32 i = 0; i < num_comp; i++) {
287  for (int32 j = 0; j < i; j++) {
288  BaseFloat w1 = weights_(i), w2 = weights_(j), w_sum = w1 + w2;
289  BaseFloat merged_logdet = MergedComponentsLogdet(w1, w2,
290  means.Row(i), means.Row(j), vars[i], vars[j]);
291  delta_like(i, j) = w_sum * merged_logdet
292  - w1 * logdet(i) - w2 * logdet(j);
293  }
294  }
295 
296  // Merge components with smallest impact on the loglike
297  for (int32 removed = 0; removed < num_comp - target_components; removed++) {
298  // Search for the least significant change in likelihood
299  // (maximum of negative delta_likes)
300  BaseFloat max_delta_like = -std::numeric_limits<BaseFloat>::max();
301  int32 max_i = 0, max_j = 0;
302  for (int32 i = 0; i < NumGauss(); i++) {
303  if (discarded_component[i]) continue;
304  for (int32 j = 0; j < i; j++) {
305  if (discarded_component[j]) continue;
306  if (delta_like(i, j) > max_delta_like) {
307  max_delta_like = delta_like(i, j);
308  max_i = i;
309  max_j = j;
310  }
311  }
312  }
313 
314  // make sure that different components will be merged
315  KALDI_ASSERT(max_i != max_j);
316 
317  // remember history
318  if (history != NULL) {
319  history->push_back(max_i);
320  history->push_back(max_j);
321  }
322 
323  // Merge components
324  BaseFloat w1 = weights_(max_i), w2 = weights_(max_j);
325  BaseFloat w_sum = w1 + w2;
326  // merge means
327  means.Row(max_i).AddVec(w2/w1, means.Row(max_j));
328  means.Row(max_i).Scale(w1/w_sum);
329  // merge vars
330  vars[max_i].AddSp(w2/w1, vars[max_j]);
331  vars[max_i].Scale(w1/w_sum);
332  // merge weights
333  weights_(max_i) = w_sum;
334 
335  // Update gmm for merged component
336  // copy second-order stats (normalized by zero-order stats)
337  inv_covars_[max_i].CopyFromSp(vars[max_i]);
338  // centralize
339  inv_covars_[max_i].AddVec2(-1.0, means.Row(max_i));
340  // invert
341  inv_covars_[max_i].InvertDouble();
342  // copy first-order stats (normalized by zero-order stats)
343  // and multiply by inv_vars
344  means_invcovars_.Row(max_i).AddSpVec(1.0, inv_covars_[max_i],
345  means.Row(max_i), 0.0);
346 
347  // Update logdet for merged component
348  logdet(max_i) += 0.5 * inv_covars_[max_i].LogPosDefDet();
349  // +0.5 because var is inverted
350 
351  // Label the removed component as discarded
352  discarded_component[max_j] = true;
353 
354  // Update delta_like for merged component
355  for (int32 j = 0; j < num_comp; j++) {
356  if ((j == max_i) || (discarded_component[j])) continue;
357  BaseFloat w1 = weights_(max_i), w2 = weights_(j), w_sum = w1 + w2;
358  BaseFloat merged_logdet = MergedComponentsLogdet(w1, w2,
359  means.Row(max_i), means.Row(j), vars[max_i], vars[j]);
360  delta_like(max_i, j) = w_sum * merged_logdet
361  - w1 * logdet(max_i) - w2 * logdet(j);
362  // doesn't respect lower triangular indeces,
363  // relies on implicitly performed swap of coordinates if necessary
364  }
365  }
366 
367  // Remove the consumed components
368  int32 m = 0;
369  for (int32 i = 0; i < num_comp; i++) {
370  if (discarded_component[i]) {
371  weights_.RemoveElement(m);
373  inv_covars_.erase(inv_covars_.begin() + m);
374  } else {
375  ++m;
376  }
377  }
378 
379  ComputeGconsts();
380 }
381 
383  const vector<pair<int32, int32> > &preselect) {
384  KALDI_ASSERT(!preselect.empty());
385  double ans = 0.0;
386  if (target_components <= 0 || NumGauss() < target_components) {
387  KALDI_WARN << "Invalid argument for target number of Gaussians (="
388  << target_components << "), currently "
389  << NumGauss() << ", not mixing down";
390  return 0.0;
391  }
392  if (NumGauss() == target_components) {
393  KALDI_WARN << "No components merged, as target = total.";
394  return 0.0;
395  }
396  // likelihood change (a negative or zero value), and then the pair of indices.
397  typedef pair<BaseFloat, pair<int32, int32> > QueueElem;
398  std::priority_queue<QueueElem> queue;
399 
400  int32 num_comp = NumGauss(), dim = Dim();
401 
402  // Do greedy bottom-up clustering, always picking the pair of components that
403  // lead to the smallest decrease in likelihood.
404  vector<bool> discarded_component(num_comp);
405  Vector<BaseFloat> logdet(num_comp); // logdet for each component
406  logdet.SetZero();
407  for (int32 i = 0; i < num_comp; i++) {
408  discarded_component[i] = false;
409  logdet(i) = 0.5 * inv_covars_[i].LogPosDefDet();
410  // +0.5 because var is inverted
411  }
412 
413  // Undo variance inversion and multiplication of mean by
414  // inverse variance.
415  // Makes copy of means and vars for all components.
416  vector<SpMatrix<BaseFloat> > vars(num_comp);
417  Matrix<BaseFloat> means(num_comp, dim);
418  for (int32 i = 0; i < num_comp; i++) {
419  vars[i].Resize(dim);
420  vars[i].CopyFromSp(inv_covars_[i]);
421  vars[i].InvertDouble();
422  means.Row(i).AddSpVec(1.0, vars[i], means_invcovars_.Row(i), 0.0);
423 
424  // add means square to variances; get second-order stats
425  // (normalized by zero-order stats)
426  vars[i].AddVec2(1.0, means.Row(i));
427  }
428 
429  // compute change of likelihood for all combinations of components
430  for (int32 i = 0; i < preselect.size(); i++) {
431  int32 idx1 = preselect[i].first, idx2 = preselect[i].second;
432  KALDI_ASSERT(static_cast<size_t>(idx1) < static_cast<size_t>(num_comp));
433  KALDI_ASSERT(static_cast<size_t>(idx2) < static_cast<size_t>(num_comp));
434  BaseFloat w1 = weights_(idx1), w2 = weights_(idx2), w_sum = w1 + w2;
435  BaseFloat merged_logdet = MergedComponentsLogdet(w1, w2,
436  means.Row(idx1), means.Row(idx2),
437  vars[idx1], vars[idx2]),
438  delta_like = w_sum * merged_logdet - w1 * logdet(idx1) - w2 * logdet(idx2);
439  queue.push(std::make_pair(delta_like, preselect[i]));
440  }
441 
442  vector<int32> mapping(num_comp); // map of old index to where it
443  // got merged to.
444  for (int32 i = 0; i < num_comp; i++) mapping[i] = i;
445 
446  // Merge components with smallest impact on the loglike
447  int32 removed;
448  for (removed = 0;
449  removed < num_comp - target_components && !queue.empty(); ) {
450  QueueElem qelem = queue.top();
451  queue.pop();
452  BaseFloat delta_log_like_old = qelem.first;
453  int32 idx1 = qelem.second.first, idx2 = qelem.second.second;
454  // the next 3 lines are to handle when components got merged
455  // and moved to different indices, but we still want to consider
456  // merging their descendants. [descendant = current index where
457  // their data is.]
458  while (discarded_component[idx1]) idx1 = mapping[idx1];
459  while (discarded_component[idx2]) idx2 = mapping[idx2];
460  if (idx1 == idx2) continue; // can't merge something with itself.
461 
462  BaseFloat delta_log_like;
463  { // work out delta_log_like.
464  BaseFloat w1 = weights_(idx1), w2 = weights_(idx2), w_sum = w1 + w2;
465  BaseFloat merged_logdet = MergedComponentsLogdet(w1, w2,
466  means.Row(idx1), means.Row(idx2),
467  vars[idx1], vars[idx2]);
468  delta_log_like = w_sum * merged_logdet - w1 * logdet(idx1) - w2 * logdet(idx2);
469  }
470  if (ApproxEqual(delta_log_like, delta_log_like_old) ||
471  delta_log_like > delta_log_like_old) {
472  // if the log-like change did not change, or if it actually got smaller
473  // (closer to zero, more positive), then merge the components; otherwise
474  // put it back on the queue. Note: there is no test for "freshness" --
475  // we assume nothing is fresh.
476  BaseFloat w1 = weights_(idx1), w2 = weights_(idx2);
477  BaseFloat w_sum = w1 + w2;
478  // merge means
479  means.Row(idx1).AddVec(w2/w1, means.Row(idx2));
480  means.Row(idx1).Scale(w1/w_sum);
481  // merge vars
482  vars[idx1].AddSp(w2/w1, vars[idx2]);
483  vars[idx1].Scale(w1/w_sum);
484  // merge weights
485  weights_(idx1) = w_sum;
486 
487  // Update gmm for merged component
488  // copy second-order stats (normalized by zero-order stats)
489  inv_covars_[idx1].CopyFromSp(vars[idx1]);
490  // centralize
491  inv_covars_[idx1].AddVec2(-1.0, means.Row(idx1));
492  // invert
493  inv_covars_[idx1].InvertDouble();
494  // copy first-order stats (normalized by zero-order stats)
495  // and multiply by inv_vars
496  means_invcovars_.Row(idx1).AddSpVec(1.0, inv_covars_[idx1],
497  means.Row(idx1), 0.0);
498 
499  // Update logdet for merged component
500  logdet(idx1) = 0.5 * inv_covars_[idx1].LogPosDefDet();
501  // +0.5 because var is inverted
502 
503  // Label the removed component as discarded
504  discarded_component[idx2] = true;
505  KALDI_VLOG(2) << "Delta-log-like is " << delta_log_like << " (merging "
506  << idx1 << " and " << idx2 << ")";
507  ans += delta_log_like;
508  mapping[idx2] = idx1;
509  removed++;
510  } else {
511  QueueElem new_elem(delta_log_like, std::make_pair(idx1, idx2));
512  queue.push(new_elem); // push back more accurate elem.
513  }
514  }
515 
516  // Renumber the components.
517  int32 cur_idx = 0;
518  for (int32 i = 0; i < num_comp; i++) {
519  if (mapping[i] == i) { // This component is kept, not merged into another.
520  weights_(cur_idx) = weights_(i);
521  means_invcovars_.Row(cur_idx).CopyFromVec(means_invcovars_.Row(i));
522  inv_covars_[cur_idx].CopyFromSp(inv_covars_[i]);
523  cur_idx++;
524  }
525  }
526  KALDI_ASSERT(cur_idx + removed == num_comp);
527  gconsts_.Resize(cur_idx);
528  valid_gconsts_ = false;
529  weights_.Resize(cur_idx, kCopyData);
530  means_invcovars_.Resize(cur_idx, Dim(), kCopyData);
531  inv_covars_.resize(cur_idx);
532  ComputeGconsts();
533  return ans;
534 }
535 
536 
538  const VectorBase<BaseFloat> &f1,
539  const VectorBase<BaseFloat> &f2,
540  const SpMatrix<BaseFloat> &s1,
541  const SpMatrix<BaseFloat> &s2)
542  const {
543  int32 dim = f1.Dim();
544  Vector<BaseFloat> tmp_mean(dim);
545  SpMatrix<BaseFloat> tmp_var(dim);
546  BaseFloat merged_logdet = 0.0;
547 
548  BaseFloat w_sum = w1 + w2;
549  tmp_mean.CopyFromVec(f1);
550  tmp_mean.AddVec(w2/w1, f2);
551  tmp_mean.Scale(w1/w_sum);
552  tmp_var.CopyFromSp(s1);
553  tmp_var.AddSp(w2/w1, s2);
554  tmp_var.Scale(w1/w_sum);
555  tmp_var.AddVec2(-1.0, tmp_mean);
556  merged_logdet -= 0.5 * tmp_var.LogPosDefDet();
557  // -0.5 because var is not inverted
558  return merged_logdet;
559 }
560 
561 // returns the component of the log-likelihood due to this mixture
563  int32 comp_id) const {
564  if (!valid_gconsts_)
565  KALDI_ERR << "Must call ComputeGconsts() before computing likelihood";
566  if (data.Dim() != Dim()) {
567  KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
568  << "mismatch " << (data.Dim()) << "vs. "<< (Dim());
569  }
570  BaseFloat loglike;
571 
572  // loglike = means * inv(vars) * data.
573  loglike = VecVec(means_invcovars_.Row(comp_id), data);
574  // loglike += -0.5 * tr(data*data'*inv(covar))
575  loglike -= 0.5 * VecSpVec(data, inv_covars_[comp_id], data);
576  return loglike + gconsts_(comp_id);
577 }
578 
579 
580 
581 // Gets likelihood of data given this.
583  Vector<BaseFloat> loglikes;
584  LogLikelihoods(data, &loglikes);
585  BaseFloat log_sum = loglikes.LogSumExp();
586  if (KALDI_ISNAN(log_sum) || KALDI_ISINF(log_sum))
587  KALDI_ERR << "Invalid answer (overflow or invalid variances/features?)";
588  return log_sum;
589 }
590 
592  Vector<BaseFloat> *loglikes) const {
593  loglikes->Resize(gconsts_.Dim(), kUndefined);
594  loglikes->CopyFromVec(gconsts_);
595  int32 dim = Dim();
596  KALDI_ASSERT(dim == data.Dim());
597  SpMatrix<BaseFloat> data_sq(dim); // Initialize and make zero
598  data_sq.AddVec2(1.0, data);
599  // The following enables an optimization below: TraceSpSpLower, which is
600  // just like a dot product internally.
601  data_sq.ScaleDiag(0.5);
602 
603  // loglikes += mean' * inv(covar) * data.
604  loglikes->AddMatVec(1.0, means_invcovars_, kNoTrans, data, 1.0);
605  // loglikes -= 0.5 * data'*inv(covar)*data = 0.5 * tr(data*data'*inv(covar))
606  int32 num_comp = NumGauss();
607  for (int32 mix = 0; mix < num_comp; mix++) {
608  // was: (*loglikes)(mix) -= 0.5 * TraceSpSp(data_sq, inv_covars_[mix]);
609  (*loglikes)(mix) -= TraceSpSpLower(data_sq, inv_covars_[mix]);
610  }
611 }
612 
614  const vector<int32> &indices,
615  Vector<BaseFloat> *loglikes) const {
616  int32 dim = Dim();
617  KALDI_ASSERT(dim == data.Dim());
618  int32 num_indices = static_cast<int32>(indices.size());
619  loglikes->Resize(num_indices, kUndefined);
620 
621  SpMatrix<BaseFloat> data_sq(dim); // Initialize and make zero
622  data_sq.AddVec2(1.0, data);
623  // The following enables an optimization below: TraceSpSpLower, which is
624  // just like a dot product internally.
625  data_sq.ScaleDiag(0.5);
626 
627  for (int32 i = 0; i < num_indices; i++) {
628  int32 idx = indices[i];
629  (*loglikes)(i) = gconsts_(idx)
630  + VecVec(means_invcovars_.Row(idx), data)
631  - TraceSpSpLower(data_sq, inv_covars_[idx]);
632  }
633 }
634 
635 
638  int32 num_gselect,
639  std::vector<int32> *output) const {
640  int32 num_gauss = NumGauss();
641  Vector<BaseFloat> loglikes(num_gauss, kUndefined);
642  output->clear();
643  this->LogLikelihoods(data, &loglikes);
644 
645  BaseFloat thresh;
646  if (num_gselect < num_gauss) {
647  Vector<BaseFloat> loglikes_copy(loglikes);
648  BaseFloat *ptr = loglikes_copy.Data();
649  std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
650  thresh = ptr[num_gauss-num_gselect];
651  } else {
652  thresh = -std::numeric_limits<BaseFloat>::infinity();
653  }
654  BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
655  std::vector<std::pair<BaseFloat, int32> > pairs;
656  for (int32 p = 0; p < num_gauss; p++) {
657  if (loglikes(p) >= thresh) {
658  pairs.push_back(std::make_pair(loglikes(p), p));
659  }
660  }
661  std::sort(pairs.begin(), pairs.end(),
662  std::greater<std::pair<BaseFloat, int32> >());
663  for (int32 j = 0;
664  j < num_gselect && j < static_cast<int32>(pairs.size());
665  j++) {
666  output->push_back(pairs[j].second);
667  tot_loglike = LogAdd(tot_loglike, pairs[j].first);
668  }
669  KALDI_ASSERT(!output->empty());
670  return tot_loglike;
671 }
672 
673 
675  const VectorBase<BaseFloat> &data,
676  const std::vector<int32> &preselect,
677  int32 num_gselect,
678  std::vector<int32> *output) const {
679  static bool warned_size = false;
680  int32 preselect_sz = preselect.size();
681  int32 this_num_gselect = std::min(num_gselect, preselect_sz);
682  if (preselect_sz <= num_gselect && !warned_size) {
683  warned_size = true;
684  KALDI_WARN << "Preselect size is less or equal to than final size, "
685  << "doing nothing: " << preselect_sz << " < " << num_gselect
686  << " [won't warn again]";
687  }
688  Vector<BaseFloat> loglikes(preselect_sz);
689  LogLikelihoodsPreselect(data, preselect, &loglikes);
690 
691  Vector<BaseFloat> loglikes_copy(loglikes);
692  BaseFloat *ptr = loglikes_copy.Data();
693  std::nth_element(ptr, ptr+preselect_sz-this_num_gselect,
694  ptr+preselect_sz);
695  BaseFloat thresh = ptr[preselect_sz-this_num_gselect];
696 
697  BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
698  // we want the output sorted from best likelihood to worse
699  // (so we can prune further without the model)...
700  std::vector<std::pair<BaseFloat, int32> > pairs;
701  for (int32 p = 0; p < preselect_sz; p++)
702  if (loglikes(p) >= thresh)
703  pairs.push_back(std::make_pair(loglikes(p), preselect[p]));
704  std::sort(pairs.begin(), pairs.end(),
705  std::greater<std::pair<BaseFloat, int32> >());
706  output->clear();
707  for (int32 j = 0;
708  j < this_num_gselect && j < static_cast<int32>(pairs.size());
709  j++) {
710  output->push_back(pairs[j].second);
711  tot_loglike = LogAdd(tot_loglike, pairs[j].first);
712  }
713  KALDI_ASSERT(!output->empty());
714  return tot_loglike;
715 }
716 
717 
718 // Gets likelihood of data given this. Also provides per-Gaussian posteriors.
720  VectorBase<BaseFloat> *posterior) const {
721  if (posterior == NULL) KALDI_ERR << "NULL pointer passed as return argument.";
722  Vector<BaseFloat> loglikes;
723  LogLikelihoods(data, &loglikes);
724  BaseFloat log_sum = loglikes.ApplySoftMax();
725  if (KALDI_ISNAN(log_sum) || KALDI_ISINF(log_sum))
726  KALDI_ERR << "Invalid answer (overflow or invalid variances/features?)";
727  posterior->CopyFromVec(loglikes);
728  return log_sum;
729 }
730 
731 void FullGmm::RemoveComponent(int32 gauss, bool renorm_weights) {
732  KALDI_ASSERT(gauss < NumGauss());
733 
734  weights_.RemoveElement(gauss);
735  gconsts_.RemoveElement(gauss);
737  inv_covars_.erase(inv_covars_.begin() + gauss);
738  if (renorm_weights) {
739  BaseFloat sum_weights = weights_.Sum();
740  weights_.Scale(1.0/sum_weights);
741  valid_gconsts_ = false;
742  }
743 }
744 
745 void FullGmm::RemoveComponents(const vector<int32> &gauss_in, bool renorm_weights) {
746  vector<int32> gauss(gauss_in);
747  std::sort(gauss.begin(), gauss.end());
749  // If efficiency is later an issue, will code this specially (unlikely,
750  // except for quite large GMMs).
751  for (size_t i = 0; i < gauss.size(); i++) {
752  RemoveComponent(gauss[i], renorm_weights);
753  for (size_t j = i + 1; j < gauss.size(); j++)
754  gauss[j]--;
755  }
756 }
757 
758 void FullGmm::Write(std::ostream &out_stream, bool binary) const {
759  if (!valid_gconsts_)
760  KALDI_ERR << "Must call ComputeGconsts() before writing the model.";
761  WriteToken(out_stream, binary, "<FullGMM>");
762  if (!binary) out_stream << "\n";
763  WriteToken(out_stream, binary, "<GCONSTS>");
764  gconsts_.Write(out_stream, binary);
765  WriteToken(out_stream, binary, "<WEIGHTS>");
766  weights_.Write(out_stream, binary);
767  WriteToken(out_stream, binary, "<MEANS_INVCOVARS>");
768  means_invcovars_.Write(out_stream, binary);
769  WriteToken(out_stream, binary, "<INV_COVARS>");
770  for (int32 i = 0; i < NumGauss(); i++) {
771  inv_covars_[i].Write(out_stream, binary);
772  }
773  WriteToken(out_stream, binary, "</FullGMM>");
774  if (!binary) out_stream << "\n";
775 }
776 
777 std::ostream & operator <<(std::ostream & out_stream,
778  const kaldi::FullGmm &gmm) {
779  gmm.Write(out_stream, false);
780  return out_stream;
781 }
782 
784 void FullGmm::Interpolate(BaseFloat rho, const FullGmm &source,
785  GmmFlagsType flags) {
786  KALDI_ASSERT(NumGauss() == source.NumGauss());
787  KALDI_ASSERT(Dim() == source.Dim());
788  FullGmmNormal us(*this);
789  FullGmmNormal them(source);
790 
791  if (flags & kGmmWeights) {
792  us.weights_.Scale(1.0 - rho);
793  us.weights_.AddVec(rho, them.weights_);
794  us.weights_.Scale(1.0 / us.weights_.Sum());
795  }
796 
797  if (flags & kGmmMeans) {
798  us.means_.Scale(1.0 - rho);
799  us.means_.AddMat(rho, them.means_);
800  }
801 
802  if (flags & kGmmVariances) {
803  for (int32 i = 0; i < NumGauss(); i++) {
804  us.vars_[i].Scale(1.0 - rho);
805  us.vars_[i].AddSp(rho, them.vars_[i]);
806  }
807  }
808 
809  us.CopyToFullGmm(this);
810  ComputeGconsts();
811 }
812 
813 void FullGmm::Read(std::istream &in_stream, bool binary) {
814 // ExpectToken(in_stream, binary, "<FullGMMBegin>");
815  std::string token;
816  ReadToken(in_stream, binary, &token);
817  // <FullGMMBegin> is for compatibility. Will be deleted later
818  if (token != "<FullGMMBegin>" && token != "<FullGMM>")
819  KALDI_ERR << "Expected <FullGMM>, got " << token;
820 // ExpectToken(in_stream, binary, "<GCONSTS>");
821  ReadToken(in_stream, binary, &token);
822  if (token == "<GCONSTS>") { // The gconsts are optional.
823  gconsts_.Read(in_stream, binary);
824  ExpectToken(in_stream, binary, "<WEIGHTS>");
825  } else {
826  if (token != "<WEIGHTS>")
827  KALDI_ERR << "FullGmm::Read, expected <WEIGHTS> or <GCONSTS>, got "
828  << token;
829  }
830  weights_.Read(in_stream, binary);
831  ExpectToken(in_stream, binary, "<MEANS_INVCOVARS>");
832  means_invcovars_.Read(in_stream, binary);
833  ExpectToken(in_stream, binary, "<INV_COVARS>");
834  int32 ncomp = weights_.Dim(), dim = means_invcovars_.NumCols();
835  ResizeInvCovars(ncomp, dim);
836  for (int32 i = 0; i < ncomp; i++) {
837  inv_covars_[i].Read(in_stream, binary);
838  }
839 // ExpectToken(in_stream, binary, "<FullGMMEnd>");
840  ReadToken(in_stream, binary, &token);
841  // <FullGMMEnd> is for compatibility. Will be deleted later
842  if (token != "<FullGMMEnd>" && token != "</FullGMM>")
843  KALDI_ERR << "Expected </FullGMM>, got " << token;
844 
845  ComputeGconsts(); // safer option than trusting the read gconsts
846 }
847 
848 std::istream & operator >>(std::istream & in_stream, kaldi::FullGmm &gmm) {
849  gmm.Read(in_stream, false); // false == non-binary.
850  return in_stream;
851 }
852 
853 } // End namespace kaldi
std::ostream & operator<<(std::ostream &os, const MatrixBase< Real > &M)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
Packed symmetric matrix class.
Definition: matrix-common.h:62
void Write(std::ostream &out, bool binary) const
write to stream.
void Merge(int32 target_components, std::vector< int32 > *history=NULL)
Merge the components and remember the order in which the components were merged (flat list of pairs) ...
Definition: full-gmm.cc:206
#define M_LOG_2PI
Definition: kaldi-math.h:60
void Scale(Real c)
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, VectorBase< BaseFloat > *posterior) const
Computes the posterior probabilities of all Gaussian components given a data point.
Definition: full-gmm.cc:719
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: full-gmm.h:60
Definition for Gaussian Mixture Model with full covariances in normal mode: where the parameters are ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition: full-gmm.cc:613
int32 ComputeGconsts()
Sets the gconsts.
Definition: full-gmm.cc:92
void Split(int32 target_components, float perturb_factor, std::vector< int32 > *history=NULL)
Split the components up to target_components, perturbing the duplicated means by a random vector scaled by perturb_factor; history records the order in which components were split (flat list of indices).
Definition: full-gmm.cc:132
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40
#define KALDI_ISINF
Definition: kaldi-math.h:73
void InvertDouble(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
Definition: sp-matrix.cc:312
const Vector< BaseFloat > & gconsts() const
Const accessors.
Definition: diag-gmm.h:174
Vector< BaseFloat > weights_
weights (not log).
Definition: full-gmm.h:201
kaldi::int32 int32
void Write(std::ostream &os, bool binary) const
Definition: full-gmm.cc:758
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
void MulTp(const TpMatrix< Real > &M, const MatrixTransposeType trans)
Multiplies this vector by lower-triangular matrix: *this <-- *this * M.
uint16 GmmFlagsType
Bitwise OR of the above flags.
Definition: model-common.h:35
void ScaleDiag(const Real alpha)
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real LogSumExp(Real prune=-1.0) const
Returns log(sum(exp())) without exp overflow If prune > 0.0, ignores terms less than the max - prune...
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
void CopyFromSp(const SpMatrix< Real > &other)
Definition: sp-matrix.h:85
std::vector< SpMatrix< double > > vars_
covariances
void Perturb(float perturb_factor)
Perturbs the component means with a random vector multiplied by the perturb factor.
Definition: full-gmm.cc:191
void RemoveComponent(int32 gauss, bool renorm_weights)
Mutators for single component, supports float or double Removes single component from model...
Definition: full-gmm.cc:731
FullGmm()
Empty constructor.
Definition: full-gmm.h:46
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void CopyFromFullGmm(const FullGmm &fullgmm)
Copies from given FullGmm.
Definition: full-gmm.cc:65
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha * v * v'
Definition: sp-matrix.cc:946
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: full-gmm.cc:41
Vector< double > weights_
weights (not log).
void ResizeInvCovars(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: full-gmm.cc:51
Real LogPosDefDet() const
Computes log determinant but only for +ve-def matrices (it uses Cholesky).
Definition: sp-matrix.cc:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
double Log(double x)
Definition: kaldi-math.h:100
void RemoveComponents(const std::vector< int32 > &gauss, bool renorm_weights)
Removes multiple components from model; "gauss" must not have dups.
Definition: full-gmm.cc:745
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
Definition: sp-matrix.h:211
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
bool valid_gconsts_
Recompute gconsts_ if false.
Definition: full-gmm.h:200
BaseFloat GaussianSelectionPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &preselect, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: full-gmm.cc:674
BaseFloat GaussianSelection(const VectorBase< BaseFloat > &data, int32 num_gselect, std::vector< int32 > *output) const
Get gaussian selection information for one frame.
Definition: full-gmm.cc:637
#define KALDI_ERR
Definition: kaldi-error.h:147
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
Definition: sp-matrix.cc:964
Real TraceSpSpLower(const SpMatrix< Real > &A, const SpMatrix< Real > &B)
Definition: sp-matrix.cc:1171
Packed symmetric matrix class.
Definition: matrix-common.h:63
BaseFloat MergedComponentsLogdet(BaseFloat w1, BaseFloat w2, const VectorBase< BaseFloat > &f1, const VectorBase< BaseFloat > &f2, const SpMatrix< BaseFloat > &s1, const SpMatrix< BaseFloat > &s2) const
Definition: full-gmm.cc:537
#define KALDI_WARN
Definition: kaldi-error.h:150
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: kaldi-vector.h:70
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: full-gmm.h:58
const Vector< BaseFloat > & weights() const
Definition: diag-gmm.h:178
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
const Vector< BaseFloat > & weights() const
Definition: full-gmm.h:144
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
void SetRandn()
Set vector to random normally-distributed noise.
void Read(std::istream &is, bool binary)
Definition: full-gmm.cc:813
BaseFloat MergePreselect(int32 target_components, const std::vector< std::pair< int32, int32 > > &preselect_pairs)
Merge the components and remember the order in which the components were merged (flat list of pairs);...
Definition: full-gmm.cc:382
double LogAdd(double x, double y)
Definition: kaldi-math.h:184
std::istream & operator>>(std::istream &is, Matrix< Real > &M)
BaseFloat ComponentLogLikelihood(const VectorBase< BaseFloat > &data, int32 comp_id) const
Computes the contribution log-likelihood of a data point from a single Gaussian component.
Definition: full-gmm.cc:562
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ISNAN
Definition: kaldi-math.h:72
void LogLikelihoods(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *loglikes) const
Outputs the per-component contributions to the log-likelihood.
Definition: full-gmm.cc:591
void CopyFromDiagGmm(const DiagGmm &diaggmm)
Copies from given DiagGmm.
Definition: full-gmm.cc:77
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void RemoveRow(MatrixIndexT i)
Remove a specified row.
std::vector< SpMatrix< BaseFloat > > inv_covars_
Inverse covariances.
Definition: full-gmm.h:202
void Interpolate(BaseFloat rho, const FullGmm &source, GmmFlagsType flags=kGmmAll)
this = rho x source + (1-rho) x this
Definition: full-gmm.cc:784
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
Matrix< BaseFloat > means_invcovars_
Means times inverse covariances.
Definition: full-gmm.h:203
void SetZero()
Set vector to all zeros.
Matrix< double > means_
Means.
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
BaseFloat LogLikelihood(const VectorBase< BaseFloat > &data) const
Returns the log-likelihood of a data point (vector) given the GMM.
Definition: full-gmm.cc:582
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
const Matrix< BaseFloat > & inv_vars() const
Definition: diag-gmm.h:180
Vector< BaseFloat > gconsts_
Equals log(weight) - 0.5 * (log det(var) + mean&#39;*inv(var)*mean)
Definition: full-gmm.h:199