estimate-am-sgmm2-ebw.cc
Go to the documentation of this file.
1 // sgmm2/estimate-am-sgmm2-ebw.cc
2 
3 // Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
22 #include "util/kaldi-thread.h"
23 using std::vector;
24 
25 namespace kaldi {
26 
28  const MleAmSgmm2Accs &den_accs,
29  AmSgmm2 *model,
30  SgmmUpdateFlagsType flags,
31  BaseFloat *auxf_change_out,
32  BaseFloat *count_out) {
33 
34  // Various quantities need to be computed at the start, before we
35  // change any of the model parameters.
36  std::vector< SpMatrix<double> > Q_num, Q_den, H, S_means;
37 
38  if (flags & kSgmmPhoneProjections) {
39  MleAmSgmm2Updater::ComputeQ(num_accs, *model, &Q_num);
40  MleAmSgmm2Updater::ComputeQ(den_accs, *model, &Q_den);
41  }
42  if (flags & kSgmmCovarianceMatrix) { // compute the difference between
43  // the num and den S_means matrices... this is what we will need.
44  MleAmSgmm2Updater::ComputeSMeans(num_accs, *model, &S_means);
45  std::vector< SpMatrix<double> > S_means_tmp;
46  MleAmSgmm2Updater::ComputeSMeans(den_accs, *model, &S_means_tmp);
47  for (size_t i = 0; i < S_means.size(); i++)
48  S_means[i].AddSp(-1.0, S_means_tmp[i]);
49  }
51  model->ComputeH(&H);
52 
53  Vector<double> gamma_num(num_accs.num_gaussians_);
54  for (int32 j1 = 0; j1 < num_accs.num_groups_; j1++)
55  gamma_num.AddRowSumMat(1.0, num_accs.gamma_[j1]);
56  Vector<double> gamma_den(den_accs.num_gaussians_);
57  for (int32 j1 = 0; j1 < den_accs.num_groups_; j1++)
58  gamma_den.AddRowSumMat(1.0, den_accs.gamma_[j1]);
59 
60  BaseFloat tot_impr = 0.0;
61 
62  if (flags & kSgmmPhoneVectors)
63  tot_impr += UpdatePhoneVectors(num_accs, den_accs, H, model);
64 
65  if (flags & kSgmmPhoneProjections)
66  tot_impr += UpdateM(num_accs, den_accs, Q_num, Q_den,
67  gamma_num, gamma_den, model);
68 
69  if (flags & kSgmmPhoneWeightProjections)
70  tot_impr += UpdateW(num_accs, den_accs, gamma_num, gamma_den, model);
71 
73  tot_impr += UpdateU(num_accs, den_accs, gamma_num, gamma_den, model);
74 
75  if (flags & kSgmmCovarianceMatrix)
76  tot_impr += UpdateVars(num_accs, den_accs,
77  gamma_num, gamma_den, S_means, model);
78 
79  if (flags & kSgmmSubstateWeights)
80  tot_impr += UpdateSubstateWeights(num_accs, den_accs, model);
81 
82  if (flags & kSgmmSpeakerProjections)
83  tot_impr += UpdateN(num_accs, den_accs, gamma_num, gamma_den, model);
84 
85 
86  if (auxf_change_out) *auxf_change_out = tot_impr * num_accs.total_frames_;
87  if (count_out) *count_out = num_accs.total_frames_;
88 
89  if (fabs(num_accs.total_frames_ - den_accs.total_frames_) >
90  0.01*(num_accs.total_frames_ + den_accs.total_frames_))
91  KALDI_WARN << "Num and den frame counts differ, "
92  << num_accs.total_frames_ << " vs. " << den_accs.total_frames_;
93 
94  BaseFloat like_diff = num_accs.total_like_ - den_accs.total_like_;
95 
96  KALDI_LOG << "***Averaged differenced likelihood per frame is "
97  << (like_diff/num_accs.total_frames_)
98  << " over " << (num_accs.total_frames_) << " frames.";
99  KALDI_LOG << "***Note: for this to be at all meaningful, if you use "
100  << "\"canceled\" stats you will have to renormalize this over "
101  << "the \"real\" frame count.";
102  KALDI_ASSERT(num_accs.total_frames_ > 0 && den_accs.total_frames_ > 0);
103 
104  model->ComputeNormalizers();
105 }
106 
107 
108 class EbwUpdatePhoneVectorsClass: public MultiThreadable { // For multi-threaded.
109  public:
111  const MleAmSgmm2Accs &num_accs,
112  const MleAmSgmm2Accs &den_accs,
113  const std::vector<SpMatrix<double> > &H,
114  AmSgmm2 *model,
115  double *auxf_impr):
116  updater_(updater), num_accs_(num_accs), den_accs_(den_accs),
117  model_(model), H_(H), auxf_impr_ptr_(auxf_impr), auxf_impr_(0.0) { }
118 
120  MultiThreadable(other),
121  updater_(other.updater_), num_accs_(other.num_accs_),
122  den_accs_(other.den_accs_), model_(other.model_),
123  H_(other.H_), auxf_impr_ptr_(other.auxf_impr_ptr_), auxf_impr_(0.0) { }
124 
126  *auxf_impr_ptr_ += auxf_impr_;
127  }
128 
129  inline void operator() () {
130  // Note: give them local copy of the sums we're computing,
131  // which will be propagated to the total sums in the destructor.
132  updater_->UpdatePhoneVectorsInternal(num_accs_, den_accs_, H_, model_,
133  &auxf_impr_, num_threads_, thread_id_);
134  }
135  private:
140  const std::vector<SpMatrix<double> > &H_;
141  double *auxf_impr_ptr_;
142  double auxf_impr_;
143 };
144 
145 
147  const MleAmSgmm2Accs &accs,
148  const AmSgmm2 &model,
149  const std::vector<SpMatrix<double> > &H,
150  int32 j1,
151  int32 m,
152  const Vector<double> &w_jm_in,
153  double gamma_jm,
154  Vector<double> *g_jm,
155  SpMatrix<double> *H_jm) {
156  Vector<double> w_jm(w_jm_in);
157  if (!accs.a_.empty() && accs.a_[j1](m, 0) != 0) { // [SSGMM]
158  w_jm.MulElements(accs.a_[j1].Row(m)); // multiply by "a" quantities..
159  w_jm.Scale(1.0 / w_jm.Sum()); // renormalize.
160  }
161  g_jm->CopyFromVec(accs.y_[j1].Row(m));
162  for (int32 i = 0; i < accs.num_gaussians_; i++) {
163  double gamma_jmi = accs.gamma_[j1](m, i);
164  double quadratic_term = std::max(gamma_jmi, gamma_jm * w_jm(i));
165  double scalar = gamma_jmi - gamma_jm * w_jm(i) + quadratic_term
166  * VecVec(model.w_.Row(i), model.v_[j1].Row(m));
167  g_jm->AddVec(scalar, model.w_.Row(i));
168  if (gamma_jmi != 0.0)
169  H_jm->AddSp(gamma_jmi, H[i]); // The most important term..
170  if (quadratic_term > 1.0e-10)
171  H_jm->AddVec2(static_cast<BaseFloat>(quadratic_term), model.w_.Row(i));
172  }
173 }
174 
175 
176 // Runs the phone vectors update for a subset of states (called
177 // multi-threaded).
179  const MleAmSgmm2Accs &num_accs,
180  const MleAmSgmm2Accs &den_accs,
181  const std::vector<SpMatrix<double> > &H,
182  AmSgmm2 *model,
183  double *auxf_impr,
184  int32 num_threads,
185  int32 thread_id) const {
186 
187  int32 block_size = (num_accs.num_groups_ + (num_threads-1)) / num_threads,
188  j1_start = block_size * thread_id,
189  j1_end = std::min(num_accs.num_groups_, j1_start + block_size);
190 
191  int32 S = num_accs.phn_space_dim_, I = num_accs.num_gaussians_;
192 
193  for (int32 j1 = j1_start; j1 < j1_end; j1++) {
194  double num_state_count = 0.0,
195  state_auxf_impr = 0.0;
196  Vector<double> w_jm(I);
197  for (int32 m = 0; m < model->NumSubstatesForGroup(j1); m++) {
198  double gamma_jm_num = num_accs.gamma_[j1].Row(m).Sum();
199  double gamma_jm_den = den_accs.gamma_[j1].Row(m).Sum();
200  num_state_count += gamma_jm_num;
201  Vector<double> g_jm_num(S); // computed using eq. 58 of SGMM paper [for numerator stats]
202  SpMatrix<double> H_jm_num(S); // computed using eq. 59 of SGMM paper [for numerator stats]
203  Vector<double> g_jm_den(S); // same, but for denominator stats.
204  SpMatrix<double> H_jm_den(S);
205 
206  // Compute the weights for this sub-state.
207  // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7)
208  w_jm.AddMatVec(1.0, Matrix<double>(model->w_), kNoTrans,
209  Vector<double>(model->v_[j1].Row(m)), 0.0);
210  w_jm.ApplySoftMax();
211  // Note: in the ML code, in the SSGMM case, at this point the w_jm would
212  // be modified with the "a" quantities to get the "\tilde{w}_{jm}" of the
213  // SSGMM techreport. But in this code, it gets done inside ComputePhoneVecStats.
214 
215  ComputePhoneVecStats(num_accs, *model, H, j1, m, w_jm, gamma_jm_num,
216  &g_jm_num, &H_jm_num);
217  ComputePhoneVecStats(den_accs, *model, H, j1, m, w_jm, gamma_jm_den,
218  &g_jm_den, &H_jm_den);
219 
220  Vector<double> v_jm(model->v_[j1].Row(m));
221  Vector<double> local_derivative(S); // difference of derivative of numerator
222  // and denominator objetive function.
223  local_derivative.AddVec(1.0, g_jm_num);
224  local_derivative.AddSpVec(-1.0, H_jm_num, v_jm, 1.0);
225  local_derivative.AddVec(-1.0, g_jm_den);
226  local_derivative.AddSpVec(-1.0 * -1.0, H_jm_den, v_jm, 1.0);
227 
228  SpMatrix<double> quadratic_term(H_jm_num);
229  quadratic_term.AddSp(1.0, H_jm_den);
230  double substate_count = 1.0e-10 + gamma_jm_num + gamma_jm_den;
231  quadratic_term.Scale( (substate_count + options_.tau_v) / substate_count);
232  quadratic_term.Scale(1.0 / (options_.lrate_v + 1.0e-10) );
233 
234  Vector<double> delta_v_jm(S);
235 
236  SolverOptions opts;
237  opts.name = "v";
238  opts.K = options_.max_cond;
239  opts.eps = options_.epsilon;
240 
241  double auxf_impr =
242  ((gamma_jm_num + gamma_jm_den == 0) ? 0.0 :
243  SolveQuadraticProblem(quadratic_term,
244  local_derivative,
245  opts, &delta_v_jm));
246 
247  v_jm.AddVec(1.0, delta_v_jm);
248  model->v_[j1].Row(m).CopyFromVec(v_jm);
249  state_auxf_impr += auxf_impr;
250  }
251 
252  *auxf_impr += state_auxf_impr;
253  if (j1 < 10 && thread_id == 0) {
254  KALDI_LOG << "Objf impr for group j = " << j1 << " is "
255  << (state_auxf_impr / (num_state_count + 1.0e-10))
256  << " over " << num_state_count << " frames";
257  }
258  }
259 }
260 
262  const MleAmSgmm2Accs &den_accs,
263  const vector< SpMatrix<double> > &H,
264  AmSgmm2 *model) const {
265  KALDI_LOG << "Updating phone vectors.";
266 
267  double count = 0.0, auxf_impr = 0.0;
268 
269  int32 J1 = num_accs.num_groups_;
270  for (int32 j1 = 0; j1 < J1; j1++) count += num_accs.gamma_[j1].Sum();
271 
272  EbwUpdatePhoneVectorsClass c(this, num_accs, den_accs, H, model, &auxf_impr);
273  RunMultiThreaded(c);
274 
275  auxf_impr /= count;
276 
277  KALDI_LOG << "**Overall auxf improvement for v is " << auxf_impr
278  << " over " << count << " frames";
279  return auxf_impr;
280 }
281 
282 
284  const MleAmSgmm2Accs &den_accs,
285  const std::vector< SpMatrix<double> > &Q_num,
286  const std::vector< SpMatrix<double> > &Q_den,
287  const Vector<double> &gamma_num,
288  const Vector<double> &gamma_den,
289  AmSgmm2 *model) const {
290  int32 S = model->PhoneSpaceDim(),
291  D = model->FeatureDim(),
292  I = model->NumGauss();
293 
294  Vector<double> impr_vec(I);
295 
296  for (int32 i = 0; i < I; i++) {
297  double gamma_i_num = gamma_num(i), gamma_i_den = gamma_den(i);
298 
299  if (gamma_i_num + gamma_i_den == 0.0) {
300  KALDI_WARN << "Not updating phonetic basis for i = " << i
301  << " because count is zero. ";
302  continue;
303  }
304 
305  Matrix<double> Mi(model->M_[i]);
306  Matrix<double> L(D, S); // this is something like the Y quantity, which
307  // represents the linear term in the objf on M-- except that we make it the local
308  // derivative about the current value, instead of the derivative around zero.
309  // But it's not exactly the derivative w.r.t. M, due to the factor of Sigma_i.
310  // The auxiliary function is Q(x) = tr(M^T P Y) - 0.5 tr(P M Q M^T),
311  // where P is Y^{-1}. The quantity L we define here will be Y - M Q,
312  // and you can think of this as like the local derivative, except there is
313  // a term P in there.
314  L.AddMat(1.0, num_accs.Y_[i]);
315  L.AddMatSp(-1.0, Mi, kNoTrans, Q_num[i], 1.0);
316  L.AddMat(-1.0, den_accs.Y_[i]);
317  L.AddMatSp(-1.0*-1.0, Mi, kNoTrans, Q_den[i], 1.0);
318 
319  SpMatrix<double> Q(S); // This is a combination of the Q's for the numerator and denominator.
320  Q.AddSp(1.0, Q_num[i]);
321  Q.AddSp(1.0, Q_den[i]);
322 
323  double state_count = 1.0e-10 + gamma_i_num + gamma_i_den; // the count
324  // represented by the quadratic part of the stats.
325  Q.Scale( (state_count + options_.tau_M) / state_count );
326  Q.Scale( 1.0 / (options_.lrate_M + 1.0e-10) );
327 
328 
329  SolverOptions opts;
330  opts.name = "M";
331  opts.K = options_.max_cond;
332  opts.eps = options_.epsilon;
333 
334  Matrix<double> deltaM(D, S);
335  double impr =
337  SpMatrix<double>(model->SigmaInv_[i]),
338  opts, &deltaM);
339 
340  impr_vec(i) = impr;
341  Mi.AddMat(1.0, deltaM);
342  model->M_[i].CopyFromMat(Mi);
343  if (i < 10 || impr / state_count > 3.0) {
344  KALDI_VLOG(2) << "Objf impr for projection M for i = " << i << ", is "
345  << (impr/(gamma_i_num + 1.0e-20)) << " over " << gamma_i_num
346  << " frames";
347  }
348  }
349  BaseFloat tot_count = gamma_num.Sum(), tot_impr = impr_vec.Sum();
350 
351  tot_impr /= (tot_count + 1.0e-20);
352  KALDI_LOG << "Overall auxiliary function improvement for model projections "
353  << "M is " << tot_impr << " over " << tot_count << " frames";
354 
355  KALDI_VLOG(1) << "Updating M: num-count is " << gamma_num;
356  KALDI_VLOG(1) << "Updating M: den-count is " << gamma_den;
357  KALDI_VLOG(1) << "Updating M: objf-impr is " << impr_vec;
358 
359  return tot_impr;
360 }
361 
362 
363 // Note: we do just one iteration of the weight-projection update here. The
364 // weak-sense auxiliary functions used don't really make sense if we do it for
365 // multiple iterations. It would be possible to use a similar auxiliary
366 // function to the one on my (D. Povey)'s thesis for the Gaussian mixture
367 // weights, which would make sense for multiple iterations, but this would be a
368 // bit more complex to implement and probably would not give much improvement
369 // over this approach.
371  const MleAmSgmm2Accs &den_accs,
372  const Vector<double> &gamma_num,
373  const Vector<double> &gamma_den,
374  AmSgmm2 *model) {
375  KALDI_LOG << "Updating weight projections";
376 
377  int32 I = num_accs.num_gaussians_, S = num_accs.phn_space_dim_;
378 
379  Matrix<double> g_i_num(I, S), g_i_den(I, S);
380 
381  // View F_i_{num,den} as vectors of SpMatrix [i.e. symmetric matrices,
382  // linearized into vectors]
383  Matrix<double> F_i_num(I, (S*(S+1))/2), F_i_den(I, (S*(S+1))/2);
384 
385  Vector<double> impr_vec(I);
386 
387  // Get the F_i and g_i quantities-- this is done in parallel (multi-core),
388  // using the same code we use in the ML update [except we get it for
389  // numerator and denominator separately.]
390  Matrix<double> w(model->w_);
391  {
392  std::vector<Matrix<double> > log_a_num;
393  if (model->HasSpeakerDependentWeights())
394  MleAmSgmm2Updater::ComputeLogA(num_accs, &log_a_num);
395  double garbage;
396  UpdateWClass c_num(num_accs, *model, w, log_a_num, &F_i_num, &g_i_num, &garbage);
397  RunMultiThreaded(c_num);
398  }
399  {
400  std::vector<Matrix<double> > log_a_den;
401  if (model->HasSpeakerDependentWeights())
402  MleAmSgmm2Updater::ComputeLogA(den_accs, &log_a_den);
403  double garbage;
404  UpdateWClass c_den(den_accs, *model, w, log_a_den, &F_i_den, &g_i_den, &garbage);
405  RunMultiThreaded(c_den);
406  }
407 
408  for (int32 i = 0; i < I; i++) {
409 
410  // auxf was originally formulated in terms of the change in w (i.e. the
411  // g quantities are the local derivatives), so there is less hassle than
412  // with some of the other updates, in changing it to be discriminative.
413  // we essentially just difference the linear terms and add the quadratic
414  // terms.
415 
416  Vector<double> derivative(g_i_num.Row(i));
417  derivative.AddVec(-1.0, g_i_den.Row(i));
418  // F_i_num quadratic_term is a bit like the negated 2nd derivative
419  // of the numerator stats-- actually it's not the actual 2nd deriv,
420  // but an upper bound on it.
421  SpMatrix<double> quadratic_term(S), tmp_F(S);
422  quadratic_term.CopyFromVec(F_i_num.Row(i));
423  tmp_F.CopyFromVec(F_i_den.Row(i)); // tmp_F is used for Vector->SpMatrix conversion.
424  quadratic_term.AddSp(1.0, tmp_F);
425 
426  double state_count = gamma_num(i) + gamma_den(i);
427 
428  quadratic_term.Scale((state_count + options_.tau_w) / (state_count + 1.0e-10));
429  quadratic_term.Scale(1.0 / (options_.lrate_w + 1.0e-10) );
430 
431  Vector<double> delta_w(S);
432 
433  SolverOptions opts;
434  opts.name = "w";
435  opts.K = options_.max_cond;
436  opts.eps = options_.epsilon;
437 
438  double objf_impr =
439  SolveQuadraticProblem(quadratic_term, derivative, opts, &delta_w);
440 
441  impr_vec(i) = objf_impr;
442  if (i < 10 || objf_impr / (gamma_num(i) + 1.0e-10) > 2.0) {
443  KALDI_LOG << "Predicted objf impr for w per frame is "
444  << (objf_impr / (gamma_num(i) + 1.0e-10))
445  << " over " << gamma_num(i) << " frames.";
446  }
447  model->w_.Row(i).AddVec(1.0, delta_w);
448  }
449  KALDI_VLOG(1) << "Updating w: numerator count is " << gamma_num;
450  KALDI_VLOG(1) << "Updating w: denominator count is " << gamma_den;
451  KALDI_VLOG(1) << "Updating w: objf-impr is " << impr_vec;
452 
453  double tot_num_count = gamma_num.Sum(), tot_impr = impr_vec.Sum();
454  tot_impr /= tot_num_count;
455 
456  KALDI_LOG << "**Overall objf impr for w per frame is "
457  << tot_impr << " over " << tot_num_count
458  << " frames.";
459  return tot_impr;
460 }
461 
462 
464  const MleAmSgmm2Accs &den_accs,
465  const Vector<double> &gamma_num,
466  const Vector<double> &gamma_den,
467  AmSgmm2 *model) {
468  int32 T = num_accs.spk_space_dim_;
469  double tot_impr = 0.0;
470  for (int32 i = 0; i < num_accs.num_gaussians_; i++) {
471  if (gamma_num(i) < 200.0) {
472  KALDI_LOG << "Numerator count is small " << gamma_num(i) << " for gaussian "
473  << i << ", not updating u_i.";
474  continue;
475  }
476  Vector<double> u_i(model->u_.Row(i));
477  Vector<double> delta_u(T);
478  Vector<double> t(T); // derivative.
479  t.AddVec(1.0, num_accs.t_.Row(i));
480  t.AddVec(-1.0, den_accs.t_.Row(i));
481  SpMatrix<double> U(T); // quadratic term.
482  U.AddSp(1.0, num_accs.U_[i]);
483  U.AddSp(1.0, den_accs.U_[i]);
484 
485  double state_count = gamma_num(i) + gamma_den(i);
486  U.Scale((state_count + options_.tau_u) / (state_count + 1.0e-10));
487  U.Scale(1.0 / (options_.lrate_u + 1.0e-10) );
488 
489  SolverOptions opts;
490  opts.name = "u";
491  opts.K = options_.max_cond;
492  opts.eps = options_.epsilon;
493 
494  double impr = SolveQuadraticProblem(U, t, opts, &delta_u);
495  double impr_per_frame = impr / gamma_num(i);
496  if (impr_per_frame > options_.max_impr_u) {
497  KALDI_WARN << "Updating speaker weight projections u, for Gaussian index "
498  << i << ", impr/frame is " << impr_per_frame << " over "
499  << gamma_num(i) << " frames, scaling back to not exceed "
500  << options_.max_impr_u;
501  double scale = options_.max_impr_u / impr_per_frame;
502  impr *= scale;
503  delta_u.Scale(scale);
504  // Note: a linear scaling of "impr" with "scale" is not quite accurate
505  // in depicting how the quadratic auxiliary function varies as we change
506  // the scale on "delta", but this does not really matter-- the goal is
507  // to limit the auxiliary-function change to not be too large.
508  }
509  if (i < 10) {
510  KALDI_LOG << "Objf impr for spk weight-projection u for i = " << (i)
511  << ", is " << (impr / (gamma_num(i) + 1.0e-20)) << " over "
512  << gamma_num(i) << " frames";
513  }
514  u_i.AddVec(1.0, delta_u);
515  model->u_.Row(i).CopyFromVec(u_i);
516  tot_impr += impr;
517  }
518  KALDI_LOG << "**Overall objf impr for u is " << (tot_impr/gamma_num.Sum())
519  << ", over " << gamma_num.Sum() << " frames";
520  return tot_impr;
521 }
522 
523 
525  const MleAmSgmm2Accs &den_accs,
526  const Vector<double> &gamma_num,
527  const Vector<double> &gamma_den,
528  AmSgmm2 *model) const {
529  if (num_accs.spk_space_dim_ == 0 || num_accs.R_.size() == 0 ||
530  num_accs.Z_.size() == 0) {
531  KALDI_ERR << "Speaker subspace dim is zero or no stats accumulated";
532  }
533 
534  int32 I = num_accs.num_gaussians_, D = num_accs.feature_dim_,
535  T = num_accs.spk_space_dim_;
536 
537  Vector<double> impr_vec(I);
538 
539  for (int32 i = 0; i < I; i++) {
540  double gamma_i_num = gamma_num(i), gamma_i_den = gamma_den(i);
541  if (gamma_i_num + gamma_i_den == 0.0) {
542  KALDI_WARN << "Not updating speaker basis for i = " << i
543  << " because count is zero. ";
544  continue;
545  }
546  Matrix<double> Ni(model->N_[i]);
547  // See comment near declaration of L in UpdateM(). This update is the
548  // same, but change M->N, Y->Z and Q->R.
549 
550  Matrix<double> L(D, T);
551  L.AddMat(1.0, num_accs.Z_[i]);
552  L.AddMatSp(-1.0, Ni, kNoTrans, num_accs.R_[i], 1.0);
553  L.AddMat(-1.0, den_accs.Z_[i]);
554  L.AddMatSp(-1.0*-1.0, Ni, kNoTrans, den_accs.R_[i], 1.0);
555 
556  SpMatrix<double> R(T); // combination of the numerator and denominator R's.
557  R.AddSp(1.0, num_accs.R_[i]);
558  R.AddSp(1.0, den_accs.R_[i]);
559 
560  double state_count = 1.0e-10 + gamma_i_num + gamma_i_den; // the count
561  // represented by the quadratic part of the stats.
562  R.Scale( (state_count + options_.tau_N) / state_count );
563  R.Scale( 1.0 / (options_.lrate_N + 1.0e-10) );
564 
565  Matrix<double> deltaN(D, T);
566 
567  SolverOptions opts;
568  opts.name = "N";
569  opts.K = options_.max_cond;
570  opts.eps = options_.epsilon;
571 
572  double impr =
574  SpMatrix<double>(model->SigmaInv_[i]),
575  opts, &deltaN);
576  impr_vec(i) = impr;
577  Ni.AddMat(1.0, deltaN);
578  model->N_[i].CopyFromMat(Ni);
579  if (i < 10 || impr / (state_count+1.0e-20) > 3.0) {
580  KALDI_LOG << "Objf impr for spk projection N for i = " << (i)
581  << ", is " << (impr / (gamma_i_num + 1.0e-20)) << " over "
582  << gamma_i_num << " frames";
583  }
584  }
585 
586  KALDI_VLOG(1) << "Updating N: numerator count is " << gamma_num;
587  KALDI_VLOG(1) << "Updating N: denominator count is " << gamma_den;
588  KALDI_VLOG(1) << "Updating N: objf-impr is " << impr_vec;
589 
590  double tot_count = gamma_num.Sum(), tot_impr = impr_vec.Sum();
591  tot_impr /= (tot_count + 1.0e-20);
592  KALDI_LOG << "**Overall auxf impr for N is " << tot_impr
593  << " over " << tot_count << " frames";
594  return tot_impr;
595 }
596 
598  const MleAmSgmm2Accs &den_accs,
599  const Vector<double> &gamma_num,
600  const Vector<double> &gamma_den,
601  const std::vector< SpMatrix<double> > &S_means,
602  AmSgmm2 *model) const {
603  // Note: S_means contains not only the quantity S_means in the paper,
604  // but also has a term - (Y_i M_i^T + M_i Y_i^T). Plus, it is differenced
605  // between numerator and denominator. We don't calculate it here,
606  // because it had to be computed with the original model, before we
607  // changed the M quantities.
608  int32 I = num_accs.num_gaussians_;
609  KALDI_ASSERT(S_means.size() == I);
610  Vector<double> impr_vec(I);
611 
612  for (int32 i = 0; i < I; i++) {
613  double num_count = gamma_num(i), den_count = gamma_den(i);
614 
615  SpMatrix<double> SigmaStats(S_means[i]);
616  SigmaStats.AddSp(1.0, num_accs.S_[i]);
617  SigmaStats.AddSp(-1.0, den_accs.S_[i]);
618  // SigmaStats now contain the stats for estimating Sigma (as in the main SGMM paper),
619  // differenced between num and den.
620  SpMatrix<double> SigmaInvOld(model->SigmaInv_[i]), SigmaOld(model->SigmaInv_[i]);
621  SigmaOld.Invert();
622  double count = num_count - den_count;
624  double inv_lrate = 1.0 / options_.lrate_Sigma;
625  // These formulas assure that the objective function behaves in
626  // a roughly symmetric way w.r.t. num and den counts.
627  double E_den = 1.0 + inv_lrate, E_num = inv_lrate - 1.0;
628 
629  double smoothing_count =
630  (options_.tau_Sigma * inv_lrate) + // multiply tau_Sigma by inverse-lrate
631  (E_den * den_count) + // for compatibility with other updates.
632  (E_num * num_count) +
633  1.0e-10;
634  SigmaStats.AddSp(smoothing_count, SigmaOld);
635  count += smoothing_count;
636  SigmaStats.Scale(1.0 / count);
637  SpMatrix<double> SigmaInv(SigmaStats); // before floor and ceiling. Currently sigma,
638  // not its inverse.
639  bool verbose = false;
640  int n_floor = SigmaInv.ApplyFloor(SigmaOld, options_.cov_min_value, verbose);
641  SigmaInv.Invert(); // make it inverse variance.
642  int n_ceiling = SigmaInv.ApplyFloor(SigmaInvOld, options_.cov_min_value, verbose);
643 
644  // this auxf_change.
645  double auxf_change = -0.5 * count *(TraceSpSp(SigmaInv, SigmaStats)
646  - TraceSpSp(SigmaInvOld, SigmaStats)
647  - SigmaInv.LogDet()
648  + SigmaInvOld.LogDet());
649 
650  model->SigmaInv_[i].CopyFromSp(SigmaInv);
651  impr_vec(i) = auxf_change;
652  if (i < 10 || auxf_change / (num_count+den_count+1.0e-10) > 2.0
653  || n_floor+n_ceiling > 0) {
654  KALDI_LOG << "Updating variance: Auxf change per frame for Gaussian "
655  << i << " is " << (auxf_change / num_count) << " over "
656  << num_count << " frames " << "(den count was " << den_count
657  << "), #floor,ceil was " << n_floor << ", " << n_ceiling;
658  }
659  }
660  KALDI_VLOG(1) << "Updating Sigma: numerator count is " << gamma_num;
661  KALDI_VLOG(1) << "Updating Sigma: denominator count is " << gamma_den;
662  KALDI_VLOG(1) << "Updating Sigma: objf-impr is " << impr_vec;
663 
664  double tot_count = gamma_num.Sum(), tot_impr = impr_vec.Sum();
665  tot_impr /= tot_count+1.0e-20;
666  KALDI_LOG << "**Overall auxf impr for Sigma is " << tot_impr
667  << " over " << tot_count << " frames";
668  return tot_impr;
669 }
670 
671 
673  const MleAmSgmm2Accs &num_accs,
674  const MleAmSgmm2Accs &den_accs,
675  AmSgmm2 *model) {
676  KALDI_LOG << "Updating substate mixture weights";
677 
678  double tot_count = 0.0, tot_impr = 0.0;
679  for (int32 j2 = 0; j2 < num_accs.num_pdfs_; j2++) {
680  int32 M = model->NumSubstatesForPdf(j2);
681  Vector<double> num_occs(M), den_occs(M),
682  orig_weights(model->c_[j2]), weights(model->c_[j2]);
683 
684  for (int32 m = 0; m < M; m++) {
685  num_occs(m) = num_accs.gamma_c_[j2](m)
686  + options_.tau_c * weights(m);
687  den_occs(m) = den_accs.gamma_c_[j2](m);
688  }
689 
690  if (weights.Dim() > 1) {
691  double begin_auxf = 0.0, end_auxf = 0.0;
692  for (int32 m = 0; m < M; m++) { // see eq. 4.32, Dan Povey's PhD thesis.
693  begin_auxf += num_occs(m) * log (weights(m))
694  - den_occs(m) * weights(m) / orig_weights(m);
695  }
696  for (int32 iter = 0; iter < 50; iter++) {
697  Vector<double> k_jm(M);
698  double max_m = 0.0;
699  for (int32 m = 0; m < M; m++)
700  max_m = std::max(max_m, den_occs(m)/orig_weights(m));
701  for (int32 m = 0; m < M; m++)
702  k_jm(m) = max_m - den_occs(m)/orig_weights(m);
703  for (int32 m = 0; m < M; m++)
704  weights(m) = num_occs(m) + k_jm(m)*weights(m);
705  weights.Scale(1.0 / weights.Sum());
706  }
707  for (int32 m = 0; m < M; m++)
708  weights(m) = std::max(weights(m),
709  static_cast<double>(options_.min_substate_weight));
710  weights.Scale(1.0 / weights.Sum()); // renormalize.
711 
712  for (int32 m = 0; m < M; m++) {
713  end_auxf += num_occs(m) * log (weights(m))
714  - den_occs(m) * weights(m) / orig_weights(m);
715  }
716  tot_impr += end_auxf - begin_auxf;
717  double this_impr = ((end_auxf - begin_auxf) / num_occs.Sum());
718  if (j2 < 10 || this_impr > 0.5) {
719  KALDI_LOG << "Updating substate weights: auxf impr for pdf " << j2
720  << " is " << this_impr << " per frame over " << num_occs.Sum()
721  << " frames (den count is " << den_occs.Sum() << ")";
722  }
723  }
724  model->c_[j2].CopyFromVec(weights);
725  tot_count += den_occs.Sum(); // Note: num and den occs should be the
726  // same, except num occs are smoothed, so this is what we want.
727  }
728 
729  tot_impr /= (tot_count + 1.0e-20);
730 
731  KALDI_LOG << "**Overall auxf impr for c is " << tot_impr
732  << " over " << tot_count << " frames";
733  return tot_impr;
734 }
735 
736 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Matrix< double > t_
[SSGMM] each row is one of the t_i quantities in the less-exact version of the SSGMM update for the s...
std::vector< Vector< double > > gamma_c_
Sub-state occupancies gamma_{jm}^{(c)} for each sub-state.
void CopyFromVec(const SubVector< OtherReal > &orig)
CopyFromVec just interprets the vector as having the same layout as the packed matrix.
Matrix< BaseFloat > u_
[SSGMM] Speaker-subspace weight projection vectors. Dimension is [I][T]
Definition: am-sgmm2.h:431
double UpdateN(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const Vector< double > &gamma_num, const Vector< double > &gamma_den, AmSgmm2 *model) const
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231
BaseFloat tau_Sigma
Tau value for smoothing covariance-matrices Sigma.
BaseFloat max_cond
is allowed to change.
static void ComputeQ(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, std::vector< SpMatrix< double > > *Q)
Compute the Q_i quantities (Eq. 64).
This class describes the options for maximizing various quadratic objective functions.
Definition: sp-matrix.h:443
double UpdateU(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const Vector< double > &gamma_num, const Vector< double > &gamma_den, AmSgmm2 *model)
void Scale(Real c)
double UpdateVars(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const Vector< double > &gamma_num, const Vector< double > &gamma_den, const std::vector< SpMatrix< double > > &S_means, AmSgmm2 *model) const
double SolveQuadraticProblem(const SpMatrix< double > &H, const VectorBase< double > &g, const SolverOptions &opts, VectorBase< double > *x)
Definition: sp-matrix.cc:635
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
BaseFloat max_impr_u
Maximum improvement/frame allowed for u [0.25, carried over from ML update.].
std::vector< Vector< BaseFloat > > c_
c_{jm}, mixture weights. Dimension is [J2][#mix]
Definition: am-sgmm2.h:438
BaseFloat tau_u
Tau value for smoothing update of speaker-subspace weight projectsions (u)
Real SolveQuadraticMatrixProblem(const SpMatrix< Real > &Q, const MatrixBase< Real > &Y, const SpMatrix< Real > &SigmaInv, const SolverOptions &opts, MatrixBase< Real > *M)
Maximizes the auxiliary function : Like a numerically stable version of .
Definition: sp-matrix.cc:729
Matrix< BaseFloat > w_
Phonetic-subspace weight projection vectors. Dimension is [I][S].
Definition: am-sgmm2.h:429
BaseFloat lrate_Sigma
Learning rate used in updating Sigma– default 0.5.
BaseFloat lrate_v
Learning rate used in updating v– default 0.5.
BaseFloat lrate_w
Learning rate used in updating w– default 1.0.
BaseFloat lrate_N
Learning rate used in updating N– default 0.5.
BaseFloat tau_v
Smoothing constant for updates of sub-state vectors v_{jm}.
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
kaldi::int32 int32
std::vector< Matrix< BaseFloat > > N_
Speaker-subspace projections. Dimension is [I][D][T].
Definition: am-sgmm2.h:427
static void ComputeSMeans(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, std::vector< SpMatrix< double > > *S_means)
Compute the S_means quantities, minus sum: (Y_i M_i^T + M_i Y_I^T).
BaseFloat lrate_u
Learning rate used in updating u– default 1.0.
std::vector< SpMatrix< double > > S_
S_{i}^{-}, scatter of adapted feature vectors x_{i}(t). Dim is [I][D][D].
std::vector< Matrix< double > > gamma_
Gaussian occupancies gamma_{jmi} for each substate and Gaussian index, pooled over groups...
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <– beta*this + alpha*M*v.
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
BaseFloat tau_M
Smoothing constant for the M quantities (phone-subspace projections)
int32 PhoneSpaceDim() const
Definition: am-sgmm2.h:361
std::vector< Matrix< BaseFloat > > v_
The parameters in a particular SGMM state.
Definition: am-sgmm2.h:436
int32 num_groups_
Other model specifications.
std::vector< SpMatrix< double > > U_
the U_i quantities from the less-exact version of the SSGMM update for the speaker weight projections...
std::vector< Matrix< BaseFloat > > M_
Phonetic-subspace projections. Dimension is [I][D][S].
Definition: am-sgmm2.h:425
int ApplyFloor(const SpMatrix< Real > &Floor, Real alpha=1.0, bool verbose=false)
Floors this symmetric matrix to the matrix alpha * Floor, where the matrix Floor is positive definite...
Definition: sp-matrix.cc:536
int32 FeatureDim() const
Definition: am-sgmm2.h:363
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
const size_t count
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <– this + alpha v v&#39;
Definition: sp-matrix.cc:946
BaseFloat lrate_M
Learning rate used in updating M– default 0.5.
int32 NumSubstatesForPdf(int32 j2) const
Definition: am-sgmm2.h:354
t .. not really part of SGMM.
Definition: model-common.h:55
std::vector< Matrix< double > > Y_
The stats which are not tied to any state.
uint16 SgmmUpdateFlagsType
Bitwise OR of the above flags.
Definition: model-common.h:59
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
std::vector< SpMatrix< BaseFloat > > SigmaInv_
Globally shared parameters of the subspace GMM.
Definition: am-sgmm2.h:423
std::vector< Matrix< double > > y_
The SGMM state specific stats.
void MulElements(const VectorBase< Real > &v)
Multiply element-by-element by another vector.
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
Definition: sp-matrix.h:211
The letters correspond to the variable names.
Definition: model-common.h:48
void RunMultiThreaded(const C &c_in)
Here, class C should inherit from MultiThreadable.
Definition: kaldi-thread.h:151
BaseFloat tau_c
Tau value for smoothing substate weights (c)
void AddMatSp(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const SpMatrix< Real > &B, const Real beta)
this <– beta*this + alpha*A*B.
Definition: kaldi-matrix.h:708
BaseFloat tau_N
Smoothing constant for the N quantities (speaker-subspace projections)
std::string name
Definition: sp-matrix.h:446
#define KALDI_ERR
Definition: kaldi-error.h:147
void ComputeH(std::vector< SpMatrix< Real > > *H_i) const
Computes quantities H = M_i Sigma_i^{-1} M_i^T.
Definition: am-sgmm2.cc:1107
double TraceSpSp(const SpMatrix< double > &A, const SpMatrix< double > &B)
Definition: sp-matrix.cc:326
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
void UpdatePhoneVectorsInternal(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const std::vector< SpMatrix< double > > &H, AmSgmm2 *model, double *auxf_impr, int32 num_threads, int32 thread_id) const
double UpdateSubstateWeights(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, AmSgmm2 *model)
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
int32 NumGauss() const
Definition: am-sgmm2.h:360
Real Sum() const
Returns sum of the elements.
std::vector< Matrix< double > > a_
[SSGMM] These a_{jmi} quantities are dimensionally the same as the gamma quantities.
EbwUpdatePhoneVectorsClass(const EbwUpdatePhoneVectorsClass &other)
BaseFloat tau_w
Tau value for smoothing update of phonetic-subspace weight projectsions (w)
double UpdateW(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const Vector< double > &gamma_num, const Vector< double > &gamma_den, AmSgmm2 *model)
Note: in the discriminative case we do just one iteration of updating the w quantities.
std::vector< SpMatrix< double > > R_
R_{i}, quadratic term for speaker subspace estimation. Dim is [I][T][T].
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
double UpdatePhoneVectors(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const std::vector< SpMatrix< double > > &H, AmSgmm2 *model) const
int32 feature_dim_
Dimensionality of various subspaces.
void ComputeNormalizers()
Computes the data-independent terms in the log-likelihood computation for each Gaussian component and...
Definition: am-sgmm2.cc:857
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
std::vector< Matrix< double > > Z_
Stats Z_{i} for speaker-subspace projections N. Dim is [I][D][T].
void Update(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, AmSgmm2 *model, SgmmUpdateFlagsType flags, BaseFloat *auxf_change_out, BaseFloat *count_out)
BaseFloat epsilon
very small value used in SolveQuadraticProblem; workaround
BaseFloat min_substate_weight
Minimum allowed weight in a sub-state.
bool HasSpeakerDependentWeights() const
True if doing SSGMM.
Definition: am-sgmm2.h:366
Class for the accumulators associated with the phonetic-subspace model parameters.
void Invert(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: sp-matrix.cc:219
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
EbwUpdatePhoneVectorsClass(const EbwAmSgmm2Updater *updater, const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const std::vector< SpMatrix< double > > &H, AmSgmm2 *model, double *auxf_impr)
static void ComputePhoneVecStats(const MleAmSgmm2Accs &accs, const AmSgmm2 &model, const std::vector< SpMatrix< double > > &H, int32 j1, int32 m, const Vector< double > &w_jm, double gamma_jm, Vector< double > *g_jm, SpMatrix< double > *H_jm)
double UpdateM(const MleAmSgmm2Accs &num_accs, const MleAmSgmm2Accs &den_accs, const std::vector< SpMatrix< double > > &Q_num, const std::vector< SpMatrix< double > > &Q_den, const Vector< double > &gamma_num, const Vector< double > &gamma_den, AmSgmm2 *model) const
int32 NumSubstatesForGroup(int32 j1) const
Definition: am-sgmm2.h:357
Real LogDet(Real *det_sign=NULL) const
Definition: sp-matrix.cc:579
const std::vector< SpMatrix< double > > & H_
static void ComputeLogA(const MleAmSgmm2Accs &accs, std::vector< Matrix< double > > *log_a)