fmpe.cc
Go to the documentation of this file.
1 // transform/fmpe.cc
2 
3 // Copyright 2011-2012 Yanmin Qian Johns Hopkins University (Author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "transform/fmpe.h"
22 #include "util/text-utils.h"
23 #include "gmm/diag-gmm-normal.h"
24 #include "gmm/am-diag-gmm.h"
25 #include "hmm/transition-model.h"
26 
27 namespace kaldi {
28 
29 void Fmpe::SetContexts(std::string context_str) {
30  // sets the contexts_ variable.
31  using std::vector;
32  using std::string;
33  contexts_.clear();
34  vector<string> ctx_vec; // splitting context_str on ":"
35  SplitStringToVector(context_str, ":", false, &ctx_vec);
36  contexts_.resize(ctx_vec.size());
37  for (size_t i = 0; i < ctx_vec.size(); i++) {
38  vector<string> pair_vec; // splitting ctx_vec[i] on ";"
39  SplitStringToVector(ctx_vec[i], ";", false, &pair_vec);
40  KALDI_ASSERT(pair_vec.size() != 0 && "empty context!");
41  for (size_t j = 0; j < pair_vec.size(); j++) {
42  vector<string> one_pair;
43  SplitStringToVector(pair_vec[j], ",", false, &one_pair);
44  KALDI_ASSERT(one_pair.size() == 2 &&
45  "Mal-formed context string: bad --context-expansion option?");
46  int32 pos = 0;
47  BaseFloat weight = BaseFloat(0);
48  bool ok = ConvertStringToInteger(one_pair[0], &pos);
49  ok = ConvertStringToReal(one_pair[1], &weight) && ok;
50  if (!ok)
51  KALDI_ERR << "Mal-formed context string: bad --context-expansion option?";
52  contexts_[i].push_back(std::make_pair(pos, weight));
53  }
54  }
55 }
56 
58  KALDI_ASSERT(gmm_.NumGauss() != 0.0);
59  int32 dim = gmm_.Dim();
60 
61  // Getting stats from the GMM... assume the model is
62  // correct.
63  SpMatrix<double> x2_stats(dim);
64  Vector<double> x_stats(dim);
65  double tot_count = 0.0;
66  DiagGmmNormal ngmm(gmm_);
67  for (int32 pdf = 0; pdf < ngmm.NumGauss(); pdf++) {
68  x2_stats.AddVec2(ngmm.weights_(pdf), ngmm.means_.Row(pdf));
69  x2_stats.AddDiagVec(ngmm.weights_(pdf), ngmm.vars_.Row(pdf)); // add diagonal
70  // covar to diagonal elements of x2_stats.
71  x_stats.AddVec(ngmm.weights_(pdf), ngmm.means_.Row(pdf));
72  tot_count += ngmm.weights_(pdf);
73  }
74  KALDI_ASSERT(tot_count != 0.0);
75  x2_stats.Scale(1.0 / tot_count);
76  x_stats.Scale(1.0 / tot_count);
77  x2_stats.AddVec2(-1.0, x_stats); // subtract outer product of mean,
78  // to get centered covariance.
79  C_.Resize(dim);
80  try {
81  TpMatrix<double> Ctmp(dim); Ctmp.Cholesky(x2_stats);
82  C_.CopyFromTp(Ctmp);
83  } catch (...) {
84  KALDI_ERR << "Error initializing fMPE object: cholesky of "
85  "feature variance failed. Probably code error, or NaN/inf in model";
86  }
87 }
88 
90  const Matrix<BaseFloat> &inv_vars = gmm_.inv_vars();
91  stddevs_.Resize(inv_vars.NumRows(), inv_vars.NumCols());
92  stddevs_.CopyFromMat(inv_vars);
93  stddevs_.ApplyPow(-0.5);
94 }
95 
96 
97 void Fmpe::ApplyContext(const MatrixBase<BaseFloat> &intermed_feat,
98  MatrixBase<BaseFloat> *feat_out) const {
99  // Applies the temporal-context part of the transformation.
100  int32 dim = FeatDim(), ncontexts = NumContexts(),
101  T = intermed_feat.NumRows();
102  KALDI_ASSERT(intermed_feat.NumCols() == dim * ncontexts &&
103  intermed_feat.NumRows() == feat_out->NumRows()
104  && feat_out->NumCols() == dim);
105  // note: ncontexts == contexts_.size().
106  for (int32 i = 0; i < ncontexts; i++) {
107  // this_intermed_feat is the chunk of the "intermediate features"
108  // that corresponds to this "context"
109  SubMatrix<BaseFloat> this_intermed_feat(intermed_feat, 0, T,
110  dim*i, dim);
111  for (int32 j = 0; j < static_cast<int32>(contexts_[i].size()); j++) {
112  int32 t_offset = contexts_[i][j].first;
113  BaseFloat weight = contexts_[i][j].second;
114  // Note: we could do this more efficiently using matrix operations,
115  // but this doesn't dominate the computation and I think this is
116  // clearer.
117  for (int32 t_out = 0; t_out < T; t_out++) { // t_out indexes the output
118  int32 t_in = t_out + t_offset; // t_in indexes the input.
119  if (t_in >= 0 && t_in < T) // Discard frames outside range.
120  feat_out->Row(t_out).AddVec(weight, this_intermed_feat.Row(t_in));
121  }
122  }
123  }
124 }
125 
127  MatrixBase<BaseFloat> *intermed_feat_deriv)
128  const {
129  // Applies the temporal-context part of the transformation,
130  // in reverse, for getting derivatives for training.
131  int32 dim = FeatDim(), ncontexts = NumContexts(),
132  T = feat_deriv.NumRows();
133  KALDI_ASSERT(intermed_feat_deriv->NumCols() == dim * ncontexts &&
134  intermed_feat_deriv->NumRows() == feat_deriv.NumRows()
135  && feat_deriv.NumCols() == dim);
136  // note: ncontexts == contexts_.size().
137  for (int32 i = 0; i < ncontexts; i++) {
138  // this_intermed_feat is the chunk of the derivative of
139  // "intermediate features" that corresponds to this "context"
140  // (this is output, in this routine).
141  SubMatrix<BaseFloat> this_intermed_feat_deriv(*intermed_feat_deriv, 0, T,
142  dim*i, dim);
143  for (int32 j = 0; j < static_cast<int32>(contexts_[i].size()); j++) {
144  int32 t_offset = contexts_[i][j].first;
145  BaseFloat weight = contexts_[i][j].second;
146  // Note: we could do this more efficiently using matrix operations,
147  // but this doesn't dominate the computation and I think this is
148  // clearer.
149  for (int32 t_out = 0; t_out < T; t_out++) { // t_out indexes the output
150  int32 t_in = t_out + t_offset; // t_in indexes the input.
151  if (t_in >= 0 && t_in < T) // Discard frames outside range.
152  this_intermed_feat_deriv.Row(t_in).AddVec(weight,
153  feat_deriv.Row(t_out));
154  // Note: the line above is where the work happens; it's the same
155  // as in ApplyContext except reversing the input and output.
156  }
157  }
158  }
159 }
160 
161 void Fmpe::ApplyC(MatrixBase<BaseFloat> *feat_out, bool reverse) const {
162  int32 T = feat_out->NumRows();
163  Vector<BaseFloat> tmp(feat_out->NumCols());
164  for (int32 t = 0; t < T; t++) {
165  SubVector<BaseFloat> row(*feat_out, t);
166  // Next line does: tmp = C_ * row
167  tmp.AddTpVec(1.0, C_, (reverse ? kTrans : kNoTrans), row, 0.0);
168  row.CopyFromVec(tmp);
169  }
170 }
171 
172 // Constructs the high-dim features and applies the main projection matrix
173 // projT_. This projects from dimension ngauss*(dim+1) to dim*ncontexts. Note:
174 // because the input vector of size ngauss*(dim+1) is sparse in a blocky way
175 // (i.e. each frame only has a couple of nonzero posteriors), we deal with
176 // sub-matrices of the projection matrix projT_. We actually further optimize
177 // the code by taking all frames in a file that had nonzero posteriors for a
178 // particular Gaussian, and forming a matrix out of the corresponding
179 // high-dimensional features; we can then use a matrix-matrix multiply rather
180 // than using vector-matrix operations.
181 
183  const std::vector<std::vector<int32> > &gselect,
184  MatrixBase<BaseFloat> *intermed_feat) const {
185  int32 dim = FeatDim(), ncontexts = NumContexts();
186 
187  Vector<BaseFloat> post; // will be posteriors of selected Gaussians.
188  Vector<BaseFloat> input_chunk(dim+1); // will be a segment of
189  // the high-dimensional features.
190 
191  // "all_posts" is a vector of ((gauss-index, time-index), gaussian
192  // posterior).
193  // We'll compute the posterior information, sort it, and then
194  // go through it in sorted order, which maintains memory locality
195  // when accessing the projection matrix.
196  // Note: if we really cared we could make this use level-3 BLAS
197  // (matrix-matrix multiply), but we'd need to have a temporary
198  // matrix for the output and input.
199  std::vector<std::pair<std::pair<int32, int32>, BaseFloat> > all_posts;
200 
201  for (int32 t = 0; t < feat_in.NumRows(); t++) {
202  SubVector<BaseFloat> this_feat(feat_in, t);
203  gmm_.LogLikelihoodsPreselect(this_feat, gselect[t], &post);
204  // At this point, post will contain log-likes of the selected
205  // Gaussians.
206  post.ApplySoftMax(); // Now they are posteriors (which sum to one).
207  for (int32 i = 0; i < post.Dim(); i++) {
208  int32 gauss = gselect[t][i];
209  all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(i)));
210  }
211  }
212  std::sort(all_posts.begin(), all_posts.end());
213 
214  bool optimize = true;
215 
216  if (!optimize) { // Why do we keep this un-optimized code around?
217  // For clarity, so you can see what's going on, and for easier
218  // comparision with ApplyProjectionReverse which is similar to this
219  // un-optimized segment. Both un-optimized and optimized versions
220  // should give identical transforms (up to tiny roundoff differences).
221  for (size_t i = 0; i < all_posts.size(); i++) {
222  int32 gauss = all_posts[i].first.first, t = all_posts[i].first.second;
223  SubVector<BaseFloat> this_feat(feat_in, t);
224  SubVector<BaseFloat> this_intermed_feat(*intermed_feat, t);
225  BaseFloat this_post = all_posts[i].second;
226  SubVector<BaseFloat> this_stddev(stddevs_, gauss);
227 
228  // The next line is equivalent to setting input_chunk to
229  // -this_post * the gaussian mean / (gaussian stddev). Note: we use
230  // the fact that mean * inv_var * stddev == mean / stddev.
231  input_chunk.Range(0, dim).AddVecVec(-this_post, gmm_.means_invvars().Row(gauss),
232  this_stddev, 0.0);
233  // The next line is equivalent to adding (feat / gaussian stddev) to
234  // input_chunk, so now it contains (feat - mean) / stddev, which is
235  // our "normalized" feature offset.
236  input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
237  1.0);
238  // The last element of this input_chunk is the posterior itself
239  // (between 0 and 1).
240  input_chunk(dim) = this_post * config_.post_scale;
241 
242  // this_intermed_feat += [appropriate chjunk of projT_] * input_chunk.
243  this_intermed_feat.AddMatVec(1.0, projT_.Range(gauss*(dim+1), dim+1,
244  0, dim*ncontexts),
245  kTrans, input_chunk, 1.0);
246  }
247  } else {
248  size_t i = 0;
249  // We process the "posts" vector in chunks, where each chunk corresponds to
250  // the same Gaussian index (but different times).
251  while (i < all_posts.size()) {
252  int32 gauss = all_posts[i].first.first;
253  SubVector<BaseFloat> this_stddev(stddevs_, gauss),
254  this_mean_invvar(gmm_.means_invvars(), gauss);
255  SubMatrix<BaseFloat> this_projT_chunk(projT_, gauss*(dim+1), dim+1,
256  0, dim*ncontexts);
257  int32 batch_size; // number of posteriors with same Gaussian..
258  for (batch_size = 0;
259  batch_size+i < static_cast<int32>(all_posts.size()) &&
260  all_posts[batch_size+i].first.first == gauss;
261  batch_size++); // empty loop body.
262  Matrix<BaseFloat> input_chunks(batch_size, dim+1);
263  Matrix<BaseFloat> intermed_temp(batch_size, dim*ncontexts);
264  for (int32 j = 0; j < batch_size; j++) { // set up "input_chunks".
265  // To understand this code, first examine code and comments in "non-optimized"
266  // code chunk above (the other branch of the if/else statement).
267  int32 t = all_posts[i+j].first.second;
268  SubVector<BaseFloat> this_feat(feat_in, t);
269  SubVector<BaseFloat> this_input_chunk(input_chunks, j);
270  BaseFloat this_post = all_posts[i+j].second;
271  this_input_chunk.Range(0, dim).AddVecVec(-this_post,
272  this_mean_invvar,
273  this_stddev, 0.0);
274  this_input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat,
275  this_stddev, 1.0);
276  this_input_chunk(dim) = this_post * config_.post_scale;
277  }
278  // The next line is where most of the computation will happen,
279  // during the feature computation phase. We have rearranged
280  // stuff so it's a matrix-matrix operation, for greater
281  // efficiency (when using optimized libraries like ATLAS).
282  intermed_temp.AddMatMat(1.0, input_chunks, kNoTrans,
283  this_projT_chunk, kNoTrans, 0.0);
284  for (int32 j = 0; j < batch_size; j++) { // add data from
285  // intermed_temp to the output "intermed_feat"
286  int32 t = all_posts[i+j].first.second;
287  SubVector<BaseFloat> this_intermed_feat(*intermed_feat, t);
288  SubVector<BaseFloat> this_intermed_temp(intermed_temp, j);
289  // this_intermed_feat += this_intermed_temp.
290  this_intermed_feat.AddVec(1.0, this_intermed_temp);
291  }
292  i += batch_size;
293  }
294  }
295 }
296 
297 
298 
299 // This function does the reverse to ApplyProjection, for the case
300 // where we want the derivatives w.r.t. the projection matrix.
301 // It stores the positive and negative parts of this separately.
303  const std::vector<std::vector<int32> > &gselect,
304  const MatrixBase<BaseFloat> &intermed_feat_deriv,
305  MatrixBase<BaseFloat> *proj_deriv_plus,
306  MatrixBase<BaseFloat> *proj_deriv_minus) const {
307  int32 dim = FeatDim(), ncontexts = NumContexts();
308 
309  Vector<BaseFloat> post; // will be posteriors of selected Gaussians.
310  Vector<BaseFloat> input_chunk(dim+1); // will be a segment of
311  // the high-dimensional features.
312 
313  // "all_posts" is a vector of ((gauss-index, time-index), gaussian
314  // posterior).
315  // We'll compute the posterior information, sort it, and then
316  // go through it in sorted order, which maintains memory locality
317  // when accessing the projection matrix.
318  std::vector<std::pair<std::pair<int32, int32>, BaseFloat> > all_posts;
319 
320  for (int32 t = 0; t < feat_in.NumRows(); t++) {
321  SubVector<BaseFloat> this_feat(feat_in, t);
322  gmm_.LogLikelihoodsPreselect(this_feat, gselect[t], &post);
323  // At this point, post will contain log-likes of the selected
324  // Gaussians.
325  post.ApplySoftMax(); // Now they are posteriors (which sum to one).
326  for (int32 i = 0; i < post.Dim(); i++) {
327  // The next few lines (where we set up "input_chunk") are identical
328  // to ApplyProjection.
329  int32 gauss = gselect[t][i];
330  all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(i)));
331  }
332  }
333  std::sort(all_posts.begin(), all_posts.end());
334  for (size_t i = 0; i < all_posts.size(); i++) {
335  int32 gauss = all_posts[i].first.first, t = all_posts[i].first.second;
336  BaseFloat this_post = all_posts[i].second;
337  SubVector<BaseFloat> this_feat(feat_in, t);
338  SubVector<BaseFloat> this_intermed_feat_deriv(intermed_feat_deriv, t);
339  SubVector<BaseFloat> this_stddev(stddevs_, gauss);
340  input_chunk.Range(0, dim).AddVecVec(-this_post, gmm_.means_invvars().Row(gauss),
341  this_stddev, 0.0);
342  input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
343  1.0);
344  input_chunk(dim) = this_post * config_.post_scale;
345 
346  // If not for accumulating the + and - parts separately, we would be
347  // doing something like:
348  // proj_deriv_.Range(0, dim*ncontexts, gauss*(dim+1), dim+1).AddVecVec(
349  // 1.0, this_intermed_feat_deriv, input_chunk);
350 
351 
352  SubMatrix<BaseFloat> plus_chunk(*proj_deriv_plus,
353  gauss*(dim+1), dim+1,
354  0, dim*ncontexts),
355  minus_chunk(*proj_deriv_minus,
356  gauss*(dim+1), dim+1,
357  0, dim*ncontexts);
358 
359  // This next function takes the rank-one matrix
360  // (input_chunk * this_intermed_deriv'), and adds the positive
361  // part to proj_deriv_plus, and minus the negative part to
362  // proj_deriv_minus.
363  AddOuterProductPlusMinus(static_cast<BaseFloat>(1.0),
364  input_chunk,
365  this_intermed_feat_deriv,
366  &plus_chunk, &minus_chunk);
367  }
368 }
369 
371  const std::vector<std::vector<int32> > &gselect,
372  Matrix<BaseFloat> *feat_out) const {
373  int32 dim = FeatDim();
374  KALDI_ASSERT(feat_in.NumRows() != 0 && feat_in.NumCols() == dim);
375  KALDI_ASSERT(feat_in.NumRows() == static_cast<int32>(gselect.size()));
376  feat_out->Resize(feat_in.NumRows(), feat_in.NumCols()); // will zero it.
377 
378  // Intermediate-dimension features
379  Matrix<BaseFloat> intermed_feat(feat_in.NumRows(),
380  dim * NumContexts());
381 
382  // Apply the main projection, from high-dim to intermediate
383  // dimension (dim * NumContexts()).
384  ApplyProjection(feat_in, gselect, &intermed_feat);
385 
386  // Apply the temporal context and reduces from
387  // dimension dim*ncontexts to dim.
388  ApplyContext(intermed_feat, feat_out);
389 
390  // Lastly, apply the the "C" matrix-- linear transform on the offsets.
391  ApplyC(feat_out);
392 }
393 
394 
// NOTE(review): the line that opens this definition (listing line 395: return
// type, function name and, judging by the body, a first parameter named
// feat_in) is missing from this listing -- restore it from the original file.
//
// Purpose, as far as the visible body shows: given the input features, the
// Gaussian pre-selection and the derivative of the objective w.r.t. the
// transformed features, accumulate the derivative w.r.t. the projection matrix
// into the plus/minus halves of *fmpe_stats.  The stages of the forward
// transform are undone in reverse order (C, then context, then projection).
396  const std::vector<std::vector<int32> > &gselect,
397  const MatrixBase<BaseFloat> &direct_feat_deriv,
398  const MatrixBase<BaseFloat> *indirect_feat_deriv, // may be NULL
399  FmpeStats *fmpe_stats) const {
// Views onto the two halves of the statistics matrix; writing through them
// updates *fmpe_stats.
400  SubMatrix<BaseFloat> stats_plus(fmpe_stats->DerivPlus());
401  SubMatrix<BaseFloat> stats_minus(fmpe_stats->DerivMinus());
402  int32 dim = FeatDim(), ncontexts = NumContexts();
403  KALDI_ASSERT(feat_in.NumRows() != 0 && feat_in.NumCols() == dim);
404  KALDI_ASSERT(feat_in.NumRows() == static_cast<int32>(gselect.size()));
405  KALDI_ASSERT(SameDim(stats_plus, projT_) && SameDim(stats_minus, projT_) &&
406  SameDim(feat_in, direct_feat_deriv));
407 
// The diagnostic accumulators are only updated when an indirect derivative is
// supplied.
408  if (indirect_feat_deriv != NULL)
409  fmpe_stats->AccumulateChecks(feat_in, direct_feat_deriv, *indirect_feat_deriv);
410 
411  Matrix<BaseFloat> feat_deriv(direct_feat_deriv); // "feat_deriv" is initially direct+indirect.
412  if (indirect_feat_deriv != NULL)
413  feat_deriv.AddMat(1.0, *indirect_feat_deriv);
414 
415  // We do the "*Reverse" version of each stage now, in reverse order.
// NOTE(review): ApplyCReverse is not defined in this file; presumably it calls
// ApplyC with reverse == true -- confirm in the header.
416  ApplyCReverse(&feat_deriv);
417 
// Freshly constructed here; ApplyContextReverse adds into it.
418  Matrix<BaseFloat> intermed_feat_deriv(feat_in.NumRows(), dim*ncontexts);
419  ApplyContextReverse(feat_deriv, &intermed_feat_deriv);
420 
421  ApplyProjectionReverse(feat_in, gselect, intermed_feat_deriv,
422  &stats_plus, &stats_minus);
423 }
424 
425 
426 void FmpeOptions::Write(std::ostream &os, bool binary) const {
427  WriteToken(os, binary, context_expansion);
428  WriteBasicType(os, binary, post_scale);
429 }
430 void FmpeOptions::Read(std::istream &is, bool binary) {
431  ReadToken(is, binary, &context_expansion);
432  ReadBasicType(is, binary, &post_scale);
433 }
434 
435 Fmpe::Fmpe(const DiagGmm &gmm, const FmpeOptions &config): gmm_(gmm),
436  config_(config) {
438  ComputeC();
439  ComputeStddevs();
440  projT_.Resize(NumGauss() * (FeatDim()+1), FeatDim() * NumContexts());
441 }
442 
444  const FmpeStats &stats) {
445  SubMatrix<BaseFloat> proj_deriv_plus = stats.DerivPlus(),
446  proj_deriv_minus = stats.DerivMinus();
447  // tot_linear_objf_impr is the change in the actual
448  // objective function if it were linear, i.e.
449  // objf-gradient . parameter-change
450  // Note: none of this is normalized by the #frames (we don't have
451  // this info here), so that is done at the script level.
452  BaseFloat tot_linear_objf_impr = 0.0;
453  int32 changed = 0; // Keep track of how many elements change sign.
454  KALDI_ASSERT(SameDim(proj_deriv_plus, projT_) && SameDim(proj_deriv_minus, projT_));
455  KALDI_ASSERT(proj_deriv_plus.Min() >= 0);
456  KALDI_ASSERT(proj_deriv_minus.Min() >= 0);
457  BaseFloat learning_rate = config.learning_rate,
458  l2_weight = config.l2_weight;
459 
460  for (int32 i = 0; i < projT_.NumRows(); i++) {
461  for (int32 j = 0; j < projT_.NumCols(); j++) {
462  BaseFloat p = proj_deriv_plus(i, j), n = proj_deriv_minus(i, j),
463  x = projT_(i, j);
464  // Suppose the basic update (before regularization) is:
465  // z <-- x + learning_rate * (p - n) / (p + n),
466  // where z is the new parameter and x is the old one.
467  // Here, we view (learning_rate / (p + n)) as a parameter-specific
468  // learning rate. In fact we view this update as the maximization
469  // of an auxiliary function of the form:
470  // (z-x).(p-n) - 0.5 (z - x)^2 (p+n)/learning_rate
471  // and taking the derivative w.r.t z, we get:
472  // Q'(z) = (p-n) - (z - x) (p+n) / learning_rate
473  // which we set to zero and solve for z, to get z = x + learning_rate.(p-n)/(p+n)
474  // At this point we add regularization, a term of the form -l2_weight * z^2.
475  // Our new auxiliary function derivative is:
476  // Q(z) = -2.l2_weight.z + (p-n) - (z - x) (p+n) / learning_rate
477  // We can write this as:
478  // Q(z) = z . (-2.l2_weight - (p+n)/learning_rate)
479  // + (p-n) + x(p+n)/learning_rate
480  // solving for z, we get:
481  // z = ((p-n) + x (p+n)/learning_rate) / (2.l2_weight + (p+n)/learning_rate)
482 
483  BaseFloat z = ((p-n) + x*(p+n)/learning_rate) / (2*l2_weight + (p+n)/learning_rate);
484  // z is the new parameter value.
485 
486  tot_linear_objf_impr += (z-x) * (p-n); // objf impr based on linear assumption.
487  projT_(i, j) = z;
488  if (z*x < 0) changed++;
489  }
490  }
491  KALDI_LOG << "Objf impr (assuming linear) is " << tot_linear_objf_impr;
492  KALDI_LOG << ((100.0*changed)/(projT_.NumRows()*projT_.NumCols()))
493  << "% of matrix elements changed sign.";
494  return tot_linear_objf_impr;
495 }
496 
497 // Note: we write the GMM first, without any other header.
498 // This way, the gselect code can treat the form on disk as
499 // a normal GMM object.
500 void Fmpe::Write(std::ostream &os, bool binary) const {
501  if (gmm_.NumGauss() == 0)
502  KALDI_ERR << "Fmpe::Write, object not initialized.";
503  gmm_.Write(os, binary);
504  config_.Write(os, binary);
505  // stddevs_ are derived, don't write them.
506  projT_.Write(os, binary);
507  C_.Write(os, binary);
508  // contexts_ are derived from config, don't write them.
509 }
510 
511 
512 void Fmpe::Read(std::istream &is, bool binary) {
513  gmm_.Read(is, binary);
514  config_.Read(is, binary);
515  ComputeStddevs(); // computed from gmm.
516  projT_.Read(is, binary);
517  C_.Read(is, binary);
519 }
520 
521 
523  const TransitionModel &trans_model,
524  const Posterior &posterior,
525  const MatrixBase<BaseFloat> &features,
526  Matrix<BaseFloat> *direct_deriv,
527  const AccumAmDiagGmm *model_diff,
528  Matrix<BaseFloat> *indirect_deriv) {
529  KALDI_ASSERT((model_diff != NULL) == (indirect_deriv != NULL));
530  BaseFloat ans = 0.0;
531  KALDI_ASSERT(posterior.size() == static_cast<size_t>(features.NumRows()));
532  direct_deriv->Resize(features.NumRows(), features.NumCols());
533  if (indirect_deriv != NULL)
534  indirect_deriv->Resize(features.NumRows(), features.NumCols());
535  Vector<BaseFloat> temp_vec(features.NumCols());
536  Vector<double> temp_vec_dbl(features.NumCols());
537  for (size_t i = 0; i < posterior.size(); i++) {
538  for (size_t j = 0; j < posterior[i].size(); j++) {
539  int32 tid = posterior[i][j].first, // transition identifier.
540  pdf_id = trans_model.TransitionIdToPdf(tid);
541  BaseFloat weight = posterior[i][j].second;
542  const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
543  Vector<BaseFloat> gauss_posteriors;
544  SubVector<BaseFloat> this_feat(features, i);
545  SubVector<BaseFloat> this_direct_deriv(*direct_deriv, i);
546  ans += weight *
547  gmm.ComponentPosteriors(this_feat, &gauss_posteriors);
548 
549  gauss_posteriors.Scale(weight);
550  // The next line does: to i'th row of deriv, add
551  // means_invvars^T * gauss_posteriors,
552  // where each row of means_invvars is the mean times
553  // diagonal inverse covariance... after transposing,
554  // this becomes a weighted of these rows, weighted by
555  // the posteriors. This comes from the term
556  // feat^T * inv_var * mean
557  // in the objective function.
558  this_direct_deriv.AddMatVec(1.0, gmm.means_invvars(), kTrans,
559  gauss_posteriors, 1.0);
560 
561  // next line does temp_vec == inv_vars^T * gauss_posteriors,
562  // which sets temp_vec to a weighted sum of the inv_vars,
563  // weighed by Gaussian posterior.
564  temp_vec.AddMatVec(1.0, gmm.inv_vars(), kTrans,
565  gauss_posteriors, 0.0);
566  // Add to the derivative, -(this_feat .* temp_vec),
567  // which is the term that comes from the -0.5 * inv_var^T feat_sq,
568  // in the objective function (where inv_var is a vector, and feat_sq
569  // is a vector of squares of the feature values).
570  // Note: we have to do some messing about with double-precision here
571  // because the stats only come in double precision.
572  this_direct_deriv.AddVecVec(-1.0, this_feat, temp_vec, 1.0);
573  if (model_diff != NULL && weight > 0.0) { // We need to get the indirect diff.
574  // This "weight > 0.0" checks that this is the numerator stats, as the
575  // fMPE indirect diff applies only to the ML stats-- CAUTION, this
576  // code will only work as-is for fMMI (and the stats should not be
577  // canceled), due to the assumption that ML stats == num stats.
578  Vector<double> gauss_posteriors_dbl(gauss_posteriors);
579  const AccumDiagGmm &deriv_acc = model_diff->GetAcc(pdf_id);
580  // part of the derivative. Note: we could just store the direct and
581  // indirect derivatives together in one matrix, but it makes it easier
582  // to accumulate certain diagnostics if we store them separately.
583  SubVector<BaseFloat> this_indirect_deriv(*indirect_deriv, i);
584  // note: deriv_acc.mean_accumulator() contains the derivative of
585  // the objective function w.r.t. the "x stats" accumulated for
586  // this GMM. variance_accumulator() is the same for the "x^2 stats".
587  temp_vec_dbl.AddMatVec(1.0, deriv_acc.mean_accumulator(), kTrans,
588  gauss_posteriors_dbl, 0.0);
589  this_indirect_deriv.AddVec(1.0, temp_vec_dbl);
590  temp_vec_dbl.AddMatVec(1.0, deriv_acc.variance_accumulator(), kTrans,
591  gauss_posteriors_dbl, 0.0);
592  temp_vec.CopyFromVec(temp_vec_dbl); // convert to float.
593  // next line because d(x^2 stats for Gaussian)/d(feature) =
594  // 2 * (gaussian posterior) * feature.
595  this_indirect_deriv.AddVecVec(2.0, this_feat, temp_vec, 1.0);
596  }
597  }
598  }
599  return ans;
600 }
601 
602 
603 SubMatrix<BaseFloat> FmpeStats::DerivPlus() const { // const-ness not preserved.
604  KALDI_ASSERT(deriv.NumRows() != 0);
605  int32 proj_num_rows = deriv.NumRows(),
606  proj_num_cols = deriv.NumCols()/2;
607  return SubMatrix<BaseFloat>(deriv, 0, proj_num_rows,
608  0, proj_num_cols);
609 }
610 SubMatrix<BaseFloat> FmpeStats::DerivMinus() const { // const-ness not preserved.
611  KALDI_ASSERT(deriv.NumRows() != 0);
612  int32 proj_num_rows = deriv.NumRows(),
613  proj_num_cols = deriv.NumCols()/2;
614  return SubMatrix<BaseFloat>(deriv, 0, proj_num_rows,
615  proj_num_cols, proj_num_cols);
616 }
617 
618 void FmpeStats::Init(const Fmpe &fmpe) {
619  int32 num_rows = fmpe.ProjectionTNumRows(),
620  num_cols = fmpe.ProjectionTNumCols();
621  deriv.Resize(num_rows, num_cols*2);
622 
623  int32 feat_dim = fmpe.FeatDim();
624  checks.Resize(8, feat_dim);
625 }
626 
628  const MatrixBase<BaseFloat> &direct_deriv,
629  const MatrixBase<BaseFloat> &indirect_deriv) {
630  int32 T = feats.NumRows(), dim = feats.NumCols();
631  KALDI_ASSERT(direct_deriv.NumRows() == T && direct_deriv.NumCols() == dim &&
632  indirect_deriv.NumRows() == T && indirect_deriv.NumCols() == dim);
633  KALDI_ASSERT(checks.NumRows() == 8 && checks.NumCols() == dim);
634  for (int32 t = 0; t < T; t++) {
635  for (int32 d = 0; d < dim; d++) {
636  BaseFloat zero = 0.0;
637  checks(0, d) += std::max(zero, direct_deriv(t, d));
638  checks(1, d) += std::max(zero, -direct_deriv(t, d));
639  checks(2, d) += std::max(zero, indirect_deriv(t, d));
640  checks(3, d) += std::max(zero, -indirect_deriv(t, d));
641  checks(4, d) += std::max(zero, feats(t, d)*direct_deriv(t, d));
642  checks(5, d) += std::max(zero, -feats(t, d)*direct_deriv(t, d));
643  checks(6, d) += std::max(zero, feats(t, d)*indirect_deriv(t, d));
644  checks(7, d) += std::max(zero, -feats(t, d)*indirect_deriv(t, d));
645  }
646  }
647 }
648 
// NOTE(review): the line that opens this definition (listing line 649: return
// type and function name) is missing from this listing -- restore it from the
// original file.
//
// Purpose, as far as the visible body shows: turn the 8 rows of the "checks"
// accumulator into four per-dimension ratios and log them.  Elsewhere in this
// file rows 0-3 are filled with the positive/negative parts of the direct and
// indirect derivatives, and rows 4-7 with the same quantities multiplied by
// the feature value.  Nothing is computed if "checks" is entirely zero.
650  if (checks.IsZero()) {
651  KALDI_LOG << "No checks will be done, probably indirect derivative was not used.";
652  return;
653  }
654  int32 dim = checks.NumCols();
655  Vector<double> shift_check(dim), shift_check2(dim), scale_check(dim), scale_check2(dim);
656  for (int32 d = 0; d < dim; d++) {
657  // shiftnumerator = direct+indirect deriv-- should be zero.
// shift_den normalizes by the sum of all four accumulators; shift_den2
// normalizes by |net direct| + |net indirect|.
// NOTE(review): the IsZero() test above covers the whole matrix only.  If the
// four accumulators of a single dimension are all zero, the divisions below
// are 0/0 and the logged entry for that dimension is NaN -- confirm whether
// that can happen in practice.
658  double shift_num = checks(0, d) - checks(1, d) + checks(2, d) - checks(3, d),
659  shift_den = checks(0, d) + checks(1, d) + checks(2, d) + checks(3, d),
660  shift_den2 = fabs(checks(0, d) - checks(1, d)) + fabs(checks(2, d) - checks(3, d));
661  shift_check(d) = shift_num / shift_den;
662  shift_check2(d) = shift_num / shift_den2;
// Same construction for the feature-weighted accumulators (rows 4-7).
663  double scale_num = checks(4, d) - checks(5, d) + checks(6, d) - checks(7, d),
664  scale_den = checks(4, d) + checks(5, d) + checks(6, d) + checks(7, d),
665  scale_den2 = fabs(checks(4, d) - checks(5, d)) + fabs(checks(6, d) - checks(7, d));
666  scale_check(d) = scale_num / scale_den;
667  scale_check2(d) = scale_num / scale_den2;
668  }
669 
670  KALDI_LOG << "Shift-check is as follows (should be in range +- 0.01 or less)."
671  << shift_check;
672  KALDI_LOG << "Scale-check is as follows (should be in range +- 0.01 or less)."
673  << scale_check;
674  KALDI_LOG << "Shift-check(2) is as follows: most elements should be in range +-0.1: "
675  << shift_check2;
676  KALDI_LOG << "Scale-check(2) is as follows: most elements should be in range +-0.1: "
677  << scale_check2;
678 }
679 
680 void FmpeStats::Write(std::ostream &os, bool binary) const {
681  deriv.Write(os, binary);
682  checks.Write(os, binary);
683 }
684 
685 void FmpeStats::Read(std::istream &is, bool binary, bool add) {
686  deriv.Read(is, binary, add);
687  checks.Read(is, binary, add);
688 }
689 
690 
691 } // End of namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: diag-gmm.h:74
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
Real Min() const
Returns minimum element of matrix.
void Write(std::ostream &out, bool binary) const
write to stream.
void Scale(Real c)
void Write(std::ostream &os, bool binary) const
Definition: diag-gmm.cc:705
void LogLikelihoodsPreselect(const VectorBase< BaseFloat > &data, const std::vector< int32 > &indices, Vector< BaseFloat > *loglikes) const
Outputs the per-component log-likelihoods of a subset of mixture components.
Definition: diag-gmm.cc:566
void ComputeFeatures(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, Matrix< BaseFloat > *feat_out) const
Definition: fmpe.cc:370
Fmpe()
Definition: fmpe.h:140
BaseFloat learning_rate
Definition: fmpe.h:89
Definition for Gaussian Mixture Model with diagonal covariances in normal mode: where the parameters ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
const Matrix< BaseFloat > & means_invvars() const
Definition: diag-gmm.h:179
BaseFloat Update(const FmpeUpdateOptions &config, const FmpeStats &stats)
Definition: fmpe.cc:443
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void ApplyC(MatrixBase< BaseFloat > *feat_out, bool reverse=false) const
Definition: fmpe.cc:161
void ApplyContext(const MatrixBase< BaseFloat > &intermed_feat, MatrixBase< BaseFloat > *feat_out) const
Definition: fmpe.cc:97
int32 NumGauss() const
Definition: fmpe.h:144
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
void ApplyProjection(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, MatrixBase< BaseFloat > *intermed_feat) const
Definition: fmpe.cc:182
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
BaseFloat ComputeAmGmmFeatureDeriv(const AmDiagGmm &am_gmm, const TransitionModel &trans_model, const Posterior &posterior, const MatrixBase< BaseFloat > &features, Matrix< BaseFloat > *direct_deriv, const AccumAmDiagGmm *model_diff, Matrix< BaseFloat > *indirect_deriv)
Computes derivatives of the likelihood of these states (weighted), w.r.t.
Definition: fmpe.cc:522
Matrix< BaseFloat > projT_
Definition: fmpe.h:228
void ApplyContextReverse(const MatrixBase< BaseFloat > &feat_deriv, MatrixBase< BaseFloat > *intermed_feat_deriv) const
Definition: fmpe.cc:126
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
Real ApplySoftMax()
Apply soft-max to vector and return normalizer (log sum of exponentials).
int32 TransitionIdToPdf(int32 trans_id) const
std::string context_expansion
Definition: fmpe.h:47
std::vector< std::vector< std::pair< int32, BaseFloat > > > contexts_
Definition: fmpe.h:242
void ComputeStddevs()
Definition: fmpe.cc:89
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void ApplyProjectionReverse(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, const MatrixBase< BaseFloat > &intermed_feat_deriv, MatrixBase< BaseFloat > *proj_deriv_plus, MatrixBase< BaseFloat > *proj_deriv_minus) const
Definition: fmpe.cc:302
void AddVecVec(Real alpha, const VectorBase< Real > &v, const VectorBase< Real > &r, Real beta)
Add element-by-element product of vectors:
void Init(const Fmpe &fmpe)
Definition: fmpe.cc:618
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha v v'
Definition: sp-matrix.cc:946
void Read(std::istream &is, bool binary)
Definition: fmpe.cc:430
void Read(std::istream &in, bool binary, bool add=false)
read from stream.
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, Vector< BaseFloat > *posteriors) const
Computes the posterior probabilities of all Gaussian components given a data point.
Definition: diag-gmm.cc:601
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
void SetContexts(std::string context_str)
Definition: fmpe.cc:29
int32 ProjectionTNumCols() const
Definition: fmpe.h:152
int32 FeatDim() const
Definition: fmpe.h:143
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
const MatrixBase< double > & variance_accumulator() const
Definition: mle-diag-gmm.h:185
const MatrixBase< double > & mean_accumulator() const
Definition: mle-diag-gmm.h:184
BaseFloat l2_weight
Definition: fmpe.h:91
void DoChecks()
Definition: fmpe.cc:649
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
struct rnnlm::@11::@12 n
SubMatrix< BaseFloat > DerivMinus() const
Definition: fmpe.cc:610
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
void Read(std::istream &is, bool binary)
Definition: fmpe.cc:512
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
Definition: text-utils.cc:238
void AddOuterProductPlusMinus(Real alpha, const VectorBase< Real > &a, const VectorBase< Real > &b, MatrixBase< Real > *plus, MatrixBase< Real > *minus)
Matrix< double > vars_
diagonal variance
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
TpMatrix< BaseFloat > C_
Definition: fmpe.h:232
FmpeOptions config_
Definition: fmpe.h:223
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void Scale(Real alpha)
Multiplies all elements by this constant.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <-- beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
void ComputeC()
Definition: fmpe.cc:57
int32 NumContexts() const
Definition: fmpe.h:145
Matrix< double > means_
Means.
DiagGmm & GetPdf(int32 pdf_index)
Accessors.
Definition: am-diag-gmm.h:119
void Read(std::istream &in, bool binary)
Definition: diag-gmm.cc:728
void Write(std::ostream &os, bool binary) const
Definition: fmpe.cc:680
const AccumDiagGmm & GetAcc(int32 index) const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AccumulateChecks(const MatrixBase< BaseFloat > &feats, const MatrixBase< BaseFloat > &direct_deriv, const MatrixBase< BaseFloat > &indirect_deriv)
If we're using the indirect differential, accumulates certain quantities that will be used in the upd...
Definition: fmpe.cc:627
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Matrix< BaseFloat > stddevs_
Definition: fmpe.h:224
Vector< double > weights_
weights (not log).
int32 ProjectionTNumRows() const
Definition: fmpe.h:151
void ApplyPow(Real power)
Definition: kaldi-matrix.h:341
BaseFloat post_scale
Definition: fmpe.h:67
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
void Read(std::istream &is, bool binary, bool add=false)
Definition: fmpe.cc:685
void ApplyCReverse(MatrixBase< BaseFloat > *deriv) const
Definition: fmpe.h:218
Sub-matrix representation.
Definition: kaldi-matrix.h:988
void AddDiagVec(const Real alpha, const VectorBase< OtherReal > &v)
diagonal update, this <-- this + diag(v)
Definition: sp-matrix.cc:183
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void AccStats(const MatrixBase< BaseFloat > &feat_in, const std::vector< std::vector< int32 > > &gselect, const MatrixBase< BaseFloat > &direct_feat_deriv, const MatrixBase< BaseFloat > *indirect_feat_deriv, FmpeStats *stats) const
Definition: fmpe.cc:395
SubMatrix< BaseFloat > DerivPlus() const
Definition: fmpe.cc:603
DiagGmm gmm_
Definition: fmpe.h:222
void Write(std::ostream &os, bool binary) const
Definition: fmpe.cc:426
const Matrix< BaseFloat > & inv_vars() const
Definition: diag-gmm.h:180
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94
void Write(std::ostream &os, bool binary) const
Definition: fmpe.cc:500