fmllr-raw.cc
// transform/fmllr-raw.cc

// Copyright 2013 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <utility>
#include <vector>
using std::vector;

#include "transform/fmllr-raw.h"
#include "transform/fmllr-diag-gmm.h"

namespace kaldi {

FmllrRawAccs::FmllrRawAccs(int32 raw_dim,
                           int32 model_dim,
                           const Matrix<BaseFloat> &full_transform):
    raw_dim_(raw_dim),
    model_dim_(model_dim) {
  if (full_transform.NumCols() != full_transform.NumRows() &&
      full_transform.NumCols() != full_transform.NumRows() + 1) {
    KALDI_ERR << "Expecting full LDA+MLLT transform to be square or d by d+1 "
              << "(make sure you are including rejected rows).";
  }
  if (raw_dim <= 0 || full_transform.NumRows() % raw_dim != 0)
    KALDI_ERR << "Raw feature dimension is invalid " << raw_dim
              << " (must be positive and divide feature dimension)";
  int32 full_dim = full_transform.NumRows();
  full_transform_ = full_transform.Range(0, full_dim, 0, full_dim);
  transform_offset_.Resize(full_dim);
  if (full_transform.NumCols() == full_dim + 1)
    transform_offset_.CopyColFromMat(full_transform, full_dim);

  int32 full_dim2 = ((full_dim+1)*(full_dim+2))/2;
  count_ = 0.0;

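  // Note (added comment): Q_ and S_ keep only model_dim + 1 rows rather than
  // full_dim, because all rejected dimensions share the same statistics (see
  // the extended comment further down); the extra row holds count-weighted
  // stats that stand in for all of them at once.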
  temp_.Resize(full_dim + 1);
  Q_.Resize(model_dim + 1, full_dim + 1);
  S_.Resize(model_dim + 1, full_dim2);

  single_frame_stats_.s.Resize(full_dim + 1);
  single_frame_stats_.transformed_data.Resize(full_dim);
  single_frame_stats_.count = 0.0;
  single_frame_stats_.a.Resize(model_dim);
  single_frame_stats_.b.Resize(model_dim);
}


bool FmllrRawAccs::DataHasChanged(const VectorBase<BaseFloat> &data) const {
  KALDI_ASSERT(data.Dim() == FullDim());
  return !data.ApproxEqual(single_frame_stats_.s.Range(0, FullDim()), 0.0);
}

void FmllrRawAccs::CommitSingleFrameStats() {
  // Commit the stats for this frame (in SingleFrameStats).
  int32 model_dim = ModelDim(), full_dim = FullDim();
  SingleFrameStats &stats = single_frame_stats_;
  if (stats.count == 0.0) return;

  count_ += stats.count;

  // a_ext and b_ext are a and b extended with the count,
  // which we'll later use to reconstruct the full stats for
  // the rejected dimensions.
  Vector<double> a_ext(model_dim + 1), b_ext(model_dim + 1);
  a_ext.Range(0, model_dim).CopyFromVec(stats.a);
  b_ext.Range(0, model_dim).CopyFromVec(stats.b);
  a_ext(model_dim) = stats.count;
  b_ext(model_dim) = stats.count;
  Q_.AddVecVec(1.0, a_ext, Vector<double>(stats.s));

  temp_.SetZero();
  temp_.AddVec2(1.0, stats.s);
  int32 full_dim2 = ((full_dim + 1) * (full_dim + 2)) / 2;
  SubVector<double> temp_vec(temp_.Data(), full_dim2);
  S_.AddVecVec(1.0, b_ext, temp_vec);
}
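
// (Added note: per committed frame, the accumulation above implements
//  Q_ += a_ext * s^T   and   S_(i, :) += b_ext(i) * vec(s s^T),
//  where vec() is the packed triangular layout used by SpMatrix; this matches
//  q_i = sum_t a_{ti} s_t and S_i = sum_t b_{ti} s_t s_t^T from the extended
//  comment below.)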

void FmllrRawAccs::InitSingleFrameStats(const VectorBase<BaseFloat> &data) {
  SingleFrameStats &stats = single_frame_stats_;
  int32 full_dim = FullDim();
  KALDI_ASSERT(data.Dim() == full_dim);
  stats.s.Range(0, full_dim).CopyFromVec(data);
  stats.s(full_dim) = 1.0;
  stats.transformed_data.AddMatVec(1.0, full_transform_, kNoTrans, data, 0.0);
  stats.transformed_data.AddVec(1.0, transform_offset_);
  stats.count = 0.0;
  stats.a.SetZero();
  stats.b.SetZero();
}


BaseFloat FmllrRawAccs::AccumulateForGmm(const DiagGmm &gmm,
                                         const VectorBase<BaseFloat> &data,
                                         BaseFloat weight) {
  int32 model_dim = ModelDim(), full_dim = FullDim();
  KALDI_ASSERT(data.Dim() == full_dim &&
               "Expect raw, spliced data, which should have same dimension as "
               "full transform.");
  if (DataHasChanged(data)) {
    // this is part of our mechanism to accumulate certain sub-parts of
    // the computation for each frame, to avoid excessive compute.
    CommitSingleFrameStats();
    InitSingleFrameStats(data);
  }
  SingleFrameStats &stats = single_frame_stats_;

  SubVector<BaseFloat> projected_data(stats.transformed_data, 0, model_dim);

  int32 num_gauss = gmm.NumGauss();
  Vector<BaseFloat> posterior(num_gauss);
  BaseFloat log_like = gmm.ComponentPosteriors(projected_data, &posterior);
  posterior.Scale(weight);
  // Note: AccumulateFromPosteriors takes the original, spliced data;
  // the log-like of the rejected dimensions is added separately below.
  AccumulateFromPosteriors(gmm, data, posterior);

  // Add the likelihood of the rejected dimensions to the objective function
  // (assume zero-mean, unit-variance Gaussian; the LDA should have any offset
  // required to ensure this).
  if (full_dim > model_dim) {
    SubVector<BaseFloat> rejected_data(stats.transformed_data,
                                       model_dim, full_dim - model_dim);
    log_like += -0.5 * (VecVec(rejected_data, rejected_data)
                        + (full_dim - model_dim) * M_LOG_2PI);
  }
  return log_like;
}
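
// (Added note: the rejected-dimension term above is just the log-density of a
//  standard normal, log N(x; 0, I) = -0.5 * (x^T x + d * log(2*pi)), with
//  d = full_dim - model_dim.)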

/*
   // Extended comment here.
   //
   // Let x_t(i) be the fully processed feature, dimension i (with fMLLR transform
   // and LDA transform), but *without* any offset term from the LDA, which
   // it's more convenient to view as an offset in the model.
   //
   //
   // For a given dimension i (either accepted or rejected), the auxf can
   // be expressed as a quadratic function of x_t(i). We ultimately will want to
   // express x_t(i) as a linear function of the parameters of the linearized
   // fMLLR transform matrix. Some notation:
   // Let l be the linearized transform matrix, i.e. the concatenation of the
   // m rows, each of length m+1, of the fMLLR transform.
   // Let n be the number of frames we splice together each time.
   // Let s_t be the spliced-together features on time t, with a one appended;
   // it will have n blocks each of size m, followed by a 1. (dim is n*m + 1).
   //
   // x(i) [note, this is the feature without any LDA offset] is bilinear in the
   // transform matrix and the features, so:
   //
   // x(i) = l^T M_i s_t, where s_t is the spliced features on time t,
   // with a 1 appended
   // [we need to compute M_i, but we know the function is bilinear so it exists].
   //
   // The auxf can be written as:
   //  F = sum_i sum_t a_{ti} x(i) - 0.5 b_{ti} x(i)^2
   //    = sum_i sum_t a_{ti} (l^T M_i s_t) - 0.5 b_{ti} (l^T M_i s_t)^2
   //    = sum_i l^T M_i q_i - 0.5 l^T M_i S_i M_i^T l
   // where
   //  q_i = sum_t a_{ti} s_t, and
   //  S_i = sum_t b_{ti} s_t s_t^T
   // [Note that we only need store S_i for the model-dim plus one, because
   // all the rejected dimensions have the same value.]
   //
   // We define a matrix Q whose rows are the q_i, with
   //  Q = \sum_t d_t s_t^T,
   // where d_t is the vector with components a_{ti}.
   // [The Q we actually store as stats will use a modified form of d that
   // has a 1 for all dimensions past the model dim, to avoid redundancy;
   // we'll reconstruct the true Q from this later on.]
   //
   //
   // What is M_i? Working it out is a little tedious.
   // Note: each M_i (for i = 0 ... full_dim - 1) is of
   // dimension (raw_dim*(raw_dim+1)) by full_dim + 1.
   //
   // We want to express x(i) [we forget the subscript "t" sometimes]
   // as a bilinear function of l and s_t.
   // We have x(i) = l^T M_i s.
   //
   // The (j,k)'th component of M_i is the term in x(i) that corresponds to the j'th
   // component of l and the k'th of s.

   // Before defining M_i, let us define N_i, where l^T N_i s will equal the spliced and
   // transformed pre-LDA features of dimension i. The N's have the same dimensions as the
   // M's.
   //
   // We'll first define the j,k'th component of N_i, as this is easier; we'll then define
   // the M_i as combinations of the N_i.
   //
   // For a given i, j and k, the value of n_{i,j,k} will be as follows:
   // We first decompose index j into j1, j2 (both functions of
   // the original index j), where
   // j1 corresponds to the row-index of the fMLLR transform, j2 to the col-index.
   // We next decompose i into i1, i2, where i1 corresponds to the splicing number
   // (0...n-1), and i2 corresponds to the cepstral index.
   //
   // If (j1 != i2) then n_{ijk} == 0.
   //
   // Elsif k corresponds to the last element [i.e. k == m * n], then this n_{ijk} corresponds
   // to the effect of the j'th component of l for zero input, so:
   //   If j2 == m (i.e. this is the offset term in the fMLLR matrix), then
   //     n_{ijk} = 1.0,
   //   Else
   //     n_{ijk} = 0.0
   //   Fi
   //
   // Else:
   //   Decompose k into k1, k2, where k1 = 0...n-1 is the splicing index, and k2 = 0...m-1 is
   //   the cepstral index.
   //   If k1 != i1 then
   //     n_{ijk} = 0.0
   //   elsif k2 != j2 then
   //     n_{ijk} = 0.0
   //   else
   //     n_{ijk} = 1.0
   //   fi
   // Endif
   //
   // Now, M_i will be defined as sum_j T_{ij} N_j, where T_{ij} are the elements of the
   // LDA+MLLT transform (but excluding any linear offset, which gets accounted for by
   // c_i, above).
   //
   // Now suppose we want to express the auxiliary function in a simpler form
   // as l^T v - 0.5 l^T W l, where v and W are the "simple" linear and quadratic stats;
   // we can do so with:
   //  v = \sum_i M_i q_i
   // and
   //  W = \sum_i M_i S_i M_i^T
   //
 */
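
// Added illustration (not from the original source, hypothetical sizes): with
// 13-dimensional raw MFCCs (m = raw_dim = 13) spliced across n = 9 frames and
// an LDA+MLLT transform keeping model_dim = 40 dimensions, we would have
// full_dim = n * m = 117, s_t of dimension n * m + 1 = 118, a linearized
// transform l with m * (m + 1) = 182 components, and each M_i of size
// 182 x 118, so that x(i) = l^T M_i s_t is a scalar.  The stored stats Q_ and
// S_ keep only model_dim + 1 = 41 rows, since all rejected dimensions share
// the same zero-mean, unit-variance statistics.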

void FmllrRawAccs::AccumulateFromPosteriors(
    const DiagGmm &diag_gmm,
    const VectorBase<BaseFloat> &data,
    const VectorBase<BaseFloat> &posterior) {
  // The user may call this function directly, even though we also
  // call it from AccumulateForGmm(), so check again:
  if (DataHasChanged(data)) {
    CommitSingleFrameStats();
    InitSingleFrameStats(data);
  }

  int32 model_dim = ModelDim();

  SingleFrameStats &stats = single_frame_stats_;

  // The quantities a and b describe the diagonal auxiliary function
  // for each of the retained dimensions in the transformed space--
  // in the format F = \sum_d alpha(d) x(d) - 0.5 beta(d) x(d)^2,
  // where x(d) is the d'th dimensional fully processed feature.
  // The vector "a" accumulated below is alpha adjusted to take into
  // account any offset in the LDA (see the correction after the loop).
  //
  Vector<double> a(model_dim), b(model_dim);

  int32 num_comp = diag_gmm.NumGauss();

  double count = 0.0; // data-count contribution from this frame.

  // Note: we could do this using matrix-matrix operations instead of
  // row by row. In the end it won't really matter as this is not
  // the slowest part of the computation.
  for (size_t m = 0; m < num_comp; m++) {
    BaseFloat this_post = posterior(m);
    if (this_post != 0.0) {
      count += this_post;
      a.AddVec(this_post, diag_gmm.means_invvars().Row(m));
      b.AddVec(this_post, diag_gmm.inv_vars().Row(m));
    }
  }
  // Correct "a" for any offset term in the LDA transform-- we view it as
  // the opposite offset in the model [note: we'll handle the rejected
  // dimensions at update time]. Here, multiplying the elements of "b" (which
  // are the weighted inv-vars) by transform_offset_, and subtracting the
  // result from a, is like subtracting the transform-offset from the original
  // means (because a contains the means times inv-vars).
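  // In symbols (added note, not in the original source): with posterior
  // gamma_m on Gaussian m, mean mu_m and diagonal variance sigma^2_m,
  // the loop above forms
  //   a(d) = sum_m gamma_m * mu_m(d) / sigma^2_m(d)
  //   b(d) = sum_m gamma_m / sigma^2_m(d)
  // and the correction below replaces a(d) with a(d) - b(d) * offset(d).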
  Vector<double> offset(transform_offset_.Range(0, model_dim));
  a.AddVecVec(-1.0, b, offset, 1.0);
  stats.a.AddVec(1.0, a);
  stats.b.AddVec(1.0, b);
  stats.count += count;
}


void FmllrRawAccs::Update(const FmllrRawOptions &opts,
                          MatrixBase<BaseFloat> *raw_fmllr_mat,
                          BaseFloat *objf_impr,
                          BaseFloat *count) {
  // First commit any pending stats from the last frame.
  if (single_frame_stats_.count != 0.0)
    CommitSingleFrameStats();

  if (this->count_ < opts.min_count) {
    KALDI_WARN << "Not updating (raw) fMLLR since count " << this->count_
               << " is less than min count " << opts.min_count;
    *objf_impr = 0.0;
    *count = this->count_;
    return;
  }
  KALDI_ASSERT(raw_fmllr_mat->NumRows() == RawDim() &&
               raw_fmllr_mat->NumCols() == RawDim() + 1 &&
               !raw_fmllr_mat->IsZero());
  Matrix<double> fmllr_mat(*raw_fmllr_mat); // temporary, double-precision version
                                            // of matrix.


  Matrix<double> linear_stats; // like K in diagonal update.
  std::vector<SpMatrix<double> > diag_stats; // like G in diagonal update.
  // Note: we will invert these.
  std::vector<std::vector<Matrix<double> > > off_diag_stats; // these will
  // contribute to the linear term.

  Vector<double> simple_linear_stats;
  SpMatrix<double> simple_quadratic_stats;
  ConvertToSimpleStats(&simple_linear_stats, &simple_quadratic_stats);

  ConvertToPerRowStats(simple_linear_stats, simple_quadratic_stats,
                       &linear_stats, &diag_stats, &off_diag_stats);

  try {
    for (size_t i = 0; i < diag_stats.size(); i++) {
      diag_stats[i].Invert();
    }
  } catch (...) {
    KALDI_WARN << "Error inverting stats matrices for fMLLR "
               << "[min-count too small? Bad data?], not updating.";
    return;
  }

  int32 raw_dim = RawDim(), splice_width = SpliceWidth();

  double effective_beta = count_ * splice_width; // We "count" the determinant
                                                 // splice_width times in the objective function.

  double auxf_orig = GetAuxf(simple_linear_stats, simple_quadratic_stats,
                             fmllr_mat);
  for (int32 iter = 0; iter < opts.num_iters; iter++) {
    for (int32 row = 0; row < raw_dim; row++) {
      SubVector<double> this_row(fmllr_mat, row);
      Vector<double> this_linear(raw_dim + 1); // Here, k_i is the linear term
                                               // in the auxf expressed as a function of this row.
      this_linear.CopyFromVec(linear_stats.Row(row));
      for (int32 row2 = 0; row2 < raw_dim; row2++) {
        if (row2 != row) {
          if (row2 < row) {
            this_linear.AddMatVec(-1.0, off_diag_stats[row][row2], kNoTrans,
                                  fmllr_mat.Row(row2), 1.0);
          } else {
            // We won't have the element [row][row2] stored, but use symmetry.
            this_linear.AddMatVec(-1.0, off_diag_stats[row2][row], kTrans,
                                  fmllr_mat.Row(row2), 1.0);
          }
        }
      }
      FmllrInnerUpdate(diag_stats[row],
                       this_linear,
                       effective_beta,
                       row,
                       &fmllr_mat);
    }
    if (GetVerboseLevel() >= 2) {
      double cur_auxf = GetAuxf(simple_linear_stats, simple_quadratic_stats,
                                fmllr_mat),
          auxf_change = cur_auxf - auxf_orig;
      KALDI_VLOG(2) << "Updating raw fMLLR: objf improvement per frame was "
                    << (auxf_change / this->count_) << " over "
                    << this->count_ << " frames, by the " << iter
                    << "'th iteration";
    }
  }
  double auxf_final = GetAuxf(simple_linear_stats, simple_quadratic_stats,
                              fmllr_mat),
      auxf_change = auxf_final - auxf_orig;
  *count = this->count_;
  KALDI_VLOG(1) << "Updating raw fMLLR: objf improvement per frame was "
                << (auxf_change / this->count_) << " over "
                << this->count_ << " frames.";
  if (auxf_final > auxf_orig) {
    *objf_impr = auxf_change;
    *count = this->count_;
    raw_fmllr_mat->CopyFromMat(fmllr_mat);
  } else {
    *objf_impr = 0.0;
    // don't update "raw_fmllr_mat"
  }
}
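
// Added usage sketch (not part of the original file): a rough illustration of
// how these accumulators are typically driven. The names "gmm",
// "spliced_feats" and "lda_mllt" are hypothetical variables standing for a
// DiagGmm, the spliced raw features (one row per frame, of dimension
// FullDim()), and the full LDA+MLLT matrix.
//
//   FmllrRawAccs accs(raw_dim, model_dim, lda_mllt);
//   FmllrRawOptions opts;
//   for (int32 t = 0; t < spliced_feats.NumRows(); t++)
//     accs.AccumulateForGmm(gmm, spliced_feats.Row(t), 1.0);
//   Matrix<BaseFloat> raw_fmllr(raw_dim, raw_dim + 1);
//   raw_fmllr.SetUnit();  // must be nonzero at entry (see the assert above).
//   BaseFloat objf_impr, frame_count;
//   accs.Update(opts, &raw_fmllr, &objf_impr, &frame_count);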

void FmllrRawAccs::SetZero() {
  count_ = 0.0;
  single_frame_stats_.count = 0.0;
  single_frame_stats_.s.SetZero();
  Q_.SetZero();
  S_.SetZero();
}

// Compute the M_i quantities, needed in the update. This function could be
// greatly sped up, but I don't think it's the limiting factor.
void FmllrRawAccs::ComputeM(std::vector<Matrix<double> > *M) const {
  int32 full_dim = FullDim(), raw_dim = RawDim(),
      raw_dim2 = raw_dim * (raw_dim + 1);
  M->resize(full_dim);
  for (int32 i = 0; i < full_dim; i++)
    (*M)[i].Resize(raw_dim2, full_dim + 1);

  // the N's are simpler matrices from which we'll interpolate the M's.
  // In this loop we imagine we are computing the vector of N's, but
  // when we get each element, if it's nonzero we propagate it straight
  // to the M's.
  for (int32 i = 0; i < full_dim; i++) {
    // i is the index after the fMLLR transform; i1 is the splicing index,
    // i2 is the cepstral index.
    int32 i1 = i / raw_dim, i2 = i % raw_dim;
    for (int32 j = 0; j < raw_dim2; j++) {
      // j1 is the row-index of the fMLLR transform, j2 is the column-index.
      int32 j1 = j / (raw_dim + 1), j2 = j % (raw_dim + 1);
      for (int32 k = 0; k < full_dim + 1; k++) {
        BaseFloat n_ijk;
        if (j1 != i2) {
          n_ijk = 0.0;
        } else if (k == full_dim) {
          if (j2 == raw_dim) // offset term in fMLLR matrix.
            n_ijk = 1.0;
          else
            n_ijk = 0.0;
        } else {
          // k1 is the splicing index, k2 is the cepstral index.
          int32 k1 = k / raw_dim, k2 = k % raw_dim;
          if (k1 != i1 || k2 != j2)
            n_ijk = 0.0;
          else
            n_ijk = 1.0;
        }
        if (n_ijk != 0.0)
          for (int32 l = 0; l < full_dim; l++)
            (*M)[l](j, k) += n_ijk * full_transform_(l, i);
      }
    }
  }
}
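
// (Added note: for each i, N_i has exactly raw_dim + 1 nonzero entries, all
//  equal to 1 -- one per column of row i2 of the fMLLR transform -- which is
//  why the resulting M_i are sparse and AddSmat2Sp is used when forming the
//  quadratic stats in ConvertToSimpleStats() below.)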

void FmllrRawAccs::ConvertToSimpleStats(
    Vector<double> *simple_linear_stats,
    SpMatrix<double> *simple_quadratic_stats) const {
  std::vector<Matrix<double> > M;
  ComputeM(&M);

  int32 full_dim = FullDim(), raw_dim = RawDim(), model_dim = ModelDim(),
      raw_dim2 = raw_dim * (raw_dim + 1),
      full_dim2 = ((full_dim+1)*(full_dim+2))/2;
  simple_linear_stats->Resize(raw_dim2);
  simple_quadratic_stats->Resize(raw_dim2);
  for (int32 i = 0; i < full_dim; i++) {
    Vector<double> q_i(full_dim + 1);
    SpMatrix<double> S_i(full_dim + 1);
    SubVector<double> S_i_vec(S_i.Data(), full_dim2);
    if (i < model_dim) {
      q_i.CopyFromVec(Q_.Row(i));
      S_i_vec.CopyFromVec(S_.Row(i));
    } else {
      q_i.CopyFromVec(Q_.Row(model_dim)); // The last row contains stats proportional
      // to "count", which we need to modify to be correct.
      q_i.Scale(-transform_offset_(i)); // These stats are zero (corresponding to
      // a zero-mean model) if there is no offset in the LDA transform. Note:
      // the two statements above are the equivalent, for the rejected dims,
      // of the statement "a.AddVecVec(-1.0, b, offset);" for the kept ones.
      //
      S_i_vec.CopyFromVec(S_.Row(model_dim)); // these are correct, and
      // all the same (corresponds to unit variance).
    }
    // The equation v = \sum_i M_i q_i:
    simple_linear_stats->AddMatVec(1.0, M[i], kNoTrans, q_i, 1.0);
    // The equation W = \sum_i M_i S_i M_i^T.
    // Here, M[i] is quite sparse, so AddSmat2Sp will be faster.
    simple_quadratic_stats->AddSmat2Sp(1.0, M[i], kNoTrans, S_i, 1.0);
  }
}

// See header for comment.
void FmllrRawAccs::ConvertToPerRowStats(
    const Vector<double> &simple_linear_stats,
    const SpMatrix<double> &simple_quadratic_stats_sp,
    Matrix<double> *linear_stats,
    std::vector<SpMatrix<double> > *diag_stats,
    std::vector<std::vector<Matrix<double> > > *off_diag_stats) const {

  // get it as a Matrix, which makes it easier to extract sub-parts.
  Matrix<double> simple_quadratic_stats(simple_quadratic_stats_sp);

  linear_stats->Resize(RawDim(), RawDim() + 1);
  linear_stats->CopyRowsFromVec(simple_linear_stats);
  diag_stats->resize(RawDim());
  off_diag_stats->resize(RawDim());

  // Set *diag_stats.
  int32 rd1 = RawDim() + 1;
  for (int32 i = 0; i < RawDim(); i++) {
    SubMatrix<double> this_diag(simple_quadratic_stats,
                                i * rd1, rd1,
                                i * rd1, rd1);
    (*diag_stats)[i].Resize(RawDim() + 1);
    (*diag_stats)[i].CopyFromMat(this_diag, kTakeMean);
  }

  for (int32 i = 0; i < RawDim(); i++) {
    (*off_diag_stats)[i].resize(i);
    for (int32 j = 0; j < i; j++) {
      SubMatrix<double> this_off_diag(simple_quadratic_stats,
                                      i * rd1, rd1,
                                      j * rd1, rd1);
      (*off_diag_stats)[i][j] = this_off_diag;
    }
  }
}
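
// (Added note: viewing the linearized transform l as RawDim() blocks of size
//  RawDim() + 1, one block per transform row, the quadratic stats W split into
//  blocks W_{ij}; the diagonal blocks extracted above act like the G_i
//  matrices of the standard per-row fMLLR update, and the off-diagonal blocks
//  supply the cross-row linear corrections applied in Update() above.)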

double FmllrRawAccs::GetAuxf(const Vector<double> &simple_linear_stats,
                             const SpMatrix<double> &simple_quadratic_stats,
                             const Matrix<double> &fmllr_mat) const {
  // linearize transform...
  int32 raw_dim = RawDim(), splice_width = SpliceWidth();
  Vector<double> fmllr_vec(raw_dim * (raw_dim + 1));
  fmllr_vec.CopyRowsFromMat(fmllr_mat);
  SubMatrix<double> square_part(fmllr_mat, 0, raw_dim,
                                0, raw_dim);
  double logdet = square_part.LogDet();
  return VecVec(fmllr_vec, simple_linear_stats) -
      0.5 * VecSpVec(fmllr_vec, simple_quadratic_stats, fmllr_vec) +
      logdet * splice_width * count_;
}



}  // namespace kaldi