am-sgmm2-project.cc
Go to the documentation of this file.
1 // sgmm2/am-sgmm2-project.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <algorithm>
21 #include <functional>
22 #include <set>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 using std::vector;
27 
28 #include "sgmm2/am-sgmm2-project.h"
29 #include "util/kaldi-thread.h"
30 #include "gmm/full-gmm-normal.h"
31 #include "gmm/diag-gmm-normal.h"
32 
33 namespace kaldi {
34 
35 // The output pointer argument "projection" projects from the pre-LDA+MLLT space
36 // to the space we're going to model. We retain "model_dim" dimensions, which
37 // means we're keeping all dimensions that have any variation at all.
38 
40  const Matrix<BaseFloat> &inv_lda_mllt,
41  int32 start_dim,
42  int32 end_dim, // last dim plus one
43  Matrix<BaseFloat> *projection) {
44  Matrix<double> inv_lda_mllt_dbl(inv_lda_mllt);
45  KALDI_ASSERT(inv_lda_mllt.NumRows() == inv_lda_mllt.NumCols());
46 
47  // First, to compute the projection that we're going to use:
48 
49  SpMatrix<double> B; // between-class covar.
50  SpMatrix<double> W; // within-class covar.
51 
52  int32 model_dim = sgmm.FeatureDim(),
53  full_dim = inv_lda_mllt.NumRows();
54  KALDI_ASSERT(full_dim > model_dim);
55  KALDI_ASSERT(start_dim >= 0 && start_dim < end_dim && end_dim <= full_dim);
56 
57  ComputeLdaStats(sgmm.full_ubm(), &B, &W);
58  // B and W are now of dim "model_dim".
59 
60  double diag_term = 0.001 / model_dim * B.Trace(); // This will ensure
61  // that the between-class covariance is full rank within the original
62  // feature space.
63  for (int32 i = 0; i < B.NumRows(); i++)
64  B(i, i) += diag_term;
65 
66  B.Resize(full_dim, kCopyData); // This extends the extra dims with
67  // zeros, which is what we want, because we assume the means are zero in the
68  // extra dimensions [this is valid because we have cmd'ed data].
69 
70  W.Resize(full_dim, kCopyData); // We want the within-class
71  // covar to be unit in the extra dimensions, so we need to do something
72  // about this... note, this is valid if we have an LDA-based feature
73  // space, as we constructed the LDA matrix so that the covar in
74  // the rejected dimensions is unit. [note: we can gloss over differences
75  // between within vs. total covar here, as it's almost exactly the same
76  // for the rejected dimensions].
77  for (int32 i = model_dim; i < full_dim; i++)
78  W(i, i) = 1.0;
79 
80  // Next, we'll project these "extended" stats with the "inv_lda_mllt"
81  // matrix, which takes us into the space where we were before LDA+MLLT.
82  SpMatrix<double> B_orig(full_dim), W_orig(full_dim);
83  B_orig.AddMat2Sp(1.0, inv_lda_mllt_dbl, kNoTrans, B, 0.0); // B_orig <-- inv_lda_mllt B inv_lda_mllt^T
84  W_orig.AddMat2Sp(1.0, inv_lda_mllt_dbl, kNoTrans, W, 0.0); // W_orig <-- inv_lda_mllt W inv_lda_mllt^T
85 
86  // Now get versions of B_orig and W_orig that are limited to the
87  // dimension range that we wanted.
88  Matrix<double> B_orig_mat(B_orig), W_orig_mat(W_orig); // Get them as full matrices...
89  SpMatrix<double> B_orig_limit(B_orig_mat.Range(start_dim, end_dim-start_dim,
90  start_dim, end_dim-start_dim)),
91  W_orig_limit(W_orig_mat.Range(start_dim, end_dim-start_dim,
92  start_dim, end_dim-start_dim));
93 
94  Matrix<double> proj;
95  int32 retained_dim = model_dim;
96  if (end_dim - start_dim < retained_dim) retained_dim = end_dim - start_dim;
97  ComputeLdaTransform(B_orig_limit, W_orig_limit, retained_dim, &proj);
98 
99  // Now proj has the projection from the "limited-dimension" space.
100  // We want a projection from the entire space.
101 
102  projection->Resize(retained_dim, full_dim); // This projection (which we output) will project from
103  // full_dim to retained_dim; it goes from the pre-LDA+MLLT space to "retained_dim" which
104  // is <= model_dim.
105 
106  // Copy the relevant dimensions of "projection" from the "proj" matrix that
107  // we just computed. The rest remain zero (corresponding to discarded dimensions).
108  projection->Range(0, retained_dim, start_dim, end_dim-start_dim).CopyFromMat(proj);
109 }
110 
112  const SpMatrix<double> &W,
113  int32 dim_to_retain,
114  Matrix<double> *Projection) {
115  int32 dim = B.NumRows();
116  KALDI_ASSERT(dim_to_retain <= dim);
117 
118  // OK, now do LDA in this space...
119  TpMatrix<double> T(dim);
120  T.Cholesky(W); // do Cholesky factorization W_orig = T T^T. Now,
121  // T^{-1} is the projection that makes W unit.
122  TpMatrix<double> Tinv(T); // get inverse of T.
123  Tinv.Invert();
124 
125  // Now project B_orig with Tinv, to get between-class scatter in space where
126  // W_orig is unit.
127  SpMatrix<double> B_proj(dim);
128  B_proj.AddTp2Sp(1.0, Tinv, kNoTrans, B, 0.0);
129 
130  // Now, in this space, do SVD.
131 
132  Matrix<double> P(dim, dim);
133  Vector<double> s(dim);
134  B_proj.SymPosSemiDefEig(&s, &P);
135  // Now B_proj = P diag(s) P^T, with P orthogonal. It's both SVD and eigenvalue
136  // decomposition.
137  // So P^{-1}, which equals P^T, is the transformation that
138  // will make B_proj diagonal (with eigenvalues equal to s).
139 
140  P.Resize(dim, dim_to_retain, kCopyData); // keep only rows of P^T that we want.
141  Projection->Resize(dim_to_retain, dim);
142  // The next line sets "Projection" to the LDA matrix, which is (part of P^T) * T^{-1}
143  Projection->AddMatTp(1.0, P, kTrans, Tinv, kNoTrans, 0.0);
144 
145  KALDI_LOG << "Eigenvalues of retained LDA dimensions: "
146  << s.Range(0, dim_to_retain) << " (sum is:) "
147  << s.Range(0, dim_to_retain).Sum();
148  KALDI_LOG << "Eigenvalues of rejected LDA dimensions: "
149  << s.Range(dim_to_retain, dim - dim_to_retain) << " (sum is:) "
150  << s.Range(dim_to_retain, dim - dim_to_retain).Sum();
151 
152  { // Check that it's been done correctly by projecting the
153  // matrices we got as input checking they become (diagonal, unit).
154  SpMatrix<double> B_ldaproj(dim_to_retain), W_ldaproj(dim_to_retain);
155  B_ldaproj.AddMat2Sp(1.0, *Projection, kNoTrans, B, 0.0);
156  KALDI_ASSERT(B_ldaproj.IsDiagonal());
157  W_ldaproj.AddMat2Sp(1.0, *Projection, kNoTrans, W, 0.0);
158  KALDI_ASSERT(W_ldaproj.IsUnit());
159  }
160 }
161 
162 
164  SpMatrix<double> *between_covar,
165  SpMatrix<double> *within_covar) {
166  int32 dim = full_ubm.Dim(); // Feature dimension.
167  between_covar->Resize(dim); // zeroes it.
168  within_covar->Resize(dim); // zeroes it.
169  FullGmmNormal full_gmm_normal(full_ubm);
170  BaseFloat weight = 1.0 / full_ubm.NumGauss();
171  Vector<double> avg_mean(dim);
172  for (int32 i = 0; i < full_ubm.NumGauss(); i++) {
173  between_covar->AddSp(weight, full_gmm_normal.vars_[i]);
174  within_covar->AddVec2(weight, full_gmm_normal.means_.Row(i));
175  avg_mean.AddVec(weight, full_gmm_normal.means_.Row(i));
176  }
177  between_covar->AddVec2(-1.0, avg_mean);
178 }
179 
181  AmSgmm2 *sgmm) {
182  int32 dim = sgmm->FeatureDim();
183  int32 retained_dim = total_projection.NumRows();
184  KALDI_ASSERT(retained_dim <= dim);
185 
186  // Note: small_projection is as total_projection but ignoring the
187  // higher dimensions of the input... this is valid as far as the means
188  // are concerned, because we extend with zeros.
189  SubMatrix<BaseFloat> small_projection(total_projection, 0, retained_dim, 0, dim);
190  Matrix<double> small_projection_dbl(small_projection);
191  Matrix<double> total_projection_dbl(total_projection);
192 
193  int32 I = sgmm->NumGauss();
194  for (int32 i = 0; i < I; i++) {
195  {
196  // do M_i <-- small_projection * M_i
197  Matrix<BaseFloat> M(sgmm->M_[i]);
198  sgmm->M_[i].Resize(retained_dim, M.NumCols());
199  sgmm->M_[i].AddMatMat(1.0, small_projection, kNoTrans, M, kNoTrans, 0.0);
200  }
201  if (!sgmm->N_.empty()) {
202  // do N_i <-- small_projection * N_i
203  Matrix<BaseFloat> N(sgmm->N_[i]);
204  sgmm->N_[i].Resize(retained_dim, N.NumCols());
205  sgmm->N_[i].AddMatMat(1.0, small_projection, kNoTrans, N, kNoTrans, 0.0);
206  }
207  ProjectVariance(total_projection_dbl, true, // inverted,
208  &(sgmm->SigmaInv_[i]));
209  }
210 
211  { // Project full_ubm.
212  FullGmmNormal full_ubm_normal(sgmm->full_ubm_);
213  for (int32 i = 0; i < I; i++) {
214  ProjectVariance(total_projection_dbl, false, &(full_ubm_normal.vars_[i]));
215  }
216  Matrix<double> old_means(full_ubm_normal.means_);
217  full_ubm_normal.means_.Resize(I, retained_dim);
218  full_ubm_normal.means_.AddMatMat(1.0, old_means, kNoTrans,
219  small_projection_dbl, kTrans, 0.0);
220  sgmm->full_ubm_.Resize(I, retained_dim);
221  full_ubm_normal.CopyToFullGmm(&sgmm->full_ubm_);
222  sgmm->full_ubm_.ComputeGconsts();
223  }
224  sgmm->diag_ubm_.Resize(I, retained_dim);
225  sgmm->diag_ubm_.CopyFromFullGmm(sgmm->full_ubm_);
226  sgmm->diag_ubm_.ComputeGconsts();
227  sgmm->n_.clear(); // The normalizers are invalid now, so clear them.
228 }
229 
230 void Sgmm2Project::ProjectVariance(const Matrix<double> &total_projection,
231  bool inverse,
232  SpMatrix<double> *variance) {
233  if (inverse) {
234  SpMatrix<double> inv_var(*variance);
235  inv_var.Invert();
236  ProjectVariance(total_projection, false, &inv_var);
237  inv_var.Invert();
238  if (variance->NumRows() != inv_var.NumRows())
239  variance->Resize(inv_var.NumRows());
240  variance->CopyFromSp(inv_var);
241  } else {
242  SpMatrix<double> extended_var(*variance);
243  KALDI_ASSERT(total_projection.NumCols() >= extended_var.NumRows());
244  extended_var.Resize(total_projection.NumCols(), kCopyData);
245  for (int32 i = variance->NumRows(); i < extended_var.NumRows(); i++)
246  extended_var(i, i) = 1.0; // make new part of diagonal ones.
247  int32 tgt_dim = total_projection.NumRows();
248  KALDI_ASSERT(tgt_dim <= variance->NumRows());
249  if (tgt_dim < variance->NumRows()) variance->Resize(tgt_dim);
250  variance->AddMat2Sp(1.0, total_projection, kNoTrans, extended_var, 0.0);
251  }
252 }
253 
254 void Sgmm2Project::ProjectVariance (const Matrix<double> &total_projection,
255  bool inverse,
256  SpMatrix<float> *variance) {
257  SpMatrix<double> variance_dbl(*variance);
258  ProjectVariance(total_projection, inverse, &variance_dbl);
259  if (variance->NumRows() != variance_dbl.NumRows())
260  variance->Resize(variance_dbl.NumRows());
261  variance->CopyFromSp(variance_dbl);
262 }
263 
264 
265 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool IsUnit(Real cutoff=1.0e-05) const
Definition: sp-matrix.cc:480
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231
int32 Dim() const
Returns the dimensionality of the Gaussian mean vectors.
Definition: full-gmm.h:60
Definition for Gaussian Mixture Model with full covariances in normal mode: where the parameters are ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
int32 ComputeGconsts()
Sets the gconsts.
Definition: full-gmm.cc:92
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: diag-gmm.cc:66
void ComputeProjection(const AmSgmm2 &sgmm, const Matrix< BaseFloat > &inv_lda_mllt, int32 begin_dim, int32 end_dim, Matrix< BaseFloat > *projection)
int32 ComputeGconsts()
Sets the gconsts.
Definition: diag-gmm.cc:114
kaldi::int32 int32
DiagGmm diag_ubm_
These contain the "background" model associated with the subspace GMM.
Definition: am-sgmm2.h:413
std::vector< Matrix< BaseFloat > > n_
n_{jim}, per-Gaussian normalizer. Dimension is [J1][I][#mix]
Definition: am-sgmm2.h:440
std::vector< Matrix< BaseFloat > > N_
Speaker-subspace projections. Dimension is [I][D][T].
Definition: am-sgmm2.h:427
void ApplyProjection(const Matrix< BaseFloat > &total_projection, AmSgmm2 *sgmm)
MatrixIndexT NumRows() const
const FullGmm & full_ubm() const
Accessors.
Definition: am-sgmm2.h:378
void CopyFromSp(const SpMatrix< Real > &other)
Definition: sp-matrix.h:85
std::vector< SpMatrix< double > > vars_
covariances
void AddTp2Sp(const Real alpha, const TpMatrix< Real > &T, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
The following function does: this <-- beta*this + alpha * T * A * T^T.
Definition: sp-matrix.cc:1139
std::vector< Matrix< BaseFloat > > M_
Phonetic-subspace projections. Dimension is [I][D][S].
Definition: am-sgmm2.h:425
int32 FeatureDim() const
Definition: am-sgmm2.h:363
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
rank-one update, this <-- this + alpha v v'
Definition: sp-matrix.cc:946
void Cholesky(const SpMatrix< Real > &orig)
Definition: tp-matrix.cc:88
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: full-gmm.cc:41
void ComputeLdaStats(const FullGmm &full_ubm, SpMatrix< double > *between_covar, SpMatrix< double > *within_covar)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
std::vector< SpMatrix< BaseFloat > > SigmaInv_
Globally shared parameters of the subspace GMM.
Definition: am-sgmm2.h:423
void ProjectVariance(const Matrix< double > &total_projection, bool inverse, SpMatrix< double > *variance)
void AddSp(const Real alpha, const SpMatrix< Real > &Ma)
Definition: sp-matrix.h:211
FullGmm full_ubm_
Definition: am-sgmm2.h:414
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
void CopyToFullGmm(FullGmm *fullgmm, GmmFlagsType flags=kGmmAll)
Copies to FullGmm.
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: full-gmm.h:58
int32 NumGauss() const
Definition: am-sgmm2.h:360
void ComputeLdaTransform(const SpMatrix< double > &B, const SpMatrix< double > &W, int32 dim_to_retain, Matrix< double > *Projection)
void CopyFromFullGmm(const FullGmm &fullgmm)
Copies from given FullGmm.
Definition: diag-gmm.cc:92
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AddMat2Sp(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const SpMatrix< Real > &A, const Real beta=0.0)
Extension of rank-N update: this <-- beta*this + alpha * M * A * M^T.
Definition: sp-matrix.cc:982
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Definition: sp-matrix.h:81
void SymPosSemiDefEig(VectorBase< Real > *s, MatrixBase< Real > *P, Real tolerance=0.001) const
This is the version of SVD that we implement for symmetric positive definite matrices.
Definition: sp-matrix.cc:57
void AddMatTp(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const TpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*A*B.
Definition: kaldi-matrix.h:725
Matrix< double > means_
Means.
void Invert(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: sp-matrix.cc:219
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
Sub-matrix representation.
Definition: kaldi-matrix.h:988
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94