am-sgmm2-project.h
Go to the documentation of this file.
1 // sgmm2/am-sgmm2-project.h
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #ifndef KALDI_SGMM2_AM_SGMM2_PROJECT_H_
21 #define KALDI_SGMM2_AM_SGMM2_PROJECT_H_
22 
23 #include <vector>
24 #include <queue>
25 
26 #include "sgmm2/am-sgmm2.h"
27 
28 namespace kaldi {
29 
30 class Sgmm2Project {
31  // This class essentially functions as a namespace for some functions;
32  // it's a friend of AmSgmm.h. It relates to "predictive" SGMMs. This
33  // hasn't been written up yet. We don't make any functions const or
34  // static, because there are no member variables.
35  public:
36 
37  // If inv_lda_mllt is the matrix that projects from the space the SGMM is
38  // in, typically back to the spliced-MFCC space, and begin_dim and end_dim
39  // represent the range of dims we want to model, then "projection" will be
40  // a matrix, applied *after* the "inv_lda_mllt" matrix, that projects from
41  // the raw splice-MFCC features to the space we want to model. This matrix
42  // is of dimension e.g. 40 x 117, and omits the space that the model's states
43  // all treat the same.
44  void ComputeProjection(const AmSgmm2 &sgmm,
45  const Matrix<BaseFloat> &inv_lda_mllt,
46  int32 begin_dim,
47  int32 end_dim, // last dim plus one that we keep.
48  Matrix<BaseFloat> *projection);
49 
50  // This function applies the feature-space projection to the SGMM.
51  // The matrix "total_projection" is the product of the "projection" matrix
52  // of ComputeProjection times the "inv_lda_mllt" matrix. It actually
53  // projects from a larger dimension than the current SGMM. We treat
54  // the means as if extended with zeros, and the covariances as if
55  // extended with a unit matrix.
56  void ApplyProjection(const Matrix<BaseFloat> &total_projection,
57  AmSgmm2 *sgmm);
58 
59  private:
60  // Computes statistics for LDA, in the SGMM's feature space.
61  // This only needs to be approximate, so we use stats based
62  // on the means in the UBM.
63  void ComputeLdaStats(const FullGmm &full_ubm,
64  SpMatrix<double> *between_covar,
65  SpMatrix<double> *within_covar);
66 
67  void ProjectVariance (const Matrix<double> &total_projection,
68  bool inverse,
69  SpMatrix<double> *variance);
70 
71  void ProjectVariance (const Matrix<double> &total_projection,
72  bool inverse,
73  SpMatrix<float> *variance);
74 
76  const SpMatrix<double> &W,
77  int32 dim_to_retain,
78  Matrix<double> *Projection);
79 
80 };
81 
82 
83 
84 } // end namespace kaldi
85 
86 #endif // KALDI_SGMM2_AM_SGMM2_PROJECT_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40
void ComputeProjection(const AmSgmm2 &sgmm, const Matrix< BaseFloat > &inv_lda_mllt, int32 begin_dim, int32 end_dim, Matrix< BaseFloat > *projection)
kaldi::int32 int32
void ApplyProjection(const Matrix< BaseFloat > &total_projection, AmSgmm2 *sgmm)
void ComputeLdaStats(const FullGmm &full_ubm, SpMatrix< double > *between_covar, SpMatrix< double > *within_covar)
void ProjectVariance(const Matrix< double > &total_projection, bool inverse, SpMatrix< double > *variance)
void ComputeLdaTransform(const SpMatrix< double > &B, const SpMatrix< double > &W, int32 dim_to_retain, Matrix< double > *Projection)