fmllr-sgmm2-test.cc
Go to the documentation of this file.
1 // sgmm2/fmllr-sgmm2-test.cc
2 
3 // Copyright 2009-2011 Saarland University (author: Arnab Ghoshal)
4 // 2012 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include <vector>
22 
23 #include "base/kaldi-math.h"
24 #include "gmm/model-test-common.h"
25 #include "sgmm2/am-sgmm2.h"
26 #include "sgmm2/fmllr-sgmm2.h"
27 #include "util/kaldi-io.h"
28 
29 using kaldi::AmSgmm2;
30 using kaldi::int32;
31 using kaldi::BaseFloat;
32 using kaldi::Vector;
33 using kaldi::Matrix;
34 using kaldi::Exp;
35 
36 namespace ut = kaldi::unittest;
37 
39  const Matrix<BaseFloat> &xf,
40  Vector<BaseFloat> *out) {
41  int32 dim = in.Dim();
42  KALDI_ASSERT(xf.NumRows() == dim && xf.NumCols() == dim + 1);
43  Vector<BaseFloat> tmp(dim + 1);
44  tmp.Range(0, dim).CopyFromVec(in);
45  tmp(dim) = 1.0;
46  out->Resize(dim, kaldi::kSetZero);
47  out->AddMatVec(1.0, xf, kaldi::kNoTrans, tmp, 0.0);
48 }
49 
50 // Tests the Read() and Write() methods for the accumulators, in both binary
51 // and ASCII mode, as well as Check().
52 void TestSgmm2FmllrAccsIO(const AmSgmm2 &sgmm,
53  const kaldi::Matrix<BaseFloat> &feats) {
54  KALDI_LOG << "Test IO start.";
55  using namespace kaldi;
56  int32 dim = sgmm.FeatureDim();
59  kaldi::Sgmm2FmllrGlobalParams fmllr_globals;
60  kaldi::Sgmm2GselectConfig sgmm_config;
61 
62  frame_vars.Resize(sgmm.NumGauss(), dim, sgmm.PhoneSpaceDim());
63  sgmm_config.full_gmm_nbest = std::min(sgmm_config.full_gmm_nbest,
64  sgmm.NumGauss());
65  kaldi::Vector<BaseFloat> occs(sgmm.NumPdfs());
66  occs.Set(feats.NumRows());
67  sgmm.ComputeFmllrPreXform(occs, &fmllr_globals.pre_xform_,
68  &fmllr_globals.inv_xform_,
69  &fmllr_globals.mean_scatter_);
70  if (fmllr_globals.mean_scatter_.Min() == 0.0) {
71  KALDI_WARN << "Global covariances low rank!";
72  KALDI_WARN << "Diag-scatter = " << fmllr_globals.mean_scatter_;
73  return;
74  }
75 
76 // std::cout << "Pre-Xform = " << fmllr_globals.pre_xform_;
77 // std::cout << "Inv-Xform = " << fmllr_globals.inv_xform_;
78 
79  FmllrSgmm2Accs accs;
80  accs.Init(sgmm.FeatureDim(), sgmm.NumGauss());
81  BaseFloat loglike = 0.0;
82  std::vector<int32> gselect;
83  for (int32 i = 0; i < feats.NumRows(); i++) {
84  sgmm.GaussianSelection(sgmm_config, feats.Row(i), &gselect);
85  sgmm.ComputePerFrameVars(feats.Row(i), gselect, empty, &frame_vars);
86  loglike += accs.Accumulate(sgmm, feats.Row(i), frame_vars, 0, 1.0,
87  &empty);
88  }
89 
90  kaldi::Sgmm2FmllrConfig update_opts;
91  update_opts.fmllr_min_count = 999; // Make sure it doesn't
92  // divide 200, because the test can fail when we cross the boundary
93  // of 1000 due to roundoff. Actually it's weird because 1000 should
94  // be exactly representable in float and in text. But something's going wrong.
95  kaldi::Matrix<BaseFloat> xform_mat(dim, dim+1);
96  xform_mat.SetUnit();
97  BaseFloat frames, impr;
98  accs.Update(sgmm, fmllr_globals, update_opts, &xform_mat, &frames, &impr);
99 
100  Vector<BaseFloat> xformed_feat(dim);
101  ApplyFmllrXform(feats.Row(0), xform_mat, &xformed_feat);
102  sgmm.GaussianSelection(sgmm_config, xformed_feat, &gselect);
103  sgmm.ComputePerFrameVars(xformed_feat, gselect, empty, &frame_vars);
104 
105  Sgmm2LikelihoodCache like_cache(sgmm.NumGroups(), sgmm.NumPdfs());
106  BaseFloat loglike1 = sgmm.LogLikelihood(frame_vars, 0,
107  &like_cache, &empty);
108 
109  bool binary_in;
110  // First, non-binary write
111  KALDI_LOG << "Test ASCII IO.";
112  accs.Write(kaldi::Output("tmpf", false).Stream(), false);
113  FmllrSgmm2Accs *accs1 = new FmllrSgmm2Accs();
114  // Non-binary read
115  kaldi::Input ki1("tmpf", &binary_in);
116  accs1->Read(ki1.Stream(), binary_in, false);
117  xform_mat.SetUnit();
118  accs1->Update(sgmm, fmllr_globals, update_opts, &xform_mat, NULL, NULL);
119  ApplyFmllrXform(feats.Row(0), xform_mat, &xformed_feat);
120  sgmm.GaussianSelection(sgmm_config, xformed_feat, &gselect);
121  sgmm.ComputePerFrameVars(xformed_feat, gselect, empty, &frame_vars);
122  like_cache.NextFrame();
123  BaseFloat loglike2 = sgmm.LogLikelihood(frame_vars, 0,
124  &like_cache, &empty);
125  std::cout << "LL1 = " << loglike1 << ", LL2 = " << loglike2 << std::endl;
126 
127  kaldi::AssertEqual(loglike1, loglike2, 1e-2);
128  delete accs1;
129 
130  // Next, binary write
131  KALDI_LOG << "Test Binary IO.";
132  accs.Write(kaldi::Output("tmpfb", true).Stream(), true);
133  FmllrSgmm2Accs *accs2 = new FmllrSgmm2Accs();
134  // Binary read
135  kaldi::Input ki2("tmpfb", &binary_in);
136  accs2->Read(ki2.Stream(), binary_in, false);
137  xform_mat.SetUnit();
138  accs2->Update(sgmm, fmllr_globals, update_opts, &xform_mat, NULL, NULL);
139  ApplyFmllrXform(feats.Row(0), xform_mat, &xformed_feat);
140  sgmm.GaussianSelection(sgmm_config, xformed_feat, &gselect);
141  sgmm.ComputePerFrameVars(xformed_feat, gselect, empty, &frame_vars);
142  BaseFloat loglike3 = sgmm.LogLikelihood(frame_vars, 0,
143  &like_cache, &empty);
144  std::cout << "LL1 = " << loglike1 << ", LL3 = " << loglike3 << std::endl;
145  kaldi::AssertEqual(loglike1, loglike3, 1e-4);
146  delete accs2;
147 
148  unlink("tmpf");
149  unlink("tmpfb");
150  KALDI_LOG << "Test IO end.";
151 }
152 
154  const kaldi::Matrix<BaseFloat> &feats) {
155  KALDI_LOG << "Test Subspace start.";
156  using namespace kaldi;
157  int32 dim = sgmm.FeatureDim();
160  kaldi::Sgmm2FmllrGlobalParams fmllr_globals;
161  kaldi::Sgmm2GselectConfig sgmm_config;
162 
163  frame_vars.Resize(sgmm.NumGauss(), dim, sgmm.PhoneSpaceDim());
164  sgmm_config.full_gmm_nbest = std::min(sgmm_config.full_gmm_nbest,
165  sgmm.NumGauss());
166  kaldi::Vector<BaseFloat> occs(sgmm.NumPdfs());
167  occs.Set(feats.NumRows());
168  sgmm.ComputeFmllrPreXform(occs, &fmllr_globals.pre_xform_,
169  &fmllr_globals.inv_xform_,
170  &fmllr_globals.mean_scatter_);
171  if (fmllr_globals.mean_scatter_.Min() == 0.0) {
172  KALDI_WARN << "Global covariances low rank!";
173  KALDI_WARN << "Diag-scatter = " << fmllr_globals.mean_scatter_;
174  return;
175  }
176 
177  FmllrSgmm2Accs accs;
178  accs.Init(sgmm.FeatureDim(), sgmm.NumGauss());
179  BaseFloat loglike = 0.0;
180  std::vector<int32> gselect;
181  for (int32 i = 0; i < feats.NumRows(); i++) {
182  sgmm.GaussianSelection(sgmm_config, feats.Row(i), &gselect);
183  sgmm.ComputePerFrameVars(feats.Row(i), gselect, empty, &frame_vars);
184  loglike += accs.Accumulate(sgmm, feats.Row(i), frame_vars, 0, 1.0,
185  &empty);
186  }
187 
188  SpMatrix<double> grad_scatter(dim * (dim+1));
189  accs.AccumulateForFmllrSubspace(sgmm, fmllr_globals, &grad_scatter);
190  kaldi::Sgmm2FmllrConfig update_opts;
191  EstimateSgmm2FmllrSubspace(grad_scatter, update_opts.num_fmllr_bases, dim,
192  &fmllr_globals);
193 // update_opts.fmllr_min_count = 100;
194  kaldi::Matrix<BaseFloat> xform_mat(dim, dim+1);
195  xform_mat.SetUnit();
196  accs.Update(sgmm, fmllr_globals, update_opts, &xform_mat, NULL, NULL);
197  KALDI_LOG << "Test Subspace end.";
198 }
199 
201  // srand(time(NULL));
202  int32 dim = 1 + kaldi::RandInt(0, 9); // random dimension of the gmm
203  int32 num_comp = 2 + kaldi::RandInt(0, 9); // random number of mixtures
204  kaldi::FullGmm full_gmm;
205  ut::InitRandFullGmm(dim, num_comp, &full_gmm);
206 
207  AmSgmm2 sgmm;
209  std::vector<int32> pdf2group;
210  pdf2group.push_back(0);
211  sgmm.InitializeFromFullGmm(full_gmm, pdf2group, dim+1, dim, true, 0.9);
212  sgmm.ComputeNormalizers();
213 
215 
216  { // First, generate random means and variances
217  int32 num_feat_comp = num_comp + kaldi::RandInt(-num_comp/2, num_comp/2);
218  kaldi::Matrix<BaseFloat> means(num_feat_comp, dim),
219  vars(num_feat_comp, dim);
220  for (int32 m = 0; m < num_feat_comp; m++) {
221  for (int32 d= 0; d < dim; d++) {
222  means(m, d) = kaldi::RandGauss();
223  vars(m, d) = Exp(kaldi::RandGauss()) + 1e-2;
224  }
225  }
226  // Now generate random features with those means and variances.
227  feats.Resize(num_feat_comp * 200, dim);
228  for (int32 m = 0; m < num_feat_comp; m++) {
229  kaldi::SubMatrix<BaseFloat> tmp(feats, m*200, 200, 0, dim);
230  ut::RandDiagGaussFeatures(200, means.Row(m), vars.Row(m), &tmp);
231  }
232  }
233  TestSgmm2FmllrAccsIO(sgmm, feats);
234  TestSgmm2FmllrSubspace(sgmm, feats);
235 }
236 
237 int main() {
239  for (int i = 0; i < 10; i++)
240  TestSgmm2Fmllr();
241  std::cout << "Test OK.\n";
242  return 0;
243 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
double Exp(double x)
Definition: kaldi-math.h:83
Class for definition of the subspace Gmm acoustic model.
Definition: am-sgmm2.h:231
void TestSgmm2FmllrAccsIO(const AmSgmm2 &sgmm, const kaldi::Matrix< BaseFloat > &feats)
Class for computing the accumulators needed for the maximum-likelihood estimate of FMLLR transforms f...
Definition: fmllr-sgmm2.h:122
Matrix< BaseFloat > pre_xform_
Pre-transform matrix. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:103
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40
void ApplyFmllrXform(const kaldi::VectorBase< BaseFloat > &in, const Matrix< BaseFloat > &xf, Vector< BaseFloat > *out)
void InitializeFromFullGmm(const FullGmm &gmm, const std::vector< int32 > &pdf2group, int32 phn_subspace_dim, int32 spk_subspace_dim, bool speaker_dependent_weights, BaseFloat self_weight)
Initializes the SGMM parameters from a full-covariance UBM.
Definition: am-sgmm2.cc:381
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
A class for storing matrices.
Definition: kaldi-matrix.h:823
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void TestSgmm2FmllrSubspace(const AmSgmm2 &sgmm, const kaldi::Matrix< BaseFloat > &feats)
int32 PhoneSpaceDim() const
Definition: am-sgmm2.h:361
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too].
void EstimateSgmm2FmllrSubspace(const SpMatrix< double > &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *globals, double min_eig)
Computes the fMLLR basis matrices given the scatter of the vectorized gradients (eq: B...
Definition: fmllr-sgmm2.cc:506
void AccumulateForFmllrSubspace(const AmSgmm2 &sgmm, const Sgmm2FmllrGlobalParams &fmllr_globals, SpMatrix< double > *grad_scatter)
Definition: fmllr-sgmm2.cc:205
bool Update(const AmSgmm2 &model, const Sgmm2FmllrGlobalParams &fmllr_globals, const Sgmm2FmllrConfig &opts, Matrix< BaseFloat > *out_xform, BaseFloat *frame_count, BaseFloat *auxf_improv) const
Computes the FMLLR transform from the accumulated stats, using the pre-transforms in fmllr_globals...
Definition: fmllr-sgmm2.cc:356
int32 FeatureDim() const
Definition: am-sgmm2.h:363
std::istream & Stream()
Definition: kaldi-io.cc:826
int32 NumGroups() const
Definition: am-sgmm2.h:351
float BaseFloat
Definition: kaldi-types.h:29
void TestSgmm2Fmllr()
BaseFloat LogLikelihood(const Sgmm2PerFrameDerivedVars &per_frame_vars, int32 j2, Sgmm2LikelihoodCache *cache, Sgmm2PerSpkDerivedVars *spk_vars, BaseFloat log_prune=0.0) const
This does a likelihood computation for a given state using the pre-selected Gaussian components (in p...
Definition: am-sgmm2.cc:517
void Read(std::istream &in_stream, bool binary, bool add)
Definition: fmllr-sgmm2.cc:275
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
Matrix< BaseFloat > inv_xform_
Inverse of pre-transform. Dim is [D][D+1].
Definition: fmllr-sgmm2.h:105
BaseFloat GaussianSelection(const Sgmm2GselectConfig &config, const VectorBase< BaseFloat > &data, std::vector< int32 > *gselect) const
Computes the top-scoring Gaussian indices (used for pruning of later stages of computation).
Definition: am-sgmm2.cc:1406
int32 NumPdfs() const
Various model dimensions.
Definition: am-sgmm2.h:350
int32 full_gmm_nbest
Number of highest-scoring full-covariance Gaussians per frame.
Definition: am-sgmm2.h:120
void ComputePerFrameVars(const VectorBase< BaseFloat > &data, const std::vector< int32 > &gselect, const Sgmm2PerSpkDerivedVars &spk_vars, Sgmm2PerFrameDerivedVars *per_frame_vars) const
This needs to be called with each new frame of data, prior to accumulation or likelihood evaluation: ...
Definition: am-sgmm2.cc:442
#define KALDI_WARN
Definition: kaldi-error.h:150
void Resize(int32 ngauss, int32 feat_dim, int32 phn_dim)
Definition: am-sgmm2.h:151
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector : this <– beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
Configuration variables needed in the estimation of FMLLR for SGMMs.
Definition: fmllr-sgmm2.h:40
int32 NumGauss() const
Definition: am-sgmm2.h:360
int32 num_fmllr_bases
Number of basis matrices to use for FMLLR estimation.
Definition: fmllr-sgmm2.h:52
void InitRandFullGmm(int32 dim, int32 num_comp, FullGmm *gmm)
A class representing a vector.
Definition: kaldi-vector.h:406
BaseFloat Accumulate(const AmSgmm2 &sgmm, const VectorBase< BaseFloat > &data, const Sgmm2PerFrameDerivedVars &frame_vars, int32 state_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk)
Accumulation routine that computes the Gaussian posteriors and calls the AccumulateFromPosteriors fun...
Definition: fmllr-sgmm2.cc:156
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
int32 g_kaldi_verbose_level
This is set by util/parse-options.
Definition: kaldi-error.cc:46
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Write(std::ostream &out_stream, bool binary) const
Definition: fmllr-sgmm2.cc:266
void Set(Real f)
Set all members of a vector to a specified value.
void ComputeNormalizers()
Computes the data-independent terms in the log-likelihood computation for each Gaussian component and...
Definition: am-sgmm2.cc:857
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276
Sgmm2LikelihoodCache caches SGMM likelihoods at two levels: the final pdf likelihoods, and the sub-state level likelihoods, which means that with the SCTM system we can avoid redundant computation.
Definition: am-sgmm2.h:199
void RandDiagGaussFeatures(int32 num_samples, const VectorBase< BaseFloat > &mean, const VectorBase< BaseFloat > &sqrt_var, MatrixBase< BaseFloat > *feats)
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
Global adaptation parameters.
Definition: fmllr-sgmm2.h:91
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
Vector< BaseFloat > mean_scatter_
Diagonal of mean-scatter matrix. Dim is [D].
Definition: fmllr-sgmm2.h:107
#define KALDI_LOG
Definition: kaldi-error.h:153
void Init(int32 dim, int32 num_gaussians)
Definition: fmllr-sgmm2.cc:146
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...
Definition: am-sgmm2.h:142
BaseFloat fmllr_min_count
Minimum occupancy count to estimate FMLLR without basis matrices.
Definition: fmllr-sgmm2.h:46
Sub-matrix representation.
Definition: kaldi-matrix.h:988
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95
void ComputeFmllrPreXform(const Vector< BaseFloat > &pdf_occs, Matrix< BaseFloat > *xform, Matrix< BaseFloat > *inv_xform, Vector< BaseFloat > *diag_mean_scatter) const
Computes the LDA-like pre-transform and its inverse as well as the eigenvalues of the scatter of the ...
Definition: am-sgmm2.cc:965
int main()