regtree-fmllr-diag-gmm-test.cc
Go to the documentation of this file.
1 // transform/regtree-fmllr-diag-gmm-test.cc
2 
3 // Copyright 2009-2011 Georg Stemmer; Saarland University
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
#include "util/common-utils.h"
#include "gmm/diag-gmm.h"
#include "gmm/mle-diag-gmm.h"
#include "gmm/mle-am-diag-gmm.h"
#include "gmm/model-test-common.h"
#include "transform/regtree-fmllr-diag-gmm.h"
26 
27 namespace kaldi {
28 
29 static void
31  size_t dim = matrix->NumCols();
32  KALDI_ASSERT(matrix->NumCols() == matrix->NumRows());
33 
34  size_t iter = 0;
35  size_t max_iter = 10000;
36  // generate random (non-singular) matrix
37  // until condition
38  Matrix<BaseFloat> tmp(dim, dim);
39  SpMatrix<BaseFloat> tmp2(dim);
40  while (iter < max_iter) {
41  tmp.SetRandn();
42  if (tmp.Cond() < 100) break;
43  iter++;
44  }
45  if (iter >= max_iter) {
46  KALDI_ERR << "Internal error: found no random covariance matrix.";
47  }
48  // tmp * tmp^T will give positive definite matrix
49  tmp2.AddMat2(1.0, tmp, kNoTrans, 0.0);
50  matrix->CopyFromSp(tmp2);
51 }
52 
53 
57 
/// Kind of covariance used when generating synthetic features.
/// NOTE(review): the enumerator lines were missing from this copy of the
/// file; they are reconstructed from the uses of `full` and `diag` below and
/// from the convention that covariance_type == 0 means full covariance.
enum cova_type {
  full,  ///< random full covariance per Gaussian (value 0)
  diag   ///< random diagonal covariance per Gaussian
};
62 
63 static void
64 generate_features(cova_type covariance_type,
65  size_t n_gaussians,
66  size_t dim,
67  Matrix<BaseFloat> &trans_mat,
68  size_t frames_per_gaussian,
69  std::vector<Vector<BaseFloat>*> & train_feats,
70  std::vector<Vector<BaseFloat>*> & adapt_feats
71  ) {
72  // compute inverse of the transformation matrix
73  Matrix<BaseFloat> inv_trans_mat(dim, dim);
74  inv_trans_mat.CopyFromMat(trans_mat, kNoTrans);
75  inv_trans_mat.Invert();
76  // the untransformed means are random
77  Matrix<BaseFloat> untransformed_means(dim, n_gaussians);
78  untransformed_means.SetRandn();
79  untransformed_means.Scale(10);
80 
81  // the actual means result from
82  // transformation with inv_trans_mat
83  Matrix<BaseFloat> actual_means(dim, n_gaussians);
84 
85  // actual_means = inv_trans_mat * untransformed_means
86  actual_means.AddMatMat(1.0, inv_trans_mat, kNoTrans,
87  untransformed_means, kNoTrans, 0.0);
88 
89  size_t train_counter = 0;
90 
91  // temporary variables
92  Vector<BaseFloat> randomvec(dim);
93  Matrix<BaseFloat> Sj(dim, dim);
94 
95  // loop over all gaussians
96  for (size_t j = 0; j < n_gaussians; j++) {
97  if (covariance_type == diag) {
98  // random diagonal covariance for gaussian j
99  Sj.SetZero();
100  for (size_t d = 0; d < dim; d++) {
101  Sj(d, d) = 2*Exp(RandGauss());
102  }
103  }
104  if (covariance_type == full) {
105  // random full covariance for gaussian j
106  RandFullCova(&Sj);
107  }
108  // compute inv_trans_mat * Sj
109  Matrix<BaseFloat> tmp_matrix(dim, dim);
110  tmp_matrix.AddMatMat(1.0, inv_trans_mat, kNoTrans, Sj, kNoTrans, 0.0);
111 
112  // compute features
113  for (size_t i = 0; i < frames_per_gaussian; i++) {
114  train_feats[train_counter] = new Vector<BaseFloat>(dim);
115  adapt_feats[train_counter] = new Vector<BaseFloat>(dim);
116 
117  // initalize feature vector with mean of class j
118  train_feats[train_counter]->CopyColFromMat(untransformed_means, j);
119  adapt_feats[train_counter]->CopyColFromMat(actual_means, j);
120 
121  // determine random vector and
122  // multiply the random vector with SJ
123  // and add it to train_feats:
124  // train_feats = train_feats + SJ * random
125  // for adapt_feats we include the invtrans_mat:
126  // adapt_feats = adapt_feats + invtrans_mat * SJ * random
127  for (size_t d = 0; d < dim; d++) {
128  randomvec(d) = RandGauss();
129  }
130  train_feats[train_counter]->AddMatVec(1.0, Sj, kNoTrans,
131  randomvec, 1.0);
132  adapt_feats[train_counter]->AddMatVec(1.0, tmp_matrix, kNoTrans,
133  randomvec, 1.0);
134  train_counter++;
135  }
136  }
137  return;
138 }
139 
140 void UnitTestRegtreeFmllrDiagGmm(cova_type feature_type, size_t max_bclass) {
141  // dimension of the feature space
142  size_t dim = 5 + Rand() % 3;
143 
144  // number of components in the data
145  size_t n_gaussians = 8;
146 
147  // number of data points to generate for every gaussian
148  size_t frames_per_gaussian = 100;
149 
150  // generate random transformation matrix trans_mat
151  Matrix<BaseFloat> trans_mat(dim, dim);
152  int i = 0;
153  while (i < 10000) {
154  trans_mat.SetRandn();
155  if (trans_mat.Cond() < 100) break;
156  i++;
157  }
158  std::cout << "Condition of original Trans_Mat: " << trans_mat.Cond() << '\n';
159 
160  // generate many feature vectors for each of the mixture components
161  std::vector<Vector<BaseFloat>*>
162  train_feats(n_gaussians * frames_per_gaussian);
163  std::vector<Vector<BaseFloat>*>
164  adapt_feats(n_gaussians * frames_per_gaussian);
165 
166  generate_features(feature_type,
167  n_gaussians,
168  dim,
169  trans_mat,
170  frames_per_gaussian,
171  train_feats,
172  adapt_feats);
173 
174  // initial values for a GMM
175  Vector<BaseFloat> weights(1);
176  Matrix<BaseFloat> means(1, dim), vars(1, dim), invvars(1, dim);
177  for (size_t d= 0; d < dim; d++) {
178  means(0, d) = 0.0F;
179  vars(0, d) = 1.0F;
180  }
181  weights(0) = 1.0F;
182  invvars.CopyFromMat(vars);
183  invvars.InvertElements();
184 
185  // new HMM with 1 state
186  DiagGmm *gmm = new DiagGmm();
187  gmm->Resize(1, dim);
188  gmm->SetWeights(weights);
189  gmm->SetInvVarsAndMeans(invvars, means);
190  gmm->ComputeGconsts();
191  GmmFlagsType flags = kGmmAll;
192  MleDiagGmmOptions opts;
193 
194  AmDiagGmm *am = new AmDiagGmm();
195  am->AddPdf(*gmm);
196  AccumAmDiagGmm *est_am = new AccumAmDiagGmm();
197 
198  // train HMM
199  size_t iteration = 0;
200  size_t maxiterations = 10;
201  int32 maxcomponents = n_gaussians;
202  BaseFloat loglike = 0;
203  while (iteration < maxiterations) {
204  est_am->Init(*am, flags);
205 
206  loglike = 0;
207  for (size_t j = 0; j < train_feats.size(); j++) {
208  loglike += est_am->AccumulateForGmm(*am, *train_feats[j], 0, 1.0);
209  }
210  MleAmDiagGmmUpdate(opts, *est_am, flags, am, NULL, NULL);
211 
212  std::cout << "Loglikelihood before iteration " << iteration << " : "
213  << std::scientific << loglike << " number of components: "
214  << am->NumGaussInPdf(0) << '\n';
215 
216  if ((iteration % 3 == 1) &&
217  (am->NumGaussInPdf(0) * 2 <= maxcomponents)) {
218  size_t n = am->NumGaussInPdf(0)*2;
219  am->SplitPdf(0, n, 0.001);
220  }
221  iteration++;
222  }
223 
224  // adapt HMM to the transformed feature vectors
225  iteration = 0;
227  RegressionTree regtree;
228 
229  RegtreeFmllrOptions xform_opts;
230  xform_opts.min_count = 100 * (1 + Rand() % 10);
231  xform_opts.use_regtree = (RandUniform() < 0.5)? false : true;
232 
233  size_t num_pdfs = 1;
234  Vector<BaseFloat> occs(num_pdfs);
235  for (int32 i = 0; i < static_cast<int32>(num_pdfs); i++) {
236  occs(i) = 1.0/static_cast<BaseFloat>(num_pdfs);
237  }
238  std::vector<int32> silphones;
239  regtree.BuildTree(occs, silphones, *am, max_bclass);
240  maxiterations = 10;
241  std::vector<Vector<BaseFloat>*> logdet(adapt_feats.size());
242  for (size_t j = 0; j < adapt_feats.size(); j++) {
243  logdet[j] = new Vector<BaseFloat>(1);
244  logdet[j]->operator()(0) = 0.0;
245  }
246  while (iteration < maxiterations) {
247  fmllr_accs->Init(regtree.NumBaseclasses(), dim);
248  fmllr_accs->SetZero();
249  RegtreeFmllrDiagGmm *new_fmllr = new RegtreeFmllrDiagGmm();
250  loglike = 0;
251  for (size_t j = 0; j < adapt_feats.size(); j++) {
252  loglike += fmllr_accs->AccumulateForGmm(regtree, *am, *adapt_feats[j], 0, 1.0);
253  loglike += logdet[j]->operator()(0);
254  }
255  std::cout << "FMLLR: Loglikelihood before iteration " << iteration << " : "
256  << std::scientific << loglike << '\n';
257 
258  fmllr_accs->Update(regtree, xform_opts, new_fmllr, NULL, NULL);
259  std::cout << "Got " << new_fmllr->NumBaseClasses() << " baseclasses\n";
260  bool binary = (RandUniform() < 0.5)? true : false;
261  std::cout << "Writing the transform to disk.\n";
262  new_fmllr->Write(Output("tmpf", binary).Stream(), binary);
263  RegtreeFmllrDiagGmm *fmllr_read = new RegtreeFmllrDiagGmm();
264  bool binary_in;
265  Input ki("tmpf", &binary_in);
266  std::cout << "Reading the transform from disk.\n";
267  fmllr_read->Read(ki.Stream(), binary_in);
268  fmllr_read->Validate();
269 
270  // transform features
271  std::vector<Vector<BaseFloat> > trans_feats(1);
272  Vector<BaseFloat> trans_logdet;
273 // new_fmllr->ComputeLogDets();
274  trans_logdet.Resize(fmllr_read->NumRegClasses());
275  fmllr_read->GetLogDets(&trans_logdet);
276  for (size_t j = 0; j < adapt_feats.size(); j++) {
277  fmllr_read->TransformFeature(*adapt_feats[j], &trans_feats);
278  logdet[j]->operator()(0) += trans_logdet(0);
279  adapt_feats[j]->CopyFromVec(trans_feats[0]);
280  }
281  iteration++;
282  delete new_fmllr;
283  delete fmllr_read;
284 
285  unlink("tmpf");
286  }
287 
288 // // transform features with empty transform
289 // std::vector<Vector<BaseFloat> > trans_feats(1);
290 // RegtreeFmllrDiagGmm *empty_fmllr = new RegtreeFmllrDiagGmm();
291 // empty_fmllr->Init(0, 0);
292 // for (size_t j = 0; j < adapt_feats.size(); j++) {
293 // empty_fmllr->TransformFeature(*adapt_feats[j], &trans_feats);
294 // }
295 // delete empty_fmllr;
296 
297  // clean up
298  delete fmllr_accs;
299  delete est_am;
300  delete am;
301  delete gmm;
302  DeletePointers(&logdet);
303  DeletePointers(&train_feats);
304  DeletePointers(&adapt_feats);
305 }
306 } // namespace kaldi ends here
307 
308 int main() {
309  for (int i = 0; i <= 8; i+=2) { // test is too slow so can't do too many
310  std::cout << "--------------------------------------" << '\n';
311  std::cout << "Test number " << i << '\n';
312  std::cout << "--\nfeatures = full\n";
314  std::cout << "--\nfeatures = diag\n";
316  std::cout << "--------------------------------------" << '\n';
317  }
318  std::cout << "Test OK.\n";
319 }
320 
cova_type
Generate features for a certain covariance type covariance_type == 0: full covariance covariance_type...
void MleAmDiagGmmUpdate(const MleDiagGmmOptions &config, const AccumAmDiagGmm &am_diag_gmm_acc, GmmFlagsType flags, AmDiagGmm *am_gmm, BaseFloat *obj_change_out, BaseFloat *count_out)
for computing the maximum-likelihood estimates of the parameters of an acoustic model that uses diago...
void AddMat2(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
rank-N update: if (transM == kNoTrans) (*this) = beta*(*this) + alpha * M * M^T, or (if transM == kTr...
Definition: sp-matrix.cc:1110
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
double Exp(double x)
Definition: kaldi-math.h:83
void AddPdf(const DiagGmm &gmm)
Adds a GMM to the model, and increments the total number of PDFs.
Definition: am-diag-gmm.cc:57
Packed symmetric matrix class.
Definition: matrix-common.h:62
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
Definition: diag-gmm-inl.h:63
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
void Read(std::istream &in_stream, bool binary)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Real Cond() const
Returns condition number by computing Svd.
BaseFloat AccumulateForGmm(const AmDiagGmm &model, const VectorBase< BaseFloat > &data, int32 gmm_index, BaseFloat weight)
Accumulate stats for a single GMM in the model; returns log likelihood.
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: diag-gmm.cc:66
int32 ComputeGconsts()
Sets the gconsts.
Definition: diag-gmm.cc:114
void GetLogDets(VectorBase< BaseFloat > *out) const
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
static void generate_features(cova_type covariance_type, size_t n_gaussians, size_t dim, Matrix< BaseFloat > &trans_mat, size_t frames_per_gaussian, std::vector< Vector< BaseFloat > *> &train_feats, std::vector< Vector< BaseFloat > *> &adapt_feats)
uint16 GmmFlagsType
Bitwise OR of the above flags.
Definition: model-common.h:35
static void RandFullCova(Matrix< BaseFloat > *matrix)
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
int32 NumGaussInPdf(int32 pdf_index) const
Definition: am-diag-gmm.h:113
void CopyFromSp(const SpMatrix< OtherReal > &M)
Copy given spmatrix. (no resize is done).
An FMLLR (feature-space MLLR) transformation, also called CMLLR (constrained MLLR) is an affine trans...
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
std::istream & Stream()
Definition: kaldi-io.cc:826
int32 NumBaseclasses() const
Accessors (const)
Configuration variables for FMLLR transforms.
void Scale(Real alpha)
Multiply each element with a scalar value.
void BuildTree(const Vector< BaseFloat > &state_occs, const std::vector< int32 > &sil_indices, const AmDiagGmm &am, int32 max_clusters)
Top-down clustering of the Gaussians in a model based on their means.
A regression tree is a clustering of Gaussian densities in an acoustic model, such that the group of ...
struct rnnlm::@11::@12 n
void SetRandn()
Sets to random values of a normal distribution.
void TransformFeature(const VectorBase< BaseFloat > &in, std::vector< Vector< BaseFloat > > *out) const
Get the transformed features for each of the transforms.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
void Validate()
Checks whether the various parameters are consistent.
BaseFloat min_count
Minimum occupancy for computing a transform.
void SetZero()
Sets matrix to zero.
Configuration variables like variance floor, minimum occupancy, etc.
Definition: mle-diag-gmm.h:38
bool use_regtree
If 'true', find transforms to generate using regression tree.
void AddMatVec(const Real alpha, const MatrixBase< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add matrix times vector: this <-- beta*this + alpha*M*v.
Definition: kaldi-vector.cc:92
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
void InvertElements()
Inverts all the elements of the matrix.
void CopyColFromMat(const MatrixBase< OtherReal > &M, MatrixIndexT col)
Extracts a column of the matrix M.
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
Class for computing the accumulators needed for the maximum-likelihood estimate of FMLLR transforms f...
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void SplitPdf(int32 idx, int32 target_components, float perturb_factor)
Definition: am-diag-gmm.h:152
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
Definition: diag-gmm-inl.h:28
void UnitTestRegtreeFmllrDiagGmm(cova_type feature_type, size_t max_bclass)
BaseFloat AccumulateForGmm(const RegressionTree &regtree, const AmDiagGmm &am, const VectorBase< BaseFloat > &data, size_t pdf_index, BaseFloat weight)
Accumulate stats for a single GMM in the model; returns log likelihood.
void Invert(Real *log_det=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: kaldi-matrix.cc:38
void Update(const RegressionTree &regtree, const RegtreeFmllrOptions &opts, RegtreeFmllrDiagGmm *out_fmllr, BaseFloat *auxf_impr, BaseFloat *tot_t) const
void Write(std::ostream &out_stream, bool binary) const
void Init(size_t num_bclass, size_t dim)
void Init(const AmDiagGmm &model, GmmFlagsType flags)
Initializes accumulators for each GMM based on the number of components and dimension.