ivector-extractor-test.cc
Go to the documentation of this file.
1 // ivector/ivector-extractor-test.cc
2 
3 // Copyright 2013 Daniel Povey
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "gmm/model-test-common.h"
21 #include "gmm/full-gmm-normal.h"
23 #include "util/kaldi-io.h"
24 
25 
26 namespace kaldi {
27 
28 void TestIvectorExtractorIO(const IvectorExtractor &extractor) {
29  std::ostringstream ostr;
30  bool binary = (Rand() % 2 == 0);
31  extractor.Write(ostr, binary);
32  std::istringstream istr(ostr.str());
33  IvectorExtractor extractor2;
34  extractor2.Read(istr, binary);
35  std::ostringstream ostr2;
36  extractor2.Write(ostr2, binary);
37  KALDI_ASSERT(ostr.str() == ostr2.str());
38 }
40  std::ostringstream ostr;
41  bool binary = (Rand() % 2 == 0);
42  stats.Write(ostr, binary);
43  std::istringstream istr(ostr.str());
44  IvectorExtractorStats stats2;
45  stats2.Read(istr, binary);
46  std::ostringstream ostr2;
47  stats2.Write(ostr2, binary);
48 
49  if (binary) {
50  // this was failing in text mode, due to differences like
51  // 8.2244e+06 vs 8.22440e+06
52  KALDI_ASSERT(ostr.str() == ostr2.str());
53  }
54 
55  { // Test I/O of IvectorExtractorStats and that it works identically with the "add"
56  // mechanism. We only test this with binary == true; otherwise it's not
57  // identical due to limited precision.
58  std::ostringstream ostr;
59  bool binary = true;
60  stats.Write(ostr, binary);
61  IvectorExtractorStats stats2;
62  {
63  std::istringstream istr(ostr.str());
64  stats2.Read(istr, binary);
65  }
66  {
67  std::istringstream istr(ostr.str());
68  stats2.Read(istr, binary, true); // add to existing.
69  }
70  IvectorExtractorStats stats3(stats);
71  stats3.Add(stats);
72 
73  std::ostringstream ostr2;
74  stats2.Write(ostr2, false);
75 
76  std::ostringstream ostr3;
77  stats3.Write(ostr3, false);
78 
79  //if (binary) {
80  // KALDI_ASSERT(ostr2.str() == ostr3.str());
81  //}
82  }
83 }
84 
86  const MatrixBase<BaseFloat> &feats,
87  const FullGmm &fgmm) {
88  if (extractor.IvectorDependentWeights())
89  return; // Nothing to do as online iVector estimator does not work in this
90  // case.
91  int32 num_frames = feats.NumRows(),
92  feat_dim = feats.NumCols(),
93  num_gauss = extractor.NumGauss(),
94  ivector_dim = extractor.IvectorDim();
95  Posterior post(num_frames);
96 
97  double tot_log_like = 0.0;
98  for (int32 t = 0; t < num_frames; t++) {
99  SubVector<BaseFloat> frame(feats, t);
100  Vector<BaseFloat> posterior(fgmm.NumGauss(), kUndefined);
101  tot_log_like += fgmm.ComponentPosteriors(frame, &posterior);
102  for (int32 i = 0; i < posterior.Dim(); i++)
103  post[t].push_back(std::make_pair(i, posterior(i)));
104  }
105 
106  // The zeroth and 1st-order stats are in "utt_stats".
107  IvectorExtractorUtteranceStats utt_stats(num_gauss, feat_dim,
108  false);
109  utt_stats.AccStats(feats, post);
110 
111  OnlineIvectorEstimationStats online_stats(extractor.IvectorDim(),
112  extractor.PriorOffset(),
113  0.0);
114 
115  for (int32 t = 0; t < num_frames; t++) {
116  online_stats.AccStats(extractor, feats.Row(t), post[t]);
117  }
118 
119  Vector<double> ivector1(ivector_dim), ivector2(ivector_dim);
120 
121  extractor.GetIvectorDistribution(utt_stats, &ivector1, NULL);
122 
123  int32 num_cg_iters = -1; // for testing purposes, compute it exactly.
124  online_stats.GetIvector(num_cg_iters, &ivector2);
125 
126  KALDI_LOG << "ivector1 = " << ivector1;
127  KALDI_LOG << "ivector2 = " << ivector2;
128 
129  // objf change vs. default iVector. note, here I'm using objf
130  // and auxf pretty much interchangeably :-(
131  double objf_change2 = online_stats.ObjfChange(ivector2) *
132  utt_stats.NumFrames();
133 
134  Vector<double> ivector_baseline(ivector_dim);
135  ivector_baseline(0) = extractor.PriorOffset();
136  double objf_change1 = extractor.GetAuxf(utt_stats, ivector1) -
137  extractor.GetAuxf(utt_stats, ivector_baseline);
138  KALDI_LOG << "objf_change1 = " << objf_change1
139  << ", objf_change2 = " << objf_change2;
140 
141  KALDI_ASSERT(ivector1.ApproxEqual(ivector2));
142 }
143 
144 
146  FullGmm fgmm;
147  int32 dim = 5 + Rand() % 5, num_comp = 1 + Rand() % 5;
148  KALDI_LOG << "Num Gauss = " << num_comp;
149  unittest::InitRandFullGmm(dim, num_comp, &fgmm);
150  FullGmmNormal fgmm_normal(fgmm);
151 
152  IvectorExtractorOptions ivector_opts;
153  ivector_opts.ivector_dim = dim + 5;
154  ivector_opts.use_weights = (Rand() % 2 == 0);
155  KALDI_LOG << "Feature dim is " << dim
156  << ", ivector dim is " << ivector_opts.ivector_dim;
157  IvectorExtractor extractor(ivector_opts, fgmm);
158  TestIvectorExtractorIO(extractor);
159 
160  IvectorExtractorStatsOptions stats_opts;
161  if (Rand() % 2 == 0) stats_opts.update_variances = false;
162  stats_opts.num_samples_for_weights = 100; // Improve accuracy
163  // of estimation, since we do it with relatively few utterances,
164  // and we're testing the convergence.
165 
166  int32 num_utts = 1 + Rand() % 5;
167  std::vector<Matrix<BaseFloat> > all_feats(num_utts);
168  for (int32 utt = 0; utt < num_utts; utt++) {
169  int32 num_frames = 100 + Rand() % 200;
170  if (Rand() % 2 == 0) num_frames *= 10;
171  if (Rand() % 2 == 0) num_frames /= 1.0;
172  Matrix<BaseFloat> feats(num_frames, dim);
173  fgmm_normal.Rand(&feats);
174  feats.Swap(&all_feats[utt]);
175  }
176 
177  int32 num_iters = 4;
178  double last_auxf_impr = 0.0, last_auxf = 0.0;
179  for (int32 iter = 0; iter < num_iters; iter++) {
180  IvectorExtractorStats stats(extractor, stats_opts);
181 
182  for (int32 utt = 0; utt < num_utts; utt++) {
183  Matrix<BaseFloat> &feats = all_feats[utt];
184  stats.AccStatsForUtterance(extractor, feats, fgmm);
185  TestIvectorExtraction(extractor, feats, fgmm);
186  }
188 
189  IvectorExtractorEstimationOptions estimation_opts;
190  estimation_opts.gaussian_min_count = dim + 5;
191  double auxf = stats.AuxfPerFrame(),
192  auxf_impr = stats.Update(estimation_opts, &extractor);
193 
194  KALDI_LOG << "Iter " << iter << ", auxf per frame was " << auxf
195  << ", improvement in this update "
196  << "phase was " << auxf_impr;
197  if (iter > 0) {
198  double auxf_change = auxf - last_auxf;
199  KALDI_LOG << "Predicted auxf change from last update phase was "
200  << last_auxf_impr << " versus observed change "
201  << auxf_change;
202  double wiggle_room = (ivector_opts.use_weights ? 5.0e-05 : 1.0e-08);
203  // The weight update is (a) not exact, and (b) relies on sampling, [two
204  // separate issues], so it might not always improve. But with
205  // a large number of "weight samples", it's OK.
206  KALDI_ASSERT(auxf_change >= last_auxf_impr - wiggle_room);
207  }
208  last_auxf_impr = auxf_impr;
209  last_auxf = auxf;
210  }
211  std::cout << "********************************************************************************************\n";
212 }
213 
214 }
215 
216 int main() {
217  using namespace kaldi;
218  SetVerboseLevel(5);
219  for (int i = 0; i < 10; i++)
221  std::cout << "Test OK.\n";
222  return 0;
223 }
void TestIvectorExtractorIO(const IvectorExtractor &extractor)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
double ObjfChange(const VectorBase< double > &ivector) const
ObjfChange returns the change in objective function *per frame* from using the default value [ prior_...
void Add(const IvectorExtractorStats &other)
double PriorOffset() const
The distribution over iVectors, in our formulation, is not centered at zero; its first dimension has ...
BaseFloat ComponentPosteriors(const VectorBase< BaseFloat > &data, VectorBase< BaseFloat > *posterior) const
Computes the posterior probabilities of all Gaussian components given a data point.
Definition: full-gmm.cc:719
Definition for Gaussian Mixture Model with full covariances in normal mode: where the parameters are ...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
bool IvectorDependentWeights() const
void AccStats(const IvectorExtractor &extractor, const VectorBase< BaseFloat > &feature, const std::vector< std::pair< int32, BaseFloat > > &gauss_post)
Definition for Gaussian Mixture Model with full covariances.
Definition: full-gmm.h:40
double GetAuxf(const IvectorExtractorUtteranceStats &utt_stats, const VectorBase< double > &mean, const SpMatrix< double > *var=NULL) const
Returns the log-likelihood objective function, summed over frames, for this distribution of iVectors ...
IvectorExtractorStats is a class used to update the parameters of the ivector extractor.
kaldi::int32 int32
void Read(std::istream &is, bool binary, bool add=false)
This class helps us to efficiently estimate iVectors in situations where the data is coming in frame ...
void Swap(Matrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
Definition: kaldi-error.h:64
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
Options for IvectorExtractorStats, which is used to update the parameters of IvectorExtractor.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void Write(std::ostream &os, bool binary) const
void GetIvector(int32 num_cg_iters, VectorBase< double > *ivector) const
This function gets the current estimate of the iVector.
void UnitTestIvectorExtractor()
void Rand(MatrixBase< BaseFloat > *feats)
Generates random features from the model.
int main()
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: full-gmm.h:58
double Update(const IvectorExtractorEstimationOptions &opts, IvectorExtractor *extractor) const
Returns the objf improvement per frame.
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
void InitRandFullGmm(int32 dim, int32 num_comp, FullGmm *gmm)
void AccStats(const MatrixBase< BaseFloat > &feats, const Posterior &post)
void GetIvectorDistribution(const IvectorExtractorUtteranceStats &utt_stats, VectorBase< double > *mean, SpMatrix< double > *var) const
Gets the distribution over ivectors (or at least, a Gaussian approximation to it).
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AccStatsForUtterance(const IvectorExtractor &extractor, const MatrixBase< BaseFloat > &feats, const Posterior &post)
Options for training the IvectorExtractor, e.g. variance flooring.
void Read(std::istream &is, bool binary)
These are the stats for a particular utterance, i.e.
#define KALDI_LOG
Definition: kaldi-error.h:153
void TestIvectorExtractorStatsIO(IvectorExtractorStats &stats)
void Write(std::ostream &os, bool binary)
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void TestIvectorExtraction(const IvectorExtractor &extractor, const MatrixBase< BaseFloat > &feats, const FullGmm &fgmm)