ivector-compute-lda.cc File Reference
Include dependency graph for ivector-compute-lda.cc:

Go to the source code of this file.

Classes

class  CovarianceStats
 

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for mispronunciations detection tasks, the reference:
 

Functions

template<class Real >
void ComputeNormalizingTransform (const SpMatrix< Real > &covar, Real floor, MatrixBase< Real > *proj)
 
void ComputeLdaTransform (const std::map< std::string, Vector< BaseFloat > *> &utt2ivector, const std::map< std::string, std::vector< std::string > > &spk2utt, BaseFloat total_covariance_factor, BaseFloat covariance_floor, MatrixBase< BaseFloat > *lda_out)
 
void ComputeAndSubtractMean (std::map< std::string, Vector< BaseFloat > *> utt2ivector, Vector< BaseFloat > *mean_out)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 205 of file ivector-compute-lda.cc.

References VectorBase< Real >::AddMatVec(), kaldi::ComputeAndSubtractMean(), kaldi::ComputeLdaTransform(), MatrixBase< Real >::CopyColFromVec(), VectorBase< Real >::Dim(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, KALDI_VLOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), kaldi::kNoTrans, SequentialTableReader< Holder >::Next(), VectorBase< Real >::Norm(), ParseOptions::NumArgs(), kaldi::PrintableWxfilename(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), and kaldi::WriteKaldiObject().

205  { // Body of main(): reads per-utterance iVectors and an utt2spk map, computes an LDA transform, and writes it out.
206  using namespace kaldi;
207  typedef kaldi::int32 int32;
208  try {
209  const char *usage =
210  "Compute an LDA matrix for iVector system. Reads in iVectors per utterance,\n"
211  "and an utt2spk file which it uses to help work out the within-speaker and\n"
212  "between-speaker covariance matrices. Outputs an LDA projection to a\n"
213  "specified dimension. By default it will normalize so that the projected\n"
214  "within-class covariance is unit, but if you set --normalize-total-covariance\n"
215  "to true, it will normalize the total covariance.\n"
216  "Note: the transform we produce is actually an affine transform which will\n"
217  "also set the global mean to zero.\n"
218  "\n"
219  "Usage: ivector-compute-lda [options] <ivector-rspecifier> <utt2spk-rspecifier> "
220  "<lda-matrix-out>\n"
221  "e.g.: \n"
222  " ivector-compute-lda ark:ivectors.ark ark:utt2spk lda.mat\n";
223 
224  ParseOptions po(usage);
225  // Defaults for the options registered below.
226  int32 lda_dim = 100; // Dimension we reduce to
227  BaseFloat total_covariance_factor = 0.0,
228  covariance_floor = 1.0e-06;
229  bool binary = true;
230 
231  po.Register("dim", &lda_dim, "Dimension we keep with the LDA transform");
232  po.Register("total-covariance-factor", &total_covariance_factor,
233  "If this is 0.0 we normalize to make the within-class covariance "
234  "unit; if 1.0, the total covariance; if between, we normalize "
235  "an interpolated matrix.");
236  po.Register("covariance-floor", &covariance_floor, "Floor the eigenvalues "
237  "of the interpolated covariance matrix to the product of its "
238  "largest eigenvalue and this number.");
239  po.Register("binary", &binary, "Write output in binary mode");
240 
241  po.Read(argc, argv);
242  // Exactly three positional arguments are required; otherwise print the usage text and exit with status 1.
243  if (po.NumArgs() != 3) {
244  po.PrintUsage();
245  exit(1);
246  }
247 
248  std::string ivector_rspecifier = po.GetArg(1),
249  utt2spk_rspecifier = po.GetArg(2),
250  lda_wxfilename = po.GetArg(3);
251  // A negative covariance floor is rejected before any input is read.
252  KALDI_ASSERT(covariance_floor >= 0.0);
253  // dim == 0 means that no iVector dimension has been recorded yet.
254  int32 num_done = 0, num_err = 0, dim = 0;
255 
256  SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
257  RandomAccessTokenReader utt2spk_reader(utt2spk_rspecifier);
258  // utt2ivector holds heap-allocated copies of the iVectors (deleted at the end of this function); spk2utt lists the utterances of each speaker.
259  std::map<std::string, Vector<BaseFloat> *> utt2ivector;
260  std::map<std::string, std::vector<std::string> > spk2utt;
261  // Read every iVector; duplicates and utterances missing from utt2spk are counted in num_err and skipped.
262  for (; !ivector_reader.Done(); ivector_reader.Next()) {
263  std::string utt = ivector_reader.Key();
264  const Vector<BaseFloat> &ivector = ivector_reader.Value();
265  if (utt2ivector.count(utt) != 0) {
266  KALDI_WARN << "Duplicate iVector found for utterance " << utt
267  << ", ignoring it.";
268  num_err++;
269  continue;
270  }
271  if (!utt2spk_reader.HasKey(utt)) {
272  KALDI_WARN << "utt2spk has no entry for utterance " << utt
273  << ", skipping it.";
274  num_err++;
275  continue;
276  }
277  std::string spk = utt2spk_reader.Value(utt);
278  utt2ivector[utt] = new Vector<BaseFloat>(ivector); // keep our own copy of the reader's current value
279  if (dim == 0) {
280  dim = ivector.Dim();
281  } else {
282  KALDI_ASSERT(dim == ivector.Dim() && "iVector dimension mismatch"); // all accepted iVectors must share one dimension
283  }
284  spk2utt[spk].push_back(utt);
285  num_done++;
286  }
287 
288  KALDI_LOG << "Read " << num_done << " utterances, "
289  << num_err << " with errors.";
290  // With no accepted utterances there is nothing to estimate, so this is reported via KALDI_ERR.
291  if (num_done == 0) {
292  KALDI_ERR << "Did not read any utterances.";
293  } else {
294  KALDI_LOG << "Computing within-class covariance.";
295  }
296  // Obtain the global mean in 'mean'; NOTE(review): the helper's name says it also subtracts the mean from the stored iVectors -- confirm in ComputeAndSubtractMean.
297  Vector<BaseFloat> mean;
298  ComputeAndSubtractMean(utt2ivector, &mean);
299  KALDI_LOG << "2-norm of iVector mean is " << mean.Norm(2.0);
300 
301  // lda_mat has dim + 1 columns: linear_part is a sub-matrix view built from (lda_mat, 0, lda_dim, 0, dim), and column index dim receives the offset below.
302  Matrix<BaseFloat> lda_mat(lda_dim, dim + 1); // Full affine transform: linear part plus one extra column for the offset term.
303  SubMatrix<BaseFloat> linear_part(lda_mat, 0, lda_dim, 0, dim);
304  ComputeLdaTransform(utt2ivector,
305  spk2utt,
306  total_covariance_factor,
307  covariance_floor,
308  &linear_part);
309  Vector<BaseFloat> offset(lda_dim);
310  offset.AddMatVec(-1.0, linear_part, kNoTrans, mean, 0.0); // presumably offset = -(linear_part * mean), per Kaldi's AddMatVec(alpha, M, trans, v, beta) -- confirm
311  lda_mat.CopyColFromVec(offset, dim); // add mean-offset to transform
312 
313  KALDI_VLOG(2) << "2-norm of transformed iVector mean is "
314  << offset.Norm(2.0);
315 
316  WriteKaldiObject(lda_mat, lda_wxfilename, binary);
317 
318  KALDI_LOG << "Wrote LDA transform to "
319  << PrintableWxfilename(lda_wxfilename);
320  // Release the iVector copies that were allocated with new in the reading loop.
321  std::map<std::string, Vector<BaseFloat> *>::iterator iter;
322  for (iter = utt2ivector.begin(); iter != utt2ivector.end(); ++iter)
323  delete iter->second;
324  utt2ivector.clear();
325 
326  return 0;
327  } catch(const std::exception &e) { // print the exception text to stderr and return -1 to signal failure
328  std::cerr << e.what();
329  return -1;
330  }
331 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
kaldi::int32 int32
Real Norm(Real p) const
Compute the p-th norm of the vector.
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
void ComputeLdaTransform(const std::map< std::string, Vector< BaseFloat > *> &utt2ivector, const std::map< std::string, std::vector< std::string > > &spk2utt, BaseFloat total_covariance_factor, BaseFloat covariance_floor, MatrixBase< BaseFloat > *lda_out)
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
void ComputeAndSubtractMean(std::map< std::string, Vector< BaseFloat > *> utt2ivector, Vector< BaseFloat > *mean_out)
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
#define KALDI_LOG
Definition: kaldi-error.h:153
Sub-matrix representation.
Definition: kaldi-matrix.h:988