#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/am-diag-gmm.h"
#include "ivector/ivector-extractor.h"
#include "util/kaldi-thread.h"

Include dependency graph for ivector-compute-lda.cc:

Classes
class	CovarianceStats

Namespaces
	kaldi
	This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks, the reference:

Functions
template<class Real >
void	ComputeNormalizingTransform (const SpMatrix< Real > &covar, Real floor, MatrixBase< Real > *proj)

void	ComputeLdaTransform (const std::map< std::string, Vector< BaseFloat > * > &utt2ivector, const std::map< std::string, std::vector< std::string > > &spk2utt, BaseFloat total_covariance_factor, BaseFloat covariance_floor, MatrixBase< BaseFloat > *lda_out)

void	ComputeAndSubtractMean (std::map< std::string, Vector< BaseFloat > * > utt2ivector, Vector< BaseFloat > *mean_out)

int	main (int argc, char *argv[])

Function Documentation

◆ main()

int main	(	int	argc,
		char *	argv[]
	)

Definition at line 205 of file ivector-compute-lda.cc.

References VectorBase< Real >::AddMatVec(), kaldi::ComputeAndSubtractMean(), kaldi::ComputeLdaTransform(), MatrixBase< Real >::CopyColFromVec(), VectorBase< Real >::Dim(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, KALDI_VLOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), kaldi::kNoTrans, SequentialTableReader< Holder >::Next(), VectorBase< Real >::Norm(), ParseOptions::NumArgs(), kaldi::PrintableWxfilename(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), and kaldi::WriteKaldiObject().

                                  {
   using namespace kaldi;
   typedef kaldi::int32 int32;
   try {
     const char *usage =
         "Compute an LDA matrix for iVector system.  Reads in iVectors per utterance,\n"
         "and an utt2spk file which it uses to help work out the within-speaker and\n"
         "between-speaker covariance matrices.  Outputs an LDA projection to a\n"
         "specified dimension.  By default it will normalize so that the projected\n"
         "within-class covariance is unit, but if you set --normalize-total-covariance\n"
         "to true, it will normalize the total covariance.\n"
         "Note: the transform we produce is actually an affine transform which will\n"
         "also set the global mean to zero.\n"
         "\n"
         "Usage:  ivector-compute-lda [options] <ivector-rspecifier> <utt2spk-rspecifier> "
         "<lda-matrix-out>\n"
         "e.g.: \n"
         " ivector-compute-lda ark:ivectors.ark ark:utt2spk lda.mat\n";
 
     ParseOptions po(usage);
 
     int32 lda_dim = 100; // Dimension we reduce to
     BaseFloat total_covariance_factor = 0.0,
               covariance_floor = 1.0e-06;
     bool binary = true;
 
     po.Register("dim", &lda_dim, "Dimension we keep with the LDA transform");
     po.Register("total-covariance-factor", &total_covariance_factor,
                 "If this is 0.0 we normalize to make the within-class covariance "
                 "unit; if 1.0, the total covariance; if between, we normalize "
                 "an interpolated matrix.");
     po.Register("covariance-floor", &covariance_floor, "Floor the eigenvalues "
                 "of the interpolated covariance matrix to the product of its "
                 "largest eigenvalue and this number.");
     po.Register("binary", &binary, "Write output in binary mode");
 
     po.Read(argc, argv);
 
     if (po.NumArgs() != 3) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string ivector_rspecifier = po.GetArg(1),
         utt2spk_rspecifier = po.GetArg(2),
         lda_wxfilename = po.GetArg(3);
 
     KALDI_ASSERT(covariance_floor >= 0.0);
 
     int32 num_done = 0, num_err = 0, dim = 0;
 
     SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
     RandomAccessTokenReader utt2spk_reader(utt2spk_rspecifier);
 
     std::map<std::string, Vector<BaseFloat> *> utt2ivector;
     std::map<std::string, std::vector<std::string> > spk2utt;
 
     for (; !ivector_reader.Done(); ivector_reader.Next()) {
       std::string utt = ivector_reader.Key();
       const Vector<BaseFloat> &ivector = ivector_reader.Value();
       if (utt2ivector.count(utt) != 0) {
         KALDI_WARN << "Duplicate iVector found for utterance " << utt
                    << ", ignoring it.";
         num_err++;
         continue;
       }
       if (!utt2spk_reader.HasKey(utt)) {
         KALDI_WARN << "utt2spk has no entry for utterance " << utt
                    << ", skipping it.";
         num_err++;
         continue;
       }
       std::string spk = utt2spk_reader.Value(utt);
       utt2ivector[utt] = new Vector<BaseFloat>(ivector);
       if (dim == 0) {
         dim = ivector.Dim();
       } else {
         KALDI_ASSERT(dim == ivector.Dim() && "iVector dimension mismatch");
       }
       spk2utt[spk].push_back(utt);
       num_done++;
     }
 
     KALDI_LOG << "Read " << num_done << " utterances, "
               << num_err << " with errors.";
 
     if (num_done == 0) {
       KALDI_ERR << "Did not read any utterances.";
     } else {
       KALDI_LOG << "Computing within-class covariance.";
     }
 
     Vector<BaseFloat> mean;
     ComputeAndSubtractMean(utt2ivector, &mean);
     KALDI_LOG << "2-norm of iVector mean is " << mean.Norm(2.0);
 
 
     Matrix<BaseFloat> lda_mat(lda_dim, dim + 1); // LDA matrix without the offset term.
     SubMatrix<BaseFloat> linear_part(lda_mat, 0, lda_dim, 0, dim);
     ComputeLdaTransform(utt2ivector,
                         spk2utt,
                         total_covariance_factor,
                         covariance_floor,
                         &linear_part);
     Vector<BaseFloat> offset(lda_dim);
     offset.AddMatVec(-1.0, linear_part, kNoTrans, mean, 0.0);
     lda_mat.CopyColFromVec(offset, dim); // add mean-offset to transform
 
     KALDI_VLOG(2) << "2-norm of transformed iVector mean is "
                   << offset.Norm(2.0);
 
     WriteKaldiObject(lda_mat, lda_wxfilename, binary);
 
     KALDI_LOG << "Wrote LDA transform to "
               << PrintableWxfilename(lda_wxfilename);
 
     std::map<std::string, Vector<BaseFloat> *>::iterator iter;
     for (iter = utt2ivector.begin(); iter != utt2ivector.end(); ++iter)
       delete iter->second;
     utt2ivector.clear();
 
     return 0;
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
   }
 }

Classes

Namespaces

Functions

Function Documentation

◆ main()