All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gmm-global-init-from-feats.cc File Reference
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/model-common.h"
#include "gmm/full-gmm.h"
#include "gmm/diag-gmm.h"
#include "gmm/mle-full-gmm.h"
Include dependency graph for gmm-global-init-from-feats.cc:

Go to the source code of this file.

Namespaces

 kaldi
 Core namespace of the Kaldi speech recognition toolkit.
 

Functions

void InitGmmFromRandomFrames (const Matrix< BaseFloat > &feats, DiagGmm *gmm)
 
void TrainOneIter (const Matrix< BaseFloat > &feats, const MleDiagGmmOptions &gmm_opts, int32 iter, int32 num_threads, DiagGmm *gmm)
 
int main (int argc, char *argv[])
 

Function Documentation

int main ( int  argc,
char *  argv[] 
)

Definition at line 88 of file gmm-global-init-from-feats.cc.

References SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), kaldi::InitGmmFromRandomFrames(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, KALDI_WARN, kaldi::kCopyData, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumCols(), DiagGmm::NumGauss(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), kaldi::RandInt(), ParseOptions::Read(), MleDiagGmmOptions::Register(), ParseOptions::Register(), Matrix< Real >::Resize(), MatrixBase< Real >::Row(), DiagGmm::Split(), kaldi::TrainOneIter(), SequentialTableReader< Holder >::Value(), kaldi::WithProb(), and kaldi::WriteKaldiObject().

88  {
89  try {
90  using namespace kaldi;
91 
92  const char *usage =
93  "This program initializes a single diagonal GMM and does multiple iterations of\n"
94  "training from features stored in memory.\n"
95  "Usage: gmm-global-init-from-feats [options] <feature-rspecifier> <model-out>\n"
96  "e.g.: gmm-global-init-from-feats scp:train.scp 1.mdl\n";
97 
98  ParseOptions po(usage);
99  MleDiagGmmOptions gmm_opts;
100 
101  bool binary = true;
102  int32 num_gauss = 100;
103  int32 num_gauss_init = 0;
104  int32 num_iters = 50;
105  int32 num_frames = 200000;
106  int32 srand_seed = 0;
107  int32 num_threads = 4;
108 
109  po.Register("binary", &binary, "Write output in binary mode");
110  po.Register("num-gauss", &num_gauss, "Number of Gaussians in the model");
111  po.Register("num-gauss-init", &num_gauss_init, "Number of Gaussians in "
112  "the model initially (if nonzero and less than num_gauss, "
113  "we'll do mixture splitting)");
114  po.Register("num-iters", &num_iters, "Number of iterations of training");
115  po.Register("num-frames", &num_frames, "Number of feature vectors to store in "
116  "memory and train on (randomly chosen from the input features)");
117  po.Register("srand", &srand_seed, "Seed for random number generator ");
118  po.Register("num-threads", &num_threads, "Number of threads used for "
119  "statistics accumulation");
120 
121  gmm_opts.Register(&po);
122 
123  po.Read(argc, argv);
124 
125  srand(srand_seed);
126 
127  if (po.NumArgs() != 2) {
128  po.PrintUsage();
129  exit(1);
130  }
131 
132  std::string feature_rspecifier = po.GetArg(1),
133  model_wxfilename = po.GetArg(2);
134 
135  Matrix<BaseFloat> feats;
136 
137  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
138 
139 
140  KALDI_ASSERT(num_frames > 0);
141 
142  int64 num_read = 0, dim = 0;
143 
144  KALDI_LOG << "Reading features (will keep " << num_frames << " frames.)";
145 
146  for (; !feature_reader.Done(); feature_reader.Next()) {
147  const Matrix<BaseFloat> &this_feats = feature_reader.Value();
148  for (int32 t = 0; t < this_feats.NumRows(); t++) {
149  num_read++;
150  if (dim == 0) {
151  dim = this_feats.NumCols();
152  feats.Resize(num_frames, dim);
153  } else if (this_feats.NumCols() != dim) {
154  KALDI_ERR << "Features have inconsistent dims "
155  << this_feats.NumCols() << " vs. " << dim
156  << " (current utt is) " << feature_reader.Key();
157  }
158  if (num_read <= num_frames) {
159  feats.Row(num_read - 1).CopyFromVec(this_feats.Row(t));
160  } else {
161  BaseFloat keep_prob = num_frames / static_cast<BaseFloat>(num_read);
162  if (WithProb(keep_prob)) { // With probability "keep_prob"
163  feats.Row(RandInt(0, num_frames - 1)).CopyFromVec(this_feats.Row(t));
164  }
165  }
166  }
167  }
168 
169  if (num_read < num_frames) {
170  KALDI_WARN << "Number of frames read " << num_read << " was less than "
171  << "target number " << num_frames << ", using all we read.";
172  feats.Resize(num_read, dim, kCopyData);
173  } else {
174  BaseFloat percent = num_frames * 100.0 / num_read;
175  KALDI_LOG << "Kept " << num_frames << " out of " << num_read
176  << " input frames = " << percent << "%.";
177  }
178 
179  if (num_gauss_init <= 0 || num_gauss_init > num_gauss)
180  num_gauss_init = num_gauss;
181 
182  DiagGmm gmm(num_gauss_init, dim);
183 
184  KALDI_LOG << "Initializing GMM means from random frames to "
185  << num_gauss_init << " Gaussians.";
186  InitGmmFromRandomFrames(feats, &gmm);
187 
188  // we'll increase the #Gaussians by splitting,
189  // till halfway through training.
190  int32 cur_num_gauss = num_gauss_init,
191  gauss_inc = (num_gauss - num_gauss_init) / (num_iters / 2);
192 
193  for (int32 iter = 0; iter < num_iters; iter++) {
194  TrainOneIter(feats, gmm_opts, iter, num_threads, &gmm);
195 
196  int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
197  if (next_num_gauss > gmm.NumGauss()) {
198  KALDI_LOG << "Splitting to " << next_num_gauss << " Gaussians.";
199  gmm.Split(next_num_gauss, 0.1);
200  cur_num_gauss = next_num_gauss;
201  }
202  }
203 
204  WriteKaldiObject(gmm, model_wxfilename, binary);
205  KALDI_LOG << "Wrote model to " << model_wxfilename;
206  return 0;
207  } catch(const std::exception &e) {
208  std::cerr << e.what();
209  return -1;
210  }
211 }
Core namespace of the Kaldi speech recognition toolkit.
Definition: chain.dox:20
bool WithProb(BaseFloat prob, struct RandomState *state)
Definition: kaldi-math.cc:71
void TrainOneIter(const Matrix< BaseFloat > &feats, const MleDiagGmmOptions &gmm_opts, int32 iter, int32 num_threads, DiagGmm *gmm)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:182
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void Register(OptionsItf *opts)
Definition: mle-diag-gmm.h:59
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
Configuration variables like variance floor, minimum occupancy, etc.
Definition: mle-diag-gmm.h:38
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:58
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:61
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
void InitGmmFromRandomFrames(const Matrix< BaseFloat > &feats, DiagGmm *gmm)
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Definition: kaldi-io.h:257
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
#define KALDI_LOG
Definition: kaldi-error.h:133
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:94