#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/am-diag-gmm.h"
#include "ivector/ivector-extractor.h"
#include "util/kaldi-thread.h"

Include dependency graph for ivector-extract.cc:

Classes
class	IvectorExtractTask

Namespaces
	kaldi
	This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks (the reference is cut off in this brief summary).

Functions
int32	RunPerSpeaker (const std::string &ivector_extractor_rxfilename, const IvectorEstimationOptions &opts, bool compute_objf_change, const std::string &spk2utt_rspecifier, const std::string &feature_rspecifier, const std::string &posterior_rspecifier, const std::string &ivector_wspecifier)

int	main (int argc, char *argv[])

Function Documentation

◆ main()

int main	(	int	argc,
		char *	argv[]
	)

Definition at line 210 of file ivector-extract.cc.

References IvectorEstimationOptions::acoustic_weight, SequentialTableReader< Holder >::Done(), kaldi::g_num_threads, ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), IvectorExtractTask::IvectorExtractTask(), KALDI_ASSERT, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), IvectorEstimationOptions::max_count, SequentialTableReader< Holder >::Next(), TaskSequencerConfig::num_threads, ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), ParseOptions::Read(), kaldi::ReadKaldiObject(), IvectorEstimationOptions::Register(), ParseOptions::Register(), TaskSequencerConfig::Register(), TaskSequencer< C >::Run(), kaldi::RunPerSpeaker(), kaldi::ScalePosterior(), kaldi::TotalPosterior(), RandomAccessTableReader< Holder >::Value(), and SequentialTableReader< Holder >::Value().

                                  {
   // Command-line entry point for iVector extraction.
   //
   // Expects exactly four positional arguments (extractor model, feature
   // rspecifier, posterior rspecifier, iVector wspecifier).  Without --spk2utt
   // one task per utterance is queued on a TaskSequencer; with --spk2utt the
   // work is delegated to RunPerSpeaker().
   //
   // Return value: in per-utterance mode, 0 if at least one utterance was
   // queued and 1 otherwise; in per-speaker mode, whatever RunPerSpeaker()
   // returns; -1 if a std::exception reaches the catch block at the bottom.
   // A wrong number of arguments prints the usage text and calls exit(1).
   using namespace kaldi;
   typedef kaldi::int32 int32;
   typedef kaldi::int64 int64;
   try {
     const char *usage =
         "Extract iVectors for utterances, using a trained iVector extractor,\n"
         "and features and Gaussian-level posteriors\n"
         "Usage:  ivector-extract [options] <model-in> <feature-rspecifier> "
         "<posteriors-rspecifier> <ivector-wspecifier>\n"
         "e.g.: \n"
         " fgmm-global-gselect-to-post 1.ubm '$feats' 'ark:gunzip -c gselect.1.gz|' ark:- | \\\n"
         "  ivector-extract final.ie '$feats' ark,s,cs:- ark,t:ivectors.1.ark\n";
 
     ParseOptions po(usage);
     bool compute_objf_change = true;
     IvectorEstimationOptions opts;
     std::string spk2utt_rspecifier;
     TaskSequencerConfig sequencer_config;
     po.Register("compute-objf-change", &compute_objf_change,
                 "If true, compute the change in objective function from using "
                 "nonzero iVector (a potentially useful diagnostic).  Combine "
                 "with --verbose=2 for per-utterance information");
     po.Register("spk2utt", &spk2utt_rspecifier, "Supply this option if you "
                 "want iVectors to be output at the per-speaker level, estimated "
                 "using stats accumulated from multiple utterances.  Note: this "
                 "is not the normal way iVectors are obtained for speaker-id. "
                 "This option will cause the program to ignore the --num-threads "
                 "option.");
 
     // The estimation options and the sequencer config register their own
     // command-line options on the same parser.
     opts.Register(&po);
     sequencer_config.Register(&po);
 
     po.Read(argc, argv);
 
     if (po.NumArgs() != 4) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string ivector_extractor_rxfilename = po.GetArg(1),
         feature_rspecifier = po.GetArg(2),
         posterior_rspecifier = po.GetArg(3),
         ivectors_wspecifier = po.GetArg(4);
 
 
     // An empty --spk2utt selects the per-utterance path.
     if (spk2utt_rspecifier.empty()) {
       // g_num_threads affects how ComputeDerivedVars is called when we read the
       // extractor.
       g_num_threads = sequencer_config.num_threads;
       IvectorExtractor extractor;
       ReadKaldiObject(ivector_extractor_rxfilename, &extractor);
 
       // Running totals reported in the summary log after all tasks finish.
       double tot_auxf_change = 0.0, tot_t = 0.0;
       int32 num_done = 0, num_err = 0;
 
       // Features are read sequentially and drive the loop; posteriors are
       // looked up by utterance key.
       SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
       RandomAccessPosteriorReader posterior_reader(posterior_rspecifier);
       BaseFloatVectorWriter ivector_writer(ivectors_wspecifier);
 
       // Inner scope: "sequencer" is destroyed at the closing brace, i.e.
       // before the summary below is logged.
       {
         TaskSequencer<IvectorExtractTask> sequencer(sequencer_config);
         for (; !feature_reader.Done(); feature_reader.Next()) {
           std::string utt = feature_reader.Key();
           // Utterances without posteriors are counted as errors and skipped.
           if (!posterior_reader.HasKey(utt)) {
             KALDI_WARN << "No posteriors for utterance " << utt;
             num_err++;
             continue;
           }
           const Matrix<BaseFloat> &mat = feature_reader.Value();
           // Taken by value (not by reference) because the posterior is
           // rescaled in place by ScalePosterior() further down.
           Posterior posterior = posterior_reader.Value(utt);
 
           // The posterior must have one entry per feature row.
           if (static_cast<int32>(posterior.size()) != mat.NumRows()) {
             KALDI_WARN << "Size mismatch between posterior " << posterior.size()
                        << " and features " << mat.NumRows() << " for utterance "
                        << utt;
             num_err++;
             continue;
           }
 
           // NULL when --compute-objf-change=false; otherwise every task gets
           // a pointer to the shared total.
           // NOTE(review): presumably the task adds its objf change through
           // this pointer -- IvectorExtractTask is not visible here; confirm.
           double *auxf_ptr = (compute_objf_change ? &tot_auxf_change : NULL );
 
           // this_t is the acoustic-weighted total posterior of the utterance.
           // If it exceeds a positive --max-count, a scale below 1 is chosen
           // so the scaled total equals max_count.
           double this_t = opts.acoustic_weight * TotalPosterior(posterior),
               max_count_scale = 1.0;
           if (opts.max_count > 0 && this_t > opts.max_count) {
             max_count_scale = opts.max_count / this_t;
             KALDI_LOG << "Scaling stats for utterance " << utt << " by scale "
                       << max_count_scale << " due to --max-count="
                       << opts.max_count;
             this_t = opts.max_count;
           }
           ScalePosterior(opts.acoustic_weight * max_count_scale,
                          &posterior);
           // note: now, this_t == sum of posteriors.
 
           // NOTE(review): the task is allocated with new and never deleted in
           // this function; presumably the sequencer takes ownership -- confirm
           // against TaskSequencer.
           sequencer.Run(new IvectorExtractTask(extractor, utt, mat, posterior,
                                                &ivector_writer, auxf_ptr));
 
           tot_t += this_t;
           num_done++;
         }
         // Destructor of "sequencer" will wait for any remaining tasks.
       }
 
       KALDI_LOG << "Done " << num_done << " files, " << num_err
                 << " with errors.  Total (weighted) frames " << tot_t;
       // NOTE(review): tot_t is still 0.0 when no utterance was queued, so the
       // average below divides by zero in that case.
       if (compute_objf_change)
         KALDI_LOG << "Overall average objective-function change from estimating "
                   << "ivector was " << (tot_auxf_change / tot_t) << " per frame "
                   << " over " << tot_t << " (weighted) frames.";
 
       // Non-zero exit status if nothing at all was processed.
       return (num_done != 0 ? 0 : 1);
     } else {
       // Per-speaker path.
       // NOTE(review): the --spk2utt help text above says --num-threads is
       // ignored, but this assertion requires it to be exactly 1.
       KALDI_ASSERT(sequencer_config.num_threads == 1 &&
                    "--spk2utt option is incompatible with --num-threads option");
       return RunPerSpeaker(ivector_extractor_rxfilename,
                            opts,
                            compute_objf_change,
                            spk2utt_rspecifier,
                            feature_rspecifier,
                            posterior_rspecifier,
                            ivectors_wspecifier);
     }
   } catch(const std::exception &e) {
     // Print the exception message to stderr and report failure to the caller.
     std::cerr << e.what();
     return -1;
   }
 }

Classes

Namespaces

Functions

Function Documentation

◆ main()