39                      double *tot_auxf_change):
    44     bool need_2nd_order_stats = 
false;
    48                                              need_2nd_order_stats);
    70                     << 
" frames (weighted)";
    94                    bool compute_objf_change,
    95                    const std::string &spk2utt_rspecifier,
    96                    const std::string &feature_rspecifier,
    97                    const std::string &posterior_rspecifier,
    98                    const std::string &ivector_wspecifier) {
   106   double tot_auxf_change = 0.0, tot_post = 0.0, tot_norm = 0.0;
   107   int32 num_utt_done = 0, num_utt_err = 0,
   108       num_spk_done = 0, num_spk_err = 0;
   110   for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
   111     std::string spk = spk2utt_reader.
Key();
   112     const std::vector<std::string> &utts = spk2utt_reader.
Value();
   114     bool need_2nd_order_stats = 
false;
   118                                              need_2nd_order_stats);
   120     for (
size_t i = 0; 
i < utts.size(); 
i++) {
   121       const std::string &utt = utts[
i];
   122       if (!feature_reader.
HasKey(utt)) {
   123         KALDI_WARN << 
"No features present for utterance " << utt;
   128       if (!posterior_reader.
HasKey(utt)) {
   129         KALDI_WARN << 
"No posteriors present for utterance " << utt;
   134       if (feats.
NumRows() != posterior.size()) {
   135         KALDI_WARN << 
"Posterior has wrong size " << posterior.size()
   136                    << 
" vs. feats " << feats.
NumRows() << 
" for "   143       utt_stats.AccStats(feats, posterior);
   146     if (utt_stats.NumFrames() == 0.0) {
   147       KALDI_WARN << 
"No stats accumulated for speaker " << spk;
   152         double scale = opts.
max_count / utt_stats.NumFrames();
   153         utt_stats.Scale(scale);
   154         KALDI_LOG << 
"Scaling stats for speaker " << spk << 
" by scale "   155                   << scale << 
" due to --max-count=" << opts.
max_count;
   161       if (compute_objf_change) {
   162         double old_auxf = extractor.
GetAuxf(utt_stats, ivector);
   164         double new_auxf = extractor.
GetAuxf(utt_stats, ivector);
   165         double auxf_change = new_auxf - old_auxf;
   167         KALDI_LOG << 
"Auxf change for speaker " << spk << 
" was "   168                   << (auxf_change / utt_stats.NumFrames()) << 
" per frame, over "   169                   << utt_stats.NumFrames() << 
" frames (weighted).";
   170         tot_auxf_change += auxf_change;
   179       KALDI_LOG << 
"Ivector norm for speaker " << spk
   180                 << 
" was " << ivector.Norm(2.0);
   182       tot_norm += ivector.Norm(2.0) * utt_stats.NumFrames();
   183       tot_post += utt_stats.NumFrames();
   186       ivector_writer.
Write(spk, ivector_flt);
   190   KALDI_LOG << 
"Done " << num_spk_done << 
" speakers; " << num_spk_err
   191             << 
" with errors.  " << num_utt_done << 
" utterances "   192             << 
"were processed, " << num_utt_err << 
" with errors.";
   193   if (tot_post != 0.0) {
   194     if (compute_objf_change) {
   195       KALDI_LOG << 
"Overall weighted-average objective function improvement was "   196                 << (tot_auxf_change / tot_post) << 
" over " << tot_post
   197                 << 
" frames (weighted)";
   199     KALDI_LOG << 
"Average iVector norm (weighted by frames) was "   200               << (tot_norm / tot_post) << 
" over " << tot_post
   201               << 
" frames (weighted)";
   203   return (num_spk_done != 0 ? 0 : 1);
   210 int main(
int argc, 
char *argv[]) {
   211   using namespace kaldi;
   213   typedef kaldi::int64 int64;
   216         "Extract iVectors for utterances, using a trained iVector extractor,\n"   217         "and features and Gaussian-level posteriors\n"   218         "Usage:  ivector-extract [options] <model-in> <feature-rspecifier> "   219         "<posteriors-rspecifier> <ivector-wspecifier>\n"   221         " fgmm-global-gselect-to-post 1.ubm '$feats' 'ark:gunzip -c gselect.1.gz|' ark:- | \\\n"   222         "  ivector-extract final.ie '$feats' ark,s,cs:- ark,t:ivectors.1.ark\n";
   225     bool compute_objf_change = 
true;
   227     std::string spk2utt_rspecifier;
   229     po.
Register(
"compute-objf-change", &compute_objf_change,
   230                 "If true, compute the change in objective function from using "   231                 "nonzero iVector (a potentially useful diagnostic).  Combine "   232                 "with --verbose=2 for per-utterance information");
   233     po.
Register(
"spk2utt", &spk2utt_rspecifier, 
"Supply this option if you "   234                 "want iVectors to be output at the per-speaker level, estimated "   235                 "using stats accumulated from multiple utterances.  Note: this "   236                 "is not the normal way iVectors are obtained for speaker-id. "   237                 "This option will cause the program to ignore the --num-threads "   250     std::string ivector_extractor_rxfilename = po.
GetArg(1),
   251         feature_rspecifier = po.
GetArg(2),
   252         posterior_rspecifier = po.
GetArg(3),
   253         ivectors_wspecifier = po.
GetArg(4);
   256     if (spk2utt_rspecifier.empty()) {
   263       double tot_auxf_change = 0.0, tot_t = 0.0;
   264       int32 num_done = 0, num_err = 0;
   272         for (; !feature_reader.
Done(); feature_reader.
Next()) {
   273           std::string utt = feature_reader.
Key();
   274           if (!posterior_reader.
HasKey(utt)) {
   275             KALDI_WARN << 
"No posteriors for utterance " << utt;
   282           if (static_cast<int32>(posterior.size()) != mat.
NumRows()) {
   283             KALDI_WARN << 
"Size mismatch between posterior " << posterior.size()
   284                        << 
" and features " << mat.
NumRows() << 
" for utterance "   290           double *auxf_ptr = (compute_objf_change ? &tot_auxf_change : NULL );
   293               max_count_scale = 1.0;
   295             max_count_scale = opts.
max_count / this_t;
   296             KALDI_LOG << 
"Scaling stats for utterance " << utt << 
" by scale "   297                       << max_count_scale << 
" due to --max-count="   306                                                &ivector_writer, auxf_ptr));
   314       KALDI_LOG << 
"Done " << num_done << 
" files, " << num_err
   315                 << 
" with errors.  Total (weighted) frames " << tot_t;
   316       if (compute_objf_change)
   317         KALDI_LOG << 
"Overall average objective-function change from estimating "   318                   << 
"ivector was " << (tot_auxf_change / tot_t) << 
" per frame "   319                   << 
" over " << tot_t << 
" (weighted) frames.";
   321       return (num_done != 0 ? 0 : 1);
   324                    "--spk2utt option is incompatible with --num-threads option");
   330                            posterior_rspecifier,
   331                            ivectors_wspecifier);
   333   } 
catch(
const std::exception &e) {
   334     std::cerr << e.what();
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
void Run(C *c)
This function takes ownership of the pointer "c", and will delete it in the same sequence as Run was called on the jobs.
 
void Register(OptionsItf *opts)
 
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor]. 
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero). 
 
Real Norm(Real p) const
Compute the p-th norm of the vector. 
 
void Write(const std::string &key, const T &value) const
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
 
BaseFloat TotalPosterior(const Posterior &post)
Returns the total of all the weights in "post". 
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
const T & Value(const std::string &key)
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables. 
 
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility. 
 
bool HasKey(const std::string &key)
 
void ScalePosterior(BaseFloat scale, Posterior *post)
Scales the BaseFloat (weight) element in the posterior entries. 
 
int NumArgs() const
Number of positional parameters (cf. argc-1).
 
A class representing a vector. 
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, const IvectorEstimationOptions &opts, bool compute_objf_change, const std::string &spk2utt_rspecifier, const std::string &feature_rspecifier, const std::string &posterior_rspecifier, const std::string &ivector_wspecifier)
 
void Register(OptionsItf *opts)