ivector-extract.cc File Reference
Include dependency graph for ivector-extract.cc:

Go to the source code of this file.

Classes

class  IvectorExtractTask
 

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks (the reference is truncated in this summary).
 

Functions

int32 RunPerSpeaker (const std::string &ivector_extractor_rxfilename, const IvectorEstimationOptions &opts, bool compute_objf_change, const std::string &spk2utt_rspecifier, const std::string &feature_rspecifier, const std::string &posterior_rspecifier, const std::string &ivector_wspecifier)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 210 of file ivector-extract.cc.

References IvectorEstimationOptions::acoustic_weight, SequentialTableReader< Holder >::Done(), kaldi::g_num_threads, ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), IvectorExtractTask::IvectorExtractTask(), KALDI_ASSERT, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), IvectorEstimationOptions::max_count, SequentialTableReader< Holder >::Next(), TaskSequencerConfig::num_threads, ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), ParseOptions::Read(), kaldi::ReadKaldiObject(), IvectorEstimationOptions::Register(), ParseOptions::Register(), TaskSequencerConfig::Register(), TaskSequencer< C >::Run(), kaldi::RunPerSpeaker(), kaldi::ScalePosterior(), kaldi::TotalPosterior(), RandomAccessTableReader< Holder >::Value(), and SequentialTableReader< Holder >::Value().

210  {
211  using namespace kaldi;
212  typedef kaldi::int32 int32;
213  typedef kaldi::int64 int64;
214  try {
215  const char *usage =
216  "Extract iVectors for utterances, using a trained iVector extractor,\n"
217  "and features and Gaussian-level posteriors\n"
218  "Usage: ivector-extract [options] <model-in> <feature-rspecifier> "
219  "<posteriors-rspecifier> <ivector-wspecifier>\n"
220  "e.g.: \n"
221  " fgmm-global-gselect-to-post 1.ubm '$feats' 'ark:gunzip -c gselect.1.gz|' ark:- | \\\n"
222  " ivector-extract final.ie '$feats' ark,s,cs:- ark,t:ivectors.1.ark\n";
223 
224  ParseOptions po(usage);
225  bool compute_objf_change = true;
226  IvectorEstimationOptions opts;
227  std::string spk2utt_rspecifier;
228  TaskSequencerConfig sequencer_config;
229  po.Register("compute-objf-change", &compute_objf_change,
230  "If true, compute the change in objective function from using "
231  "nonzero iVector (a potentially useful diagnostic). Combine "
232  "with --verbose=2 for per-utterance information");
233  po.Register("spk2utt", &spk2utt_rspecifier, "Supply this option if you "
234  "want iVectors to be output at the per-speaker level, estimated "
235  "using stats accumulated from multiple utterances. Note: this "
236  "is not the normal way iVectors are obtained for speaker-id. "
237  "This option will cause the program to ignore the --num-threads "
238  "option.");
239 
240  opts.Register(&po);
241  sequencer_config.Register(&po);
242 
243  po.Read(argc, argv);
244 
245  if (po.NumArgs() != 4) {
246  po.PrintUsage();
247  exit(1);
248  }
249 
250  std::string ivector_extractor_rxfilename = po.GetArg(1),
251  feature_rspecifier = po.GetArg(2),
252  posterior_rspecifier = po.GetArg(3),
253  ivectors_wspecifier = po.GetArg(4);
254 
255 
256  if (spk2utt_rspecifier.empty()) {
257  // g_num_threads affects how ComputeDerivedVars is called when we read the
258  // extractor.
259  g_num_threads = sequencer_config.num_threads;
260  IvectorExtractor extractor;
261  ReadKaldiObject(ivector_extractor_rxfilename, &extractor);
262 
263  double tot_auxf_change = 0.0, tot_t = 0.0;
264  int32 num_done = 0, num_err = 0;
265 
266  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
267  RandomAccessPosteriorReader posterior_reader(posterior_rspecifier);
268  BaseFloatVectorWriter ivector_writer(ivectors_wspecifier);
269 
270  {
271  TaskSequencer<IvectorExtractTask> sequencer(sequencer_config);
272  for (; !feature_reader.Done(); feature_reader.Next()) {
273  std::string utt = feature_reader.Key();
274  if (!posterior_reader.HasKey(utt)) {
275  KALDI_WARN << "No posteriors for utterance " << utt;
276  num_err++;
277  continue;
278  }
279  const Matrix<BaseFloat> &mat = feature_reader.Value();
280  Posterior posterior = posterior_reader.Value(utt);
281 
282  if (static_cast<int32>(posterior.size()) != mat.NumRows()) {
283  KALDI_WARN << "Size mismatch between posterior " << posterior.size()
284  << " and features " << mat.NumRows() << " for utterance "
285  << utt;
286  num_err++;
287  continue;
288  }
289 
290  double *auxf_ptr = (compute_objf_change ? &tot_auxf_change : NULL );
291 
292  double this_t = opts.acoustic_weight * TotalPosterior(posterior),
293  max_count_scale = 1.0;
294  if (opts.max_count > 0 && this_t > opts.max_count) {
295  max_count_scale = opts.max_count / this_t;
296  KALDI_LOG << "Scaling stats for utterance " << utt << " by scale "
297  << max_count_scale << " due to --max-count="
298  << opts.max_count;
299  this_t = opts.max_count;
300  }
301  ScalePosterior(opts.acoustic_weight * max_count_scale,
302  &posterior);
303  // note: now, this_t == sum of posteriors.
304 
305  sequencer.Run(new IvectorExtractTask(extractor, utt, mat, posterior,
306  &ivector_writer, auxf_ptr));
307 
308  tot_t += this_t;
309  num_done++;
310  }
311  // Destructor of "sequencer" will wait for any remaining tasks.
312  }
313 
314  KALDI_LOG << "Done " << num_done << " files, " << num_err
315  << " with errors. Total (weighted) frames " << tot_t;
316  if (compute_objf_change)
317  KALDI_LOG << "Overall average objective-function change from estimating "
318  << "ivector was " << (tot_auxf_change / tot_t) << " per frame "
319  << " over " << tot_t << " (weighted) frames.";
320 
321  return (num_done != 0 ? 0 : 1);
322  } else {
323  KALDI_ASSERT(sequencer_config.num_threads == 1 &&
324  "--spk2utt option is incompatible with --num-threads option");
325  return RunPerSpeaker(ivector_extractor_rxfilename,
326  opts,
327  compute_objf_change,
328  spk2utt_rspecifier,
329  feature_rspecifier,
330  posterior_rspecifier,
331  ivectors_wspecifier);
332  }
333  } catch(const std::exception &e) {
334  std::cerr << e.what();
335  return -1;
336  }
337 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Register(OptionsItf *opts)
int32 g_num_threads
Definition: kaldi-thread.cc:25
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
BaseFloat TotalPosterior(const Posterior &post)
Returns the total of all the weights in "post".
Definition: posterior.cc:230
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:150
void ScalePosterior(BaseFloat scale, Posterior *post)
Scales the BaseFloat (weight) element in the posterior entries.
Definition: posterior.cc:218
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153
int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, const IvectorEstimationOptions &opts, bool compute_objf_change, const std::string &spk2utt_rspecifier, const std::string &feature_rspecifier, const std::string &posterior_rspecifier, const std::string &ivector_wspecifier)
void Register(OptionsItf *opts)
Definition: kaldi-thread.h:160