ivector-extract-online.cc File Reference
Include dependency graph for ivector-extract-online.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 27 of file ivector-extract-online.cc.

References SequentialTableReader< Holder >::Done(), kaldi::EstimateIvectorsOnline(), kaldi::g_num_threads, ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, IvectorExtractor::IvectorDim(), KALDI_LOG, KALDI_VLOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), IvectorExtractor::PriorOffset(), ParseOptions::Read(), kaldi::ReadKaldiObject(), ParseOptions::Register(), MatrixBase< Real >::Row(), kaldi::TotalPosterior(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

27  {
28  using namespace kaldi;
29  typedef kaldi::int32 int32;
30  typedef kaldi::int64 int64;
31  try {
32  const char *usage =
33  "Extract iVectors for utterances, using a trained iVector extractor,\n"
34  "and features and Gaussian-level posteriors. This version extracts an\n"
35  "iVector every n frames (see the --ivector-period option), by including\n"
36  "all frames up to that point in the utterance. This is designed to\n"
37  "correspond with what will happen in a streaming decoding scenario;\n"
38  "the iVectors would be used in neural net training. The iVectors are\n"
39  "output as an archive of matrices, indexed by utterance-id; each row\n"
40  "corresponds to an iVector.\n"
41  "See also ivector-extract-online2\n"
42  "\n"
43  "Usage: ivector-extract-online [options] <model-in> <feature-rspecifier>"
44  "<posteriors-rspecifier> <ivector-wspecifier>\n"
45  "e.g.: \n"
46  " gmm-global-get-post 1.dubm '$feats' ark:- | \\\n"
47  " ivector-extract-online --ivector-period=10 final.ie '$feats' ark,s,cs:- ark,t:ivectors.1.ark\n";
48 
49  ParseOptions po(usage);
50  int32 num_cg_iters = 15;
51  int32 ivector_period = 10;
52  BaseFloat max_count = 0.0;
53  g_num_threads = 8;
54 
55  po.Register("num-cg-iters", &num_cg_iters,
56  "Number of iterations of conjugate gradient descent to perform "
57  "each time we re-estimate the iVector.");
58  po.Register("ivector-period", &ivector_period,
59  "Controls how frequently we re-estimate the iVector as we get "
60  "more data.");
61  po.Register("num-threads", &g_num_threads,
62  "Number of threads to use for computing derived variables "
63  "of iVector extractor, at process start-up.");
64  po.Register("max-count", &max_count,
65  "If >0, when the count of posteriors exceeds max-count we will "
66  "start using a stronger prior term. Can make iVectors from "
67  "longer than normal utterances look more 'typical'. Interpret "
68  "this value as a number of frames multiplied by your "
69  "posterior scale (so typically 0.1 times a number of frames).");
70  po.Read(argc, argv);
71 
72  if (po.NumArgs() != 4) {
73  po.PrintUsage();
74  exit(1);
75  }
76 
77  std::string ivector_extractor_rxfilename = po.GetArg(1),
78  feature_rspecifier = po.GetArg(2),
79  posteriors_rspecifier = po.GetArg(3),
80  ivectors_wspecifier = po.GetArg(4);
81 
82  IvectorExtractor extractor;
83  ReadKaldiObject(ivector_extractor_rxfilename, &extractor);
84 
85  double tot_objf_impr = 0.0, tot_t = 0.0, tot_length = 0.0,
86  tot_length_utt_end = 0.0;
87  int32 num_done = 0, num_err = 0;
88 
89  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
90  RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
91  BaseFloatMatrixWriter ivector_writer(ivectors_wspecifier);
92 
93 
94  for (; !feature_reader.Done(); feature_reader.Next()) {
95  std::string utt = feature_reader.Key();
96  if (!posteriors_reader.HasKey(utt)) {
97  KALDI_WARN << "No posteriors for utterance " << utt;
98  num_err++;
99  continue;
100  }
101  const Matrix<BaseFloat> &feats = feature_reader.Value();
102  const Posterior &posterior = posteriors_reader.Value(utt);
103 
104  if (static_cast<int32>(posterior.size()) != feats.NumRows()) {
105  KALDI_WARN << "Size mismatch between posterior " << posterior.size()
106  << " and features " << feats.NumRows() << " for utterance "
107  << utt;
108  num_err++;
109  continue;
110  }
111 
112 
113  Matrix<BaseFloat> ivectors;
114  double objf_impr_per_frame;
115  objf_impr_per_frame = EstimateIvectorsOnline(feats, posterior, extractor,
116  ivector_period, num_cg_iters,
117  max_count, &ivectors);
118 
119  BaseFloat offset = extractor.PriorOffset();
120  for (int32 i = 0 ; i < ivectors.NumRows(); i++)
121  ivectors(i, 0) -= offset;
122 
123  double tot_post = TotalPosterior(posterior);
124 
125  KALDI_VLOG(2) << "For utterance " << utt << " objf impr/frame is "
126  << objf_impr_per_frame << " per frame, over "
127  << tot_post << " frames (weighted).";
128 
129  ivector_writer.Write(utt, ivectors);
130 
131  tot_t += tot_post;
132  tot_objf_impr += objf_impr_per_frame * tot_post;
133  tot_length_utt_end += ivectors.Row(ivectors.NumRows() - 1).Norm(2.0) *
134  tot_post;
135  for (int32 i = 0; i < ivectors.NumRows(); i++)
136  tot_length += ivectors.Row(i).Norm(2.0) * tot_post / ivectors.NumRows();
137 
138  num_done++;
139  }
140 
141  KALDI_LOG << "Estimated iVectors for " << num_done << " files, " << num_err
142  << " with errors.";
143  KALDI_LOG << "Average objective-function improvement was "
144  << (tot_objf_impr / tot_t) << " per frame, over "
145  << tot_t << " frames (weighted).";
146  KALDI_LOG << "Average iVector length was " << (tot_length / tot_t)
147  << " and at utterance-end was " << (tot_length_utt_end / tot_t)
148  << ", over " << tot_t << " frames (weighted); "
149  << " expected length is " << sqrt(extractor.IvectorDim());
150 
151  return (num_done != 0 ? 0 : 1);
152  } catch(const std::exception &e) {
153  std::cerr << e.what();
154  return -1;
155  }
156 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
double PriorOffset() const
The distribution over iVectors, in our formulation, is not centered at zero; its first dimension has a nonzero offset. This function returns that offset.
int32 g_num_threads
Definition: kaldi-thread.cc:25
A templated class for writing objects to an archive or script file; see The Table concept.
Definition: kaldi-table.h:368
kaldi::int32 int32
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
BaseFloat TotalPosterior(const Posterior &post)
Returns the total of all the weights in "post".
Definition: posterior.cc:230
Allows random access to a collection of objects in an archive or script file; see The Table concept.
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:150
double EstimateIvectorsOnline(const Matrix< BaseFloat > &feats, const Posterior &post, const IvectorExtractor &extractor, int32 ivector_period, int32 num_cg_iters, BaseFloat max_count, Matrix< BaseFloat > *ivectors)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
#define KALDI_LOG
Definition: kaldi-error.h:153