28 using namespace kaldi;
30 typedef kaldi::int64 int64;
33 "Extract iVectors for utterances, using a trained iVector extractor,\n" 34 "and features and Gaussian-level posteriors. This version extracts an\n" 35 "iVector every n frames (see the --ivector-period option), by including\n" 36 "all frames up to that point in the utterance. This is designed to\n" 37 "correspond with what will happen in a streaming decoding scenario;\n" 38 "the iVectors would be used in neural net training. The iVectors are\n" 39 "output as an archive of matrices, indexed by utterance-id; each row\n" 40 "corresponds to an iVector.\n" 41 "See also ivector-extract-online2\n" 43 "Usage: ivector-extract-online [options] <model-in> <feature-rspecifier>" 44 "<posteriors-rspecifier> <ivector-wspecifier>\n" 46 " gmm-global-get-post 1.dubm '$feats' ark:- | \\\n" 47 " ivector-extract-online --ivector-period=10 final.ie '$feats' ark,s,cs:- ark,t:ivectors.1.ark\n";
50 int32 num_cg_iters = 15;
51 int32 ivector_period = 10;
55 po.Register(
"num-cg-iters", &num_cg_iters,
56 "Number of iterations of conjugate gradient descent to perform " 57 "each time we re-estimate the iVector.");
58 po.Register(
"ivector-period", &ivector_period,
59 "Controls how frequently we re-estimate the iVector as we get " 62 "Number of threads to use for computing derived variables " 63 "of iVector extractor, at process start-up.");
64 po.Register(
"max-count", &max_count,
65 "If >0, when the count of posteriors exceeds max-count we will " 66 "start using a stronger prior term. Can make iVectors from " 67 "longer than normal utterances look more 'typical'. Interpret " 68 "this value as a number of frames multiplied by your " 69 "posterior scale (so typically 0.1 times a number of frames).");
72 if (po.NumArgs() != 4) {
77 std::string ivector_extractor_rxfilename = po.GetArg(1),
78 feature_rspecifier = po.GetArg(2),
79 posteriors_rspecifier = po.GetArg(3),
80 ivectors_wspecifier = po.GetArg(4);
85 double tot_objf_impr = 0.0, tot_t = 0.0, tot_length = 0.0,
86 tot_length_utt_end = 0.0;
87 int32 num_done = 0, num_err = 0;
94 for (; !feature_reader.Done(); feature_reader.Next()) {
95 std::string utt = feature_reader.Key();
96 if (!posteriors_reader.HasKey(utt)) {
97 KALDI_WARN <<
"No posteriors for utterance " << utt;
102 const Posterior &posterior = posteriors_reader.Value(utt);
104 if (static_cast<int32>(posterior.size()) != feats.
NumRows()) {
105 KALDI_WARN <<
"Size mismatch between posterior " << posterior.size()
106 <<
" and features " << feats.
NumRows() <<
" for utterance " 114 double objf_impr_per_frame;
116 ivector_period, num_cg_iters,
117 max_count, &ivectors);
120 for (int32
i = 0 ;
i < ivectors.
NumRows();
i++)
121 ivectors(
i, 0) -= offset;
125 KALDI_VLOG(2) <<
"For utterance " << utt <<
" objf impr/frame is " 126 << objf_impr_per_frame <<
" per frame, over " 127 << tot_post <<
" frames (weighted).";
129 ivector_writer.Write(utt, ivectors);
132 tot_objf_impr += objf_impr_per_frame * tot_post;
133 tot_length_utt_end += ivectors.
Row(ivectors.
NumRows() - 1).Norm(2.0) *
135 for (int32
i = 0;
i < ivectors.
NumRows();
i++)
136 tot_length += ivectors.
Row(
i).Norm(2.0) * tot_post / ivectors.
NumRows();
141 KALDI_LOG <<
"Estimated iVectors for " << num_done <<
" files, " << num_err
143 KALDI_LOG <<
"Average objective-function improvement was " 144 << (tot_objf_impr / tot_t) <<
" per frame, over " 145 << tot_t <<
" frames (weighted).";
146 KALDI_LOG <<
"Average iVector length was " << (tot_length / tot_t)
147 <<
" and at utterance-end was " << (tot_length_utt_end / tot_t)
148 <<
", over " << tot_t <<
" frames (weighted); " 149 <<
" expected length is " << sqrt(extractor.
IvectorDim());
151 return (num_done != 0 ? 0 : 1);
152 }
catch(
const std::exception &e) {
153 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
A templated class for writing objects to an archive or script file; see The Table concept...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
BaseFloat TotalPosterior(const Posterior &post)
Returns the total of all the weights in "post".
Allows random access to a collection of objects in an archive or script file; see The Table concept...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
double EstimateIvectorsOnline(const Matrix< BaseFloat > &feats, const Posterior &post, const IvectorExtractor &extractor, int32 ivector_period, int32 num_cg_iters, BaseFloat max_count, Matrix< BaseFloat > *ivectors)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).