39 double *tot_auxf_change):
44 bool need_2nd_order_stats =
false;
48 need_2nd_order_stats);
70 <<
" frames (weighted)";
94 bool compute_objf_change,
95 const std::string &spk2utt_rspecifier,
96 const std::string &feature_rspecifier,
97 const std::string &posterior_rspecifier,
98 const std::string &ivector_wspecifier) {
106 double tot_auxf_change = 0.0, tot_post = 0.0, tot_norm = 0.0;
107 int32 num_utt_done = 0, num_utt_err = 0,
108 num_spk_done = 0, num_spk_err = 0;
110 for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
111 std::string spk = spk2utt_reader.
Key();
112 const std::vector<std::string> &utts = spk2utt_reader.
Value();
114 bool need_2nd_order_stats =
false;
118 need_2nd_order_stats);
120 for (
size_t i = 0;
i < utts.size();
i++) {
121 const std::string &utt = utts[
i];
122 if (!feature_reader.
HasKey(utt)) {
123 KALDI_WARN <<
"No features present for utterance " << utt;
128 if (!posterior_reader.
HasKey(utt)) {
129 KALDI_WARN <<
"No posteriors present for utterance " << utt;
134 if (feats.
NumRows() != posterior.size()) {
135 KALDI_WARN <<
"Posterior has wrong size " << posterior.size()
136 <<
" vs. feats " << feats.
NumRows() <<
" for " 143 utt_stats.AccStats(feats, posterior);
146 if (utt_stats.NumFrames() == 0.0) {
147 KALDI_WARN <<
"No stats accumulated for speaker " << spk;
152 double scale = opts.
max_count / utt_stats.NumFrames();
153 utt_stats.Scale(scale);
154 KALDI_LOG <<
"Scaling stats for speaker " << spk <<
" by scale " 155 << scale <<
" due to --max-count=" << opts.
max_count;
161 if (compute_objf_change) {
162 double old_auxf = extractor.
GetAuxf(utt_stats, ivector);
164 double new_auxf = extractor.
GetAuxf(utt_stats, ivector);
165 double auxf_change = new_auxf - old_auxf;
167 KALDI_LOG <<
"Auxf change for speaker " << spk <<
" was " 168 << (auxf_change / utt_stats.NumFrames()) <<
" per frame, over " 169 << utt_stats.NumFrames() <<
" frames (weighted).";
170 tot_auxf_change += auxf_change;
179 KALDI_LOG <<
"Ivector norm for speaker " << spk
180 <<
" was " << ivector.Norm(2.0);
182 tot_norm += ivector.Norm(2.0) * utt_stats.NumFrames();
183 tot_post += utt_stats.NumFrames();
186 ivector_writer.
Write(spk, ivector_flt);
190 KALDI_LOG <<
"Done " << num_spk_done <<
" speakers; " << num_spk_err
191 <<
" with errors. " << num_utt_done <<
" utterances " 192 <<
"were processed, " << num_utt_err <<
" with errors.";
193 if (tot_post != 0.0) {
194 if (compute_objf_change) {
195 KALDI_LOG <<
"Overall weighted-average objective function improvement was " 196 << (tot_auxf_change / tot_post) <<
" over " << tot_post
197 <<
" frames (weighted)";
199 KALDI_LOG <<
"Average iVector norm (weighted by frames) was " 200 << (tot_norm / tot_post) <<
" over " << tot_post
201 <<
" frames (weighted)";
203 return (num_spk_done != 0 ? 0 : 1);
210 int main(
int argc,
char *argv[]) {
211 using namespace kaldi;
213 typedef kaldi::int64 int64;
216 "Extract iVectors for utterances, using a trained iVector extractor,\n" 217 "and features and Gaussian-level posteriors\n" 218 "Usage: ivector-extract [options] <model-in> <feature-rspecifier> " 219 "<posteriors-rspecifier> <ivector-wspecifier>\n" 221 " fgmm-global-gselect-to-post 1.ubm '$feats' 'ark:gunzip -c gselect.1.gz|' ark:- | \\\n" 222 " ivector-extract final.ie '$feats' ark,s,cs:- ark,t:ivectors.1.ark\n";
225 bool compute_objf_change =
true;
227 std::string spk2utt_rspecifier;
229 po.
Register(
"compute-objf-change", &compute_objf_change,
230 "If true, compute the change in objective function from using " 231 "nonzero iVector (a potentially useful diagnostic). Combine " 232 "with --verbose=2 for per-utterance information");
233 po.
Register(
"spk2utt", &spk2utt_rspecifier,
"Supply this option if you " 234 "want iVectors to be output at the per-speaker level, estimated " 235 "using stats accumulated from multiple utterances. Note: this " 236 "is not the normal way iVectors are obtained for speaker-id. " 237 "This option will cause the program to ignore the --num-threads " 250 std::string ivector_extractor_rxfilename = po.
GetArg(1),
251 feature_rspecifier = po.
GetArg(2),
252 posterior_rspecifier = po.
GetArg(3),
253 ivectors_wspecifier = po.
GetArg(4);
256 if (spk2utt_rspecifier.empty()) {
263 double tot_auxf_change = 0.0, tot_t = 0.0;
264 int32 num_done = 0, num_err = 0;
272 for (; !feature_reader.
Done(); feature_reader.
Next()) {
273 std::string utt = feature_reader.
Key();
274 if (!posterior_reader.
HasKey(utt)) {
275 KALDI_WARN <<
"No posteriors for utterance " << utt;
282 if (static_cast<int32>(posterior.size()) != mat.
NumRows()) {
283 KALDI_WARN <<
"Size mismatch between posterior " << posterior.size()
284 <<
" and features " << mat.
NumRows() <<
" for utterance " 290 double *auxf_ptr = (compute_objf_change ? &tot_auxf_change : NULL );
293 max_count_scale = 1.0;
295 max_count_scale = opts.
max_count / this_t;
296 KALDI_LOG <<
"Scaling stats for utterance " << utt <<
" by scale " 297 << max_count_scale <<
" due to --max-count=" 306 &ivector_writer, auxf_ptr));
314 KALDI_LOG <<
"Done " << num_done <<
" files, " << num_err
315 <<
" with errors. Total (weighted) frames " << tot_t;
316 if (compute_objf_change)
317 KALDI_LOG <<
"Overall average objective-function change from estimating " 318 <<
"ivector was " << (tot_auxf_change / tot_t) <<
" per frame " 319 <<
" over " << tot_t <<
" (weighted) frames.";
321 return (num_done != 0 ? 0 : 1);
324 "--spk2utt option is incompatible with --num-threads option");
330 posterior_rspecifier,
331 ivectors_wspecifier);
333 }
catch(
const std::exception &e) {
334 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void Run(C *c)
This function takes ownership of the pointer "c", and will delete it in the same sequence as Run was called on the jobs.
void Register(OptionsItf *opts)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
Real Norm(Real p) const
Compute the p-th norm of the vector.
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
BaseFloat TotalPosterior(const Posterior &post)
Returns the total of all the weights in "post".
Allows random access to a collection of objects in an archive or script file; see The Table concept.
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
void ScalePosterior(BaseFloat scale, Posterior *post)
Scales the BaseFloat (weight) element in the posterior entries.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, const IvectorEstimationOptions &opts, bool compute_objf_change, const std::string &spk2utt_rspecifier, const std::string &feature_rspecifier, const std::string &posterior_rspecifier, const std::string &ivector_wspecifier)
void Register(OptionsItf *opts)