ivector-extractor-acc-stats.cc
Go to the documentation of this file.
1 // ivectorbin/ivector-extractor-acc-stats.cc
2 
3 // Copyright 2013 Daniel Povey
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "gmm/am-diag-gmm.h"
25 #include "util/kaldi-thread.h"
26 
27 
28 namespace kaldi {
29 
30 // this class is used to run the command
31 // stats.AccStatsForUtterance(extractor, mat, posterior);
32 // in parallel.
33 class IvectorTask {
34  public:
35  IvectorTask(const IvectorExtractor &extractor,
36  const Matrix<BaseFloat> &features,
37  const Posterior &posterior,
38  IvectorExtractorStats *stats): extractor_(extractor),
39  features_(features),
40  posterior_(posterior),
41  stats_(stats) { }
42 
43  void operator () () {
45  }
46  ~IvectorTask() { } // the destructor doesn't have to do anything.
47  private:
49  Matrix<BaseFloat> features_; // not a reference, since features come from a
50  // Table and the reference we get from that is
51  // not valid long-term.
52  Posterior posterior_; // as above.
54 };
55 
56 
57 
58 }
59 
60 
61 
62 int main(int argc, char *argv[]) {
63  using namespace kaldi;
64  typedef kaldi::int32 int32;
65  typedef kaldi::int64 int64;
66  try {
67  const char *usage =
68  "Accumulate stats for iVector extractor training\n"
69  "Reads in features and Gaussian-level posteriors (typically from a full GMM)\n"
70  "Supports multiple threads, but won't be able to make use of too many at a time\n"
71  "(e.g. more than about 4)\n"
72  "Usage: ivector-extractor-acc-stats [options] <model-in> <feature-rspecifier>"
73  "<posteriors-rspecifier> <stats-out>\n"
74  "e.g.: \n"
75  " fgmm-global-gselect-to-post 1.fgmm '$feats' 'ark:gunzip -c gselect.1.gz|' ark:- | \\\n"
76  " ivector-extractor-acc-stats 2.ie '$feats' ark,s,cs:- 2.1.acc\n";
77 
78  ParseOptions po(usage);
79  bool binary = true;
81  TaskSequencerConfig sequencer_opts;
82  po.Register("binary", &binary, "Write output in binary mode");
83  stats_opts.Register(&po);
84  sequencer_opts.Register(&po);
85 
86  po.Read(argc, argv);
87 
88  if (po.NumArgs() != 4) {
89  po.PrintUsage();
90  exit(1);
91  }
92 
93  std::string ivector_extractor_rxfilename = po.GetArg(1),
94  feature_rspecifier = po.GetArg(2),
95  posteriors_rspecifier = po.GetArg(3),
96  accs_wxfilename = po.GetArg(4);
97 
98 
99  // Initialize these Reader objects before reading the IvectorExtractor,
100  // because it uses up a lot of memory and any fork() after that will
101  // be in danger of causing an allocation failure.
102  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
103  RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
104 
105 
106  // This is a bit of a mess... the code that reads in the extractor calls
107  // ComputeDerivedVars, and it can do this multi-threaded, controlled by
108  // g_num_threads. So if the user specified the --num-threads option, which
109  // goes to sequencer_opts in this case, copy it to g_num_threads.
110  g_num_threads = sequencer_opts.num_threads;
111 
112  IvectorExtractor extractor;
113  ReadKaldiObject(ivector_extractor_rxfilename, &extractor);
114 
115  IvectorExtractorStats stats(extractor, stats_opts);
116 
117 
118  int64 tot_t = 0;
119  int32 num_done = 0, num_err = 0;
120 
121  {
122  TaskSequencer<IvectorTask> sequencer(sequencer_opts);
123 
124  for (; !feature_reader.Done(); feature_reader.Next()) {
125  std::string key = feature_reader.Key();
126  if (!posteriors_reader.HasKey(key)) {
127  KALDI_WARN << "No posteriors for utterance " << key;
128  num_err++;
129  continue;
130  }
131  const Matrix<BaseFloat> &mat = feature_reader.Value();
132  const Posterior &posterior = posteriors_reader.Value(key);
133 
134  if (static_cast<int32>(posterior.size()) != mat.NumRows()) {
135  KALDI_WARN << "Size mismatch between posterior " << (posterior.size())
136  << " and features " << (mat.NumRows()) << " for utterance "
137  << key;
138  num_err++;
139  continue;
140  }
141 
142  sequencer.Run(new IvectorTask(extractor, mat, posterior, &stats));
143 
144  tot_t += posterior.size();
145  num_done++;
146  }
147  // destructor of "sequencer" will wait for any remaining tasks that
148  // have not yet completed.
149  }
150 
151  KALDI_LOG << "Done " << num_done << " files, " << num_err
152  << " with errors. Total frames " << tot_t;
153 
154  {
155  Output ko(accs_wxfilename, binary);
156  stats.Write(ko.Stream(), binary);
157  }
158 
159  KALDI_LOG << "Wrote stats to " << accs_wxfilename;
160 
161  return (num_done != 0 ? 0 : 1);
162  } catch(const std::exception &e) {
163  std::cerr << e.what();
164  return -1;
165  }
166 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Run(C *c)
This function takes ownership of the pointer "c", and will delete it in the same sequence as Run was ...
Definition: kaldi-thread.h:190
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 g_num_threads
Definition: kaldi-thread.cc:25
IvectorExtractorStats is a class used to update the parameters of the ivector extractor.
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
const IvectorExtractor & extractor_
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
int main(int argc, char *argv[])
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
Options for IvectorExtractorStats, which is used to update the parameters of IvectorExtractor.
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void AccStatsForUtterance(const IvectorExtractor &extractor, const MatrixBase< BaseFloat > &feats, const Posterior &post)
IvectorExtractorStats * stats_
IvectorTask(const IvectorExtractor &extractor, const Matrix< BaseFloat > &features, const Posterior &posterior, IvectorExtractorStats *stats)
#define KALDI_LOG
Definition: kaldi-error.h:153
void Write(std::ostream &os, bool binary)
void Register(OptionsItf *opts)
Definition: kaldi-thread.h:160