nnet-get-weighted-egs.cc File Reference
Include dependency graph for nnet-get-weighted-egs.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks, the reference:
 
 kaldi::nnet2
 

Functions

int32 GetCount (double expected_count)
 
static void ProcessFile (const MatrixBase< BaseFloat > &feats, const Posterior &pdf_post, const std::string &utt_id, const Vector< BaseFloat > &weights, int32 left_context, int32 right_context, int32 const_feat_dim, BaseFloat keep_proportion, BaseFloat weight_threshold, bool use_frame_selection, bool use_frame_weights, int64 *num_frames_written, int64 *num_frames_skipped, NnetExampleWriter *example_writer)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 110 of file nnet-get-weighted-egs.cc.

References VectorBase< Real >::Dim(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), kaldi::nnet2::ProcessFile(), ParseOptions::Read(), ParseOptions::Register(), RandomAccessTableReader< Holder >::Value(), and SequentialTableReader< Holder >::Value().

110  {
111  try {
112  using namespace kaldi;
113  using namespace kaldi::nnet2;
114  typedef kaldi::int32 int32;
115  typedef kaldi::int64 int64;
116 
117  const char *usage =
118  "Get frame-by-frame examples of data for neural network training.\n"
119  "Essentially this is a format change from features and posteriors\n"
120  "into a special frame-by-frame format. To split randomly into\n"
121  "different subsets, do nnet-copy-egs with --random=true, but\n"
122  "note that this does not randomize the order of frames.\n"
123  "\n"
124  "Usage: nnet-get-weighted-egs [options] <features-rspecifier> "
125  "<pdf-post-rspecifier> <weights-rspecifier> <training-examples-out>\n"
126  "\n"
127  "An example [where $feats expands to the actual features]:\n"
128  "nnet-get-weighted-egs --left-context=8 --right-context=8 \"$feats\" \\\n"
129  " \"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n"
130  " ark:- \n"
131  "Note: the --left-context and --right-context would be derived from\n"
132  "the output of nnet-info.";
133 
134 
135  int32 left_context = 0, right_context = 0, const_feat_dim = 0;
136  int32 srand_seed = 0;
137  BaseFloat keep_proportion = 1.0;
138  BaseFloat weight_threshold = 0.0;
139  bool use_frame_selection = true, use_frame_weights=false;
140 
141  ParseOptions po(usage);
142  po.Register("left-context", &left_context, "Number of frames of left context "
143  "the neural net requires.");
144  po.Register("right-context", &right_context, "Number of frames of right context "
145  "the neural net requires.");
146  po.Register("const-feat-dim", &const_feat_dim, "If specified, the last "
147  "const-feat-dim dimensions of the feature input are treated as "
148  "constant over the context window (so are not spliced)");
149  po.Register("keep-proportion", &keep_proportion, "If <1.0, this program will "
150  "randomly keep this proportion of the input samples. If >1.0, it will "
151  "in expectation copy a sample this many times. It will copy it a number "
152  "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
153  po.Register("srand", &srand_seed, "Seed for random number generator "
154  "(only relevant if --keep-proportion != 1.0)");
155  po.Register("weight-threshold", &weight_threshold, "Keep only frames with weights "
156  "above this threshold.");
157  po.Register("use-frame-selection", &use_frame_selection, "Remove the frames below threshold.");
158  po.Register("use-frame-weights", &use_frame_weights, "Scale the error derivatives by the weight");
159 
160  po.Read(argc, argv);
161 
162  srand(srand_seed);
163 
164  if (po.NumArgs() != 4) {
165  po.PrintUsage();
166  exit(1);
167  }
168 
169  std::string feature_rspecifier = po.GetArg(1),
170  pdf_post_rspecifier = po.GetArg(2),
171  weights_rspecifier = po.GetArg(3),
172  examples_wspecifier = po.GetArg(4);
173 
174  // Read in all the training files.
175  SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);
176  RandomAccessPosteriorReader pdf_post_reader(pdf_post_rspecifier);
177  RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier);
178  NnetExampleWriter example_writer(examples_wspecifier);
179 
180  int32 num_done = 0, num_err = 0;
181  int64 num_frames_written = 0;
182  int64 num_frames_skipped = 0;
183 
184  for (; !feat_reader.Done(); feat_reader.Next()) {
185  std::string key = feat_reader.Key();
186  const Matrix<BaseFloat> &feats = feat_reader.Value();
187  if (!pdf_post_reader.HasKey(key)) {
188  KALDI_WARN << "No pdf-level posterior for key " << key;
189  num_err++;
190  } else {
191  const Posterior &pdf_post = pdf_post_reader.Value(key);
192  if (pdf_post.size() != feats.NumRows()) {
193  KALDI_WARN << "Posterior has wrong size " << pdf_post.size()
194  << " versus " << feats.NumRows();
195  num_err++;
196  continue;
197  }
198  if (!weights_reader.HasKey(key)) {
199  KALDI_ERR << "No weights for utterance " << key;
200  //ProcessFile(feats, pdf_post, NULL,
201  // left_context, right_context, const_feat_dim, keep_proportion,
202  // weight_threshold, false, false, &num_frames_written,
203  // &num_frames_skipped, &example_writer);
204  } else {
205  Vector<BaseFloat> weights = weights_reader.Value(key);
206  if (weights.Dim() != static_cast<int32>(pdf_post.size())) {
207  KALDI_WARN << "Weights for utterance " << key
208  << " have wrong size, " << weights.Dim()
209  << " vs. " << pdf_post.size();
210  num_err++;
211  continue;
212  }
213  ProcessFile(feats, pdf_post, key, weights, left_context, right_context,
214  const_feat_dim, keep_proportion, weight_threshold,
215  use_frame_selection, use_frame_weights,
216  &num_frames_written, &num_frames_skipped, &example_writer);
217  }
218  num_done++;
219  }
220  }
221 
222  KALDI_LOG << "Finished generating examples, "
223  << "successfully processed " << num_done
224  << " feature files, wrote " << num_frames_written << " examples, "
225  << "skipped " << num_frames_skipped << " examples, "
226  << num_err << " files had errors.";
227  return (num_done == 0 ? 1 : 0);
228  } catch(const std::exception &e) {
229  std::cerr << e.what() << '\n';
230  return -1;
231  }
232 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
Definition: chain.dox:20
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
A class representing a vector.
Definition: kaldi-vector.h:406
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
static void ProcessFile(const MatrixBase< BaseFloat > &feats, const Posterior &pdf_post, const std::string &utt_id, int32 left_context, int32 right_context, int32 num_frames, int32 const_feat_dim, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer)
Definition: nnet-get-egs.cc:32
#define KALDI_LOG
Definition: kaldi-error.h:153