nnet3-get-egs.cc File Reference
#include <sstream>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "hmm/posterior.h"
#include "nnet3/nnet-example.h"
#include "nnet3/nnet-example-utils.h"
Include dependency graph for nnet3-get-egs.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
 
 kaldi::nnet3
 

Functions

static bool ProcessFile (const GeneralMatrix &feats, const MatrixBase< BaseFloat > *ivector_feats, int32 ivector_period, const Posterior &pdf_post, const std::string &utt_id, bool compress, int32 num_pdfs, int32 length_tolerance, UtteranceSplitter *utt_splitter, NnetExampleWriter *example_writer)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 140 of file nnet3-get-egs.cc.

References ExampleGenerationConfig::ComputeDerived(), SequentialTableReader< Holder >::Done(), UtteranceSplitter::ExitStatus(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ERR, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), GeneralMatrix::NumRows(), ParseOptions::PrintUsage(), kaldi::nnet3::ProcessFile(), ParseOptions::Read(), ParseOptions::Register(), ExampleGenerationConfig::Register(), RandomAccessTableReader< Holder >::Value(), and SequentialTableReader< Holder >::Value().

140  {
141  try {
142  using namespace kaldi;
143  using namespace kaldi::nnet3;
144  typedef kaldi::int32 int32;
145  typedef kaldi::int64 int64;
146 
147  const char *usage =
148  "Get frame-by-frame examples of data for nnet3 neural network training.\n"
149  "Essentially this is a format change from features and posteriors\n"
150  "into a special frame-by-frame format. This program handles the\n"
151  "common case where you have some input features, possibly some\n"
152  "iVectors, and one set of labels. If people in future want to\n"
153  "do different things they may have to extend this program or create\n"
154  "different versions of it for different tasks (the egs format is quite\n"
155  "general)\n"
156  "\n"
157  "Usage: nnet3-get-egs [options] <features-rspecifier> "
158  "<pdf-post-rspecifier> <egs-out>\n"
159  "\n"
160  "An example [where $feats expands to the actual features]:\n"
161  "nnet3-get-egs --num-pdfs=2658 --left-context=12 --right-context=9 --num-frames=8 \"$feats\"\\\n"
162  "\"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n"
163  " ark:- \n"
164  "See also: nnet3-chain-get-egs, nnet3-get-egs-simple\n";
165 
166 
167  bool compress = true;
168  int32 num_pdfs = -1, length_tolerance = 100,
169  targets_length_tolerance = 2,
170  online_ivector_period = 1;
171 
172  ExampleGenerationConfig eg_config; // controls num-frames,
173  // left/right-context, etc.
174 
175  std::string online_ivector_rspecifier;
176 
177  ParseOptions po(usage);
178 
179  po.Register("compress", &compress, "If true, write egs with input features "
180  "in compressed format (recommended). This is "
181  "only relevant if the features being read are un-compressed; "
182  "if already compressed, we keep we same compressed format when "
183  "dumping egs.");
184  po.Register("num-pdfs", &num_pdfs, "Number of pdfs in the acoustic "
185  "model");
186  po.Register("ivectors", &online_ivector_rspecifier, "Alias for "
187  "--online-ivectors option, for back compatibility");
188  po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier of "
189  "ivector features, as a matrix.");
190  po.Register("online-ivector-period", &online_ivector_period, "Number of "
191  "frames between iVectors in matrices supplied to the "
192  "--online-ivectors option");
193  po.Register("length-tolerance", &length_tolerance, "Tolerance for "
194  "difference in num-frames between feat and ivector matrices");
195  po.Register("targets-length-tolerance", &targets_length_tolerance,
196  "Tolerance for "
197  "difference in num-frames (after subsampling) between "
198  "feature matrix and posterior");
199  eg_config.Register(&po);
200 
201  po.Read(argc, argv);
202 
203  if (po.NumArgs() != 3) {
204  po.PrintUsage();
205  exit(1);
206  }
207 
208  if (num_pdfs <= 0)
209  KALDI_ERR << "--num-pdfs options is required.";
210 
211  eg_config.ComputeDerived();
212  UtteranceSplitter utt_splitter(eg_config);
213 
214  std::string feature_rspecifier = po.GetArg(1),
215  pdf_post_rspecifier = po.GetArg(2),
216  examples_wspecifier = po.GetArg(3);
217 
218  // SequentialGeneralMatrixReader can read either a Matrix or
219  // CompressedMatrix (or SparseMatrix, but not as relevant here),
220  // and it retains the type. This way, we can generate parts of
221  // the feature matrices without uncompressing and re-compressing.
222  SequentialGeneralMatrixReader feat_reader(feature_rspecifier);
223  RandomAccessPosteriorReader pdf_post_reader(pdf_post_rspecifier);
224  NnetExampleWriter example_writer(examples_wspecifier);
225  RandomAccessBaseFloatMatrixReader online_ivector_reader(
226  online_ivector_rspecifier);
227 
228  int32 num_err = 0;
229 
230  for (; !feat_reader.Done(); feat_reader.Next()) {
231  std::string key = feat_reader.Key();
232  const GeneralMatrix &feats = feat_reader.Value();
233  if (!pdf_post_reader.HasKey(key)) {
234  KALDI_WARN << "No pdf-level posterior for key " << key;
235  num_err++;
236  } else {
237  const Posterior &pdf_post = pdf_post_reader.Value(key);
238  const Matrix<BaseFloat> *online_ivector_feats = NULL;
239  if (!online_ivector_rspecifier.empty()) {
240  if (!online_ivector_reader.HasKey(key)) {
241  KALDI_WARN << "No iVectors for utterance " << key;
242  num_err++;
243  continue;
244  } else {
245  // this address will be valid until we call HasKey() or Value()
246  // again.
247  online_ivector_feats = &(online_ivector_reader.Value(key));
248  }
249  }
250 
251  if (online_ivector_feats != NULL &&
252  (abs(feats.NumRows() - (online_ivector_feats->NumRows() *
253  online_ivector_period)) > length_tolerance
254  || online_ivector_feats->NumRows() == 0)) {
255  KALDI_WARN << "Length difference between feats " << feats.NumRows()
256  << " and iVectors " << online_ivector_feats->NumRows()
257  << "exceeds tolerance " << length_tolerance;
258  num_err++;
259  continue;
260  }
261 
262  if (!ProcessFile(feats, online_ivector_feats, online_ivector_period,
263  pdf_post, key, compress, num_pdfs,
264  targets_length_tolerance,
265  &utt_splitter, &example_writer))
266  num_err++;
267  }
268  }
269  if (num_err > 0)
270  KALDI_WARN << num_err << " utterances had errors and could "
271  "not be processed.";
272  // utt_splitter prints stats in its destructor.
273  return utt_splitter.ExitStatus();
274  } catch(const std::exception &e) {
275  std::cerr << e.what() << '\n';
276  return -1;
277  }
278 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
This class is a wrapper that enables you to store a matrix in one of three forms: either as a Matrix<...
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
static bool ProcessFile(const discriminative::SplitDiscriminativeSupervisionOptions &config, const TransitionModel &tmodel, const MatrixBase< BaseFloat > &feats, const MatrixBase< BaseFloat > *ivector_feats, int32 ivector_period, const discriminative::DiscriminativeSupervision &supervision, const std::string &utt_id, bool compress, UtteranceSplitter *utt_splitter, NnetDiscriminativeExampleWriter *example_writer)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
MatrixIndexT NumRows() const
void ComputeDerived()
This function decodes 'num_frames_str' into 'num_frames', and ensures that the members of 'num_frames...