nnet3-xvector-get-egs.cc File Reference
#include <sstream>
#include "util/common-utils.h"
#include "nnet3/nnet-example.h"
Include dependency graph for nnet3-xvector-get-egs.cc:

Go to the source code of this file.

Classes

struct  ChunkInfo
 

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
 
 kaldi::nnet3
 

Functions

static void ProcessRangeFile (const std::string &range_rxfilename, unordered_map< std::string, std::vector< ChunkInfo *> > *utt_to_chunks)
 
static void WriteExamples (const MatrixBase< BaseFloat > &feats, const std::vector< ChunkInfo *> &chunks, const std::string &utt, bool compress, int32 num_pdfs, int32 *num_egs_written, std::vector< NnetExampleWriter *> *example_writers)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 133 of file nnet3-xvector-get-egs.cc.

References kaldi::DeletePointers(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), rnnlm::i, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), kaldi::nnet3::ProcessRangeFile(), ParseOptions::Read(), ParseOptions::Register(), SequentialTableReader< Holder >::Value(), and kaldi::nnet3::WriteExamples().

133  {
134  try {
135  using namespace kaldi;
136  using namespace kaldi::nnet3;
137  typedef kaldi::int32 int32;
138 
139  const char *usage =
140  "Get examples for training an nnet3 neural network for the xvector\n"
141  "system. Each output example contains a chunk of features from some\n"
142  "utterance along with a speaker label. The location and length of\n"
143  "the feature chunks are specified in the 'ranges' file. Each line\n"
144  "is interpreted as follows:\n"
145  " <source-utterance> <relative-output-archive-index> "
146  "<absolute-archive-index> <start-frame-index> <num-frames> "
147  "<speaker-label>\n"
148  "where <relative-output-archive-index> is interpreted as a zero-based\n"
149  "index into the wspecifiers provided on the command line (<egs-0-out>\n"
150  "and so on), and <absolute-archive-index> is ignored by this program.\n"
151  "For example:\n"
152  " utt1 3 13 65 300 3\n"
153  " utt1 0 10 50 400 3\n"
154  " utt2 ...\n"
155  "\n"
156  "Usage: nnet3-xvector-get-egs [options] <ranges-filename> "
157  "<features-rspecifier> <egs-0-out> <egs-1-out> ... <egs-N-1-out>\n"
158  "\n"
159  "For example:\n"
160  "nnet3-xvector-get-egs ranges.1 \"$feats\" ark:egs_temp.1.ark"
161  " ark:egs_temp.2.ark ark:egs_temp.3.ark\n";
162 
163  bool compress = true;
164  int32 num_pdfs = -1;
165 
166  ParseOptions po(usage);
167  po.Register("compress", &compress, "If true, write egs in "
168  "compressed format.");
169  po.Register("num-pdfs", &num_pdfs, "Number of speakers in the training "
170  "list.");
171 
172  po.Read(argc, argv);
173 
174  if (po.NumArgs() < 3) {
175  po.PrintUsage();
176  exit(1);
177  }
178 
179  std::string range_rspecifier = po.GetArg(1),
180  feature_rspecifier = po.GetArg(2);
181  std::vector<NnetExampleWriter *> example_writers;
182 
183  for (int32 i = 3; i <= po.NumArgs(); i++)
184  example_writers.push_back(new NnetExampleWriter(po.GetArg(i)));
185 
186  unordered_map<std::string, std::vector<ChunkInfo *> > utt_to_chunks;
187  ProcessRangeFile(range_rspecifier, &utt_to_chunks);
188  SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);
189 
190  int32 num_done = 0,
191  num_err = 0,
192  num_egs_written = 0;
193 
194  for (; !feat_reader.Done(); feat_reader.Next()) {
195  std::string key = feat_reader.Key();
196  const Matrix<BaseFloat> &feats = feat_reader.Value();
197  unordered_map<std::string, std::vector<ChunkInfo*> >::iterator
198  got = utt_to_chunks.find(key);
199  if (got == utt_to_chunks.end()) {
200  KALDI_WARN << "Could not create examples from utterance "
201  << key << " because it has no entry in the ranges "
202  << "input file.";
203  num_err++;
204  } else {
205  std::vector<ChunkInfo *> chunks = got->second;
206  WriteExamples(feats, chunks, key, compress, num_pdfs,
207  &num_egs_written, &example_writers);
208  num_done++;
209  }
210  }
211 
212  // Free memory
213  for (unordered_map<std::string, std::vector<ChunkInfo*> >::iterator
214  map_it = utt_to_chunks.begin();
215  map_it != utt_to_chunks.end(); ++map_it) {
216  DeletePointers(&map_it->second);
217  }
218  DeletePointers(&example_writers);
219 
220  KALDI_LOG << "Finished generating examples, "
221  << "successfully processed " << num_done
222  << " feature files, wrote " << num_egs_written << " examples; "
223  << num_err << " files had errors.";
224  return (num_egs_written == 0 || num_err > num_done ? 1 : 0);
225  } catch(const std::exception &e) {
226  std::cerr << e.what() << '\n';
227  return -1;
228  }
229 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
static void WriteExamples(const MatrixBase< BaseFloat > &feats, const std::vector< ChunkInfo *> &chunks, const std::string &utt, bool compress, int32 num_pdfs, int32 *num_egs_written, std::vector< NnetExampleWriter *> *example_writers)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_LOG
Definition: kaldi-error.h:153
static void ProcessRangeFile(const std::string &range_rxfilename, unordered_map< std::string, std::vector< ChunkInfo *> > *utt_to_chunks)