96 using namespace kaldi;
99 typedef kaldi::int64 int64;
102 "Get frame-by-frame examples of data for neural network training.\n" 103 "Essentially this is a format change from features and posteriors\n" 104 "into a special frame-by-frame format. To split randomly into\n" 105 "different subsets, do nnet-copy-egs with --random=true, but\n" 106 "note that this does not randomize the order of frames.\n" 108 "Usage: nnet-get-egs [options] <features-rspecifier> " 109 "<pdf-post-rspecifier> <training-examples-out>\n" 111 "An example [where $feats expands to the actual features]:\n" 112 "nnet-get-egs --left-context=8 --right-context=8 \"$feats\" \\\n" 113 " \"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n" 115 "Note: the --left-context and --right-context would be derived from\n" 116 "the output of nnet-info.";
119 int32 left_context = 0, right_context = 0,
120 num_frames = 1, const_feat_dim = 0;
123 po.Register(
"left-context", &left_context,
"Number of frames of left " 124 "context the neural net requires.");
125 po.Register(
"right-context", &right_context,
"Number of frames of right " 126 "context the neural net requires.");
127 po.Register(
"num-frames", &num_frames,
"Number of frames with labels " 128 "that each example contains.");
129 po.Register(
"const-feat-dim", &const_feat_dim,
"If specified, the last " 130 "const-feat-dim dimensions of the feature input are treated as " 131 "constant over the context window (so are not spliced)");
135 if (po.NumArgs() != 3) {
140 std::string feature_rspecifier = po.GetArg(1),
141 pdf_post_rspecifier = po.GetArg(2),
142 examples_wspecifier = po.GetArg(3);
149 int32 num_done = 0, num_err = 0;
150 int64 num_frames_written = 0, num_egs_written = 0;
152 for (; !feat_reader.Done(); feat_reader.Next()) {
153 std::string key = feat_reader.Key();
155 if (!pdf_post_reader.HasKey(key)) {
156 KALDI_WARN <<
"No pdf-level posterior for key " << key;
159 const Posterior &pdf_post = pdf_post_reader.Value(key);
160 if (pdf_post.size() != feats.
NumRows()) {
161 KALDI_WARN <<
"Posterior has wrong size " << pdf_post.size()
162 <<
" versus " << feats.
NumRows();
167 left_context, right_context, num_frames,
168 const_feat_dim, &num_frames_written, &num_egs_written,
174 KALDI_LOG <<
"Finished generating examples, " 175 <<
"successfully processed " << num_done
176 <<
" feature files, wrote " << num_egs_written <<
" examples, " 177 <<
" with " << num_frames_written <<
" egs in total; " 178 << num_err <<
" files had errors.";
179 return (num_egs_written == 0 || num_err > num_done ? 1 : 0);
180 }
catch(
const std::exception &e) {
181 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
A templated class for writing objects to an archive or script file; see The Table concept.
Allows random access to a collection of objects in an archive or script file; see The Table concept.
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
static void ProcessFile(const MatrixBase< BaseFloat > &feats, const Posterior &pdf_post, const std::string &utt_id, int32 left_context, int32 right_context, int32 num_frames, int32 const_feat_dim, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer)