36 while (expected_count > 1.0) {
47 const std::string &utt_id,
54 bool use_frame_selection,
55 bool use_frame_weights,
56 int64 *num_frames_written,
57 int64 *num_frames_skipped,
62 int32 basic_feat_dim = feat_dim - const_feat_dim;
74 for (
int32 j = -left_context;
j <= right_context;
j++) {
82 eg.
labels.push_back(pdf_post[
i]);
84 if (const_feat_dim > 0) {
87 basic_feat_dim, const_feat_dim);
90 if (use_frame_selection) {
91 if (weights(i) < weight_threshold) {
92 (*num_frames_skipped)++;
96 std::ostringstream os;
97 os << utt_id <<
"-" <<
i;
98 std::string key = os.str();
101 example_writer->
Write(key, eg);
110 int main(
int argc,
char *argv[]) {
112 using namespace kaldi;
115 typedef kaldi::int64 int64;
118 "Get frame-by-frame examples of data for neural network training.\n" 119 "Essentially this is a format change from features and posteriors\n" 120 "into a special frame-by-frame format. To split randomly into\n" 121 "different subsets, do nnet-copy-egs with --random=true, but\n" 122 "note that this does not randomize the order of frames.\n" 124 "Usage: nnet-get-weighted-egs [options] <features-rspecifier> " 125 "<pdf-post-rspecifier> <weights-rspecifier> <training-examples-out>\n" 127 "An example [where $feats expands to the actual features]:\n" 128 "nnet-get-weighted-egs --left-context=8 --right-context=8 \"$feats\" \\\n" 129 " \"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n" 131 "Note: the --left-context and --right-context would be derived from\n" 132 "the output of nnet-info.";
135 int32 left_context = 0, right_context = 0, const_feat_dim = 0;
136 int32 srand_seed = 0;
139 bool use_frame_selection =
true, use_frame_weights=
false;
142 po.
Register(
"left-context", &left_context,
"Number of frames of left context " 143 "the neural net requires.");
144 po.
Register(
"right-context", &right_context,
"Number of frames of right context " 145 "the neural net requires.");
146 po.
Register(
"const-feat-dim", &const_feat_dim,
"If specified, the last " 147 "const-feat-dim dimensions of the feature input are treated as " 148 "constant over the context window (so are not spliced)");
149 po.
Register(
"keep-proportion", &keep_proportion,
"If <1.0, this program will " 150 "randomly keep this proportion of the input samples. If >1.0, it will " 151 "in expectation copy a sample this many times. It will copy it a number " 152 "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
153 po.
Register(
"srand", &srand_seed,
"Seed for random number generator " 154 "(only relevant if --keep-proportion != 1.0)");
155 po.
Register(
"weight-threshold", &weight_threshold,
"Keep only frames with weights " 156 "above this threshold.");
157 po.
Register(
"use-frame-selection", &use_frame_selection,
"Remove the frames below threshold.");
158 po.
Register(
"use-frame-weights", &use_frame_weights,
"Scale the error derivatives by the weight");
169 std::string feature_rspecifier = po.
GetArg(1),
170 pdf_post_rspecifier = po.
GetArg(2),
171 weights_rspecifier = po.
GetArg(3),
172 examples_wspecifier = po.
GetArg(4);
180 int32 num_done = 0, num_err = 0;
181 int64 num_frames_written = 0;
182 int64 num_frames_skipped = 0;
184 for (; !feat_reader.
Done(); feat_reader.
Next()) {
185 std::string key = feat_reader.
Key();
187 if (!pdf_post_reader.
HasKey(key)) {
188 KALDI_WARN <<
"No pdf-level posterior for key " << key;
192 if (pdf_post.size() != feats.
NumRows()) {
193 KALDI_WARN <<
"Posterior has wrong size " << pdf_post.size()
194 <<
" versus " << feats.
NumRows();
198 if (!weights_reader.
HasKey(key)) {
199 KALDI_ERR <<
"No weights for utterance " << key;
206 if (weights.
Dim() !=
static_cast<int32
>(pdf_post.size())) {
208 <<
" have wrong size, " << weights.
Dim()
209 <<
" vs. " << pdf_post.size();
213 ProcessFile(feats, pdf_post, key, weights, left_context, right_context,
214 const_feat_dim, keep_proportion, weight_threshold,
215 use_frame_selection, use_frame_weights,
216 &num_frames_written, &num_frames_skipped, &example_writer);
222 KALDI_LOG <<
"Finished generating examples, " 223 <<
"successfully processed " << num_done
224 <<
" feature files, wrote " << num_frames_written <<
" examples, " 225 <<
"skipped " << num_frames_skipped <<
" examples, " 226 << num_err <<
" files had errors.";
227 return (num_done == 0 ? 1 : 0);
228 }
catch(
const std::exception &e) {
229 std::cerr << e.what() <<
'\n';
CompressedMatrix input_frames
The input data, with NumRows() >= labels.size() + left_context; it includes features to the left and ...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 left_context
The number of frames of left context (we can work out the #frames of right context from input_frames...
bool WithProb(BaseFloat prob, struct RandomState *state)
A templated class for writing objects to an archive or script file; see The Table concept...
int main(int argc, char *argv[])
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
std::vector< std::vector< std::pair< int32, BaseFloat > > > labels
The label(s) for each frame in a sequence of frames; in the normal case, this will be just [ [ (pdf-i...
static void ProcessFile(const MatrixBase< BaseFloat > &feats, const Posterior &pdf_post, const std::string &utt_id, int32 left_context, int32 right_context, int32 num_frames, int32 const_feat_dim, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer)
int32 GetCount(double expected_count)
Note on how to parse this filename: it contains functions related to neural-net training examples...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Vector< BaseFloat > spk_info
The speaker-specific input, if any, or an empty vector if we're not using this feature.