36   while (expected_count > 1.0) {
    47                         const std::string &utt_id,
    54                         bool use_frame_selection,
    55                         bool use_frame_weights,
    56                         int64 *num_frames_written,
    57                         int64 *num_frames_skipped,
    62   int32 basic_feat_dim = feat_dim - const_feat_dim;
    74       for (
int32 j = -left_context; 
j <= right_context; 
j++) {
    82       eg.
labels.push_back(pdf_post[
i]);
    84       if (const_feat_dim > 0) {
    87                                         basic_feat_dim, const_feat_dim);
    90       if (use_frame_selection) {
    91         if (weights(i) < weight_threshold) {
    92           (*num_frames_skipped)++;
    96       std::ostringstream os;
    97       os << utt_id << 
"-" << 
i;
    98       std::string key = os.str(); 
   101         example_writer->
Write(key, eg);
   110 int main(
int argc, 
char *argv[]) {
   112     using namespace kaldi;
   115     typedef kaldi::int64 int64;
   118         "Get frame-by-frame examples of data for neural network training.\n"   119         "Essentially this is a format change from features and posteriors\n"   120         "into a special frame-by-frame format.  To split randomly into\n"   121         "different subsets, do nnet-copy-egs with --random=true, but\n"   122         "note that this does not randomize the order of frames.\n"   124         "Usage:  nnet-get-weighted-egs [options] <features-rspecifier> "   125         "<pdf-post-rspecifier> <weights-rspecifier> <training-examples-out>\n"   127         "An example [where $feats expands to the actual features]:\n"   128         "nnet-get-weighted-egs --left-context=8 --right-context=8 \"$feats\" \\\n"   129         "  \"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n"   131         "Note: the --left-context and --right-context would be derived from\n"   132         "the output of nnet-info.";
   135     int32 left_context = 0, right_context = 0, const_feat_dim = 0;
   136     int32 srand_seed = 0;
   139     bool use_frame_selection = 
true, use_frame_weights=
false;
   142     po.
Register(
"left-context", &left_context, 
"Number of frames of left context "   143                 "the neural net requires.");
   144     po.
Register(
"right-context", &right_context, 
"Number of frames of right context "   145                 "the neural net requires.");
   146     po.
Register(
"const-feat-dim", &const_feat_dim, 
"If specified, the last "   147                 "const-feat-dim dimensions of the feature input are treated as "   148                 "constant over the context window (so are not spliced)");
   149     po.
Register(
"keep-proportion", &keep_proportion, 
"If <1.0, this program will "   150                 "randomly keep this proportion of the input samples.  If >1.0, it will "   151                 "in expectation copy a sample this many times.  It will copy it a number "   152                 "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
   153     po.
Register(
"srand", &srand_seed, 
"Seed for random number generator "   154                 "(only relevant if --keep-proportion != 1.0)");
   155     po.
Register(
"weight-threshold", &weight_threshold, 
"Keep only frames with weights "   156                 "above this threshold.");
   157     po.
Register(
"use-frame-selection", &use_frame_selection, 
"Remove the frames below threshold.");
   158     po.
Register(
"use-frame-weights", &use_frame_weights, 
"Scale the error derivatives by the weight");
   169     std::string feature_rspecifier = po.
GetArg(1),
   170         pdf_post_rspecifier = po.
GetArg(2),
   171         weights_rspecifier = po.
GetArg(3),
   172         examples_wspecifier = po.
GetArg(4);
   180     int32 num_done = 0, num_err = 0;
   181     int64 num_frames_written = 0;
   182     int64 num_frames_skipped = 0;
   184     for (; !feat_reader.
Done(); feat_reader.
Next()) {
   185       std::string key = feat_reader.
Key();
   187       if (!pdf_post_reader.
HasKey(key)) {
   188         KALDI_WARN << 
"No pdf-level posterior for key " << key;
   192         if (pdf_post.size() != feats.
NumRows()) {
   193           KALDI_WARN << 
"Posterior has wrong size " << pdf_post.size()
   194                      << 
" versus " << feats.
NumRows();
   198         if (!weights_reader.
HasKey(key)) {
   199           KALDI_ERR << 
"No weights for utterance " << key;
   206           if (weights.
Dim() != 
static_cast<int32
>(pdf_post.size())) {
   208               << 
" have wrong size, " << weights.
Dim()
   209               << 
" vs. " << pdf_post.size();
   213           ProcessFile(feats, pdf_post, key, weights, left_context, right_context,
   214                       const_feat_dim, keep_proportion, weight_threshold,
   215                       use_frame_selection, use_frame_weights,
   216                       &num_frames_written, &num_frames_skipped, &example_writer);
   222     KALDI_LOG << 
"Finished generating examples, "   223               << 
"successfully processed " << num_done
   224               << 
" feature files, wrote " << num_frames_written << 
" examples, "   225               << 
"skipped " << num_frames_skipped << 
" examples, "   226               << num_err << 
" files had errors.";
   227     return (num_done == 0 ? 1 : 0);
   228   } 
catch(
const std::exception &e) {
   229     std::cerr << e.what() << 
'\n';
 CompressedMatrix input_frames
The input data, with NumRows() >= labels.size() + left_context; it includes features to the left and ...
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
 
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix). 
 
Base class which provides matrix operations not involving resizing or allocation. ...
 
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor]. 
 
int32 left_context
The number of frames of left context (we can work out the #frames of right context from input_frames...
 
bool WithProb(BaseFloat prob, struct RandomState *state)
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
int main(int argc, char *argv[])
 
void Write(const std::string &key, const T &value) const
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size). 
 
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const]. 
 
const T & Value(const std::string &key)
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables. 
 
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility. 
 
MatrixIndexT Dim() const
Returns the dimension of the vector. 
 
bool HasKey(const std::string &key)
 
int NumArgs() const
Number of positional parameters (c.f. argc-1). 
 
A class representing a vector. 
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
std::vector< std::vector< std::pair< int32, BaseFloat > > > labels
The label(s) for each frame in a sequence of frames; in the normal case, this will be just [ [ (pdf-i...
 
static void ProcessFile(const MatrixBase< BaseFloat > &feats, const Posterior &pdf_post, const std::string &utt_id, int32 left_context, int32 right_context, int32 num_frames, int32 const_feat_dim, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer)
 
int32 GetCount(double expected_count)
 
Note on how to parse this filename: it contains functions related to neural-net training examples...
 
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
 
Vector< BaseFloat > spk_info
The speaker-specific input, if any, or an empty vector if we're not using this feature.