doc/nnet-get-weighted-egs_8cc_source.html

 // nnet2bin/nnet-get-weighted-egs.cc

 // Copyright 2013-2014  (Author: Vimal Manohar)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "hmm/transition-model.h"
 #include "nnet2/nnet-example-functions.h"

 namespace kaldi {
 namespace nnet2 {

 // Draws a random non-negative integer whose expected value is
 // "expected_count"; the result is one of the two integers bracketing
 // "expected_count" (its floor or its ceiling).
 // The whole part is peeled off one unit at a time, so an extremely large
 // "expected_count" would make this loop for a very long time; callers are
 // expected to pass modest values (normally between zero and one).
 int32 GetCount(double expected_count) {
   KALDI_ASSERT(expected_count >= 0.0);
   int32 whole_copies = 0;
   double remainder = expected_count;
   // Strip off whole units until at most 1.0 is left over.
   for (; remainder > 1.0; remainder--)
     whole_copies++;
   // The leftover is used as the probability of one additional copy.
   return WithProb(remainder) ? whole_copies + 1 : whole_copies;
 }

 static void ProcessFile(const MatrixBase<BaseFloat> &feats,
                         const Posterior &pdf_post,
                         const std::string &utt_id,
                         const Vector<BaseFloat> &weights,
                         int32 left_context,
                         int32 right_context,
                         int32 const_feat_dim,
                         BaseFloat keep_proportion,
                         BaseFloat weight_threshold,
                         bool use_frame_selection,
                         bool use_frame_weights,
                         int64 *num_frames_written,
                         int64 *num_frames_skipped,
                         NnetExampleWriter *example_writer) {
   KALDI_ASSERT(feats.NumRows() == static_cast<int32>(pdf_post.size()));
   int32 feat_dim = feats.NumCols();
   KALDI_ASSERT(const_feat_dim < feat_dim);
   int32 basic_feat_dim = feat_dim - const_feat_dim;
   NnetExample eg;
   Matrix<BaseFloat> input_frames(left_context + 1 + right_context,
                                  basic_feat_dim);
   eg.left_context = left_context;
   // TODO: modify this code, and this binary itself, to support the --num-frames
   // option to allow multiple frames per eg.
   for (int32 i = 0; i < feats.NumRows(); i++) {
     int32 count = GetCount(keep_proportion); // number of times
     // we'll write this out (1 by default).
     if (count > 0) {
       // Set up "input_frames".
       for (int32 j = -left_context; j <= right_context; j++) {
         int32 j2 = j + i;
         if (j2 < 0) j2 = 0;
         if (j2 >= feats.NumRows()) j2 = feats.NumRows() - 1;
         SubVector<BaseFloat> src(feats, j2), dest(input_frames,
                                                   j + left_context);
         dest.CopyFromVec(src);
       }
       eg.labels.push_back(pdf_post[i]);
       eg.input_frames = input_frames;
       if (const_feat_dim > 0) {
         // we'll normally reach here if we're using online-estimated iVectors.
         SubVector<BaseFloat> const_part(feats.Row(i),
                                         basic_feat_dim, const_feat_dim);
         eg.spk_info.CopyFromVec(const_part);
       }
       if (use_frame_selection) {
         if (weights(i) < weight_threshold) {
           (*num_frames_skipped)++;
           continue;
         }
       }
       std::ostringstream os;
       os << utt_id << "-" << i;
       std::string key = os.str(); // key in the archive is the number of the example

       for (int32 c = 0; c < count; c++)
         example_writer->Write(key, eg);
     }
   }
 }


 } // namespace nnet2
 } // namespace kaldi

 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
     using namespace kaldi::nnet2;
     typedef kaldi::int32 int32;
     typedef kaldi::int64 int64;

     const char *usage =
         "Get frame-by-frame examples of data for neural network training.\n"
         "Essentially this is a format change from features and posteriors\n"
         "into a special frame-by-frame format.  To split randomly into\n"
         "different subsets, do nnet-copy-egs with --random=true, but\n"
         "note that this does not randomize the order of frames.\n"
         "\n"
         "Usage:  nnet-get-weighted-egs [options] <features-rspecifier> "
         "<pdf-post-rspecifier> <weights-rspecifier> <training-examples-out>\n"
         "\n"
         "An example [where $feats expands to the actual features]:\n"
         "nnet-get-weighted-egs --left-context=8 --right-context=8 \"$feats\" \\\n"
         "  \"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n"
         "   ark:- \n"
         "Note: the --left-context and --right-context would be derived from\n"
         "the output of nnet-info.";


     int32 left_context = 0, right_context = 0, const_feat_dim = 0;
     int32 srand_seed = 0;
     BaseFloat keep_proportion = 1.0;
     BaseFloat weight_threshold = 0.0;
     bool use_frame_selection = true, use_frame_weights=false;

     ParseOptions po(usage);
     po.Register("left-context", &left_context, "Number of frames of left context "
                 "the neural net requires.");
     po.Register("right-context", &right_context, "Number of frames of right context "
                 "the neural net requires.");
     po.Register("const-feat-dim", &const_feat_dim, "If specified, the last "
                 "const-feat-dim dimensions of the feature input are treated as "
                 "constant over the context window (so are not spliced)");
     po.Register("keep-proportion", &keep_proportion, "If <1.0, this program will "
                 "randomly keep this proportion of the input samples.  If >1.0, it will "
                 "in expectation copy a sample this many times.  It will copy it a number "
                 "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
     po.Register("srand", &srand_seed, "Seed for random number generator "
                 "(only relevant if --keep-proportion != 1.0)");
     po.Register("weight-threshold", &weight_threshold, "Keep only frames with weights "
                 "above this threshold.");
     po.Register("use-frame-selection", &use_frame_selection, "Remove the frames below threshold.");
     po.Register("use-frame-weights", &use_frame_weights, "Scale the error derivatives by the weight");

     po.Read(argc, argv);

     srand(srand_seed);

     if (po.NumArgs() != 4) {
       po.PrintUsage();
       exit(1);
     }

     std::string feature_rspecifier = po.GetArg(1),
         pdf_post_rspecifier = po.GetArg(2),
         weights_rspecifier = po.GetArg(3),
         examples_wspecifier = po.GetArg(4);

     // Read in all the training files.
     SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);
     RandomAccessPosteriorReader pdf_post_reader(pdf_post_rspecifier);
     RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier);
     NnetExampleWriter example_writer(examples_wspecifier);

     int32 num_done = 0, num_err = 0;
     int64 num_frames_written = 0;
     int64 num_frames_skipped = 0;

     for (; !feat_reader.Done(); feat_reader.Next()) {
       std::string key = feat_reader.Key();
       const Matrix<BaseFloat> &feats = feat_reader.Value();
       if (!pdf_post_reader.HasKey(key)) {
         KALDI_WARN << "No pdf-level posterior for key " << key;
         num_err++;
       } else {
         const Posterior &pdf_post = pdf_post_reader.Value(key);
         if (pdf_post.size() != feats.NumRows()) {
           KALDI_WARN << "Posterior has wrong size " << pdf_post.size()
                      << " versus " << feats.NumRows();
           num_err++;
           continue;
         }
         if (!weights_reader.HasKey(key)) {
           KALDI_ERR << "No weights for utterance " << key;
           //ProcessFile(feats, pdf_post, NULL,
           //    left_context, right_context, const_feat_dim, keep_proportion,
           //    weight_threshold, false, false, &num_frames_written,
           //    &num_frames_skipped, &example_writer);
         } else {
           Vector<BaseFloat> weights = weights_reader.Value(key);
           if (weights.Dim() != static_cast<int32>(pdf_post.size())) {
             KALDI_WARN << "Weights for utterance " << key
               << " have wrong size, " << weights.Dim()
               << " vs. " << pdf_post.size();
             num_err++;
             continue;
           }
           ProcessFile(feats, pdf_post, key, weights, left_context, right_context,
                       const_feat_dim, keep_proportion, weight_threshold,
                       use_frame_selection, use_frame_weights,
                       &num_frames_written, &num_frames_skipped, &example_writer);
         }
         num_done++;
       }
     }

     KALDI_LOG << "Finished generating examples, "
               << "successfully processed " << num_done
               << " feature files, wrote " << num_frames_written << " examples, "
               << "skipped " << num_frames_skipped << " examples, "
               << num_err << " files had errors.";
     return (num_done == 0 ? 1 : 0);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
     return -1;
   }
 }
kaldi::nnet2::NnetExample::input_frames
CompressedMatrix input_frames
The input data, with NumRows() >= labels.size() + left_context; it includes features to the left and ...
Definition: nnet-example.h:49

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet2::NnetExample
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:36

rnnlm::j
int j
Definition: mikolov-rnnlm-lib.cc:66

kaldi::MatrixBase::NumCols
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::ParseOptions::PrintUsage
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
Definition: parse-options.cc:393

kaldi::SequentialTableReader::Key
std::string Key()
Definition: kaldi-table-inl.h:918

kaldi::nnet2::NnetExample::left_context
int32 left_context
The number of frames of left context (we can work out the #frames of right context from input_frames...
Definition: nnet-example.h:53

kaldi::WithProb
bool WithProb(BaseFloat prob, struct RandomState *state)
Definition: kaldi-math.cc:72

kaldi::TableWriter
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

main
int main(int argc, char *argv[])
Definition: nnet-get-weighted-egs.cc:110

kaldi::Matrix< BaseFloat >

kaldi::TableWriter::Write
void Write(const std::string &key, const T &value) const
Definition: kaldi-table-inl.h:1511

kaldi::ParseOptions::Register
void Register(const std::string &name, bool *ptr, const std::string &doc)
Definition: parse-options.cc:56

kaldi::RandomAccessTableReader
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233

kaldi::VectorBase::CopyFromVec
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
Definition: kaldi-vector.cc:228

count
const size_t count
Definition: arpa-file-parser-test.cc:66

kaldi::BaseFloat
float BaseFloat
Definition: kaldi-types.h:29

kaldi::Posterior
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42

kaldi::ParseOptions
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36

kaldi::MatrixBase::Row
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188

float

kaldi::RandomAccessTableReader::Value
const T & Value(const std::string &key)
Definition: kaldi-table-inl.h:2561

transition-model.h

kaldi::SequentialTableReader
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287

kaldi::ParseOptions::Read
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Definition: parse-options.cc:311

kaldi::SequentialTableReader::Done
bool Done()
Definition: kaldi-table-inl.h:948

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::ParseOptions::GetArg
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
Definition: parse-options.cc:202

kaldi::SequentialTableReader::Next
void Next()
Definition: kaldi-table-inl.h:942

kaldi::VectorBase::Dim
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64

kaldi::RandomAccessTableReader::HasKey
bool HasKey(const std::string &key)
Definition: kaldi-table-inl.h:2551

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

kaldi::ParseOptions::NumArgs
int NumArgs() const
Number of positional parameters (c.f. argc-1).
Definition: parse-options.cc:198

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::SequentialTableReader::Value
T & Value()
Definition: kaldi-table-inl.h:934

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

kaldi::MatrixBase::NumRows
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64

kaldi::nnet2
Definition: am-nnet-test.cc:26

kaldi::nnet2::NnetExample::labels
std::vector< std::vector< std::pair< int32, BaseFloat > > > labels
The label(s) for each frame in a sequence of frames; in the normal case, this will be just [ [ (pdf-i...
Definition: nnet-example.h:43

kaldi::nnet2::ProcessFile
static void ProcessFile(const MatrixBase< BaseFloat > &feats, const Posterior &pdf_post, const std::string &utt_id, int32 left_context, int32 right_context, int32 num_frames, int32 const_feat_dim, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer)
Definition: nnet-get-egs.cc:32

KALDI_LOG
#define KALDI_LOG
Definition: kaldi-error.h:153

kaldi::nnet2::GetCount
int32 GetCount(double expected_count)
Definition: nnet-copy-egs-discriminative.cc:31

kaldi-common.h

nnet-example-functions.h
Note on how to parse this filename: it contains functions related to neural-net training examples...

kaldi::SubVector
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501

kaldi::nnet2::NnetExample::spk_info
Vector< BaseFloat > spk_info
The speaker-specific input, if any, or an empty vector if we're not using this feature.
Definition: nnet-example.h:58