#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet2/nnet-example-functions.h"

Include dependency graph for nnet-get-weighted-egs.cc:

Namespaces
	kaldi
	This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks, the reference:

	kaldi::nnet2

Functions
int32	GetCount (double expected_count)

static void	ProcessFile (const MatrixBase< BaseFloat > &feats, const Posterior &pdf_post, const std::string &utt_id, const Vector< BaseFloat > &weights, int32 left_context, int32 right_context, int32 const_feat_dim, BaseFloat keep_proportion, BaseFloat weight_threshold, bool use_frame_selection, bool use_frame_weights, int64 *num_frames_written, int64 *num_frames_skipped, NnetExampleWriter *example_writer)

int	main (int argc, char *argv[])

Function Documentation

◆ main()

int main	(	int	argc,
		char *	argv[]
	)

Definition at line 110 of file nnet-get-weighted-egs.cc.

References VectorBase< Real >::Dim(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), kaldi::nnet2::ProcessFile(), ParseOptions::Read(), ParseOptions::Register(), RandomAccessTableReader< Holder >::Value(), and SequentialTableReader< Holder >::Value().

                                  {
   try {
     using namespace kaldi;
     using namespace kaldi::nnet2;
     typedef kaldi::int32 int32;
     typedef kaldi::int64 int64;
 
     const char *usage =
         "Get frame-by-frame examples of data for neural network training.\n"
         "Essentially this is a format change from features and posteriors\n"
         "into a special frame-by-frame format.  To split randomly into\n"
         "different subsets, do nnet-copy-egs with --random=true, but\n"
         "note that this does not randomize the order of frames.\n"
         "\n"
         "Usage:  nnet-get-weighted-egs [options] <features-rspecifier> "
         "<pdf-post-rspecifier> <weights-rspecifier> <training-examples-out>\n"
         "\n"
         "An example [where $feats expands to the actual features]:\n"
         "nnet-get-weighted-egs --left-context=8 --right-context=8 \"$feats\" \\\n"
         "  \"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n"
         "   ark:- \n"
         "Note: the --left-context and --right-context would be derived from\n"
         "the output of nnet-info.";
         
     
     int32 left_context = 0, right_context = 0, const_feat_dim = 0;
     int32 srand_seed = 0;
     BaseFloat keep_proportion = 1.0;
     BaseFloat weight_threshold = 0.0;
     bool use_frame_selection = true, use_frame_weights=false;
     
     ParseOptions po(usage);
     po.Register("left-context", &left_context, "Number of frames of left context "
                 "the neural net requires.");
     po.Register("right-context", &right_context, "Number of frames of right context "
                 "the neural net requires.");
     po.Register("const-feat-dim", &const_feat_dim, "If specified, the last "
                 "const-feat-dim dimensions of the feature input are treated as "
                 "constant over the context window (so are not spliced)");
     po.Register("keep-proportion", &keep_proportion, "If <1.0, this program will "
                 "randomly keep this proportion of the input samples.  If >1.0, it will "
                 "in expectation copy a sample this many times.  It will copy it a number "
                 "of times equal to floor(keep-proportion) or ceil(keep-proportion).");
     po.Register("srand", &srand_seed, "Seed for random number generator "
                 "(only relevant if --keep-proportion != 1.0)");
     po.Register("weight-threshold", &weight_threshold, "Keep only frames with weights "
                 "above this threshold.");
     po.Register("use-frame-selection", &use_frame_selection, "Remove the frames below threshold.");
     po.Register("use-frame-weights", &use_frame_weights, "Scale the error derivatives by the weight");
     
     po.Read(argc, argv);
 
     srand(srand_seed);
     
     if (po.NumArgs() != 4) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string feature_rspecifier = po.GetArg(1),
         pdf_post_rspecifier = po.GetArg(2),
         weights_rspecifier = po.GetArg(3),
         examples_wspecifier = po.GetArg(4);
 
     // Read in all the training files.
     SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);
     RandomAccessPosteriorReader pdf_post_reader(pdf_post_rspecifier);
     RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier);
     NnetExampleWriter example_writer(examples_wspecifier);
     
     int32 num_done = 0, num_err = 0;
     int64 num_frames_written = 0;
     int64 num_frames_skipped = 0;
     
     for (; !feat_reader.Done(); feat_reader.Next()) {
       std::string key = feat_reader.Key();
       const Matrix<BaseFloat> &feats = feat_reader.Value();
       if (!pdf_post_reader.HasKey(key)) {
         KALDI_WARN << "No pdf-level posterior for key " << key;
         num_err++;
       } else {
         const Posterior &pdf_post = pdf_post_reader.Value(key);
         if (pdf_post.size() != feats.NumRows()) {
           KALDI_WARN << "Posterior has wrong size " << pdf_post.size()
                      << " versus " << feats.NumRows();
           num_err++;
           continue;
         }
         if (!weights_reader.HasKey(key)) {
           KALDI_ERR << "No weights for utterance " << key;
           //ProcessFile(feats, pdf_post, NULL,
           //    left_context, right_context, const_feat_dim, keep_proportion,
           //    weight_threshold, false, false, &num_frames_written, 
           //    &num_frames_skipped, &example_writer);
         } else {
           Vector<BaseFloat> weights = weights_reader.Value(key);
           if (weights.Dim() != static_cast<int32>(pdf_post.size())) {
             KALDI_WARN << "Weights for utterance " << key
               << " have wrong size, " << weights.Dim()
               << " vs. " << pdf_post.size();
             num_err++;
             continue;
           }
           ProcessFile(feats, pdf_post, key, weights, left_context, right_context,
                       const_feat_dim, keep_proportion, weight_threshold,
                       use_frame_selection, use_frame_weights,
                       &num_frames_written, &num_frames_skipped, &example_writer);
         }
         num_done++;
       }
     }
 
     KALDI_LOG << "Finished generating examples, "
               << "successfully processed " << num_done
               << " feature files, wrote " << num_frames_written << " examples, "
               << "skipped " << num_frames_skipped << " examples, "
               << num_err << " files had errors.";
     return (num_done == 0 ? 1 : 0);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
     return -1;
   }
 }

Namespaces

Functions

Function Documentation

◆ main()