#include <limits>
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-loss.h"
#include "nnet/nnet-pdf-prior.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "base/timer.h"

Include dependency graph for nnet-forward.cc:

Functions
int	main (int argc, char *argv[])

Function Documentation

◆ main()

int main(int argc, char *argv[])

Definition at line 30 of file nnet-forward.cc.

References CuMatrixBase< Real >::Add(), CuMatrixBase< Real >::ApplyLog(), PdfPriorOptions::class_frame_counts, SequentialTableReader< Holder >::Done(), Timer::Elapsed(), Nnet::Feedforward(), ParseOptions::GetArg(), Nnet::GetLastComponent(), Component::GetType(), kaldi::GetVerboseLevel(), KALDI_ERR, KALDI_ISFINITE, KALDI_LOG, KALDI_VLOG, KALDI_WARN, Component::kBlockSoftmax, SequentialTableReader< Holder >::Key(), Component::kSoftmax, CuMatrixBase< Real >::Max(), CuMatrixBase< Real >::Min(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), ParseOptions::Read(), Nnet::Read(), PdfPriorOptions::Register(), ParseOptions::Register(), Nnet::RemoveLastComponent(), Nnet::SetDropoutRate(), PdfPrior::SubtractOnLogpost(), MatrixBase< Real >::Sum(), CuMatrixBase< Real >::Sum(), Component::TypeToMarker(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

                                  {
   using namespace kaldi;
   using namespace kaldi::nnet1;
   try {
     const char *usage =
       "Perform forward pass through Neural Network.\n"
       "Usage: nnet-forward [options] <nnet1-in> <feature-rspecifier> <feature-wspecifier>\n"
       "e.g.: nnet-forward final.nnet ark:input.ark ark:output.ark\n";
 
     ParseOptions po(usage);
 
     PdfPriorOptions prior_opts;
     prior_opts.Register(&po);
 
     std::string feature_transform;
     po.Register("feature-transform", &feature_transform,
         "Feature transform in front of main network (in nnet format)");
 
     bool no_softmax = false;
     po.Register("no-softmax", &no_softmax,
         "Removes the last component with Softmax, if found. The pre-softmax "
         "activations are the output of the network. Decoding them leads to "
         "the same lattices as if we had used 'log-posteriors'.");
 
     bool apply_log = false;
     po.Register("apply-log", &apply_log, "Transform NN output by log()");
 
     std::string use_gpu="no";
     po.Register("use-gpu", &use_gpu,
         "yes|no|optional, only has effect if compiled with CUDA");
 
     using namespace kaldi;
     using namespace kaldi::nnet1;
     typedef kaldi::int32 int32;
 
     po.Read(argc, argv);
 
     if (po.NumArgs() != 3) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string model_filename = po.GetArg(1),
         feature_rspecifier = po.GetArg(2),
         feature_wspecifier = po.GetArg(3);
 
     // Select the GPU
 #if HAVE_CUDA == 1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
 #endif
 
     Nnet nnet_transf;
     if (feature_transform != "") {
       nnet_transf.Read(feature_transform);
     }
 
     Nnet nnet;
     nnet.Read(model_filename);
     // optionally remove softmax,
     Component::ComponentType last_comp_type = nnet.GetLastComponent().GetType();
     if (no_softmax) {
       if (last_comp_type == Component::kSoftmax ||
           last_comp_type == Component::kBlockSoftmax) {
         KALDI_LOG << "Removing " << Component::TypeToMarker(last_comp_type)
                   << " from the nnet " << model_filename;
         nnet.RemoveLastComponent();
       } else {
         KALDI_WARN << "Last component 'NOT-REMOVED' by --no-softmax=true, "
           << "the component was " << Component::TypeToMarker(last_comp_type);
       }
     }
 
     // avoid some bad option combinations,
     if (apply_log && no_softmax) {
       KALDI_ERR << "Cannot use both --apply-log=true --no-softmax=true, "
                 << "use only one of the two!";
     }
 
     // we will subtract log-priors later,
     PdfPrior pdf_prior(prior_opts);
 
     // disable dropout,
     nnet_transf.SetDropoutRate(0.0);
     nnet.SetDropoutRate(0.0);
 
     kaldi::int64 tot_t = 0;
 
     SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
     BaseFloatMatrixWriter feature_writer(feature_wspecifier);
 
     CuMatrix<BaseFloat> feats, feats_transf, nnet_out;
     Matrix<BaseFloat> nnet_out_host;
 
     Timer time;
     double time_now = 0;
     int32 num_done = 0;
 
     // main loop,
     for (; !feature_reader.Done(); feature_reader.Next()) {
       // read
       Matrix<BaseFloat> mat = feature_reader.Value();
       std::string utt = feature_reader.Key();
       KALDI_VLOG(2) << "Processing utterance " << num_done+1
                     << ", " << utt
                     << ", " << mat.NumRows() << "frm";
 
 
       if (!KALDI_ISFINITE(mat.Sum())) {  // check there's no nan/inf,
         KALDI_ERR << "NaN or inf found in features for " << utt;
       }
 
       // push it to gpu,
       feats = mat;
 
       // fwd-pass, feature transform,
       nnet_transf.Feedforward(feats, &feats_transf);
       if (!KALDI_ISFINITE(feats_transf.Sum())) {  // check there's no nan/inf,
         KALDI_ERR << "NaN or inf found in transformed-features for " << utt;
       }
 
       // fwd-pass, nnet,
       nnet.Feedforward(feats_transf, &nnet_out);
       if (!KALDI_ISFINITE(nnet_out.Sum())) {  // check there's no nan/inf,
         KALDI_ERR << "NaN or inf found in nn-output for " << utt;
       }
 
       // convert posteriors to log-posteriors,
       if (apply_log) {
         if (!(nnet_out.Min() >= 0.0 && nnet_out.Max() <= 1.0)) {
           KALDI_WARN << "Applying 'log()' to data which don't seem to be "
                      << "probabilities," << utt;
         }
         nnet_out.Add(1e-20);  // avoid log(0),
         nnet_out.ApplyLog();
       }
 
       // subtract log-priors from log-posteriors or pre-softmax,
       if (prior_opts.class_frame_counts != "") {
         pdf_prior.SubtractOnLogpost(&nnet_out);
       }
 
       // download from GPU,
       nnet_out_host = Matrix<BaseFloat>(nnet_out);
 
       // write,
       if (!KALDI_ISFINITE(nnet_out_host.Sum())) {  // check there's no nan/inf,
         KALDI_ERR << "NaN or inf found in final output nn-output for " << utt;
       }
       feature_writer.Write(feature_reader.Key(), nnet_out_host);
 
       // progress log,
       if (num_done % 100 == 0) {
         time_now = time.Elapsed();
         KALDI_VLOG(1) << "After " << num_done << " utterances: time elapsed = "
                       << time_now/60 << " min; processed " << tot_t/time_now
                       << " frames per second.";
       }
       num_done++;
       tot_t += mat.NumRows();
     }
 
     // final message,
     KALDI_LOG << "Done " << num_done << " files"
               << " in " << time.Elapsed()/60 << "min,"
               << " (fps " << tot_t/time.Elapsed() << ")";
 
 #if HAVE_CUDA == 1
     if (GetVerboseLevel() >= 1) {
       CuDevice::Instantiate().PrintProfile();
     }
 #endif
 
     if (num_done == 0) return -1;
     return 0;
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
   }
 }

Functions

Function Documentation

◆ main()