nnet-am-compute.cc File Reference
Include dependency graph for nnet-am-compute.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 30 of file nnet-am-compute.cc.

References MatrixBase< Real >::ApplyFloor(), MatrixBase< Real >::ApplyLog(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), AmNnet::GetNnet(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), Nnet::LeftContext(), MatrixBase< Real >::MulColsVec(), SequentialTableReader< Holder >::Next(), kaldi::nnet2::NnetComputation(), kaldi::nnet2::NnetComputationChunked(), ParseOptions::NumArgs(), AmNnet::NumPdfs(), MatrixBase< Real >::NumRows(), Nnet::OutputDim(), ParseOptions::PrintUsage(), AmNnet::Priors(), AmNnet::Read(), ParseOptions::Read(), TransitionModel::Read(), ParseOptions::Register(), Nnet::RightContext(), VectorBase< Real >::Scale(), Input::Stream(), VectorBase< Real >::Sum(), CuMatrix< Real >::Swap(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

30  {
31  try {
32  using namespace kaldi;
33  using namespace kaldi::nnet2;
34  typedef kaldi::int32 int32;
35  typedef kaldi::int64 int64;
36 
37  const char *usage =
38  "Does the neural net computation for each file of input features, and\n"
39  "outputs as a matrix the result. Used mostly for debugging.\n"
40  "Note: if you want it to apply a log (e.g. for log-likelihoods), use\n"
41  "--apply-log=true\n"
42  "\n"
43  "Usage: nnet-am-compute [options] <model-in> <feature-rspecifier> "
44  "<feature-or-loglikes-wspecifier>\n"
45  "See also: nnet-compute, nnet-logprob\n";
46 
47  bool divide_by_priors = false;
48  bool apply_log = false;
49  bool pad_input = true;
50  std::string use_gpu = "no";
51  int32 chunk_size = 0;
52  ParseOptions po(usage);
53  po.Register("divide-by-priors", &divide_by_priors, "If true, "
54  "divide by the priors stored in the model and re-normalize, apply-log may follow");
55  po.Register("apply-log", &apply_log, "Apply a log to the result of the computation "
56  "before outputting.");
57  po.Register("pad-input", &pad_input, "If true, duplicate the first and last frames "
58  "of input features as required for temporal context, to prevent #frames "
59  "of output being less than those of input.");
60  po.Register("use-gpu", &use_gpu,
61  "yes|no|optional|wait, only has effect if compiled with CUDA");
62  po.Register("chunk-size", &chunk_size, "Process the feature matrix in chunks. "
63  "This is useful when processing large feature files in the GPU. "
64  "If chunk-size > 0, pad-input must be true.");
65 
66  po.Read(argc, argv);
67 
68  if (po.NumArgs() != 3) {
69  po.PrintUsage();
70  exit(1);
71  }
72  // If chunk_size is greater than 0, pad_input needs to be true.
73  KALDI_ASSERT(chunk_size < 0 || pad_input);
74 
75 #if HAVE_CUDA==1
76  CuDevice::Instantiate().SelectGpuId(use_gpu);
77 #endif
78 
79  std::string nnet_rxfilename = po.GetArg(1),
80  features_rspecifier = po.GetArg(2),
81  features_or_loglikes_wspecifier = po.GetArg(3);
82 
83  TransitionModel trans_model;
84  AmNnet am_nnet;
85  {
86  bool binary_read;
87  Input ki(nnet_rxfilename, &binary_read);
88  trans_model.Read(ki.Stream(), binary_read);
89  am_nnet.Read(ki.Stream(), binary_read);
90  }
91 
92  Nnet &nnet = am_nnet.GetNnet();
93 
94  int64 num_done = 0, num_frames = 0;
95 
96  Vector<BaseFloat> inv_priors(am_nnet.Priors());
97  KALDI_ASSERT((!divide_by_priors || inv_priors.Dim() == am_nnet.NumPdfs()) &&
98  "Priors in neural network not set up.");
99  inv_priors.ApplyPow(-1.0);
100 
101  SequentialBaseFloatMatrixReader feature_reader(features_rspecifier);
102  BaseFloatMatrixWriter writer(features_or_loglikes_wspecifier);
103 
104  for (; !feature_reader.Done(); feature_reader.Next()) {
105  std::string utt = feature_reader.Key();
106  const Matrix<BaseFloat> &feats = feature_reader.Value();
107 
108  int32 output_frames = feats.NumRows(), output_dim = nnet.OutputDim();
109  if (!pad_input)
110  output_frames -= nnet.LeftContext() + nnet.RightContext();
111  if (output_frames <= 0) {
112  KALDI_WARN << "Skipping utterance " << utt << " because output "
113  << "would be empty.";
114  continue;
115  }
116 
117  Matrix<BaseFloat> output(output_frames, output_dim);
118  CuMatrix<BaseFloat> cu_feats(feats);
119  CuMatrix<BaseFloat> cu_output(output);
120  if (chunk_size > 0 && chunk_size < feats.NumRows()) {
121  NnetComputationChunked(nnet, cu_feats, chunk_size, &cu_output);
122  } else {
123  NnetComputation(nnet, cu_feats, pad_input, &cu_output);
124  }
125  cu_output.Swap(&output);
126 
127  if (divide_by_priors) {
128  output.MulColsVec(inv_priors); // scales each column by the corresponding element
129  // of inv_priors.
130  for (int32 i = 0; i < output.NumRows(); i++) {
131  SubVector<BaseFloat> frame(output, i);
132  BaseFloat p = frame.Sum();
133  if (!(p > 0.0)) {
134  KALDI_WARN << "Bad sum of probabilities " << p;
135  } else {
136  frame.Scale(1.0 / p); // re-normalize to sum to one.
137  }
138  }
139  }
140 
141  if (apply_log) {
142  output.ApplyFloor(1.0e-20);
143  output.ApplyLog();
144  }
145  writer.Write(utt, output);
146  num_frames += feats.NumRows();
147  num_done++;
148  }
149 #if HAVE_CUDA==1
150  CuDevice::Instantiate().PrintProfile();
151 #endif
152 
153  KALDI_LOG << "Processed " << num_done << " feature files, "
154  << num_frames << " frames of input were processed.";
155 
156  return (num_done == 0 ? 1 : 0);
157  } catch(const std::exception &e) {
158  std::cerr << e.what() << '\n';
159  return -1;
160  }
161 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 LeftContext() const
Returns the left-context summed over all the Components...
Definition: nnet-nnet.cc:42
void Read(std::istream &is, bool binary)
Definition: am-nnet.cc:39
int32 OutputDim() const
The output dimension of the network – typically the number of pdfs.
Definition: nnet-nnet.cc:31
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void NnetComputationChunked(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, int32 chunk_size, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void Read(std::istream &is, bool binary)
const VectorBase< BaseFloat > & Priors() const
Definition: am-nnet.h:67
int32 RightContext() const
Returns the right-context summed over all the Components...
Definition: nnet-nnet.cc:56
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:150
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
int32 NumPdfs() const
Definition: am-nnet.h:55
#define KALDI_LOG
Definition: kaldi-error.h:153
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
const Nnet & GetNnet() const
Definition: am-nnet.h:61