nnet3-discriminative-compute-from-egs.cc
Go to the documentation of this file.
1 // nnet3bin/nnet3-discriminative-compute-from-egs.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "hmm/transition-model.h"
23 #include "nnet3/nnet-nnet.h"
26 #include "nnet3/nnet-optimize.h"
27 
28 
29 namespace kaldi {
30 namespace nnet3 {
31 
32 class NnetComputerFromEg {
33  public:
34  NnetComputerFromEg(const Nnet &nnet):
35  nnet_(nnet), compiler_(nnet) { }
36 
37  // Compute the output (which will have the same number of rows as the number
38  // of Indexes in the output of the eg), and put it in "output".
39  void Compute(const NnetExample &eg, Matrix<BaseFloat> *output) {
40  ComputationRequest request;
41  bool need_backprop = false, store_stats = false;
42  GetComputationRequest(nnet_, eg, need_backprop, store_stats, &request);
43  const NnetComputation &computation = *(compiler_.Compile(request));
44  NnetComputeOptions options;
45  if (GetVerboseLevel() >= 3)
46  options.debug = true;
47  NnetComputer computer(options, computation, nnet_, NULL);
48  computer.AcceptInputs(nnet_, eg.io);
49  computer.Run();
50  const CuMatrixBase<BaseFloat> &nnet_output = computer.GetOutput("output");
51  output->Resize(nnet_output.NumRows(), nnet_output.NumCols());
52  nnet_output.CopyToMat(output);
53  }
54  private:
55  const Nnet &nnet_;
57 
58 };
59 
60 }
61 }
62 
63 int main(int argc, char *argv[]) {
64  try {
65  using namespace kaldi;
66  using namespace kaldi::nnet3;
67  typedef kaldi::int32 int32;
68  typedef kaldi::int64 int64;
69 
70  const char *usage =
71  "Read input nnet discriminative training examples, and compute the "
72  "output for each one. This program is similar to "
73  "nnet3-compute-from-egs, but works with discriminative egs. \n"
74  "If --apply-exp=true, apply the Exp() function to the output before writing\n"
75  "it out.\n"
76  "Note: This program uses only the input; it does not do forward-backward\n"
77  "over the lattice. See nnet3-discriminative-compute-objf for that.\n"
78  "\n"
79  "Usage: nnet3-discriminative-compute-from-egs [options] <raw-nnet-in> <training-examples-in> <matrices-out>\n"
80  "e.g.:\n"
81  "nnet3-discriminative-compute-from-egs --apply-exp=true 0.raw ark:1.degs ark:- | matrix-sum-rows ark:- ... \n"
82  "See also: nnet3-compute nnet3-compute-from-egs\n";
83 
84  bool binary_write = true,
85  apply_exp = false;
86  std::string use_gpu = "yes";
87 
88  ParseOptions po(usage);
89  po.Register("binary", &binary_write, "Write output in binary mode");
90  po.Register("apply-exp", &apply_exp, "If true, apply exp function to "
91  "output");
92  po.Register("use-gpu", &use_gpu,
93  "yes|no|optional|wait, only has effect if compiled with CUDA");
94 
95  po.Read(argc, argv);
96 
97  if (po.NumArgs() != 3) {
98  po.PrintUsage();
99  exit(1);
100  }
101 
102 #if HAVE_CUDA==1
103  CuDevice::Instantiate().SelectGpuId(use_gpu);
104 #endif
105 
106  std::string nnet_rxfilename = po.GetArg(1),
107  examples_rspecifier = po.GetArg(2),
108  matrix_wspecifier = po.GetArg(3);
109 
110  Nnet nnet;
111  ReadKaldiObject(nnet_rxfilename, &nnet);
112 
113  NnetComputerFromEg computer(nnet);
114 
115  int64 num_egs = 0;
116 
117  SequentialNnetDiscriminativeExampleReader example_reader(examples_rspecifier);
118  BaseFloatMatrixWriter matrix_writer(matrix_wspecifier);
119 
120  for (; !example_reader.Done(); example_reader.Next(), num_egs++) {
121  Matrix<BaseFloat> output;
122  NnetExample eg;
123  NnetDiscriminativeExample disc_eg = example_reader.Value();
124  eg.io.swap(disc_eg.inputs);
125 
126  for (int32 i = 0; i < disc_eg.outputs.size(); i++) {
127  NnetIo io;
128  io.name = disc_eg.outputs[i].name;
129  io.indexes = disc_eg.outputs[i].indexes;
130  eg.io.push_back(io);
131  }
132 
133  computer.Compute(eg, &output);
134  KALDI_ASSERT(output.NumRows() != 0);
135  if (apply_exp)
136  output.ApplyExp();
137  matrix_writer.Write(example_reader.Key(), output);
138  }
139 #if HAVE_CUDA==1
140  CuDevice::Instantiate().PrintProfile();
141 #endif
142  KALDI_LOG << "Processed " << num_egs << " examples.";
143  return 0;
144  } catch(const std::exception &e) {
145  std::cerr << e.what() << '\n';
146  return -1;
147  }
148 }
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:111
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
Definition: cu-matrix.cc:447
int main(int argc, char *argv[])
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
Definition: kaldi-error.h:60
This class enables you to do the compilation and optimization in one call, and also ensures that if t...
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
Definition: nnet-example.h:42
void Compute(const NnetExample &eg, Matrix< BaseFloat > *output)
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void AcceptInputs(const Nnet &nnet, const std::vector< NnetIo > &io)
This convenience function calls AcceptInput() in turn on all the inputs in the training example...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
int NumArgs() const
Number of positional parameters (c.f. argc-1).
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
class NnetComputer is responsible for executing the computation described in the "computation" object...
Definition: nnet-compute.h:59
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
std::vector< NnetIo > inputs
'inputs' contains the input to the network -- normally it has just one element called "input"...
std::vector< NnetDiscriminativeSupervision > outputs
'outputs' contains the sequence output supervision.
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
std::string name
the name of the input in the neural net; in simple setups it will just be "input".
Definition: nnet-example.h:36
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
std::vector< NnetIo > io
"io" contains the input and output.
Definition: nnet-example.h:116
#define KALDI_LOG
Definition: kaldi-error.h:153
NnetDiscriminativeExample is like NnetExample, but specialized for sequence training.
void GetComputationRequest(const Nnet &nnet, const NnetExample &eg, bool need_model_derivative, bool store_component_stats, ComputationRequest *request)
This function takes a NnetExample (which should already have been frame-selected, if desired...