nnet-compute.cc
Go to the documentation of this file.
1 // nnet2/nnet-compute.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 // Copyright 2015 David Snyder
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include "nnet2/nnet-compute.h"
22 #include "hmm/posterior.h"
23 
24 namespace kaldi {
25 namespace nnet2 {
26 
27 /*
28  This class does the forward and possibly backward computation for (typically)
29  a whole utterance of contiguous features. You'll instantiate one of
30  these classes each time you want to do this computation.
31 */
32 class NnetComputer {
33  public:
34  /* Initializer. If pad == true, pad input with nnet.LeftContext() frames on
35  the left and nnet.RightContext() frames on the right (duplicate the first
36  and last frames.) */
37  NnetComputer(const Nnet &nnet,
38  const CuMatrixBase<BaseFloat> &input_feats,
39  bool pad,
40  Nnet *nnet_to_update = NULL);
41 
43  void Propagate();
44 
45  void Backprop(CuMatrix<BaseFloat> *tmp_deriv);
46 
47 
52  CuMatrix<BaseFloat> *deriv) const;
53 
55 
56  private:
57  const Nnet &nnet_;
58  std::vector<CuMatrix<BaseFloat> > forward_data_;
59  Nnet *nnet_to_update_; // May be NULL, if just want objective function
60  // but no gradient info or SGD.
61  std::vector <ChunkInfo> chunk_info_;
62 };
63 
65  const CuMatrixBase<BaseFloat> &input_feats,
66  bool pad,
67  Nnet *nnet_to_update):
68  nnet_(nnet), nnet_to_update_(nnet_to_update) {
69  int32 dim = input_feats.NumCols();
70  if (dim != nnet.InputDim()) {
71  KALDI_ERR << "Feature dimension is " << dim << " but network expects "
72  << nnet.InputDim();
73  }
74  forward_data_.resize(nnet.NumComponents() + 1);
75 
76  int32 left_context = (pad ? nnet_.LeftContext() : 0),
77  right_context = (pad ? nnet_.RightContext() : 0);
78 
79  int32 num_rows = left_context + input_feats.NumRows() + right_context;
80  nnet.ComputeChunkInfo(num_rows, 1, &chunk_info_);
81 
83  input.Resize(num_rows, dim);
84  input.Range(left_context, input_feats.NumRows(),
85  0, dim).CopyFromMat(input_feats);
86  for (int32 i = 0; i < left_context; i++)
87  input.Row(i).CopyFromVec(input_feats.Row(0));
88  int32 last_row = input_feats.NumRows() - 1;
89  for (int32 i = 0; i < right_context; i++)
90  input.Row(num_rows - i - 1).CopyFromVec(input_feats.Row(last_row));
91 }
92 
93 
96  for (int32 c = 0; c < nnet_.NumComponents(); c++) {
97  const Component &component = nnet_.GetComponent(c);
99  &output = forward_data_[c+1];
100  component.Propagate(chunk_info_[c], chunk_info_[c+1], input, &output);
101  const Component *prev_component = (c == 0 ? NULL : &(nnet_.GetComponent(c-1)));
102  bool will_do_backprop = (nnet_to_update_ != NULL),
103  keep_last_output = will_do_backprop &&
104  ((c>0 && prev_component->BackpropNeedsOutput()) ||
105  component.BackpropNeedsInput());
106  if (!keep_last_output)
107  forward_data_[c].Resize(0, 0); // We won't need this data; save memory.
108  }
109 }
110 
112  CuMatrix<BaseFloat> *deriv) const {
113  // TODO: convert this to proper CUDA code, c.f. ComputeObjfAndDeriv
114  // in nnet-update.cc (I'm not sure, though, that this code is ever reached.)
115  int32 num_components = nnet_.NumComponents();
116  double tot_objf = 0.0, tot_weight = 0.0;
117  const CuMatrix<BaseFloat> &last_layer_output = forward_data_[num_components];
118  int32 num_frames = last_layer_output.NumRows(),
119  num_pdfs = last_layer_output.NumCols();
120  KALDI_ASSERT(pdf_post.size() == static_cast<size_t>(num_frames));
121  deriv->Resize(num_frames, num_pdfs); // will zero it.
122  for (int32 i = 0; i < deriv->NumRows(); i++) {
123  for (size_t j = 0; j < pdf_post[i].size(); j++) {
124  int32 label = pdf_post[i][j].first;
125  BaseFloat weight = pdf_post[i][j].second;
126  KALDI_ASSERT(label >= 0 && label < num_pdfs);
127  BaseFloat this_prob = last_layer_output(i, label);
128  KALDI_ASSERT(this_prob > 0.99e-20); // We floored to 1.0e-20 in SoftmaxLayer.
129  tot_objf += weight * Log(this_prob);
130  tot_weight += weight;
131  (*deriv)(i, label) += weight / this_prob; // could be "=", assuming the
132  // labels are all distinct.
133  }
134  }
135  KALDI_VLOG(4) << "Objective function is " << (tot_objf/tot_weight) <<
136  " per frame over " << tot_weight << " samples.";
137  return tot_objf;
138 }
139 
140 
142  KALDI_ASSERT(nnet_to_update_ != NULL); // Or why do backprop?
143  // If later this reasoning changes, we can change this
144  // statement and add logic to make component_to_update, below,
145  // NULL if necessary.
146 
147  for (int32 c = nnet_.NumComponents() - 1; c >= 0; c--) {
148  const Component &component = nnet_.GetComponent(c);
149  Component *component_to_update = &(nnet_to_update_->GetComponent(c));
150  const CuMatrix<BaseFloat> &input = forward_data_[c],
151  &output = forward_data_[c+1],
152  &output_deriv = *tmp_deriv;
153  CuMatrix<BaseFloat> input_deriv;
154  component.Backprop(chunk_info_[c], chunk_info_[c+1], input, output, output_deriv,
155  component_to_update, &input_deriv);
156  *tmp_deriv = input_deriv;
157  }
158 }
159 
160 void NnetComputation(const Nnet &nnet,
161  const CuMatrixBase<BaseFloat> &input, // features
162  bool pad_input,
163  CuMatrixBase<BaseFloat> *output) {
164  NnetComputer nnet_computer(nnet, input, pad_input, NULL);
165  nnet_computer.Propagate();
166  output->CopyFromMat(nnet_computer.GetOutput());
167 }
168 
169 void NnetComputationChunked(const Nnet &nnet,
170  const CuMatrixBase<BaseFloat> &input, // features
171  int32 chunk_size,
172  CuMatrixBase<BaseFloat> *output) {
173  int32 num_rows,
174  num_chunks = ceil((BaseFloat)input.NumRows() / chunk_size),
175  dim = input.NumCols(),
176  left_context = nnet.LeftContext(),
177  right_context = nnet.RightContext();
178  CuMatrix<BaseFloat> full_input;
179  num_rows = left_context + input.NumRows() + right_context;
180  full_input.Resize(num_rows, dim);
181  full_input.Range(left_context, input.NumRows(),
182  0, dim).CopyFromMat(input);
183  for (int32 i = 0; i < left_context; i++)
184  full_input.Row(i).CopyFromVec(input.Row(0));
185  int32 last_row = input.NumRows() - 1;
186  for (int32 i = 0; i < right_context; i++)
187  full_input.Row(num_rows - i - 1).CopyFromVec(input.Row(last_row));
188 
189  for (int32 i = 0; i < num_chunks; i++) {
190  int32 index = i * chunk_size,
191  offset = std::min(num_rows - chunk_size * i,
192  left_context + chunk_size + right_context);
193  CuSubMatrix<BaseFloat> chunk_input(full_input, index, offset, 0, dim);
194  CuMatrix<BaseFloat> cu_chunk_input(chunk_input);
195 
196  // Note: we have already accounted for input padding, so we pass
197  // pad_input==false to the NnetComputer.
198  NnetComputer nnet_computer(nnet, cu_chunk_input, false, NULL);
199  nnet_computer.Propagate();
200  CuMatrix<BaseFloat> cu_chunk_output(nnet_computer.GetOutput());
201  CuSubMatrix<BaseFloat> chunk_out(*output, i * chunk_size,
202  cu_chunk_output.NumRows(), 0,
203  cu_chunk_output.NumCols());
204  chunk_out.CopyFromMat(cu_chunk_output);
205  }
206 }
207 
209  const CuMatrixBase<BaseFloat> &input,
210  bool pad_input,
211  const Posterior &pdf_post,
212  Nnet *nnet_to_update) {
213  NnetComputer nnet_computer(nnet, input, pad_input, nnet_to_update);
214  nnet_computer.Propagate();
215  CuMatrix<BaseFloat> deriv;
216  BaseFloat ans;
217  ans = nnet_computer.ComputeLastLayerDeriv(pdf_post, &deriv);
218  nnet_computer.Backprop(&deriv);
219  return ans;
220 }
221 
222 
223 } // namespace nnet2
224 } // namespace kaldi
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
const Component & GetComponent(int32 c) const
Definition: nnet-nnet.cc:141
int32 LeftContext() const
Returns the left-context summed over all the Components...
Definition: nnet-nnet.cc:42
void Backprop(CuMatrix< BaseFloat > *tmp_deriv)
const CuSubVector< Real > Row(MatrixIndexT i) const
Definition: cu-matrix.h:670
Abstract class, basic element of the network, it is a box with defined inputs, outputs, and transformation functions interface.
virtual bool BackpropNeedsInput() const
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:653
kaldi::int32 int32
void NnetComputationChunked(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, int32 chunk_size, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
std::vector< CuMatrix< BaseFloat > > forward_data_
Definition: nnet-compute.cc:58
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void NnetComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, CuMatrixBase< BaseFloat > *output)
Does the basic neural net computation, on a sequence of data (e.g.
int32 NumComponents() const
Returns number of components -- think of this as similar to # of layers, but e.g.
Definition: nnet-nnet.h:69
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
double Log(double x)
Definition: kaldi-math.h:100
BaseFloat NnetGradientComputation(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input, bool pad_input, const Posterior &pdf_post, Nnet *nnet_to_update)
int32 RightContext() const
Returns the right-context summed over all the Components...
Definition: nnet-nnet.cc:56
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const =0
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
NnetComputer(const Nnet &nnet, const CuMatrixBase< BaseFloat > &input_feats, bool pad, Nnet *nnet_to_update=NULL)
Definition: nnet-compute.cc:64
#define KALDI_ERR
Definition: kaldi-error.h:147
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
void Propagate()
The forward-through-the-layers part of the computation.
Definition: nnet-compute.cc:95
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const =0
Perform forward pass propagation Input->Output.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
BaseFloat ComputeLastLayerDeriv(const Posterior &pdf_post, CuMatrix< BaseFloat > *deriv) const
Computes objf derivative at last layer, and returns objective function summed over labels and multipl...
void ComputeChunkInfo(int32 input_chunk_size, int32 num_chunks, std::vector< ChunkInfo > *chunk_info_out) const
Uses the output of the Context() functions of the network, to compute a vector of size NumComponents(...
Definition: nnet-nnet.cc:65
virtual bool BackpropNeedsOutput() const
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50
int32 InputDim() const
Dimension of the input features, e.g.
Definition: nnet-nnet.cc:36
std::vector< ChunkInfo > chunk_info_
Definition: nnet-compute.cc:61
CuMatrixBase< BaseFloat > & GetOutput()
Definition: nnet-compute.cc:54