// nnet2/nnet-update.cc

// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
//           2014  Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet2/nnet-update.h"

namespace kaldi {
namespace nnet2 {


NnetUpdater::NnetUpdater(const Nnet &nnet,
                         Nnet *nnet_to_update):
    nnet_(nnet), nnet_to_update_(nnet_to_update) {
}
32 
33 
34 
35 void NnetUpdater::FormatInput(const std::vector<NnetExample> &data) {
36 
37  forward_data_.resize(nnet_.NumComponents() + 1);
38  Matrix<BaseFloat> input;
39  FormatNnetInput(nnet_, data, &input);
40  forward_data_[0].Resize(0, 0); // avoids the next command ever copying GPU->CPU
41  forward_data_[0].Swap(&input); // Copy to GPU, if being used.
43  data.size(), &chunk_info_out_);
44 }

double NnetUpdater::ComputeForMinibatch(const std::vector<NnetExample> &data,
                                        double *tot_accuracy) {
  FormatInput(data);
  Propagate();
  CuMatrix<BaseFloat> tmp_deriv;
  double ans = ComputeObjfAndDeriv(data, &tmp_deriv, tot_accuracy);
  if (nnet_to_update_ != NULL)
    Backprop(&tmp_deriv);  // this is summed (after weighting), not averaged.
  return ans;
}
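
// A minimal usage sketch (illustrative only, not from the original sources):
// driving NnetUpdater directly for one minibatch.  'nnet' and 'minibatch'
// are hypothetical variables; real callers normally use the DoBackprop() and
// ComputeNnetObjf() wrappers defined later in this file.
//
//   Nnet nnet;                           // in practice, read from disk
//   std::vector<NnetExample> minibatch;  // in practice, filled by a reader
//   NnetUpdater updater(nnet, &nnet);    // &nnet => simple SGD; NULL => no update
//   double tot_accuracy;
//   double tot_objf = updater.ComputeForMinibatch(minibatch, &tot_accuracy);
//   // tot_objf is a weighted sum, not an average; divide by
//   // TotalNnetTrainingWeight(minibatch) for a per-frame figure.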


// Form of ComputeForMinibatch for when the input data has
// already been formatted as a single matrix.
double NnetUpdater::ComputeForMinibatch(const std::vector<NnetExample> &data,
                                        Matrix<BaseFloat> *formatted_data,
                                        double *tot_accuracy) {
  { // Accept the formatted input; this replaces the call to FormatInput().
    int32 num_chunks = data.size();
    KALDI_ASSERT(formatted_data->NumRows() ==
                 num_chunks * (1 + nnet_.LeftContext() + nnet_.RightContext()) &&
                 formatted_data->NumCols() == nnet_.InputDim());

    forward_data_.resize(nnet_.NumComponents() + 1);
    // The next command avoids the Swap() command ever copying GPU->CPU in case
    // an instance of this class is used more than once (which it isn't in
    // practice).
    forward_data_[0].Resize(0, 0);
    forward_data_[0].Swap(formatted_data);  // Copy to GPU, if being used.
    nnet_.ComputeChunkInfo(forward_data_[0].NumRows() / data.size(),
                           data.size(), &chunk_info_out_);
  }
  Propagate();
  CuMatrix<BaseFloat> tmp_deriv;
  double ans = ComputeObjfAndDeriv(data, &tmp_deriv, tot_accuracy);
  if (nnet_to_update_ != NULL)
    Backprop(&tmp_deriv);  // this is summed (after weighting), not averaged.
  return ans;
}


void NnetUpdater::GetOutput(CuMatrix<BaseFloat> *output) {
  int32 num_components = nnet_.NumComponents();
  *output = forward_data_[num_components];
}

void NnetUpdater::Propagate() {
  static int32 num_times_printed = 0;

  int32 num_components = nnet_.NumComponents();
  for (int32 c = 0; c < num_components; c++) {
    const Component &component = nnet_.GetComponent(c);
    const CuMatrix<BaseFloat> &input = forward_data_[c];
    CuMatrix<BaseFloat> &output = forward_data_[c+1];
    // Note: the Propagate function will automatically resize the
    // output.
    component.Propagate(chunk_info_out_[c], chunk_info_out_[c+1], input, &output);

    // If we won't need the output of the previous layer for
    // backprop, delete it to save memory.
    bool need_last_output =
        (c > 0 && nnet_.GetComponent(c-1).BackpropNeedsOutput()) ||
        component.BackpropNeedsInput();
    if (g_kaldi_verbose_level >= 3 && num_times_printed < 100) {
      KALDI_VLOG(3) << "Stddev of data for component " << c
                    << " for this minibatch is "
                    << std::sqrt(TraceMatMat(forward_data_[c], forward_data_[c],
                                             kTrans) /
                                 (forward_data_[c].NumRows() * forward_data_[c].NumCols()));
      num_times_printed++;
    }
    if (!need_last_output)
      forward_data_[c].Resize(0, 0);  // We won't need this data.
  }
}

double NnetUpdater::ComputeObjfAndDeriv(
    const std::vector<NnetExample> &data,
    CuMatrix<BaseFloat> *deriv,
    double *tot_accuracy) const {
  BaseFloat tot_objf = 0.0, tot_weight = 0.0;
  int32 num_components = nnet_.NumComponents();
  int32 num_chunks = data.size();
  deriv->Resize(num_chunks, nnet_.OutputDim());  // sets to zero.
  const CuMatrix<BaseFloat> &output(forward_data_[num_components]);
  KALDI_ASSERT(SameDim(output, *deriv));

  std::vector<MatrixElement<BaseFloat> > sv_labels;
  sv_labels.reserve(num_chunks);  // We must have at least this many labels.
  for (int32 m = 0; m < num_chunks; m++) {
    KALDI_ASSERT(data[m].labels.size() == 1 &&
                 "Training code currently does not support multi-frame egs");
    const std::vector<std::pair<int32,BaseFloat> > &labels = data[m].labels[0];
    for (size_t i = 0; i < labels.size(); i++) {
      KALDI_ASSERT(labels[i].first < nnet_.OutputDim() &&
                   "Possibly the egs came from alignments of a mismatching model");
      MatrixElement<BaseFloat> elem = {m, labels[i].first, labels[i].second};
      sv_labels.push_back(elem);
    }
  }

  if (tot_accuracy != NULL)
    *tot_accuracy = ComputeTotAccuracy(data);

  deriv->CompObjfAndDeriv(sv_labels, output, &tot_objf, &tot_weight);

  KALDI_VLOG(4) << "Objective function is " << (tot_objf/tot_weight) << " over "
                << tot_weight << " samples (weighted).";
  return tot_objf;
}
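
// For reference: 'sv_labels' above holds one (row, pdf-id, weight) triple per
// supervised label, and CompObjfAndDeriv(), which interprets 'output' as a
// matrix of probabilities, computes
//
//   tot_objf   = sum over labels of  weight * log(output(row, pdf-id))
//   tot_weight = sum over labels of  weight
//
// writing the corresponding derivative, weight / output(row, pdf-id), into
// 'deriv' for each supervised label (all other entries stay at the zero that
// Resize() gave them).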


double NnetUpdater::ComputeTotAccuracy(
    const std::vector<NnetExample> &data) const {
  BaseFloat tot_accuracy = 0.0;
  int32 num_components = nnet_.NumComponents();
  const CuMatrix<BaseFloat> &output(forward_data_[num_components]);
  KALDI_ASSERT(output.NumRows() == static_cast<int32>(data.size()));
  CuArray<int32> best_pdf(output.NumRows());
  std::vector<int32> best_pdf_cpu;

  output.FindRowMaxId(&best_pdf);
  best_pdf.CopyToVec(&best_pdf_cpu);

  for (int32 i = 0; i < output.NumRows(); i++) {
    KALDI_ASSERT(data[i].labels.size() == 1 &&
                 "Training code currently does not support multi-frame egs");
    const std::vector<std::pair<int32,BaseFloat> > &labels = data[i].labels[0];
    for (size_t j = 0; j < labels.size(); j++) {
      int32 ref_pdf_id = labels[j].first,
          hyp_pdf_id = best_pdf_cpu[i];
      BaseFloat weight = labels[j].second;
      tot_accuracy += weight * (hyp_pdf_id == ref_pdf_id ? 1.0 : 0.0);
    }
  }
  return tot_accuracy;
}
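
// Worked example (illustrative numbers): for three chunks with label weights
// 1.0, 1.0 and 0.5, where the argmax of the network output matches the
// reference pdf-id on the first and third chunks only, this returns
// 1.0*1 + 1.0*0 + 0.5*1 = 1.5 out of a possible total weight of 2.5; callers
// typically divide by TotalNnetTrainingWeight() to report a fraction.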


void NnetUpdater::Backprop(CuMatrix<BaseFloat> *deriv) const {
  // We assume ComputeObjfAndDeriv has already been called.
  for (int32 c = nnet_.NumComponents() - 1;
       c >= nnet_.FirstUpdatableComponent(); c--) {
    const Component &component = nnet_.GetComponent(c);
    Component *component_to_update = (nnet_to_update_ == NULL ? NULL :
                                      &(nnet_to_update_->GetComponent(c)));
    const CuMatrix<BaseFloat> &input = forward_data_[c],
        &output = forward_data_[c+1];
    CuMatrix<BaseFloat> input_deriv(input.NumRows(), input.NumCols());
    const CuMatrix<BaseFloat> &output_deriv(*deriv);
    component.Backprop(chunk_info_out_[c], chunk_info_out_[c+1], input, output,
                       output_deriv, component_to_update,
                       &input_deriv);
    input_deriv.Swap(deriv);
  }
}


void FormatNnetInput(const Nnet &nnet,
                     const std::vector<NnetExample> &data,
                     Matrix<BaseFloat> *input_mat) {
  KALDI_ASSERT(data.size() > 0);
  int32 num_splice = 1 + nnet.RightContext() + nnet.LeftContext();
  KALDI_ASSERT(data[0].input_frames.NumRows() >= num_splice);

  int32 feat_dim = data[0].input_frames.NumCols(),
      spk_dim = data[0].spk_info.Dim(),
      tot_dim = feat_dim + spk_dim;  // we append these at the neural net
                                     // input... note, spk_dim might be 0.
  KALDI_ASSERT(tot_dim == nnet.InputDim());
  KALDI_ASSERT(data[0].left_context >= nnet.LeftContext());
  // If the NnetExample has more left-context than we need, ignore some.
  // This may happen in settings where we increase the amount of context during
  // training, e.g. by adding layers that require more context.
  int32 ignore_frames = data[0].left_context - nnet.LeftContext();

  int32 num_chunks = data.size();

  input_mat->Resize(num_splice * num_chunks,
                    tot_dim, kUndefined);

  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
    SubMatrix<BaseFloat> dest(*input_mat,
                              chunk * num_splice, num_splice,
                              0, feat_dim);

    Matrix<BaseFloat> full_src(data[chunk].input_frames);
    SubMatrix<BaseFloat> src(full_src, ignore_frames, num_splice, 0, feat_dim);

    dest.CopyFromMat(src);
    if (spk_dim != 0) {
      SubMatrix<BaseFloat> spk_dest(*input_mat,
                                    chunk * num_splice, num_splice,
                                    feat_dim, spk_dim);
      spk_dest.CopyRowsFromVec(data[chunk].spk_info);
    }
  }
}
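
// Layout sketch (with assumed example sizes): if nnet.LeftContext() == 3 and
// nnet.RightContext() == 2, then num_splice == 6, and for 128 chunks
// *input_mat gets 128 * 6 == 768 rows.  The spliced frames of chunk c occupy
// rows [c*6, c*6 + 5]; each row is the feature vector (feat_dim columns)
// followed by the speaker vector (spk_dim columns, possibly none).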

BaseFloat TotalNnetTrainingWeight(const std::vector<NnetExample> &egs) {
  double ans = 0.0;
  for (size_t i = 0; i < egs.size(); i++)
    for (size_t j = 0; j < egs[i].labels.size(); j++)  // for each labeled frame
      for (size_t k = 0; k < egs[i].labels[j].size(); k++)
        ans += egs[i].labels[j][k].second;
  return ans;
}


double ComputeNnetObjf(const Nnet &nnet,
                       const std::vector<NnetExample> &examples,
                       double *tot_accuracy) {
  NnetUpdater updater(nnet, NULL);
  return updater.ComputeForMinibatch(examples, tot_accuracy);
}

double DoBackprop(const Nnet &nnet,
                  const std::vector<NnetExample> &examples,
                  Nnet *nnet_to_update,
                  double *tot_accuracy) {
  if (nnet_to_update == NULL)
    return ComputeNnetObjf(nnet, examples, tot_accuracy);
  try {
    NnetUpdater updater(nnet, nnet_to_update);
    return updater.ComputeForMinibatch(examples, tot_accuracy);
  } catch (...) {
    KALDI_LOG << "Error doing backprop, nnet info is: " << nnet.Info();
    throw;
  }
}
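
// A usage sketch (illustrative; 'nnet' and 'egs' are hypothetical variables):
// accumulating a parameter gradient rather than updating a model.  If
// 'nnet_to_update' is a separate nnet zeroed with SetZero(true), Backprop()
// adds the summed, weighted gradient into it:
//
//   Nnet gradient(nnet);            // same structure as the model
//   gradient.SetZero(true);         // treat its parameters as a gradient
//   std::vector<NnetExample> egs;   // one minibatch of examples
//   double objf = DoBackprop(nnet, egs, &gradient);
//   // Passing &nnet itself instead of &gradient would do simple SGD.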

// Version of DoBackprop that takes already-formatted examples.
double DoBackprop(const Nnet &nnet,
                  const std::vector<NnetExample> &examples,
                  Matrix<BaseFloat> *examples_formatted,
                  Nnet *nnet_to_update,
                  double *tot_accuracy) {
  if (nnet_to_update == NULL) {
    KALDI_WARN << "Was not expecting to reach this code path "
               << "(wastefully formatting data twice)";
    return ComputeNnetObjf(nnet, examples, tot_accuracy);
  }
  try {
    NnetUpdater updater(nnet, nnet_to_update);
    return updater.ComputeForMinibatch(examples,
                                       examples_formatted,
                                       tot_accuracy);
  } catch (...) {
    KALDI_LOG << "Error doing backprop, nnet info is: " << nnet.Info();
    throw;
  }
}


double ComputeNnetGradient(
    const Nnet &nnet,
    const std::vector<NnetExample> &validation_set,
    int32 batch_size,
    Nnet *gradient) {
  bool treat_as_gradient = true;
  gradient->SetZero(treat_as_gradient);
  std::vector<NnetExample> batch;
  batch.reserve(batch_size);
  double tot_objf = 0.0;
  for (int32 start_pos = 0;
       start_pos < static_cast<int32>(validation_set.size());
       start_pos += batch_size) {
    batch.clear();
    for (int32 i = start_pos;
         i < std::min(start_pos + batch_size,
                      static_cast<int32>(validation_set.size()));
         i++) {
      batch.push_back(validation_set[i]);
    }
    tot_objf += DoBackprop(nnet,
                           batch,
                           gradient);
  }
  return tot_objf / validation_set.size();
}
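
// A usage sketch (illustrative; 'nnet' and 'valid_egs' are hypothetical):
// computing a validation-set gradient, e.g. for training diagnostics:
//
//   Nnet gradient(nnet);  // zeroed internally by ComputeNnetGradient()
//   double objf_per_example =
//       ComputeNnetGradient(nnet, valid_egs, 256, &gradient);
//   // 'gradient' now holds the gradient summed over valid_egs; note that,
//   // unlike the batched ComputeNnetObjf() below, the return value is
//   // divided by the number of examples.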

double ComputeNnetObjf(
    const Nnet &nnet,
    const std::vector<NnetExample> &validation_set,
    int32 batch_size,
    double *tot_accuracy) {
  double tot_accuracy_tmp;
  if (tot_accuracy)
    *tot_accuracy = 0.0;
  std::vector<NnetExample> batch;
  batch.reserve(batch_size);
  double tot_objf = 0.0;
  for (int32 start_pos = 0;
       start_pos < static_cast<int32>(validation_set.size());
       start_pos += batch_size) {
    batch.clear();
    for (int32 i = start_pos;
         i < std::min(start_pos + batch_size,
                      static_cast<int32>(validation_set.size()));
         i++) {
      batch.push_back(validation_set[i]);
    }
    tot_objf += ComputeNnetObjf(nnet, batch,
                                tot_accuracy != NULL ? &tot_accuracy_tmp : NULL);
    if (tot_accuracy)
      *tot_accuracy += tot_accuracy_tmp;
  }
  return tot_objf;
}


} // namespace nnet2
} // namespace kaldi