// nnet3/nnet-tdnn-component.cc

// Copyright      2017  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

// Note: the code defined here was declared in nnet-convolutional-component.h.

#include <iterator>
#include <sstream>
#include <iomanip>
#include "nnet3/nnet-convolutional-component.h"
#include "nnet3/nnet-parse.h"

namespace kaldi {
namespace nnet3 {


TdnnComponent::TdnnComponent():
    orthonormal_constraint_(0.0),
    use_natural_gradient_(true) { }


TdnnComponent::TdnnComponent(
    const TdnnComponent &other):
    UpdatableComponent(other),  // initialize base-class
    time_offsets_(other.time_offsets_),
    linear_params_(other.linear_params_),
    bias_params_(other.bias_params_),
    orthonormal_constraint_(other.orthonormal_constraint_),
    use_natural_gradient_(other.use_natural_gradient_),
    preconditioner_in_(other.preconditioner_in_),
    preconditioner_out_(other.preconditioner_out_) {
  Check();
}


void TdnnComponent::Check() const {
  KALDI_ASSERT(linear_params_.NumRows() > 0 &&
               !time_offsets_.empty() &&
               std::set<int32>(time_offsets_.begin(),
                               time_offsets_.end()).size() ==
               time_offsets_.size() &&
               linear_params_.NumCols() % time_offsets_.size() == 0 &&
               (bias_params_.Dim() == 0 ||
                bias_params_.Dim() == linear_params_.NumRows()));
}

std::string TdnnComponent::Info() const {
  std::ostringstream stream;
  stream << UpdatableComponent::Info();
  if (orthonormal_constraint_ != 0.0)
    stream << ", orthonormal-constraint=" << orthonormal_constraint_;
  stream << ", time-offsets=";
  for (size_t i = 0; i < time_offsets_.size(); i++) {
    if (i != 0) stream << ',';
    stream << time_offsets_[i];
  }
  PrintParameterStats(stream, "linear-params", linear_params_,
                      false,  // include_mean
                      true,  // include_row_norms
                      true,  // include_column_norms
                      GetVerboseLevel() >= 2);  // include_singular_values
  if (bias_params_.Dim() == 0) {
    stream << ", has-bias=false";
  } else {
    PrintParameterStats(stream, "bias", bias_params_, true);
  }
  if (!use_natural_gradient_) {
    stream << ", use-natural-gradient=false";
  } else {
    stream << ", rank-in=" << preconditioner_in_.GetRank()
           << ", rank-out=" << preconditioner_out_.GetRank()
           << ", num-samples-history="
           << preconditioner_in_.GetNumSamplesHistory()
           << ", update-period=" << preconditioner_in_.GetUpdatePeriod()
           << ", alpha-in=" << preconditioner_in_.GetAlpha()
           << ", alpha-out=" << preconditioner_out_.GetAlpha();
  }
  return stream.str();
}


void TdnnComponent::InitFromConfig(ConfigLine *cfl) {
  // 1. Config values inherited from UpdatableComponent.
  InitLearningRatesFromConfig(cfl);

  // 2. Structural config values.
  std::string time_offsets;

  int32 input_dim = -1, output_dim = -1;

  bool ok = cfl->GetValue("time-offsets", &time_offsets) &&
      cfl->GetValue("input-dim", &input_dim) &&
      cfl->GetValue("output-dim", &output_dim);
  if (!ok || input_dim <= 0 || output_dim <= 0 ||
      !SplitStringToIntegers(time_offsets, ",", false, &time_offsets_) ||
      time_offsets_.empty()) {
    KALDI_ERR << "Bad initializer: there is a problem with "
        "time-offsets, input-dim or output-dim (not defined?): "
              << cfl->WholeLine();
  }

  if (std::set<int32>(time_offsets_.begin(),
                      time_offsets_.end()).size() != time_offsets_.size()) {
    KALDI_ERR << "Bad initializer: repeated time-offsets: "
              << cfl->WholeLine();
  }

  // 3. Parameter-initialization configs, "has-bias", and
  // orthonormal-constraint.
  orthonormal_constraint_ = 0.0;
  BaseFloat param_stddev = -1, bias_mean = 0.0, bias_stddev = 1.0;
  bool use_bias = true;
  cfl->GetValue("param-stddev", &param_stddev);
  cfl->GetValue("bias-stddev", &bias_stddev);
  cfl->GetValue("bias-mean", &bias_mean);
  cfl->GetValue("use-bias", &use_bias);
  cfl->GetValue("orthonormal-constraint", &orthonormal_constraint_);
  if (param_stddev < 0.0) {
    param_stddev = 1.0 / sqrt(input_dim * time_offsets_.size());
  }
  // Initialize the parameters.
  linear_params_.Resize(output_dim,
                        input_dim * time_offsets_.size());
  linear_params_.SetRandn();
  linear_params_.Scale(param_stddev);

  if (use_bias) {
    bias_params_.Resize(output_dim);
    bias_params_.SetRandn();
    bias_params_.Scale(bias_stddev);
    bias_params_.Add(bias_mean);
  } else {
    bias_params_.Resize(0);
  }

  // 4. Natural-gradient related configs.
  use_natural_gradient_ = true;
  int32 rank_out = -1, rank_in = -1;
  BaseFloat alpha_out = 4.0, alpha_in = 4.0,
      num_samples_history = 2000.0;
  cfl->GetValue("use-natural-gradient", &use_natural_gradient_);
  cfl->GetValue("rank-in", &rank_in);
  cfl->GetValue("rank-out", &rank_out);
  cfl->GetValue("alpha-in", &alpha_in);
  cfl->GetValue("alpha-out", &alpha_out);
  cfl->GetValue("num-samples-history", &num_samples_history);

  int32 spliced_input_dim =
      input_dim * static_cast<int32>(time_offsets_.size());
  if (rank_in < 0)
    rank_in = std::min<int32>(20, (spliced_input_dim + 1) / 2);
  preconditioner_in_.SetRank(rank_in);
  if (rank_out < 0)
    rank_out = std::min<int32>(80, (output_dim + 1) / 2);
  preconditioner_out_.SetRank(rank_out);
  preconditioner_in_.SetNumSamplesHistory(num_samples_history);
  preconditioner_out_.SetNumSamplesHistory(num_samples_history);

  preconditioner_in_.SetAlpha(alpha_in);
  preconditioner_out_.SetAlpha(alpha_out);

  preconditioner_in_.SetUpdatePeriod(4);
  preconditioner_out_.SetUpdatePeriod(4);
}
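
// For reference, a config line that InitFromConfig() above would accept might
// look like the following (the component name and dimensions here are
// made-up illustrative values, not taken from the source):
//
//   component name=tdnn1.affine type=TdnnComponent input-dim=512 \
//     output-dim=512 time-offsets=-1,0,1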

void* TdnnComponent::Propagate(
    const ComponentPrecomputedIndexes *indexes_in,
    const CuMatrixBase<BaseFloat> &in,
    CuMatrixBase<BaseFloat> *out) const {
  const PrecomputedIndexes *indexes =
      dynamic_cast<const PrecomputedIndexes*>(indexes_in);
  KALDI_ASSERT(indexes != NULL);

  if (bias_params_.Dim() != 0)
    out->CopyRowsFromVec(bias_params_);
  // If bias_params_.Dim() == 0 we don't need to zero 'out' at
  // this point, because in that case we set the flag kPropagateAdds,
  // so the calling code knows that the Propagate function *adds to*
  // the 'out' matrix, so it should (typically) be zeroed before calling
  // Propagate().

  KALDI_ASSERT(indexes->row_offsets.size() == time_offsets_.size());

  int32 num_offsets = time_offsets_.size(),
      input_dim = InputDim();
  for (int32 i = 0; i < num_offsets; i++) {
    CuSubMatrix<BaseFloat> in_part = GetInputPart(in, out->NumRows(),
                                                  indexes->row_stride,
                                                  indexes->row_offsets[i]);
    CuSubMatrix<BaseFloat> linear_params_part(linear_params_,
                                              0, linear_params_.NumRows(),
                                              i * input_dim, input_dim);
    out->AddMatMat(1.0, in_part, kNoTrans, linear_params_part, kTrans, 1.0);
  }
  return NULL;
}
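
// In matrix notation, the loop above computes
//   out = 1 * bias^T + \sum_i in_i * W_i^T,
// where in_i is the sub-matrix of 'in' that GetInputPart() selects for
// time-offset i, and W_i is the i'th block of columns of linear_params_.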

void TdnnComponent::Backprop(
    const std::string &debug_info,
    const ComponentPrecomputedIndexes *indexes_in,
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &,  // out_value
    const CuMatrixBase<BaseFloat> &out_deriv,
    void*,  // memo
    Component *to_update_in,
    CuMatrixBase<BaseFloat> *in_deriv) const {
  NVTX_RANGE("TdnnComponent::Backprop");
  const PrecomputedIndexes *indexes =
      dynamic_cast<const PrecomputedIndexes*>(indexes_in);
  KALDI_ASSERT(indexes != NULL &&
               indexes->row_offsets.size() == time_offsets_.size());
  int32 num_offsets = time_offsets_.size(),
      input_dim = InputDim();

  if (in_deriv != NULL) {
    // Propagate the derivatives back to the input data.
    for (int32 i = 0; i < num_offsets; i++) {
      CuSubMatrix<BaseFloat> in_deriv_part =
          GetInputPart(*in_deriv, out_deriv.NumRows(),
                       indexes->row_stride, indexes->row_offsets[i]);
      CuSubMatrix<BaseFloat> linear_params_part(linear_params_,
                                                0, linear_params_.NumRows(),
                                                i * input_dim, input_dim);
      // Note: this component has the property kBackpropAdds, which is why the
      // final 1.0 is there in the following call (otherwise we'd have to zero
      // *in_deriv first).
      in_deriv_part.AddMatMat(1.0, out_deriv, kNoTrans,
                              linear_params_part, kNoTrans, 1.0);
    }
  }

  if (to_update_in != NULL) {
    TdnnComponent *to_update =
        dynamic_cast<TdnnComponent*>(to_update_in);
    KALDI_ASSERT(to_update != NULL);

    if (to_update->learning_rate_ == 0.0)
      return;

    if (to_update->is_gradient_ || !to_update->use_natural_gradient_)
      to_update->UpdateSimple(*indexes, in_value, out_deriv);
    else
      to_update->UpdateNaturalGradient(*indexes, in_value, out_deriv);
  }
}
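
// In the same notation as for Propagate() above, the backprop-to-input loop
// computes, for each time-offset i,
//   in_deriv_i += out_deriv * W_i.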

void TdnnComponent::UpdateSimple(
    const PrecomputedIndexes &indexes,
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {
  NVTX_RANGE("UpdateSimple");

  if (bias_params_.Dim() != 0)
    bias_params_.AddRowSumMat(learning_rate_, out_deriv);

  int32 input_dim = in_value.NumCols(),
      num_offsets = time_offsets_.size();
  for (int32 i = 0; i < num_offsets; i++) {
    CuSubMatrix<BaseFloat> in_value_part =
        GetInputPart(in_value, out_deriv.NumRows(),
                     indexes.row_stride,
                     indexes.row_offsets[i]);
    CuSubMatrix<BaseFloat> linear_params_part(linear_params_,
                                              0, linear_params_.NumRows(),
                                              i * input_dim, input_dim);
    linear_params_part.AddMatMat(learning_rate_, out_deriv, kTrans,
                                 in_value_part, kNoTrans, 1.0);
  }
}
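
// In matrix notation, UpdateSimple() performs the plain stochastic-gradient
// step for this affine operation:
//   W_i  += learning_rate * out_deriv^T * in_i   (for each offset i)
//   bias += learning_rate * (sum over rows of out_deriv).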

void TdnnComponent::UpdateNaturalGradient(
    const PrecomputedIndexes &indexes,
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {
  NVTX_RANGE("UpdateNaturalGradient");

  int32 num_offsets = time_offsets_.size(),
      num_rows = out_deriv.NumRows(),
      input_dim = in_value.NumCols(),
      spliced_input_dim = num_offsets * input_dim,
      augmented_input_dim =
          spliced_input_dim + (bias_params_.Dim() != 0 ? 1 : 0);

  // in_value_temp is the fully spliced input with a column of ones appended
  // to it.
  CuMatrix<BaseFloat> in_value_temp(num_rows,
                                    augmented_input_dim);
  if (bias_params_.Dim() != 0) {
    // Set the last column of in_value_temp to 1.0.
    in_value_temp.Range(0, num_rows, spliced_input_dim, 1).Set(1.0);
  }

  for (int32 i = 0; i < num_offsets; i++) {
    CuSubMatrix<BaseFloat> in_value_temp_part(in_value_temp,
                                              0, num_rows,
                                              i * input_dim, input_dim),
        in_value_part = GetInputPart(in_value,
                                     num_rows,
                                     indexes.row_stride,
                                     indexes.row_offsets[i]);
    in_value_temp_part.CopyFromMat(in_value_part);
  }

  CuMatrix<BaseFloat> out_deriv_temp(out_deriv);

  // These "scale" values will get multiplied into the learning rate (faster
  // than having the matrices scaled inside the preconditioning code).
  BaseFloat in_scale, out_scale;

  preconditioner_in_.PreconditionDirections(&in_value_temp, &in_scale);
  preconditioner_out_.PreconditionDirections(&out_deriv_temp, &out_scale);

  // "scale" is a scaling factor coming from the PreconditionDirections calls
  // (it's faster to have them output a scaling factor than to have them scale
  // their outputs).
  BaseFloat scale = in_scale * out_scale,
      local_lrate = scale * learning_rate_;

  if (bias_params_.Dim() != 0) {
    // This "precon_ones" is what happens to the vector of 1's representing
    // offsets, after multiplication by the preconditioner.
    CuVector<BaseFloat> precon_ones(num_rows);
    precon_ones.CopyColFromMat(in_value_temp, spliced_input_dim);
    bias_params_.AddMatVec(local_lrate, out_deriv_temp, kTrans,
                           precon_ones, 1.0);
  }

  CuSubMatrix<BaseFloat> in_value_precon_part(in_value_temp,
                                              0, num_rows,
                                              0, spliced_input_dim);

  linear_params_.AddMatMat(local_lrate, out_deriv_temp, kTrans,
                           in_value_precon_part, kNoTrans, 1.0);
}
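
// The natural-gradient update has the same form as UpdateSimple(), except
// that the spliced input and the output derivative have each been multiplied
// by an approximate inverse-Fisher matrix inside PreconditionDirections()
// (see class OnlineNaturalGradient), and the scalar factors those calls
// return are folded into the learning rate as 'local_lrate'.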

void TdnnComponent::ReorderIndexes(
    std::vector<Index> *input_indexes,
    std::vector<Index> *output_indexes) const {
  using namespace time_height_convolution;

  // The following figures out a regular structure for the input and
  // output indexes, in case there were gaps (which is unlikely in typical
  // situations).
  ConvolutionComputationIo io;
  GetComputationIo(*input_indexes, *output_indexes, &io);
  ModifyComputationIo(&io);

  std::vector<Index> modified_input_indexes,
      modified_output_indexes;
  // The following call ensures that 'modified_input_indexes' and
  // 'modified_output_indexes' have the required ordering (where t has the
  // largest stride and each (n,x) pair is repeated for each 't' value), as
  // well as doing padding (setting t values to kNoTime where it had to insert
  // elements to ensure regular structure).
  GetIndexesForComputation(io, *input_indexes, *output_indexes,
                           &modified_input_indexes,
                           &modified_output_indexes);

  // It will be quite rare that this function actually changes
  // 'input_indexes' or 'output_indexes', because in most cases,
  // the indexes will already have the required structure and
  // ordering.
  input_indexes->swap(modified_input_indexes);
  output_indexes->swap(modified_output_indexes);
}

void TdnnComponent::Write(std::ostream &os, bool binary) const {
  WriteUpdatableCommon(os, binary);  // Write opening tag and learning rate.
  WriteToken(os, binary, "<TimeOffsets>");
  WriteIntegerVector(os, binary, time_offsets_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<OrthonormalConstraint>");
  WriteBasicType(os, binary, orthonormal_constraint_);
  WriteToken(os, binary, "<UseNaturalGradient>");
  WriteBasicType(os, binary, use_natural_gradient_);
  int32 rank_in = preconditioner_in_.GetRank(),
      rank_out = preconditioner_out_.GetRank();
  BaseFloat alpha_in = preconditioner_in_.GetAlpha(),
      alpha_out = preconditioner_out_.GetAlpha(),
      num_samples_history = preconditioner_in_.GetNumSamplesHistory();
  WriteToken(os, binary, "<NumSamplesHistory>");
  WriteBasicType(os, binary, num_samples_history);
  WriteToken(os, binary, "<AlphaInOut>");
  WriteBasicType(os, binary, alpha_in);
  WriteBasicType(os, binary, alpha_out);
  WriteToken(os, binary, "<RankInOut>");
  WriteBasicType(os, binary, rank_in);
  WriteBasicType(os, binary, rank_out);
  WriteToken(os, binary, "</TdnnComponent>");
}

void TdnnComponent::Read(std::istream &is, bool binary) {
  std::string token = ReadUpdatableCommon(is, binary);
  ExpectToken(is, binary, "<TimeOffsets>");
  ReadIntegerVector(is, binary, &time_offsets_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "<OrthonormalConstraint>");
  ReadBasicType(is, binary, &orthonormal_constraint_);
  ExpectToken(is, binary, "<UseNaturalGradient>");
  ReadBasicType(is, binary, &use_natural_gradient_);
  int32 rank_in, rank_out;
  BaseFloat alpha_in, alpha_out,
      num_samples_history;
  ExpectToken(is, binary, "<NumSamplesHistory>");
  ReadBasicType(is, binary, &num_samples_history);
  {  // This block can be simplified after a while.  It's to read a format of
     // the model that was never checked into master, but with which I (Dan)
     // did many of the experiments while tuning the resnet TDNN-F.
    std::string token;
    ReadToken(is, binary, &token);
    if (token == "<AlphaInOut>") {
      ReadBasicType(is, binary, &alpha_in);
      ReadBasicType(is, binary, &alpha_out);
    } else {
      KALDI_ASSERT(token == "<Alpha>");
      ReadBasicType(is, binary, &alpha_in);
      alpha_out = alpha_in;
    }
  }
  preconditioner_in_.SetAlpha(alpha_in);
  preconditioner_out_.SetAlpha(alpha_out);
  ExpectToken(is, binary, "<RankInOut>");
  ReadBasicType(is, binary, &rank_in);
  ReadBasicType(is, binary, &rank_out);
  preconditioner_in_.SetRank(rank_in);
  preconditioner_out_.SetRank(rank_out);
  preconditioner_in_.SetNumSamplesHistory(num_samples_history);
  preconditioner_out_.SetNumSamplesHistory(num_samples_history);
  // The update periods are not configurable.
  preconditioner_in_.SetUpdatePeriod(4);
  preconditioner_out_.SetUpdatePeriod(4);
  ExpectToken(is, binary, "</TdnnComponent>");
  Check();
}

void TdnnComponent::GetInputIndexes(
    const MiscComputationInfo &misc_info,
    const Index &output_index,
    std::vector<Index> *desired_indexes) const {
  KALDI_ASSERT(output_index.t != kNoTime);
  size_t size = time_offsets_.size();
  desired_indexes->resize(size);
  for (size_t i = 0; i < size; i++) {
    (*desired_indexes)[i].n = output_index.n;
    (*desired_indexes)[i].t = output_index.t + time_offsets_[i];
    (*desired_indexes)[i].x = output_index.x;
  }
}
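
// For example (values made up for illustration): with time-offsets=-1,0,1,
// the output index (n=0, t=100, x=0) would give the desired input indexes
// (0,99,0), (0,100,0) and (0,101,0).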


bool TdnnComponent::IsComputable(
    const MiscComputationInfo &misc_info,
    const Index &output_index,
    const IndexSet &input_index_set,
    std::vector<Index> *used_inputs) const {
  KALDI_ASSERT(output_index.t != kNoTime);
  size_t size = time_offsets_.size();
  Index index(output_index);

  if (used_inputs != NULL) {
    used_inputs->clear();
    used_inputs->reserve(size);
  }
  for (size_t i = 0; i < size; i++) {
    index.t = output_index.t + time_offsets_[i];
    if (input_index_set(index)) {
      if (used_inputs != NULL) {
        // This input index is available.
        used_inputs->push_back(index);
      }
    } else {
      return false;
    }
  }
  return true;
}

// static
CuSubMatrix<BaseFloat> TdnnComponent::GetInputPart(
    const CuMatrixBase<BaseFloat> &input_matrix,
    int32 num_output_rows,
    int32 row_stride,
    int32 row_offset) {
  KALDI_ASSERT(row_offset >= 0 && row_stride >= 1 &&
               input_matrix.NumRows() >=
               row_offset + (row_stride * num_output_rows) - (row_stride - 1));
  // The constructor takes args: (data, num_rows, num_cols, stride).
  return CuSubMatrix<BaseFloat>(
      input_matrix.Data() + input_matrix.Stride() * row_offset,
      num_output_rows,
      input_matrix.NumCols(),
      input_matrix.Stride() * row_stride);
}
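
// Illustrative example (numbers made up): if 'input_matrix' has 12 rows,
// then with num_output_rows = 4, row_stride = 1 and row_offset = 2 the
// returned sub-matrix consists of rows 2,3,4,5 of the input, while with
// row_stride = 2 it would consist of rows 2,4,6,8 -- i.e. every
// row_stride'th row starting from row_offset, without copying any data.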

void TdnnComponent::ModifyComputationIo(
    time_height_convolution::ConvolutionComputationIo *io) {
  if (io->t_step_out == 0) {
    // The 't_step' values may be zero if there was only one (input or output)
    // index, so the time-stride could not be determined.  This code fixes
    // them up in that case.  (If there was only one value, the stride is
    // actually a don't-care.)
    if (io->t_step_in == 0)
      io->t_step_in = 1;
    io->t_step_out = io->t_step_in;
  }
  // At this point the t_step_{in,out} values will be nonzero.
  KALDI_ASSERT(io->t_step_out % io->t_step_in == 0);
  // The following affects the ordering of the input indexes; it allows us to
  // reshape the input matrix in the way that we need to, in cases where there
  // is subsampling.  See the explanation where the variable was declared in
  // class ConvolutionComputationIo.
  io->reorder_t_in = io->t_step_out / io->t_step_in;

  // Make sure that num_t_in is a multiple of io->reorder_t_in by rounding up.
  int32 n = io->reorder_t_in;
  io->num_t_in = n * ((io->num_t_in + n - 1) / n);
}
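
// As a concrete instance of the rounding above (numbers made up): with
// io->reorder_t_in = 3 and io->num_t_in = 10, we get
// num_t_in = 3 * ((10 + 2) / 3) = 12, the smallest multiple of 3 that is
// >= 10.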

ComponentPrecomputedIndexes* TdnnComponent::PrecomputeIndexes(
    const MiscComputationInfo &misc_info,
    const std::vector<Index> &input_indexes,
    const std::vector<Index> &output_indexes,
    bool need_backprop) const {
  using namespace time_height_convolution;
  // The following figures out a regular structure for the input and
  // output indexes, in case there were gaps (which is unlikely in typical
  // situations).
  ConvolutionComputationIo io;
  GetComputationIo(input_indexes, output_indexes, &io);
  ModifyComputationIo(&io);

  if (RandInt(0, 10) == 0) {
    // Spot check that the provided indexes have the required properties;
    // this is like calling this->ReorderIndexes() and checking that it
    // doesn't change anything.
    std::vector<Index> modified_input_indexes,
        modified_output_indexes;
    GetIndexesForComputation(io, input_indexes, output_indexes,
                             &modified_input_indexes,
                             &modified_output_indexes);
    KALDI_ASSERT(modified_input_indexes == input_indexes &&
                 modified_output_indexes == output_indexes);
  }


  PrecomputedIndexes *ans = new PrecomputedIndexes();
  ans->row_stride = io.reorder_t_in;
  int32 num_offsets = time_offsets_.size();
  ans->row_offsets.resize(num_offsets);
  for (int32 i = 0; i < num_offsets; i++) {
    // For each offset, work out which row of the input has the same t value
    // as the first t value in the output plus that offset.  That becomes the
    // start row of the corresponding sub-part of the input.
    int32 time_offset = time_offsets_[i],
        required_input_t = io.start_t_out + time_offset,
        input_t = (required_input_t - io.start_t_in) / io.t_step_in;

    KALDI_ASSERT(required_input_t == io.start_t_in + io.t_step_in * input_t);
    // input_t is a kind of normalized time offset in the input, relative to
    // the first 't' value in the input and divided by the t-step in the
    // input, so it's the numbering "as if" the input 't' values were numbered
    // 0, 1, 2, ...
    // To turn input_t into an input row we need to take account of
    // 'reorder_t_in'.  If this is 1 then the input row is input_t times
    // io.num_images.  Otherwise it's a little more complicated, and to
    // understand it you should read the comment where 'reorder_t_in' is
    // declared in convolution.h.  Briefly: the part that is an integer
    // multiple of 'reorder_t_in' gets multiplied by io.num_images; the
    // remainder does not.

    int32 n = io.reorder_t_in,
        input_t_multiple = n * (input_t / n), input_t_remainder = input_t % n;
    // Note: input_t == input_t_multiple + input_t_remainder.
    int32 input_row_offset = input_t_multiple * io.num_images +
        input_t_remainder;
    ans->row_offsets[i] = input_row_offset;
  }
  return ans;
}
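
// Worked instance of the row-offset computation above (numbers made up):
// with input_t = 7, io.reorder_t_in = 3 and io.num_images = 4, we get
// input_t_multiple = 6 and input_t_remainder = 1, so
// ans->row_offsets[i] = 6 * 4 + 1 = 25.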

void TdnnComponent::Scale(BaseFloat scale) {
  if (scale == 0.0) {
    linear_params_.SetZero();
    bias_params_.SetZero();
  } else {
    linear_params_.Scale(scale);
    bias_params_.Scale(scale);
  }
}

void TdnnComponent::Add(BaseFloat alpha,
                        const Component &other_in) {
  const TdnnComponent *other =
      dynamic_cast<const TdnnComponent*>(&other_in);
  KALDI_ASSERT(other != NULL);
  linear_params_.AddMat(alpha, other->linear_params_);
  if (bias_params_.Dim() != 0)
    bias_params_.AddVec(alpha, other->bias_params_);
}

void TdnnComponent::PerturbParams(BaseFloat stddev) {
  CuMatrix<BaseFloat> temp_mat(linear_params_.NumRows(),
                               linear_params_.NumCols(), kUndefined);
  temp_mat.SetRandn();
  linear_params_.AddMat(stddev, temp_mat);
  if (bias_params_.Dim() != 0) {
    CuVector<BaseFloat> temp_vec(bias_params_.Dim(), kUndefined);
    temp_vec.SetRandn();
    bias_params_.AddVec(stddev, temp_vec);
  }
}

BaseFloat TdnnComponent::DotProduct(
    const UpdatableComponent &other_in) const {
  const TdnnComponent *other =
      dynamic_cast<const TdnnComponent*>(&other_in);
  KALDI_ASSERT(other != NULL);
  BaseFloat ans = TraceMatMat(linear_params_, other->linear_params_, kTrans);
  if (bias_params_.Dim() != 0)
    ans += VecVec(bias_params_, other->bias_params_);
  return ans;
}
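
// Note: TraceMatMat(A, B, kTrans) is trace(A B^T), i.e. the sum of
// elementwise products of A and B, so this returns the dot-product of the
// two components' parameters viewed as one long vector.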

int32 TdnnComponent::NumParameters() const {
  // Note: bias_params_.Dim() may actually be zero.
  return linear_params_.NumRows() * linear_params_.NumCols() +
      bias_params_.Dim();
}

void TdnnComponent::Vectorize(
    VectorBase<BaseFloat> *params) const {
  KALDI_ASSERT(params->Dim() == NumParameters());
  int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
      bias_size = bias_params_.Dim();
  params->Range(0, linear_size).CopyRowsFromMat(linear_params_);
  if (bias_size != 0)
    params->Range(linear_size, bias_size).CopyFromVec(bias_params_);
}

void TdnnComponent::UnVectorize(
    const VectorBase<BaseFloat> &params) {
  KALDI_ASSERT(params.Dim() == NumParameters());
  int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
      bias_size = bias_params_.Dim();
  linear_params_.CopyRowsFromVec(params.Range(0, linear_size));
  if (bias_size != 0)
    bias_params_.CopyFromVec(params.Range(linear_size, bias_size));
}

void TdnnComponent::FreezeNaturalGradient(bool freeze) {
  preconditioner_in_.Freeze(freeze);
  preconditioner_out_.Freeze(freeze);
}

TdnnComponent::PrecomputedIndexes*
TdnnComponent::PrecomputedIndexes::Copy() const {
  return new PrecomputedIndexes(*this);
}

void TdnnComponent::PrecomputedIndexes::Write(
    std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<TdnnComponentPrecomputedIndexes>");
  WriteToken(os, binary, "<RowStride>");
  WriteBasicType(os, binary, row_stride);
  WriteToken(os, binary, "<RowOffsets>");
  WriteIntegerVector(os, binary, row_offsets);
  WriteToken(os, binary, "</TdnnComponentPrecomputedIndexes>");
}

void TdnnComponent::PrecomputedIndexes::Read(
    std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary,
                       "<TdnnComponentPrecomputedIndexes>",
                       "<RowStride>");
  ReadBasicType(is, binary, &row_stride);
  ExpectToken(is, binary, "<RowOffsets>");
  ReadIntegerVector(is, binary, &row_offsets);
  ExpectToken(is, binary, "</TdnnComponentPrecomputedIndexes>");
}

void TdnnComponent::ConsolidateMemory() {
  OnlineNaturalGradient temp_in(preconditioner_in_);
  preconditioner_in_.Swap(&temp_in);
  OnlineNaturalGradient temp_out(preconditioner_out_);
  preconditioner_out_.Swap(&temp_out);
}

}  // namespace nnet3
}  // namespace kaldi