nnet-convolutional-component.cc
Go to the documentation of this file.
1 // nnet3/nnet-convolutional-component.cc
2 
3 // Copyright 2017 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <iterator>
21 #include <sstream>
22 #include <iomanip>
25 #include "nnet3/nnet-parse.h"
26 
27 namespace kaldi {
28 namespace nnet3 {
29 
30 
// Default constructor (initializer-list tail only; the signature line is
// omitted by the doc renderer). Natural-gradient updates default to enabled.
32  use_natural_gradient_(true) { }
33 
// Copy constructor: copies base-class state (learning rate etc.), the
// convolution model and (on lines omitted by the renderer, listing lines
// 38-45) the parameters and preconditioner state, then self-checks.
35  const TimeHeightConvolutionComponent &other):
36  UpdatableComponent(other), // initialize base-class
37  model_(other.model_),
46  Check();
47 }
48 
49 
// Check(): validates the convolution model and asserts that the linear
// parameter matrix has the rows/cols the model requires (the signature and
// the KALDI_ASSERT opening line are omitted by the renderer).
51  model_.Check();
53  linear_params_.NumRows() == model_.ParamRows() &&
54  linear_params_.NumCols() == model_.ParamCols());
55 }
56 
// InputDim(): delegates to the convolution model (num-filters-in * height-in,
// per model_.InputDim(); signature line omitted by the renderer).
58  return model_.InputDim();
59 }
60 
// OutputDim(): delegates to the convolution model (signature line omitted
// by the renderer).
62  return model_.OutputDim();
63 }
64 
// Info(): diagnostic string — base-class info, model info, parameter stats,
// and (if natural gradient is on) the preconditioner configuration.
// Listing line 79 (the GetNumMinibatchesHistory() stream insertion) is
// omitted by the renderer.
66  std::ostringstream stream;
67  // The output of model_.Info() has been designed to be suitable
68  // as a component-level info string, it has
69  // {num-filters,height}-{in-out}, offsets=[...], required-time-offsets=[...],
70  // {input,output}-dim.
71  stream << UpdatableComponent::Info() << ' ' << model_.Info();
72  PrintParameterStats(stream, "filter-params", linear_params_);
73  PrintParameterStats(stream, "bias-params", bias_params_, true);
74  stream << ", num-params=" << NumParameters()
75  << ", max-memory-mb=" << max_memory_mb_
76  << ", use-natural-gradient=" << use_natural_gradient_;
77  if (use_natural_gradient_) {
78  stream << ", num-minibatches-history="
80  << ", rank-in=" << preconditioner_in_.GetRank()
81  << ", rank-out=" << preconditioner_out_.GetRank()
82  << ", alpha=" << preconditioner_in_.GetAlpha();
83  }
84  return stream.str();
85 }
86 
87 
// InitUnit(): initializes the linear parameters so the component computes
// the identity, by setting the sub-block of columns corresponding to the
// (time=0, height=0) offset to the unit matrix. Errors out if
// num-filters-in != num-filters-out (opening lines omitted by the renderer)
// or if the (0, 0) offset does not exist in the model.
90  KALDI_ERR << "You cannot specify init-unit if the num-filters-in "
91  << "and num-filters-out differ.";
92  }
93  size_t i;
94  int32 zero_offset = 0;
95  for (i = 0; i < model_.offsets.size(); i++) {
96  if (model_.offsets[i].time_offset == 0 &&
97  model_.offsets[i].height_offset == 0) {
98  zero_offset = i;
99  break;
100  }
101  }
102  if (i == model_.offsets.size()) // did not break.
103  KALDI_ERR << "You cannot specify init-unit if the model does "
104  << "not have the offset (0, 0).";
105 
// Columns of linear_params_ are grouped per offset, num_filters_in wide each.
106  CuSubMatrix<BaseFloat> zero_offset_block(
107  linear_params_, 0, linear_params_.NumRows(),
108  zero_offset * model_.num_filters_in, model_.num_filters_in);
109 
110  KALDI_ASSERT(zero_offset_block.NumRows() == zero_offset_block.NumCols());
111  zero_offset_block.AddToDiag(1.0); // set this block to the unit matrix.
112 }
113 
// InitFromConfig(): parses the config line and initializes the component in
// four stages: (1) base-class configs, (2) convolution structure (offsets,
// heights, subsampling), (3) parameter initialization, (4) natural-gradient
// setup. Several listing lines (114, 116, 138, 151, 179, 212-213, 218, 241,
// 248) are omitted by the doc renderer — notably the Offset declarations and
// the parameter-matrix Resize calls; see the repository source for those.
115  // 1. Config values inherited from UpdatableComponent.
117 
118  // 2. convolution-related config values.
119  model_.height_subsample_out = 1; // default.
120  max_memory_mb_ = 200.0;
121  std::string height_offsets, time_offsets, required_time_offsets = "undef",
122  offsets;
123 
// These four values have no defaults; their absence is a config error.
124  bool ok = cfl->GetValue("num-filters-in", &model_.num_filters_in) &&
125  cfl->GetValue("num-filters-out", &model_.num_filters_out) &&
126  cfl->GetValue("height-in", &model_.height_in) &&
127  cfl->GetValue("height-out", &model_.height_out);
128  if (!ok) {
129  KALDI_ERR << "Bad initializer: expected all the values "
130  "num-filters-in, num-filters-out, height-in, height-out, "
131  "to be defined: "
132  << cfl->WholeLine();
133  }
134  // some optional structural configs.
135  cfl->GetValue("required-time-offsets", &required_time_offsets);
136  cfl->GetValue("height-subsample-out", &model_.height_subsample_out);
137  cfl->GetValue("max-memory-mb", &max_memory_mb_);
139 
140  { // This block sets up model_.offsets.
141  model_.offsets.clear();
142  if (cfl->GetValue("offsets", &offsets)) {
143  // init from offsets, like "-1,-1;-1,0;-1,1;0,-1;...;1,1"
144  std::vector<std::string> splits;
145  SplitStringToVector(offsets, ";", false, &splits);
146  for (size_t i = 0; i < splits.size(); i++) {
147  std::vector<int32> int_pair;
148  if (!SplitStringToIntegers(splits[i], ",", false, &int_pair) ||
149  int_pair.size() != 2)
150  KALDI_ERR << "Bad config value offsets=" << offsets;
152  offset.time_offset = int_pair[0];
153  offset.height_offset = int_pair[1];
154  model_.offsets.push_back(offset);
155  }
156  std::sort(model_.offsets.begin(), model_.offsets.end());
157  if (!IsSortedAndUniq(model_.offsets) || model_.offsets.empty())
158  KALDI_ERR << "Error in offsets: probably repeated offset. "
159  "offsets=" << offsets;
160  } else if (cfl->GetValue("height-offsets", &height_offsets) &&
161  cfl->GetValue("time-offsets", &time_offsets)) {
// Alternative form: the cartesian product of time-offsets x height-offsets.
162  std::vector<int32> height_offsets_vec,
163  time_offsets_vec;
164  if (!SplitStringToIntegers(height_offsets, ",", false,
165  &height_offsets_vec) ||
166  !SplitStringToIntegers(time_offsets, ",", false,
167  &time_offsets_vec)) {
168  KALDI_ERR << "Formatting problem in time-offsets or height-offsets: "
169  << cfl->WholeLine();
170  }
171  if (height_offsets_vec.empty() || !IsSortedAndUniq(height_offsets_vec) ||
172  time_offsets_vec.empty() || !IsSortedAndUniq(time_offsets_vec)) {
173  KALDI_ERR << "time-offsets and height-offsets must be nonempty, "
174  "sorted and unique.";
175  }
176  model_.offsets.clear();
177  for (size_t i = 0; i < time_offsets_vec.size(); i++) {
178  for (size_t j = 0; j < height_offsets_vec.size(); j++) {
180  offset.time_offset = time_offsets_vec[i];
181  offset.height_offset = height_offsets_vec[j];
182  model_.offsets.push_back(offset);
183  }
184  }
185  } else {
186  KALDI_ERR << "Expected either 'offsets', or both 'height-offsets' and "
187  "'time-offsets', to be defined: " << cfl->WholeLine();
188  }
189  }
190 
191  if (model_.offsets.empty())
192  KALDI_ERR << "Something went wrong setting offsets: " << cfl->WholeLine();
193 
194 
195  { // This block sets model_.required_time_offsets.
196  std::vector<int32> required_time_offsets_vec;
197  if (required_time_offsets == "undef") {
198  // it defaults to all the time offsets that were used.
199  std::set<int32> required_time_offsets;
200  for (size_t i = 0; i < model_.offsets.size(); i++)
201  required_time_offsets_vec.push_back(model_.offsets[i].time_offset);
202  SortAndUniq(&required_time_offsets_vec);
203  } else {
204  if (!SplitStringToIntegers(required_time_offsets, ",", false,
205  &required_time_offsets_vec) ||
206  required_time_offsets_vec.empty() ||
207  !IsSortedAndUniq(required_time_offsets_vec)) {
208  KALDI_ERR << "Formatting problem in required-time-offsets: "
209  << cfl->WholeLine();
210  }
211  }
// (The assignment into model_.required_time_offsets itself is on listing
// lines 212-213, omitted by the renderer.)
214  required_time_offsets_vec.begin(),
215  required_time_offsets_vec.end());
216  }
217 
// Check(false, ...) = hard structural validation; Check(true, ...) also
// warns about input heights that no offset ever reads.
219  if (!model_.Check(false, true)) {
220  KALDI_ERR << "Parameters used to initialize TimeHeightConvolutionComponent "
221  << "do not make sense, line was: " << cfl->WholeLine();
222  }
223  if (!model_.Check(true, true)) {
224  KALDI_WARN << "There are input heights unused in "
225  "TimeHeightConvolutionComponent; consider increasing output "
226  "height or decreasing height of preceding layer."
227  << cfl->WholeLine();
228  }
229 
230  // 3. Parameter-initialization configs.
231  BaseFloat param_stddev = -1, bias_stddev = 0.0;
232  bool init_unit = false;
233  cfl->GetValue("param-stddev", &param_stddev);
234  cfl->GetValue("bias-stddev", &bias_stddev);
235  cfl->GetValue("init-unit", &init_unit);
236  if (param_stddev < 0.0) {
// Default stddev scales inversely with fan-in (inputs per output filter).
237  param_stddev = 1.0 / sqrt(model_.num_filters_in *
238  model_.offsets.size());
239  }
240  // initialize the parameters.
242  if (!init_unit) {
243  linear_params_.SetRandn();
244  linear_params_.Scale(param_stddev);
245  } else {
246  InitUnit();
247  }
249  bias_params_.SetRandn();
250  bias_params_.Scale(bias_stddev);
251 
252 
253  // 4. Natural-gradient related configs.
254  use_natural_gradient_ = true;
255  int32 rank_out = -1, rank_in = -1;
256  BaseFloat alpha_out = 4.0, alpha_in = 4.0,
257  num_minibatches_history = 4.0;
258  cfl->GetValue("use-natural-gradient", &use_natural_gradient_);
259  cfl->GetValue("rank-in", &rank_in);
260  cfl->GetValue("rank-out", &rank_out);
261  cfl->GetValue("alpha-in", &alpha_in);
262  cfl->GetValue("alpha-out", &alpha_out);
263  cfl->GetValue("num-minibatches-history", &num_minibatches_history);
264 
// "+ 1" accounts for the bias column appended to the gradient matrix in
// UpdateNaturalGradient().
265  int32 dim_in = linear_params_.NumCols() + 1,
266  dim_out = linear_params_.NumRows();
267  if (rank_in < 0)
268  rank_in = std::min<int32>(80, (dim_in + 1) / 2);
269  preconditioner_in_.SetRank(rank_in);
270  if (rank_out < 0)
271  rank_out = std::min<int32>(80, (dim_out + 1) / 2);
272  preconditioner_out_.SetRank(rank_out);
273  preconditioner_in_.SetNumMinibatchesHistory(num_minibatches_history);
274  preconditioner_out_.SetNumMinibatchesHistory(num_minibatches_history);
275 
276  preconditioner_in_.SetAlpha(alpha_in);
277  preconditioner_out_.SetAlpha(alpha_out);
278 
279  ComputeDerived();
280 }
281 
// Propagate(): writes the bias into every output row (viewing the output as
// a (rows*height_out) x num_filters_out matrix), then adds the convolution.
// Listing lines 282, 291 and 294 (the signature line and parts of the
// reshape arguments) are omitted by the renderer. Returns NULL (no memo).
283  const ComponentPrecomputedIndexes *indexes_in,
284  const CuMatrixBase<BaseFloat> &in,
285  CuMatrixBase<BaseFloat> *out) const {
286  const PrecomputedIndexes *indexes =
287  dynamic_cast<const PrecomputedIndexes*>(indexes_in);
288  KALDI_ASSERT(indexes != NULL);
289  { // this block handles the bias term.
// The reshape requires contiguous storage, hence the Stride() == NumCols()
// assertion.
290  KALDI_ASSERT(out->Stride() == out->NumCols() &&
292  CuSubMatrix<BaseFloat> out_reshaped(
293  out->Data(), out->NumRows() * model_.height_out,
295  out_reshaped.CopyRowsFromVec(bias_params_);
296  }
297  ConvolveForward(indexes->computation, in, linear_params_, out);
298  return NULL;
299 }
300 
// Backprop(): propagates derivatives to the input (if requested) and updates
// the parameters of 'to_update_in' (if non-NULL), choosing plain SGD when
// accumulating a gradient or when natural gradient is disabled. The
// signature line (301) and the ConvolveBackwardData call's opening line
// (316) are omitted by the renderer.
302  const std::string &debug_info,
303  const ComponentPrecomputedIndexes *indexes_in,
304  const CuMatrixBase<BaseFloat> &in_value,
305  const CuMatrixBase<BaseFloat> &, // out_value
306  const CuMatrixBase<BaseFloat> &out_deriv,
307  void*, // memo
308  Component *to_update_in,
309  CuMatrixBase<BaseFloat> *in_deriv) const {
310  NVTX_RANGE("TimeHeightConvolutionComponent::Backprop");
311  const PrecomputedIndexes *indexes =
312  dynamic_cast<const PrecomputedIndexes*>(indexes_in);
313  KALDI_ASSERT(indexes != NULL);
314 
315  if (in_deriv != NULL) {
317  out_deriv, in_deriv);
318  }
319  if (to_update_in != NULL) {
320  TimeHeightConvolutionComponent *to_update =
321  dynamic_cast<TimeHeightConvolutionComponent*>(to_update_in);
322  KALDI_ASSERT(to_update != NULL);
323 
// Zero learning rate: skip the (expensive) parameter-derivative computation.
324  if (to_update->learning_rate_ == 0.0)
325  return;
326 
327  if (to_update->is_gradient_ || !to_update->use_natural_gradient_)
328  to_update->UpdateSimple(*indexes, in_value, out_deriv);
329  else
330  to_update->UpdateNaturalGradient(*indexes, in_value, out_deriv);
331  }
332 }
333 
// UpdateSimple(): plain-SGD parameter update. Accumulates the bias gradient
// by summing output-derivative rows (reshaped per-filter), then the linear
// parameter gradient via ConvolveBackwardParams, both scaled by the learning
// rate. Listing lines 334, 342, 345 and 350 (the signature and parts of the
// reshape / final call arguments) are omitted by the renderer.
335  const PrecomputedIndexes &indexes,
336  const CuMatrixBase<BaseFloat> &in_value,
337  const CuMatrixBase<BaseFloat> &out_deriv) {
338 
339  { // this block handles the bias term.
340  KALDI_ASSERT(out_deriv.Stride() == out_deriv.NumCols() &&
341  out_deriv.NumCols() ==
343  CuSubMatrix<BaseFloat> out_deriv_reshaped(
344  out_deriv.Data(), out_deriv.NumRows() * model_.height_out,
346  bias_params_.AddRowSumMat(learning_rate_, out_deriv_reshaped);
347  }
348 
349  ConvolveBackwardParams(indexes.computation, in_value, out_deriv,
351 }
352 
353 
// UpdateNaturalGradient(): natural-gradient SGD update. Builds a combined
// gradient matrix [linear-params-deriv | bias-deriv], preconditions it on
// the input dimension, transposes, preconditions on the output dimension,
// then applies the result to linear_params_ and bias_params_ with the
// combined rescaling. Listing lines 354, 364 and 367 are omitted by the
// renderer.
355  const PrecomputedIndexes &indexes,
356  const CuMatrixBase<BaseFloat> &in_value,
357  const CuMatrixBase<BaseFloat> &out_deriv) {
358 
359  CuVector<BaseFloat> bias_deriv(bias_params_.Dim());
360 
361  { // this block computes 'bias_deriv', the derivative w.r.t. the bias.
362  KALDI_ASSERT(out_deriv.Stride() == out_deriv.NumCols() &&
363  out_deriv.NumCols() ==
365  CuSubMatrix<BaseFloat> out_deriv_reshaped(
366  out_deriv.Data(), out_deriv.NumRows() * model_.height_out,
368  bias_deriv.AddRowSumMat(1.0, out_deriv_reshaped);
369  }
370 
// Extra column holds the bias derivative so both are preconditioned jointly.
371  CuMatrix<BaseFloat> params_deriv(linear_params_.NumRows(),
372  linear_params_.NumCols() + 1);
373  params_deriv.CopyColFromVec(bias_deriv, linear_params_.NumCols());
374 
375 
376  CuSubMatrix<BaseFloat> linear_params_deriv(
377  params_deriv, 0, linear_params_.NumRows(),
378  0, linear_params_.NumCols());
379 
380  ConvolveBackwardParams(indexes.computation, in_value, out_deriv,
381  1.0, &linear_params_deriv);
382 
383  // the precondition-directions code outputs a scalar that
384  // must be multiplied by its output (this saves one
385  // CUDA operation internally).
386  // We don't bother applying this scale before doing the other
387  // dimension of natural gradient, because although it's not
388  // invariant to scalar multiplication of the input if the
389  // scalars are different across iterations, the scalars
390  // will be pretty similar on different iterations
391  BaseFloat scale1, scale2;
392  preconditioner_in_.PreconditionDirections(&params_deriv, &scale1);
393 
394 
395  CuMatrix<BaseFloat> params_deriv_transpose(params_deriv, kTrans);
396  preconditioner_out_.PreconditionDirections(&params_deriv_transpose, &scale2);
397 
// After the transpose, the first NumCols() rows are the linear-parameter
// gradient and the last row is the bias gradient.
398  linear_params_.AddMat(
399  learning_rate_ * scale1 * scale2,
400  params_deriv_transpose.RowRange(0, linear_params_.NumCols()),
401  kTrans);
402 
403  bias_params_.AddVec(learning_rate_ * scale1 * scale2,
404  params_deriv_transpose.Row(linear_params_.NumCols()));
405 }
406 
407 
// ReorderIndexes(): runs the convolution compiler (the
// CompileConvolutionComputation call is on listing lines 417-419; its opening
// line is omitted by the renderer) purely to obtain the reordered
// input/output index lists, and swaps them into place. The compiled
// computation itself is discarded here.
409  std::vector<Index> *input_indexes,
410  std::vector<Index> *output_indexes) const {
411  using namespace time_height_convolution;
412  ConvolutionComputationOptions opts;
413  opts.max_memory_mb = max_memory_mb_;
414  ConvolutionComputation computation_temp;
415  std::vector<Index> input_indexes_modified,
416  output_indexes_modified;
418  model_, *input_indexes, *output_indexes, opts,
419  &computation_temp, &input_indexes_modified, &output_indexes_modified);
420  input_indexes->swap(input_indexes_modified);
421  output_indexes->swap(output_indexes_modified);
422 }
423 
// Write(): serializes the component; token order must mirror Read() below.
// Listing lines 435 (writing use_natural_gradient_) and 438 (start of the
// alpha declarations) are omitted by the renderer.
424 void TimeHeightConvolutionComponent::Write(std::ostream &os, bool binary) const {
425  WriteUpdatableCommon(os, binary); // Write opening tag and learning rate.
426  WriteToken(os, binary, "<Model>");
427  model_.Write(os, binary);
428  WriteToken(os, binary, "<LinearParams>");
429  linear_params_.Write(os, binary);
430  WriteToken(os, binary, "<BiasParams>");
431  bias_params_.Write(os, binary);
432  WriteToken(os, binary, "<MaxMemoryMb>");
433  WriteBasicType(os, binary, max_memory_mb_);
434  WriteToken(os, binary, "<UseNaturalGradient>");
// Natural-gradient hyperparameters are read back from the preconditioners
// rather than stored separately.
436  int32 rank_in = preconditioner_in_.GetRank(),
437  rank_out = preconditioner_out_.GetRank();
439  alpha_out = preconditioner_out_.GetAlpha(),
440  num_minibatches_history = preconditioner_in_.GetNumMinibatchesHistory();
441  WriteToken(os, binary, "<NumMinibatchesHistory>");
442  WriteBasicType(os, binary, num_minibatches_history);
443  WriteToken(os, binary, "<AlphaInOut>");
444  WriteBasicType(os, binary, alpha_in);
445  WriteBasicType(os, binary, alpha_out);
446  WriteToken(os, binary, "<RankInOut>");
447  WriteBasicType(os, binary, rank_in);
448  WriteBasicType(os, binary, rank_out);
449  WriteToken(os, binary, "</TimeHeightConvolutionComponent>");
450 }
451 
// Read(): deserializes the component in the exact token order produced by
// Write(), restores the preconditioner configuration, then recomputes
// derived members and self-checks.
452 void TimeHeightConvolutionComponent::Read(std::istream &is, bool binary) {
453  std::string token = ReadUpdatableCommon(is, binary);
454  // the next few lines are only for back compatibility.
455  if (token != "") {
456  KALDI_ASSERT(token == "<Model>");
457  } else {
458  ExpectToken(is, binary, "<Model>");
459  }
460  model_.Read(is, binary);
461  ExpectToken(is, binary, "<LinearParams>");
462  linear_params_.Read(is, binary);
463  ExpectToken(is, binary, "<BiasParams>");
464  bias_params_.Read(is, binary);
465  ExpectToken(is, binary, "<MaxMemoryMb>");
466  ReadBasicType(is, binary, &max_memory_mb_);
467  ExpectToken(is, binary, "<UseNaturalGradient>");
468  ReadBasicType(is, binary, &use_natural_gradient_);
469  int32 rank_in, rank_out;
470  BaseFloat alpha_in, alpha_out,
471  num_minibatches_history;
472  ExpectToken(is, binary, "<NumMinibatchesHistory>");
473  ReadBasicType(is, binary, &num_minibatches_history);
474  ExpectToken(is, binary, "<AlphaInOut>");
475  ReadBasicType(is, binary, &alpha_in);
476  ReadBasicType(is, binary, &alpha_out);
477  preconditioner_in_.SetAlpha(alpha_in);
478  preconditioner_out_.SetAlpha(alpha_out);
479  ExpectToken(is, binary, "<RankInOut>");
480  ReadBasicType(is, binary, &rank_in);
481  ReadBasicType(is, binary, &rank_out);
482  preconditioner_in_.SetRank(rank_in);
483  preconditioner_out_.SetRank(rank_out);
484  preconditioner_in_.SetNumMinibatchesHistory(num_minibatches_history);
485  preconditioner_out_.SetNumMinibatchesHistory(num_minibatches_history);
486  ExpectToken(is, binary, "</TimeHeightConvolutionComponent>");
487  ComputeDerived();
488  Check();
489 }
490 
// ComputeDerived(): rebuilds all_time_offsets_ from the model's set of time
// offsets. The loop body (listing lines 499-500, omitted by the renderer)
// presumably fills time_offset_required_ per offset — confirm against the
// repository source.
492  all_time_offsets_.clear();
493  all_time_offsets_.insert(
494  all_time_offsets_.end(),
495  model_.all_time_offsets.begin(),
496  model_.all_time_offsets.end());
498  for (size_t i = 0; i < all_time_offsets_.size(); i++) {
501  }
502 }
503 
// GetInputIndexes(): lists the input Indexes this component would like in
// order to compute 'output_index' — one per distinct time offset, sharing
// the output's n and x. Signature line (504) omitted by the renderer.
505  const MiscComputationInfo &misc_info,
506  const Index &output_index,
507  std::vector<Index> *desired_indexes) const {
508  KALDI_ASSERT(output_index.t != kNoTime);
509  size_t size = all_time_offsets_.size();
510  desired_indexes->resize(size);
511  for (size_t i = 0; i < size; i++) {
512  (*desired_indexes)[i].n = output_index.n;
513  (*desired_indexes)[i].t = output_index.t + all_time_offsets_[i];
514  (*desired_indexes)[i].x = output_index.x;
515  }
516 }
517 
518 
// IsComputable(): an output index is computable iff every *required* time
// offset has its input index available. When 'used_inputs' is non-NULL it is
// filled with all available inputs (required or not); otherwise only the
// required offsets are checked, which is cheaper. Signature line (519)
// omitted by the renderer.
520  const MiscComputationInfo &misc_info,
521  const Index &output_index,
522  const IndexSet &input_index_set,
523  std::vector<Index> *used_inputs) const {
524  KALDI_ASSERT(output_index.t != kNoTime);
525  size_t size = all_time_offsets_.size();
526  Index index(output_index);
527  if (used_inputs != NULL) {
528  used_inputs->clear();
529  used_inputs->reserve(size);
530  for (size_t i = 0; i < size; i++) {
531  index.t = output_index.t + all_time_offsets_[i];
532  if (input_index_set(index)) {
533  // This input index is available.
534  used_inputs->push_back(index);
535  } else {
536  // This input index is not available.
537  if (time_offset_required_[i]) {
538  // A required offset was not present -> this output index is not
539  // computable.
540  used_inputs->clear();
541  return false;
542  }
543  }
544  }
545  // All required time-offsets of the output were computable. -> return true.
546  return true;
547  } else {
548  for (size_t i = 0; i < size; i++) {
549  if (time_offset_required_[i]) {
550  index.t = output_index.t + all_time_offsets_[i];
551  if (!input_index_set(index))
552  return false;
553  }
554  }
555  return true;
556  }
557 }
558 
559 
// PrecomputeIndexes(): compiles the convolution computation into a new
// PrecomputedIndexes object ('ans' is allocated on listing line 568, omitted
// by the renderer, as is the CompileConvolutionComputation opening line 571).
// By this stage the indexes must already have been put in the canonical order
// by ReorderIndexes(), hence the hard error if the compiler modifies them.
561  const MiscComputationInfo &misc_info,
562  const std::vector<Index> &input_indexes,
563  const std::vector<Index> &output_indexes,
564  bool need_backprop) const {
565  using namespace time_height_convolution;
566  ConvolutionComputationOptions opts;
567  opts.max_memory_mb = max_memory_mb_;
569  std::vector<Index> input_indexes_modified,
570  output_indexes_modified;
572  model_, input_indexes, output_indexes, opts,
573  &(ans->computation), &input_indexes_modified, &output_indexes_modified);
574  if (input_indexes_modified != input_indexes ||
575  output_indexes_modified != output_indexes) {
576  KALDI_ERR << "Problem precomputing indexes";
577  }
578  return ans;
579 }
580 
// Scale(): scales all parameters; scale == 0.0 is special-cased to SetZero()
// so that NaN/inf parameter values are cleared rather than propagated.
// Signature line (581) omitted by the renderer.
582  if (scale == 0.0) {
583  linear_params_.SetZero();
584  bias_params_.SetZero();
585  } else {
586  linear_params_.Scale(scale);
587  bias_params_.Scale(scale);
588  }
589 }
590 
// Add(): adds alpha times another component's parameters to this one's
// (used e.g. for model averaging). The other component must be of the same
// type. Signature opening line (591) omitted by the renderer.
592  const Component &other_in) {
593  const TimeHeightConvolutionComponent *other =
594  dynamic_cast<const TimeHeightConvolutionComponent*>(&other_in);
595  KALDI_ASSERT(other != NULL);
596  linear_params_.AddMat(alpha, other->linear_params_);
597  bias_params_.AddVec(alpha, other->bias_params_);
598 }
599 
// PerturbParams(): adds Gaussian noise with the given stddev to all
// parameters (testing only). Listing lines 600 (signature) and 605 (the
// temp_vec declaration) are omitted by the renderer.
601  CuMatrix<BaseFloat> temp_mat(linear_params_.NumRows(),
602  linear_params_.NumCols(), kUndefined);
603  temp_mat.SetRandn();
604  linear_params_.AddMat(stddev, temp_mat);
606  temp_vec.SetRandn();
607  bias_params_.AddVec(stddev, temp_vec);
608 }
609 
// DotProduct(): dot product between this component's parameters and
// another's. The return expression (listing lines 615-616, omitted by the
// renderer) presumably combines TraceMatMat over linear_params_ and VecVec
// over bias_params_ — confirm against the repository source.
611  const UpdatableComponent &other_in) const {
612  const TimeHeightConvolutionComponent *other =
613  dynamic_cast<const TimeHeightConvolutionComponent*>(&other_in);
614  KALDI_ASSERT(other != NULL);
617 }
618 
// NumParameters(): total trainable parameter count — the linear matrix plus
// the bias vector. Signature line (619) omitted by the renderer.
620  return linear_params_.NumRows() * linear_params_.NumCols() +
621  bias_params_.Dim();
622 }
623 
// Vectorize(): flattens parameters into 'params' — linear matrix (row-major)
// first, bias vector after. Layout must match UnVectorize() below.
625  VectorBase<BaseFloat> *params) const {
626  KALDI_ASSERT(params->Dim() == NumParameters());
627  int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
628  bias_size = bias_params_.Dim();
629  params->Range(0, linear_size).CopyRowsFromMat(linear_params_);
630  params->Range(linear_size, bias_size).CopyFromVec(bias_params_);
631 }
632 
// UnVectorize(): inverse of Vectorize() — restores parameters from the same
// flat layout (linear matrix first, then bias).
634  const VectorBase<BaseFloat> &params) {
635  KALDI_ASSERT(params.Dim() == NumParameters());
636  int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
637  bias_size = bias_params_.Dim();
638  linear_params_.CopyRowsFromVec(params.Range(0, linear_size));
639  bias_params_.CopyFromVec(params.Range(linear_size, bias_size));
640 }
641 
// FreezeNaturalGradient(): forwards the freeze/unfreeze flag to both
// preconditioners. Signature line (642) omitted by the renderer.
643  preconditioner_in_.Freeze(freeze);
644  preconditioner_out_.Freeze(freeze);
645 }
646 
// PrecomputedIndexes::Copy(): returns a heap-allocated deep copy; the caller
// takes ownership. Signature lines (647-648) omitted by the renderer.
649  return new PrecomputedIndexes(*this);
650 }
651 
// PrecomputedIndexes::Write(): serializes just the compiled convolution
// computation, wrapped in matching open/close tags (mirrored by Read()).
653  std::ostream &os, bool binary) const {
654  WriteToken(os, binary, "<TimeHeightConvolutionComponentPrecomputedIndexes>");
655  WriteToken(os, binary, "<Computation>");
656  computation.Write(os, binary);
657  WriteToken(os, binary, "</TimeHeightConvolutionComponentPrecomputedIndexes>");
658 }
659 
661  std::istream &is, bool binary) {
662  ExpectOneOrTwoTokens(is, binary,
663  "<TimeHeightConvolutionComponentPrecomputedIndexes>",
664  "<Computation>");
665  computation.Read(is, binary);
666  ExpectToken(is, binary, "</TimeHeightConvolutionComponentPrecomputedIndexes>");
667 }
668 
// ConsolidateMemory(): swaps each preconditioner with a freshly constructed
// copy (declared on listing lines 670/672, omitted by the renderer) to
// defragment GPU memory.
671  preconditioner_in_.Swap(&temp_in);
673  preconditioner_out_.Swap(&temp_out);
674 }
675 
676 } // namespace nnet3
677 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT Stride() const
Definition: cu-matrix.h:217
void Write(std::ostream &os, bool binary) const
Definition: convolution.cc:225
const std::string WholeLine()
Definition: text-utils.h:230
void ConvolveBackwardParams(const ConvolutionComputation &cc, const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &output_deriv, BaseFloat alpha, CuMatrixBase< BaseFloat > *params_deriv)
This does the part of the backward derivative computation of convolution, that computes derivatives w...
Definition: convolution.cc:840
const CuSubVector< Real > Row(MatrixIndexT i) const
Definition: cu-matrix.h:670
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
Abstract base-class for neural-net components.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
bool Check(bool check_heights_used=true, bool allow_height_padding=true) const
Definition: convolution.cc:130
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
An abstract representation of a set of Indexes.
void InitLearningRatesFromConfig(ConfigLine *cfl)
kaldi::int32 int32
Keywords for search: natural gradient, naturalgradient, NG-SGD.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overriden by components that use Natu...
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq's (removes duplicates) from a vector.
Definition: stl-utils.h:39
void UpdateNaturalGradient(const PrecomputedIndexes &indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
Definition: nnet-common.h:44
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
void CopyColFromVec(const CuVectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
Definition: cu-matrix.cc:2414
void ExpectOneOrTwoTokens(std::istream &is, bool binary, const std::string &token1, const std::string &token2)
This function is like ExpectToken but for two tokens, and it will either accept token1 and then token...
Definition: text-utils.cc:536
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
void ConvolveBackwardData(const ConvolutionComputation &cc, const CuMatrixBase< BaseFloat > &params, const CuMatrixBase< BaseFloat > &output_deriv, CuMatrixBase< BaseFloat > *input_deriv)
This does the part of the backward derivative computation of convolution, that propagates derivatives...
Definition: convolution.cc:682
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void ReorderIndexes(std::vector< Index > *input_indexes, std::vector< Index > *output_indexes) const
This function only does something interesting for non-simple Components.
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
BaseFloat learning_rate_
learning rate (typically 0.0..0.01)
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
std::string ReadUpdatableCommon(std::istream &is, bool binary)
void UpdateSimple(const PrecomputedIndexes &indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
#define KALDI_ERR
Definition: kaldi-error.h:147
virtual int32 OutputDim() const
Returns output-dimension of this component.
void PreconditionDirections(CuMatrixBase< BaseFloat > *X, BaseFloat *scale)
This call implements the main functionality of this class.
#define KALDI_WARN
Definition: kaldi-error.h:150
void CompileConvolutionComputation(const ConvolutionModel &model, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, const ConvolutionComputationOptions &opts, ConvolutionComputation *computation, std::vector< Index > *input_indexes_modified, std::vector< Index > *output_indexes_modified)
This function does the compilation for a convolution computation; it's a wrapper for the functions be...
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
CuSubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
Definition: cu-matrix.h:660
void Swap(OnlineNaturalGradient *other)
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual int32 InputDim() const
Returns input-dimension of this component.
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
bool is_gradient_
True if this component is to be treated as a gradient rather than as parameters.
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void WriteUpdatableCommon(std::ostream &is, bool binary) const
#define NVTX_RANGE(name)
Definition: cu-common.h:143
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
bool GetValue(const std::string &key, std::string *value)
Definition: text-utils.cc:427
void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void SetNumMinibatchesHistory(BaseFloat num_minibatches_history)
void PrintParameterStats(std::ostringstream &os, const std::string &name, const CuVectorBase< BaseFloat > &params, bool include_mean)
Print to &#39;os&#39; some information about the mean and standard deviation of some parameters, used in Info() functions in nnet-simple-component.cc.
Definition: nnet-parse.cc:157
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
TimeHeightConvolutionComponent implements 2-dimensional convolution where one of the dimensions of co...
void ConvolveForward(const ConvolutionComputation &cc, const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &params, CuMatrixBase< BaseFloat > *output)
This does the forward computation of convolution.
Definition: convolution.cc:524
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
const int kNoTime
Definition: nnet-common.cc:573
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94