nnet-general-component.cc
Go to the documentation of this file.
1 // nnet3/nnet-general-component.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <iterator>
21 #include <sstream>
22 #include <iomanip>
25 #include "nnet3/nnet-parse.h"
26 
27 namespace kaldi {
28 namespace nnet3 {
29 
30 // used in I/O
// Copies a device-side CuArray<Int32Pair> into a host-side vector of
// std::pair<int32, int32>.  The cast relies on Int32Pair being
// layout-compatible with std::pair<int32, int32> (two contiguous int32s) --
// NOTE(review): this layout assumption is not checked here; a static_assert
// near Int32Pair's definition would make it explicit.
31 static void CopyPairVector(const CuArray<Int32Pair> &in,
32  std::vector<std::pair<int32, int32> > *out) {
33  in.CopyToVec(reinterpret_cast<std::vector<Int32Pair>*>(out));
34 }
35 // used in I/O
// Inverse of the overload above: copies a host-side vector of
// std::pair<int32, int32> into a device-side CuArray<Int32Pair>.
// Relies on the same Int32Pair / std::pair layout-compatibility assumption.
36 static void CopyPairVector(const std::vector<std::pair<int32, int32> > &in,
37  CuArray<Int32Pair> *out) {
38  const std::vector<Int32Pair> *in_cast =
39  reinterpret_cast<const std::vector<Int32Pair>*>(&in);
40  out->CopyFromVec(*in_cast);
41 }
42 
43 
44 
45 //inline
// Maps an output Index to the single input Index it is drawn from, and
// (optionally, if 'block' is non-NULL) to which of the num_blocks blocks of
// that input row it corresponds.  Only the 'x' member differs between output
// and input index: input_x = floor(output_x / num_blocks), with explicit
// floor semantics for negative x (plain C integer division rounds toward
// zero).  NOTE(review): the line carrying this function's signature is
// missing from this listing (scrape gap at original line 46).
47  Index *input_index,
48  int32 *block) const {
49  int32 num_blocks = input_dim_ / output_dim_;
  // start from a copy of the output index; only x is adjusted below.
50  *input_index = output_index;
51  int32 output_x = output_index.x, input_x;
52  if (output_x >= 0) {
53  input_x = output_x / num_blocks;
54  } else {
  // floor division for negative numerators.
55  input_x = (output_x - num_blocks + 1) / num_blocks;
56  }
57  input_index->x = input_x;
58  if (block)
  // remainder: which block (0 .. num_blocks-1) this output row is.
59  *block = output_x - (input_x * num_blocks);
60 }
61 
61 //virtual
// Each output index of DistributeComponent depends on exactly one input
// index; delegate the mapping to ComputeInputIndexAndBlock (the block number
// is not needed here, hence the NULL).  NOTE(review): the signature line is
// missing from this listing (scrape gap at original line 63).
64  const MiscComputationInfo &misc_info,
65  const Index &output_index,
66  std::vector<Index> *desired_indexes) const {
67  desired_indexes->resize(1);
68  ComputeInputIndexAndBlock(output_index, &((*desired_indexes)[0]), NULL);
69 }
70 
69 //virtual
// An output index is computable iff its single required input index is
// present in 'input_index_set'.  On success, if 'used_inputs' is supplied it
// is replaced with that one input index.  NOTE(review): the signature line
// is missing from this listing (scrape gap at original line 72).
73  const MiscComputationInfo &misc_info,
74  const Index &output_index,
75  const IndexSet &input_index_set,
76  std::vector<Index> *used_inputs) const {
77  Index input_index;
78  ComputeInputIndexAndBlock(output_index, &input_index, NULL);
79  if (!input_index_set(input_index))
80  return false;
81  if (used_inputs) {
82  used_inputs->clear();
83  used_inputs->push_back(input_index);
84  }
85  return true;
86 }
87 
// Serializes this precomputed-indexes object in Kaldi's token-based format
// (text or binary per 'binary').  The token order here must stay in sync
// with Read() below.
88 void DistributeComponentPrecomputedIndexes::Write(std::ostream &ostream, bool binary) const {
89  WriteToken(ostream, binary, "<DistributeComponentPrecomputedIndexes>");
90  WriteToken(ostream, binary, "<Pairs>");
91  WriteIntegerPairVector(ostream, binary, pairs);
92  WriteToken(ostream, binary, "</DistributeComponentPrecomputedIndexes>");
93 }
94 
// Deserializes the object written by Write() above.
// ExpectOneOrTwoTokens presumably tolerates the opening class token having
// already been consumed by the caller -- NOTE(review): confirm against its
// definition in the I/O utilities.
95 void DistributeComponentPrecomputedIndexes::Read(std::istream &istream, bool binary) {
96  ExpectOneOrTwoTokens(istream, binary, "<DistributeComponentPrecomputedIndexes>", "<Pairs>");
97  ReadIntegerPairVector(istream, binary, &pairs);
98  ExpectToken(istream, binary, "</DistributeComponentPrecomputedIndexes>");
99 }
100 
101 // virtual
103  const MiscComputationInfo &, // misc_info
104  const std::vector<Index> &input_indexes,
105  const std::vector<Index> &output_indexes,
106  bool) const { // the bool is 'need_backprop'- unused.
107  unordered_map<Index, int32, IndexHasher> index_to_input_dim;
108  int32 num_input_indexes = input_indexes.size(),
109  num_output_indexes = output_indexes.size();
110  for (int32 i = 0; i < num_input_indexes; i++)
111  index_to_input_dim[input_indexes[i]] = i;
114  ans->pairs.resize(output_indexes.size());
115 
116  int32 num_blocks = input_dim_ / output_dim_,
117  block_size = input_dim_ / num_blocks;
118 
119  for (int32 i = 0; i < num_output_indexes; i++) {
120  Index input_index;
121  int32 block_index;
122  ComputeInputIndexAndBlock(output_indexes[i], &input_index, &block_index);
123  unordered_map<Index, int32, IndexHasher>::iterator iter =
124  index_to_input_dim.find(input_index);
125  if (iter == index_to_input_dim.end())
126  KALDI_ERR << "Input index not found (code error)";
127  int32 input_row = iter->second;
128  ans->pairs[i] = std::pair<int32,int32>(input_row, block_index * block_size);
129  }
130  return ans;
131 }
132 
133 
135  const ComponentPrecomputedIndexes *indexes_in,
136  const CuMatrixBase<BaseFloat> &in,
137  int32 num_output_rows,
138  std::vector<const BaseFloat*> *input_pointers) const {
140  dynamic_cast<const DistributeComponentPrecomputedIndexes*>(indexes_in);
141  KALDI_ASSERT(indexes != NULL && "Invalid pointer type");
142  KALDI_ASSERT(num_output_rows == static_cast<int32>(indexes->pairs.size()));
143  input_pointers->resize(num_output_rows);
144 
145  const BaseFloat *input_data = in.Data();
146  int32 input_stride = in.Stride();
147  const BaseFloat **input_pointers_data = &((*input_pointers)[0]);
148  const std::pair<int32, int32> *pairs_data = &(indexes->pairs[0]);
149  for (int32 i = 0; i < num_output_rows; i++) {
150  input_pointers_data[i] = input_data +
151  pairs_data[i].first * input_stride +
152  pairs_data[i].second;
153  }
154 }
155 
157  const ComponentPrecomputedIndexes *indexes_in,
158  int32 num_output_rows,
160  std::vector<BaseFloat*> *input_pointers) const {
162  dynamic_cast<const DistributeComponentPrecomputedIndexes*>(indexes_in);
163  KALDI_ASSERT(indexes != NULL && "Invalid pointer type");
164  KALDI_ASSERT(num_output_rows == static_cast<int32>(indexes->pairs.size()));
165  input_pointers->resize(num_output_rows);
166 
167  BaseFloat *input_data = in->Data();
168  int32 input_stride = in->Stride();
169  BaseFloat **input_pointers_data = &((*input_pointers)[0]);
170  const std::pair<int32, int32> *pairs_data = &(indexes->pairs[0]);
171  for (int32 i = 0; i < num_output_rows; i++) {
172  input_pointers_data[i] = input_data +
173  pairs_data[i].first * input_stride +
174  pairs_data[i].second;
175  }
176 }
177 
178 
179 // virtual
181  const CuMatrixBase<BaseFloat> &in,
182  CuMatrixBase<BaseFloat> *out) const {
183  KALDI_ASSERT(indexes != NULL &&
184  in.NumCols() == input_dim_ && out->NumCols() == output_dim_);
185  int32 num_output_rows = out->NumRows();
186  std::vector<const BaseFloat*> input_pointers;
187  ComputeInputPointers(indexes, in, num_output_rows, &input_pointers);
188  CuArray<const BaseFloat*> input_pointers_cuda(input_pointers);
189  out->CopyRows(input_pointers_cuda);
190  return NULL;
191 }
192 
193 // virtual
// Backprop for DistributeComponent: routes each output-row derivative back
// into the sub-range of the input row it was copied from in the forward
// pass (via the pointers built by ComputeInputPointers).  in_value,
// out_value, memo and to_update are unused because the operation is a pure
// data rearrangement with no parameters.
194 void DistributeComponent::Backprop(const std::string &debug_info,
195  const ComponentPrecomputedIndexes *indexes,
196  const CuMatrixBase<BaseFloat> &, // in_value,
197  const CuMatrixBase<BaseFloat> &, // out_value
198  const CuMatrixBase<BaseFloat> &out_deriv,
199  void *memo,
200  Component *, // to_update,
201  CuMatrixBase<BaseFloat> *in_deriv) const {
202  NVTX_RANGE("DistributeComponent::Backprop");
  // nothing to do if the caller does not need input derivatives.
203  if (in_deriv == NULL) return;
204 
205  int32 num_blocks = input_dim_ / output_dim_,
206  num_output_rows = out_deriv.NumRows();
207  if (num_output_rows != in_deriv->NumRows() * num_blocks) {
208  // there could be some 'gaps', i.e. some input values that are not ever
209  // referred to. So we need to zero the input. This wouldn't happen in the
210  // setups I plan to use this for.
211  in_deriv->SetZero();
212  }
213 
  // Build one destination pointer per output row (into in_deriv's storage),
  // then scatter the output derivatives there in a single device call.
214  std::vector<BaseFloat*> input_pointers;
215  ComputeInputPointers(indexes, num_output_rows, in_deriv, &input_pointers);
216  CuArray<BaseFloat*> input_pointers_cuda(input_pointers);
217  out_deriv.CopyToRows(input_pointers_cuda);
218 }
219 
220 
// Sets the dimensions of the component.  input_dim must be a positive
// multiple of output_dim: the component splits each input row into
// (input_dim / output_dim) equal blocks, one per output row.
221 void DistributeComponent::Init(int32 input_dim, int32 output_dim) {
222  input_dim_ = input_dim;
223  output_dim_ = output_dim;
224  KALDI_ASSERT(input_dim > 0 && output_dim > 0 && input_dim % output_dim == 0);
225 }
226 
227 // virtual
// Initializes the component from a config line; both "input-dim" and
// "output-dim" keys are required and any unrecognized key is an error.
// NOTE(review): the signature line is missing from this listing (scrape gap
// at original line 228).
229  int32 input_dim, output_dim;
230  bool ok = cfl->GetValue("input-dim", &input_dim) &&
231  cfl->GetValue("output-dim", &output_dim);
232  if (!ok || cfl->HasUnusedValues())
233  KALDI_ERR << "Invalid initializer for layer of type "
234  << Type() << ": \"" << cfl->WholeLine() << "\"";
235  else
236  Init(input_dim, output_dim);
237 }
238 
// Serializes the component (just its two dimensions) in Kaldi's token-based
// format; must stay in sync with Read() below.
239 void DistributeComponent::Write(std::ostream &os, bool binary) const {
240  WriteToken(os, binary, "<DistributeComponent>");
241  WriteToken(os, binary, "<InputDim>");
242  WriteBasicType(os, binary, input_dim_);
243  WriteToken(os, binary, "<OutputDim>");
244  WriteBasicType(os, binary, output_dim_);
245  WriteToken(os, binary, "</DistributeComponent>");
246 }
247 
// Deserializes the component written by Write() above.
248 void DistributeComponent::Read(std::istream &is, bool binary) {
249  ExpectOneOrTwoTokens(is, binary, "<DistributeComponent>", "<InputDim>");
250  ReadBasicType(is, binary, &input_dim_);
251  ExpectToken(is, binary, "<OutputDim>");
252  ReadBasicType(is, binary, &output_dim_);
253  ExpectToken(is, binary, "</DistributeComponent>");
254 }
255 
256 
// Serializes the precomputed indexes.  The index arrays live on the device
// (CuArray / CuVector), so each is copied to a host-side buffer before being
// written; token order must stay in sync with Read() below.
257 void StatisticsExtractionComponentPrecomputedIndexes::Write(std::ostream &os, bool binary) const {
258  WriteToken(os, binary, "<StatisticsExtractionComponentPrecomputedIndexes>");
259  WriteToken(os, binary, "<ForwardIndexes>");
260  std::vector<std::pair<int32, int32> > pairs_cpu;
261  CopyPairVector(forward_indexes, &pairs_cpu);
262  WriteIntegerPairVector(os, binary, pairs_cpu);
263  WriteToken(os, binary, "<Counts>");
264  counts.Write(os, binary);
265  WriteToken(os, binary, "<BackwardIndexes>");
266  std::vector<int32> backward_indexes_cpu;
267  backward_indexes.CopyToVec(&backward_indexes_cpu);
268  WriteIntegerVector(os, binary, backward_indexes_cpu);
269  WriteToken(os, binary, "</StatisticsExtractionComponentPrecomputedIndexes>");
270 }
271 
// Deserializes the object written by Write() above: each array is read into
// a host-side buffer and then copied to its device-side member.
272 void StatisticsExtractionComponentPrecomputedIndexes::Read(std::istream &is, bool binary) {
273  ExpectOneOrTwoTokens(is, binary,
274  "<StatisticsExtractionComponentPrecomputedIndexes>",
275  "<ForwardIndexes>");
276  std::vector<std::pair<int32, int32> > pairs_cpu;
277  ReadIntegerPairVector(is, binary, &pairs_cpu);
278  CopyPairVector(pairs_cpu, &forward_indexes);
279  ExpectToken(is, binary, "<Counts>");
280  counts.Read(is, binary);
281  ExpectToken(is, binary, "<BackwardIndexes>");
282  std::vector<int32> backward_indexes_cpu;
283  ReadIntegerVector(is, binary, &backward_indexes_cpu);
284  backward_indexes.CopyFromVec(backward_indexes_cpu);
285  ExpectToken(is, binary, "</StatisticsExtractionComponentPrecomputedIndexes>");
286 }
287 
290  const MiscComputationInfo &misc_info,
291  const std::vector<Index> &input_indexes,
292  const std::vector<Index> &output_indexes,
293  bool need_backprop) const {
294  int32 num_input_indexes = input_indexes.size(),
295  num_output_indexes = output_indexes.size();
298  // both input and output indexes are assumed sorted first on
299  // n and x, then on t.
300  Int32Pair invalid_pair;
301  invalid_pair.first = -1;
302  invalid_pair.second = -1;
303  std::vector<Int32Pair> forward_indexes_cpu(output_indexes.size(),
304  invalid_pair);
305  std::vector<int32> backward_indexes_cpu(input_indexes.size(), -1);
306  Vector<BaseFloat> counts_cpu(output_indexes.size());
307 
308  // this map maps from Index to the position in 'input_indexes'.
309  unordered_map<Index, int32, IndexHasher> index_to_input_pos;
310  for (int32 i = 0; i < num_input_indexes; i++)
311  index_to_input_pos[input_indexes[i]] = i;
312 
313  for (int32 i = 0; i < num_output_indexes; i++) {
314  Index output_index = output_indexes[i];
315  Index input_index(output_index);
316  int32 t = output_index.t,
317  t_start = output_period_ * (t / output_period_);
318  if (t_start > t) // could happen for negative t_start due to
319  t_start -= output_period_; // the way modulus works in C.
320  int32 t_end = t_start + output_period_;
321  for (int32 t = t_start; t < t_end; t += input_period_) {
322  input_index.t = t;
323  unordered_map<Index, int32, IndexHasher>::iterator iter =
324  index_to_input_pos.find(input_index);
325  if (iter != index_to_input_pos.end()) {
326  int32 input_pos = iter->second;
327  if (forward_indexes_cpu[i].first == -1) {
328  forward_indexes_cpu[i].first = input_pos;
329  forward_indexes_cpu[i].second = input_pos + 1;
330  counts_cpu(i) = 1.0;
331  } else {
332  // the following might fail, for instance, if the sorting
333  // of the input or output indexes was not as expected.
334  KALDI_ASSERT(forward_indexes_cpu[i].second == input_pos);
335  forward_indexes_cpu[i].second++;
336  counts_cpu(i) += 1.0;
337  }
338  KALDI_ASSERT(backward_indexes_cpu[input_pos] == -1);
339  backward_indexes_cpu[input_pos] = i;
340  }
341  }
342  KALDI_ASSERT(counts_cpu(i) != 0.0);
343  }
344  for (int32 i = 0; i < num_input_indexes; i++) {
345  KALDI_ASSERT(backward_indexes_cpu[i] != -1);
346  }
347  ans->forward_indexes = forward_indexes_cpu;
348  ans->counts = counts_cpu;
349  if (need_backprop)
350  ans->backward_indexes = backward_indexes_cpu;
351  return ans;
352 }
353 
355  input_dim_(-1), input_period_(1), output_period_(1),
356  include_variance_(true) { }
357 
359  const StatisticsExtractionComponent &other):
360  input_dim_(other.input_dim_),
364  Check();
365 }
366 
368  // input-dim is required.
369  bool ok = cfl->GetValue("input-dim", &input_dim_);
370  cfl->GetValue("input-period", &input_period_);
371  cfl->GetValue("output-period", &output_period_);
372  cfl->GetValue("include-variance", &include_variance_);
373  if (cfl->HasUnusedValues())
374  KALDI_ERR << "Could not process these elements in initializer: "
375  << cfl->UnusedValues();
376  if (!ok || input_dim_ <= 0 || input_period_ <= 0 || output_period_ <= 0 ||
377  (output_period_ % input_period_ != 0))
378  KALDI_ERR << "Invalid initializer for layer of type "
379  << Type() << ": \"" << cfl->WholeLine() << "\"";
380  Check();
381 }
382 
384  if (!(input_dim_ > 0 && input_period_ > 0 && output_period_ > 0 &&
385  (output_period_ % input_period_) == 0))
386  KALDI_ERR << "Invalid configuration of StatisticsExtractionComponent";
387 }
388 
390  std::vector<Index> *input_indexes,
391  std::vector<Index> *output_indexes) const {
392  std::sort(input_indexes->begin(), input_indexes->end(),
393  IndexLessNxt());
394  std::sort(output_indexes->begin(), output_indexes->end(),
395  IndexLessNxt());
396 }
397 
399  const MiscComputationInfo &misc_info,
400  const Index &output_index,
401  const IndexSet &input_index_set,
402  std::vector<Index> *used_inputs) const {
403  Index input_index(output_index);
404  int32 t = output_index.t,
405  t_start = output_period_ * (t / output_period_);
406  if (t_start > t) // could happen for negative t_start due to
407  t_start -= output_period_; // the way modulus works in C.
408  int32 t_end = t_start + output_period_;
409  if (!used_inputs) {
410  for (int32 t = t_start; t < t_end; t += input_period_) {
411  input_index.t = t;
412  if (input_index_set(input_index))
413  return true;
414  }
415  return false;
416  } else {
417  used_inputs->clear();
418  bool ans = false;
419  for (int32 t = t_start; t < t_end; t += input_period_) {
420  input_index.t = t;
421  if (input_index_set(input_index)) {
422  ans = true;
423  used_inputs->push_back(input_index);
424  }
425  }
426  return ans;
427  }
428 }
429 
431  const MiscComputationInfo &misc_info,
432  const Index &output_index,
433  std::vector<Index> *desired_indexes) const {
434  desired_indexes->clear();
435  Index input_index(output_index);
436  int32 t = output_index.t,
437  t_start = output_period_ * (t / output_period_);
438  if (t_start > t) // could happen for negative t due to
439  t_start -= output_period_; // the way modulus works in C
440  int32 t_end = t_start + output_period_;
441  for (int32 t = t_start; t < t_end; t += input_period_) {
442  input_index.t = t;
443  desired_indexes->push_back(input_index);
444  }
445 }
446 
447 
449  const ComponentPrecomputedIndexes *indexes_in,
450  const CuMatrixBase<BaseFloat> &in,
451  CuMatrixBase<BaseFloat> *out) const {
452  KALDI_ASSERT(indexes_in != NULL);
455  indexes_in);
456  int32 num_rows_out = out->NumRows();
457  KALDI_ASSERT(indexes != NULL &&
458  indexes->forward_indexes.Dim() == num_rows_out &&
459  in.NumCols() == input_dim_ &&
460  out->NumCols() == OutputDim());
461  out->SetZero();
462  // store the counts.
463  out->CopyColFromVec(indexes->counts, 0);
464  // store the mean stats
465  out->ColRange(1, input_dim_).AddRowRanges(in, indexes->forward_indexes);
466  if (include_variance_) {
467  // store the variance (sum-squared) stats.
468  CuMatrix<BaseFloat> in_squared(in);
469  in_squared.ApplyPow(2.0);
470  out->ColRange(input_dim_ + 1,
471  input_dim_).AddRowRanges(in_squared,
472  indexes->forward_indexes);
473  }
474  return NULL;
475 }
476 
478  const std::string &debug_info,
479  const ComponentPrecomputedIndexes *indexes_in,
480  const CuMatrixBase<BaseFloat> &in_value,
481  const CuMatrixBase<BaseFloat> &, // out_value,
482  const CuMatrixBase<BaseFloat> &out_deriv,
483  void *memo,
484  Component *, // to_update,
485  CuMatrixBase<BaseFloat> *in_deriv) const {
486  NVTX_RANGE("StatisticsExtractionComponent::Backprop");
487  KALDI_ASSERT(indexes_in != NULL);
489  dynamic_cast<const StatisticsExtractionComponentPrecomputedIndexes*>(indexes_in);
490  in_deriv->SetZero();
491  in_deriv->AddRows(1.0, out_deriv.ColRange(1, input_dim_),
492  indexes->backward_indexes);
493  if (include_variance_) {
494  CuMatrix<BaseFloat> variance_deriv(in_value.NumRows(),
495  in_value.NumCols(),
496  kUndefined);
497  variance_deriv.CopyRows(out_deriv.ColRange(1 + input_dim_, input_dim_),
498  indexes->backward_indexes);
499  in_deriv->AddMatMatElements(2.0, variance_deriv, in_value, 1.0);
500  }
501 }
502 
// Deserializes the component's configuration and re-validates it via
// Check().
503 void StatisticsExtractionComponent::Read(std::istream &is, bool binary) {
504  ExpectOneOrTwoTokens(is, binary, "<StatisticsExtractionComponent>",
505  "<InputDim>");
506  ReadBasicType(is, binary, &input_dim_);
507  ExpectToken(is, binary, "<InputPeriod>");
508  ReadBasicType(is, binary, &input_period_);
509  ExpectToken(is, binary, "<OutputPeriod>");
510  ReadBasicType(is, binary, &output_period_);
  // NOTE: "<IncludeVarinance>" is misspelled, but it matches Write() and is
  // part of the on-disk model format -- do not "fix" it in isolation or
  // existing models will fail to load.
511  ExpectToken(is, binary, "<IncludeVarinance>");
512  ReadBasicType(is, binary, &include_variance_);
513  ExpectToken(is, binary, "</StatisticsExtractionComponent>");
514  Check();
515 }
516 
// Serializes the component's configuration; must stay in sync with Read()
// above.
517 void StatisticsExtractionComponent::Write(std::ostream &os, bool binary) const {
518  WriteToken(os, binary, "<StatisticsExtractionComponent>");
519  WriteToken(os, binary, "<InputDim>");
520  WriteBasicType(os, binary, input_dim_);
521  WriteToken(os, binary, "<InputPeriod>");
522  WriteBasicType(os, binary, input_period_);
523  WriteToken(os, binary, "<OutputPeriod>");
524  WriteBasicType(os, binary, output_period_);
  // NOTE: "<IncludeVarinance>" is misspelled, but it is the established
  // on-disk token (matching Read()); changing it would break format
  // compatibility with existing models.
525  WriteToken(os, binary, "<IncludeVarinance>");
526  WriteBasicType(os, binary, include_variance_);
527  WriteToken(os, binary, "</StatisticsExtractionComponent>");
528 }
529 
// Serializes the precomputed indexes.  'indexes_cpu' is deliberately reused
// as a scratch host buffer for both device-to-host copies; token order must
// stay in sync with Read() below.
530 void StatisticsPoolingComponentPrecomputedIndexes::Write(std::ostream &os, bool binary) const {
531  WriteToken(os, binary, "<StatisticsPoolingComponentPrecomputedIndexes>");
532  WriteToken(os, binary, "<ForwardIndexes>");
533  std::vector<std::pair<int32, int32> > indexes_cpu;
534  CopyPairVector(forward_indexes, &indexes_cpu);
535  WriteIntegerPairVector(os, binary, indexes_cpu);
536  WriteToken(os, binary, "<BackwardIndexes>");
537  CopyPairVector(backward_indexes, &indexes_cpu);
538  WriteIntegerPairVector(os, binary, indexes_cpu);
539  WriteToken(os, binary, "</StatisticsPoolingComponentPrecomputedIndexes>");
540 }
541 
// Deserializes the object written by Write() above, reusing one host-side
// scratch buffer for both host-to-device copies.
542 void StatisticsPoolingComponentPrecomputedIndexes::Read(std::istream &is, bool binary) {
543  ExpectOneOrTwoTokens(is, binary,
544  "<StatisticsPoolingComponentPrecomputedIndexes>",
545  "<ForwardIndexes>");
546  std::vector<std::pair<int32, int32> > indexes_cpu;
547  ReadIntegerPairVector(is, binary, &indexes_cpu);
548  CopyPairVector(indexes_cpu, &forward_indexes);
549  ExpectToken(is, binary, "<BackwardIndexes>");
550  ReadIntegerPairVector(is, binary, &indexes_cpu);
551  CopyPairVector(indexes_cpu, &backward_indexes);
552  ExpectToken(is, binary, "</StatisticsPoolingComponentPrecomputedIndexes>");
553 }
554 
556  bool ok = cfl->GetValue("input-dim", &input_dim_);
557  cfl->GetValue("input-period", &input_period_);
558  cfl->GetValue("left-context", &left_context_);
559  cfl->GetValue("right-context", &right_context_);
560  cfl->GetValue("num-log-count-features", &num_log_count_features_);
561  cfl->GetValue("output-stddevs", &output_stddevs_);
562  cfl->GetValue("variance-floor", &variance_floor_);
563 
564  if (cfl->HasUnusedValues())
565  KALDI_ERR << "Could not process these elements in initializer: "
566  << cfl->UnusedValues();
567  // do some basic checks here but Check() will check more completely.
568  if (!ok || input_dim_ <= 0 || left_context_ + right_context_ <= 0 ||
569  num_log_count_features_ < 0)
570  KALDI_ERR << "Invalid initializer for layer of type "
571  << Type() << ": \"" << cfl->WholeLine() << "\"";
572  Check();
573 }
574 
576  input_dim_(-1), input_period_(1), left_context_(-1), right_context_(-1),
577  num_log_count_features_(0), output_stddevs_(false),
578  variance_floor_(1.0e-10), require_direct_input_(false) { }
579 
580 
582  const StatisticsPoolingComponent &other):
589  Check();
590 }
591 
600  KALDI_ASSERT(!output_stddevs_ || (input_dim_ - 1) % 2 == 0);
601 }
602 
603 void StatisticsPoolingComponent::Read(std::istream &is, bool binary) {
604  ExpectOneOrTwoTokens(is, binary, "<StatisticsPoolingComponent>",
605  "<InputDim>");
606  ReadBasicType(is, binary, &input_dim_);
607  ExpectToken(is, binary, "<InputPeriod>");
608  ReadBasicType(is, binary, &input_period_);
609  ExpectToken(is, binary, "<LeftContext>");
610  ReadBasicType(is, binary, &left_context_);
611  ExpectToken(is, binary, "<RightContext>");
612  ReadBasicType(is, binary, &right_context_);
613  ExpectToken(is, binary, "<NumLogCountFeatures>");
615  ExpectToken(is, binary, "<OutputStddevs>");
616  ReadBasicType(is, binary, &output_stddevs_);
617  ExpectToken(is, binary, "<VarianceFloor>");
618  ReadBasicType(is, binary, &variance_floor_);
619  ExpectToken(is, binary, "</StatisticsPoolingComponent>");
620  require_direct_input_ = false; // This is not written to disk, it's only used
621  // temporarily, in memory (see
622  // nnet3-xvector-compute-batched.cc).
623  Check();
624 }
625 
626 void StatisticsPoolingComponent::Write(std::ostream &os, bool binary) const {
627  WriteToken(os, binary, "<StatisticsPoolingComponent>");
628  WriteToken(os, binary, "<InputDim>");
629  WriteBasicType(os, binary, input_dim_);
630  WriteToken(os, binary, "<InputPeriod>");
631  WriteBasicType(os, binary, input_period_);
632  WriteToken(os, binary, "<LeftContext>");
633  WriteBasicType(os, binary, left_context_);
634  WriteToken(os, binary, "<RightContext>");
635  WriteBasicType(os, binary, right_context_);
636  WriteToken(os, binary, "<NumLogCountFeatures>");
638  WriteToken(os, binary, "<OutputStddevs>");
639  WriteBasicType(os, binary, output_stddevs_);
640  WriteToken(os, binary, "<VarianceFloor>");
641  WriteBasicType(os, binary, variance_floor_);
642  WriteToken(os, binary, "</StatisticsPoolingComponent>");
643 }
644 
646  std::vector<Index> *input_indexes,
647  std::vector<Index> *output_indexes) const {
648  std::sort(input_indexes->begin(), input_indexes->end(),
649  IndexLessNxt());
650  std::sort(output_indexes->begin(), output_indexes->end(),
651  IndexLessNxt());
652 }
653 
655  const MiscComputationInfo &misc_info,
656  const Index &output_index,
657  std::vector<Index> *desired_indexes) const {
658  desired_indexes->clear();
659  Index input_index(output_index);
660  int32 middle_t = output_index.t,
661  t_start = middle_t - left_context_,
662  t_last = middle_t + right_context_;
663  KALDI_ASSERT(middle_t % input_period_ == 0);
664  for (int32 t = t_start; t <= t_last; t += input_period_) {
665  input_index.t = t;
666  desired_indexes->push_back(input_index);
667  }
668 }
669 
671  const MiscComputationInfo &misc_info,
672  const Index &output_index,
673  const IndexSet &input_index_set,
674  std::vector<Index> *used_inputs) const {
675  if (used_inputs)
676  used_inputs->clear();
677  // you are not supposed to access the output of this component other than at
678  // multiples of the input period. We could make this an error but decided to
679  // just have it return false.
680  if (output_index.t % input_period_ != 0)
681  return false;
682 
683  Index input_index(output_index);
684  int32 output_t = output_index.t,
685  t_start = output_t - left_context_,
686  t_last = output_t + right_context_;
687  if (!used_inputs) {
688  for (int32 t = t_start; t <= t_last; t += input_period_) {
689  input_index.t = t;
690  if (input_index_set(input_index))
691  return true;
692  }
693  return false;
694  } else {
695  bool ans = false;
696  for (int32 t = t_start; t <= t_last; t += input_period_) {
697  input_index.t = t;
698  if (input_index_set(input_index)) {
699  ans = true;
700  used_inputs->push_back(input_index);
701  }
702  }
703  return ans;
704  }
705 }
706 
709  const MiscComputationInfo &misc_info,
710  const std::vector<Index> &input_indexes,
711  const std::vector<Index> &output_indexes,
712  bool need_backprop) const {
713  int32 num_input_indexes = input_indexes.size(),
714  num_output_indexes = output_indexes.size();
717 
718  Int32Pair invalid_pair;
719  invalid_pair.first = -1;
720  invalid_pair.second = -1;
721  // forward_indexes_cpu[i] will be the (begin, end) of input indexes
722  // included in the sum for the i'th output index.
723  std::vector<Int32Pair> forward_indexes_cpu(num_output_indexes,
724  invalid_pair);
725  // backward_indexes_cpu[i] will be the (begin, end) of output indexes
726  // for which the i'th input index participates in the sum.
727  // because of the way the indexes are sorted (and the fact that only
728  // required indexes are present at the input), it naturally has this
729  // structure [i.e. no gaps in the sets of indexes].
730  std::vector<Int32Pair> backward_indexes_cpu(num_input_indexes,
731  invalid_pair);
732 
733  // this map maps from Index to the position in 'input_indexes'.
734  unordered_map<Index, int32, IndexHasher> index_to_input_pos;
735  for (int32 i = 0; i < num_input_indexes; i++)
736  index_to_input_pos[input_indexes[i]] = i;
737 
738  for (int32 i = 0; i < num_output_indexes; i++) {
739  Index input_index(output_indexes[i]);
740  int32 middle_t = input_index.t,
741  t_start = middle_t - left_context_,
742  t_last = middle_t + right_context_;
743  for (int32 t = t_start; t <= t_last; t += input_period_) {
744  input_index.t = t;
745  unordered_map<Index, int32, IndexHasher>::iterator iter =
746  index_to_input_pos.find(input_index);
747  if (iter != index_to_input_pos.end()) {
748  int32 input_pos = iter->second;
749  if (forward_indexes_cpu[i].first == -1) {
750  forward_indexes_cpu[i].first = input_pos;
751  forward_indexes_cpu[i].second = input_pos + 1;
752  } else {
753  KALDI_ASSERT(forward_indexes_cpu[i].second == input_pos);
754  forward_indexes_cpu[i].second++;
755  }
756  if (backward_indexes_cpu[input_pos].first == -1) {
757  backward_indexes_cpu[input_pos].first = i;
758  backward_indexes_cpu[input_pos].second = i + 1;
759  } else {
760  KALDI_ASSERT(backward_indexes_cpu[input_pos].second == i);
761  backward_indexes_cpu[input_pos].second++;
762  }
763  }
764  }
765  KALDI_ASSERT(forward_indexes_cpu[i].first != -1);
766  }
767  for (int32 i = 0; i < num_input_indexes; i++) {
768  KALDI_ASSERT(backward_indexes_cpu[i].first != -1);
769  }
770 
771  ans->forward_indexes = forward_indexes_cpu;
772  if (need_backprop)
773  ans->backward_indexes = backward_indexes_cpu;
774  return ans;
775 }
776 
778  const ComponentPrecomputedIndexes *indexes_in,
779  const CuMatrixBase<BaseFloat> &in,
780  CuMatrixBase<BaseFloat> *out) const {
781  out->SetZero();
782  KALDI_ASSERT(indexes_in != NULL);
784  dynamic_cast<const StatisticsPoolingComponentPrecomputedIndexes*>(indexes_in);
785  int32 num_rows_out = out->NumRows();
786  KALDI_ASSERT(indexes != NULL &&
787  indexes->forward_indexes.Dim() == num_rows_out &&
788  in.NumCols() == input_dim_ &&
789  out->NumCols() == OutputDim());
790  CuVector<BaseFloat> counts(num_rows_out);
791  // counts_mat is a fake matrix with one column, containing the counts.
792  CuSubMatrix<BaseFloat> counts_mat(counts.Data(), num_rows_out, 1, 1);
793  counts_mat.AddRowRanges(in.ColRange(0, 1), indexes->forward_indexes);
794 
795  CuSubMatrix<BaseFloat> out_non_count(*out, 0, num_rows_out,
797  out_non_count.AddRowRanges(in.ColRange(1, input_dim_ - 1),
798  indexes->forward_indexes);
799  out_non_count.DivRowsVec(counts);
800 
801  if (num_log_count_features_ > 0) {
802  counts.ApplyLog();
804  ones.Set(1.0);
805  out->ColRange(0, num_log_count_features_).AddVecVec(1.0, counts, ones);
806  }
807 
808  if (output_stddevs_) {
809  // if this is true, then we assume the input contains x^2 stats as well as x
810  // stats, and we want to process them into a standard deviation.
811  KALDI_ASSERT((input_dim_ - 1) % 2 == 0);
812  int32 feature_dim = (input_dim_ - 1) / 2;
813  CuSubMatrix<BaseFloat> mean(*out, 0, num_rows_out,
814  num_log_count_features_, feature_dim),
815  variance(*out, 0, num_rows_out,
816  num_log_count_features_ + feature_dim, feature_dim);
817  // subtract mean-squared from average of x^2 to get the variance.
818  variance.AddMatMatElements(-1.0, mean, mean, 1.0);
819  variance.ApplyFloor(variance_floor_);
820  // compute the standard deviation via square root.
821  variance.ApplyPow(0.5);
822  }
823  return NULL;
824 }
825 
827  const std::string &debug_info,
828  const ComponentPrecomputedIndexes *indexes_in,
829  const CuMatrixBase<BaseFloat> &in_value,
830  const CuMatrixBase<BaseFloat> &out_value,
831  const CuMatrixBase<BaseFloat> &out_deriv_in,
832  void *memo,
833  Component *, // to_update,
834  CuMatrixBase<BaseFloat> *in_deriv) const {
835  NVTX_RANGE("StatisticsPoolingComponent::Backprop");
836  KALDI_ASSERT(indexes_in != NULL);
839  indexes_in);
840  int32 num_rows_out = out_deriv_in.NumRows();
841  CuMatrix<BaseFloat> out_deriv(out_deriv_in);
842  if (output_stddevs_) {
843  // for now we actually ignore the covariance flooring in the backprop- this
844  // is an approximation. Typically the derivatives computed will be quite
845  // tiny for floored variances (they should be zero), so it won't affect the
846  // derivatives much.
847  int32 feature_dim = (input_dim_ - 1) / 2;
848  CuSubMatrix<BaseFloat> mean_deriv(out_deriv, 0, num_rows_out,
849  num_log_count_features_, feature_dim),
850  variance_deriv(out_deriv, 0, num_rows_out,
851  num_log_count_features_ + feature_dim, feature_dim),
852  mean_value(out_value, 0, num_rows_out,
853  num_log_count_features_, feature_dim),
854  stddev_value(out_value, 0, num_rows_out,
855  num_log_count_features_ + feature_dim, feature_dim);
856  // we currently have the deriv w.r.t. the stddev. step 1 is to get it
857  // w.r.t. the centered variance. If the centered variance is s,
858  // and the stddev is sqrt(s), then d/ds sqrt(s) = 0.5 / sqrt(s),
859  // so we need to multiply variance_deriv by 0.5 / the stddev.
860  variance_deriv.DivElements(stddev_value);
861  variance_deriv.Scale(0.5);
862 
863  // the deriv w.r.t. the uncentered variance is the same as w.r.t. the
864  // centered variance (since they differ by a constant term of -(mean *
865  // mean)), but we need to add to dF/dmean, the value -2.0 * mean *
866  // dF/dvariance.
867  mean_deriv.AddMatMatElements(-2.0, mean_value, variance_deriv, 1.0);
868  }
869  // now we have to account for the effect of division by the count, on
870  // the derivative.
871  CuVector<BaseFloat> counts(num_rows_out, kUndefined);
872  if (num_log_count_features_ > 0) {
873  counts.CopyColFromMat(out_value, 0);
874  counts.ApplyExp();
875  } else {
876  counts.SetZero();
877  // we need to recompute the counts from the input since they are not in the
878  // output. The submatrix initializer below takes num-rows, num-cols,
879  // stride; num-cols and stride are 1.
880  CuSubMatrix<BaseFloat> counts_mat(counts.Data(), num_rows_out, 1, 1);
881  counts_mat.AddRowRanges(in_value.ColRange(0, 1), indexes->forward_indexes);
882  }
883  // Divide the output derivative by the counts. This is what we want as it
884  // concerns the mean and x^2 stats. As for the counts themselves, the
885  // derivative will end up being discarded when we backprop to the
886  // StatisticsExtractionComponent (as the count is not differentiable) so it
887  // doesn't really matter.
888  out_deriv.DivRowsVec(counts);
889 
890  // Now propagate the derivative back to the input. we don't propagate it
891  // back for the count's row since it's non-differentiable.
892  in_deriv->ColRange(1, input_dim_ - 1).
893  AddRowRanges(out_deriv.ColRange(num_log_count_features_, input_dim_ - 1),
894  indexes->backward_indexes);
895 }
896 
897 // virtual
898 void BackpropTruncationComponent::Read(std::istream &is, bool binary) {
899  // might not see the "<NaturalGradientAffineComponent>" part because
900  // of how ReadNew() works.
901  ExpectOneOrTwoTokens(is, binary, "<BackpropTruncationComponent>",
902  "<Dim>");
903  ReadBasicType(is, binary, &dim_);
904  std::string tok;
905  ReadToken(is, binary, &tok);
906  if (tok == "<Scale>") {
907  ReadBasicType(is, binary, &scale_);
908  ReadToken(is, binary, &tok);
909  } else {
910  scale_ = 1.0;
911  }
912  KALDI_ASSERT(tok == "<ClippingThreshold>");
913  ReadBasicType(is, binary, &clipping_threshold_);
914  ExpectToken(is, binary, "<ZeroingThreshold>");
915  ReadBasicType(is, binary, &zeroing_threshold_);
916  ExpectToken(is, binary, "<ZeroingInterval>");
917  ReadBasicType(is, binary, &zeroing_interval_);
918  ExpectToken(is, binary, "<RecurrenceInterval>");
919  ReadBasicType(is, binary, &recurrence_interval_);
920  ExpectToken(is, binary, "<NumElementsClipped>");
921  ReadBasicType(is, binary, &num_clipped_);
922  ExpectToken(is, binary, "<NumElementsZeroed>");
923  ReadBasicType(is, binary, &num_zeroed_);
924  ExpectToken(is, binary, "<NumElementsProcessed>");
925  ReadBasicType(is, binary, &count_);
926  ExpectToken(is, binary, "<NumZeroingBoundaries>");
927  ReadBasicType(is, binary, &count_zeroing_boundaries_);
928  ExpectToken(is, binary, "</BackpropTruncationComponent>");
929 }
930 
931 // virtual
// Serializes the component: each <Tag> token is followed by the member it
// labels; token order must match Read() above.
932 void BackpropTruncationComponent::Write(std::ostream &os, bool binary) const {
 933  WriteToken(os, binary, "<BackpropTruncationComponent>");
 934  WriteToken(os, binary, "<Dim>");
 935  WriteBasicType(os, binary, dim_);
 936  WriteToken(os, binary, "<Scale>");
 937  WriteBasicType(os, binary, scale_);
 938  WriteToken(os, binary, "<ClippingThreshold>");
 939  WriteBasicType(os, binary, clipping_threshold_);
 940  WriteToken(os, binary, "<ZeroingThreshold>");
 941  WriteBasicType(os, binary, zeroing_threshold_);
 942  WriteToken(os, binary, "<ZeroingInterval>");
 943  WriteBasicType(os, binary, zeroing_interval_);
 944  WriteToken(os, binary, "<RecurrenceInterval>");
 945  WriteBasicType(os, binary, recurrence_interval_);
 // Accumulated statistics are written along with the configuration.
 946  WriteToken(os, binary, "<NumElementsClipped>");
 947  WriteBasicType(os, binary, num_clipped_);
 948  WriteToken(os, binary, "<NumElementsZeroed>");
 949  WriteBasicType(os, binary, num_zeroed_);
 950  WriteToken(os, binary, "<NumElementsProcessed>");
 951  WriteBasicType(os, binary, count_);
 952  WriteToken(os, binary, "<NumZeroingBoundaries>");
 953  WriteBasicType(os, binary, count_zeroing_boundaries_);
 954  WriteToken(os, binary, "</BackpropTruncationComponent>");
 955 }
 956 
// Writes the precomputed indexes: the per-row zeroing mask and its sum.
// NOTE(review): the opening line of this definition (the qualified
// function name) was dropped by the extraction and is not visible here.
 958  bool binary) const {
 959  WriteToken(ostream, binary,
 960  "<BackpropTruncationComponentPrecomputedIndexes>");
 961  WriteToken(ostream, binary, "<Zeroing>");
 962  zeroing.Write(ostream, binary);
 963  WriteToken(ostream, binary, "<ZeroingSum>");
 964  WriteBasicType(ostream, binary, zeroing_sum);
 965  WriteToken(ostream, binary,
 966  "</BackpropTruncationComponentPrecomputedIndexes>");
 967 }
 968 
// Reads the precomputed indexes; mirror image of the Write() just above.
// NOTE(review): the opening (name) line of this definition is not visible
// in this extraction.
 970  bool binary) {
 971  ExpectOneOrTwoTokens(istream, binary,
 972  "<BackpropTruncationComponentPrecomputedIndexes>",
 973  "<Zeroing>");
 974  zeroing.Read(istream, binary);
 975  ExpectToken(istream, binary, "<ZeroingSum>");
 976  ReadBasicType(istream, binary, &zeroing_sum);
 977  ExpectToken(istream, binary,
 978  "</BackpropTruncationComponentPrecomputedIndexes>");
 979 }
 980 
// Returns a human-readable summary of the configuration plus the
// accumulated clipping/zeroing statistics (proportions relative to
// count_ / count_zeroing_boundaries_).  NOTE(review): the Info()
// signature line is not visible in this extraction.
 982  std::ostringstream stream;
 983  stream << Type() << ", dim=" << dim_
 984  << ", scale=" << scale_
 985  << ", count=" << std::setprecision(3) << count_ << std::setprecision(6)
 986  << ", recurrence-interval=" << recurrence_interval_
 987  << ", clipping-threshold=" << clipping_threshold_
 988  << ", clipped-proportion="
 989  << (count_ > 0.0 ? num_clipped_ / count_ : 0)
 990  << ", zeroing-threshold=" << zeroing_threshold_
 991  << ", zeroing-interval=" << zeroing_interval_
 992  << ", zeroed-proportion="
 993  << (count_zeroing_boundaries_ > 0.0 ?
 994  num_zeroed_ / count_zeroing_boundaries_ : 0)
 995  << ", count-zeroing-boundaries="
 996  << static_cast<int32>(count_zeroing_boundaries_);
 997  return stream.str();
 998 }
999 
// Sets configuration members after validating them, and zeroes the
// accumulated statistics.  NOTE(review): the first signature line
// (...::Init(...)) was dropped by the extraction.
1001  int32 dim, BaseFloat scale, BaseFloat clipping_threshold,
1002  BaseFloat zeroing_threshold, int32 zeroing_interval,
1003  int32 recurrence_interval) {
1004  KALDI_ASSERT(clipping_threshold >= 0 && zeroing_threshold >= 0 &&
1005  scale > 0.0 && zeroing_interval > 0 &&
1006  recurrence_interval > 0 && dim > 0);
1007  dim_ = dim;
1008  scale_ = scale;
1009  clipping_threshold_ = clipping_threshold;
1010  zeroing_threshold_ = zeroing_threshold;
1011  zeroing_interval_ = zeroing_interval;
1012  recurrence_interval_ = recurrence_interval;
1013  num_clipped_ = 0.0;
1014  num_zeroed_ = 0.0;
1015  count_ = 0.0;
1016  count_zeroing_boundaries_ = 0.0;
1017 }
1018 
1019 // virtual
// Parses config-file values ('dim' is required, the rest have defaults)
// and forwards them to Init().  NOTE(review): the InitFromConfig
// signature line is not visible in this extraction.
1021  int32 dim = 0;
1022  bool ok = cfl->GetValue("dim", &dim);
1023  BaseFloat scale = 1.0,
1024  clipping_threshold = 30.0,
1025  zeroing_threshold = 15.0;
1026  int32 zeroing_interval = 20, recurrence_interval = 1;
1027  cfl->GetValue("scale", &scale);
1028  cfl->GetValue("clipping-threshold", &clipping_threshold);
1029  cfl->GetValue("zeroing-threshold", &zeroing_threshold);
1030  cfl->GetValue("zeroing-interval", &zeroing_interval);
1031  cfl->GetValue("recurrence-interval", &recurrence_interval);
1032  if (!ok || cfl->HasUnusedValues() ||
1033  clipping_threshold < 0 || zeroing_threshold < 0 || zeroing_interval < 1 ||
1034  recurrence_interval < 1 || dim <= 0)
1035  KALDI_ERR << "Invalid initializer for layer of type "
1036  << Type() << ": \"" << cfl->WholeLine() << "\"";
1037  Init(dim, scale, clipping_threshold, zeroing_threshold,
1038  zeroing_interval, recurrence_interval);
1039 }
1040 
1041 // virtual
// Deep copy of both configuration and accumulated statistics.
// NOTE(review): the Copy() signature and the 'new' allocation of 'ans'
// are on lines dropped by the extraction.
1044  ans->dim_ = dim_;
1045  ans->scale_ = scale_;
1046  ans->clipping_threshold_ = clipping_threshold_;
1047  ans->zeroing_threshold_ = zeroing_threshold_;
1048  ans->zeroing_interval_ = zeroing_interval_;
1049  ans->recurrence_interval_ = recurrence_interval_;
1050  ans->num_clipped_ = num_clipped_;
1051  ans->num_zeroed_ = num_zeroed_;
1052  ans->count_ = count_;
1053  ans->count_zeroing_boundaries_ = count_zeroing_boundaries_;
1054  return ans;
1055 }
1056 
1057 // virtual
// Builds the per-row 'zeroing' mask for a minibatch: entry i is set to
// -1.0 when output_indexes[i] crosses a zeroing-interval boundary (so
// its gradient may be zeroed in Backprop), and is left at 0.0 otherwise.
1060  const MiscComputationInfo &misc_info,
1061  const std::vector<Index> &input_indexes,
1062  const std::vector<Index> &output_indexes,
1063  bool need_backprop) const {
1064  int32 num_input_indexes = input_indexes.size(),
1065  num_output_indexes = output_indexes.size();
1066  KALDI_ASSERT(num_input_indexes == num_output_indexes);
1067  Vector<BaseFloat> zeroing_cpu(num_output_indexes);
1068 
1069  for (int32 i = 0; i < num_output_indexes; i++) {
1070  const int32 output_n = output_indexes[i].n;
1071  const int32 output_t = output_indexes[i].t;
1072  // checks if output_t crosses a boundary that is a multiple of
1073  // zeroing_interval_. Note that frame (output_t - recurrence_interval_) is
1074  // right before frame output_t in RNNs. If the range
1075  // [output_t - recurrence_interval_, output_t] contains a multiple of
1076  // zeroing_interval_, then frame output_t crosses the boundary.
1077  // output_n is used to shift where we put the boundary, so that
1078  // we don't always zero out gradients on frame 0. It will help avoid
1079  // learning utterance-boundary effects.
1080  if (DivideRoundingDown(output_t - output_n, zeroing_interval_) !=
1081  DivideRoundingDown(output_t - recurrence_interval_ - output_n,
1082  zeroing_interval_))
1083  zeroing_cpu(i) = -1.0;
1084  }
1085 
// NOTE(review): the allocation of 'ans' (a new
// BackpropTruncationComponentPrecomputedIndexes) is on lines dropped by
// the extraction.
1088  ans->zeroing = zeroing_cpu;
1089  ans->zeroing_sum = -zeroing_cpu.Sum();
1090  return ans;
1091 }
1092 
1093 // virtual
// Forward pass is just a (possibly scaled) copy of the input; the
// truncation behavior only affects the backward pass.  Returns NULL
// (this component keeps no memo).  NOTE(review): the Propagate()
// signature line is not visible in this extraction.
1095  const ComponentPrecomputedIndexes *indexes,
1096  const CuMatrixBase<BaseFloat> &in,
1097  CuMatrixBase<BaseFloat> *out) const {
1098  out->CopyFromMat(in);
1099  if (scale_ != 1.0)
1100  out->Scale(scale_);
1101  return NULL;
1102 }
1103 
1104 // virtual
// Backward pass: copies (and optionally scales) out_deriv into in_deriv,
// then per row (a) rescales rows whose norm exceeds clipping_threshold_
// ("clipping") and (b) zeroes rows flagged in indexes->zeroing whose norm
// exceeds zeroing_threshold_ ("zeroing"); statistics are accumulated into
// *to_update_in when it is non-NULL.
1105 void BackpropTruncationComponent::Backprop(const std::string &debug_info,
1106  const ComponentPrecomputedIndexes *indexes_in,
1107  const CuMatrixBase<BaseFloat> &, //in_value
1108  const CuMatrixBase<BaseFloat> &,
1109  const CuMatrixBase<BaseFloat> &out_deriv,
1110  void *memo,
1111  Component *to_update_in, // may be NULL; may be
1112  // identical to "this" or different.
1113  CuMatrixBase<BaseFloat> *in_deriv) const {
1114  NVTX_RANGE("BackpropTruncationComponent::Backprop");
// NOTE(review): the declaration of 'indexes' (the left-hand side of the
// dynamic_cast below) is on a line dropped by the extraction.
1116  dynamic_cast<const BackpropTruncationComponentPrecomputedIndexes*>(
1117  indexes_in);
1118  KALDI_ASSERT(indexes->zeroing.Dim() == out_deriv.NumRows());
1119  // the following statement will do nothing if in_deriv and out_deriv have same
1120  // memory.
1121  in_deriv->CopyFromMat(out_deriv);
1122  if (scale_ != 1.0)
1123  in_deriv->Scale(scale_);
1124 
1125  BackpropTruncationComponent *to_update =
1126  dynamic_cast<BackpropTruncationComponent*>(to_update_in);
1127 
1128  // computes clipping_scales
1129  BaseFloat clipping_threshold =
1130  (clipping_threshold_ <= 0.0 ? 1.0e+10 : clipping_threshold_);
1131  // each row in the derivative matrix, which corresponds to one sample in
1132  // the mini-batch, is scaled to have a max-norm of clipping_threshold_
1133  CuVector<BaseFloat> clipping_scales(in_deriv->NumRows());
1134  clipping_scales.AddDiagMat2(pow(clipping_threshold, -2), *in_deriv,
1135  kNoTrans, 0.0);
1136  // now clipping_scales contains the squared (norm of each row divided by
1137  // clipping_threshold)
1138  int32 num_not_scaled;
1139  clipping_scales.ApplyFloor(1.0, &num_not_scaled);
1140  // now clipping_scales contains min(1, squared-(norm/clipping_threshold))
1141  clipping_scales.ApplyPow(-0.5);
1142  // now clipping_scales contains max(1, clipping_threshold/vector_norm)
1143  if (to_update != NULL) {
1144  to_update->num_clipped_ += (clipping_scales.Dim() - num_not_scaled);
1145  to_update->count_ += clipping_scales.Dim();
1146  }
1147 
1148  // computes zeroing_scales
1149  BaseFloat zeroing_threshold =
1150  (zeroing_threshold_ <= 0.0 ? 1.0e+10 : zeroing_threshold_);
1151  // zeroing_scales_vec is actually a 1-row matrix. (the ApplyHeaviside
1152  // function isn't defined for vectors).
1153  CuMatrix<BaseFloat> zeroing_scales(1, in_deriv->NumRows());
1154  CuSubVector<BaseFloat> zeroing_scales_vec(zeroing_scales, 0);
1155  zeroing_scales_vec.Set(-pow(zeroing_threshold, 2));
1156  // now zeroing_scales_vec contains -(squared zeroing_threshold)
1157  zeroing_scales_vec.AddDiagMat2(1.0, *in_deriv, kNoTrans, 1.0);
1158  // now zeroing_scales_vec contains squared norm of each row -
1159  // squared zeroing_threshold
1160  zeroing_scales.ApplyHeaviside();
1161  // now the element of zeroing_scales_vec is 1.0 if its corresponding
1162  // sample's norm exceeds zero_threshold, and 0.0 otherwise
1163  zeroing_scales_vec.MulElements(indexes->zeroing);
1164  // now the element of zeroing_scales_vec is -1.0 if we want to zero its
1165  // corresponding sample's gradient, and 0.0 otherwise
1166  if (to_update != NULL) {
1167  to_update->num_zeroed_ -= zeroing_scales_vec.Sum(); // since it is negative
1168  to_update->count_zeroing_boundaries_ += indexes->zeroing_sum;
1169  }
1170  zeroing_scales_vec.Add(1.0);
1171  // now the element of zeroing_scales_vec is 0.0 if we want to zero its
1172  // corresponding sample's gradient, and 1.0 otherwise
1173 
1174  // combines clipping_scales and zeroing_scales and applies combined_scales
1175  // to in_deriv all at once
1176  CuVector<BaseFloat> combined_scales(clipping_scales);
1177  combined_scales.MulElements(zeroing_scales_vec);
1178  in_deriv->MulRowsVec(combined_scales);
1179 }
1180 
1181 // virtual
// Resets all accumulated statistics to zero.  NOTE(review): the
// ZeroStats() signature line is not visible in this extraction.
1183  count_ = 0.0;
1184  count_zeroing_boundaries_ = 0.0;
1185  num_clipped_ = 0.0;
1186  num_zeroed_ = 0.0;
1187 }
1188 
1189 // virtual
// Scales the accumulated statistics (configuration is untouched).
// NOTE(review): the Scale(BaseFloat scale) signature line is not
// visible in this extraction.
1191  count_ *= scale;
1192  count_zeroing_boundaries_ *= scale;
1193  num_clipped_ *= scale;
1194  num_zeroed_ *= scale;
1195 }
1196 
1197 // virtual
// Adds alpha times another component's accumulated statistics to this
// one's.  NOTE(review): the first signature line (Add(BaseFloat alpha,)
// is not visible in this extraction.
1199  const Component &other_in) {
1200  const BackpropTruncationComponent *other =
1201  dynamic_cast<const BackpropTruncationComponent*>(&other_in);
1202  KALDI_ASSERT(other != NULL);
1203  count_ += alpha * other->count_;
1204  count_zeroing_boundaries_ += alpha * other->count_zeroing_boundaries_;
1205  num_clipped_ += alpha * other->num_clipped_;
1206  num_zeroed_ += alpha * other->num_zeroed_;
1207 }
1208 
1209 
1210 std::string ConstantComponent::Info() const {
1211  std::ostringstream stream;
1212  stream << UpdatableComponent::Info()
1213  << ", " << Type()
1214  << ", output-dim=" << OutputDim()
1215  << ", is-updatable=" << std::boolalpha << is_updatable_
1216  << ", use-natural-gradient=" << std::boolalpha
1217  << use_natural_gradient_;
1218  PrintParameterStats(stream, "output", output_, true);
1219  return stream.str();
1220 }
1221 
// Default constructor: component is updatable and uses natural-gradient
// updates; output_ stays empty until InitFromConfig() or Read().
// NOTE(review): the constructor's name line was dropped by the
// extraction.
1223  UpdatableComponent(), is_updatable_(true),
1224  use_natural_gradient_(true) { }
1225 
// Copy constructor.  NOTE(review): the initializers for is_updatable_
// and use_natural_gradient_ appear on lines dropped by the extraction.
1227  const ConstantComponent &other):
1228  UpdatableComponent(other), output_(other.output_),
1231  preconditioner_(other.preconditioner_) { }
1232 
// Forward pass: every output row is a copy of the constant vector
// output_; the input matrix is ignored.  Returns NULL (no memo).
// NOTE(review): the Propagate() signature line is not visible in this
// extraction.
1234  const ComponentPrecomputedIndexes *indexes,
1235  const CuMatrixBase<BaseFloat> &in,
1236  CuMatrixBase<BaseFloat> *out) const {
1237  out->CopyRowsFromVec(output_);
1238  return NULL;
1239 }
1240 
// Backward pass: the output does not depend on the input, so no input
// derivative is produced; if to_update_in is given and updatable, the
// row-sum of out_deriv (optionally preconditioned with natural gradient)
// is added into its output_ vector, scaled by the learning rate.
// NOTE(review): the function-name line (ConstantComponent::Backprop) was
// dropped by the extraction.
1242  const std::string &debug_info,
1243  const ComponentPrecomputedIndexes *indexes,
1244  const CuMatrixBase<BaseFloat> &, // in_value
1245  const CuMatrixBase<BaseFloat> &, // out_value
1246  const CuMatrixBase<BaseFloat> &out_deriv,
1247  void *memo,
1248  Component *to_update_in,
1249  CuMatrixBase<BaseFloat> *in_deriv) const {
1250  NVTX_RANGE("ConstantComponent::Backprop");
1251  // we don't update in_deriv, since we set the flag
1252  // kBackpropAdds, and the output doesn't depend on the
1253  // input, so the input-derivative is zero.
1254  if (to_update_in) {
1255  ConstantComponent *to_update =
1256  dynamic_cast<ConstantComponent*>(to_update_in);
1257  if (to_update->is_updatable_) {
1258  // only do the update if the is_updatable_ flag is set.
1259  KALDI_ASSERT(to_update && to_update->is_updatable_);
1260  if (to_update->use_natural_gradient_ && !to_update->is_gradient_) {
// Natural-gradient path: precondition a copy of the derivative; 'scale'
// is an extra factor returned by the preconditioner.
1261  CuMatrix<BaseFloat> out_deriv_copy(out_deriv);
1262  BaseFloat scale = 1.0;
1263  to_update->preconditioner_.PreconditionDirections(&out_deriv_copy,
1264  &scale);
1265  to_update->output_.AddRowSumMat(scale * to_update->learning_rate_,
1266  out_deriv_copy);
1267  } else {
1268  to_update->output_.AddRowSumMat(to_update->learning_rate_,
1269  out_deriv);
1270  }
1271  }
1272  }
1273 }
1274 
1275 void ConstantComponent::Read(std::istream &is, bool binary) {
1276  std::string token;
1277  ReadToken(is, binary, &token);
1278  if (token == "<ConstantComponent>") {
1279  ReadToken(is, binary, &token);
1280  }
1281  if (token == "<LearningRateFactor>") {
1282  ReadBasicType(is, binary, &learning_rate_factor_);
1283  ReadToken(is, binary, &token);
1284  } else {
1285  learning_rate_factor_ = 1.0;
1286  }
1287  if (token == "<IsGradient>") {
1288  ReadBasicType(is, binary, &is_gradient_);
1289  ReadToken(is, binary, &token);
1290  } else {
1291  is_gradient_ = false;
1292  }
1293  if (token == "<MaxChange>") {
1294  ReadBasicType(is, binary, &max_change_);
1295  ReadToken(is, binary, &token);
1296  } else {
1297  max_change_ = 0.0;
1298  }
1299  if (token == "<LearningRate>") {
1300  ReadBasicType(is, binary, &learning_rate_);
1301  ReadToken(is, binary, &token);
1302  } else {
1303  learning_rate_ = 0.001;
1304  }
1305  if (token != "<Output>") {
1306  KALDI_ERR << "Expected token <Output>, got " << token;
1307  }
1308  output_.Read(is, binary);
1309  ExpectToken(is, binary, "<IsUpdatable>");
1310  ReadBasicType(is, binary, &is_updatable_);
1311  ExpectToken(is, binary, "<UseNaturalGradient>");
1312  ReadBasicType(is, binary, &use_natural_gradient_);
1313  ExpectToken(is, binary, "</ConstantComponent>");
1314 }
1315 
// Serializes the component; counterpart of Read() above.
1316 void ConstantComponent::Write(std::ostream &os, bool binary) const {
1317  WriteUpdatableCommon(os, binary); // Write the opening tag and learning rate
1318  WriteToken(os, binary, "<Output>");
1319  output_.Write(os, binary);
1320  WriteToken(os, binary, "<IsUpdatable>");
1321  WriteBasicType(os, binary, is_updatable_);
1322  WriteToken(os, binary, "<UseNaturalGradient>");
// NOTE(review): the WriteBasicType() call that writes
// use_natural_gradient_ is on a line dropped by the extraction.
1324  WriteToken(os, binary, "</ConstantComponent>");
1325 }
1326 
// Polymorphic clone via the copy constructor.  NOTE(review): the Copy()
// signature line is not visible in this extraction.
1328  return new ConstantComponent(*this);
1329 }
1330 
// Scales the parameters (only when updatable); scale == 0.0 is handled
// explicitly so that NaN/inf parameters are reset rather than propagated.
// NOTE(review): the Scale(BaseFloat scale) signature line is not visible
// in this extraction.
1332  if (is_updatable_) {
1333  if (scale == 0.0) {
1334  output_.SetZero();
1335  } else {
1336  output_.Scale(scale);
1337  }
1338  }
1339 }
1340 
1341 void ConstantComponent::Add(BaseFloat alpha, const Component &other_in) {
1342  if (is_updatable_) {
1343  const ConstantComponent *other =
1344  dynamic_cast<const ConstantComponent*>(&other_in);
1345  KALDI_ASSERT(other != NULL);
1346  output_.AddVec(alpha, other->output_);
1347  }
1348 }
1349 
// Adds Gaussian noise with the given stddev to the parameters.
// NOTE(review): the PerturbParams(BaseFloat stddev) signature line was
// dropped by the extraction.
1351  CuVector<BaseFloat> temp_output(output_.Dim(), kUndefined);
1352  temp_output.SetRandn();
1353  output_.AddVec(stddev, temp_output);
1354 }
1355 
// Dot product between this component's parameters and another's.
// NOTE(review): the first signature line (DotProduct(...) and the line
// between 1357 and 1359 (presumably an assertion) were dropped by the
// extraction.
1357  const UpdatableComponent &other_in) const {
1359  const ConstantComponent *other =
1360  dynamic_cast<const ConstantComponent*>(&other_in);
1361  KALDI_ASSERT(other != NULL);
1362  return VecVec(output_, other->output_);
1363 }
1364 
// Parses config values and initializes output_ as Gaussian noise scaled
// by output-stddev and shifted by output-mean ('output-dim' is required).
// NOTE(review): the InitFromConfig signature line and the line between
// 1366 and 1368 were dropped by the extraction.
1366  int32 output_dim = 0;
1368  bool ok = cfl->GetValue("output-dim", &output_dim);
1369  cfl->GetValue("is-updatable", &is_updatable_);
1370  cfl->GetValue("use-natural-gradient", &use_natural_gradient_);
1371  BaseFloat output_mean = 0.0, output_stddev = 0.0;
1372  cfl->GetValue("output-mean", &output_mean);
1373  cfl->GetValue("output-stddev", &output_stddev);
1374  if (!ok || cfl->HasUnusedValues() || output_dim <= 0) {
1375  KALDI_ERR << "Bad initializer " << cfl->WholeLine();
1376  }
1377  Vector<BaseFloat> output(output_dim);
1378  output.SetRandn();
1379  output.Scale(output_stddev);
1380  output.Add(output_mean);
1381  output_ = output;
1382 }
1383 
// The number of parameters equals the dimension of output_.
// NOTE(review): the signature line of this accessor (presumably
// NumParameters()) was dropped by the extraction.
1386  return output_.Dim();
1387 }
1388 
// Copies the parameters into *params.  NOTE(review): the Vectorize()
// signature line is not visible in this extraction.
1390  params->CopyFromVec(output_);
1391 }
1392 
// Copies parameter values back from params.  NOTE(review): the
// UnVectorize() signature line is not visible in this extraction.
1394  output_.CopyFromVec(params);
1395 }
1396 
// Reconstructs the preconditioner to release unused memory; swapping
// with a freshly-constructed object is the idiom used here.
// NOTE(review): the ConsolidateMemory() signature and the construction
// of 'temp' are on lines dropped by the extraction.
1399  preconditioner_.Swap(&temp);
1400 }
1401 
1402 std::string DropoutMaskComponent::Info() const {
1403  std::ostringstream stream;
1404  stream << Type()
1405  << ", output-dim=" << output_dim_
1406  << ", dropout-proportion=" << dropout_proportion_;
1407  if (continuous_)
1408  stream << ", continuous=true";
1409  return stream.str();
1410 }
1411 
// Default constructor.  NOTE(review): the constructor's name line was
// dropped by the extraction.
1413  output_dim_(-1), dropout_proportion_(0.5), continuous_(false) { }
1414 
// Copy constructor.  NOTE(review): initializer lines between 1417 and
// 1419 (presumably dropout_proportion_ and test_mode_) were dropped by
// the extraction.
1416  const DropoutMaskComponent &other):
1417  output_dim_(other.output_dim_),
1419  continuous_(other.continuous_) { }
1420 
// Generates a dropout mask into 'out' (the component takes no real
// input: in.NumRows() must be 0).  Behavior depends on continuous_ and
// test_mode_; see the branches below.  Returns NULL (no memo).
// NOTE(review): the Propagate() signature line is not visible in this
// extraction.
1422  const ComponentPrecomputedIndexes *indexes,
1423  const CuMatrixBase<BaseFloat> &in,
1424  CuMatrixBase<BaseFloat> *out) const {
1425  KALDI_ASSERT(in.NumRows() == 0 && out->NumCols() == output_dim_);
1426  BaseFloat dropout_proportion = dropout_proportion_;
1427  KALDI_ASSERT(dropout_proportion >= 0.0 && dropout_proportion <= 1.0);
1428 
1429  if (dropout_proportion == 0) {
1430  out->Set(1.0);
1431  return NULL;
1432  }
1433 
1434  if (continuous_) {
1435  if (test_mode_) {
1436  out->Set(1.0);
1437  } else {
// Continuous mask: uniform values in a band around 1.0 rather than a
// hard 0/1 mask.
1438  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
1439  out->Scale(dropout_proportion * 4.0);
1440  // make the expected value 1.0.
1441  out->Add(1.0 - (2.0 * dropout_proportion));
1442  }
1443  return NULL;
1444  }
1445 
1446  if (test_mode_) {
1447  out->Set(1.0 - dropout_proportion);
1448  return NULL;
1449  }
1450 
// Standard binary mask: each element independently 0 with probability
// dropout_proportion, else 1.
1451  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
1452  out->Add(-dropout_proportion);
1453  out->ApplyHeaviside();
1454 
1455  if (out->NumCols() == 2 || out->NumCols() == 3) {
1456  // This is a kind of special case relevant to LSTms.
1457  // To generate data where it's never the case that both of the dimensions
1458  // for a row are zero, we generate uniformly distributed data (call this u_i),
1459  // and for row i, set (*out)(i, 0) = (0 if u_i < dropout_proportion else 1)
1460  // and (*out)(i, 1) = (0 if u_i > 1-dropout_proportion else 1)
1461  int32 num_rows = out->NumRows();
1462  // later we may make this a bit more efficient.
1463  CuVector<BaseFloat> temp(num_rows, kUndefined);
1464  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(&temp);
1465  temp.Add(-dropout_proportion);
1466  out->CopyColFromVec(temp, 0);
1467  temp.Add(-1.0 + (2.0 * dropout_proportion));
1468  // Now, 'temp' contains the original uniformly-distributed data plus
1469  // -(1 - dropout_proportion).
1470  temp.Scale(-1.0);
1471  out->CopyColFromVec(temp, 1);
1472  out->ApplyHeaviside();
1473  }
1474  return NULL;
1475 }
1476 
1477 
1478 void DropoutMaskComponent::Read(std::istream &is, bool binary) {
1479  ExpectOneOrTwoTokens(is, binary, "<DropoutMaskComponent>", "<OutputDim>");
1480  ReadBasicType(is, binary, &output_dim_);
1481  ExpectToken(is, binary, "<DropoutProportion>");
1482  ReadBasicType(is, binary, &dropout_proportion_);
1483  if (PeekToken(is, binary) == 'T') {
1484  ExpectToken(is, binary, "<TestMode>");
1485  ReadBasicType(is, binary, &test_mode_); // read test mode
1486  } else {
1487  test_mode_ = false;
1488  }
1489  if (PeekToken(is, binary) == 'C') {
1490  ExpectToken(is, binary, "<Continuous>");
1491  continuous_ = true;
1492  } else {
1493  continuous_ = false;
1494  }
1495  ExpectToken(is, binary, "</DropoutMaskComponent>");
1496 }
1497 
1498 
// Serializes the component; <Continuous> is only written when set, which
// matches the optional-token handling in Read().
1499 void DropoutMaskComponent::Write(std::ostream &os, bool binary) const {
1500  WriteToken(os, binary, "<DropoutMaskComponent>");
1501  WriteToken(os, binary, "<OutputDim>");
1502  WriteBasicType(os, binary, output_dim_);
1503  WriteToken(os, binary, "<DropoutProportion>");
1504  WriteBasicType(os, binary, dropout_proportion_);
1505  WriteToken(os, binary, "<TestMode>");
1506  WriteBasicType(os, binary, test_mode_);
1507  if (continuous_)
1508  WriteToken(os, binary, "<Continuous>");
1509  WriteToken(os, binary, "</DropoutMaskComponent>");
1510 }
1511 
// Polymorphic clone.  NOTE(review): the Copy() signature line was
// dropped by the extraction.
1513  return new DropoutMaskComponent(*this);
1514 }
1515 
// Parses config values; 'output-dim' is required, the rest default.
// NOTE(review): the InitFromConfig signature line is not visible in this
// extraction.
1517  output_dim_ = 0;
1518  bool ok = cfl->GetValue("output-dim", &output_dim_);
1519  KALDI_ASSERT(ok && output_dim_ > 0);
1520  dropout_proportion_ = 0.5;
1521  cfl->GetValue("dropout-proportion", &dropout_proportion_);
1522  continuous_ = false;
1523  cfl->GetValue("continuous", &continuous_);
1524  test_mode_ = false;
1525  cfl->GetValue("test-mode", &test_mode_);
1526 }
1527 
1528 
1529 std::string GeneralDropoutComponent::Info() const {
1530  std::ostringstream stream;
1531  stream << Type()
1532  << ", dim=" << dim_
1533  << ", block-dim=" << block_dim_
1534  << ", dropout-proportion=" << dropout_proportion_;
1535  if (continuous_)
1536  stream << ", continuous=true";
1537  if (specaugment_max_proportion_ != 0)
1538  stream << ", specaugment-max-proportion=" << specaugment_max_proportion_
1539  << ", specaugment-max-regions=" << specaugment_max_regions_;
1540  if (time_period_ > 0)
1541  stream << ", time-period=" << time_period_;
1542  return stream.str();
1543 }
1544 
// Default constructor.  NOTE(review): the constructor's name line was
// dropped by the extraction.
1546  dim_(-1), block_dim_(-1), time_period_(0),
1547  dropout_proportion_(0.5),
1548  specaugment_max_proportion_(0.0),
1549  specaugment_max_regions_(1),
1550  continuous_(false) { }
1551 
// Copy constructor.  NOTE(review): initializer lines between 1556 and
// 1560 (presumably dropout/specaugment/test-mode members) were dropped
// by the extraction.
1553  const GeneralDropoutComponent &other):
1554  dim_(other.dim_),
1555  block_dim_(other.block_dim_),
1556  time_period_(other.time_period_),
1560  continuous_(other.continuous_) { }
1561 
// Forward pass: copies the input, then multiplies rows by a freshly
// generated dropout/SpecAugment mask (see GetMemo()); the mask pointer
// is returned as the memo so Backprop can reuse it.  NOTE(review): the
// Propagate() signature line is not visible in this extraction.
1563  const ComponentPrecomputedIndexes *indexes_in,
1564  const CuMatrixBase<BaseFloat> &in,
1565  CuMatrixBase<BaseFloat> *out) const {
1566 
1567  KALDI_ASSERT(SameDim(in, *out));
1568 
1569  // The following will do nothing if 'out' and 'in' refer to the same data.
1570  out->CopyFromMat(in);
1571 
// NOTE(review): the second half of this condition (on a dropped line;
// compare the identical test in Backprop below, which checks
// dropout_proportion_ == 0.0 && specaugment_max_proportion_ == 0.0) is
// not visible in this extraction.
1572  if (test_mode_ ||
1574  return NULL;
1575 
// NOTE(review): the declaration of 'indexes' (left-hand side of this
// dynamic_cast) is on a line dropped by the extraction.
1577  dynamic_cast<const GeneralDropoutComponentPrecomputedIndexes*>(indexes_in);
1578  KALDI_ASSERT(indexes != NULL);
1579 
1580  CuMatrix<BaseFloat> *mask = GetMemo(indexes->num_mask_rows);
1581 
1582  if (block_dim_ < dim_) {
// When block_dim_ divides dim_, view the output as a taller, narrower
// matrix so each block gets its own mask row.
1583  KALDI_ASSERT(out->Stride() == out->NumCols());
1584  int32 num_rows = out->NumRows(),
1585  dim_multiple = dim_ / block_dim_,
1586  num_rows_reshaped = num_rows * dim_multiple;
// NOTE(review): the remaining constructor arguments of 'out_reshaped'
// (presumably block_dim_ for both num-cols and stride) are on a line
// dropped by the extraction.
1587  CuSubMatrix<BaseFloat> out_reshaped(out->Data(), num_rows_reshaped,
1589  out_reshaped.MulRows(*mask, indexes->indexes);
1590  } else {
1591  out->MulRows(*mask, indexes->indexes);
1592  }
1593  return mask;
1594 }
1595 
// Backward pass: copies out_deriv into in_deriv and multiplies its rows
// by the same mask that Propagate generated (recovered from 'memo').
// NOTE(review): the function-name line (GeneralDropoutComponent::
// Backprop) was dropped by the extraction.
1597  const std::string &debug_info,
1598  const ComponentPrecomputedIndexes *indexes_in,
1599  const CuMatrixBase<BaseFloat> &, // in_value
1600  const CuMatrixBase<BaseFloat> &, // out_value
1601  const CuMatrixBase<BaseFloat> &out_deriv,
1602  void *memo,
1603  Component *to_update,
1604  CuMatrixBase<BaseFloat> *in_deriv) const {
1605  NVTX_RANGE("GeneralDropoutComponent::Backprop");
1606  KALDI_ASSERT(in_deriv != NULL && SameDim(*in_deriv, out_deriv));
1607 
1608  // The following will do no work if in_deriv->Data() == out_deriv.Data().
1609  in_deriv->CopyFromMat(out_deriv);
1610 
1611  if (test_mode_ ||
1612  (dropout_proportion_ == 0.0 && specaugment_max_proportion_ == 0.0)) {
1613  KALDI_ASSERT(memo == NULL);
1614  return;
1615  }
1616 
// NOTE(review): the declaration of 'indexes' (left-hand side of this
// dynamic_cast) is on a line dropped by the extraction.
1618  dynamic_cast<const GeneralDropoutComponentPrecomputedIndexes*>(indexes_in);
1619  KALDI_ASSERT(indexes != NULL && memo != NULL);
1620  CuMatrix<BaseFloat> *mask = reinterpret_cast<CuMatrix<BaseFloat>*>(memo);
1621 
1622  if (block_dim_ < dim_) {
// Same reshaping trick as in Propagate: treat each block of block_dim_
// columns as its own row so the per-block mask applies.
1623  KALDI_ASSERT(in_deriv->Stride() == in_deriv->NumCols());
1624  int32 num_rows = in_deriv->NumRows(),
1625  dim_multiple = dim_ / block_dim_,
1626  num_rows_reshaped = num_rows * dim_multiple;
// NOTE(review): the remaining constructor arguments of
// 'in_deriv_reshaped' are on a line dropped by the extraction.
1627  CuSubMatrix<BaseFloat> in_deriv_reshaped(in_deriv->Data(),
1628  num_rows_reshaped,
1630  in_deriv_reshaped.MulRows(*mask, indexes->indexes);
1631  } else {
1632  in_deriv->MulRows(*mask, indexes->indexes);
1633  }
1634 }
1635 
// Deserializes the component; the SpecAugment, <TestMode> and
// <Continuous> tokens are optional and detected by peeking.
1636 void GeneralDropoutComponent::Read(std::istream &is, bool binary) {
1637  ExpectOneOrTwoTokens(is, binary, "<GeneralDropoutComponent>", "<Dim>");
1638  ReadBasicType(is, binary, &dim_);
1639  ExpectToken(is, binary, "<BlockDim>");
1640  ReadBasicType(is, binary, &block_dim_);
1641  ExpectToken(is, binary, "<TimePeriod>");
1642  ReadBasicType(is, binary, &time_period_);
1643  ExpectToken(is, binary, "<DropoutProportion>");
1644  ReadBasicType(is, binary, &dropout_proportion_);
1645  if (PeekToken(is, binary) == 'S') {
1646  ExpectToken(is, binary, "<SpecAugmentMaxProportion>");
// NOTE(review): the ReadBasicType() calls and default assignments for
// specaugment_max_proportion_ / specaugment_max_regions_ (lines between
// 1646 and 1658) were dropped by the extraction; only the control-flow
// skeleton remains visible.
1648  if (PeekToken(is, binary) == 'S') {
1649  ExpectToken(is, binary, "<SpecAugmentMaxRegions>");
1651  } else {
1653  }
1654  } else {
1657  }
1658  if (PeekToken(is, binary) == 'T') {
1659  ExpectToken(is, binary, "<TestMode>");
1660  test_mode_ = true;
1661  } else {
1662  test_mode_ = false;
1663  }
1664  if (PeekToken(is, binary) == 'C') {
1665  ExpectToken(is, binary, "<Continuous>");
1666  continuous_ = true;
1667  } else {
1668  continuous_ = false;
1669  }
1670  ExpectToken(is, binary, "</GeneralDropoutComponent>");
1671 }
1672 
1673 
// Serializes the component; optional tags (SpecAugment, <TestMode>,
// <Continuous>) are written only when set, matching Read().
1674 void GeneralDropoutComponent::Write(std::ostream &os, bool binary) const {
1675  WriteToken(os, binary, "<GeneralDropoutComponent>");
1676  WriteToken(os, binary, "<Dim>");
1677  WriteBasicType(os, binary, dim_);
1678  WriteToken(os, binary, "<BlockDim>");
1679  WriteBasicType(os, binary, block_dim_);
1680  WriteToken(os, binary, "<TimePeriod>");
1681  WriteBasicType(os, binary, time_period_);
1682  WriteToken(os, binary, "<DropoutProportion>");
1683  WriteBasicType(os, binary, dropout_proportion_);
// NOTE(review): the enclosing 'if (specaugment_max_proportion_ != 0.0)'
// and the WriteBasicType() calls for the specaugment members (lines
// 1684, 1686 and 1689) were dropped by the extraction.
1685  WriteToken(os, binary, "<SpecAugmentMaxProportion>");
1687  if (specaugment_max_regions_ != 1) {
1688  WriteToken(os, binary, "<SpecAugmentMaxRegions>");
1690  }
1691  }
1692  if (test_mode_)
1693  WriteToken(os, binary, "<TestMode>");
1694  if (continuous_)
1695  WriteToken(os, binary, "<Continuous>");
1696  WriteToken(os, binary, "</GeneralDropoutComponent>");
1697 }
1698 
// Polymorphic clone.  NOTE(review): the Copy() signature line was
// dropped by the extraction.
1700  return new GeneralDropoutComponent(*this);
1701 }
1702 
// Parses config values; 'dim' is required, block-dim must divide dim,
// and the specaugment settings are validated at the end.  NOTE(review):
// the InitFromConfig signature line and the default assignments for the
// specaugment members (lines 1717/1719) were dropped by the extraction.
1704  dim_ = 0;
1705  bool ok = cfl->GetValue("dim", &dim_);
1706  KALDI_ASSERT(ok && dim_ > 0);
1707  block_dim_ = dim_;
1708  cfl->GetValue("block-dim", &block_dim_);
1709  if (!(block_dim_ > 0 && dim_ % block_dim_ == 0))
1710  KALDI_ERR << "Invalid configuration dim=" << dim_
1711  << ", block-dim=" << block_dim_;
1712  time_period_ = 0;
1713  cfl->GetValue("time-period", &time_period_);
1714  dropout_proportion_ = 0.5;
1715  cfl->GetValue("dropout-proportion", &dropout_proportion_);
1716 
1718  cfl->GetValue("specaugment-max-proportion", &specaugment_max_proportion_);
1720  cfl->GetValue("specaugment-max-regions", &specaugment_max_regions_);
1721  continuous_ = false;
1722  cfl->GetValue("continuous", &continuous_);
1723  test_mode_ = false;
1724  cfl->GetValue("test-mode", &test_mode_);
1725 
1726  if (specaugment_max_proportion_ != 0.0) {
// NOTE(review): the remaining clauses of this validity check (lines
// 1728-1729) were dropped by the extraction.
1727  if (specaugment_max_proportion_ < 0.0 ||
1730  KALDI_ERR << "Invalid config values: specaugment-max-proportion = "
1731  << specaugment_max_proportion_ << ", continuous = "
1732  << std::boolalpha << continuous_
1733  << ", specaugment-max-regions = " << specaugment_max_regions_;
1734  }
1735  }
1736 }
1737 
1738 
// Allocates and returns a new mask matrix of num_mask_rows x block_dim_,
// owned by the caller: either a SpecAugment frequency mask (ones with
// randomly-placed zeroed bins per row) or a standard/continuous dropout
// mask.  NOTE(review): the first signature line (GetMemo(...) was
// dropped by the extraction.
1740  int32 num_mask_rows) const {
1741  KALDI_ASSERT(num_mask_rows > 0 && !test_mode_ &&
1742  (dropout_proportion_ > 0.0 ||
1743  specaugment_max_proportion_ != 0.0));
1744  CuMatrix<BaseFloat> *ans = new CuMatrix<BaseFloat>(num_mask_rows, block_dim_,
1745  kUndefined);
1746 
1747  if (specaugment_max_proportion_ != 0.0) {
1748  // This block takes care of the case where we are doing SpecAugment.
1749  int32 num_freq_bins = block_dim_;
1750  Matrix<BaseFloat> mask(num_mask_rows, block_dim_);
1751  mask.Set(1.0);
1752  int32 specaugment_max_zeroed = static_cast<int32>(
1753  num_freq_bins * specaugment_max_proportion_ + 0.5);
1754  for (int32 seq = 0; seq < num_mask_rows; seq++) {
1755  // actually seq is more like a sub-part of a sequence, in the case where
1756  // time_period_ is not zero.
1757  SubVector<BaseFloat> this_mask(mask, seq); // will be all ones, right now.
1758  int32 num_bins_zeroed = RandInt(0, specaugment_max_zeroed);
1759  if (num_bins_zeroed != 0) {
1760  // This is not quite the same as the paper, it is allowed to "wrap around"
1761  // from the top to the bottom of the frequency spectrum.
1762  int32 start_bin = RandInt(0, num_freq_bins - 1);
1763  for (int32 i = start_bin; i < start_bin + num_bins_zeroed; i++)
1764  this_mask(i % num_freq_bins) = 0.0;
1765 
1766  // if specaugment_max_regions_ is not 1 (e.g. if it's 2 or 3), we want
1767  // to (possibly) split up the zeroed region into more segments.
1768  // The way we do this is a bit odd, but it was hard to think of
1769  // an elegant way to do it. We just choose a random half of the spectrum
1770  // (viewing it as a circle, so choosing a random half of the circle)
1771  // and swap around that half, i.e. flip it on its head.
1772  for (int32 n = 1; n < specaugment_max_regions_; n++) {
1773  int32 half_bin_size = num_freq_bins / 2,
1774  quarter_bin_size = half_bin_size / 2,
1775  start_bin = RandInt(0, num_freq_bins - 1),
1776  end_bin = start_bin + half_bin_size;
1777  for (int32 i = 0; i < quarter_bin_size; i++) {
1778  BaseFloat &a = this_mask((start_bin + i) % num_freq_bins),
1779  &b = this_mask((end_bin - i) % num_freq_bins);
1780  std::swap(a, b);
1781  }
1782  }
1783  }
1784  }
1785  ans->CopyFromMat(mask);
1786  return ans;
1787  }
1788 
1789  BaseFloat dropout_proportion = dropout_proportion_;
1790 
1791  // This const_cast is only safe assuming you don't attempt
1792  // to use multi-threaded code with the GPU.
1793  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(ans);
1794 
1795  if (!continuous_) {
1796  ans->Add(-dropout_proportion);
1797  // now, a proportion "dropout_proportion" will be < 0.0. After applying the
1798  // function (x>0?1:0), a proportion "dropout_proportion" will be zero and (1 -
1799  // dropout_proportion) will be 1.0.
1800  ans->ApplyHeaviside();
1801  ans->Scale(1.0 / (1.0 - dropout_proportion));
1802  } else {
1803  ans->Scale(dropout_proportion * 4.0);
1804  // make the expected value 1.0.
1805  ans->Add(1.0 - (2.0 * dropout_proportion));
1806  }
1807  return ans;
1808 }
1809 
1811  const MiscComputationInfo &misc_info,
1812  const std::vector<Index> &input_indexes,
1813  const std::vector<Index> &output_indexes,
1814  bool need_backprop) const {
1815  KALDI_ASSERT(input_indexes == output_indexes);
1816 
1819  int32 size = input_indexes.size(), time_period = time_period_,
1820  cur_row = 0;
1821  std::vector<int32> indexes(size);
1822  // the map 'm' will map from a pair from (n, t) value to the row-index of the
1823  // dropout-mask matrix*. However, the 't' isn't a real 't' value;
1824  // if time_period_ == 0, the 't' value will just be zero; otherwise,
1825  // it will be t divided by time_period_ (rounding towards negative infinity).
1826 
1827  // *before considering effects related to when block_dim_ != dim_.
1828 
1829  std::unordered_map<std::pair<int32,int32>, int32, PairHasher<int32> > m;
1830  for (int32 i = 0; i < size; i++) {
1831  int32 n = input_indexes[i].n,
1832  t = (time_period == 0 ? 0 : DivideRoundingDown(input_indexes[i].t,
1833  time_period));
1834  std::pair<int32, int32> p(n, t);
1835 
1836  std::unordered_map<std::pair<int32,int32>, int32,
1837  PairHasher<int32> >::const_iterator
1838  iter = m.find(p);
1839  if (iter != m.end()) {
1840  indexes[i] = iter->second;
1841  } else {
1842  m[p] = cur_row;
1843  indexes[i] = cur_row;
1844  cur_row++;
1845  }
1846  }
1847  int32 multiple = dim_ / block_dim_;
1848  ans->num_mask_rows = cur_row;
1849  if (multiple == 1) {
1850  ans->indexes.CopyFromVec(indexes);
1851  } else {
1852  ans->num_mask_rows = cur_row * multiple;
1853  std::vector<int32> repeated_indexes;
1854  repeated_indexes.reserve(size * multiple);
1855  for (int32 i = 0; i < size; i++) {
1856  int32 row = indexes[i];
1857  for (int32 j = 0; j < multiple; j++)
1858  repeated_indexes.push_back(row);
1859  }
1860  ans->indexes.CopyFromVec(repeated_indexes);
1861  }
1862  return ans;
1863 }
1864 
1866  bool binary) const {
1867  WriteToken(os, binary,
1868  "<GeneralDropoutComponentPrecomputedIndexes>");
1869  WriteToken(os, binary, "<NumMaskRows>");
1870  WriteBasicType(os, binary, num_mask_rows);
1871  WriteToken(os, binary, "<Indexes>");
1872  indexes.Write(os, binary);
1873  WriteToken(os, binary,
1874  "</GeneralDropoutComponentPrecomputedIndexes>");
1875 }
1876 
1878  bool binary) {
1879  ExpectOneOrTwoTokens(is, binary,
1880  "<GeneralDropoutComponentPrecomputedIndexes>",
1881  "<NumMaskRows>");
1882  ReadBasicType(is, binary, &num_mask_rows);
1883  ExpectToken(is, binary, "<Indexes>");
1884  indexes.Read(is, binary);
1885  ExpectToken(is, binary,
1886  "</GeneralDropoutComponentPrecomputedIndexes>");
1887 }
1888 
1890  std::ostringstream stream;
1891  stream << Type()
1892  << ", dim=" << dim_
1893  << ", zeroed-proportion=" << zeroed_proportion_
1894  << ", time-mask-max-frames=" << time_mask_max_frames_;
1895  return stream.str();
1896 }
1897 
1899  dim_(-1), zeroed_proportion_(0.25),
1900  time_mask_max_frames_(10) { }
1901 
1903  const SpecAugmentTimeMaskComponent &other):
1904  dim_(other.dim_),
1907 
1909  const ComponentPrecomputedIndexes *indexes_in,
1910  const CuMatrixBase<BaseFloat> &in,
1911  CuMatrixBase<BaseFloat> *out) const {
1912 
1913  KALDI_ASSERT(SameDim(in, *out));
1914 
1915  // The following will do nothing if 'out' and 'in' refer to the same data.
1916  out->CopyFromMat(in);
1917 
1918  if (test_mode_ ||
1919  zeroed_proportion_ == 0.0)
1920  return NULL;
1921 
1923  dynamic_cast<const SpecAugmentTimeMaskComponentPrecomputedIndexes*>(indexes_in);
1924  KALDI_ASSERT(indexes != NULL);
1925 
1926  CuVector<BaseFloat> *mask = GetMemo(*indexes);
1927  out->MulRowsVec(*mask);
1928  return mask;
1929 }
1930 
1932  const std::string &debug_info,
1933  const ComponentPrecomputedIndexes *indexes_in,
1934  const CuMatrixBase<BaseFloat> &, // in_value
1935  const CuMatrixBase<BaseFloat> &, // out_value
1936  const CuMatrixBase<BaseFloat> &out_deriv,
1937  void *memo,
1938  Component *to_update,
1939  CuMatrixBase<BaseFloat> *in_deriv) const {
1940  NVTX_RANGE("SpecAugmentTimeMaskComponent::Backprop");
1941  KALDI_ASSERT(in_deriv != NULL && SameDim(*in_deriv, out_deriv));
1942 
1943  // The following will do no work if in_deriv->Data() == out_deriv.Data().
1944  in_deriv->CopyFromMat(out_deriv);
1945 
1946  if (test_mode_ || zeroed_proportion_ == 0.0) {
1947  KALDI_ASSERT(memo == NULL);
1948  return;
1949  }
1950 
1952  dynamic_cast<const SpecAugmentTimeMaskComponentPrecomputedIndexes*>(indexes_in);
1953  KALDI_ASSERT(indexes != NULL && memo != NULL);
1954  CuVector<BaseFloat> *mask = reinterpret_cast<CuVector<BaseFloat>*>(memo);
1955 
1956  in_deriv->MulRowsVec(*mask);
1957 }
1958 
1959 void SpecAugmentTimeMaskComponent::Read(std::istream &is, bool binary) {
1960  ExpectOneOrTwoTokens(is, binary, "<SpecAugmentTimeMaskComponent>", "<Dim>");
1961  ReadBasicType(is, binary, &dim_);
1962  ExpectToken(is, binary, "<ZeroedProportion>");
1963  ReadBasicType(is, binary, &zeroed_proportion_);
1964  ExpectToken(is, binary, "<TimeMaskMaxFrames>");
1965  ReadBasicType(is, binary, &time_mask_max_frames_);
1966  if (PeekToken(is, binary) == 'T') {
1967  ExpectToken(is, binary, "<TestMode>");
1968  test_mode_ = true;
1969  } else {
1970  test_mode_ = false;
1971  }
1972  ExpectToken(is, binary, "</SpecAugmentTimeMaskComponent>");
1973 }
1974 
1975 
1976 void SpecAugmentTimeMaskComponent::Write(std::ostream &os, bool binary) const {
1977  WriteToken(os, binary, "<SpecAugmentTimeMaskComponent>");
1978  WriteToken(os, binary, "<Dim>");
1979  WriteBasicType(os, binary, dim_);
1980  WriteToken(os, binary, "<ZeroedProportion>");
1981  WriteBasicType(os, binary, zeroed_proportion_);
1982  WriteToken(os, binary, "<TimeMaskMaxFrames>");
1984  if (test_mode_)
1985  WriteToken(os, binary, "<TestMode>");
1986  WriteToken(os, binary, "</SpecAugmentTimeMaskComponent>");
1987 }
1988 
1990  return new SpecAugmentTimeMaskComponent(*this);
1991 }
1992 
1994  dim_ = 0;
1995  bool ok = cfl->GetValue("dim", &dim_);
1996  KALDI_ASSERT(ok && dim_ > 0);
1997  zeroed_proportion_ = 0.25;
1998  cfl->GetValue("zeroed-proportion", &zeroed_proportion_);
1999  time_mask_max_frames_ = 10;
2000  cfl->GetValue("time-mask-max-frames", &time_mask_max_frames_);
2002 }
2003 
2004 
2006  const SpecAugmentTimeMaskComponentPrecomputedIndexes &indexes_in) const {
2007 
2008  const std::vector<std::vector<int32> > &indexes = indexes_in.indexes;
2009  int32 num_sequences = indexes.size();
2011  int32 time_mask_max_frames = time_mask_max_frames_,
2012  non_time_mask_max_frames = time_mask_max_frames * (1-z) / z;
2013  KALDI_ASSERT(time_mask_max_frames > 0 &&
2014  non_time_mask_max_frames > 0);
2015  Vector<BaseFloat> mask(indexes_in.tot_size, kUndefined);
2016 
2017  for (int32 s = 0; s < num_sequences; s++) {
2018  // this_row_indexes gives us, for a particular sequence, the ordered list of
2019  // row-indexes where we can find the successive 't' values of this sequence.
2020  const std::vector<int32> this_row_indexes = indexes[s];
2021  int32 seq_length = this_row_indexes.size();
2022  KALDI_ASSERT(seq_length > 0);
2023 
2024  int32 t = 0;
2025  while (t < seq_length) {
2026  // add a non-zeroed, then a zeroed, segment, repeatedly until we have
2027  // filled the sequence. The first time we choose randomly whether to add
2028  // a zeroed or a non-zeroed segment.
2029  if (t > 0 || WithProb(z)) {
2030  int32 nonzeroed_length = RandInt(1, non_time_mask_max_frames);
2031  for (; t < seq_length && nonzeroed_length > 0; t++, nonzeroed_length--)
2032  mask(this_row_indexes[t]) = 1.0;
2033  }
2034  int32 zeroed_length = RandInt(1, time_mask_max_frames);
2035  for (; t < seq_length && zeroed_length > 0; t++, zeroed_length--)
2036  mask(this_row_indexes[t]) = 0.0;
2037  }
2038  }
2039  return new CuVector<BaseFloat>(mask);
2040 }
2041 
2043  const MiscComputationInfo &misc_info,
2044  const std::vector<Index> &input_indexes,
2045  const std::vector<Index> &output_indexes,
2046  bool need_backprop) const {
2047  KALDI_ASSERT(input_indexes == output_indexes);
2048 
2051  int32 size = input_indexes.size();
2052  KALDI_ASSERT(size != 0);
2053  // 'sort_indexes' will contain the n and t values and then
2054  // the index into input_indexes. When we sort these, it will
2055  // sort first on the n value and then on the t, which will allow us
2056  // to create ans->indexes.
2057  std::vector<std::tuple<int32, int32, int32> > sort_indexes(size);
2058 
2059  std::unordered_set<int32> all_n_values; // just for determining how many
2060  // there are.
2061  for (int32 i = 0; i < size; i++) {
2062  int32 n = input_indexes[i].n;
2063  all_n_values.insert(n);
2064  std::get<0>(sort_indexes[i]) = n;
2065  std::get<1>(sort_indexes[i]) = input_indexes[i].t;
2066  std::get<2>(sort_indexes[i]) = i;
2067  }
2068  std::sort(sort_indexes.begin(), sort_indexes.end());
2069 
2070  // the stuff with n_idx is because we don't assume the
2071  // n values start from zero and are consecutive.
2072  int32 num_n_values = all_n_values.size(),
2073  n_idx = 0,
2074  cur_n_value = std::get<0>(sort_indexes[0]);
2075  ans->indexes.resize(num_n_values);
2076  for (int32 i = 0; i < size; i++) {
2077  std::tuple<int32, int32, int32> &tp(sort_indexes[i]);
2078  int32 n = std::get<0>(tp),
2079  row_index = std::get<2>(tp);
2080  KALDI_ASSERT(n >= cur_n_value);
2081  if (n > cur_n_value) {
2082  n_idx++;
2083  KALDI_ASSERT(n_idx < num_n_values);
2084  cur_n_value = n;
2085  }
2086  ans->indexes[n_idx].push_back(row_index);
2087  }
2088  n_idx++;
2089  KALDI_ASSERT(n_idx == num_n_values);
2090  ans->tot_size = size;
2091  return ans;
2092 }
2093 
2095  bool binary) const {
2096  WriteToken(os, binary,
2097  "<SpecAugmentTimeMaskComponentPrecomputedIndexes>");
2098  WriteToken(os, binary, "<Indexes>");
2099  int32 size = indexes.size();
2100  WriteBasicType(os, binary, size);
2101  for (int32 i = 0; i < size; i++) {
2102  WriteIntegerVector(os, binary, indexes[i]);
2103  }
2104  WriteToken(os, binary,
2105  "</SpecAugmentTimeMaskComponentPrecomputedIndexes>");
2106 }
2107 
2109  bool binary) {
2110  ExpectOneOrTwoTokens(is, binary,
2111  "<SpecAugmentTimeMaskComponentPrecomputedIndexes>",
2112  "<Indexes>");
2113  int32 size;
2114  ReadBasicType(is, binary, &size);
2115  indexes.clear();
2116  indexes.resize(size);
2117  for (int32 i = 0; i < size; i++)
2118  ReadIntegerVector(is, binary, &(indexes[i]));
2119  ExpectToken(is, binary,
2120  "</SpecAugmentTimeMaskComponentPrecomputedIndexes>");
2121  tot_size = 0;
2122  for (auto v : indexes) tot_size += v.size();
2123 }
2124 
2125 
2126 
2127 
2128 } // namespace nnet3
2129 } // namespace kaldi
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void MulElements(const CuVectorBase< Real > &v)
Definition: cu-vector.cc:838
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT Stride() const
Definition: cu-matrix.h:217
virtual void ReorderIndexes(std::vector< Index > *input_indexes, std::vector< Index > *output_indexes) const
This function only does something interesting for non-simple Components.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
static int32 DivideRoundingDown(int32 a, int32 b)
Returns a / b, rounding towards negative infinity in all cases.
Definition: kaldi-math.h:287
void ApplyPow(Real power)
Definition: cu-matrix.h:438
const std::string WholeLine()
Definition: text-utils.h:230
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void SetZero()
Math operations.
Definition: cu-vector.cc:1098
virtual void Read(std::istream &istream, bool binary)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void ComputeInputIndexAndBlock(const Index &output_index, Index *input_index, int32 *block) const
void WriteIntegerPairVector(std::ostream &os, bool binary, const std::vector< std::pair< T, T > > &v)
Function for writing STL vectors of pairs of integer types.
Definition: io-funcs-inl.h:93
void CopyFromVec(const std::vector< T > &src)
This function resizes if needed.
Definition: cu-array-inl.h:120
virtual int32 OutputDim() const
Returns output-dimension of this component.
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
void CopyToVec(std::vector< T > *dst) const
This function resizes *dst if needed.
Definition: cu-array-inl.h:177
static void CopyPairVector(const CuArray< Int32Pair > &in, std::vector< std::pair< int32, int32 > > *out)
void CopyColFromMat(const CuMatrixBase< Real > &mat, MatrixIndexT col)
Definition: cu-vector.cc:103
Abstract base-class for neural-net components.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void Add(Real value)
Definition: cu-vector.cc:1157
void AddRows(Real alpha, const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
Definition: cu-matrix.cc:2766
void AddRowRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, do (*this)(r, c) += src(j, c), where j ranges from ind...
Definition: cu-matrix.cc:2931
void Init(int32 dim, BaseFloat scale, BaseFloat clipping_threshold, BaseFloat zeroing_threshold, int32 zeroing_interval, int32 recurrence_interval)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
An abstract representation of a set of Indexes.
bool WithProb(BaseFloat prob, struct RandomState *state)
Definition: kaldi-math.cc:72
void Set(Real value)
Definition: cu-vector.cc:1135
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
CuVector< BaseFloat > * GetMemo(const SpecAugmentTimeMaskComponentPrecomputedIndexes &indexes) const
void InitLearningRatesFromConfig(ConfigLine *cfl)
Real Sum() const
Definition: cu-matrix.cc:3012
void ApplyFloor(Real floor_val)
Definition: cu-matrix.h:451
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: cu-vector.cc:595
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual Component * Copy() const
Copies component (deep copy).
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Keywords for search: natural gradient, naturalgradient, NG-SGD.
void DivRowsVec(const CuVectorBase< Real > &div)
divide i'th row by scale[i]
Definition: cu-matrix.cc:899
void AddMatMatElements(const Real alpha, const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const Real beta)
*this = beta * *this + alpha * A .* B (.* element by element multiplication)
Definition: cu-matrix.cc:1447
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
BaseFloat max_change_
configuration value for imposing max-change
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void ReadIntegerPairVector(std::istream &is, bool binary, std::vector< std::pair< T, T > > *v)
Function for reading STL vector of pairs of integer types.
Definition: io-funcs-inl.h:131
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
Definition: nnet-common.h:44
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void CopyColFromVec(const CuVectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
Definition: cu-matrix.cc:2414
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
SpecAugmentTimeMaskComponent implements the time part of SpecAugment.
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
void ExpectOneOrTwoTokens(std::istream &is, bool binary, const std::string &token1, const std::string &token2)
This function is like ExpectToken but for two tokens, and it will either accept token1 and then token...
Definition: text-utils.cc:536
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
virtual Component * Copy() const
Copies component (deep copy).
CuMatrix< BaseFloat > * GetMemo(int32 num_mask_rows) const
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void Scale(Real value)
Definition: cu-matrix.cc:644
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
std::string UnusedValues() const
returns e.g.
Definition: text-utils.cc:518
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
virtual void Write(std::ostream &ostream, bool binary) const
void Add(Real value)
Definition: cu-matrix.cc:582
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void SetZero()
Math operations, some calling kernels.
Definition: cu-matrix.cc:509
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void MulRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) *= alpha * src.row(indexes[r]), where '*=' is elementwise multiplica...
Definition: cu-matrix.cc:2790
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
void CopyRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies row r from row indexes[r] of src.
Definition: cu-matrix.cc:2678
BaseFloat learning_rate_
learning rate (typically 0.0..0.01)
struct rnnlm::@11::@12 n
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
#define KALDI_ERR
Definition: kaldi-error.h:147
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
void PreconditionDirections(CuMatrixBase< BaseFloat > *X, BaseFloat *scale)
This call implements the main functionality of this class.
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
BaseFloat learning_rate_factor_
learning rate factor (normally 1.0, but can be set to another < value so that when < you call SetLear...
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
virtual void Write(std::ostream &os, bool binary) const
void Scale(Real alpha)
Multiplies all elements by this constant.
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
Definition: io-funcs.cc:170
void Swap(OnlineNaturalGradient *other)
Real Sum() const
Returns sum of the elements.
void SetRandn()
Set vector to random normally-distributed noise.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
virtual void Write(std::ostream &os, bool binary) const
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
virtual void Write(std::ostream &ostream, bool binary) const
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:665
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
GeneralDropoutComponent implements dropout, including a continuous variant where the thing we multipl...
virtual void Write(std::ostream &os, bool binary) const
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
Matrix for CUDA computing.
Definition: matrix-common.h:69
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
A class representing a vector.
Definition: kaldi-vector.h:406
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
virtual void ReorderIndexes(std::vector< Index > *input_indexes, std::vector< Index > *output_indexes) const
This function only does something interesting for non-simple Components.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
bool is_gradient_
True if this component is to be treated as a gradient rather than as parameters.
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void WriteUpdatableCommon(std::ostream &is, bool binary) const
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
void Init(int32 input_dim, int32 output_dim)
#define NVTX_RANGE(name)
Definition: cu-common.h:143
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: cu-vector.h:72
void Scale(Real value)
Definition: cu-vector.cc:1216
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
bool HasUnusedValues() const
Definition: text-utils.cc:510
bool GetValue(const std::string &key, std::string *value)
Definition: text-utils.cc:427
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Read(std::istream &istream, bool binary)
int32_cuda second
Definition: cu-matrixdim.h:80
void CopyToRows(const CuArrayBase< Real *> &dst) const
For each row r of this matrix, copies it to the array of floats at the location given by dst[r]...
Definition: cu-matrix.cc:2744
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments &#39;to_update&#39; and &#39;in_deriv&#39; are non-NULL...
virtual Component * Copy() const
Copies component (deep copy).
void PrintParameterStats(std::ostringstream &os, const std::string &name, const CuVectorBase< BaseFloat > &params, bool include_mean)
Print to &#39;os&#39; some information about the mean and standard deviation of some parameters, used in Info() functions in nnet-simple-component.cc.
Definition: nnet-parse.cc:157
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual Component * Copy() const
Copies component (deep copy).
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void Add(Real c)
Add a constant to each element of a vector.
MatrixIndexT Dim() const
Return the vector dimension.
Definition: cu-array.h:49
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
std::vector< std::pair< int32, int32 > > pairs
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
void Set(Real value)
Definition: cu-matrix.cc:531
CuRand< BaseFloat > random_generator_
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i&#39;th row by scale[i]
Definition: cu-matrix.cc:792
virtual Component * Copy() const
Copies component (deep copy).
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments &#39;to_update&#39; and &#39;in_deriv&#39; are non-NULL...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void ComputeInputPointers(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, int32 num_output_rows, std::vector< const BaseFloat *> *input_pointers) const
This file contains declarations of components that are not "simple", meaning they care about the inde...
virtual void Write(std::ostream &os, bool binary) const
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments &#39;to_update&#39; and &#39;in_deriv&#39; are non-NULL...
void Set(Real)
Sets all elements to a specific value.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95
A hashing function-object for pairs of ints.
Definition: stl-utils.h:235
int32_cuda first
Definition: cu-matrixdim.h:79