nnet-general-component.h
Go to the documentation of this file.
1 // nnet3/nnet-general-component.h
2 
3 // Copyright 2015 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #ifndef KALDI_NNET3_NNET_GENERAL_COMPONENT_H_
21 #define KALDI_NNET3_NNET_GENERAL_COMPONENT_H_
22 
23 #include "nnet3/nnet-common.h"
26 #include <iostream>
27 
28 namespace kaldi {
29 namespace nnet3 {
30 
38 
39 
40 
57  public:
58  DistributeComponent(int32 input_dim, int32 output_dim) {
    // Constructs the component by passing both dimensions straight to Init(),
    // which does all of the actual setup.
59  Init(input_dim, output_dim);
60  }
62  virtual int32 InputDim() const { return input_dim_; }
63  virtual int32 OutputDim() const { return output_dim_; }
64 
65  // use the default Info() function.
66  virtual void InitFromConfig(ConfigLine *cfl);
67  virtual std::string Type() const { return "DistributeComponent"; }
68  virtual int32 Properties() const { return 0; }
69  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
70  const CuMatrixBase<BaseFloat> &in,
71  CuMatrixBase<BaseFloat> *out) const;
72  virtual void Backprop(const std::string &debug_info,
73  const ComponentPrecomputedIndexes *indexes,
74  const CuMatrixBase<BaseFloat> &in_value,
75  const CuMatrixBase<BaseFloat> &out_value,
76  const CuMatrixBase<BaseFloat> &out_deriv,
77  void *memo,
78  Component *, // to_update,
79  CuMatrixBase<BaseFloat> *in_deriv) const;
80 
81  virtual void Read(std::istream &is, bool binary); // This Read function
82  // requires that the Component has the correct type.
83 
85  virtual void Write(std::ostream &os, bool binary) const;
86  virtual Component* Copy() const {
88  }
89 
90 
91  // Some functions that are only to be reimplemented for GeneralComponents.
92  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
93  const Index &output_index,
94  std::vector<Index> *desired_indexes) const;
95 
96  // This function returns true if at least one of the input indexes used to
97  // compute this output index is computable.
98  virtual bool IsComputable(const MiscComputationInfo &misc_info,
99  const Index &output_index,
100  const IndexSet &input_index_set,
101  std::vector<Index> *used_inputs) const;
102 
104  const MiscComputationInfo &misc_info,
105  const std::vector<Index> &input_indexes,
106  const std::vector<Index> &output_indexes,
107  bool need_backprop) const;
108 
109  // Some functions that are specific to this class.
110  void Init(int32 input_dim, int32 output_dim);
111  private:
112  // computes the input index corresponding to a particular output index.
113  // if block != NULL, also computes which block of the input this corresponds to.
114  inline void ComputeInputIndexAndBlock(const Index &output_index,
115  Index *input_index,
116  int32 *block) const;
117  inline void ComputeInputPointers(
118  const ComponentPrecomputedIndexes *indexes,
119  const CuMatrixBase<BaseFloat> &in,
120  int32 num_output_rows,
121  std::vector<const BaseFloat*> *input_pointers) const;
122  // non-const version of the above.
123  inline void ComputeInputPointers(
124  const ComponentPrecomputedIndexes *indexes,
125  int32 num_output_rows,
127  std::vector<BaseFloat*> *input_pointers) const;
130 
131 };
132 
135  public:
136 
137  // each pair is a pair (row, dim_offset), and by
138  // computing (input.Data() + row * input.Stride() + dim_offset)
139  // we get an address that points to the correct input location.
140  std::vector<std::pair<int32, int32> > pairs;
141 
142  // this class has a virtual destructor so it can be deleted from a pointer
143  // to ComponentPrecomputedIndexes.
145 
147  return new DistributeComponentPrecomputedIndexes(*this);
148  }
149 
150  virtual void Write(std::ostream &ostream, bool binary) const;
151 
152  virtual void Read(std::istream &istream, bool binary);
153 
154  virtual std::string Type() const { return "DistributeComponentPrecomputedIndexes"; }
155 };
156 
157 /*
158  Class StatisticsExtractionComponent is used together with
159  StatisticsPoolingComponent to extract moving-average mean and
160  standard-deviation statistics.
161 
162  StatisticsExtractionComponent is designed to extract statistics-- 0th-order,
163  1st-order and optionally diagonal 2nd-order stats-- from small groups of
164  frames, such as 10 frames. The statistics will then be further processed by
165  StatisticsPoolingComponent to compute moving-average means and (if configured)
166  standard deviations. The reason for the two-component way of doing this is
167  efficiency, particularly in the graph-compilation phase. (Otherwise there
168  would be too many dependencies to process). The StatisticsExtractionComponent
169  is designed to let you extract statistics from fixed-size groups of frames
170  (e.g. 10 frames), and in StatisticsPoolingComponent you are only expected to
171  compute the averages at the same fixed period (e.g. 10 frames), so it's more
172  efficient than if you were to compute a moving average at every single frame;
173  and the computation of the intermediate stats means that most of the
174  computation that goes into extracting the means and standard deviations for
175  nearby frames is shared.
176 
177  The config line in a typical setup will be something like:
178 
179  input-dim=250 input-period=1 output-period=10 include-variance=true
180 
181  input-dim is self-explanatory. The inputs will be obtained at multiples of
182  input-period (e.g. it might be 3 for chain models). output-period must be a
183  multiple of input-period, and the requested output indexes will be expected to
184  be multiples of output-period (which you can ensure through use of the Round
185  descriptor). For instance, if you request the output on frame 80, it will
186  consist of stats from input frames 80 through 89.
187 
188  An output of this component will be 'computable' any time at least one of
189  the corresponding inputs is computable.
190 
191  In all cases the first dimension of the output will be a count (between 1 and
192  10 inclusive in this example). If include-variance=false, then the output
193  dimension will be input-dim + 1, and the output dimensions >0 will be
194  1st-order statistics (sums of the input). If include-variance=true, then the
195  output dimension will be input-dim * 2 + 1, where the raw diagonal 2nd-order
196  statistics are appended to the 0 and 1st order statistics.
197 
198  The default configuration values are:
199  input-dim=-1 input-period=1 output-period=1 include-variance=true
200  */
202  public:
203  // Initializes to defaults which would not pass Check(); use InitFromConfig()
204  // or Read() or copy constructor to really initialize.
206  // copy constructor, used in Copy().
208 
209  virtual int32 InputDim() const { return input_dim_; }
210  virtual int32 OutputDim() const {
211  // count + sum stats [ + sum-squared stats].
212  return 1 + input_dim_ + (include_variance_ ? input_dim_ : 0);
213  }
214  virtual void InitFromConfig(ConfigLine *cfl);
215  virtual std::string Type() const { return "StatisticsExtractionComponent"; }
216  virtual int32 Properties() const {
218  (include_variance_ ? kBackpropNeedsInput : 0);
219  }
220  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
221  const CuMatrixBase<BaseFloat> &in,
222  CuMatrixBase<BaseFloat> *out) const;
223  virtual void Backprop(const std::string &debug_info,
224  const ComponentPrecomputedIndexes *indexes,
225  const CuMatrixBase<BaseFloat> &in_value,
226  const CuMatrixBase<BaseFloat> &out_value,
227  const CuMatrixBase<BaseFloat> &out_deriv,
228  void *memo,
229  Component *, // to_update,
230  CuMatrixBase<BaseFloat> *in_deriv) const;
231 
232  virtual void Read(std::istream &is, bool binary); // This Read function
233  // requires that the Component has the correct type.
234 
236  virtual void Write(std::ostream &os, bool binary) const;
237  virtual Component* Copy() const {
238  return new StatisticsExtractionComponent(*this);
239  }
240 
241  // Some functions that are only to be reimplemented for GeneralComponents.
242  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
243  const Index &output_index,
244  std::vector<Index> *desired_indexes) const;
245 
246  virtual bool IsComputable(const MiscComputationInfo &misc_info,
247  const Index &output_index,
248  const IndexSet &input_index_set,
249  std::vector<Index> *used_inputs) const;
250 
251  // This function reorders the input and output indexes so that they
252  // are sorted first on n and then x and then t.
253  virtual void ReorderIndexes(std::vector<Index> *input_indexes,
254  std::vector<Index> *output_indexes) const;
255 
257  const MiscComputationInfo &misc_info,
258  const std::vector<Index> &input_indexes,
259  const std::vector<Index> &output_indexes,
260  bool need_backprop) const;
261 
262  private:
263  // Checks that the parameters are valid.
264  void Check() const;
265 
266  // Disallow assignment operator.
267  StatisticsExtractionComponent &operator =(
268  const StatisticsExtractionComponent &other);
269 
274 };
275 
278  public:
279  // While creating the output we sum over row ranges of the input.
280  // forward_indexes.Dim() equals the number of rows of the output, and each
281  // element is a (start, end) range of inputs, that is summed over.
283 
284  // This vector stores the number of inputs for each output. Normally this will be
285  // the same as the component's output_period_ / input_period_, but could be less
286  // due to edge effects at the utterance boundary.
288 
289  // Each input row participates in exactly one output element, and
290  // 'backward_indexes' identifies which row of the output each row
291  // of the input is part of. It's used in backprop.
293 
296  }
297 
298  virtual void Write(std::ostream &os, bool binary) const;
299 
300  virtual void Read(std::istream &is, bool binary);
301 
302  virtual std::string Type() const { return "StatisticsExtractionComponentPrecomputedIndexes"; }
303  private:
305 };
306 
307 /*
308  Class StatisticsPoolingComponent is used together with
309  StatisticsExtractionComponent to extract moving-average mean and
310  standard-deviation statistics.
311 
312  StatisticsPoolingComponent pools the stats over a specified window and
313  computes means and possibly log-count and stddevs from them for you.
314 
315  # In StatisticsPoolingComponent, the first element of the input is interpreted
316  # as a count, which we divide by.
317  # Optionally the log of the count can be output, and you can allow it to be
318  # repeated several times if you want (useful for systems using the jesus-layer).
319  # The output dimension is equal to num-log-count-features plus (input-dim - 1).
320 
321  # If include-log-count==false, the output dimension is the input dimension minus one.
322  # If output-stddevs=true, then it expects the input-dim to be of the form 2n+1 where n is
323  # presumably the original feature dim, and it interprets the last n dimensions of the feature
324  # as a variance; it outputs the square root of the variance instead of the actual variance.
325 
326  configs and their defaults: input-dim=-1, input-period=1, left-context=-1, right-context=-1,
327  num-log-count-features=0, output-stddevs=true, variance-floor=1.0e-10
328 
329  You'd access the output of the StatisticsPoolingComponent using rounding, e.g.
330  Round(component-name, 10)
331  or whatever, instead of just component-name, because its output is only defined at multiples
332  of its input-period.
333 
334  The output of StatisticsPoolingComponent will only be defined if at least one
335  input was defined.
336  */
338  public:
339  // Initializes to defaults which would not pass Check(); use InitFromConfig()
340  // or Read() or copy constructor to really initialize.
342  // copy constructor, used in Copy()
344 
345  virtual int32 InputDim() const { return input_dim_; }
346  virtual int32 OutputDim() const {
     // num_log_count_features_ dimensions plus (input_dim_ - 1), i.e. one fewer
     // than the input dimension when no log-count features are requested.
347  return input_dim_ + num_log_count_features_ - 1;
348  }
349  virtual void InitFromConfig(ConfigLine *cfl);
350  virtual std::string Type() const { return "StatisticsPoolingComponent"; }
351  virtual int32 Properties() const {
353  (output_stddevs_ || num_log_count_features_ > 0 ?
354  kBackpropNeedsOutput : 0) |
355  (num_log_count_features_ == 0 ? kBackpropNeedsInput : 0);
356  }
357  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
358  const CuMatrixBase<BaseFloat> &in,
359  CuMatrixBase<BaseFloat> *out) const;
360  virtual void Backprop(const std::string &debug_info,
361  const ComponentPrecomputedIndexes *indexes,
362  const CuMatrixBase<BaseFloat> &in_value,
363  const CuMatrixBase<BaseFloat> &out_value,
364  const CuMatrixBase<BaseFloat> &out_deriv,
365  void *memo,
366  Component *, // to_update,
367  CuMatrixBase<BaseFloat> *in_deriv) const;
368 
369  virtual void Read(std::istream &is, bool binary); // This Read function
370  // requires that the Component has the correct type.
371 
373  virtual void Write(std::ostream &os, bool binary) const;
374  virtual Component* Copy() const {
375  return new StatisticsPoolingComponent(*this);
376  }
377 
378  // Some functions that are only to be reimplemented for GeneralComponents.
379  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
380  const Index &output_index,
381  std::vector<Index> *desired_indexes) const;
382 
383  // returns true if at least one of its inputs is computable.
384  virtual bool IsComputable(const MiscComputationInfo &misc_info,
385  const Index &output_index,
386  const IndexSet &input_index_set,
387  std::vector<Index> *used_inputs) const;
388 
389  // This function reorders the input and output indexes so that they
390  // are sorted first on n and then x and then t.
391  virtual void ReorderIndexes(std::vector<Index> *input_indexes,
392  std::vector<Index> *output_indexes) const;
393 
395  const MiscComputationInfo &misc_info,
396  const std::vector<Index> &input_indexes,
397  const std::vector<Index> &output_indexes,
398  bool need_backprop) const;
399 
400  // Used in computing the 'real' context of networks involving this component;
401  // with the default value of false, the left/right context will always appear
402  // to be 0.
403  void SetRequireDirectInput(bool b) { require_direct_input_ = b; }
404 
405  private:
406  // Checks that the parameters are valid.
407  void Check() const;
408 
409  // Disallow assignment operator.
410  StatisticsPoolingComponent &operator =(
411  const StatisticsPoolingComponent &other);
412 
420  // If require_direct_input_ is set to true, in order for a particular 't'
421  // value to be available at the output of this component, it will require that
422  // 't' value to be computable at the input. This is used in computing the
423  // "real" left/right context of the network, but this member isn't currently
424  // written to disk and will default to false when read.
426 
427 };
428 
431  public:
432 
433  // in the first stage of creating the output we sum over row ranges of
434  // the input. forward_indexes.Dim() equals the number of rows of the
435  // output, and each element is a (start, end) range of inputs, that is
436  // summed over.
438 
439  // backward_indexes contains the same information as forward_indexes, but in a
440  // different format. backward_indexes.Dim() is the same as the number of rows
441  // of input, and each element contains the (start,end) of the range of outputs
442  // for which this input index appears as an element of the sum for that
443  // output. This is possible because of the way the inputs and outputs are
444  // ordered and because of how we select the elements to appear in the sum using
445  // a window. This quantity is used in backprop.
447 
449 
452  }
453 
454  virtual void Write(std::ostream &os, bool binary) const;
455 
456  virtual void Read(std::istream &is, bool binary);
457 
458  virtual std::string Type() const { return "StatisticsPoolingComponentPrecomputedIndexes"; }
459 };
460 
461 // BackpropTruncationComponent zeroes out the gradients every certain number
462 // of frames, and also provides gradient-clipping functionality like that of
463 // ClipGradientComponent.
464 // This component is used to prevent the gradient-explosion problem in
465 // recurrent neural networks.
467  public:
469  BaseFloat scale,
470  BaseFloat clipping_threshold,
471  BaseFloat zeroing_threshold,
472  int32 zeroing_interval,
473  int32 recurrence_interval) {
474  Init(dim, scale, clipping_threshold, zeroing_threshold,
475  zeroing_interval, recurrence_interval);}
476 
477  BackpropTruncationComponent(): dim_(0), scale_(1.0), clipping_threshold_(-1),
     // Default constructor: zero dimension, unit scale, both thresholds set to
     // -1, both intervals set to 0, and all four statistics counters zeroed.
     // NOTE(review): presumably the object is then configured through
     // InitFromConfig(), Init() or Read() -- confirm against callers.
478  zeroing_threshold_(-1), zeroing_interval_(0), recurrence_interval_(0),
479  num_clipped_(0), num_zeroed_(0), count_(0), count_zeroing_boundaries_(0) { }
480 
481  virtual int32 InputDim() const { return dim_; }
482  virtual int32 OutputDim() const { return dim_; }
483  virtual void InitFromConfig(ConfigLine *cfl);
484  void Init(int32 dim, BaseFloat scale, BaseFloat clipping_threshold,
485  BaseFloat zeroing_threshold, int32 zeroing_interval,
486  int32 recurrence_interval);
487 
488  virtual std::string Type() const { return "BackpropTruncationComponent"; }
489 
490  virtual int32 Properties() const {
492  }
493 
494  virtual void ZeroStats();
495 
496  virtual Component* Copy() const;
497 
498  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
499  const CuMatrixBase<BaseFloat> &in,
500  CuMatrixBase<BaseFloat> *out) const;
501  virtual void Backprop(const std::string &debug_info,
502  const ComponentPrecomputedIndexes *indexes,
503  const CuMatrixBase<BaseFloat> &, // in_value,
504  const CuMatrixBase<BaseFloat> &, // out_value,
505  const CuMatrixBase<BaseFloat> &out_deriv,
506  void *memo,
507  Component *to_update,
508  CuMatrixBase<BaseFloat> *in_deriv) const;
509 
511  const MiscComputationInfo &misc_info,
512  const std::vector<Index> &input_indexes,
513  const std::vector<Index> &output_indexes,
514  bool need_backprop) const;
515 
516  virtual void Scale(BaseFloat scale);
517  virtual void Add(BaseFloat alpha, const Component &other);
518  virtual void Read(std::istream &is, bool binary); // This Read function
519  // requires that the Component has the correct type.
521  virtual void Write(std::ostream &os, bool binary) const;
522  virtual std::string Info() const;
524  }
525  private:
526  // input/output dimension
528 
529  // Scale that is applied in the forward propagation (and of course in the
530  // backprop to match). Expected to normally be 1, but setting this to other
531  // values (e.g. slightly less than 1) can be used to produce variants of
532  // LSTMs where the activations are bounded.
534 
535  // threshold (e.g., 30) to be used for clipping corresponds to max-row-norm
537 
538  // threshold (e.g., 3) to be used for zeroing corresponds to max-row-norm
540 
541  // interval (e.g., 20, in number of frames) at which we would zero the
542  // gradient if the norm of the gradient is above zeroing_threshold_
544 
545  // recurrence_interval_ should be the absolute recurrence offset used in RNNs
546  // (e.g., 3). It is used to see whether the index the component is processing,
547  // crosses a boundary that's a multiple of zeroing_interval_ frames.
549 
550  // component-node name, used in the destructor to print out stats of
551  // self-repair
552  std::string debug_info_;
553 
554  BackpropTruncationComponent &operator =
555  (const BackpropTruncationComponent &other); // Disallow.
556 
557  protected:
558  // variables to store stats
559  // An element corresponds to rows of derivative matrix
560  double num_clipped_; // number of elements which were clipped
561  double num_zeroed_; // number of elements which were zeroed
562  double count_; // number of elements which were processed
563  double count_zeroing_boundaries_; // number of zeroing boundaries where we had
564  // the opportunity to perform zeroing
565  // the gradient
566 
567 };
568 
571  public:
572 
573  // zeroing has the same dimension as the number of rows of out-deriv.
574  // Each element in zeroing can take two possible values: -1.0, meaning its
575  // corresponding frame is one that we need to consider zeroing the
576  // gradient of, and 0.0 otherwise
578 
579  // caches the negative sum of elements in zeroing for fewer CUDA calls
580  // (the sum is computed by CPU). Note that this value would be positive.
582 
584 
585  // this class has a virtual destructor so it can be deleted from a pointer
586  // to ComponentPrecomputedIndexes.
588 
591  }
592 
593  virtual void Write(std::ostream &ostream, bool binary) const;
594 
595  virtual void Read(std::istream &istream, bool binary);
596 
597  virtual std::string Type() const {
598  return "BackpropTruncationComponentPrecomputedIndexes";
599  }
600 };
601 
602 
603 /*
604  ConstantComponent returns a constant value for all requested
605  indexes, and it has no dependencies on any input.
606  It's like a ConstantFunctionComponent, but done the "right"
607  way without requiring an unnecessary input.
608  It is optionally trainable, and optionally you can use natural
609  gradient.
610 
611  Configuration values accepted by this component, with defaults if
612  applicable:
613 
614  output-dim Dimension that this component outputs.
615  is-updatable=true True if you want this to be updatable.
616  use-natural-gradient=true True if you want the update to use natural gradient.
617  output-mean=0.0 Mean of the parameters at initialization (the parameters
618  are what it outputs).
619  output-stddev=0.0 Standard deviation of the parameters at initialization.
620 
621 
622  Values inherited from UpdatableComponent (see its declaration in
623  nnet-component-itf for details):
624  learning-rate
625  learning-rate-factor
626  max-change
627 */
629  public:
630  // actually this component requires no inputs; this value
631  // is really a don't-care.
632  virtual int32 InputDim() const { return output_.Dim(); }
633 
634  virtual int32 OutputDim() const { return output_.Dim(); }
635 
636  virtual std::string Info() const;
637 
638  // possible parameter values with their defaults:
639  // is-updatable=true use-natural-gradient=true output-dim=-1
640  // output-mean=0 output-stddev=0
641  virtual void InitFromConfig(ConfigLine *cfl);
642 
644 
645  ConstantComponent(const ConstantComponent &other);
646 
647  virtual std::string Type() const { return "ConstantComponent"; }
648  virtual int32 Properties() const {
     // Reports kUpdatableComponent only when is_updatable_ is set; otherwise no
     // property flags are returned at all.
649  return
650  (is_updatable_ ? kUpdatableComponent : 0);
651  }
652  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
653  const CuMatrixBase<BaseFloat> &in,
654  CuMatrixBase<BaseFloat> *out) const;
655  virtual void Backprop(const std::string &debug_info,
656  const ComponentPrecomputedIndexes *indexes,
657  const CuMatrixBase<BaseFloat> &, // in_value
658  const CuMatrixBase<BaseFloat> &, // out_value
659  const CuMatrixBase<BaseFloat> &out_deriv,
660  void *memo,
661  Component *to_update,
662  CuMatrixBase<BaseFloat> *in_deriv) const;
663 
664  virtual void Read(std::istream &is, bool binary);
665  virtual void Write(std::ostream &os, bool binary) const;
666 
667  virtual Component* Copy() const;
668 
669  // Some functions that are only to be reimplemented for GeneralComponents.
670  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
671  const Index &output_index,
672  std::vector<Index> *desired_indexes) const {
673  desired_indexes->clear(); // requires no inputs.
674  }
675 
676  // Always reports the requested output index as computable: this component
677  // requires no inputs, so there are no input indexes to check.
678  // If 'used_inputs' is non-NULL it is cleared, since no inputs are used.
679  virtual bool IsComputable(const MiscComputationInfo &misc_info,
680  const Index &output_index,
681  const IndexSet &input_index_set,
682  std::vector<Index> *used_inputs) const {
683  if (used_inputs) used_inputs->clear();
684  return true;
685  }
686 
687  // Some functions from base-class UpdatableComponent.
688  virtual void Scale(BaseFloat scale);
689  virtual void Add(BaseFloat alpha, const Component &other);
690  virtual void PerturbParams(BaseFloat stddev);
691  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
692  virtual int32 NumParameters() const;
693  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
694  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
695 
696  virtual void ConsolidateMemory();
697  private:
698 
699  // the output value-- a vector.
701 
703  // if true, and if updatable, do natural-gradient update.
706 
707  const ConstantComponent &operator
708  = (const ConstantComponent &other); // Disallow.
709 };
710 
711 
712 
713 // DropoutMaskComponent outputs a random zero-or-one value for all dimensions of
714 // all requested indexes, and it has no dependencies on any input. It's like a
715 // ConstantComponent, but with random output that has value zero
716 // a proportion (dropout_proportion) of the time, and otherwise one.
717 // This is not the normal way to implement dropout; you'd normally use a
718 // DropoutComponent (see nnet-simple-component.h). This component is used while
719 // implementing per-frame dropout with the LstmNonlinearityComponent; we
720 // generate a two-dimensional output representing dropout
721 //
723  public:
724  // actually this component requires no inputs; this value
725  // is really a don't-care.
726  virtual int32 InputDim() const { return output_dim_; }
727 
728  virtual int32 OutputDim() const { return output_dim_; }
729 
730  virtual std::string Info() const;
731 
732  // possible parameter values with their defaults:
733  // dropout-proportion=0.5 output-dim=-1 continuous=false
734  // With the 'continuous=false' option (the default), it generates
735  // 0 with probability 'dropout-proportion' and 1 otherwise.
736  // With 'continuous=true' it outputs 1 plus dropout-proportion times
737  // a value uniformly distributed on [-2, 2]. (e.g. if dropout-proportion is
738  // 0.5, this would amount to a value uniformly distributed on [0,2].)
739  virtual void InitFromConfig(ConfigLine *cfl);
740 
742 
744 
745  virtual std::string Type() const { return "DropoutMaskComponent"; }
746  virtual int32 Properties() const { return kRandomComponent; }
747  // note: the matrix 'in' will be empty.
748  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
749  const CuMatrixBase<BaseFloat> &in,
750  CuMatrixBase<BaseFloat> *out) const;
751  // backprop does nothing, there is nothing to backprop to and nothing
752  // to update.
753  virtual void Backprop(const std::string &debug_info,
754  const ComponentPrecomputedIndexes *indexes,
755  const CuMatrixBase<BaseFloat> &, // in_value
756  const CuMatrixBase<BaseFloat> &, // out_value
757  const CuMatrixBase<BaseFloat> &out_deriv,
758  void *memo,
759  Component *to_update,
760  CuMatrixBase<BaseFloat> *in_deriv) const { }
761 
762  virtual void Read(std::istream &is, bool binary);
763  virtual void Write(std::ostream &os, bool binary) const;
764 
765  virtual Component* Copy() const;
766 
767  // Some functions that are only to be reimplemented for GeneralComponents.
768  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
769  const Index &output_index,
770  std::vector<Index> *desired_indexes) const {
771  desired_indexes->clear(); // requires no inputs.
772  }
773 
774  // Always reports the requested output index as computable: this component
775  // requires no inputs, so there are no input indexes to check.
776  // If 'used_inputs' is non-NULL it is cleared, since no inputs are used.
777  virtual bool IsComputable(const MiscComputationInfo &misc_info,
778  const Index &output_index,
779  const IndexSet &input_index_set,
780  std::vector<Index> *used_inputs) const {
781  if (used_inputs) used_inputs->clear();
782  return true;
783  }
784 
785  void SetDropoutProportion(BaseFloat p) { dropout_proportion_ = p; }
786 
787  private:
788 
789  // The output dimension
791 
793 
795 
796  const DropoutMaskComponent &operator
797  = (const DropoutMaskComponent &other); // Disallow.
798 };
799 
800 
801 
876  public:
877  virtual int32 InputDim() const { return dim_; }
878 
879  virtual int32 OutputDim() const { return dim_; }
880 
881  virtual std::string Info() const;
882 
883  virtual void InitFromConfig(ConfigLine *cfl);
884 
886 
888 
889  virtual std::string Type() const { return "GeneralDropoutComponent"; }
890  virtual int32 Properties() const {
892  (block_dim_ != dim_ ? (kInputContiguous|kOutputContiguous) : 0);
893  }
894 
895  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
896  const CuMatrixBase<BaseFloat> &in,
897  CuMatrixBase<BaseFloat> *out) const;
898  virtual void Backprop(const std::string &debug_info,
899  const ComponentPrecomputedIndexes *indexes,
900  const CuMatrixBase<BaseFloat> &, // in_value
901  const CuMatrixBase<BaseFloat> &, // out_value
902  const CuMatrixBase<BaseFloat> &out_deriv,
903  void *memo,
904  Component *to_update,
905  CuMatrixBase<BaseFloat> *in_deriv) const;
906 
907  virtual void DeleteMemo(void *memo) const {
     // Frees the memo, treating it as a CuMatrix<BaseFloat> (the type that
     // GetMemo() in this class returns).
     // NOTE(review): presumably 'memo' is the pointer handed back by
     // Propagate() -- confirm in the implementation file.
908  delete static_cast<CuMatrix<BaseFloat>*>(memo);
909  }
910 
912  const MiscComputationInfo &misc_info,
913  const std::vector<Index> &input_indexes,
914  const std::vector<Index> &output_indexes,
915  bool need_backprop) const;
916 
917  virtual void Read(std::istream &is, bool binary);
918  virtual void Write(std::ostream &os, bool binary) const;
919 
920  virtual Component* Copy() const;
921 
922  void SetDropoutProportion(BaseFloat p) { dropout_proportion_ = p; }
923 
924  private:
925 
926  // Returns a random matrix reflecting the masking we are applying.
927  // In the normal case this matrix is
928  // of dimension 'num_mask_rows' by 'block_dim_'. This
929  // should not be called if test_mode_ is true or dropout_proportion_ is zero.
930  CuMatrix<BaseFloat> *GetMemo(int32 num_mask_rows) const;
931 
932 
933  // The input and output dimension
935 
936  // block_dim_ must divide dim_.
938 
939  // time_period_ can be zero if we want all 't' values to share the same
940  // dropout mask, and a value more than zero if we want blocks of 't' values to
941  // share the dropout mask. For example, if time_period_ is 10, blocks of size
942  // 10 frames will share the same dropout mask.
944 
946 
948 
950 
952 
953  const GeneralDropoutComponent &operator
954  = (const GeneralDropoutComponent &other); // Disallow.
955 };
956 
957 // This stores some precomputed indexes for GeneralDropoutComponent.
958 // This object is created for every instance of the Propagate()
959 // function in the compiled computation.
962  public:
963 
964 
965  // num_mask_rows is the number of rows in the dropout-mask matrix, which will
966  // in the normal case equal the number of sequences we are processing. Its
967  // num-cols is the block_dim_ of the component (e.g. might be the InputDim()
968  // (which is the same as OutputDim()), or maybe less if the block-dim option
969  // was specified).
971 
972  // 'indexes' is of dimension (the number of rows in the matrix we're doing
973  // Propagate() or Backprop() on) times the (dim_ / block_dim_) of the
974  // GeneralDropoutComponent. Each value is in the range [0, num_mask_rows-1],
975  // and each value is repeated (dim_ / block_dim_) times. This array is used
976  // to multiply the reshaped values or derivatives by the appropriate rows of
977  // the dropout matrix.
979 
981 
984  }
985 
// Writes this precomputed-indexes object to the stream 'os'. Declaration
// only; the definition is not part of this header.
// NOTE(review): 'binary' presumably selects binary vs. text format -- confirm.
986  virtual void Write(std::ostream &os, bool binary) const;
987 
// Reads this precomputed-indexes object from the stream 'is'. Declaration
// only; the definition is not part of this header.
// NOTE(review): 'binary' presumably selects binary vs. text format -- confirm.
988  virtual void Read(std::istream &is, bool binary);
989 
// Returns the fixed type-name string for this precomputed-indexes class,
// "GeneralDropoutComponentPrecomputedIndexes".
990  virtual std::string Type() const {
991  return "GeneralDropoutComponentPrecomputedIndexes";
992  }
993 };
994 
995 
997 
1018  public:
// Returns the input dimension of this component, which is dim_. OutputDim()
// returns the same member, so input and output dimensions are equal.
1019  virtual int32 InputDim() const { return dim_; }
1020 
// Returns the output dimension of this component, which is dim_. InputDim()
// returns the same member, so input and output dimensions are equal.
1021  virtual int32 OutputDim() const { return dim_; }
1022 
// Returns some text-form information about this component, for diagnostics.
// Declaration only; the definition is not part of this header.
1023  virtual std::string Info() const;
1024 
// Initializes this component from the ConfigLine object 'cfl'. Declaration
// only; the set of config fields it accepts is defined with the
// implementation, not in this header.
1025  virtual void InitFromConfig(ConfigLine *cfl);
1026 
1028 
1030 
// Returns the fixed type-name string for this component,
// "SpecAugmentTimeMaskComponent".
1031  virtual std::string Type() const { return "SpecAugmentTimeMaskComponent"; }
1032  virtual int32 Properties() const {
1034  }
1035 
// Propagate (forward) function: reads the input matrix 'in' and writes its
// result to '*out', optionally using the precomputed 'indexes'.
// Declaration only; the definition is not part of this header.
// NOTE(review): the returned void* is presumably the memo that is later
// handed to Backprop() and DeleteMemo() -- confirm against the definition.
1036  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1037  const CuMatrixBase<BaseFloat> &in,
1038  CuMatrixBase<BaseFloat> *out) const;
// Backprop function. The in_value and out_value parameters are left unnamed
// in this declaration (their names appear only as comments); the named
// parameters are 'out_deriv', 'memo', 'to_update' and the output 'in_deriv'.
// Declaration only; the definition is not part of this header.
// NOTE(review): 'memo' is presumably the pointer returned by Propagate() --
// confirm against the definition.
1039  virtual void Backprop(const std::string &debug_info,
1040  const ComponentPrecomputedIndexes *indexes,
1041  const CuMatrixBase<BaseFloat> &, // in_value
1042  const CuMatrixBase<BaseFloat> &, // out_value
1043  const CuMatrixBase<BaseFloat> &out_deriv,
1044  void *memo,
1045  Component *to_update,
1046  CuMatrixBase<BaseFloat> *in_deriv) const;
1047 
// Frees 'memo' by casting it to CuVector<BaseFloat>* and deleting it. The
// cast type matches the CuVector<BaseFloat>* returned by this class's private
// GetMemo(). The caller must therefore pass a pointer of that type.
1048  virtual void DeleteMemo(void *memo) const {
1049  delete static_cast<CuVector<BaseFloat>*>(memo);
1050  }
1051 
1053  const MiscComputationInfo &misc_info,
1054  const std::vector<Index> &input_indexes,
1055  const std::vector<Index> &output_indexes,
1056  bool need_backprop) const;
1057 
// Reads this component's state from the stream 'is'. Declaration only; the
// definition is not part of this header.
// NOTE(review): 'binary' presumably selects binary vs. text format -- confirm
// against the definition.
1058  virtual void Read(std::istream &is, bool binary);
// Writes this component to the stream 'os'. Const: does not modify the
// component. Declaration only; the definition is not part of this header.
// NOTE(review): 'binary' presumably selects binary vs. text format -- confirm
// against the definition.
1059  virtual void Write(std::ostream &os, bool binary) const;
1060 
// Returns a pointer to a copy of this component, typed as the base class
// Component. Declaration only; the definition is not part of this header.
// NOTE(review): presumably the caller owns the returned pointer -- confirm.
1061  virtual Component* Copy() const;
1062 
1063  private:
1064 
1065  // Returns a random vector reflecting the masking we are applying, computed
// from the precomputed 'indexes' for this computation. The result is returned
// as a raw CuVector<BaseFloat> pointer; DeleteMemo() in this class deletes a
// pointer of exactly this type.
1066  CuVector<BaseFloat> *GetMemo(
1067  const SpecAugmentTimeMaskComponentPrecomputedIndexes &indexes) const;
1068 
1069 
1070  // The input and output dimension
1072 
1074 
1076 
// Copy-assignment operator for SpecAugmentTimeMaskComponent. It is only
// declared here (it follows the 'private:' label of this class), which is how
// the class disallows assignment from outside code.
1077  const SpecAugmentTimeMaskComponent &operator
1078  = (const SpecAugmentTimeMaskComponent &other); // Disallow.
1079 };
1080 
1081 // This stores some precomputed indexes for SpecAugmentTimeMaskComponent.
1082 // This object is created for every instance of the Propagate()
1083 // function in the compiled computation.
1086  public:
1087 
1088  // 'indexes' is indexed first by sequence and then by time within that
1089  // sequence; each list indexes[s] is a consecutive list of the elements of
1090  // that sequence (e.g. t=0, t=1, and so on). The int32 values inside these
1091  // lists are row-indexes into the matrix that is at the input and output of
1092  // this component.
1093  std::vector<std::vector<int32> > indexes;
1094 
1095  // 'tot_size' is the total number of elements in 'indexes', equal to the
1096  // num-rows of the matrix we're doing dropout on.
1098 
1100 
1103  }
1104 
// Writes this precomputed-indexes object to the stream 'os'. Declaration
// only; the definition is not part of this header.
// NOTE(review): 'binary' presumably selects binary vs. text format -- confirm.
1105  virtual void Write(std::ostream &os, bool binary) const;
1106 
// Reads this precomputed-indexes object from the stream 'is'. Declaration
// only; the definition is not part of this header.
// NOTE(review): 'binary' presumably selects binary vs. text format -- confirm.
1107  virtual void Read(std::istream &is, bool binary);
1108 
// Returns the fixed type-name string for this precomputed-indexes class,
// "SpecAugmentTimeMaskComponentPrecomputedIndexes".
1109  virtual std::string Type() const {
1110  return "SpecAugmentTimeMaskComponentPrecomputedIndexes";
1111  }
1112 };
1113 
1114 
1115 
1116 
1117 
1118 
1119 } // namespace nnet3
1120 } // namespace kaldi
1121 
1122 
1123 #endif
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void ComputeInputIndexAndBlock(const Index &output_index, Index *input_index, int32 *block) const
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
Abstract base-class for neural-net components.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual ComponentPrecomputedIndexes * Copy() const
virtual int32 Properties() const
Return bitmask of the component's properties.
An abstract representation of a set of Indexes.
virtual void DeleteMemo(void *memo) const
This virtual function only needs to be overwritten by Components that return a non-NULL memo from the...
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
kaldi::int32 int32
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Keywords for search: natural gradient, naturalgradient, NG-SGD.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
Definition: nnet-common.h:44
SpecAugmentTimeMaskComponent implements the time part of SpecAugment.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual int32 OutputDim() const
Returns output-dimension of this component.
int32 NumParameters(const Nnet &src)
Returns the total of the number of parameters in the updatable components of the nnet.
Definition: nnet-utils.cc:359
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void ReorderIndexes(std::vector< Index > *input_indexes, std::vector< Index > *output_indexes) const
This function only does something interesting for non-simple Components.
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
virtual int32 Properties() const
Return bitmask of the component's properties.
This Component takes a larger input-dim than output-dim, where the input-dim must be a multiple of th...
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component's properties.
BaseFloat DotProduct(const Nnet &nnet1, const Nnet &nnet2)
Returns dot product between two networks of the same structure (calls the DotProduct functions of the...
Definition: nnet-utils.cc:250
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 InputDim() const
Returns input-dimension of this component.
DistributeComponent(int32 input_dim, int32 output_dim)
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
virtual int32 Properties() const
Return bitmask of the component's properties.
GeneralDropoutComponent implements dropout, including a continuous variant where the thing we multipl...
virtual int32 OutputDim() const
Returns output-dimension of this component.
Matrix for CUDA computing.
Definition: matrix-common.h:69
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
void Init(int32 input_dim, int32 output_dim)
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
virtual int32 InputDim() const
Returns input-dimension of this component.
void PerturbParams(BaseFloat stddev, Nnet *nnet)
Calls PerturbParams (with the given stddev) on all updatable components of the nnet.
Definition: nnet-utils.cc:199
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component's properties.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
std::vector< std::pair< int32, int32 > > pairs
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual ComponentPrecomputedIndexes * Copy() const
virtual int32 InputDim() const
Returns input-dimension of this component.
BackpropTruncationComponent(int32 dim, BaseFloat scale, BaseFloat clipping_threshold, BaseFloat zeroing_threshold, int32 zeroing_interval, int32 recurrence_interval)
virtual int32 InputDim() const
Returns input-dimension of this component.
void ComputeInputPointers(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, int32 num_output_rows, std::vector< const BaseFloat *> *input_pointers) const
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void DeleteMemo(void *memo) const
This virtual function only needs to be overwritten by Components that return a non-NULL memo from the...