nnet-simple-component.h
Go to the documentation of this file.
1 // nnet3/nnet-simple-component.h
2 
3 // Copyright 2011-2013 Karel Vesely
4 // 2012-2017 Johns Hopkins University (author: Daniel Povey)
5 // 2013 Xiaohui Zhang
6 // 2014-2016 Vijayaditya Peddinti
7 // 2014-2015 Guoguo Chen
8 // 2015 Daniel Galvez
9 // 2015 Tom Ko
10 
11 // See ../../COPYING for clarification regarding multiple authors
12 //
13 // Licensed under the Apache License, Version 2.0 (the "License");
14 // you may not use this file except in compliance with the License.
15 // You may obtain a copy of the License at
16 //
17 // http://www.apache.org/licenses/LICENSE-2.0
18 //
19 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
21 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
22 // MERCHANTABLITY OR NON-INFRINGEMENT.
23 // See the Apache 2 License for the specific language governing permissions and
24 // limitations under the License.
25 
26 #ifndef KALDI_NNET3_NNET_SIMPLE_COMPONENT_H_
27 #define KALDI_NNET3_NNET_SIMPLE_COMPONENT_H_
28 
29 #include "nnet3/nnet-common.h"
32 #include <iostream>
33 
34 namespace kaldi {
35 namespace nnet3 {
36 
46 
47 // This "nnet3" version of the p-norm component only supports the 2-norm.
48 class PnormComponent: public Component {
49  public:
50  void Init(int32 input_dim, int32 output_dim);
51  explicit PnormComponent(int32 input_dim, int32 output_dim) {
52  Init(input_dim, output_dim);
53  }
54  virtual int32 Properties() const {
56  }
58  virtual std::string Type() const { return "PnormComponent"; }
59  virtual void InitFromConfig(ConfigLine *cfl);
60  virtual int32 InputDim() const { return input_dim_; }
61  virtual int32 OutputDim() const { return output_dim_; }
62  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
63  const CuMatrixBase<BaseFloat> &in,
64  CuMatrixBase<BaseFloat> *out) const;
65  virtual void Backprop(const std::string &debug_info,
66  const ComponentPrecomputedIndexes *indexes,
67  const CuMatrixBase<BaseFloat> &in_value,
68  const CuMatrixBase<BaseFloat> &out_value,
69  const CuMatrixBase<BaseFloat> &out_deriv,
70  void *memo,
71  Component *to_update,
72  CuMatrixBase<BaseFloat> *in_deriv) const;
73  virtual Component* Copy() const { return new PnormComponent(input_dim_,
74  output_dim_); }
75 
76  virtual void Read(std::istream &is, bool binary); // This Read function
77  // requires that the Component has the correct type.
78 
80  virtual void Write(std::ostream &os, bool binary) const;
81 
82  protected:
85 };
86 
87 // This component randomly zeros dropout_proportion of the input
88 // and the derivatives are backpropagated through the nonzero inputs.
89 // Typically this component is used during training but not at test time.
90 // The idea is described under the name Dropout, in the paper
91 // "Dropout: A Simple Way to Prevent Neural Networks from Overfitting".
93  public:
94  void Init(int32 dim, BaseFloat dropout_proportion = 0.0,
95  bool dropout_per_frame = false);
96 
97  DropoutComponent(int32 dim, BaseFloat dropout = 0.0,
98  bool dropout_per_frame = false) {
99  Init(dim, dropout, dropout_per_frame);
100  }
101 
102  DropoutComponent(): dim_(0), dropout_proportion_(0.0),
103  dropout_per_frame_(false) { }
104 
105  DropoutComponent(const DropoutComponent &other);
106 
107  virtual int32 Properties() const {
110  }
111  virtual std::string Type() const { return "DropoutComponent"; }
112 
113  virtual void InitFromConfig(ConfigLine *cfl);
114 
115  virtual int32 InputDim() const { return dim_; }
116 
117  virtual int32 OutputDim() const { return dim_; }
118 
119  virtual void Read(std::istream &is, bool binary);
120 
121  // Write component to stream
122  virtual void Write(std::ostream &os, bool binary) const;
123 
124  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
125  const CuMatrixBase<BaseFloat> &in,
126  CuMatrixBase<BaseFloat> *out) const;
127  virtual void Backprop(const std::string &debug_info,
128  const ComponentPrecomputedIndexes *indexes,
129  const CuMatrixBase<BaseFloat> &in_value,
130  const CuMatrixBase<BaseFloat> &out_value,
131  const CuMatrixBase<BaseFloat> &out_deriv,
132  void *memo,
133  Component *to_update,
134  CuMatrixBase<BaseFloat> *in_deriv) const;
135 
136  virtual Component* Copy() const;
137 
138  virtual std::string Info() const;
139 
140  void SetDropoutProportion(BaseFloat dropout_proportion) {
141  dropout_proportion_ = dropout_proportion;
142  }
143 
144  BaseFloat DropoutProportion() const { return dropout_proportion_; }
145  private:
151 };
152 
154  public:
155  void Init(int32 input_dim, int32 output_dim);
156  explicit ElementwiseProductComponent(int32 input_dim, int32 output_dim) {
157  Init(input_dim, output_dim);
158  }
159  virtual int32 Properties() const {
161  }
163  virtual std::string Type() const { return "ElementwiseProductComponent"; }
164  virtual void InitFromConfig(ConfigLine *cfl);
165  virtual int32 InputDim() const { return input_dim_; }
166  virtual int32 OutputDim() const { return output_dim_; }
167  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
168  const CuMatrixBase<BaseFloat> &in,
169  CuMatrixBase<BaseFloat> *out) const;
170  virtual void Backprop(const std::string &debug_info,
171  const ComponentPrecomputedIndexes *indexes,
172  const CuMatrixBase<BaseFloat> &in_value,
173  const CuMatrixBase<BaseFloat> &out_value,
174  const CuMatrixBase<BaseFloat> &out_deriv,
175  void *memo,
176  Component *to_update,
177  CuMatrixBase<BaseFloat> *in_deriv) const;
178  virtual Component* Copy() const { return new ElementwiseProductComponent(input_dim_,
179  output_dim_); }
180 
181  virtual void Read(std::istream &is, bool binary); // This Read function
182  // requires that the Component has the correct type.
183 
185  virtual void Write(std::ostream &os, bool binary) const;
186 
187  protected:
190 };
191 
192 /*
193  Implements the sigmoid nonlinearity, i.e. the function y = 1 / (1 + exp(-x)).
194 
195  Configuration values accepted:
196  dim Dimension of this component, e.g. 1024
197 
198  Configuration values inherited from NonlinearComponent, and their
199  local meanings:
200  self-repair-lower-threshold e.g. self-repair-lower-threshold=0.05. This
201  controls the self-repair mechanism, which for sigmoid units
202  consists of identifying units which are oversaturated (i.e.
203  usually close to 0 or +1) and nudging the inputs to be
204  closer to zero. It gates on the average derivative of the
205  nonlinearity, which for sigmoid is a value between 0 and
206  0.25. For units where the average function-derivative
207  accumulated during this iteration (job) of training is less
208  than this threshold, we activate self-repair, which consists
209  of adding (-self-repair-scale * (2*the output of the
210  nonlinearity - 1.0)) to the backpropagated derivatives.
211  This just happens to be a convenient-to-compute function
212  that's +1 for large negative inputs, and -1 for large positive
213  inputs, and smooth in between.
214  The default value of this is -1000, which the code internally
215  maps to 0.05 which is suitable for sigmoid units; if you do set it,
216  you can set it to a value like 0.025 or 0.075.
217  self-repair-scale Scale for the self-repair mechanism; see comments above.
218  default=0, but we usually set this to 1.0e-05 (or
219  occasionally 1.0e-04) in the scripts.
220 
221  */
223  public:
224  explicit SigmoidComponent(const SigmoidComponent &other): NonlinearComponent(other) { }
226  virtual std::string Type() const { return "SigmoidComponent"; }
227  virtual int32 Properties() const {
229  }
230  virtual Component* Copy() const { return new SigmoidComponent(*this); }
231  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
232  const CuMatrixBase<BaseFloat> &in,
233  CuMatrixBase<BaseFloat> *out) const;
234  virtual void Backprop(const std::string &debug_info,
235  const ComponentPrecomputedIndexes *indexes,
236  const CuMatrixBase<BaseFloat> &, //in_value
237  const CuMatrixBase<BaseFloat> &out_value,
238  const CuMatrixBase<BaseFloat> &out_deriv,
239  void *memo,
240  Component *to_update,
241  CuMatrixBase<BaseFloat> *in_deriv) const;
242  virtual void StoreStats(const CuMatrixBase<BaseFloat> &in_value,
243  const CuMatrixBase<BaseFloat> &out_value,
244  void *memo);
245  private:
246  // this function is called from Backprop code and only does something if the
247  // self-repair-scale config value is set.
248  void RepairGradients(const CuMatrixBase<BaseFloat> &out_value,
249  CuMatrixBase<BaseFloat> *in_deriv,
250  SigmoidComponent *to_update) const;
251 
252  SigmoidComponent &operator = (const SigmoidComponent &other); // Disallow.
253 };
254 
255 /*
256  Implements the tanh nonlinearity, i.e. the function y = tanh(x).
257 
258  Configuration values accepted:
259  dim Dimension of this component, e.g. 1024
260 
261  Configuration values inherited from NonlinearComponent, and their
262  local meanings:
263  self-repair-lower-threshold e.g. self-repair-lower-threshold=0.2. This
264  controls the self-repair mechanism, which for tanh units
265  consists of identifying units which are oversaturated (i.e.
266  usually close to -1 or +1) and nudging the inputs to be
267  closer to zero. It gates on the average derivative of
268  the nonlinearity, which for tanh is a value between 0 and 1.
269  For units where the average function-derivative accumulated
270  during this iteration (job) of training is less than
271  this threshold, we activate self-repair, which consists of
272  adding (-self-repair-scale * the output of the nonlinearity),
273  i.e. (-self-repair-scale * tanh(x)) to the backpropagated
274  derivatives.
275  The default value of this is -1000, which the code internally
276  maps to 0.2 which is suitable for tanh units; if you do set it,
277  you can set it to a value like 0.1 or 0.3.
278  self-repair-scale Scale for the self-repair mechanism; see comments above.
279  default=0, but we usually set this to 1.0e-05 (or
280  occasionally 1.0e-04) in the scripts.
281  */
283  public:
284  explicit TanhComponent(const TanhComponent &other): NonlinearComponent(other) { }
286  virtual std::string Type() const { return "TanhComponent"; }
287  virtual Component* Copy() const { return new TanhComponent(*this); }
288  virtual int32 Properties() const {
290  }
291  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
292  const CuMatrixBase<BaseFloat> &in,
293  CuMatrixBase<BaseFloat> *out) const;
294  virtual void Backprop(const std::string &debug_info,
295  const ComponentPrecomputedIndexes *indexes,
296  const CuMatrixBase<BaseFloat> &, //in_value
297  const CuMatrixBase<BaseFloat> &out_value,
298  const CuMatrixBase<BaseFloat> &out_deriv,
299  void *memo,
300  Component *to_update,
301  CuMatrixBase<BaseFloat> *in_deriv) const;
302  virtual void StoreStats(const CuMatrixBase<BaseFloat> &in_value,
303  const CuMatrixBase<BaseFloat> &out_value,
304  void *memo);
305  private:
306  // this function is called from Backprop code and only does something if the
307  // self-repair-scale config value is set.
308  void RepairGradients(const CuMatrixBase<BaseFloat> &out_value,
309  CuMatrixBase<BaseFloat> *in_deriv,
310  TanhComponent *to_update) const;
311 
312  TanhComponent &operator = (const TanhComponent &other); // Disallow.
313 };
314 
315 
316 /*
317  Implements the Rectified Linear Unit nonlinearity, a.k.a. ReLU.
318 
319  Configuration values accepted:
320  dim Dimension of this component, e.g. 1024
321 
322  Configuration values inherited from NonlinearComponent, and their
323  local meanings:
324  self-repair-lower-threshold e.g. self-repair-lower-threshold=0.05. (Lower
325  threshold for self-repair, if set; in this case acts on
326  the average function-derivative, which is the proportion
327  of the time the output is > 0. For any unit where the
328  average function-derivative is lower than this threshold,
329  we add 'self-repair-scale' to the backpropagated
330  derivatives in backprop. There is no default
331  (default=-1000, which is interpreted specially).
332  self-repair-upper-threshold e.g. self-repair-upper-threshold=0.95.
333  Like self-repair-lower-threshold, but controls self-repair
334  for units that are active *too* much of the time. Units
335  whose average function-derivative exceeds this threshold
336  will have the negative of 'self-repair-scale' added to their
337  input derivatives in backprop. There is no default
338  (default=-1000, which is interpreted specially).
339  self-repair-scale Scale for the self-repair mechanism; see comments for
340  self-repair-lower-threshold and self-repair-upper-threshold
341  for details. default=0, but we usually set this to 1.0e-05
342  (or occasionally 1.0e-04) in the scripts.
343  */
345  public:
347  NonlinearComponent(other) { }
349  virtual std::string Type() const { return "RectifiedLinearComponent"; }
350  virtual Component* Copy() const { return new RectifiedLinearComponent(*this); }
351  virtual int32 Properties() const {
353  kStoresStats|(block_dim_ != dim_ ? kInputContiguous : 0);
354  }
355  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
356  const CuMatrixBase<BaseFloat> &in,
357  CuMatrixBase<BaseFloat> *out) const;
358  virtual void Backprop(const std::string &debug_info,
359  const ComponentPrecomputedIndexes *indexes,
360  const CuMatrixBase<BaseFloat> &, //in_value
361  const CuMatrixBase<BaseFloat> &out_value,
362  const CuMatrixBase<BaseFloat> &out_deriv,
363  void *memo,
364  Component *to_update,
365  CuMatrixBase<BaseFloat> *in_deriv) const;
366  virtual void StoreStats(const CuMatrixBase<BaseFloat> &in_value,
367  const CuMatrixBase<BaseFloat> &out_value,
368  void *memo);
369  private:
370  // this function is called from Backprop code and only does something if the
371  // self-repair-scale config value is set.
372  void RepairGradients(CuMatrixBase<BaseFloat> *in_deriv,
373  RectifiedLinearComponent *to_update) const;
374 
375  RectifiedLinearComponent &operator = (const RectifiedLinearComponent &other); // Disallow.
376 };
377 
378 
380 class FixedScaleComponent;
383 
384 /*
385  Affine means a linear function plus an offset.
386  Note: although this class can be instantiated, it also
387  functions as a base-class for more specialized versions of
388  AffineComponent.
389 
390  Parameters accepted on the config line, with default if applicable:
391 
392  matrix If specified, a filename containing the parameters of the class as
393  a single matrix containing the linear_params, plus the bias_params
394  as the last column
395 
396  input-dim The input dimension of the component
397  output-dim The output dimension of the component
398  param-stddev=1/sqrt(input-dim) The standard deviation of the elements of the linear parameters
399  (they will have a Gaussian distribution with this standard deviation).
400  bias-stddev=1.0 The standard deviation of the elements of the bias parameters
401 
402  orthonormal-constraint=0.0 Can be used to constrain the linear parameter matrix
403  to be semi-orthogonal, see ConstrainOrthonormal() in nnet-utils.h,
404  and http://www.danielpovey.com/files/2018_interspeech_tdnnf.pdf.
405 */
407  public:
408  virtual int32 InputDim() const { return linear_params_.NumCols(); }
409  virtual int32 OutputDim() const { return linear_params_.NumRows(); }
410 
411  BaseFloat OrthonormalConstraint() const { return orthonormal_constraint_; }
412 
413  virtual std::string Info() const;
414  virtual void InitFromConfig(ConfigLine *cfl);
415 
416  AffineComponent(): orthonormal_constraint_(0.0) { } // use Init to really initialize.
417  virtual std::string Type() const { return "AffineComponent"; }
418  virtual int32 Properties() const {
421  }
422 
423 
424  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
425  const CuMatrixBase<BaseFloat> &in,
426  CuMatrixBase<BaseFloat> *out) const;
427  virtual void Backprop(const std::string &debug_info,
428  const ComponentPrecomputedIndexes *indexes,
429  const CuMatrixBase<BaseFloat> &in_value,
430  const CuMatrixBase<BaseFloat> &, // out_value
431  const CuMatrixBase<BaseFloat> &out_deriv,
432  void *memo,
433  Component *to_update,
434  CuMatrixBase<BaseFloat> *in_deriv) const;
435 
436  virtual void Read(std::istream &is, bool binary);
437  virtual void Write(std::ostream &os, bool binary) const;
438 
439  virtual Component* Copy() const;
440 
441 
442  // Some functions from base-class UpdatableComponent.
443  virtual void Scale(BaseFloat scale);
444  virtual void Add(BaseFloat alpha, const Component &other);
445  virtual void PerturbParams(BaseFloat stddev);
446  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
447  virtual int32 NumParameters() const;
448  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
449  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
450 
451  // Some functions that are specific to this class.
452 
453  virtual void SetParams(const CuVectorBase<BaseFloat> &bias,
454  const CuMatrixBase<BaseFloat> &linear);
455  const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
456  CuVector<BaseFloat> &BiasParams() { return bias_params_; }
457  const CuMatrix<BaseFloat> &LinearParams() const { return linear_params_; }
458  CuMatrix<BaseFloat> &LinearParams() { return linear_params_; }
459  explicit AffineComponent(const AffineComponent &other);
460  // The next constructor is used in converting from nnet1.
461  AffineComponent(const CuMatrixBase<BaseFloat> &linear_params,
462  const CuVectorBase<BaseFloat> &bias_params,
463  BaseFloat learning_rate);
464  // This function resizes the dimensions of the component, setting the
465  // parameters to zero, while leaving any other configuration values the same.
466  virtual void Resize(int32 input_dim, int32 output_dim);
467 
468  void Init(int32 input_dim, int32 output_dim,
469  BaseFloat param_stddev, BaseFloat bias_stddev);
470  protected:
471  void Init(std::string matrix_filename);
472 
474  // This function Update() is for extensibility; child classes may override
475  // this, e.g. for natural gradient update.
476  virtual void Update(
477  const std::string &debug_info,
478  const CuMatrixBase<BaseFloat> &in_value,
479  const CuMatrixBase<BaseFloat> &out_deriv) {
480  UpdateSimple(in_value, out_deriv);
481  }
482  // UpdateSimple is used when *this is a gradient. Child classes may override
483  // this if needed, but typically won't need to.
484  virtual void UpdateSimple(
485  const CuMatrixBase<BaseFloat> &in_value,
486  const CuMatrixBase<BaseFloat> &out_deriv);
487 
488  const AffineComponent &operator = (const AffineComponent &other); // Disallow.
491  // see documentation at the top of this class for more information on the
492  // following.
494 };
495 
497 
506  public:
507  virtual int32 InputDim() const { return linear_params_.NumCols() * num_blocks_; }
508  virtual int32 OutputDim() const { return linear_params_.NumRows(); }
509 
510  virtual std::string Info() const;
511  virtual void InitFromConfig(ConfigLine *cfl);
512 
514  virtual std::string Type() const { return "BlockAffineComponent"; }
515  virtual int32 Properties() const {
518  }
519 
520  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
521  const CuMatrixBase<BaseFloat> &in,
522  CuMatrixBase<BaseFloat> *out) const;
523 
524  virtual void Backprop(const std::string &debug_info,
525  const ComponentPrecomputedIndexes *indexes,
526  const CuMatrixBase<BaseFloat> &in_value,
527  const CuMatrixBase<BaseFloat> &, // out_value
528  const CuMatrixBase<BaseFloat> &out_deriv,
529  void *memo,
530  Component *to_update,
531  CuMatrixBase<BaseFloat> *in_deriv) const;
532 
533  virtual void Read(std::istream &is, bool binary);
534  virtual void Write(std::ostream &os, bool binary) const;
535 
536  virtual Component* Copy() const;
537 
538  // Functions from base-class UpdatableComponent.
539  virtual void Scale(BaseFloat scale);
540  virtual void Add(BaseFloat alpha, const Component &other);
541  virtual void PerturbParams(BaseFloat stddev);
542  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
543  virtual int32 NumParameters() const;
544  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
545  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
546 
547  explicit BlockAffineComponent(const BlockAffineComponent &other);
548  explicit BlockAffineComponent(const RepeatedAffineComponent &rac);
549  protected:
550  // The matrix linear_params_ has a block structure, with num_blocks_ blocks of
551  // equal size. The blocks are stored in linear_params_ as
552  // [ M
553  // N
554  // O ] but we actually treat it as the matrix:
555  // [ M 0 0
556  // 0 N 0
557  // 0 0 O ]
561  private:
562  // BlockAffine-specific functions.
563  void Init(int32 input_dim, int32 output_dim, int32 num_blocks,
564  BaseFloat param_stddev, BaseFloat bias_mean,
565  BaseFloat bias_stddev);
566 
567  const BlockAffineComponent &operator = (const BlockAffineComponent &other); // Disallow.
568 };
569 
571  public:
572 
573  virtual int32 InputDim() const { return linear_params_.NumCols() * num_repeats_; }
574  virtual int32 OutputDim() const { return linear_params_.NumRows() * num_repeats_; }
575 
576  virtual std::string Info() const;
577  virtual void InitFromConfig(ConfigLine *cfl);
578 
579  RepeatedAffineComponent() { } // use Init to really initialize.
580  virtual std::string Type() const { return "RepeatedAffineComponent"; }
581  virtual int32 Properties() const {
584  }
585  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
586  const CuMatrixBase<BaseFloat> &in,
587  CuMatrixBase<BaseFloat> *out) const;
588  virtual void Backprop(const std::string &debug_info,
589  const ComponentPrecomputedIndexes *indexes,
590  const CuMatrixBase<BaseFloat> &in_value,
591  const CuMatrixBase<BaseFloat> &, // out_value
592  const CuMatrixBase<BaseFloat> &out_deriv,
593  void *memo,
594  Component *to_update,
595  CuMatrixBase<BaseFloat> *in_deriv) const;
596 
597  virtual void Read(std::istream &is, bool binary);
598  virtual void Write(std::ostream &os, bool binary) const;
599 
600  virtual Component* Copy() const;
601 
602  // Some functions from base-class UpdatableComponent.
603  virtual void Scale(BaseFloat scale);
604  virtual void Add(BaseFloat alpha, const Component &other);
605  virtual void PerturbParams(BaseFloat stddev);
606  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
607  virtual int32 NumParameters() const;
608  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
609  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
610 
611  // Some functions that are specific to this class.
612  const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
613  const CuMatrix<BaseFloat> &LinearParams() const { return linear_params_; }
614  explicit RepeatedAffineComponent(const RepeatedAffineComponent &other);
615 
617  protected:
618  void Init(int32 input_dim, int32 output_dim, int32 num_repeats,
619  BaseFloat param_stddev, BaseFloat bias_mean,
620  BaseFloat bias_stddev);
621 
622  // This function Update(), called from backprop, is broken out for
623  // extensibility to natural gradient update.
624  virtual void Update(
625  const CuMatrixBase<BaseFloat> &in_value,
626  const CuMatrixBase<BaseFloat> &out_deriv);
627 
628  // This function does nothing here but is redefined in child-class
629  // NaturalGradientRepeatedAffineComponent. This helps avoid repeated code.
630  virtual void SetNaturalGradientConfigs() { }
631 
632  const RepeatedAffineComponent &operator = (const RepeatedAffineComponent &other); // Disallow.
636 };
637 
639  public:
640  // Use Init() to really initialize.
642 
643  // Most of the public functions are inherited from RepeatedAffineComponent.
644  virtual std::string Type() const {
645  return "NaturalGradientRepeatedAffineComponent";
646  }
647 
648  virtual Component* Copy() const;
649 
650  // Copy constructor
653 
654  virtual void ConsolidateMemory();
655 
656  private:
657  virtual void Update(
658  const CuMatrixBase<BaseFloat> &in_value,
659  const CuMatrixBase<BaseFloat> &out_deriv);
660 
662  const NaturalGradientRepeatedAffineComponent &other); // Disallow.
663 
664  // Applies the default configuration to preconditioner_in_.
665  virtual void SetNaturalGradientConfigs();
666 
667  // For efficiency reasons we only apply the natural gradient to the input
668  // side, i.e. not to the space of output derivatives-- we believe the input
669  // side is the more important side. We don't make the natural-gradient
670  // configurable; we just give it a reasonable configuration.
671  // Instead of using the individual data-points, for efficiency reasons we use
672  // the distribution of per-minibatch summed derivatives over each dimension of
673  // the output space, as the source for the Fisher matrix.
675 };
676 
678  public:
679  explicit SoftmaxComponent(const SoftmaxComponent &other):
680  NonlinearComponent(other) { }
682  virtual std::string Type() const { return "SoftmaxComponent"; }
683  virtual int32 Properties() const {
686  }
687  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
688  const CuMatrixBase<BaseFloat> &in,
689  CuMatrixBase<BaseFloat> *out) const;
690  virtual void Backprop(const std::string &debug_info,
691  const ComponentPrecomputedIndexes *indexes,
692  const CuMatrixBase<BaseFloat> &in_value,
693  const CuMatrixBase<BaseFloat> &out_value,
694  const CuMatrixBase<BaseFloat> &out_deriv,
695  void *memo,
696  Component *to_update,
697  CuMatrixBase<BaseFloat> *in_deriv) const;
698  virtual void StoreStats(const CuMatrixBase<BaseFloat> &in_value,
699  const CuMatrixBase<BaseFloat> &out_value,
700  void *memo);
701  virtual Component* Copy() const { return new SoftmaxComponent(*this); }
702  private:
703  SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
704 };
705 
706 
707 /*
708  Implements the log of a softmax nonlinearity, so it's the same
709  as shifting each input vector by a constant offset so that, when
710  exponentiated, it would sum to one.
711 
712  We usually use this in place of softmax because the log-scale
713  output will not saturate.
714 
715  Configuration values accepted:
716  dim e.g. dim=8061. Usually this is the last component
717  in a network, so 'dim' is the number of classes.
718  */
720  public:
721  explicit LogSoftmaxComponent(const LogSoftmaxComponent &other):
722  NonlinearComponent(other) { }
724  virtual std::string Type() const { return "LogSoftmaxComponent"; }
725  virtual int32 Properties() const {
727  }
728  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
729  const CuMatrixBase<BaseFloat> &in,
730  CuMatrixBase<BaseFloat> *out) const;
731  virtual void Backprop(const std::string &debug_info,
732  const ComponentPrecomputedIndexes *indexes,
733  const CuMatrixBase<BaseFloat> &in_value,
734  const CuMatrixBase<BaseFloat> &out_value,
735  const CuMatrixBase<BaseFloat> &out_deriv,
736  void *memo,
737  Component *to_update,
738  CuMatrixBase<BaseFloat> *in_deriv) const;
739 
740  virtual Component* Copy() const { return new LogSoftmaxComponent(*this); }
741  private:
742  LogSoftmaxComponent &operator = (const LogSoftmaxComponent &other); // Disallow.
743 };
744 
745 /*
746  Keywords: natural gradient descent, NG-SGD, naturalgradient. For
747  the top-level of the natural gradient code look here, and also in
748  nnet-precondition-online.h.
749  NaturalGradientAffineComponent is
750  a version of AffineComponent that has a non-(multiple of unit) learning-rate
751  matrix. See nnet-precondition-online.h for a description of the technique.
752  It is described, under the name Online NG-SGD, in the paper "Parallel
753  training of DNNs with Natural Gradient and Parameter Averaging" (ICLR
754  workshop, 2015) by Daniel Povey, Xiaohui Zhang and Sanjeev Khudanpur.
755 
756  Configuration values accepted by this component:
757 
758  Values inherited from UpdatableComponent (see its declaration in
759  nnet-component-itf.h for details):
760  learning-rate
761  learning-rate-factor
762  max-change
763 
764  Values used in initializing the component's parameters:
765  input-dim e.g. input-dim=1024. The input dimension.
766  output-dim e.g. output-dim=1024. The output dimension.
767  param-stddev e.g. param-stddev=0.025. The standard deviation
768  used to randomly initialize the linear parameters
769  (as Gaussian random values * param-stddev).
770  Defaults to 1/sqrt(input-dim), which is Glorot
771  initialization.
772  bias-stddev e.g. bias-stddev=0.0. The standard deviation
773  used to randomly initialize the bias parameters.
774  Defaults to 1.0 but we usually set it to 0.0
775  in the config.
776  bias-mean e.g. bias-mean=1.0. Allows you to initialize the
777  bias parameters with an offset. Default is 0.0
778  which is normally suitable
779 
780  matrix e.g. matrix=foo/bar/init.mat May be used as an
781  alternative to (input-dim, output-dim, param-stddev,
782  bias-stddev, bias-mean) to initialize the parameters.
783  Dimension is output-dim by (input-dim + 1), last
784  column is interpreted as the bias.
785 
786  Other options:
787  orthonormal-constraint=0.0 If you set this to 1.0, then
788  the linear_params_ matrix will be (approximately)
789  constrained during training to have orthonormal rows
790  (or columns, whichever is fewer). It turns out the
791  real name for this is a "semi-orthogonal" matrix.
792  You can choose a positive nonzero value different
793  than 1.0 to have a scaled semi-orthogonal matrix,
794  i.e. with singular values at the selected value
795  (e.g. 0.5, or 2.0). This is not enforced inside the
796  component itself; you have to call
797  ConstrainOrthonormal() from the training code to do
798  this. All this component does is return the
799  OrthonormalConstraint() value. If you set this to a
800  negative value, it's like saying "for any value",
801  i.e. it will constrain the parameter matrix to be
802  closer to "any alpha" times a semi-orthogonal matrix,
803  without changing its overall norm.
804 
805 
806  Options to the natural gradient (you won't normally have to set these,
807  the defaults are suitable):
808 
809  num-samples-history Number of frames used as the time-constant to
810  determine how 'up-to-date' the Fisher-matrix
811  estimates are. Smaller -> more up-to-date, but more
812  noisy. default=2000.
813  alpha Constant that determines how much we smooth the
814  Fisher-matrix estimates with the unit matrix.
815  Larger means more smoothing. default=4.0
816  rank-in Rank used in low-rank-plus-unit estimate of Fisher
817  matrix in the input space. default=20.
818  rank-out Rank used in low-rank-plus-unit estimate of Fisher
819  matrix in the output-derivative space. default=80.
820  update-period Determines the period (in minibatches) with which
821  we update the Fisher-matrix estimates;
822  making this > 1 saves a little time in training.
823  default=4.
824 */
826  public:
827  virtual std::string Type() const { return "NaturalGradientAffineComponent"; }
828  virtual void Read(std::istream &is, bool binary);
829  virtual void Write(std::ostream &os, bool binary) const;
830  // this constructor does not really initialize, use InitFromConfig() or Read().
832  void InitFromConfig(ConfigLine *cfl);
833  virtual std::string Info() const;
834  virtual Component* Copy() const;
835  virtual void Scale(BaseFloat scale);
836  virtual void Add(BaseFloat alpha, const Component &other);
837  virtual void FreezeNaturalGradient(bool freeze);
838 
839  virtual void ConsolidateMemory();
840 
841  // copy constructor
843  const NaturalGradientAffineComponent &other);
845  const CuMatrixBase<BaseFloat> &linear_params,
846  const CuVectorBase<BaseFloat> &bias_params);
847  private:
848  // disallow assignment operator.
849  NaturalGradientAffineComponent &operator= (
851 
853 
855 
856  virtual void Update(
857  const std::string &debug_info,
858  const CuMatrixBase<BaseFloat> &in_value,
859  const CuMatrixBase<BaseFloat> &out_deriv);
860 };
861 
862 /*
863  LinearComponent represents a linear (matrix) transformation of its input, with
864  a matrix as its trainable parameters. It's the same as
865  NaturalGradientAffineComponent, but without the bias term.
866 
867  Configuration values accepted by this component:
868 
869  Values inherited from UpdatableComponent (see its declaration in
870  nnet-component-itf for details):
871  learning-rate
872  learning-rate-factor
873  max-change
874 
875  Values used in initializing the component's parameters:
876  input-dim e.g. input-dim=1024. The input dimension.
877  output-dim e.g. output-dim=1024. The output dimension.
878  param-stddev e.g. param-stddev=0.025. The standard deviation
879  used to randomly initialize the linear parameters
880  (as Gaussian random values * param-stddev).
881  Defaults to 1/sqrt(input-dim), which is Glorot
882  initialization.
883  matrix e.g. matrix=foo/bar/init.mat May be used as an
884  alternative to (input-dim, output-dim, param-stddev)
885  to initialize the parameters.
886  Dimension is output-dim by input-dim (this component
887  has no bias term).
888  orthonormal-constraint=0.0 If you set this to 1.0, then
889  the linear_params_ matrix will be (approximately)
890  constrained during training to have orthonormal rows
891  (or columns, whichever is fewer); it turns out the
892  real name for this is a "semi-orthogonal" matrix.
893  You can choose a positive nonzero value different
894  than 1.0 to have a scaled semi-orthogonal matrix,
895  i.e. with singular values at the selected value
896  (e.g. 0.5, or 2.0). This is not enforced inside the
897  component itself; you have to call
898  ConstrainOrthonormal() from the training code to do
899  this. All this component does is return the
900  OrthonormalConstraint() value. If you set this to a
901  negative value, it's like saying "for any value",
902  i.e. it will constrain the parameter matrix to be
903  closer to "any alpha" times a semi-orthogonal matrix,
904  without changing its overall norm.
905 
906  Options to the natural gradient (you won't normally have to set these,
907  the defaults are suitable):
908 
909  use-natural-gradient=true Set this to false to disable the natural-gradient
910  update entirely (it will do regular SGD).
911  num-samples-history Number of frames used as the time-constant to
912  determine how 'up-to-date' the Fisher-matrix
913  estimates are. Smaller -> more up-to-date, but more
914  noisy. default=2000.
915  alpha Constant that determines how much we smooth the
916  Fisher-matrix estimates with the unit matrix.
917  Larger means more smoothing. default=4.0
918  rank-in Rank used in low-rank-plus-unit estimate of Fisher
919  matrix in the input space. default=20.
920  rank-out Rank used in low-rank-plus-unit estimate of Fisher
921  matrix in the output-derivative space. default=80.
922  update-period Determines the period (in minibatches) with which
923  we update the Fisher-matrix estimates;
924  making this > 1 saves a little time in training.
925  default=4.
926 */
928  public:
929  virtual int32 InputDim() const { return params_.NumCols(); }
930  virtual int32 OutputDim() const { return params_.NumRows(); }
931 
932  virtual std::string Type() const { return "LinearComponent"; }
933  virtual int32 Properties() const {
936  }
937 
938  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
939  const CuMatrixBase<BaseFloat> &in,
940  CuMatrixBase<BaseFloat> *out) const;
941  virtual void Backprop(const std::string &debug_info,
942  const ComponentPrecomputedIndexes *indexes,
943  const CuMatrixBase<BaseFloat> &in_value,
944  const CuMatrixBase<BaseFloat> &, // out_value
945  const CuMatrixBase<BaseFloat> &out_deriv,
946  void *memo,
947  Component *to_update,
948  CuMatrixBase<BaseFloat> *in_deriv) const;
949  virtual void Read(std::istream &is, bool binary);
950  virtual void Write(std::ostream &os, bool binary) const;
951  // this constructor does not really initialize, use InitFromConfig() or Read().
953  void InitFromConfig(ConfigLine *cfl);
954  virtual std::string Info() const;
955  virtual Component* Copy() const;
956  virtual void Scale(BaseFloat scale);
957  virtual void Add(BaseFloat alpha, const Component &other);
958  virtual void PerturbParams(BaseFloat stddev);
959  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
960  virtual int32 NumParameters() const;
961  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
962  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
963  virtual void FreezeNaturalGradient(bool freeze);
964  virtual void ConsolidateMemory();
965 
966  // copy constructor
967  explicit LinearComponent(const LinearComponent &other);
968 
969  explicit LinearComponent(const CuMatrix<BaseFloat> &params);
970 
971  BaseFloat OrthonormalConstraint() const { return orthonormal_constraint_; }
972  CuMatrixBase<BaseFloat> &Params() { return params_; }
973  const CuMatrixBase<BaseFloat> &Params() const { return params_; }
974  private:
975 
976  // disallow assignment operator.
977  LinearComponent &operator= (
978  const LinearComponent&);
979 
981 
983  // If true (and if not this->is_gradient_), use natural gradient updates.
987 };
988 
989 
993  public:
995  virtual std::string Type() const { return "FixedAffineComponent"; }
996  virtual std::string Info() const;
997 
998  // Copy constructor from AffineComponent-- can be used when we're done
999  // training a particular part of the model and want to efficiently disable
1000  // further training.
1002 
1004  void Init(const CuMatrixBase<BaseFloat> &matrix);
1005 
1006  // The ConfigLine cfl contains just the option matrix=<string>,
1007  // where the string is the filename of a Kaldi-format matrix to read.
1008  virtual void InitFromConfig(ConfigLine *cfl);
1009 
1010  virtual int32 Properties() const { return kSimpleComponent|kBackpropAdds; }
1011  virtual int32 InputDim() const { return linear_params_.NumCols(); }
1012  virtual int32 OutputDim() const { return linear_params_.NumRows(); }
1013 
1014  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1015  const CuMatrixBase<BaseFloat> &in,
1016  CuMatrixBase<BaseFloat> *out) const;
1017  virtual void Backprop(const std::string &debug_info,
1018  const ComponentPrecomputedIndexes *indexes,
1019  const CuMatrixBase<BaseFloat> &in_value,
1020  const CuMatrixBase<BaseFloat> &, // out_value
1021  const CuMatrixBase<BaseFloat> &out_deriv,
1022  void *memo,
1023  Component *to_update,
1024  CuMatrixBase<BaseFloat> *in_deriv) const;
1025 
1026 
1027  virtual Component* Copy() const;
1028  virtual void Read(std::istream &is, bool binary);
1029  virtual void Write(std::ostream &os, bool binary) const;
1030 
1031  const CuMatrix<BaseFloat> &LinearParams() const { return linear_params_; }
1032  const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
1033  protected:
1034  friend class AffineComponent;
1037 
1039 };
1040 
1054 public:
1055  virtual int32 InputDim() const { return input_dim_; }
1056  virtual int32 OutputDim() const { return output_dim_; }
1057  void Init(const std::vector<int32> &sizes); // the vector is of the input dim
1058  // (>= 1) for each output dim.
1059  void Init(int32 input_dim, int32 output_dim);
1060  void GetSizes(std::vector<int32> *sizes) const; // Get a vector saying, for
1061  // each output-dim, how many
1062  // inputs were summed over.
1063  virtual void InitFromConfig(ConfigLine *cfl);
1065  virtual std::string Type() const { return "SumGroupComponent"; }
1066  virtual int32 Properties() const { return kSimpleComponent; }
1067  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1068  const CuMatrixBase<BaseFloat> &in,
1069  CuMatrixBase<BaseFloat> *out) const;
1070  virtual void Backprop(const std::string &debug_info,
1071  const ComponentPrecomputedIndexes *indexes,
1072  const CuMatrixBase<BaseFloat> &in_value,
1073  const CuMatrixBase<BaseFloat> &, // out_value
1074  const CuMatrixBase<BaseFloat> &out_deriv,
1075  void *memo,
1076  Component *to_update,
1077  CuMatrixBase<BaseFloat> *in_deriv) const;
1078  virtual Component* Copy() const;
1079  virtual void Read(std::istream &is, bool binary);
1080  virtual void Write(std::ostream &os, bool binary) const;
1081 
1082 private:
1084  // Note: Int32Pair is just struct{ int32 first; int32 second }; it's defined
1085  // in cu-matrixdim.h as extern "C" which is needed for the CUDA interface.
1086  CuArray<Int32Pair> indexes_; // for each output index, the (start, end) input
1087  // index.
1088  CuArray<int32> reverse_indexes_; // for each input index, the output index.
1091 };
1092 
1093 
1098  public:
1100  virtual std::string Type() const { return "FixedScaleComponent"; }
1101  virtual std::string Info() const;
1102  virtual int32 Properties() const {
1104  }
1105 
1106  void Init(const CuVectorBase<BaseFloat> &scales);
1107 
1108  // The ConfigLine cfl contains only the option scales=<string>,
1109  // where the string is the filename of a Kaldi-format vector to read.
1110  virtual void InitFromConfig(ConfigLine *cfl);
1111 
1112  virtual int32 InputDim() const { return scales_.Dim(); }
1113  virtual int32 OutputDim() const { return scales_.Dim(); }
1114 
1115  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1116  const CuMatrixBase<BaseFloat> &in,
1117  CuMatrixBase<BaseFloat> *out) const;
1118  virtual void Backprop(const std::string &debug_info,
1119  const ComponentPrecomputedIndexes *indexes,
1120  const CuMatrixBase<BaseFloat> &, // in_value
1121  const CuMatrixBase<BaseFloat> &, // out_value
1122  const CuMatrixBase<BaseFloat> &out_deriv,
1123  void *memo,
1124  Component *, // to_update
1125  CuMatrixBase<BaseFloat> *in_deriv) const;
1126  virtual Component* Copy() const;
1127  virtual void Read(std::istream &is, bool binary);
1128  virtual void Write(std::ostream &os, bool binary) const;
1129 
1130  const CuVector<BaseFloat> &Scales() const { return scales_; }
1131  protected:
1134 };
1135 
1136 
1141  public:
1143  virtual std::string Type() const { return "FixedBiasComponent"; }
1144  virtual std::string Info() const;
1145 
1146  virtual int32 Properties() const {
1148  }
1149 
1150  void Init(const CuVectorBase<BaseFloat> &scales);
1151 
1152  // The ConfigLine cfl contains only the option bias=<string>,
1153  // where the string is the filename of a Kaldi-format vector to read.
1154  virtual void InitFromConfig(ConfigLine *cfl);
1155  virtual int32 InputDim() const { return bias_.Dim(); }
1156  virtual int32 OutputDim() const { return bias_.Dim(); }
1157  using Component::Propagate; // to avoid name hiding
1158  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1159  const CuMatrixBase<BaseFloat> &in,
1160  CuMatrixBase<BaseFloat> *out) const;
1161  virtual void Backprop(const std::string &debug_info,
1162  const ComponentPrecomputedIndexes *indexes,
1163  const CuMatrixBase<BaseFloat> &, // in_value,
1164  const CuMatrixBase<BaseFloat> &, // out_value
1165  const CuMatrixBase<BaseFloat> &out_deriv,
1166  void *memo,
1167  Component *, // to_update
1168  CuMatrixBase<BaseFloat> *in_deriv) const;
1169  virtual Component* Copy() const;
1170  virtual void Read(std::istream &is, bool binary);
1171  virtual void Write(std::ostream &os, bool binary) const;
1172 
1173  protected:
1176 };
1177 
// NoOpComponent: a Component that reports the same value, dim_, as both its
// input and its output dimension.  What Propagate() and Backprop() compute is
// defined in the implementation file, not in this declaration.
1186 class NoOpComponent: public Component {
1187  public:
 // Copy constructor: copies dim_ and backprop_scale_ from 'other'.
1188  explicit NoOpComponent(const NoOpComponent &other):
1189  dim_(other.dim_), backprop_scale_(other.backprop_scale_) { }
1191  virtual std::string Type() const { return "NoOpComponent"; }
1192  virtual int32 Properties() const {
1194  }
1195  virtual int32 InputDim() const { return dim_; }
1196  virtual int32 OutputDim() const { return dim_; }
 // NOTE(review): this Copy() is non-const, so it is a separate overload from
 // the const Copy() declared further down rather than the same function.  Both
 // return a new copy-constructed NoOpComponent, so this one looks redundant --
 // confirm nothing depends on it before removing.
1197  virtual Component *Copy() { return new NoOpComponent(*this); }
1198  virtual void InitFromConfig(ConfigLine *cfl);
1199  virtual void Read(std::istream &is, bool binary);
1200  virtual void Write(std::ostream &os, bool binary) const;
1201  virtual std::string Info() const;
 // Returns a newly allocated copy of this object (made with the copy
 // constructor above).
1202  virtual Component* Copy() const { return new NoOpComponent(*this); }
1203  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1204  const CuMatrixBase<BaseFloat> &in,
1205  CuMatrixBase<BaseFloat> *out) const;
1206  virtual void Backprop(const std::string &debug_info,
1207  const ComponentPrecomputedIndexes *indexes,
1208  const CuMatrixBase<BaseFloat> &, //in_value
1209  const CuMatrixBase<BaseFloat> &, // out_value,
1210  const CuMatrixBase<BaseFloat> &out_deriv,
1211  void *memo,
1212  Component *to_update,
1213  CuMatrixBase<BaseFloat> *in_deriv) const;
1214  private:
1217 
 // Assignment is declared private so that it cannot be used from outside.
1218  NoOpComponent &operator = (const NoOpComponent &other); // Disallow.
1219 };
1220 
1236  public:
1237  explicit SumBlockComponent(const SumBlockComponent &other);
1239  virtual std::string Type() const { return "SumBlockComponent"; }
1240  virtual int32 Properties() const {
1242  }
1243  virtual void InitFromConfig(ConfigLine *cfl);
1244  virtual int32 InputDim() const { return input_dim_; }
1245  virtual int32 OutputDim() const { return output_dim_; }
1246  virtual void Read(std::istream &is, bool binary);
1247  virtual void Write(std::ostream &os, bool binary) const;
1248  virtual std::string Info() const;
1249  virtual Component* Copy() const { return new SumBlockComponent(*this); }
1250  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1251  const CuMatrixBase<BaseFloat> &in,
1252  CuMatrixBase<BaseFloat> *out) const;
1253  virtual void Backprop(const std::string &debug_info,
1254  const ComponentPrecomputedIndexes *indexes,
1255  const CuMatrixBase<BaseFloat> &, //in_value
1256  const CuMatrixBase<BaseFloat> &, // out_value,
1257  const CuMatrixBase<BaseFloat> &out_deriv,
1258  void *memo,
1259  Component *to_update,
1260  CuMatrixBase<BaseFloat> *in_deriv) const;
1261  private:
1265  SumBlockComponent &operator = (const SumBlockComponent &other); // Disallow.
1266 };
1267 
1268 
1269 /*
1270  ClipGradientComponent just duplicates its input, but clips gradients
1271  during backpropagation if they cross a predetermined threshold.
1272  This component will be used to prevent gradient explosion problem in
1273  recurrent neural networks.
1274 
1275  Configuration values accepted:
1276  dim Dimension of this component, e.g. 1024
1277  clipping-threshold Threshold to be used for clipping. It could correspond
1278  to max-row-norm (if norm_based_clipping_ == true) or
1279  max-absolute-value (otherwise).
1280  norm-based-clipping If true, the max-row-norm will be clipped. Else element-wise
1281  absolute value clipping is done.
1282  self-repair-clipped-proportion-threshold The threshold of clipped-proportion
1283  for self-repair mechanism to be activated. The self-repair mechanism
1284  adds a term (proportional to [-(input vector - self_repair_target_)])
1285  to in-deriv, attempting to shrink the magnitude of the input towards
1286  self_repair_target_ (e.g. 0.0 or 0.5). The default value is 1.0.
1287  self-repair-target The target value towards which self-repair is trying to set
1288  for in-deriv. The default value is 0.0.
1289  self-repair-scale Scale for the self-repair mechanism; see comments above.
1290  The default value is 0.0, but we usually set this to 1.0e-05 (or
1291  occasionally 1.0e-04) in the scripts.
1292 */
1293 
1295  public:
// Fully-specified constructor: takes the configuration values (dim, clipping
// threshold, norm-based flag, self-repair settings) followed by the four
// statistics counters, and forwards all of them, in the same order, to Init().
1296  ClipGradientComponent(int32 dim, BaseFloat clipping_threshold,
1297  bool norm_based_clipping,
1298  BaseFloat self_repair_clipped_proportion_threshold,
1299  BaseFloat self_repair_target,
1300  BaseFloat self_repair_scale,
1301  int32 num_clipped,
1302  int32 count,
1303  int32 num_self_repaired,
1304  int32 num_backpropped) {
1305  Init(dim, clipping_threshold, norm_based_clipping,
1306  self_repair_clipped_proportion_threshold,
1307  self_repair_target,
1308  self_repair_scale,
1309  num_clipped, count,
1310  num_self_repaired, num_backpropped);}
1311 
// Default constructor: dimension 0, clipping threshold -1, norm-based clipping
// off, self-repair threshold 1.0 / target 0.0 / scale 0.0, and all four
// statistics counters set to zero.
1312  ClipGradientComponent(): dim_(0), clipping_threshold_(-1),
1313  norm_based_clipping_(false),
1314  self_repair_clipped_proportion_threshold_(1.0),
1315  self_repair_target_(0.0),
1316  self_repair_scale_(0.0),
1317  num_clipped_(0), count_(0),
1318  num_self_repaired_(0), num_backpropped_(0) { }
1319 
1320  virtual int32 InputDim() const { return dim_; }
1321  virtual int32 OutputDim() const { return dim_; }
1322  virtual void InitFromConfig(ConfigLine *cfl);
1323  void Init(int32 dim, BaseFloat clipping_threshold, bool norm_based_clipping,
1324  BaseFloat self_repair_clipped_proportion_threshold,
1325  BaseFloat self_repair_target,
1326  BaseFloat self_repair_scale,
1327  int32 num_clipped, int32 count,
1328  int32 num_self_repaired, int32 num_backpropped);
1329 
1330  virtual std::string Type() const { return "ClipGradientComponent"; }
1331 
1332  virtual int32 Properties() const {
1335  }
1336 
1337  virtual void ZeroStats();
1338 
// Returns a newly allocated ClipGradientComponent built with the 10-argument
// constructor from this object's configuration values and its current
// statistics counters.  Note that debug_info_ is not among the values passed.
1339  virtual Component* Copy() const {
1340  return new ClipGradientComponent(dim_,
1341  clipping_threshold_,
1342  norm_based_clipping_,
1343  self_repair_clipped_proportion_threshold_,
1344  self_repair_target_,
1345  self_repair_scale_,
1346  num_clipped_,
1347  count_,
1348  num_self_repaired_,
1349  num_backpropped_);}
1350 
1351  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1352  const CuMatrixBase<BaseFloat> &in,
1353  CuMatrixBase<BaseFloat> *out) const;
1354  virtual void Backprop(const std::string &debug_info,
1355  const ComponentPrecomputedIndexes *indexes,
1356  const CuMatrixBase<BaseFloat> &in_value,
1357  const CuMatrixBase<BaseFloat> &, // out_value,
1358  const CuMatrixBase<BaseFloat> &out_deriv,
1359  void *memo,
1360  Component *to_update,
1361  CuMatrixBase<BaseFloat> *in_deriv) const;
1362 
1363  virtual void Scale(BaseFloat scale);
1364  virtual void Add(BaseFloat alpha, const Component &other);
1365  virtual void Read(std::istream &is, bool binary); // This Read function
1366  // requires that the Component has the correct type.
1368  virtual void Write(std::ostream &os, bool binary) const;
1369  virtual std::string Info() const;
1371  if (num_self_repaired_ > 0)
1372  KALDI_LOG << "ClipGradientComponent(node_name=" << debug_info_
1373  << ")'s self-repair was activated " << num_self_repaired_
1374  << " time(s) out of " << num_backpropped_
1375  << " times of calling Backprop() in this training job.";
1376  }
1377  private:
1378  int32 dim_; // input/output dimension
1379  BaseFloat clipping_threshold_; // threshold to be used for clipping
1380  // could correspond to max-row-norm (if
1381  // norm_based_clipping_ == true) or
1382  // max-absolute-value (otherwise)
1383  bool norm_based_clipping_; // if true the max-row-norm will be clipped
1384  // else element-wise absolute value clipping is
1385  // done
1386 
1387  // some configuration values relating to self-repairing.
1389  // clipped-proportion
1390  // for self-repair to be
1391  // activated
1392  BaseFloat self_repair_target_; // the target value towards which self-repair
1393  // is trying to set for in-deriv
1394  BaseFloat self_repair_scale_; // constant scaling the self-repair vector
1395  std::string debug_info_; // component-node name, used in the destructor to
1396  // print out stats of self-repair
1397 
1398  // this function is called from Backprop code, and only does something if the
1399  // self-repair-scale config value is set and the current clipped proportion
1400  // exceeds the threshold. What it does is to add a term to in-deriv that
1401  // forces the input to the ClipGradientComponent to be close to some small
1402  // value (e.g., 0.0 or 0.5, depending on what the input is, e.g.,
1403  // Sigmoid or Tanh or Affine). The hope is that if the input is forced to be
1404  // small, the parameters on the path will also tend to be small, which may
1405  // help tamp down the divergence caused by gradient explosion.
1406  void RepairGradients(const std::string &debug_info,
1407  const CuMatrixBase<BaseFloat> &in_value,
1408  CuMatrixBase<BaseFloat> *in_deriv,
1409  ClipGradientComponent *to_update) const;
1410 
1411  ClipGradientComponent &operator =
1412  (const ClipGradientComponent &other); // Disallow.
1413 
1414  protected:
1415  // variables to store stats
1416  // An element corresponds to rows of derivative matrix, when
1417  // norm_based_clipping_ is true,
1418  // else it corresponds to each element of the derivative matrix
1419  // Note: no stats are stored when norm_based_clipping_ is false
1420  int32 num_clipped_; // number of elements which were clipped
1421  int32 count_; // number of elements which were processed
1422  int32 num_self_repaired_; // number of times self-repair is activated
1423  int32 num_backpropped_; //number of times backprop is called
1424 
1425 };
1426 
1441  public:
1443  PermuteComponent(const std::vector<int32> &column_map) { Init(column_map); }
1444 
1445  virtual int32 InputDim() const { return column_map_.Dim(); }
1446  virtual int32 OutputDim() const { return column_map_.Dim(); }
1447  virtual void InitFromConfig(ConfigLine *cfl);
1448  void Init(const std::vector<int32> &column_map);
1449 
1450  virtual std::string Type() const { return "PermuteComponent"; }
1451 
1452  virtual int32 Properties() const {
1453  return kSimpleComponent;
1454  }
1455 
1456  virtual void ZeroStats() {}
1457 
1458  virtual Component* Copy() const;
1459 
1460  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1461  const CuMatrixBase<BaseFloat> &in,
1462  CuMatrixBase<BaseFloat> *out) const;
1463  virtual void Backprop(const std::string &debug_info,
1464  const ComponentPrecomputedIndexes *indexes,
1465  const CuMatrixBase<BaseFloat> &, //in_value
1466  const CuMatrixBase<BaseFloat> &, // out_value,
1467  const CuMatrixBase<BaseFloat> &out_deriv,
1468  void *memo,
1469  Component *to_update,
1470  CuMatrixBase<BaseFloat> *in_deriv) const;
1471 
1472  virtual void Scale(BaseFloat scale) {}
1473  virtual void Add(BaseFloat alpha, const Component &other) {}
1474  virtual void Read(std::istream &is, bool binary); // This Read function
1475  // requires that the Component has the correct type.
1477  virtual void Write(std::ostream &os, bool binary) const;
1478  virtual std::string Info() const;
1479  private:
1480  // computes the reverse column map. Must not be called if column_map_.Dim()
1481  // == 0
1482  void ComputeReverseColumnMap();
1484  // the following is a derived variable, not written to disk.
1485  // It is used in backprop.
1487  PermuteComponent &operator =
1488  (const PermuteComponent &other); // Disallow.
1489 };
1490 
1491 
1492 
1493 
1514  public:
1515  virtual int32 InputDim() const { return scales_.Dim(); }
1516  virtual int32 OutputDim() const { return scales_.Dim(); }
1517 
1518  virtual std::string Info() const;
1519  virtual void InitFromConfig(ConfigLine *cfl);
1520 
1521  PerElementScaleComponent() { } // use Init to really initialize.
1522  virtual std::string Type() const { return "PerElementScaleComponent"; }
1523  virtual int32 Properties() const {
1526  }
1527 
1528  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1529  const CuMatrixBase<BaseFloat> &in,
1530  CuMatrixBase<BaseFloat> *out) const;
1531  virtual void Backprop(const std::string &debug_info,
1532  const ComponentPrecomputedIndexes *indexes,
1533  const CuMatrixBase<BaseFloat> &in_value,
1534  const CuMatrixBase<BaseFloat> &, // out_value
1535  const CuMatrixBase<BaseFloat> &out_deriv,
1536  void *memo,
1537  Component *to_update,
1538  CuMatrixBase<BaseFloat> *in_deriv) const;
1539 
1540  virtual void Read(std::istream &is, bool binary);
1541  virtual void Write(std::ostream &os, bool binary) const;
1542 
1543  virtual Component* Copy() const;
1544 
1545 
1546  // Some functions from base-class UpdatableComponent.
1547  virtual void Scale(BaseFloat scale);
1548  virtual void Add(BaseFloat alpha, const Component &other);
1549  virtual void PerturbParams(BaseFloat stddev);
1550  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1551  virtual int32 NumParameters() const;
1552  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1553  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
1554 
1555  // Some functions that are specific to this class.
1556  explicit PerElementScaleComponent(const PerElementScaleComponent &other);
1557 
1558  void Init(int32 dim, BaseFloat param_mean, BaseFloat param_stddev);
1559  void Init(std::string vector_filename);
1560 
1561  protected:
1562  // This function Update() is for extensibility; child classes may override
1563  // this, e.g. for natural gradient update.
// Default implementation: ignores debug_info and simply delegates to
// UpdateSimple() with the same in_value and out_deriv.
1564  virtual void Update(
1565  const std::string &debug_info,
1566  const CuMatrixBase<BaseFloat> &in_value,
1567  const CuMatrixBase<BaseFloat> &out_deriv) {
1568  UpdateSimple(in_value, out_deriv);
1569  }
1570  // UpdateSimple is used when *this is a gradient. Child classes may override
1571  // this if needed, but typically won't need to.
1572  virtual void UpdateSimple(
1573  const CuMatrixBase<BaseFloat> &in_value,
1574  const CuMatrixBase<BaseFloat> &out_deriv);
1575 
1576  const PerElementScaleComponent &operator
1577  = (const PerElementScaleComponent &other); // Disallow.
1579 };
1580 
1581 /*
1582  PerElementOffsetComponent offsets each dimension of its input with a separate
1583  trainable bias; it's like an affine component with fixed weight matrix which
1584  is always equal to I.
1585 
1586  Accepted values on its config line, with defaults if applicable:
1587 
1588  vector If specified, the offsets will be read from this file ('vector'
1589  is interpreted as an rxfilename).
1590 
1591  dim The dimension that this component inputs and outputs.
1592 
1593  block-dim [Should not be specified if you specify 'vector'].
1594  If specified, must be nonzero and divide 'dim'. In this
1595  case, blocks of the input of this dimension will get
1596  the same offset. Useful in CNNs.
1597 
1598  param-mean=0.0 Mean of randomly initialized offset parameters; should only
1599  be supplied if 'vector' is not supplied.
1600  param-stddev=0.0 Standard deviation of randomly initialized offset parameters;
1601  should only be supplied if 'vector' is not supplied.
1602 
1603  use-natural-gradient=true If true, we will use natural gradient in the
1604  update. Note: this is different from PerElementScaleComponent,
1605  which does not support natural gradient directly-- in that
1606  case you have to use NaturalGradientPerElementScaleComponent
1607  if you want to use natural gradient update.
1608 
1609  Values inherited from UpdatableComponent (see its declaration in
1610  nnet-component-itf for details):
1611  learning-rate
1612  learning-rate-factor
1613  max-change
1614 */
1616  public:
1617  virtual int32 InputDim() const { return dim_; }
1618  virtual int32 OutputDim() const { return dim_; }
1619 
1620  virtual std::string Info() const;
1621  virtual void InitFromConfig(ConfigLine *cfl);
1622 
1623  PerElementOffsetComponent() { } // use Init to really initialize.
1624  virtual std::string Type() const { return "PerElementOffsetComponent"; }
1625  virtual int32 Properties() const {
1628  (dim_ != offsets_.Dim() ? kOutputContiguous : 0);
1629  }
1630 
1631  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1632  const CuMatrixBase<BaseFloat> &in,
1633  CuMatrixBase<BaseFloat> *out) const;
1634  virtual void Backprop(const std::string &debug_info,
1635  const ComponentPrecomputedIndexes *indexes,
1636  const CuMatrixBase<BaseFloat> &, // in_value
1637  const CuMatrixBase<BaseFloat> &, // out_value
1638  const CuMatrixBase<BaseFloat> &out_deriv,
1639  void *memo,
1640  Component *to_update,
1641  CuMatrixBase<BaseFloat> *in_deriv) const;
1642 
1643  virtual void Read(std::istream &is, bool binary);
1644  virtual void Write(std::ostream &os, bool binary) const;
1645 
1646  virtual Component* Copy() const;
1647 
1648  // Some functions from base-class UpdatableComponent.
1649  virtual void Scale(BaseFloat scale);
1650  virtual void Add(BaseFloat alpha, const Component &other);
1651  virtual void PerturbParams(BaseFloat stddev);
1652  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1653  virtual int32 NumParameters() const;
1654  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1655  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
1656 
1657  // Copy constructor
1659  protected:
1660  const PerElementOffsetComponent &operator
1661  = (const PerElementOffsetComponent &other); // Disallow.
1663  // dim_ will normally be the same as offsets_ dim, but in general will be an
1664  // integer multiple of it (in case the same offset vector is applied to
1665  // successive blocks of the input).
1669 };
1670 
1671 
1672 // ConstantFunctionComponent implements a constant function of its input,
1673 // i.e. its output does not depend on its input. It is the same as
1674 // an affine component with the linear term fixed at zero.
1675 // It is optionally trainable, and optionally you can use natural
1676 // gradient. The input is required only because it's more convenient
1677 // to make SimpleComponents [but see ConstantComponent, which requires
1678 // no inputs].
1680  public:
1681  virtual int32 InputDim() const { return input_dim_; }
1682  virtual int32 OutputDim() const { return output_.Dim(); }
1683 
1684  virtual std::string Info() const;
1685  // possible parameter values with their defaults:
1686  // input-dim=-1 is-updatable=true use-natural-gradient=true output-dim=-1
1687  // output-mean=0 output-stddev=0
1688  virtual void InitFromConfig(ConfigLine *cfl);
1689 
1691 
1693 
1694  virtual std::string Type() const { return "ConstantFunctionComponent"; }
// Returns the property flags of this component: always kSimpleComponent and
// kBackpropAdds; kUpdatableComponent only when is_updatable_ is set; and
// kPropagateInPlace only when the input and output dimensions are equal.
1695  virtual int32 Properties() const {
1696  return kSimpleComponent|
1697  (is_updatable_ ? kUpdatableComponent : 0) |
1698  (InputDim() == OutputDim() ? kPropagateInPlace: 0) |
1699  kBackpropAdds;
1700  }
1701  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1702  const CuMatrixBase<BaseFloat> &in,
1703  CuMatrixBase<BaseFloat> *out) const;
1704  virtual void Backprop(const std::string &debug_info,
1705  const ComponentPrecomputedIndexes *indexes,
1706  const CuMatrixBase<BaseFloat> &, // in_value
1707  const CuMatrixBase<BaseFloat> &, // out_value
1708  const CuMatrixBase<BaseFloat> &out_deriv,
1709  void *memo,
1710  Component *to_update,
1711  CuMatrixBase<BaseFloat> *in_deriv) const;
1712 
1713  virtual void Read(std::istream &is, bool binary);
1714  virtual void Write(std::ostream &os, bool binary) const;
1715 
1716  virtual Component* Copy() const;
1717 
1718  // Some functions from base-class UpdatableComponent.
1719  virtual void Scale(BaseFloat scale);
1720  virtual void Add(BaseFloat alpha, const Component &other);
1721  virtual void PerturbParams(BaseFloat stddev);
1722  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1723  virtual int32 NumParameters() const;
1724  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1725  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
1726  virtual void ConsolidateMemory();
1727  private:
1729  // the output value-- a vector.
1731 
1733  // if true, and if updatable, do natural-gradient update.
1736 
1737  const ConstantFunctionComponent &operator
1738  = (const ConstantFunctionComponent &other); // Disallow.
1739 };
1740 
1741 
1742 
1767  public:
1768 
1769  virtual std::string Info() const;
1770 
1771  virtual void InitFromConfig(ConfigLine *cfl);
1772 
1773  NaturalGradientPerElementScaleComponent() { } // Empty default constructor; use one of the Init overloads (or InitFromConfig) to really initialize.
1774  virtual std::string Type() const {  // returns this class's type name as a string
1775  return "NaturalGradientPerElementScaleComponent";
1776  }
1777 
1778  virtual void Read(std::istream &is, bool binary);
1779  virtual void Write(std::ostream &os, bool binary) const;
1780  virtual void FreezeNaturalGradient(bool freeze);
1781 
1782  virtual Component* Copy() const;
1783 
1784  // Some functions that are specific to this class:
1787 
1788  void Init(int32 dim, BaseFloat param_mean,
1789  BaseFloat param_stddev, int32 rank, int32 update_period,
1790  BaseFloat num_samples_history, BaseFloat alpha);
1791  void Init(std::string vector_filename,
1792  int32 rank, int32 update_period, BaseFloat num_samples_history,
1793  BaseFloat alpha);
1794 
1795  void ConsolidateMemory();
1796 
1797  private:
1798  // unlike the NaturalGradientAffineComponent, there is only one dimension to
1799  // consider as the parameters are a vector not a matrix, so we only need one
1800  // preconditioner.
1801  // The preconditioner stores its own configuration values; we write and read
1802  // these, but not the preconditioner object itself.
1804 
1805  // Override of the parent-class Update() function, called only
1806  // if this->is_gradient_ = false; this implements the natural
1807  // gradient update.
1808  virtual void Update(
1809  const std::string &debug_info,
1810  const CuMatrixBase<BaseFloat> &in_value,
1811  const CuMatrixBase<BaseFloat> &out_deriv);
1812 
1814  = (const NaturalGradientPerElementScaleComponent &other); // Disallow.
1815 };
1816 
1817 
1818 /*
1819  ScaleAndOffsetComponent implements a per-element scale and offset.
1820  It may be useful just after BatchNormComponent, as the trainable offset
1821  and scale of batch-norm.
1822  Note: by default this includes natural gradient for the update.
1823 
1824  Currently accepted values on its config line are as follows.
1825  Major configuration values:
1826 
1827  dim The feature-dimension that the component takes as
1828  input, and outputs.
1829  block-dim If set, this must be set to a value that divides
1830  'dim'. In this case, the same offset and scale
1831  will be applied to each block, and the number
1832  of parameters will be 2*block-dim instead of 2*dim.
1833 
1834  There is currently no way to configure what values will be used for
1835  the initialization and it is hardcoded to zero offset, unit scale.
1836  If in future more configurability is needed, we'll address it then.
1837 
1838  Values inherited from UpdatableComponent (see its declaration in
1839  nnet-component-itf for details):
1840  learning-rate
1841  learning-rate-factor
1842  max-change
1843 
1844 
1845  Options to the natural gradient (you won't normally have to set these,
1846  the defaults are suitable):
1847 
1848  use-natural-gradient Defaults to true; false turns off the application
1849  of natural gradient update to this layer.
1850  rank Rank used in low-rank-plus-unit estimate of Fisher
1851  matrix in the input space. default=20.
1852 */
1854  public:
1855  virtual int32 InputDim() const { return dim_; }  // returns dim_, the same value OutputDim() returns
1856  virtual int32 OutputDim() const { return dim_; }  // returns dim_, the same value InputDim() returns
1857 
1858  virtual std::string Info() const;
1859  virtual void InitFromConfig(ConfigLine *cfl);
1860 
1861  ScaleAndOffsetComponent() { } // Empty default constructor; use InitFromConfig() to really initialize.
1862  virtual std::string Type() const { return "ScaleAndOffsetComponent"; }
1863  virtual int32 Properties() const {
1864  // Note: the backprop would most naturally consume the input, but we
1865  // have arranged things so that the backprop consumes the output value
1866  // instead; this allows less memory use, since in typical configurations,
1867  // this will be followed by an affine component which needs its input
1868  // for the backprop (so requiring it to be present adds no extra
1869  // burden).
1873  (dim_ != scales_.Dim() ?
1875  }
1876  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1877  const CuMatrixBase<BaseFloat> &in,
1878  CuMatrixBase<BaseFloat> *out) const;
1879  virtual void Backprop(const std::string &debug_info,
1880  const ComponentPrecomputedIndexes *indexes,
1881  const CuMatrixBase<BaseFloat> &, // in_value
1882  const CuMatrixBase<BaseFloat> &, // out_value
1883  const CuMatrixBase<BaseFloat> &out_deriv,
1884  void *memo,
1885  Component *to_update,
1886  CuMatrixBase<BaseFloat> *in_deriv) const;
1887 
1888  virtual void Read(std::istream &is, bool binary);
1889  virtual void Write(std::ostream &os, bool binary) const;
1890 
1891  virtual Component* Copy() const { return new ScaleAndOffsetComponent(*this); }  // returns a new heap-allocated object built with this class's copy constructor
1892 
1893  // Some functions from base-class UpdatableComponent.
1894  virtual void Scale(BaseFloat scale);
1895  virtual void Add(BaseFloat alpha, const Component &other);
1896  virtual void PerturbParams(BaseFloat stddev);
1897  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1898  virtual int32 NumParameters() const { return 2 * scales_.Dim(); }  // twice scales_.Dim(): the scales plus the equally-sized offsets
1899  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1900  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
1901  virtual void ConsolidateMemory();
1902 
1903 
1904  // copy constructor
1905  explicit ScaleAndOffsetComponent(const ScaleAndOffsetComponent &other);
1906  private:
1907  // Internal version of propagate, requires in.NumCols() equal to scales_.Dim()
1908  // (if block-dim was set, this may require the caller to reshape the input and
1909  // output).
1910  void PropagateInternal(const CuMatrixBase<BaseFloat> &in,
1911  CuMatrixBase<BaseFloat> *out) const;
1912  // Internal version of backprop, where the num-cols of the
1913  // argument matrices are equal to scales_.Dim().
1914  void BackpropInternal(const std::string &debug_info,
1915  const CuMatrixBase<BaseFloat> &out_value,
1916  const CuMatrixBase<BaseFloat> &out_deriv,
1917  ScaleAndOffsetComponent *to_update,
1918  CuMatrixBase<BaseFloat> *in_deriv) const;
1919 
1920  // Returns the fixed value 1.0e-04.  We use a function instead of defining a
  // class-level constant, which is a hassle in C++.
1921  inline BaseFloat Epsilon() const { return 1.0e-04; }
1922 
1923  // called from BackpropInternal if 'to_update' is non-NULL.
1924  void Update(
1925  const std::string &debug_info,
1926  const CuMatrixBase<BaseFloat> &in_value,
1927  const CuMatrixBase<BaseFloat> &out_deriv);
1928 
1929 
1930  const ScaleAndOffsetComponent &operator
1931  = (const ScaleAndOffsetComponent &other); // Disallow.
1932 
1933  // Note: dim_ is the dimension that the component takes as input
1934  // and output. It is an integer multiple of scales_.Dim(),
1935  // and will be the same as scales_.Dim() unless 'block-dim'
1936  // was specified on the config line.
1937  // (note: scales_.Dim() and offsets_.Dim() will be the same).
1939 
1940  // note: output is y(i) = scales_(i) * x(i) + offsets_(i).
1946 };
1947 
1948 
1972  public:
1973  virtual int32 InputDim() const;
1974  virtual int32 OutputDim() const;
1975 
1976  virtual std::string Info() const;
1977 
1978  virtual void InitFromConfig(ConfigLine *cfl);
1979 
1980  virtual Component* Copy() const;
1981 
1982  CompositeComponent() { } // Empty default constructor; use Init() or InitFromConfig() to really initialize.
1983 
1984  // Initialize from this list of components; takes ownership of the pointers.
1985  void Init(const std::vector<Component*> &components,
1986  int32 max_rows_process);
1987 
1988  virtual std::string Type() const { return "CompositeComponent"; }
1989 
1990  // The properties depend on the properties of the constituent components. As
1991  // a special case, we never return kStoresStats in the properties: by default
1992  // we store things like activation stats (e.g. for nonlinear components like
1993  // ReLU) as part of the backprop. This means we may wastefully store stats
1994  // even when not requested, but it does save time as a separate StoreStats()
1995  // call would involve propagating the internals.
1996  virtual int32 Properties() const;
1997 
1998  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
1999  const CuMatrixBase<BaseFloat> &in,
2000  CuMatrixBase<BaseFloat> *out) const;
2001  virtual void Backprop(const std::string &debug_info,
2002  const ComponentPrecomputedIndexes *indexes,
2003  const CuMatrixBase<BaseFloat> &in_value,
2004  const CuMatrixBase<BaseFloat> &, // out_value
2005  const CuMatrixBase<BaseFloat> &out_deriv,
2006  void *memo,
2007  Component *to_update,
2008  CuMatrixBase<BaseFloat> *in_deriv) const;
2009 
2010  // note, we don't implement StoreStats() as it would be inefficient. Instead,
2011  // by default we call StoreStats() on all members that have the flag set,
2012  // inside the Backprop.
2013  virtual void ZeroStats();
2014 
2015  virtual void Read(std::istream &is, bool binary);
2016  virtual void Write(std::ostream &os, bool binary) const;
2017 
2018  // Don't implement Copy() at this level: implement it in the child class.
2019 
2020  // Some functions from base-class UpdatableComponent.
2021  virtual void SetUnderlyingLearningRate(BaseFloat lrate);
2022  virtual void SetActualLearningRate(BaseFloat lrate);
2023  virtual void SetAsGradient();
2024  virtual void Scale(BaseFloat scale);
2025  virtual void Add(BaseFloat alpha, const Component &other);
2026  virtual void PerturbParams(BaseFloat stddev);
2027  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
2028  virtual int32 NumParameters() const;
2029  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
2030  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
2031  virtual void FreezeNaturalGradient(bool freeze);
2032 
2033  // note: we don't implement the StoreStats function as it would be quite
2034  // expensive; instead, by default we call StoreStats() for any components that
2035  // want to store stats, as part of the backprop pass. This is not 100% ideal
2036  // but it will usually do what you want. We can revisit this later if needed.
2037 
2038  // Functions to iterate over the internal components
2039 
2040  int32 NumComponents() const { return components_.size(); }  // number of entries in components_ (size() result converted to int32)
2044  const Component* GetComponent(int32 i) const;
2048  void SetComponent(int32 i, Component *component);
2049 
2050  virtual ~CompositeComponent() { DeletePointers(&components_); }  // deletes the non-NULL component pointers held in components_ (Init takes ownership of them)
2051  private:
2052  // returns the stride type, kDefaultStride or kStrideEqualNumCols,
2053  // at the output of the i'th component.
2054  inline MatrixStrideType GetStrideType(int32 i) const;
2055 
2056  // returns true if at least one of 'components_' returns the kUpdatable flag
2057  // in its flags.
2058  bool IsUpdatable() const;
2059 
2060  // the maximum number of
2062  std::vector<Component*> components_;
2063 
2064 };
2065 
2066 
2067 } // namespace nnet3
2068 } // namespace kaldi
2069 
2070 
2071 #endif
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual Component * Copy() const
Copies component (deep copy).
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
void SetDropoutProportion(BaseFloat dropout_proportion)
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
Definition: stl-utils.h:184
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const =0
Propagate function.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 InputDim() const
Returns input-dimension of this component.
const CuVector< BaseFloat > & BiasParams() const
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 InputDim() const
Returns input-dimension of this component.
Abstract base-class for neural-net components.
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
ClipGradientComponent(int32 dim, BaseFloat clipping_threshold, bool norm_based_clipping, BaseFloat self_repair_clipped_proportion_threshold, BaseFloat self_repair_target, BaseFloat self_repair_scale, int32 num_clipped, int32 count, int32 num_self_repaired, int32 num_backpropped)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
kaldi::int32 int32
virtual int32 InputDim() const
Returns input-dimension of this component.
Keywords for search: natural gradient, naturalgradient, NG-SGD.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
LogSoftmaxComponent(const LogSoftmaxComponent &other)
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
const CuMatrix< BaseFloat > & LinearParams() const
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 InputDim() const
Returns input-dimension of this component.
PermuteComponent changes the order of the columns (i.e.
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 InputDim() const
Returns input-dimension of this component.
PermuteComponent(const std::vector< int32 > &column_map)
CompositeComponent is a component representing a sequence of [simple] components. ...
void Init(int32 input_dim, int32 output_dim)
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
RectifiedLinearComponent(const RectifiedLinearComponent &other)
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
FixedScaleComponent applies a fixed per-element scale; it's similar to the Rescale component in the n...
virtual Component * Copy() const
Copies component (deep copy).
OnlineNaturalGradient preconditioner_in_
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 OutputDim() const
Returns output-dimension of this component.
void FreezeNaturalGradient(bool freeze, Nnet *nnet)
Controls if natural gradient will be updated.
Definition: nnet-utils.cc:432
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
const size_t count
virtual int32 InputDim() const
Returns input-dimension of this component.
NaturalGradientPerElementScaleComponent is like PerElementScaleComponent but it uses a natural gradie...
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
int32 NumParameters(const Nnet &src)
Returns the total of the number of parameters in the updatable components of the nnet.
Definition: nnet-utils.cc:359
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
const CuVector< BaseFloat > & BiasParams() const
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 OutputDim() const
Returns output-dimension of this component.
CuMatrixBase< BaseFloat > & Params()
SumGroupComponent is used to sum up groups of posteriors.
MatrixStrideType
Definition: matrix-common.h:44
const CuMatrixBase< BaseFloat > & Params() const
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual int32 OutputDim() const
Returns output-dimension of this component.
std::vector< Component * > components_
SigmoidComponent(const SigmoidComponent &other)
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
const CuMatrix< BaseFloat > & LinearParams() const
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual Component * Copy() const
Copies component (deep copy).
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
BaseFloat DotProduct(const Nnet &nnet1, const Nnet &nnet2)
Returns dot product between two networks of the same structure (calls the DotProduct functions of the...
Definition: nnet-utils.cc:250
virtual Component * Copy() const
Copies component (deep copy).
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
SoftmaxComponent(const SoftmaxComponent &other)
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
TanhComponent(const TanhComponent &other)
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual Component * Copy() const
Copies component (deep copy).
CuMatrix< BaseFloat > linear_params_
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Matrix for CUDA computing.
Definition: matrix-common.h:69
CuMatrix< BaseFloat > & LinearParams()
virtual int32 OutputDim() const
Returns output-dimension of this component.
KALDI_DISALLOW_COPY_AND_ASSIGN(Component)
virtual int32 OutputDim() const
Returns output-dimension of this component.
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
PerElementScaleComponent scales each dimension of its input with a separate trainable scale; it's lik...
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
SumBlockComponent sums over blocks of its input: for instance, if you create one with the config "inp...
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
const CuVector< BaseFloat > & BiasParams() const
FixedBiasComponent applies a fixed per-element bias; it's similar to the AddShift component in the nn...
NoOpComponent just duplicates its input.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void PerturbParams(BaseFloat stddev, Nnet *nnet)
Calls PerturbParams (with the given stddev) on all updatable components of the nnet.
Definition: nnet-utils.cc:199
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual int32 OutputDim() const
Returns output-dimension of this component.
ElementwiseProductComponent(int32 input_dim, int32 output_dim)
NoOpComponent(const NoOpComponent &other)
virtual int32 OutputDim() const
Returns output-dimension of this component.
PnormComponent(int32 input_dim, int32 output_dim)
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
virtual int32 InputDim() const
Returns input-dimension of this component.
#define KALDI_LOG
Definition: kaldi-error.h:153
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
OnlineNaturalGradient preconditioner_out_
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
DropoutComponent(int32 dim, BaseFloat dropout=0.0, bool dropout_per_frame=false)
virtual Component * Copy() const
Copies component (deep copy).
const CuMatrix< BaseFloat > & LinearParams() const
virtual int32 InputDim() const
Returns input-dimension of this component.
BaseFloat dropout_proportion_
dropout-proportion is the proportion that is dropped out, e.g.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
const CuVector< BaseFloat > & Scales() const
CuVector< BaseFloat > & BiasParams()
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Vector for CUDA computing.
Definition: matrix-common.h:72
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component&#39;s properties.
FixedAffineComponent is an affine transform that is supplied at network initialization time and is no...
virtual int32 InputDim() const
Returns input-dimension of this component.
This class implements an affine transform using a block diagonal matrix e.g., one whose weight matrix...