nnet-combined-component.h
Go to the documentation of this file.
1 // nnet3/nnet-combined-component.h
2 
3 // Copyright 2018 Johns Hopkins University (author: Daniel Povey)
4 // 2018 Hang Lyu
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_NNET3_NNET_SPECIAL_COMPONENT_H_
22 #define KALDI_NNET3_NNET_SPECIAL_COMPONENT_H_
23 
24 #include "nnet3/nnet-common.h"
27 #include <iostream>
28 
29 namespace kaldi {
30 namespace nnet3 {
31 
38 
39 
40 
115  public:
117  kYzx = 0,
118  kZyx = 1
119  };
120 
122  // constructor using another component
123  ConvolutionComponent(const ConvolutionComponent &component);
124  // constructor using parameters
126  const CuMatrixBase<BaseFloat> &filter_params,
127  const CuVectorBase<BaseFloat> &bias_params,
128  int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
129  int32 filt_x_dim, int32 filt_y_dim,
130  int32 filt_x_step, int32 filt_y_step,
131  TensorVectorizationType input_vectorization,
132  BaseFloat learning_rate);
133 
134  virtual int32 InputDim() const;
135  virtual int32 OutputDim() const;
136 
137  virtual std::string Info() const;
138  virtual void InitFromConfig(ConfigLine *cfl);
139  virtual std::string Type() const { return "ConvolutionComponent"; }
140  virtual int32 Properties() const {
143  }
144 
145  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
146  const CuMatrixBase<BaseFloat> &in,
147  CuMatrixBase<BaseFloat> *out) const;
148  virtual void Backprop(const std::string &debug_info,
149  const ComponentPrecomputedIndexes *indexes,
150  const CuMatrixBase<BaseFloat> &in_value,
151  const CuMatrixBase<BaseFloat> &, // out_value,
152  const CuMatrixBase<BaseFloat> &out_deriv,
153  void *memo,
154  Component *to_update_in,
155  CuMatrixBase<BaseFloat> *in_deriv) const;
156  void Update(const std::string &debug_info,
157  const CuMatrixBase<BaseFloat> &in_value,
158  const CuMatrixBase<BaseFloat> &out_deriv,
159  const std::vector<CuSubMatrix<BaseFloat> *>& out_deriv_batch);
160 
161 
162  virtual void Read(std::istream &is, bool binary);
163  virtual void Write(std::ostream &os, bool binary) const;
164 
165  virtual Component* Copy() const;
166 
167  // Some functions from base-class UpdatableComponent.
168  virtual void Scale(BaseFloat scale);
169  virtual void Add(BaseFloat alpha, const Component &other);
170  virtual void PerturbParams(BaseFloat stddev);
171  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
172  virtual int32 NumParameters() const;
173  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
174  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
175 
176  // Some functions that are specific to this class.
177  void SetParams(const VectorBase<BaseFloat> &bias,
178  const MatrixBase<BaseFloat> &filter);
179  const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
181  void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
182  int32 filt_x_dim, int32 filt_y_dim,
183  int32 filt_x_step, int32 filt_y_step, int32 num_filters,
184  TensorVectorizationType input_vectorization,
185  BaseFloat param_stddev, BaseFloat bias_stddev);
186  // there is no filt_z_dim parameter as the length of the filter along
187  // z-dimension is same as the input
188  void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
189  int32 filt_x_dim, int32 filt_y_dim,
190  int32 filt_x_step, int32 filt_y_step,
191  TensorVectorizationType input_vectorization,
192  std::string matrix_filename);
193 
194  // resize the component, setting the parameters to zero, while
195  // leaving any other configuration values the same
196  void Resize(int32 input_dim, int32 output_dim);
197 
198  void Update(const std::string &debug_info,
199  const CuMatrixBase<BaseFloat> &in_value,
200  const CuMatrixBase<BaseFloat> &out_deriv);
201 
202 
203  private:
204  int32 input_x_dim_; // size of the input along x-axis
205  // (e.g. number of time steps)
206 
207  int32 input_y_dim_; // size of input along y-axis
208  // (e.g. number of mel-frequency bins)
209 
210  int32 input_z_dim_; // size of input along z-axis
211  // (e.g. number of channels is 3 if the input has
212  // features + delta + delta-delta features)
213 
214  int32 filt_x_dim_; // size of the filter along x-axis
215 
216  int32 filt_y_dim_; // size of the filter along y-axis
217 
218  // there is no filt_z_dim_ as it is always assumed to be
219  // the same as input_z_dim_
220 
221  int32 filt_x_step_; // the number of steps taken along x-axis of input
222  // before computing the next dot-product
223  // of filter and input
224 
225  int32 filt_y_step_; // the number of steps taken along y-axis of input
226  // before computing the next dot-product of the filter
227  // and input
228 
229  // there is no filt_z_step_ as only dot product is possible along this axis
230 
231  TensorVectorizationType input_vectorization_; // type of vectorization of the
232  // input 3D tensor. Accepts zyx and yzx formats
233 
235  // the filter (or kernel) matrix is a matrix of vectorized 3D filters
236  // where each row in the matrix corresponds to one filter.
237  // The 3D filter tensor is vectorized in zyx format.
238  // The first row of the matrix corresponds to the first filter and so on.
239  // Keep in mind the vectorization type and order of filters when using file
240  // based initialization.
241 
243  // the filter-specific bias vector (i.e., there is a separate bias added
244  // to the output of each filter).
245 
247  CuMatrix<BaseFloat> *patches) const;
248  void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
249  CuMatrixBase<BaseFloat> *in_deriv) const;
250  const ConvolutionComponent &operator = (const ConvolutionComponent &other); // Disallow.
251 };
252 
253 
254 /*
255  LstmNonlinearityComponent is a component that implements part of an LSTM, by
256  combining together the sigmoids and tanh's, plus some diagonal terms, into
257  a single block.
258  We will refer to the LSTM formulation used in
259 
260  "Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling"
261  by H. Sak et al,
262  http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43905.pdf.
263 
264  Suppose the cell dimension is C. Then outside this component, we compute
265  the 4 * C-dimensional quantity consisting of 4 blocks as follows, by a single
266  matrix multiplication:
267 
268  i_part = W_{ix} x_t + W_{im} m_{t-1} + b_i
269  f_part = W_{fx} x_t + W_{fm} m_{t-1} + b_f
270  c_part = W_{cx} x_t + W_{cm} m_{t-1} + b_c
271  o_part = W_{ox} x_t + W_{om} m_{t-1} + b_o
272 
273  The part of the computation that takes place in this component is as follows.
274  Its input is of dimension 5C [however, search for 'dropout' below],
275  consisting of 5 blocks: (i_part, f_part, c_part, o_part, and c_{t-1}). Its
276  output is of dimension 2C, consisting of 2 blocks: c_t and m_t.
277 
278  To recap: the input is (i_part, f_part, c_part, o_part, c_{t-1}); the output is (c_t, m_t).
279 
280  This component has parameters, 3C of them in total: the diagonal matrices w_i, w_f
281  and w_o (written w_{ic}, w_{fc} and w_{oc} in the equations below).
282 
283 
284  In the forward pass (Propagate), this component computes the following:
285 
286  i_t = Sigmoid(i_part + w_{ic}*c_{t-1}) (1)
287  f_t = Sigmoid(f_part + w_{fc}*c_{t-1}) (2)
288  c_t = f_t*c_{t-1} + i_t * Tanh(c_part) (3)
289  o_t = Sigmoid(o_part + w_{oc}*c_t) (4)
290  m_t = o_t * Tanh(c_t) (5)
291  # note: the outputs are just c_t and m_t.
292 
293  [Note regarding dropout: optionally the input-dimension may be 5C + 3 instead
294  of 5C; in this case, the last three input dimensions will be interpreted as
295  per-frame dropout masks on i_t, f_t and o_t respectively, so that on the RHS of
296  (3), i_t is replaced by i_t * i_t_scale, and likewise for f_t in (3) and o_t in (5).]
297 
298  The backprop is as you would think, but for the "self-repair" we need to pass
299  in additional vectors (of the same dim as the parameters of the layer) that
300  dictate whether or not we add an additional term to the backpropagated
301  derivatives. (This term helps force the input to the nonlinearities into the
302  range where the derivatives are not too small).
303 
304  This component stores stats of the same form as are normally stored by the
305  StoreStats() functions for the sigmoid and tanh units, i.e. averages of the
306  activations and derivatives, but this is done inside the Backprop() functions.
307  [the StoreStats() functions don't take the input data as an argument, so
308  storing this data that way is impossible, and anyway it's more efficient to
309  do it as part of backprop.]
310 
311  Configuration values accepted:
312  cell-dim e.g. cell-dim=1024 Cell dimension. The input
313  dimension of this component is cell-dim * 5, and the
314  output dimension is cell-dim * 2. Note: this
315  component implements only part of the LSTM layer,
316  see comments above.
317  param-stddev Standard deviation for random initialization of
318  the diagonal matrices (AKA peephole connections).
319  default=1.0, which is probably too high but
320  we couldn't see any reliable gain from decreasing it.
321  tanh-self-repair-threshold Equivalent to the self-repair-lower-threshold
322  in a TanhComponent; applies to both the tanh nonlinearities.
323  default=0.2, you probably won't want to change this.
324  sigmoid-self-repair-threshold Equivalent to self-repair-lower-threshold
325  in a SigmoidComponent; applies to all three of the sigmoid
326  nonlinearities. default=0.05, you probably won't want to
327  change this.
328  self-repair-scale Equivalent to the self-repair-scale in a SigmoidComponent
329  or TanhComponent; applies to both the sigmoid and tanh
330  nonlinearities. default=1.0e-05, which you probably won't
331  want to change unless dealing with an objective function
332  that has smaller or larger dynamic range than normal, in
333  which case you might want to make it smaller or larger.
334 */
336  public:
337 
338  virtual int32 InputDim() const;
339  virtual int32 OutputDim() const;
340  virtual std::string Info() const;
341  virtual void InitFromConfig(ConfigLine *cfl);
342  LstmNonlinearityComponent(): use_dropout_(false) { }
343  virtual std::string Type() const { return "LstmNonlinearityComponent"; }
344  virtual int32 Properties() const {
346  }
347 
348  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
349  const CuMatrixBase<BaseFloat> &in,
350  CuMatrixBase<BaseFloat> *out) const;
351  virtual void Backprop(const std::string &debug_info,
352  const ComponentPrecomputedIndexes *indexes,
353  const CuMatrixBase<BaseFloat> &in_value,
354  const CuMatrixBase<BaseFloat> &, // out_value,
355  const CuMatrixBase<BaseFloat> &out_deriv,
356  void *memo,
357  Component *to_update_in,
358  CuMatrixBase<BaseFloat> *in_deriv) const;
359 
360  virtual void Read(std::istream &is, bool binary);
361  virtual void Write(std::ostream &os, bool binary) const;
362 
363  virtual Component* Copy() const;
364 
365  // Some functions from base-class UpdatableComponent.
366  virtual void Scale(BaseFloat scale);
367  virtual void Add(BaseFloat alpha, const Component &other);
368  virtual void PerturbParams(BaseFloat stddev);
369  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
370  virtual int32 NumParameters() const;
371  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
372  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
373  virtual void ZeroStats();
374  virtual void FreezeNaturalGradient(bool freeze);
375 
376  // Some functions that are specific to this class:
377  explicit LstmNonlinearityComponent(
378  const LstmNonlinearityComponent &other);
379 
380  void Init(int32 cell_dim, bool use_dropout,
381  BaseFloat param_stddev,
382  BaseFloat tanh_self_repair_threshold,
383  BaseFloat sigmoid_self_repair_threshold,
384  BaseFloat self_repair_scale);
385 
386  virtual void ConsolidateMemory();
387 
388  private:
389 
390  // Initializes the natural-gradient object with the configuration we
391  // use for this object, which for now is hardcoded at the C++ level.
392  void InitNaturalGradient();
393 
394  // Notation: C is the cell dimension; it equals params_.NumCols().
395 
396  // The dimension of the parameter matrix is (3 x C);
397  // it contains the 3 diagonal parameter matrices w_i, w_f and w_o.
399 
400  // If true, we expect an extra 3 dimensions on the input, for dropout masks
401  // for i_t, f_t and o_t.
403 
404  // Of dimension 5 x C, with a row for each of the Sigmoid/Tanh functions in
405  // equations (1) through (5), this is the sum of the values of the nonlinearities
406  // (used for diagnostics only). It is comparable to the value_sum_ vector
407  // in base-class NonlinearComponent.
409 
410  // Of dimension 5 x C, with a row for each of the Sigmoid/Tanh functions in
411  // equations (1) through (5), this is the sum of the derivatives of the
412  // nonlinearities (used for diagnostics and to control self-repair). It is
413  // comparable to the deriv_sum_ vector in base-class
414  // NonlinearComponent.
416 
417  // This matrix has dimension 10. The contents are a block of 5 self-repair
418  // thresholds (typically "0.05 0.05 0.2 0.05 0.2"), then a block of 5
419  // self-repair scales (typically all 0.00001). These are for each of the 5
420  // nonlinearities in the LSTM component in turn (see comments in cu-math.h for
421  // more info).
423 
424  // This matrix has dimension 5. For each of the 5 nonlinearities in the LSTM
425  // component (see comments in cu-math.h for more info), it contains the total,
426  // over all frames represented in count_, of the number of dimensions that
427  // were subject to self_repair. To get the self-repair proportion you should
428  // divide by (count_ times cell_dim_).
430 
431  // The total count (number of frames) corresponding to the stats in value_sum_
432  // and deriv_sum_.
433  double count_;
434 
435  // Preconditioner for the parameters of this component [operates in the space
436  // of dimension C].
437  // The preconditioner stores its own configuration values; we write and read
438  // these, but not the preconditioner object itself.
440 
441  const LstmNonlinearityComponent &operator
442  = (const LstmNonlinearityComponent &other); // Disallow.
443 };
444 
445 
446 
447 
448 /*
449  * WARNING, this component is deprecated as it's not compatible with
450  * TimeHeightConvolutionComponent, and it will eventually be deleted.
451  * MaxpoolingComponent :
452  * The max-pooling component was first used in ConvNets for selecting a
453  * representative activation in an area. It inspired the Maxout nonlinearity.
454  * Each output element of this component is the maximum of a block of
455  * input elements where the block has a 3D dimension (pool_x_size_,
456  * pool_y_size_, pool_z_size_).
457  * Blocks could overlap if the shift value on any axis is smaller
458  * than its corresponding pool size (e.g. pool_x_step_ < pool_x_size_).
459  * If the shift values are equal to their pool size, there is no
460  * overlap; while if they all equal 1, the blocks overlap to
461  * the greatest possible extent.
462  *
463  * This component is designed to be used after a ConvolutionComponent
464  * so that the input matrix is propagated from a 2d-convolutional layer.
465  * This component implements 3d-maxpooling which performs
466  * max pooling along the three axes.
467  * Input : A matrix where each row is a vectorized 3D-tensor.
468  * The 3D tensor has dimensions
469  * x: (e.g. time)
470  * y: (e.g. frequency)
471  * z: (e.g. channels like number of filters in the ConvolutionComponent)
472  *
473  * The component assumes input vectorizations of type zyx
474  * which is the default output vectorization type of a ConvolutionComponent.
475  * e.g. for input vectorization of type zyx the input is vectorized by
476  * spanning axes z, y and x of the tensor in that order.
477  * Given 3d tensor A with sizes (2, 2, 2) along the three dimensions
478  * the zyx vectorized input looks like
479  * A(0,0,0) A(0,0,1) A(0,1,0) A(0,1,1) A(1,0,0) A(1,0,1) A(1,1,0) A(1,1,1)
480  *
481  * Output : The output is also a 3D tensor vectorized in the zyx format.
482  *
483  * For information on the hyperparameters and parameters of this component see
484  * the variable declarations.
485  *
486  *
487  */
489  public:
490 
492  pool_x_size_(0), pool_y_size_(0), pool_z_size_(0),
493  pool_x_step_(0), pool_y_step_(0), pool_z_step_(0) { }
494  // constructor using another component
495  MaxpoolingComponent(const MaxpoolingComponent &component);
496 
497  virtual int32 InputDim() const;
498  virtual int32 OutputDim() const;
499 
500  virtual std::string Info() const;
501  virtual void InitFromConfig(ConfigLine *cfl);
502  virtual std::string Type() const { return "MaxpoolingComponent"; }
503  virtual int32 Properties() const {
506  }
507 
508  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
509  const CuMatrixBase<BaseFloat> &in,
510  CuMatrixBase<BaseFloat> *out) const;
511  virtual void Backprop(const std::string &debug_info,
512  const ComponentPrecomputedIndexes *indexes,
513  const CuMatrixBase<BaseFloat> &in_value,
514  const CuMatrixBase<BaseFloat> &out_value,
515  const CuMatrixBase<BaseFloat> &out_deriv,
516  void *memo,
517  Component *, // to_update,
518  CuMatrixBase<BaseFloat> *in_deriv) const;
519 
520  virtual void Read(std::istream &is, bool binary); // This Read function
521  // requires that the Component has the correct type.
522 
524  virtual void Write(std::ostream &os, bool binary) const;
525  virtual Component* Copy() const { return new MaxpoolingComponent(*this); }
526 
527 
528  protected:
530  CuMatrix<BaseFloat> *patches) const;
531  void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
532  CuMatrixBase<BaseFloat> *in_deriv) const;
533  virtual void Check() const;
534 
535 
536  int32 input_x_dim_; // size of the input along x-axis
537  // (e.g. number of time steps)
538  int32 input_y_dim_; // size of input along y-axis
539  // (e.g. number of mel-frequency bins)
540  int32 input_z_dim_; // size of input along z-axis
541  // (e.g. number of filters in the ConvolutionComponent)
542 
543  int32 pool_x_size_; // size of the pooling window along x-axis
544  int32 pool_y_size_; // size of the pooling window along y-axis
545  int32 pool_z_size_; // size of the pooling window along z-axis
546 
547  int32 pool_x_step_; // the number of steps taken along x-axis of input
548  // before computing the next pool
549  int32 pool_y_step_; // the number of steps taken along y-axis of input
550  // before computing the next pool
551  int32 pool_z_step_; // the number of steps taken along z-axis of input
552  // before computing the next pool
553 
554 };
555 
556 
// GruNonlinearityComponent: an UpdatableComponent whose trainable parameter
// is the matrix W^h (w_h_, cell_dim_ by recurrent_dim_) that appears inside
// the tanh of a GRU, in the expression "hpart_t + W^h (s_{t-1} \dot r_t)"
// (see UpdateParameters() below).  Besides the parameters it keeps tanh
// value/derivative statistics, self-repair configuration and statistics, and
// natural-gradient preconditioners for the input and output spaces of w_h_.
// recurrent_dim_ == cell_dim_ corresponds to a regular (non-projected) GRU.
713 class GruNonlinearityComponent: public UpdatableComponent {
714  public:
715 
716  virtual int32 InputDim() const;
717  virtual int32 OutputDim() const;
718  virtual std::string Info() const;
719  virtual void InitFromConfig(ConfigLine *cfl);
720  GruNonlinearityComponent() { }
721  virtual std::string Type() const { return "GruNonlinearityComponent"; }
722  virtual int32 Properties() const {
// NOTE(review): listing line 723 is absent from this extract; presumably it
// held the `return` keyword and the leading flags of this bitmask -- restore
// it from the original header before compiling.
724  kBackpropNeedsOutput|kBackpropAdds;
725  }
726  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
727  const CuMatrixBase<BaseFloat> &in,
728  CuMatrixBase<BaseFloat> *out) const;
729  virtual void Backprop(const std::string &debug_info,
730  const ComponentPrecomputedIndexes *indexes,
731  const CuMatrixBase<BaseFloat> &in_value,
732  const CuMatrixBase<BaseFloat> &, // out_value,
733  const CuMatrixBase<BaseFloat> &out_deriv,
734  void *memo,
735  Component *to_update_in,
736  CuMatrixBase<BaseFloat> *in_deriv) const;
737 
738  virtual void Read(std::istream &is, bool binary);
739  virtual void Write(std::ostream &os, bool binary) const;
740 
// Returns a heap-allocated copy made with the copy constructor declared below.
741  virtual Component* Copy() const { return new GruNonlinearityComponent(*this); }
742 
743  virtual void Scale(BaseFloat scale);
744  virtual void Add(BaseFloat alpha, const Component &other);
745 
746  // Some functions from base-class UpdatableComponent.
747  virtual void PerturbParams(BaseFloat stddev);
748  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
749  virtual int32 NumParameters() const;
750  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
751  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
752  virtual void ZeroStats();
753  virtual void FreezeNaturalGradient(bool freeze);
754 
755  // Some functions that are specific to this class:
756  explicit GruNonlinearityComponent(
757  const GruNonlinearityComponent &other);
758 
759  private:
760 
761  void Check() const; // checks dimensions, etc.
762 
// NOTE(review): the original documentation of TanhStatsAndSelfRepair (listing
// lines 763-775) is not present in this extract.  Judging by its name and
// signature it presumably accumulates the tanh statistics from h_t and adds
// the self-repair term to *h_t_deriv -- confirm against the .cc file.
776  void TanhStatsAndSelfRepair(const CuMatrixBase<BaseFloat> &h_t,
777  CuMatrixBase<BaseFloat> *h_t_deriv);
778 
779  /* This function is responsible for updating the w_h_ matrix
780  (taking into account the learning rate).
781  @param [in] sdotr The value of the expression (s_{t-1} \dot r_t).
782  @param [in] h_t_deriv The derivative of the objective
783  function w.r.t. the argument of the tanh
784  function, i.e. w.r.t. the expression
785  "hpart_t + W^h (s_{t-1} \dot r_t)".
786  This function is concerned with the second
787  term as it affects the derivative w.r.t. W^h.
788  */
789  void UpdateParameters(const CuMatrixBase<BaseFloat> &sdotr,
790  const CuMatrixBase<BaseFloat> &h_t_deriv);
791 
792 
793  int32 cell_dim_; // cell dimension, e.g. 1024.
794  int32 recurrent_dim_; // recurrent dimension, e.g. 256 for projected GRU;
795  // if it's the same as cell_dim it means we are
796  // implementing regular (non-projected) GRU
797 
798 
799  // The matrix W^h, of dimension cell_dim_ by recurrent_dim_.
800  // There is no bias term needed here because hpart_t comes from
801  // an affine component that has a bias.
802  CuMatrix<BaseFloat> w_h_;
803 
804  // Of dimension cell_dim_, this is comparable to the value_sum_ vector in
805  // class NonlinearComponent. It stores the sum of the tanh nonlinearity.
806  // Normalize by dividing by count_.
807  CuVector<double> value_sum_;
808 
809  // Of dimension cell_dim_, this is comparable to the deriv_sum_ vector in
810  // class NonlinearComponent. It stores the sum of the function-derivative of
811  // the tanh nonlinearity. Normalize by dividing by count_.
812  CuVector<double> deriv_sum_;
813 
814  // This is part of the stats (along with value_sum_, deriv_sum_, and count_);
815  // if you divide it by count_ it gives you the proportion of the time that an
816  // average dimension was subject to self-repair.
817  double self_repair_total_;
818 
819  // The total count (number of frames) corresponding to the stats in value_sum_,
820  // deriv_sum_, and self_repair_total_.
821  double count_;
822 
823  // A configuration parameter, this determines how saturated the derivative
824  // has to be for a particular dimension, before we activate self-repair.
825  // Default value is 0.2, the same as for TanhComponent.
826  BaseFloat self_repair_threshold_;
827 
828  // A configuration parameter, this determines the maximum absolute value of
829  // the extra term that we add to the input derivative of the tanh when doing
830  // self repair. The default value is 1.0e-05.
831  BaseFloat self_repair_scale_;
832 
833  // Preconditioner for the input space when updating w_h_ (has dimension
834  // recurrent_dim_ if use-natural-gradient was true, else not set up).
835  // The preconditioner stores its own configuration values; we write and read
836  // these, but not the preconditioner object itself.
837  OnlineNaturalGradient preconditioner_in_;
838  // Preconditioner for the output space when updating w_h_ (has dimension
839  // cell_dim_ if use-natural-gradient was true, else not set up).
// NOTE(review): this comment previously said recurrent_dim_, which looks like
// a copy-paste slip given that w_h_ is cell_dim_ by recurrent_dim_ and the
// input-space preconditioner already has dimension recurrent_dim_ -- confirm.
840 
841  OnlineNaturalGradient preconditioner_out_;
842 
// Assignment is declared private (and marked "Disallow") so objects of this
// class cannot be assigned from outside the class.
843  const GruNonlinearityComponent &operator
844  = (const GruNonlinearityComponent &other); // Disallow.
845 };
846 
847 
// OutputGruNonlinearityComponent: an UpdatableComponent whose trainable
// parameters are the vector w_h_, which enters the tanh of the GRU in the
// expression "hpart_t + W^h \dot c_t1" (see UpdateParameters() below).  It
// also keeps tanh value/derivative statistics, self-repair configuration and
// statistics, and a single natural-gradient preconditioner (one suffices
// because the parameters are a vector rather than a matrix).
979 class OutputGruNonlinearityComponent: public UpdatableComponent {
980  public:
981 
982  virtual int32 InputDim() const;
983  virtual int32 OutputDim() const;
984  virtual std::string Info() const;
985  virtual void InitFromConfig(ConfigLine *cfl);
986  OutputGruNonlinearityComponent() { }
987  virtual std::string Type() const { return "OutputGruNonlinearityComponent"; }
988  virtual int32 Properties() const {
// NOTE(review): listing line 989 is absent from this extract; presumably it
// held the `return` keyword and the leading flags of this bitmask -- restore
// it from the original header before compiling.
990  kBackpropNeedsOutput|kBackpropAdds;
991  }
992  virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
993  const CuMatrixBase<BaseFloat> &in,
994  CuMatrixBase<BaseFloat> *out) const;
995  virtual void Backprop(const std::string &debug_info,
996  const ComponentPrecomputedIndexes *indexes,
997  const CuMatrixBase<BaseFloat> &in_value,
998  const CuMatrixBase<BaseFloat> &, // out_value,
999  const CuMatrixBase<BaseFloat> &out_deriv,
1000  void *memo,
1001  Component *to_update_in,
1002  CuMatrixBase<BaseFloat> *in_deriv) const;
1003 
1004  virtual void Read(std::istream &is, bool binary);
1005  virtual void Write(std::ostream &os, bool binary) const;
1006 
// Returns a heap-allocated copy made with the copy constructor declared below.
1007  virtual Component* Copy() const { return new OutputGruNonlinearityComponent(*this); }
1008 
1009  virtual void Scale(BaseFloat scale);
1010  virtual void Add(BaseFloat alpha, const Component &other);
1011 
1012  // Some functions from base-class UpdatableComponent.
1013  virtual void PerturbParams(BaseFloat stddev);
1014  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1015  virtual int32 NumParameters() const;
1016  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1017  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
1018  virtual void ZeroStats();
1019  virtual void FreezeNaturalGradient(bool freeze);
1020 
1021  // Some functions that are specific to this class:
1022  explicit OutputGruNonlinearityComponent(
1023  const OutputGruNonlinearityComponent &other);
1024 
1025  private:
1026 
1027  void Check() const; // checks dimensions, etc.
1028 
// NOTE(review): the original documentation of TanhStatsAndSelfRepair (listing
// lines 1029-1041) is not present in this extract.  Judging by its name and
// signature it presumably accumulates the tanh statistics from h_t and adds
// the self-repair term to *h_t_deriv -- confirm against the .cc file.
1042  void TanhStatsAndSelfRepair(const CuMatrixBase<BaseFloat> &h_t,
1043  CuMatrixBase<BaseFloat> *h_t_deriv);
1044 
1045  /* This function is responsible for updating the w_h_ parameter vector
1046  (taking into account the learning rate).
1047  @param [in] c_t1_value The value of c_{t-1}.
1048  @param [in] h_t_deriv The derivative of the objective
1049  function w.r.t. the argument of the tanh
1050  function, i.e. w.r.t. the expression
1051  "hpart_t + W^h \dot c_t1".
1052  This function is concerned with the second
1053  term as it affects the derivative w.r.t. W^h.
1054  */
1055  void UpdateParameters(const CuMatrixBase<BaseFloat> &c_t1_value,
1056  const CuMatrixBase<BaseFloat> &h_t_deriv);
1057 
1058 
1059  int32 cell_dim_; // cell dimension, e.g. 1024.
1060 
1061  // The parameters W^h, stored as a vector rather than a matrix (this class
1062  // has no recurrent_dim_).  There is no bias term needed here because hpart_t
1063  // comes from an affine component that has a bias.
// NOTE(review): the vector is presumably of dimension cell_dim_ -- confirm
// against Check() in the .cc file.
1064  CuVector<BaseFloat> w_h_;
1065 
1066  // Of dimension cell_dim_, this is comparable to the value_sum_ vector in
1067  // class NonlinearComponent. It stores the sum of the tanh nonlinearity.
1068  // Normalize by dividing by count_.
1069  CuVector<double> value_sum_;
1070 
1071  // Of dimension cell_dim_, this is comparable to the deriv_sum_ vector in
1072  // class NonlinearComponent. It stores the sum of the function-derivative of
1073  // the tanh nonlinearity. Normalize by dividing by count_.
1074  CuVector<double> deriv_sum_;
1075 
1076  // This is part of the stats (along with value_sum_, deriv_sum_, and count_);
1077  // if you divide it by count_ it gives you the proportion of the time that an
1078  // average dimension was subject to self-repair.
1079  double self_repair_total_;
1080 
1081  // The total count (number of frames) corresponding to the stats in value_sum_,
1082  // deriv_sum_, and self_repair_total_.
1083  double count_;
1084 
1085  // A configuration parameter, this determines how saturated the derivative
1086  // has to be for a particular dimension, before we activate self-repair.
1087  // Default value is 0.2, the same as for TanhComponent.
1088  BaseFloat self_repair_threshold_;
1089 
1090  // A configuration parameter, this determines the maximum absolute value of
1091  // the extra term that we add to the input derivative of the tanh when doing
1092  // self repair. The default value is 1.0e-05.
1093  BaseFloat self_repair_scale_;
1094 
1095  // Unlike the GruNonlinearityComponent, there is only one dimension to
1096  // consider as the parameters are a vector not a matrix, so we only need one
1097  // preconditioner.
1098  OnlineNaturalGradient preconditioner_;
1099 
// Assignment is declared private (and marked "Disallow") so objects of this
// class cannot be assigned from outside the class.
1100  const OutputGruNonlinearityComponent &operator
1101  = (const OutputGruNonlinearityComponent &other); // Disallow.
1102 };
1103 
1104 
1105 } // namespace nnet3
1106 } // namespace kaldi
1107 
1108 
1109 #endif
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void InputToInputPatches(const CuMatrixBase< BaseFloat > &in, CuMatrix< BaseFloat > *patches) const
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim, int32 filt_x_dim, int32 filt_y_dim, int32 filt_x_step, int32 filt_y_step, int32 num_filters, TensorVectorizationType input_vectorization, BaseFloat param_stddev, BaseFloat bias_stddev)
virtual Component * Copy() const
Copies component (deep copy).
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
Abstract base-class for neural-net components.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
kaldi::int32 int32
Keywords for search: natural gradient, naturalgradient, NG-SGD.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void SetParams(const VectorBase< BaseFloat > &bias, const MatrixBase< BaseFloat > &filter)
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overridden by components that use Natu...
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
void Resize(int32 input_dim, int32 output_dim)
const CuVector< BaseFloat > & BiasParams() const
const ConvolutionComponent & operator=(const ConvolutionComponent &other)
Matrix for CUDA computing.
Definition: matrix-common.h:69
const CuMatrix< BaseFloat > & LinearParams() const
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update_in, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void InderivPatchesToInderiv(const CuMatrix< BaseFloat > &in_deriv_patches, CuMatrixBase< BaseFloat > *in_deriv) const
WARNING, this component is deprecated in favor of TimeHeightConvolutionComponent, and will be deleted...
void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv, const std::vector< CuSubMatrix< BaseFloat > *> &out_deriv_batch)
Vector for CUDA computing.
Definition: matrix-common.h:72