doc/nnet-combined-component_8h_source.html

 // nnet3/nnet-combined-component.h

 // Copyright      2018  Johns Hopkins University (author: Daniel Povey)
 //                2018  Hang Lyu

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #ifndef KALDI_NNET3_NNET_SPECIAL_COMPONENT_H_
 #define KALDI_NNET3_NNET_SPECIAL_COMPONENT_H_

 #include "nnet3/nnet-common.h"
 #include "nnet3/nnet-component-itf.h"
 #include "nnet3/natural-gradient-online.h"
 #include <iostream>

 namespace kaldi {
 namespace nnet3 {


 // An updatable component holding a bank of filters (filter_params_) plus one
 // bias per filter (bias_params_).  Each input row is treated as a vectorized
 // 3D tensor with axes x, y and z; a filter always spans the whole z-axis and
 // is stepped along x and y (see the member comments in the private section).
 // Only declarations are given here; the member functions are defined out of
 // line.
 class ConvolutionComponent: public UpdatableComponent {
  public:
   // The layouts accepted for the vectorized 3D input tensor (see
   // input_vectorization_ below).
   enum TensorVectorizationType  {
     kYzx = 0,
     kZyx = 1
   };

   ConvolutionComponent();
   // constructor using another component
   ConvolutionComponent(const ConvolutionComponent &component);
   // constructor using parameters
   ConvolutionComponent(
     const CuMatrixBase<BaseFloat> &filter_params,
     const CuVectorBase<BaseFloat> &bias_params,
     int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
     int32 filt_x_dim, int32 filt_y_dim,
     int32 filt_x_step, int32 filt_y_step,
     TensorVectorizationType input_vectorization,
     BaseFloat learning_rate);

   virtual int32 InputDim() const;
   virtual int32 OutputDim() const;

   virtual std::string Info() const;
   virtual void InitFromConfig(ConfigLine *cfl);
   virtual std::string Type() const { return "ConvolutionComponent"; }
   // The property flags this component advertises.
   virtual int32 Properties() const {
     return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput|
            kBackpropAdds|kPropagateAdds;
   }

   virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;
   // Note: the fourth argument (out_value) is deliberately left unnamed.
   virtual void Backprop(const std::string &debug_info,
                         const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in_value,
                         const CuMatrixBase<BaseFloat> &, // out_value,
                         const CuMatrixBase<BaseFloat> &out_deriv,
                         void *memo,
                         Component *to_update_in,
                         CuMatrixBase<BaseFloat> *in_deriv) const;
   // Overload of Update() that additionally receives out_deriv_batch, a list
   // of pointers to sub-matrices.
   // NOTE(review): presumably these are views into out_deriv -- confirm
   // against the definition.
   void Update(const std::string &debug_info,
               const CuMatrixBase<BaseFloat> &in_value,
               const CuMatrixBase<BaseFloat> &out_deriv,
               const std::vector<CuSubMatrix<BaseFloat> *>& out_deriv_batch);


   virtual void Read(std::istream &is, bool binary);
   virtual void Write(std::ostream &os, bool binary) const;

   virtual Component* Copy() const;

   // Some functions from base-class UpdatableComponent.
   virtual void Scale(BaseFloat scale);
   virtual void Add(BaseFloat alpha, const Component &other);
   virtual void PerturbParams(BaseFloat stddev);
   virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
   virtual int32 NumParameters() const;
   virtual void Vectorize(VectorBase<BaseFloat> *params) const;
   virtual void UnVectorize(const VectorBase<BaseFloat> &params);

   // Some functions that are specific to this class.
   void SetParams(const VectorBase<BaseFloat> &bias,
                  const MatrixBase<BaseFloat> &filter);
   // Read-only access to the bias vector.
   const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
   // Read-only access to the filter matrix (despite the name, this returns
   // filter_params_).
   const CuMatrix<BaseFloat> &LinearParams() const { return filter_params_; }
   // Initialization from dimensions, a filter count and standard deviations
   // (param_stddev, bias_stddev).
   void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
             int32 filt_x_dim, int32 filt_y_dim,
             int32 filt_x_step, int32 filt_y_step, int32 num_filters,
             TensorVectorizationType input_vectorization,
             BaseFloat param_stddev, BaseFloat bias_stddev);
   // Initialization from a matrix file (matrix_filename).
   // there is no filt_z_dim parameter as the length of the filter along
   // z-dimension is same as the input
   void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
             int32 filt_x_dim, int32 filt_y_dim,
             int32 filt_x_step, int32 filt_y_step,
             TensorVectorizationType input_vectorization,
             std::string matrix_filename);

   // resize the component, setting the parameters to zero, while
   // leaving any other configuration values the same
   void Resize(int32 input_dim, int32 output_dim);

   // Overload of Update() taking only the input values and the output
   // derivatives.
   void Update(const std::string &debug_info,
               const CuMatrixBase<BaseFloat> &in_value,
               const CuMatrixBase<BaseFloat> &out_deriv);


  private:
   int32 input_x_dim_;   // size of the input along x-axis
                         // (e.g. number of time steps)

   int32 input_y_dim_;   // size of input along y-axis
                         // (e.g. number of mel-frequency bins)

   int32 input_z_dim_;   // size of input along z-axis
                         // (e.g. number of channels is 3 if the input has
                         // features + delta + delta-delta features)

   int32 filt_x_dim_;    // size of the filter along x-axis

   int32 filt_y_dim_;    // size of the filter along y-axis

   // there is no filt_z_dim_ as it is always assumed to be
   // the same as input_z_dim_

   int32 filt_x_step_;   // the number of steps taken along x-axis of input
                         //  before computing the next dot-product
                         //  of filter and input

   int32 filt_y_step_;   // the number of steps taken along y-axis of input
                         // before computing the next dot-product of the filter
                         // and input

   // there is no filt_z_step_ as only dot product is possible along this axis

   TensorVectorizationType input_vectorization_; // type of vectorization of the
   // input 3D tensor. Accepts zyx and yzx formats

   CuMatrix<BaseFloat> filter_params_;
   // (Describes filter_params_ above.)
   // The filter (or kernel) matrix is a matrix of vectorized 3D filters
   // where each row in the matrix corresponds to one filter.
   // The 3D filter tensor is vectorized in zyx format.
   // The first row of the matrix corresponds to the first filter and so on.
   // Keep in mind the vectorization type and order of filters when using file
   // based initialization.

   CuVector<BaseFloat> bias_params_;
   // (Describes bias_params_ above.)
   // The filter-specific bias vector (i.e., there is a separate bias added
   // to the output of each filter).

   // Helpers converting between the input matrix and its "patches" form, and
   // between patch-form input derivatives and the input-derivative matrix.
   void InputToInputPatches(const CuMatrixBase<BaseFloat>& in,
                            CuMatrix<BaseFloat> *patches) const;
   void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
                                CuMatrixBase<BaseFloat> *in_deriv) const;
   const ConvolutionComponent &operator = (const ConvolutionComponent &other); // Disallow.
 };


 /*
   LstmNonlinearityComponent is a component that implements part of an LSTM, by
   combining together the sigmoids and tanh's, plus some diagonal terms, into
   a single block.
   We will refer to the LSTM formulation used in

   "Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling"
   by H. Sak et al,
   http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43905.pdf.

   Suppose the cell dimension is C.  Then outside this component, we compute
   the 4 * C-dimensional quantity consisting of 4 blocks as follows, by a single
   matrix multiplication:

   i_part = W_{ix} x_t + W_{im} m_{t-1} + b_i
   f_part = W_{fx} x_t + W_{fm} m_{t-1} + b_f
   c_part = W_{cx} x_t + W_{cm} m_{t-1} + b_c
   o_part = W_{ox} x_t + W_{om} m_{t-1} + b_o

   The part of the computation that takes place in this component is as follows.
   Its input is of dimension 5C [however, search for 'dropout' below],
   consisting of 5 blocks: (i_part, f_part, c_part, o_part, and c_{t-1}).  Its
   output is of dimension 2C, consisting of 2 blocks: c_t and m_t.

   To recap: the input is (i_part, f_part, c_part, o_part, c_{t-1}); the output is (c_t, m_t).

   This component has parameters, 3C of them in total: the diagonal matrices w_i, w_f
   and w_o (written w_{ic}, w_{fc} and w_{oc} in the equations below).


   In the forward pass (Propagate), this component computes the following:

      i_t = Sigmoid(i_part + w_{ic}*c_{t-1})   (1)
      f_t = Sigmoid(f_part + w_{fc}*c_{t-1})   (2)
      c_t = f_t*c_{t-1} + i_t * Tanh(c_part)   (3)
      o_t = Sigmoid(o_part + w_{oc}*c_t)       (4)
      m_t = o_t * Tanh(c_t)                    (5)
     # note: the outputs are just c_t and m_t.

   [Note regarding dropout: optionally the input-dimension may be 5C + 3 instead
   of 5C; in this case, the last three input dimensions will be interpreted as
   per-frame dropout masks on i_t, f_t and o_t respectively, so that on the RHS of
   (3), i_t is replaced by i_t * i_t_scale, and likewise for f_t and o_t.]

   The backprop is as you would think, but for the "self-repair" we need to pass
   in additional vectors (of the same dim as the parameters of the layer) that
   dictate whether or not we add an additional term to the backpropagated
   derivatives.  (This term helps force the input to the nonlinearities into the
   range where the derivatives are not too small).

   This component stores stats of the same form as are normally stored by the
   StoreStats() functions for the sigmoid and tanh units, i.e. averages of the
   activations and derivatives, but this is done inside the Backprop() functions.
   [the StoreStats() functions don't take the input data as an argument, so
   storing this data that way is impossible, and anyway it's more efficient to
   do it as part of backprop.]

   Configuration values accepted:
          cell-dim          e.g. cell-dim=1024  Cell dimension.  The input
                           dimension of this component is cell-dim * 5, and the
                           output dimension is cell-dim * 2.  Note: this
                           component implements only part of the LSTM layer,
                           see comments above.
          param-stddev     Standard deviation for random initialization of
                           the diagonal matrices (AKA peephole connections).
                           default=1.0, which is probably too high but
                           we couldn't see any reliable gain from decreasing it.
          tanh-self-repair-threshold   Equivalent to the self-repair-lower-threshold
                           in a TanhComponent; applies to both the tanh nonlinearities.
                           default=0.2, you probably won't want to change this.
          sigmoid-self-repair-threshold   Equivalent to self-repair-lower-threshold
                           in a SigmoidComponent; applies to all three of the sigmoid
                           nonlinearities.  default=0.05, you probably won't want to
                           change this.
          self-repair-scale Equivalent to the self-repair-scale in a SigmoidComponent
                           or TanhComponent; applies to both the sigmoid and tanh
                           nonlinearities.  default=1.0e-05, which you probably won't
                           want to change unless dealing with an objective function
                           that has smaller or larger dynamic range than normal, in
                           which case you might want to make it smaller or larger.
 */
 // Updatable component implementing the nonlinearity part of an LSTM; the
 // computation it performs and the configuration values it accepts are
 // described in the long comment that precedes this class.  Only declarations
 // are given here; most member functions are defined out of line.
 class LstmNonlinearityComponent: public UpdatableComponent {
  public:

   virtual int32 InputDim() const;
   virtual int32 OutputDim() const;
   virtual std::string Info() const;
   virtual void InitFromConfig(ConfigLine *cfl);
   // Default constructor.  Gives both plain scalar members (use_dropout_ and
   // count_) a definite value, so that a default-constructed object never
   // carries an indeterminate count_.
   LstmNonlinearityComponent(): use_dropout_(false), count_(0.0) { }
   virtual std::string Type() const { return "LstmNonlinearityComponent"; }
   // The property flags this component advertises.
   virtual int32 Properties() const {
     return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput;
   }

   virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;
   // Note: the fourth argument (out_value) is deliberately left unnamed.
   virtual void Backprop(const std::string &debug_info,
                         const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in_value,
                         const CuMatrixBase<BaseFloat> &, // out_value,
                         const CuMatrixBase<BaseFloat> &out_deriv,
                         void *memo,
                         Component *to_update_in,
                         CuMatrixBase<BaseFloat> *in_deriv) const;

   virtual void Read(std::istream &is, bool binary);
   virtual void Write(std::ostream &os, bool binary) const;

   virtual Component* Copy() const;

   // Some functions from base-class UpdatableComponent.
   virtual void Scale(BaseFloat scale);
   virtual void Add(BaseFloat alpha, const Component &other);
   virtual void PerturbParams(BaseFloat stddev);
   virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
   virtual int32 NumParameters() const;
   virtual void Vectorize(VectorBase<BaseFloat> *params) const;
   virtual void UnVectorize(const VectorBase<BaseFloat> &params);
   virtual void ZeroStats();
   virtual void FreezeNaturalGradient(bool freeze);

   // Some functions that are specific to this class:
   explicit LstmNonlinearityComponent(
       const LstmNonlinearityComponent &other);

   // Initializes the component from explicit values; the arguments mirror the
   // configuration values listed in the comment preceding this class.
   void Init(int32 cell_dim, bool use_dropout,
             BaseFloat param_stddev,
             BaseFloat tanh_self_repair_threshold,
             BaseFloat sigmoid_self_repair_threshold,
             BaseFloat self_repair_scale);

   virtual void ConsolidateMemory();

  private:

   // Initializes the natural-gradient object with the configuration we
   // use for this object, which for now is hardcoded at the C++ level.
   void InitNaturalGradient();

   // Notation: C is the cell dimension; it equals params_.NumCols().

   // The dimension of the parameter matrix is (3 x C);
   // it contains the 3 diagonal parameter matrices w_i, w_f and w_o.
   CuMatrix<BaseFloat> params_;

   // If true, we expect an extra 3 dimensions on the input, interpreted as
   // per-frame dropout masks for i_t, f_t and o_t.
   bool use_dropout_;

   // Of dimension 5 x C, with a row for each of the Sigmoid/Tanh functions in
   // equations (1) through (5), this is the sum of the values of the
   // nonlinearities (used for diagnostics only).  It is comparable to the
   // value_sum_ vector in base-class NonlinearComponent.
   CuMatrix<double> value_sum_;

   // Of dimension 5 x C, with a row for each of the Sigmoid/Tanh functions in
   // equations (1) through (5), this is the sum of the derivatives of the
   // nonlinearities (used for diagnostics and to control self-repair).  It is
   // comparable to the deriv_sum_ vector in base-class
   // NonlinearComponent.
   CuMatrix<double> deriv_sum_;

   // This vector has dimension 10.  The contents are a block of 5 self-repair
   // thresholds (typically "0.05 0.05 0.2 0.05 0.2"), then a block of 5
   // self-repair scales (typically all 0.00001).  These are for each of the 5
   // nonlinearities in the LSTM component in turn (see comments in cu-math.h for
   // more info).
   CuVector<BaseFloat> self_repair_config_;

   // This vector has dimension 5.  For each of the 5 nonlinearities in the LSTM
   // component (see comments in cu-math.h for more info), it contains the total,
   // over all frames represented in count_, of the number of dimensions that
   // were subject to self-repair.  To get the self-repair proportion you should
   // divide by (count_ times the cell dimension C).
   CuVector<double> self_repair_total_;

   // The total count (number of frames) corresponding to the stats in value_sum_
   // and deriv_sum_.
   double count_;

   // Preconditioner for the parameters of this component [operates in the space
   // of dimension C].
   // The preconditioner stores its own configuration values; we write and read
   // these, but not the preconditioner object itself.
   OnlineNaturalGradient preconditioner_;

   const LstmNonlinearityComponent &operator
       = (const LstmNonlinearityComponent &other); // Disallow.
 };


 /*
  * WARNING, this component is deprecated as it's not compatible with
  *   TimeHeightConvolutionComponent, and it will eventually be deleted.
  * MaxpoolingComponent :
  * Max pooling was first used in ConvNets for selecting a
  * representative activation in an area. It inspired the Maxout nonlinearity.
  * Each output element of this component is the maximum of a block of
  * input elements where the block has a 3D dimension (pool_x_size_,
  * pool_y_size_, pool_z_size_).
  * Blocks could overlap if the shift value on any axis is smaller
  * than its corresponding pool size (e.g. pool_x_step_ < pool_x_size_).
  * If the shift values are equal to their pool size, there is no
  * overlap; while if they all equal 1, the blocks overlap to
  * the greatest possible extent.
  *
  * This component is designed to be used after a ConvolutionComponent
  * so that the input matrix is propagated from a 2d-convolutional layer.
  * This component implements 3d-maxpooling which performs
  * max pooling along the three axes.
  * Input : A matrix where each row is a vectorized 3D-tensor.
  *        The 3D tensor has dimensions
  *        x: (e.g. time)
  *        y: (e.g. frequency)
  *        z: (e.g. channels like number of filters in the ConvolutionComponent)
  *
  *        The component assumes input vectorizations of type zyx
  *        which is the default output vectorization type of a ConvolutionComponent.
  *        e.g. for input vectorization of type zyx the input is vectorized by
  *        spanning axes z, y and x of the tensor in that order.
  *        Given 3d tensor A with sizes (2, 2, 2) along the three dimensions
  *        the zyx vectorized input looks like
  *  A(0,0,0) A(0,0,1) A(0,1,0) A(0,1,1) A(1,0,0) A(1,0,1) A(1,1,0) A(1,1,1)
  *
  * Output : The output is also a 3D tensor vectorized in the zyx format.
  *
  * For information on the hyperparameters and parameters of this component see
  * the variable declarations.
  *
  *
  */
 // Non-updatable component performing max pooling over 3D blocks of a
 // vectorized 3D input tensor; its hyperparameters are the input dimensions,
 // pool sizes and pool steps declared in the protected section.
 class MaxpoolingComponent: public Component {
  public:

   // Default constructor: all input dimensions, pool sizes and pool steps
   // start at zero.
   MaxpoolingComponent(): input_x_dim_(0), input_y_dim_(0), input_z_dim_(0),
                            pool_x_size_(0), pool_y_size_(0), pool_z_size_(0),
                            pool_x_step_(0), pool_y_step_(0), pool_z_step_(0) { }
   // constructor using another component
   MaxpoolingComponent(const MaxpoolingComponent &component);

   virtual int32 InputDim() const;
   virtual int32 OutputDim() const;

   virtual std::string Info() const;
   virtual void InitFromConfig(ConfigLine *cfl);
   virtual std::string Type() const { return "MaxpoolingComponent"; }
   // The property flags this component advertises.
   virtual int32 Properties() const {
     return kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput|
            kBackpropAdds;
   }

   virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;
   // Note: the seventh argument (to_update) is deliberately left unnamed.
   virtual void Backprop(const std::string &debug_info,
                         const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in_value,
                         const CuMatrixBase<BaseFloat> &out_value,
                         const CuMatrixBase<BaseFloat> &out_deriv,
                         void *memo,
                         Component *, // to_update,
                         CuMatrixBase<BaseFloat> *in_deriv) const;

   virtual void Read(std::istream &is, bool binary); // This Read function
   // requires that the Component has the correct type.

   virtual void Write(std::ostream &os, bool binary) const;
   // Returns a newly allocated copy made with the copy constructor.
   virtual Component* Copy() const { return new MaxpoolingComponent(*this); }


  protected:
   // Helpers converting between the input matrix and its "patches" form, and
   // between patch-form input derivatives and the input-derivative matrix.
   void InputToInputPatches(const CuMatrixBase<BaseFloat>& in,
                            CuMatrix<BaseFloat> *patches) const;
   void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
                                CuMatrixBase<BaseFloat> *in_deriv) const;
   virtual void Check() const;


   int32 input_x_dim_;   // size of the input along x-axis
                         // (e.g. number of time steps)
   int32 input_y_dim_;   // size of input along y-axis
                         // (e.g. number of mel-frequency bins)
   int32 input_z_dim_;   // size of input along z-axis
                         // (e.g. number of filters in the ConvolutionComponent)

   int32 pool_x_size_;    // size of the pooling window along x-axis
   int32 pool_y_size_;    // size of the pooling window along y-axis
   int32 pool_z_size_;    // size of the pooling window along z-axis

   int32 pool_x_step_;   // the number of steps taken along x-axis of input
                         // before computing the next pool
   int32 pool_y_step_;   // the number of steps taken along y-axis of input
                         // before computing the next pool
   int32 pool_z_step_;   // the number of steps taken along z-axis of input
                         // before computing the next pool

 };


 class GruNonlinearityComponent: public UpdatableComponent {
  public:

   virtual int32 InputDim() const;
   virtual int32 OutputDim() const;
   virtual std::string Info() const;
   virtual void InitFromConfig(ConfigLine *cfl);
   GruNonlinearityComponent() { }
   virtual std::string Type() const { return "GruNonlinearityComponent"; }
   virtual int32 Properties() const {
     return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput|\
         kBackpropNeedsOutput|kBackpropAdds;
   }
   virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;
   virtual void Backprop(const std::string &debug_info,
                         const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in_value,
                         const CuMatrixBase<BaseFloat> &, // out_value,
                         const CuMatrixBase<BaseFloat> &out_deriv,
                         void *memo,
                         Component *to_update_in,
                         CuMatrixBase<BaseFloat> *in_deriv) const;

   virtual void Read(std::istream &is, bool binary);
   virtual void Write(std::ostream &os, bool binary) const;

   virtual Component* Copy() const { return new GruNonlinearityComponent(*this); }

   virtual void Scale(BaseFloat scale);
   virtual void Add(BaseFloat alpha, const Component &other);

   // Some functions from base-class UpdatableComponent.
   virtual void PerturbParams(BaseFloat stddev);
   virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
   virtual int32 NumParameters() const;
   virtual void Vectorize(VectorBase<BaseFloat> *params) const;
   virtual void UnVectorize(const VectorBase<BaseFloat> &params);
   virtual void ZeroStats();
   virtual void FreezeNaturalGradient(bool freeze);

   // Some functions that are specific to this class:
   explicit GruNonlinearityComponent(
       const GruNonlinearityComponent &other);

  private:

   void Check() const;  // checks dimensions, etc.

   void TanhStatsAndSelfRepair(const CuMatrixBase<BaseFloat> &h_t,
                               CuMatrixBase<BaseFloat> *h_t_deriv);

   /*  This function is responsible for updating the w_h_ matrix
       (taking into account the learning rate).
         @param [in] sdotr  The value of the expression (s_{t-1} \dot r_t).
         @param [in] h_t_deriv  The derivative of the objective
                         function w.r.t. the argument of the tanh
                         function, i.e. w.r.t. the expression
                         "hpart_t + W^h (s_{t-1} \dot r_t)".
                         This function is concerned with the second
                         term as it affects the derivative w.r.t. W^h.
    */
   void UpdateParameters(const CuMatrixBase<BaseFloat> &sdotr,
                         const CuMatrixBase<BaseFloat> &h_t_deriv);


   int32 cell_dim_;  // cell dimension, e.g. 1024.
   int32 recurrent_dim_;  // recurrent dimension, e.g. 256 for projected GRU;
                          // if it's the same as cell_dim it means we are
                          // implementing regular (non-projected) GRU


   // The matrix W^h, of dimension cell_dim_ by recurrent_dim_.
   // There is no bias term needed here because hpart_t comes from
   // an affine component that has a bias.
   CuMatrix<BaseFloat> w_h_;

   // Of dimension cell_dim_, this is comparable to the value_sum_ vector in
   // class NonlinearComponent.  It stores the sum of the tanh nonlinearity.
   // Normalize by dividing by count_.
   CuVector<double> value_sum_;

   // Of dimension cell_dim_, this is comparable to the deriv_sum_ vector in
   // class NonlinearComponent.  It stores the sum of the function-derivative of
   // the tanh nonlinearity.  Normalize by dividing by count_.
   CuVector<double> deriv_sum_;

   // This is part of the stats (along with value_sum_, deriv_sum_, and count_);
   // if you divide it by count_ it gives you the proportion of the time that an
   // average dimension was subject to self-repair.
   double self_repair_total_;

   // The total count (number of frames) corresponding to the stats in value_sum_,
   // deriv_sum_, and self_repair_total_.
   double count_;

   // A configuration parameter, this determines how saturated the derivative
   // has to be for a particular dimension, before we activate self-repair.
   // Default value is 0.2, the same as for TanhComponent.
   BaseFloat self_repair_threshold_;

   // A configuration parameter, this determines the maximum absolute value of
   // the extra term that we add to the input derivative of the tanh when doing
   // self repair.  The default value is 1.0e-05.
   BaseFloat self_repair_scale_;

   // Preconditioner for the input space when updating w_h_ (has dimension
   // recurrent_dim_ if use-natural-gradient was true, else not set up).
   // The preconditioner stores its own configuration values; we write and read
   // these, but not the preconditioner object itself.
   OnlineNaturalGradient preconditioner_in_;
   // Preconditioner for the output space when updating w_h_ (has dimension
   // recurrent_dim_ if use-natural-gradient was true, else not set up).

   OnlineNaturalGradient preconditioner_out_;

   const GruNonlinearityComponent &operator
       = (const GruNonlinearityComponent &other); // Disallow.
 };


 class OutputGruNonlinearityComponent: public UpdatableComponent {
  public:

   virtual int32 InputDim() const;
   virtual int32 OutputDim() const;
   virtual std::string Info() const;
   virtual void InitFromConfig(ConfigLine *cfl);
   OutputGruNonlinearityComponent() { }
   virtual std::string Type() const { return "OutputGruNonlinearityComponent"; }
   virtual int32 Properties() const {
     return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput|\
         kBackpropNeedsOutput|kBackpropAdds;
   }
   virtual void* Propagate(const ComponentPrecomputedIndexes *indexes,
                          const CuMatrixBase<BaseFloat> &in,
                          CuMatrixBase<BaseFloat> *out) const;
   virtual void Backprop(const std::string &debug_info,
                         const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in_value,
                         const CuMatrixBase<BaseFloat> &, // out_value,
                         const CuMatrixBase<BaseFloat> &out_deriv,
                         void *memo,
                         Component *to_update_in,
                         CuMatrixBase<BaseFloat> *in_deriv) const;

   virtual void Read(std::istream &is, bool binary);
   virtual void Write(std::ostream &os, bool binary) const;

   virtual Component* Copy() const { return new OutputGruNonlinearityComponent(*this); }

   virtual void Scale(BaseFloat scale);
   virtual void Add(BaseFloat alpha, const Component &other);

   // Some functions from base-class UpdatableComponent.
   virtual void PerturbParams(BaseFloat stddev);
   virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
   virtual int32 NumParameters() const;
   virtual void Vectorize(VectorBase<BaseFloat> *params) const;
   virtual void UnVectorize(const VectorBase<BaseFloat> &params);
   virtual void ZeroStats();
   virtual void FreezeNaturalGradient(bool freeze);

   // Some functions that are specific to this class:
   explicit OutputGruNonlinearityComponent(
       const OutputGruNonlinearityComponent &other);

  private:

   void Check() const;  // checks dimensions, etc.

   void TanhStatsAndSelfRepair(const CuMatrixBase<BaseFloat> &h_t,
                               CuMatrixBase<BaseFloat> *h_t_deriv);

   /*  This function is responsible for updating the w_h_ matrix
       (taking into account the learning rate).
         @param [in] c_t1_value  The value of c_{t-1}.
         @param [in] h_t_deriv  The derivative of the objective
                         function w.r.t. the argument of the tanh
                         function, i.e. w.r.t. the expression
                         "hpart_t + W^h \dot c_t1".
                         This function is concerned with the second
                         term as it affects the derivative w.r.t. W^h.
    */
   void UpdateParameters(const CuMatrixBase<BaseFloat> &c_t1_value,
                         const CuMatrixBase<BaseFloat> &h_t_deriv);


   int32 cell_dim_;  // cell dimension, e.g. 1024.

   // The matrix W^h, of dimension cell_dim_ by recurrent_dim_.
   // There is no bias term needed here because hpart_t comes from
   // an affine component that has a bias.
   CuVector<BaseFloat> w_h_;

   // Of dimension cell_dim_, this is comparable to the value_sum_ vector in
   // class NonlinearComponent.  It stores the sum of the tanh nonlinearity.
   // Normalize by dividing by count_.
   CuVector<double> value_sum_;

   // Of dimension cell_dim_, this is comparable to the deriv_sum_ vector in
   // class NonlinearComponent.  It stores the sum of the function-derivative of
   // the tanh nonlinearity.  Normalize by dividing by count_.
   CuVector<double> deriv_sum_;

   // This is part of the stats (along with value_sum_, deriv_sum_, and count_);
   // if you divide it by count_ it gives you the proportion of the time that an
   // average dimension was subject to self-repair.
   double self_repair_total_;

   // The total count (number of frames) corresponding to the stats in value_sum_,
   // deriv_sum_, and self_repair_total_.
   double count_;

   // A configuration parameter, this determines how saturated the derivative
   // has to be for a particular dimension, before we activate self-repair.
   // Default value is 0.2, the same as for TanhComponent.
   BaseFloat self_repair_threshold_;

   // A configuration parameter, this determines the maximum absolute value of
   // the extra term that we add to the input derivative of the tanh when doing
   // self repair.  The default value is 1.0e-05.
   BaseFloat self_repair_scale_;

   // Unlike the GruNonlinearityComponent, there is only one dimension to
   // consider as the parameters are a vector not a matrix, so we only need one
   // preconditioner.
   OnlineNaturalGradient preconditioner_;

   const OutputGruNonlinearityComponent &operator
       = (const OutputGruNonlinearityComponent &other); // Disallow.
 };


 } // namespace nnet3
 } // namespace kaldi


 #endif
kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::nnet3::ConvolutionComponent::OutputDim
virtual int32 OutputDim() const
Returns output-dimension of this component.
Definition: nnet-combined-component.cc:86

kaldi::nnet3::LstmNonlinearityComponent::value_sum_
CuMatrix< double > value_sum_
Definition: nnet-combined-component.h:408

kaldi::nnet3::MaxpoolingComponent::input_z_dim_
int32 input_z_dim_
Definition: nnet-combined-component.h:540

kaldi::nnet3::ConvolutionComponent::InitFromConfig
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
Definition: nnet-combined-component.cc:165

kaldi::nnet3::LstmNonlinearityComponent::Type
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Definition: nnet-combined-component.h:343

kaldi::nnet3::MaxpoolingComponent::pool_y_step_
int32 pool_y_step_
Definition: nnet-combined-component.h:549

kaldi::CuVector
Definition: matrix-common.h:74

kaldi::nnet3::ConvolutionComponent::TensorVectorizationType
TensorVectorizationType
Definition: nnet-combined-component.h:116

kaldi::nnet3::ConvolutionComponent::InputToInputPatches
void InputToInputPatches(const CuMatrixBase< BaseFloat > &in, CuMatrix< BaseFloat > *patches) const
Definition: nnet-combined-component.cc:245

kaldi::nnet3::ConvolutionComponent::Vectorize
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
Definition: nnet-combined-component.cc:680

kaldi::nnet3::LstmNonlinearityComponent::use_dropout_
bool use_dropout_
Definition: nnet-combined-component.h:402

kaldi::nnet3::ConvolutionComponent::Write
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Definition: nnet-combined-component.cc:620

nnet-component-itf.h

nnet-common.h

kaldi::nnet3::ConvolutionComponent::Init
void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim, int32 filt_x_dim, int32 filt_y_dim, int32 filt_x_step, int32 filt_y_step, int32 num_filters, TensorVectorizationType input_vectorization, BaseFloat param_stddev, BaseFloat bias_stddev)
Definition: nnet-combined-component.cc:94

kaldi::nnet3::ConvolutionComponent::Copy
virtual Component * Copy() const
Copies component (deep copy).
Definition: nnet-combined-component.cc:654

kaldi::MatrixBase
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49

kaldi::nnet3::Component
Abstract base-class for neural-net components.
Definition: nnet-component-itf.h:114

kaldi::nnet3::kUpdatableComponent
Definition: nnet-component-itf.h:42

kaldi::nnet3::ConvolutionComponent::Info
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
Definition: nnet-combined-component.cc:147

kaldi::nnet3::ConvolutionComponent::filt_y_step_
int32 filt_y_step_
Definition: nnet-combined-component.h:225

kaldi::nnet3::ConvolutionComponent::UnVectorize
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
Definition: nnet-combined-component.cc:686

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

kaldi::nnet3::OnlineNaturalGradient
Keywords for search: natural gradient, naturalgradient, NG-SGD.
Definition: natural-gradient-online.h:414

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::nnet3::ConvolutionComponent::input_vectorization_
TensorVectorizationType input_vectorization_
Definition: nnet-combined-component.h:231

kaldi::nnet3::MaxpoolingComponent::MaxpoolingComponent
MaxpoolingComponent()
Definition: nnet-combined-component.h:491

kaldi::nnet3::ConvolutionComponent::NumParameters
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
Definition: nnet-combined-component.cc:676

kaldi::nnet3::MaxpoolingComponent::Type
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Definition: nnet-combined-component.h:502

kaldi::nnet3::MaxpoolingComponent::pool_z_step_
int32 pool_z_step_
Definition: nnet-combined-component.h:551

kaldi::nnet3::ConvolutionComponent::SetParams
void SetParams(const VectorBase< BaseFloat > &bias, const MatrixBase< BaseFloat > &filter)
Definition: nnet-combined-component.cc:669

kaldi::nnet3::UpdatableComponent::FreezeNaturalGradient
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overridden by components that use Natu...
Definition: nnet-component-itf.h:502

kaldi::nnet3::ConvolutionComponent::Add
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
Definition: nnet-combined-component.cc:348

kaldi::nnet3::MaxpoolingComponent::Copy
virtual Component * Copy() const
Copies component (deep copy).
Definition: nnet-combined-component.h:525

kaldi::nnet3::ConvolutionComponent::InputDim
virtual int32 InputDim() const
Returns input-dimension of this component.
Definition: nnet-combined-component.cc:81

kaldi::nnet3::MaxpoolingComponent::Properties
virtual int32 Properties() const
Return bitmask of the component's properties.
Definition: nnet-combined-component.h:503

kaldi::nnet3::Component::ZeroStats
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
Definition: nnet-component-itf.h:195

float

kaldi::nnet3::ConvolutionComponent::Scale
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
Definition: nnet-combined-component.cc:337

kaldi::nnet3::ConvolutionComponent::bias_params_
CuVector< BaseFloat > bias_params_
Definition: nnet-combined-component.h:242

kaldi::nnet3::kBackpropNeedsOutput
Definition: nnet-component-itf.h:67

natural-gradient-online.h

kaldi::nnet3::ConvolutionComponent::Properties
virtual int32 Properties() const
Return bitmask of the component's properties.
Definition: nnet-combined-component.h:140

kaldi::nnet3::MaxpoolingComponent::pool_x_size_
int32 pool_x_size_
Definition: nnet-combined-component.h:543

kaldi::nnet3::ConvolutionComponent::kZyx
Definition: nnet-combined-component.h:118

kaldi::nnet3::ConvolutionComponent::Read
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
Definition: nnet-combined-component.cc:585

kaldi::nnet3::kSimpleComponent
Definition: nnet-component-itf.h:38

kaldi::nnet3::LstmNonlinearityComponent
Definition: nnet-combined-component.h:335

kaldi::nnet3::kBackpropNeedsInput
Definition: nnet-component-itf.h:65

kaldi::nnet3::ConvolutionComponent::filter_params_
CuMatrix< BaseFloat > filter_params_
Definition: nnet-combined-component.h:234

kaldi::CuSubMatrix
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70

kaldi::nnet3::kPropagateAdds
Definition: nnet-component-itf.h:53

kaldi::nnet3::LstmNonlinearityComponent::self_repair_config_
CuVector< BaseFloat > self_repair_config_
Definition: nnet-combined-component.h:422

kaldi::nnet3::ConvolutionComponent::input_y_dim_
int32 input_y_dim_
Definition: nnet-combined-component.h:207

kaldi::nnet3::LstmNonlinearityComponent::deriv_sum_
CuMatrix< double > deriv_sum_
Definition: nnet-combined-component.h:415

kaldi::nnet3::MaxpoolingComponent::input_x_dim_
int32 input_x_dim_
Definition: nnet-combined-component.h:536

kaldi::nnet3::MaxpoolingComponent
Definition: nnet-combined-component.h:488

kaldi::nnet3::LstmNonlinearityComponent::count_
double count_
Definition: nnet-combined-component.h:433

kaldi::nnet3::LstmNonlinearityComponent::params_
CuMatrix< BaseFloat > params_
Definition: nnet-combined-component.h:398

kaldi::nnet3::UpdatableComponent
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
Definition: nnet-component-itf.h:455

kaldi::nnet3::ConvolutionComponent::Resize
void Resize(int32 input_dim, int32 output_dim)

kaldi::nnet3::ConvolutionComponent::BiasParams
const CuVector< BaseFloat > & BiasParams() const
Definition: nnet-combined-component.h:179

kaldi::nnet3::ConvolutionComponent::operator=
const ConvolutionComponent & operator=(const ConvolutionComponent &other)

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

kaldi::nnet3::ConvolutionComponent::LinearParams
const CuMatrix< BaseFloat > & LinearParams() const
Definition: nnet-combined-component.h:180

kaldi::nnet3::kBackpropAdds
Definition: nnet-component-itf.h:61

kaldi::nnet3::ConvolutionComponent::Propagate
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
Definition: nnet-combined-component.cc:292

kaldi::nnet3::ComponentPrecomputedIndexes
Definition: nnet-component-itf.h:97

kaldi::nnet3::MaxpoolingComponent::pool_y_size_
int32 pool_y_size_
Definition: nnet-combined-component.h:544

kaldi::ConfigLine
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205

kaldi::nnet3::ConvolutionComponent::filt_x_step_
int32 filt_x_step_
Definition: nnet-combined-component.h:221

kaldi::nnet3::ConvolutionComponent::DotProduct
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
Definition: nnet-combined-component.cc:647

kaldi::nnet3::Component::ConsolidateMemory
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
Definition: nnet-component-itf.h:394

kaldi::nnet3::ConvolutionComponent::ConvolutionComponent
ConvolutionComponent()
Definition: nnet-combined-component.cc:34

kaldi::nnet3::MaxpoolingComponent::pool_z_size_
int32 pool_z_size_
Definition: nnet-combined-component.h:545

kaldi::nnet3::ConvolutionComponent::filt_y_dim_
int32 filt_y_dim_
Definition: nnet-combined-component.h:216

kaldi::nnet3::LstmNonlinearityComponent::self_repair_total_
CuVector< double > self_repair_total_
Definition: nnet-combined-component.h:429

kaldi::nnet3::ConvolutionComponent::input_z_dim_
int32 input_z_dim_
Definition: nnet-combined-component.h:210

kaldi::nnet3::ConvolutionComponent::Backprop
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update_in, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
Definition: nnet-combined-component.cc:443

kaldi::nnet3::LstmNonlinearityComponent::Properties
virtual int32 Properties() const
Return bitmask of the component's properties.
Definition: nnet-combined-component.h:344

kaldi::nnet3::ConvolutionComponent::PerturbParams
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
Definition: nnet-combined-component.cc:659

kaldi::nnet3::ConvolutionComponent::Type
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Definition: nnet-combined-component.h:139

kaldi::nnet3::ConvolutionComponent::kYzx
Definition: nnet-combined-component.h:117

kaldi::VectorBase
Provides a vector abstraction class.
Definition: kaldi-vector.h:41

kaldi::nnet3::ConvolutionComponent::InderivPatchesToInderiv
void InderivPatchesToInderiv(const CuMatrix< BaseFloat > &in_deriv_patches, CuMatrixBase< BaseFloat > *in_deriv) const
Definition: nnet-combined-component.cc:387

kaldi::nnet3::ConvolutionComponent::filt_x_dim_
int32 filt_x_dim_
Definition: nnet-combined-component.h:214

kaldi::nnet3::ConvolutionComponent::input_x_dim_
int32 input_x_dim_
Definition: nnet-combined-component.h:204

kaldi::nnet3::ConvolutionComponent
WARNING, this component is deprecated in favor of TimeHeightConvolutionComponent, and will be deleted...
Definition: nnet-combined-component.h:114

kaldi::nnet3::MaxpoolingComponent::pool_x_step_
int32 pool_x_step_
Definition: nnet-combined-component.h:547

kaldi::nnet3::LstmNonlinearityComponent::preconditioner_
OnlineNaturalGradient preconditioner_
Definition: nnet-combined-component.h:439

kaldi::nnet3::LstmNonlinearityComponent::LstmNonlinearityComponent
LstmNonlinearityComponent()
Definition: nnet-combined-component.h:342

kaldi::nnet3::ConvolutionComponent::Update
void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv, const std::vector< CuSubMatrix< BaseFloat > *> &out_deriv_batch)
Definition: nnet-combined-component.cc:511

kaldi::CuVectorBase
Vector for CUDA computing.
Definition: matrix-common.h:72

kaldi::nnet3::MaxpoolingComponent::input_y_dim_
int32 input_y_dim_
Definition: nnet-combined-component.h:538