nnet-affine-transform.h
Go to the documentation of this file.
1 // nnet/nnet-affine-transform.h
2 
3 // Copyright 2011-2014 Brno University of Technology (author: Karel Vesely)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_NNET_NNET_AFFINE_TRANSFORM_H_
22 #define KALDI_NNET_NNET_AFFINE_TRANSFORM_H_
23 
24 #include <string>
25 
26 #include "nnet/nnet-component.h"
27 #include "nnet/nnet-utils.h"
28 #include "cudamatrix/cu-math.h"
29 
30 namespace kaldi {
31 namespace nnet1 {
32 
34  public:
35  AffineTransform(int32 dim_in, int32 dim_out):
36  UpdatableComponent(dim_in, dim_out),
37  linearity_(dim_out, dim_in), bias_(dim_out),
38  linearity_corr_(dim_out, dim_in), bias_corr_(dim_out),
39  max_norm_(0.0)
40  { }
42  { }
43 
44  Component* Copy() const { return new AffineTransform(*this); }
46 
47  void InitData(std::istream &is) {
48  // define options
49  float bias_mean = -2.0, bias_range = 2.0, param_stddev = 0.1;
50  // parse config
51  std::string token;
52  while (is >> std::ws, !is.eof()) {
53  ReadToken(is, false, &token);
54  if (token == "<ParamStddev>") ReadBasicType(is, false, &param_stddev);
55  else if (token == "<BiasMean>") ReadBasicType(is, false, &bias_mean);
56  else if (token == "<BiasRange>") ReadBasicType(is, false, &bias_range);
57  else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef_);
58  else if (token == "<BiasLearnRateCoef>") ReadBasicType(is, false, &bias_learn_rate_coef_);
59  else if (token == "<MaxNorm>") ReadBasicType(is, false, &max_norm_);
60  else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
61  << " (ParamStddev|BiasMean|BiasRange|LearnRateCoef|BiasLearnRateCoef)";
62  }
63 
64  //
65  // Initialize trainable parameters,
66  //
67  // Gaussian with given std_dev (mean = 0),
68  linearity_.Resize(OutputDim(), InputDim());
69  RandGauss(0.0, param_stddev, &linearity_);
70  // Uniform,
71  bias_.Resize(OutputDim());
72  RandUniform(bias_mean, bias_range, &bias_);
73  }
74 
75  void ReadData(std::istream &is, bool binary) {
76  // Read all the '<Tokens>' in arbitrary order,
77  while ('<' == Peek(is, binary)) {
78  int first_char = PeekToken(is, binary);
79  switch (first_char) {
80  case 'L': ExpectToken(is, binary, "<LearnRateCoef>");
81  ReadBasicType(is, binary, &learn_rate_coef_);
82  break;
83  case 'B': ExpectToken(is, binary, "<BiasLearnRateCoef>");
85  break;
86  case 'M': ExpectToken(is, binary, "<MaxNorm>");
87  ReadBasicType(is, binary, &max_norm_);
88  break;
89  default:
90  std::string token;
91  ReadToken(is, false, &token);
92  KALDI_ERR << "Unknown token: " << token;
93  }
94  }
95  // Read the data (data follow the tokens),
96 
97  // weight matrix,
98  linearity_.Read(is, binary);
99  // bias vector,
100  bias_.Read(is, binary);
101 
102  KALDI_ASSERT(linearity_.NumRows() == output_dim_);
103  KALDI_ASSERT(linearity_.NumCols() == input_dim_);
104  KALDI_ASSERT(bias_.Dim() == output_dim_);
105  }
106 
107  void WriteData(std::ostream &os, bool binary) const {
108  WriteToken(os, binary, "<LearnRateCoef>");
109  WriteBasicType(os, binary, learn_rate_coef_);
110  WriteToken(os, binary, "<BiasLearnRateCoef>");
112  WriteToken(os, binary, "<MaxNorm>");
113  WriteBasicType(os, binary, max_norm_);
114  if (!binary) os << "\n";
115  // weights
116  linearity_.Write(os, binary);
117  bias_.Write(os, binary);
118  }
119 
120  int32 NumParams() const {
121  return linearity_.NumRows()*linearity_.NumCols() + bias_.Dim();
122  }
123 
124  void GetGradient(VectorBase<BaseFloat>* gradient) const {
125  KALDI_ASSERT(gradient->Dim() == NumParams());
126  int32 linearity_num_elem = linearity_.NumRows() * linearity_.NumCols();
127  gradient->Range(0, linearity_num_elem).CopyRowsFromMat(linearity_corr_);
128  gradient->Range(linearity_num_elem, bias_.Dim()).CopyFromVec(bias_corr_);
129  }
130 
131  void GetParams(VectorBase<BaseFloat>* params) const {
132  KALDI_ASSERT(params->Dim() == NumParams());
133  int32 linearity_num_elem = linearity_.NumRows() * linearity_.NumCols();
134  params->Range(0, linearity_num_elem).CopyRowsFromMat(linearity_);
135  params->Range(linearity_num_elem, bias_.Dim()).CopyFromVec(bias_);
136  }
137 
138  void SetParams(const VectorBase<BaseFloat>& params) {
139  KALDI_ASSERT(params.Dim() == NumParams());
140  int32 linearity_num_elem = linearity_.NumRows() * linearity_.NumCols();
141  linearity_.CopyRowsFromVec(params.Range(0, linearity_num_elem));
142  bias_.CopyFromVec(params.Range(linearity_num_elem, bias_.Dim()));
143  }
144 
145  std::string Info() const {
146  return std::string("\n linearity") +
148  ", lr-coef " + ToString(learn_rate_coef_) +
149  ", max-norm " + ToString(max_norm_) +
150  "\n bias" + MomentStatistics(bias_) +
151  ", lr-coef " + ToString(bias_learn_rate_coef_);
152  }
153  std::string InfoGradient() const {
154  return std::string("\n linearity_grad") +
156  ", lr-coef " + ToString(learn_rate_coef_) +
157  ", max-norm " + ToString(max_norm_) +
158  "\n bias_grad" + MomentStatistics(bias_corr_) +
159  ", lr-coef " + ToString(bias_learn_rate_coef_);
160  }
161 
164  // precopy bias
165  out->AddVecToRows(1.0, bias_, 0.0);
166  // multiply by weights^t
167  out->AddMatMat(1.0, in, kNoTrans, linearity_, kTrans, 1.0);
168  }
169 
171  const CuMatrixBase<BaseFloat> &out,
172  const CuMatrixBase<BaseFloat> &out_diff,
173  CuMatrixBase<BaseFloat> *in_diff) {
174  // multiply error derivative by weights
175  in_diff->AddMatMat(1.0, out_diff, kNoTrans, linearity_, kNoTrans, 0.0);
176  }
177 
178 
179  void Update(const CuMatrixBase<BaseFloat> &input,
180  const CuMatrixBase<BaseFloat> &diff) {
181  // we use following hyperparameters from the option class
184  const BaseFloat mmt = opts_.momentum;
185  const BaseFloat l2 = opts_.l2_penalty;
186  const BaseFloat l1 = opts_.l1_penalty;
187  // we will also need the number of frames in the mini-batch
188  const int32 num_frames = input.NumRows();
189  // compute gradient (incl. momentum)
190  linearity_corr_.AddMatMat(1.0, diff, kTrans, input, kNoTrans, mmt);
191  bias_corr_.AddRowSumMat(1.0, diff, mmt);
192  // l2 regularization
193  if (l2 != 0.0) {
194  linearity_.AddMat(-lr*l2*num_frames, linearity_);
195  }
196  // l1 regularization
197  if (l1 != 0.0) {
198  cu::RegularizeL1(&linearity_, &linearity_corr_, lr*l1*num_frames, lr);
199  }
200  // update
201  linearity_.AddMat(-lr, linearity_corr_);
202  bias_.AddVec(-lr_bias, bias_corr_);
203  // max-norm
204  if (max_norm_ > 0.0) {
206  lin_sqr.MulElements(linearity_);
208  l2.AddColSumMat(1.0, lin_sqr, 0.0);
209  l2.ApplyPow(0.5); // we have per-neuron L2 norms,
210  CuVector<BaseFloat> scl(l2);
211  scl.Scale(1.0/max_norm_);
212  scl.ApplyFloor(1.0);
213  scl.InvertElements();
214  linearity_.MulRowsVec(scl); // shink to sphere!
215  }
216  }
217 
219  const CuVectorBase<BaseFloat>& GetBias() const { return bias_; }
220 
221  void SetBias(const CuVectorBase<BaseFloat>& bias) {
222  KALDI_ASSERT(bias.Dim() == bias_.Dim());
223  bias_.CopyFromVec(bias);
224  }
225 
227 
228  void SetLinearity(const CuMatrixBase<BaseFloat>& linearity) {
229  KALDI_ASSERT(linearity.NumRows() == linearity_.NumRows());
230  KALDI_ASSERT(linearity.NumCols() == linearity_.NumCols());
231  linearity_.CopyFromMat(linearity);
232  }
233 
234  private:
237 
240 
242 };
243 
244 } // namespace nnet1
245 } // namespace kaldi
246 
247 #endif // KALDI_NNET_NNET_AFFINE_TRANSFORM_H_
std::string ToString(const T &t)
Convert basic type to a string (please don't overuse).
Definition: nnet-utils.h:52
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
CuMatrix< BaseFloat > linearity_corr_
NnetTrainOptions opts_
Option-class with training hyper-parameters,.
std::string MomentStatistics(const VectorBase< Real > &vec)
Get a string with statistics of the data in a vector, so we can print them easily.
Definition: nnet-utils.h:63
int32 input_dim_
Data members,.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
BaseFloat bias_learn_rate_coef_
Scalar applied to learning rate for bias (to be used in ::Update method),.
BaseFloat learn_rate_coef_
Scalar applied to learning rate for weight matrices (to be used in ::Update method),.
std::string InfoGradient() const
Print some additional info about gradient (after <...> and dims),.
Class UpdatableComponent is a Component which has trainable parameters, it contains SGD training hype...
const CuMatrixBase< BaseFloat > & GetLinearity() const
void RandUniform(BaseFloat mu, BaseFloat range, CuMatrixBase< Real > *mat, struct RandomState *state=NULL)
Fill CuMatrix with random numbers (Uniform distribution): mu = the mean value, range = the 'width' of...
Definition: nnet-utils.h:188
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=NULL)
Definition: cu-vector.h:139
void Update(const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
Compute gradient and update parameters,.
int Peek(std::istream &is, bool binary)
Peek consumes whitespace (if binary == false) and then returns the peek() value of the stream...
Definition: io-funcs.cc:145
ComponentType
Component type identification mechanism,.
void SetBias(const CuVectorBase< BaseFloat > &bias)
std::string Info() const
Print some additional info (after <ComponentName> and the dims),.
void GetParams(VectorBase< BaseFloat > *params) const
Get the trainable parameters reshaped as a vector,.
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from, reshaped as a vector,.
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261
ComponentType GetType() const
Get Type Identification of the component,.
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
Definition: cu-vector.cc:1298
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
int32 InputDim() const
Get the dimension of the input,.
#define KALDI_ERR
Definition: kaldi-error.h:147
void RandGauss(BaseFloat mu, BaseFloat sigma, CuMatrixBase< Real > *mat, struct RandomState *state=NULL)
Fill CuMatrix with random numbers (Gaussian distribution): mu = the mean value, sigma = standard devi...
Definition: nnet-utils.h:164
AffineTransform(int32 dim_in, int32 dim_out)
void ApplyPow(Real power)
Definition: cu-vector.h:147
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
Definition: cu-matrix.cc:1291
void InitData(std::istream &is)
Initialize the content of the component by the &#39;line&#39; from the prototype,.
const CuVectorBase< BaseFloat > & GetBias() const
Accessors to the component parameters,.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
Definition: io-funcs.cc:170
int32 output_dim_
Dimension of the output of the Component,.
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void SetLinearity(const CuMatrixBase< BaseFloat > &linearity)
void ReadData(std::istream &is, bool binary)
Reads the component content.
void Scale(Real value)
Definition: cu-vector.cc:1216
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
Abstract class, building block of the network.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
int32 OutputDim() const
Get the dimension of the output,.
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void GetGradient(VectorBase< BaseFloat > *gradient) const
Get gradient reshaped as a vector,.
Component * Copy() const
Copy component (deep copy),.
void RegularizeL1(CuMatrixBase< Real > *weight, CuMatrixBase< Real > *grad, Real l1, Real lr)
RegularizeL1 is a gradient step with l1 regularization added to the gradient.
Definition: cu-math.cc:37
int32 NumParams() const
Number of trainable parameters,.
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
Vector for CUDA computing.
Definition: matrix-common.h:72
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94