nnet-affine-transform.h
Go to the documentation of this file.
1 // nnet/nnet-affine-transform.h
2 
3 // Copyright 2011-2014 Brno University of Technology (author: Karel Vesely)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_NNET_NNET_AFFINE_TRANSFORM_H_
22 #define KALDI_NNET_NNET_AFFINE_TRANSFORM_H_
23 
24 #include <string>
25 
26 #include "nnet/nnet-component.h"
27 #include "nnet/nnet-utils.h"
28 #include "cudamatrix/cu-math.h"
29 
30 namespace kaldi {
31 namespace nnet1 {
32 
34  public:
35  AffineTransform(int32 dim_in, int32 dim_out):
36  UpdatableComponent(dim_in, dim_out),
37  linearity_(dim_out, dim_in), bias_(dim_out),
38  linearity_corr_(dim_out, dim_in), bias_corr_(dim_out),
39  max_norm_(0.0)
40  { }
42  { }
43 
44  Component* Copy() const { return new AffineTransform(*this); }
46 
47  void InitData(std::istream &is) {
48  // define options
49  float bias_mean = -2.0, bias_range = 2.0, param_stddev = 0.1;
50  // parse config
51  std::string token;
52  while (is >> std::ws, !is.eof()) {
53  ReadToken(is, false, &token);
54  if (token == "<ParamStddev>") ReadBasicType(is, false, &param_stddev);
55  else if (token == "<BiasMean>") ReadBasicType(is, false, &bias_mean);
56  else if (token == "<BiasRange>") ReadBasicType(is, false, &bias_range);
57  else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef_);
58  else if (token == "<BiasLearnRateCoef>") ReadBasicType(is, false, &bias_learn_rate_coef_);
59  else if (token == "<MaxNorm>") ReadBasicType(is, false, &max_norm_);
60  else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
61  << " (ParamStddev|BiasMean|BiasRange|LearnRateCoef|BiasLearnRateCoef)";
62  }
63 
64  //
65  // Initialize trainable parameters,
66  //
67  // Gaussian with given std_dev (mean = 0),
68  linearity_.Resize(OutputDim(), InputDim());
69  RandGauss(0.0, param_stddev, &linearity_);
70  // Uniform,
71  bias_.Resize(OutputDim());
72  RandUniform(bias_mean, bias_range, &bias_);
73  }
74 
75  void ReadData(std::istream &is, bool binary) {
76  // Read all the '<Tokens>' in arbitrary order,
77  while ('<' == Peek(is, binary)) {
78  int first_char = PeekToken(is, binary);
79  switch (first_char) {
80  case 'L': ExpectToken(is, binary, "<LearnRateCoef>");
81  ReadBasicType(is, binary, &learn_rate_coef_);
82  break;
83  case 'B': ExpectToken(is, binary, "<BiasLearnRateCoef>");
85  break;
86  case 'M': ExpectToken(is, binary, "<MaxNorm>");
87  ReadBasicType(is, binary, &max_norm_);
88  break;
89  default:
90  std::string token;
91  ReadToken(is, false, &token);
92  KALDI_ERR << "Unknown token: " << token;
93  }
94  }
95  // Read the data (data follow the tokens),
96 
97  // weight matrix,
98  linearity_.Read(is, binary);
99  // bias vector,
100  bias_.Read(is, binary);
101 
102  KALDI_ASSERT(linearity_.NumRows() == output_dim_);
103  KALDI_ASSERT(linearity_.NumCols() == input_dim_);
104  KALDI_ASSERT(bias_.Dim() == output_dim_);
105  }
106 
107  void WriteData(std::ostream &os, bool binary) const {
108  WriteToken(os, binary, "<LearnRateCoef>");
109  WriteBasicType(os, binary, learn_rate_coef_);
110  WriteToken(os, binary, "<BiasLearnRateCoef>");
112  WriteToken(os, binary, "<MaxNorm>");
113  WriteBasicType(os, binary, max_norm_);
114  if (!binary) os << "\n";
115  // weights
116  linearity_.Write(os, binary);
117  bias_.Write(os, binary);
118  }
119 
120  int32 NumParams() const {
121  return linearity_.NumRows()*linearity_.NumCols() + bias_.Dim();
122  }
123 
124  void GetGradient(VectorBase<BaseFloat>* gradient) const {
125  KALDI_ASSERT(gradient->Dim() == NumParams());
126  int32 linearity_num_elem = linearity_.NumRows() * linearity_.NumCols();
127  gradient->Range(0, linearity_num_elem).CopyRowsFromMat(linearity_corr_);
128  gradient->Range(linearity_num_elem, bias_.Dim()).CopyFromVec(bias_corr_);
129  }
130 
131  void GetParams(VectorBase<BaseFloat>* params) const {
132  KALDI_ASSERT(params->Dim() == NumParams());
133  int32 linearity_num_elem = linearity_.NumRows() * linearity_.NumCols();
134  params->Range(0, linearity_num_elem).CopyRowsFromMat(linearity_);
135  params->Range(linearity_num_elem, bias_.Dim()).CopyFromVec(bias_);
136  }
137 
138  void SetParams(const VectorBase<BaseFloat>& params) {
139  KALDI_ASSERT(params.Dim() == NumParams());
140  int32 linearity_num_elem = linearity_.NumRows() * linearity_.NumCols();
141  linearity_.CopyRowsFromVec(params.Range(0, linearity_num_elem));
142  bias_.CopyFromVec(params.Range(linearity_num_elem, bias_.Dim()));
143  }
144 
145  std::string Info() const {
146  return std::string("\n linearity") +
148  ", lr-coef " + ToString(learn_rate_coef_) +
149  ", max-norm " + ToString(max_norm_) +
150  "\n bias" + MomentStatistics(bias_) +
151  ", lr-coef " + ToString(bias_learn_rate_coef_);
152  }
153  std::string InfoGradient() const {
154  return std::string("\n linearity_grad") +
156  ", lr-coef " + ToString(learn_rate_coef_) +
157  ", max-norm " + ToString(max_norm_) +
158  "\n bias_grad" + MomentStatistics(bias_corr_) +
159  ", lr-coef " + ToString(bias_learn_rate_coef_);
160  }
161 
164  // precopy bias
165  out->AddVecToRows(1.0, bias_, 0.0);
166  // multiply by weights^t
167  out->AddMatMat(1.0, in, kNoTrans, linearity_, kTrans, 1.0);
168  }
169 
171  const CuMatrixBase<BaseFloat> &out,
172  const CuMatrixBase<BaseFloat> &out_diff,
173  CuMatrixBase<BaseFloat> *in_diff) {
174  // multiply error derivative by weights
175  in_diff->AddMatMat(1.0, out_diff, kNoTrans, linearity_, kNoTrans, 0.0);
176  }
177 
178 
179  void Update(const CuMatrixBase<BaseFloat> &input,
180  const CuMatrixBase<BaseFloat> &diff) {
181  // we use following hyperparameters from the option class
184  const BaseFloat mmt = opts_.momentum;
185  const BaseFloat l2 = opts_.l2_penalty;
186  const BaseFloat l1 = opts_.l1_penalty;
187  // we will also need the number of frames in the mini-batch
188  const int32 num_frames = input.NumRows();
189  // compute gradient (incl. momentum)
190  linearity_corr_.AddMatMat(1.0, diff, kTrans, input, kNoTrans, mmt);
191  bias_corr_.AddRowSumMat(1.0, diff, mmt);
192  // l2 regularization
193  if (l2 != 0.0) {
194  linearity_.AddMat(-lr*l2*num_frames, linearity_);
195  }
196  // l1 regularization
197  if (l1 != 0.0) {
198  cu::RegularizeL1(&linearity_, &linearity_corr_, lr*l1*num_frames, lr);
199  }
200  // update
201  linearity_.AddMat(-lr, linearity_corr_);
202  bias_.AddVec(-lr_bias, bias_corr_);
203  // max-norm
204  if (max_norm_ > 0.0) {
206  lin_sqr.MulElements(linearity_);
208  l2.AddColSumMat(1.0, lin_sqr, 0.0);
209  l2.ApplyPow(0.5); // we have per-neuron L2 norms,
210  CuVector<BaseFloat> scl(l2);
211  scl.Scale(1.0/max_norm_);
212  scl.ApplyFloor(1.0);
213  scl.InvertElements();
214  linearity_.MulRowsVec(scl); // shink to sphere!
215  }
216  }
217 
219  const CuVectorBase<BaseFloat>& GetBias() const { return bias_; }
220 
221  void SetBias(const CuVectorBase<BaseFloat>& bias) {
222  KALDI_ASSERT(bias.Dim() == bias_.Dim());
223  bias_.CopyFromVec(bias);
224  }
225 
227 
228  void SetLinearity(const CuMatrixBase<BaseFloat>& linearity) {
229  KALDI_ASSERT(linearity.NumRows() == linearity_.NumRows());
230  KALDI_ASSERT(linearity.NumCols() == linearity_.NumCols());
231  linearity_.CopyFromMat(linearity);
232  }
233 
234  private:
237 
240 
242 };
243 
244 } // namespace nnet1
245 } // namespace kaldi
246 
247 #endif // KALDI_NNET_NNET_AFFINE_TRANSFORM_H_
std::string ToString(const T &t)
Convert basic type to a string (please don't overuse).
Definition: nnet-utils.h:52
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
CuMatrix< BaseFloat > linearity_corr_
NnetTrainOptions opts_
Option-class with training hyper-parameters,.
std::string MomentStatistics(const VectorBase< Real > &vec)
Get a string with statistics of the data in a vector, so we can print them easily.
Definition: nnet-utils.h:63
int32 input_dim_
Data members,.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
BaseFloat bias_learn_rate_coef_
Scalar applied to learning rate for bias (to be used in ::Update method),.
BaseFloat learn_rate_coef_
Scalar applied to learning rate for weight matrices (to be used in ::Update method),.
std::string InfoGradient() const
Print some additional info about gradient (after <...> and dims),.
Class UpdatableComponent is a Component which has trainable parameters, it contains SGD training hype...
const CuMatrixBase< BaseFloat > & GetLinearity() const
void RandUniform(BaseFloat mu, BaseFloat range, CuMatrixBase< Real > *mat, struct RandomState *state=NULL)
Fill CuMatrix with random numbers (Uniform distribution): mu = the mean value, range = the 'width' of...
Definition: nnet-utils.h:188
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=NULL)
Definition: cu-vector.h:139
void Update(const CuMatrixBase< BaseFloat > &input, const CuMatrixBase< BaseFloat > &diff)
Compute gradient and update parameters,.
int Peek(std::istream &is, bool binary)
Peek consumes whitespace (if binary == false) and then returns the peek() value of the stream...
Definition: io-funcs.cc:145
ComponentType
Component type identification mechanism,.
void SetBias(const CuVectorBase< BaseFloat > &bias)
std::string Info() const
Print some additional info (after <ComponentName> and the dims),.
void GetParams(VectorBase< BaseFloat > *params) const
Get the trainable parameters reshaped as a vector,.
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
void SetParams(const VectorBase< BaseFloat > &params)
Set the trainable parameters from, reshaped as a vector,.
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261
ComponentType GetType() const
Get Type Identification of the component,.
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
Definition: cu-vector.cc:1298
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
int32 InputDim() const
Get the dimension of the input,.
#define KALDI_ERR
Definition: kaldi-error.h:147
void RandGauss(BaseFloat mu, BaseFloat sigma, CuMatrixBase< Real > *mat, struct RandomState *state=NULL)
Fill CuMatrix with random numbers (Gaussian distribution): mu = the mean value, sigma = standard devi...
Definition: nnet-utils.h:164
AffineTransform(int32 dim_in, int32 dim_out)
void ApplyPow(Real power)
Definition: cu-vector.h:147
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
Definition: cu-matrix.cc:1291
void InitData(std::istream &is)
Initialize the content of the component by the &#39;line&#39; from the prototype,.
const CuVectorBase< BaseFloat > & GetBias() const
Accessors to the component parameters,.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
Definition: io-funcs.cc:170
int32 output_dim_
Dimension of the output of the Component,.
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void SetLinearity(const CuMatrixBase< BaseFloat > &linearity)
void ReadData(std::istream &is, bool binary)
Reads the component content.
void Scale(Real value)
Definition: cu-vector.cc:1216
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
Abstract class, building block of the network.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
int32 OutputDim() const
Get the dimension of the output,.
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void GetGradient(VectorBase< BaseFloat > *gradient) const
Get gradient reshaped as a vector,.
Component * Copy() const
Copy component (deep copy),.
void RegularizeL1(CuMatrixBase< Real > *weight, CuMatrixBase< Real > *grad, Real l1, Real lr)
RegularizeL1 is a gradient step with l1 regularization added to the gradient.
Definition: cu-math.cc:37
int32 NumParams() const
Number of trainable parameters,.
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
Vector for CUDA computing.
Definition: matrix-common.h:72
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94