nnet-linear-transform.h
// nnet/nnet-linear-transform.h

// Copyright 2011-2014 Brno University of Technology (author: Karel Vesely)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_NNET_NNET_LINEAR_TRANSFORM_H_
#define KALDI_NNET_NNET_LINEAR_TRANSFORM_H_

#include <string>

#include "nnet/nnet-component.h"
#include "nnet/nnet-utils.h"
#include "cudamatrix/cu-math.h"

namespace kaldi {
namespace nnet1 {

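// LinearTransform is a bias-free linear layer: the forward pass computes
// out = in * linearity_^T (y = W x per frame), and the weight matrix W is
// trainable by SGD with momentum, L1 and L2 regularization (see Update()).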
class LinearTransform : public UpdatableComponent {
 public:
  LinearTransform(int32 dim_in, int32 dim_out):
    UpdatableComponent(dim_in, dim_out),
    linearity_(dim_out, dim_in),
    linearity_corr_(dim_out, dim_in)
  { }

  ~LinearTransform()
  { }

  Component* Copy() const { return new LinearTransform(*this); }
  ComponentType GetType() const { return kLinearTransform; }

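  // Initialize from a prototype line. Recognized tokens are <ParamStddev>,
  // <ReadMatrix> and <LearnRateCoef>, e.g. (hypothetical prototype line):
  //   <LinearTransform> <InputDim> 512 <OutputDim> 512 <ParamStddev> 0.1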
  void InitData(std::istream &is) {
    // define options
    float param_stddev = 0.1;
    std::string read_matrix_file;
    // parse config
    std::string token;
    while (is >> std::ws, !is.eof()) {
      ReadToken(is, false, &token);
      if (token == "<ParamStddev>") ReadBasicType(is, false, &param_stddev);
      else if (token == "<ReadMatrix>") ReadToken(is, false, &read_matrix_file);
      else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef_);
      else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                     << " (ParamStddev|ReadMatrix|LearnRateCoef)";
    }

    if (read_matrix_file != "") {  // load from file,
      bool binary;
      Input in(read_matrix_file, &binary);
      linearity_.Read(in.Stream(), binary);
      in.Close();
      // check dims,
      if (OutputDim() != linearity_.NumRows() ||
          InputDim() != linearity_.NumCols()) {
        KALDI_ERR << "Dimensionality mismatch! Expected matrix"
                  << " r=" << OutputDim() << " c=" << InputDim()
                  << ", loaded matrix " << read_matrix_file
                  << " with r=" << linearity_.NumRows()
                  << " c=" << linearity_.NumCols();
      }
      KALDI_LOG << "Loaded <LinearTransform> matrix from file : "
                << read_matrix_file;
      return;
    }

    //
    // Initialize trainable parameters,
    //
    // Gaussian with given std_dev (mean = 0),
    linearity_.Resize(OutputDim(), InputDim());
    RandGauss(0.0, param_stddev, &linearity_);
  }

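  // On disk the component stores an optional '<LearnRateCoef> <value>' token
  // followed by the weight matrix (see WriteData() below).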
  void ReadData(std::istream &is, bool binary) {
    // Read all the '<Tokens>' in arbitrary order,
    while ('<' == Peek(is, binary)) {
      int first_char = PeekToken(is, binary);
      switch (first_char) {
        case 'L': ExpectToken(is, binary, "<LearnRateCoef>");
          ReadBasicType(is, binary, &learn_rate_coef_);
          break;
        default:
          std::string token;
          ReadToken(is, false, &token);
          KALDI_ERR << "Unknown token: " << token;
      }
    }
    // Read the data (data follow the tokens),

    // weights
    linearity_.Read(is, binary);

    KALDI_ASSERT(linearity_.NumRows() == output_dim_);
    KALDI_ASSERT(linearity_.NumCols() == input_dim_);
  }

  void WriteData(std::ostream &os, bool binary) const {
    WriteToken(os, binary, "<LearnRateCoef>");
    WriteBasicType(os, binary, learn_rate_coef_);
    if (!binary) os << "\n";
    linearity_.Write(os, binary);
  }

  int32 NumParams() const {
    return linearity_.NumRows()*linearity_.NumCols();
  }

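  // The gradient and the parameters are exported/imported as a row-stacked
  // (row-major) vector of length NumParams().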
  void GetGradient(VectorBase<BaseFloat>* gradient) const {
    KALDI_ASSERT(gradient->Dim() == NumParams());
    gradient->CopyRowsFromMat(linearity_corr_);
  }

  void GetParams(VectorBase<BaseFloat>* params) const {
    KALDI_ASSERT(params->Dim() == NumParams());
    params->CopyRowsFromMat(linearity_);
  }

  void SetParams(const VectorBase<BaseFloat>& params) {
    KALDI_ASSERT(params.Dim() == NumParams());
    linearity_.CopyRowsFromVec(params);
  }

  void SetLinearity(const MatrixBase<BaseFloat>& l) {
    KALDI_ASSERT(l.NumCols() == linearity_.NumCols());
    KALDI_ASSERT(l.NumRows() == linearity_.NumRows());
    linearity_.CopyFromMat(l);
  }

  std::string Info() const {
    return std::string("\n  linearity") +
      MomentStatistics(linearity_) +
      ", lr-coef " + ToString(learn_rate_coef_);
  }
  std::string InfoGradient() const {
    return std::string("\n  linearity_grad") +
      MomentStatistics(linearity_corr_) +
      ", lr-coef " + ToString(learn_rate_coef_);
  }

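  // Forward pass: out = in * linearity_^T, i.e. for each row (frame) x,
  // the output is y = W x.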
  void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                    CuMatrixBase<BaseFloat> *out) {
    // multiply by weights^t
    out->AddMatMat(1.0, in, kNoTrans, linearity_, kTrans, 0.0);
  }

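  // Backward pass: in_diff = out_diff * linearity_, i.e. per frame
  // dE/dx = W^T * dE/dy.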
  void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                        const CuMatrixBase<BaseFloat> &out,
                        const CuMatrixBase<BaseFloat> &out_diff,
                        CuMatrixBase<BaseFloat> *in_diff) {
    // multiply error derivative by weights
    in_diff->AddMatMat(1.0, out_diff, kNoTrans, linearity_, kNoTrans, 0.0);
  }

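  // One SGD step on a mini-batch:
  //   linearity_corr_ = diff^T * input + momentum * linearity_corr_
  //   linearity_     -= lr * l2 * num_frames * linearity_   (L2 weight decay)
  //   L1 is applied via cu::RegularizeL1,
  //   linearity_     -= lr * learn_rate_coef_ * linearity_corr_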
  void Update(const CuMatrixBase<BaseFloat> &input,
              const CuMatrixBase<BaseFloat> &diff) {
    // we use following hyperparameters from the option class
    const BaseFloat lr = opts_.learn_rate;
    const BaseFloat mmt = opts_.momentum;
    const BaseFloat l2 = opts_.l2_penalty;
    const BaseFloat l1 = opts_.l1_penalty;
    // we will also need the number of frames in the mini-batch
    const int32 num_frames = input.NumRows();
    // compute gradient (incl. momentum)
    linearity_corr_.AddMatMat(1.0, diff, kTrans, input, kNoTrans, mmt);
    // l2 regularization
    if (l2 != 0.0) {
      linearity_.AddMat(-lr*l2*num_frames, linearity_);
    }
    // l1 regularization
    if (l1 != 0.0) {
      cu::RegularizeL1(&linearity_, &linearity_corr_, lr*l1*num_frames, lr);
    }
    // update
    linearity_.AddMat(-lr*learn_rate_coef_, linearity_corr_);
  }

  /// Accessors to the component parameters,
  const CuMatrixBase<BaseFloat>& GetLinearity() { return linearity_; }

  void SetLinearity(const CuMatrixBase<BaseFloat>& linearity) {
    KALDI_ASSERT(linearity.NumRows() == linearity_.NumRows());
    KALDI_ASSERT(linearity.NumCols() == linearity_.NumCols());
    linearity_.CopyFromMat(linearity);
  }

  const CuMatrixBase<BaseFloat>& GetLinearityCorr() { return linearity_corr_; }

 private:
  CuMatrix<BaseFloat> linearity_;
  CuMatrix<BaseFloat> linearity_corr_;
};

}  // namespace nnet1
}  // namespace kaldi

#endif  // KALDI_NNET_NNET_LINEAR_TRANSFORM_H_