nnet-recurrent.h
// nnet/nnet-recurrent.h

// Copyright 2016 Brno University of Technology (author: Karel Vesely)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_NNET_NNET_RECURRENT_STREAMS_H_
#define KALDI_NNET_NNET_RECURRENT_STREAMS_H_

#include <string>
#include <vector>

#include "nnet/nnet-component.h"
#include "nnet/nnet-utils.h"
#include "cudamatrix/cu-math.h"


namespace kaldi {
namespace nnet1 {

/**
 * Component with recurrent connections, 'tanh' non-linearity.
 * No internal state is preserved across chunks: the recurrence
 * starts from a zero vector at t = 0.
 */
class RecurrentComponent : public MultistreamComponent {
 public:
  RecurrentComponent(int32 input_dim, int32 output_dim):
    MultistreamComponent(input_dim, output_dim)
  { }

  ~RecurrentComponent()
  { }

  Component* Copy() const { return new RecurrentComponent(*this); }
  ComponentType GetType() const { return kRecurrentComponent; }

  void InitData(std::istream &is) {
    // define options,
    float param_scale = 0.02;
    // parse the line from prototype,
    std::string token;
    while (is >> std::ws, !is.eof()) {
      ReadToken(is, false, &token);
      if (token == "<GradClip>") ReadBasicType(is, false, &grad_clip_);
      else if (token == "<DiffClip>") ReadBasicType(is, false, &diff_clip_);
      else if (token == "<LearnRateCoef>") ReadBasicType(is, false, &learn_rate_coef_);
      else if (token == "<BiasLearnRateCoef>") ReadBasicType(is, false, &bias_learn_rate_coef_);
      else if (token == "<ParamScale>") ReadBasicType(is, false, &param_scale);
      else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                     << " (GradClip|DiffClip|LearnRateCoef|BiasLearnRateCoef|ParamScale)";
    }

    // init the weights and biases (from uniform dist.),
    w_forward_.Resize(output_dim_, input_dim_);
    w_recurrent_.Resize(output_dim_, output_dim_);
    bias_.Resize(output_dim_);

    RandUniform(0.0, 2.0 * param_scale, &w_forward_);
    RandUniform(0.0, 2.0 * param_scale, &w_recurrent_);
    RandUniform(0.0, 2.0 * param_scale, &bias_);
  }
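
  // Illustrative only (a hypothetical prototype line, values are made up):
  //   <RecurrentComponent> <InputDim> 512 <OutputDim> 512 <ParamScale> 0.02 <GradClip> 5.0
  // The nnet1 prototype parser consumes the component name and the dims, so
  // InitData() above sees only the optional <...> tokens that follow them.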

  void ReadData(std::istream &is, bool binary) {
    // Read all the '<Tokens>' in arbitrary order,
    while ('<' == Peek(is, binary)) {
      std::string token;
      int first_char = PeekToken(is, binary);
      switch (first_char) {
        case 'G': ExpectToken(is, binary, "<GradClip>");
          ReadBasicType(is, binary, &grad_clip_);
          break;
        case 'D': ExpectToken(is, binary, "<DiffClip>");
          ReadBasicType(is, binary, &diff_clip_);
          break;
        case 'L': ExpectToken(is, binary, "<LearnRateCoef>");
          ReadBasicType(is, binary, &learn_rate_coef_);
          break;
        case 'B': ExpectToken(is, binary, "<BiasLearnRateCoef>");
          ReadBasicType(is, binary, &bias_learn_rate_coef_);
          break;
        default: ReadToken(is, false, &token);
          KALDI_ERR << "Unknown token: " << token;
      }
    }

    // Read the data (data follow the tokens),
    w_forward_.Read(is, binary);
    w_recurrent_.Read(is, binary);
    bias_.Read(is, binary);
  }
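
  // On disk the component body is the four '<...>' option tokens (in any
  // order) followed by the raw 'w_forward_', 'w_recurrent_' and 'bias_'
  // tensors; WriteData() below emits exactly this layout.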

  void WriteData(std::ostream &os, bool binary) const {
    WriteToken(os, binary, "<GradClip>");
    WriteBasicType(os, binary, grad_clip_);
    WriteToken(os, binary, "<DiffClip>");
    WriteBasicType(os, binary, diff_clip_);

    WriteToken(os, binary, "<LearnRateCoef>");
    WriteBasicType(os, binary, learn_rate_coef_);
    WriteToken(os, binary, "<BiasLearnRateCoef>");
    WriteBasicType(os, binary, bias_learn_rate_coef_);

    if (!binary) os << "\n";
    w_forward_.Write(os, binary);
    w_recurrent_.Write(os, binary);
    bias_.Write(os, binary);
  }

  int32 NumParams() const {
    return w_forward_.NumRows() * w_forward_.NumCols() +
           w_recurrent_.NumRows() * w_recurrent_.NumCols() +
           bias_.Dim();
  }

  void GetGradient(VectorBase<BaseFloat>* gradient) const {
    KALDI_ASSERT(gradient->Dim() == NumParams());
    int32 offset, len;

    offset = 0; len = w_forward_corr_.NumRows() * w_forward_corr_.NumCols();
    gradient->Range(offset, len).CopyRowsFromMat(w_forward_corr_);

    offset += len; len = w_recurrent_corr_.NumRows() * w_recurrent_corr_.NumCols();
    gradient->Range(offset, len).CopyRowsFromMat(w_recurrent_corr_);

    offset += len; len = bias_corr_.Dim();
    gradient->Range(offset, len).CopyFromVec(bias_corr_);

    offset += len;
    KALDI_ASSERT(offset == NumParams());
  }

  void GetParams(VectorBase<BaseFloat>* params) const {
    KALDI_ASSERT(params->Dim() == NumParams());
    int32 offset, len;

    offset = 0; len = w_forward_.NumRows() * w_forward_.NumCols();
    params->Range(offset, len).CopyRowsFromMat(w_forward_);

    offset += len; len = w_recurrent_.NumRows() * w_recurrent_.NumCols();
    params->Range(offset, len).CopyRowsFromMat(w_recurrent_);

    offset += len; len = bias_.Dim();
    params->Range(offset, len).CopyFromVec(bias_);

    offset += len;
    KALDI_ASSERT(offset == NumParams());
  }

  void SetParams(const VectorBase<BaseFloat>& params) {
    KALDI_ASSERT(params.Dim() == NumParams());
    int32 offset, len;

    offset = 0; len = w_forward_.NumRows() * w_forward_.NumCols();
    w_forward_.CopyRowsFromVec(params.Range(offset, len));

    offset += len; len = w_recurrent_.NumRows() * w_recurrent_.NumCols();
    w_recurrent_.CopyRowsFromVec(params.Range(offset, len));

    offset += len; len = bias_.Dim();
    bias_.CopyFromVec(params.Range(offset, len));

    offset += len;
    KALDI_ASSERT(offset == NumParams());
  }
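
  // GetGradient(), GetParams() and SetParams() all use the same flattening:
  // [ w_forward_ (row-major) ; w_recurrent_ (row-major) ; bias_ ], which is
  // why the offset/len arithmetic is identical in the three methods.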

  std::string Info() const {
    return std::string(" ") +
      "\n w_forward_ " + MomentStatistics(w_forward_) +
      "\n w_recurrent_ " + MomentStatistics(w_recurrent_) +
      "\n bias_ " + MomentStatistics(bias_);
  }

  std::string InfoGradient() const {
    return std::string("") +
      "( learn_rate_coef " + ToString(learn_rate_coef_) +
      ", bias_learn_rate_coef " + ToString(bias_learn_rate_coef_) +
      ", grad-clip " + ToString(grad_clip_) +
      ", diff-clip " + ToString(diff_clip_) + " )" +
      "\n Gradients:" +
      "\n w_forward_corr_ " + MomentStatistics(w_forward_corr_) +
      "\n w_recurrent_corr_ " + MomentStatistics(w_recurrent_corr_) +
      "\n bias_corr_ " + MomentStatistics(bias_corr_) +
      "\n Forward-pass:" +
      "\n out_ " + MomentStatistics(out_) +
      "\n Backward-pass:" +
      "\n out_diff_bptt_ " + MomentStatistics(out_diff_bptt_);
  }

  void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                    CuMatrixBase<BaseFloat> *out) {

    KALDI_ASSERT(in.NumRows() % NumStreams() == 0);
    int32 T = in.NumRows() / NumStreams();
    int32 S = NumStreams();

    // Precopy bias,
    out->AddVecToRows(1.0, bias_, 0.0);
    // Apply 'forward' connections,
    out->AddMatMat(1.0, in, kNoTrans, w_forward_, kTrans, 1.0);

    // First line of 'out' w/o recurrent signal, apply 'tanh' directly,
    out->RowRange(0, S).Tanh(out->RowRange(0, S));

    // Apply 'recurrent' connections,
    for (int32 t = 1; t < T; t++) {
      out->RowRange(t*S, S).AddMatMat(1.0, out->RowRange((t-1)*S, S), kNoTrans,
                                      w_recurrent_, kTrans, 1.0);
      out->RowRange(t*S, S).Tanh(out->RowRange(t*S, S));
      // Zero output for padded frames,
      if (sequence_lengths_.size() == S) {
        for (int32 s = 0; s < S; s++) {
          if (t >= sequence_lengths_[s]) {
            out->Row(t*S + s).SetZero();
          }
        }
      }
    }

    out_ = (*out);  // We'll need a copy for updating the recurrent weights!

    // We are DONE ;)
  }
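
  // Per time-step, the loop above computes, for every stream s,
  //   y_t = tanh(w_forward_ * x_t + w_recurrent_ * y_{t-1} + bias_),
  // where y_0 omits the recurrent term, i.e. the initial state is zero.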

  void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                        const CuMatrixBase<BaseFloat> &out,
                        const CuMatrixBase<BaseFloat> &out_diff,
                        CuMatrixBase<BaseFloat> *in_diff) {

    int32 T = in.NumRows() / NumStreams();
    int32 S = NumStreams();

    // Apply BPTT on 'out_diff',
    out_diff_bptt_ = out_diff;
    for (int32 t = T-1; t >= 1; t--) {
      // buffers,
      CuSubMatrix<BaseFloat> d_t = out_diff_bptt_.RowRange(t*S, S);
      CuSubMatrix<BaseFloat> d_t1 = out_diff_bptt_.RowRange((t-1)*S, S);
      const CuSubMatrix<BaseFloat> y_t = out.RowRange(t*S, S);

      // BPTT,
      d_t.DiffTanh(y_t, d_t);
      d_t1.AddMatMat(1.0, d_t, kNoTrans, w_recurrent_, kNoTrans, 1.0);

      // clipping,
      if (diff_clip_ > 0.0) {
        d_t1.ApplyFloor(-diff_clip_);
        d_t1.ApplyCeiling(diff_clip_);
      }

      // Zero diff for padded frames,
      if (sequence_lengths_.size() == S) {
        for (int32 s = 0; s < S; s++) {
          if (t >= sequence_lengths_[s]) {
            out_diff_bptt_.Row(t*S + s).SetZero();
          }
        }
      }
    }

    // Apply 'DiffTanh' on first block,
    CuSubMatrix<BaseFloat> d_t = out_diff_bptt_.RowRange(0, S);
    const CuSubMatrix<BaseFloat> y_t = out.RowRange(0, S);
    d_t.DiffTanh(y_t, d_t);

    // Transform diffs to 'in_diff',
    in_diff->AddMatMat(1.0, out_diff_bptt_, kNoTrans, w_forward_, kNoTrans, 0.0);

    // We are DONE ;)
  }
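
  // In the BPTT loop, DiffTanh() turns d_t into (1 - y_t^2) .* d_t and the
  // AddMatMat() accumulates d_{t-1} += d_t * w_recurrent_, so the error is
  // carried backwards through both the non-linearity and the recurrence;
  // 'diff_clip_' bounds each element of the carried signal for stability.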

  void Update(const CuMatrixBase<BaseFloat> &input,
              const CuMatrixBase<BaseFloat> &diff) {
    int32 T = input.NumRows() / NumStreams();
    int32 S = NumStreams();

    // getting the learning rate,
    const BaseFloat lr = opts_.learn_rate;
    const BaseFloat mmt = opts_.momentum;

    if (bias_corr_.Dim() != OutputDim()) {
      w_forward_corr_.Resize(w_forward_.NumRows(), w_forward_.NumCols(), kSetZero);
      w_recurrent_corr_.Resize(w_recurrent_.NumRows(), w_recurrent_.NumCols(), kSetZero);
      bias_corr_.Resize(OutputDim(), kSetZero);
    }

    // getting the gradients,
    w_forward_corr_.AddMatMat(1.0, out_diff_bptt_, kTrans, input, kNoTrans, mmt);

    w_recurrent_corr_.AddMatMat(1.0, out_diff_bptt_.RowRange(S, (T-1)*S), kTrans,
                                out_.RowRange(0, (T-1)*S), kNoTrans, mmt);

    bias_corr_.AddRowSumMat(1.0, out_diff_bptt_, mmt);

    // updating,
    w_forward_.AddMat(-lr * learn_rate_coef_, w_forward_corr_);
    w_recurrent_.AddMat(-lr * learn_rate_coef_, w_recurrent_corr_);
    bias_.AddVec(-lr * bias_learn_rate_coef_, bias_corr_);
  }
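
  // With the streams interleaved, the recurrent gradient amounts to
  //   dW_rec = sum_{t=1}^{T-1} d_t * y_{t-1}^T,
  // hence the one-step shift between the RowRange on 'out_diff_bptt_' and on
  // 'out_'. The beta argument 'mmt' folds the previous correction in as momentum.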

 private:
  BaseFloat grad_clip_;  ///< Clipping of the update,
  BaseFloat diff_clip_;  ///< Clipping in the BPTT loop,

  // trainable parameters,
  CuMatrix<BaseFloat> w_forward_;
  CuMatrix<BaseFloat> w_recurrent_;
  CuVector<BaseFloat> bias_;

  // update buffers,
  CuMatrix<BaseFloat> w_forward_corr_;
  CuMatrix<BaseFloat> w_recurrent_corr_;
  CuVector<BaseFloat> bias_corr_;

  // forward propagation buffer,
  CuMatrix<BaseFloat> out_;

  // back-propagate buffer,
  CuMatrix<BaseFloat> out_diff_bptt_;
};  // class RecurrentComponent
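
// Usage sketch (illustrative, not part of the original header). In nnet1
// training, the component is built from a prototype (InitData) or loaded
// (ReadData); the training loop then sets per-stream lengths and propagates
// a matrix whose rows interleave the S parallel streams frame by frame:
//
//   RecurrentComponent rnn(40, 128);  // input_dim=40, output_dim=128 (made-up dims)
//   std::vector<int32> lens(4, 20);   // 4 streams of 20 frames each
//   rnn.SetSeqLengths(lens);          // inherited from MultistreamComponent
//   // The input must then have T*S = 20*4 rows; row t*S + s is frame t of stream s.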

}  // namespace nnet1
}  // namespace kaldi

#endif  // KALDI_NNET_NNET_RECURRENT_STREAMS_H_