nnet-activation.h
Go to the documentation of this file.
1 // nnet/nnet-activation.h
2 
3 // Copyright 2011-2016 Brno University of Technology (author: Karel Vesely)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #ifndef KALDI_NNET_NNET_ACTIVATION_H_
22 #define KALDI_NNET_NNET_ACTIVATION_H_
23 
24 #include <string>
25 #include <vector>
26 #include <cmath>
27 
28 #include "nnet/nnet-component.h"
29 #include "nnet/nnet-utils.h"
30 #include "cudamatrix/cu-math.h"
31 #include "cudamatrix/cu-rand.h"
32 #include "util/text-utils.h"
33 
34 namespace kaldi {
35 namespace nnet1 {
36 
37 class Softmax : public Component {
38  public:
39  Softmax(int32 dim_in, int32 dim_out):
40  Component(dim_in, dim_out)
41  { }
42 
44  { }
45 
46  Component* Copy() const { return new Softmax(*this); }
47  ComponentType GetType() const { return kSoftmax; }
48 
51  // y = e^x_j/sum_j(e^x_j)
52  out->SoftMaxPerRow(in);
53  }
54 
56  const CuMatrixBase<BaseFloat> &out,
57  const CuMatrixBase<BaseFloat> &out_diff,
58  CuMatrixBase<BaseFloat> *in_diff) {
59  // simply copy the error derivative
60  // (ie. assume crossentropy error function,
61  // while in_diff contains (net_output-target) :
62  // this is already derivative of the error with
63  // respect to activations of last layer neurons)
64  in_diff->CopyFromMat(out_diff);
65  }
66 };
67 
68 
69 class HiddenSoftmax : public Component {
70  public:
71  HiddenSoftmax(int32 dim_in, int32 dim_out) :
72  Component(dim_in, dim_out)
73  { }
74 
76  { }
77 
78  Component* Copy() const { return new HiddenSoftmax(*this); }
79  ComponentType GetType() const { return kHiddenSoftmax; }
80 
83  // y = e^x_j/sum_j(e^x_j)
84  out->SoftMaxPerRow(in);
85  }
86 
88  const CuMatrixBase<BaseFloat> &out,
89  const CuMatrixBase<BaseFloat> &out_diff,
90  CuMatrixBase<BaseFloat> *in_diff) {
91  // This Softmax should be used for a hidden layer, it calculates
92  // the true Jacobian of Softmax: J = diag(out) - out*out^T
93 
94  // The backpropagation formual is:
95  // in_diff = out_diff \odot out - out(out_diff^T * out)
96  // (where \odot is Hadamard product)
97 
98  // 1st term, out_diff \odot out,
99  in_diff->CopyFromMat(out_diff);
100  in_diff->MulElements(out);
101 
102  // 2nd term, -out(out_diff^T * out),
103  diag_out_diff_out_.Resize(out.NumRows());
104  diag_out_diff_out_.AddDiagMatMat(1.0, out_diff, kNoTrans, out, kTrans, 0.0);
105  in_diff->AddDiagVecMat(-1.0, diag_out_diff_out_, out, kNoTrans, 1.0);
106  }
107 
108  private:
111 };
112 
113 class BlockSoftmax : public Component {
114  public:
115  BlockSoftmax(int32 dim_in, int32 dim_out):
116  Component(dim_in, dim_out)
117  { }
118 
120  { }
121 
122  Component* Copy() const { return new BlockSoftmax(*this); }
123  ComponentType GetType() const { return kBlockSoftmax; }
124 
125  void InitData(std::istream &is) {
126  // parse config
127  std::string token,
128  dims_str;
129  while (is >> std::ws, !is.eof()) {
130  ReadToken(is, false, &token);
131  if (token == "<BlockDims>") is >> dims_str;
132  else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
133  << " (BlockDims)";
134  }
135  // parse dims,
136  if (!kaldi::SplitStringToIntegers(dims_str, ",:", false, &block_dims))
137  KALDI_ERR << "Invalid block-dims " << dims_str;
138  // sanity check
139  int32 sum = 0;
140  for (int32 i = 0; i < block_dims.size(); i++) {
141  sum += block_dims[i];
142  }
143  KALDI_ASSERT(sum == OutputDim());
144  }
145 
146  void ReadData(std::istream &is, bool binary) {
147  ReadIntegerVector(is, binary, &block_dims);
148  block_offset.resize(block_dims.size()+1, 0);
149  for (int32 i = 0; i < block_dims.size(); i++) {
150  block_offset[i+1] = block_offset[i] + block_dims[i];
151  }
152  // check
153  KALDI_ASSERT(OutputDim() == block_offset[block_offset.size()-1]);
154  }
155 
156  void WriteData(std::ostream &os, bool binary) const {
157  WriteIntegerVector(os, binary, block_dims);
158  }
159 
162  // perform softmax per block:
163  for (int32 bl = 0; bl < block_dims.size(); bl++) {
164  // get the blocks,
165  CuSubMatrix<BaseFloat> in_bl =
166  in.ColRange(block_offset[bl], block_dims[bl]);
167  CuSubMatrix<BaseFloat> out_bl =
168  out->ColRange(block_offset[bl], block_dims[bl]);
169  // y = e^x_j/sum_j(e^x_j),
170  out_bl.SoftMaxPerRow(in_bl);
171  }
172  }
173 
175  const CuMatrixBase<BaseFloat> &out,
176  const CuMatrixBase<BaseFloat> &out_diff,
177  CuMatrixBase<BaseFloat> *in_diff) {
178  // copy the error derivative:
179  // (assuming we already got softmax-cross-entropy derivative in out_diff)
180  in_diff->CopyFromMat(out_diff);
181 
182  // Set the derivatives to zero for the matrix-lines in which
183  // the sum of 'derivatives' was 1.0 (i.e. there was no target):
184  for (int32 bl = 0; bl < block_dims.size(); bl++) {
185  // get the block,
186  CuSubMatrix<BaseFloat> diff_bl =
187  in_diff->ColRange(block_offset[bl], block_dims[bl]);
188  // get the sum of each row,
189  CuVector<BaseFloat> row_sum(diff_bl.NumRows());
190  row_sum.AddColSumMat(1.0, diff_bl, 0.0); // 0: keep as-is, 1: zero-out
191  // we'll scale rows by 0/1 masks,
192  CuVector<BaseFloat> row_diff_mask(row_sum);
193  row_diff_mask.Scale(-1.0); // 0: keep as-is, -1: zero-out
194  row_diff_mask.Add(1.0); // 1: keep as-is, 0: zero-out
195  // here we should have only 0's and 1's,
196  diff_bl.MulRowsVec(row_diff_mask);
197  }
198  }
199 
200  std::string Info() const {
201  return "\n softmax-dims " + ToString(block_dims);
202  }
203 
204  std::vector<int32> block_dims;
205  std::vector<int32> block_offset;
206 };
207 
208 
209 
210 
211 class Sigmoid : public Component {
212  public:
213  Sigmoid(int32 dim_in, int32 dim_out):
214  Component(dim_in, dim_out)
215  { }
216 
218  { }
219 
220  Component* Copy() const { return new Sigmoid(*this); }
221  ComponentType GetType() const { return kSigmoid; }
222 
225  // y = 1/(1+e^-x)
226  out->Sigmoid(in);
227  }
228 
230  const CuMatrixBase<BaseFloat> &out,
231  const CuMatrixBase<BaseFloat> &out_diff,
232  CuMatrixBase<BaseFloat> *in_diff) {
233  // ey = y(1-y)ex,
234  in_diff->DiffSigmoid(out, out_diff);
235  }
236 };
237 
238 
239 
240 class Tanh : public Component {
241  public:
242  Tanh(int32 dim_in, int32 dim_out):
243  Component(dim_in, dim_out)
244  { }
245 
247  { }
248 
249  Component* Copy() const { return new Tanh(*this); }
250  ComponentType GetType() const { return kTanh; }
251 
254  // y = (e^x - e^(-x)) / (e^x + e^(-x)),
255  out->Tanh(in);
256  }
257 
259  const CuMatrixBase<BaseFloat> &out,
260  const CuMatrixBase<BaseFloat> &out_diff,
261  CuMatrixBase<BaseFloat> *in_diff) {
262  // ey = (1 - y^2)ex
263  in_diff->DiffTanh(out, out_diff);
264  }
265 };
266 
267 
268 
269 class Dropout : public Component {
270  public:
271  Dropout(int32 dim_in, int32 dim_out):
272  Component(dim_in, dim_out),
273  dropout_rate_(0.5)
274  { }
275 
277  { }
278 
279  Component* Copy() const { return new Dropout(*this); }
280  ComponentType GetType() const { return kDropout; }
281 
282  void InitData(std::istream &is) {
283  is >> std::ws; // eat-up whitespace
284  // parse config
285  std::string token;
286  while (is >> std::ws, !is.eof()) {
287  ReadToken(is, false, &token);
288  if (token == "<DropoutRate>") ReadBasicType(is, false, &dropout_rate_);
289  else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
290  << " (DropoutRate)";
291  }
292  KALDI_ASSERT(dropout_rate_ >= 0.0 && dropout_rate_ < 1.0);
293  }
294 
295  void ReadData(std::istream &is, bool binary) {
296  // Read all the '<Tokens>' in arbitrary order,
297  bool finished = false;
298  while ('<' == Peek(is, binary) && !finished) {
299  std::string token;
300  int first_char = PeekToken(is, binary);
301  switch (first_char) {
302  case 'D': ReadToken(is, false, &token);
303  if (token == "<DropoutRate>") ReadBasicType(is, binary, &dropout_rate_);
304  else if (token == "<DropoutRetention>") { /* compatibility */
305  BaseFloat dropout_retention;
306  ReadBasicType(is, binary, &dropout_retention);
307  dropout_rate_ = 1.0 - dropout_retention;
308  } else KALDI_ERR << "Unknown token: " << token;
309  break;
310  case '!': ExpectToken(is, binary, "<!EndOfComponent>");
311  finished = true;
312  break;
313  default: ReadToken(is, false, &token);
314  KALDI_ERR << "Unknown token: " << token;
315  }
316  }
317  KALDI_ASSERT(dropout_rate_ >= 0.0 && dropout_rate_ < 1.0);
318  }
319 
320  void WriteData(std::ostream &os, bool binary) const {
321  WriteToken(os, binary, "<DropoutRate>");
322  WriteBasicType(os, binary, dropout_rate_);
323  }
324 
325  std::string Info() const {
326  return std::string("<DropoutRate> ") + ToString(dropout_rate_);
327  }
328 
331  out->CopyFromMat(in);
332  // set N inputs to zero, according to the 'dropout_rate_' ...
333  dropout_mask_.Resize(out->NumRows(), out->NumCols());
334  rand_.RandUniform(&dropout_mask_); // [0..1]
335  dropout_mask_.Add(-dropout_rate_); // [(-rate)..(1-rate)]
336  dropout_mask_.Heaviside(dropout_mask_); // (x > 0.0 ? 1 : 0)
337  out->MulElements(dropout_mask_);
338  // rescale to keep the same dynamic range as w/o dropout,
339  out->Scale(1.0 / (1.0 - dropout_rate_));
340  }
341 
343  const CuMatrixBase<BaseFloat> &out,
344  const CuMatrixBase<BaseFloat> &out_diff,
345  CuMatrixBase<BaseFloat> *in_diff) {
346  in_diff->CopyFromMat(out_diff);
347  // use same mask on the error derivatives...
348  in_diff->MulElements(dropout_mask_);
349  // enlarge the output to fit same dynamic range as w/o dropout
350  in_diff->Scale(1.0 / (1.0 - dropout_rate_));
351  }
352 
353  BaseFloat GetDropoutRate() { return dropout_rate_; }
354 
356  dropout_rate_ = dr;
357  KALDI_ASSERT(dropout_rate_ >= 0.0 && dropout_rate_ < 1.0);
358  }
359 
360  private:
362 
364 
365  CuMatrix<BaseFloat> dropout_mask_; // random binary mask,
366  // 1 = keep neuron, 0 = drop neuron,
367 };
368 
369 } // namespace nnet1
370 } // namespace kaldi
371 
372 #endif // KALDI_NNET_NNET_ACTIVATION_H_
373 
std::string ToString(const T &t)
Convert basic type to a string (please don't overuse),.
Definition: nnet-utils.h:52
ComponentType GetType() const
Get Type Identification of the component,.
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
std::string Info() const
Print some additional info (after <ComponentName> and the dims),.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
Component * Copy() const
Copy component (deep copy),.
Component * Copy() const
Copy component (deep copy),.
void InitData(std::istream &is)
Virtual interface for initialization and I/O,.
Component * Copy() const
Copy component (deep copy),.
Tanh(int32 dim_in, int32 dim_out)
ComponentType GetType() const
Get Type Identification of the component,.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void Add(Real value)
Definition: cu-vector.cc:1157
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
void InitData(std::istream &is)
Virtual interface for initialization and I/O,.
CuRand< BaseFloat > rand_
generator of random numbers,
Sigmoid(int32 dim_in, int32 dim_out)
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
std::string Info() const
Print some additional info (after <ComponentName> and the dims),.
CuMatrix< BaseFloat > dropout_mask_
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
std::vector< int32 > block_offset
Component * Copy() const
Copy component (deep copy),.
BlockSoftmax(int32 dim_in, int32 dim_out)
int Peek(std::istream &is, bool binary)
Peek consumes whitespace (if binary == false) and then returns the peek() value of the stream...
Definition: io-funcs.cc:145
Component * Copy() const
Copy component (deep copy),.
ComponentType
Component type identification mechanism,.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
void SetDropoutRate(BaseFloat dr)
void Scale(Real value)
Definition: cu-matrix.cc:644
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232
Softmax(int32 dim_in, int32 dim_out)
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
Definition: cu-vector.cc:1298
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
Definition: cu-matrix.cc:1534
BaseFloat dropout_rate_
probability that a neuron is dropped,
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
Definition: cu-matrix.cc:1717
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
ComponentType GetType() const
Get Type Identification of the component,.
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
ComponentType GetType() const
Get Type Identification of the component,.
#define KALDI_ERR
Definition: kaldi-error.h:147
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
void WriteData(std::ostream &os, bool binary) const
Writes the component content.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
void BackpropagateFnc(const CuMatrixBase< BaseFloat > &in, const CuMatrixBase< BaseFloat > &out, const CuMatrixBase< BaseFloat > &out_diff, CuMatrixBase< BaseFloat > *in_diff)
Backward pass transformation (to be implemented by descending class...)
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
Definition: io-funcs.cc:170
ComponentType GetType() const
Get Type Identification of the component,.
Component * Copy() const
Copy component (deep copy),.
ComponentType GetType() const
Get Type Identification of the component,.
void DiffTanh(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the tanh function.
Definition: cu-matrix.cc:1809
std::vector< int32 > block_dims
void DiffSigmoid(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the sigmoid function.
Definition: cu-matrix.cc:1764
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:665
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
Dropout(int32 dim_in, int32 dim_out)
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
CuVector< BaseFloat > diag_out_diff_out_
buffer for dot-products in BackpropagateFnc,
void Scale(Real value)
Definition: cu-vector.cc:1216
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
void ReadData(std::istream &is, bool binary)
Reads the component content.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
Abstract class, building block of the network.
HiddenSoftmax(int32 dim_in, int32 dim_out)
int32 OutputDim() const
Get the dimension of the output,.
void PropagateFnc(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out)
Abstract interface for propagation/backpropagation.
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
void ReadData(std::istream &is, bool binary)
Reads the component content.
void Tanh(const CuMatrixBase< Real > &src)
Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src).
Definition: cu-matrix.cc:1786
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
Definition: cu-matrix.cc:792
void AddDiagVecMat(const Real alpha, const CuVectorBase< Real > &v, const CuMatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
Definition: cu-matrix.cc:1382