nnet-parametric-relu.h
// nnet/nnet-parametric-relu.h

// Copyright 2016  Brno University of Technology (author: Murali Karthick B)
//           2011-2014  Brno University of Technology (author: Karel Vesely)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_NNET_NNET_PARAMETRIC_RELU_H_
#define KALDI_NNET_NNET_PARAMETRIC_RELU_H_

#include <string>

#include "nnet/nnet-component.h"
#include "nnet/nnet-utils.h"
#include "cudamatrix/cu-math.h"

namespace kaldi {
namespace nnet1 {

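/**
 * ParametricRelu (PReLU) : activation function with trainable per-neuron slopes,
 *   out = alpha * in,  if in > 0,
 *   out = beta  * in,  otherwise,
 * where 'alpha' and 'beta' are vectors with one element per output neuron.
 * Their SGD updates are scaled by <AlphaLearnRateCoef> and <BetaLearnRateCoef>
 * (a coefficient of 0.0 disables the learning of that parameter).
 */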
class ParametricRelu : public UpdatableComponent {
 public:
  ParametricRelu(int32 dim_in, int32 dim_out):
    UpdatableComponent(dim_in, dim_out),
    alpha_(dim_out),
    beta_(dim_out),
    alpha_corr_(dim_out),
    beta_corr_(dim_out),
    alpha_learn_rate_coef_(1.0),
    beta_learn_rate_coef_(1.0)
  { }

  ~ParametricRelu()
  { }

  Component* Copy() const { return new ParametricRelu(*this); }
  ComponentType GetType() const { return kParametricRelu; }

  void InitData(std::istream &is) {
    // define options
    BaseFloat alpha = 1.0, beta = 0.0;

    // parse config
    std::string token;
    while (is >> std::ws, !is.eof()) {
      ReadToken(is, false, &token);
      if (token == "<Alpha>") ReadBasicType(is, false, &alpha);
      else if (token == "<Beta>") ReadBasicType(is, false, &beta);
      else if (token == "<AlphaLearnRateCoef>") ReadBasicType(is, false, &alpha_learn_rate_coef_);
      else if (token == "<BetaLearnRateCoef>") ReadBasicType(is, false, &beta_learn_rate_coef_);
      else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                     << " (Alpha|Beta|AlphaLearnRateCoef|BetaLearnRateCoef)";
    }

    // Initialize trainable parameters,
    alpha_.Set(alpha);
    beta_.Set(beta);
  }

  void ReadData(std::istream &is, bool binary) {
    // Read all the '<Tokens>' in arbitrary order,
    while ('<' == Peek(is, binary)) {
      int first_char = PeekToken(is, binary);
      switch (first_char) {
        case 'A': ExpectToken(is, binary, "<AlphaLearnRateCoef>");
          ReadBasicType(is, binary, &alpha_learn_rate_coef_);
          break;
        case 'B': ExpectToken(is, binary, "<BetaLearnRateCoef>");
          ReadBasicType(is, binary, &beta_learn_rate_coef_);
          break;
        default:
          std::string token;
          ReadToken(is, false, &token);
          KALDI_ERR << "Unknown token: " << token;
      }
    }
    // ParametricRelu scaling parameters
    alpha_.Read(is, binary);
    beta_.Read(is, binary);
    KALDI_ASSERT(alpha_.Dim() == output_dim_);
    KALDI_ASSERT(beta_.Dim() == output_dim_);
  }

  void WriteData(std::ostream &os, bool binary) const {
    WriteToken(os, binary, "<AlphaLearnRateCoef>");
    WriteBasicType(os, binary, alpha_learn_rate_coef_);
    WriteToken(os, binary, "<BetaLearnRateCoef>");
    WriteBasicType(os, binary, beta_learn_rate_coef_);

    // ParametricRelu scales for each neuron,
    if (!binary) os << "\n";
    alpha_.Write(os, binary);
    beta_.Write(os, binary);
  }

  int32 NumParams() const {
    return alpha_.Dim() + beta_.Dim();
  }

  void GetGradient(VectorBase<BaseFloat>* gradient) const {
    KALDI_ASSERT(gradient->Dim() == NumParams());
    int32 alpha_num_elem = alpha_.Dim();
    int32 beta_num_elem = beta_.Dim();
    gradient->Range(0, alpha_num_elem).CopyFromVec(Vector<BaseFloat>(alpha_corr_));
    gradient->Range(alpha_num_elem, beta_num_elem).CopyFromVec(Vector<BaseFloat>(beta_corr_));
  }

  void GetParams(VectorBase<BaseFloat>* params) const {
    KALDI_ASSERT(params->Dim() == NumParams());
    int32 alpha_num_elem = alpha_.Dim();
    int32 beta_num_elem = beta_.Dim();
    params->Range(0, alpha_num_elem).CopyFromVec(Vector<BaseFloat>(alpha_));
    params->Range(alpha_num_elem, beta_num_elem).CopyFromVec(Vector<BaseFloat>(beta_));
  }

  void SetParams(const VectorBase<BaseFloat>& params) {
    KALDI_ASSERT(params.Dim() == NumParams());
    int32 alpha_num_elem = alpha_.Dim();
    int32 beta_num_elem = beta_.Dim();
    alpha_.CopyFromVec(params.Range(0, alpha_num_elem));
    beta_.CopyFromVec(params.Range(alpha_num_elem, beta_num_elem));
  }

  std::string Info() const {
    return std::string("\n  alpha") + MomentStatistics(alpha_) +
      ", alpha-lr-coef " + ToString(alpha_learn_rate_coef_) +
      "\n  beta" + MomentStatistics(beta_) +
      ", beta-lr-coef " + ToString(beta_learn_rate_coef_);
  }
  std::string InfoGradient() const {
    return std::string("\n  alpha_grad") + MomentStatistics(alpha_corr_) +
      ", alpha-lr-coef " + ToString(alpha_learn_rate_coef_) +
      "\n  beta_grad" + MomentStatistics(beta_corr_) +
      ", beta-lr-coef " + ToString(beta_learn_rate_coef_);
  }

  void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                    CuMatrixBase<BaseFloat> *out) {
    // out = (in > 0.0 ? alpha * in : beta * in)
    out->ParametricRelu(in, alpha_, beta_);
  }

  void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                        const CuMatrixBase<BaseFloat> &out,
                        const CuMatrixBase<BaseFloat> &out_diff,
                        CuMatrixBase<BaseFloat> *in_diff) {
    // in_diff = (in > 0 ? alpha * out_diff : beta * out_diff)
    in_diff->DiffParametricRelu(in, out_diff, alpha_, beta_);
  }

  void Update(const CuMatrixBase<BaseFloat> &input,
              const CuMatrixBase<BaseFloat> &diff) {
    // we use these hyperparameters,
    const BaseFloat alpha_lr = opts_.learn_rate * alpha_learn_rate_coef_;
    const BaseFloat beta_lr = opts_.learn_rate * beta_learn_rate_coef_;
    const BaseFloat mmt = opts_.momentum;

    if (alpha_learn_rate_coef_ > 0.0) {
      // get gradient: d(out)/d(alpha) is 'input' where input > 0, zero elsewhere,
      alpha_aux_ = input;
      alpha_aux_.ApplyFloor(0.0);  // keep the positive ReLU inputs,
      alpha_aux_.MulElements(diff);
      alpha_corr_.AddRowSumMat(1.0, alpha_aux_, mmt);
      // update,
      alpha_.AddVec(-alpha_lr, alpha_corr_);
    }
    if (beta_learn_rate_coef_ > 0.0) {
      // get gradient: d(out)/d(beta) is 'input' where input < 0, zero elsewhere,
      beta_aux_ = input;
      beta_aux_.ApplyCeiling(0.0);  // keep the negative ReLU inputs,
      beta_aux_.MulElements(diff);
      beta_corr_.AddRowSumMat(1.0, beta_aux_, mmt);
      beta_.AddVec(-beta_lr, beta_corr_);
    }
  }

 private:
  CuVector<BaseFloat> alpha_;  ///< Vector of 'alphas', one value per neuron,
  CuVector<BaseFloat> beta_;  ///< Vector of 'betas', one value per neuron,

  CuVector<BaseFloat> alpha_corr_;  ///< Vector of 'alpha' updates,
  CuVector<BaseFloat> beta_corr_;  ///< Vector of 'beta' updates,

  CuMatrix<BaseFloat> alpha_aux_;  ///< Auxiliary matrix for getting 'alpha' updates,
  CuMatrix<BaseFloat> beta_aux_;  ///< Auxiliary matrix for getting 'beta' updates,

  BaseFloat alpha_learn_rate_coef_;  ///< Controls learning rate for alpha (0.0 disables learning),
  BaseFloat beta_learn_rate_coef_;  ///< Controls learning rate for beta (0.0 disables learning),
};

}  // namespace nnet1
}  // namespace kaldi

#endif  // KALDI_NNET_NNET_PARAMETRIC_RELU_H_
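Below is a minimal usage sketch, not part of the header: it constructs the component, initializes it from a config string using the same tokens that InitData() parses, and runs a forward pass through the base-class Propagate() wrapper. The dimensions, the config values, and the file name parametric-relu-example.cc are illustrative assumptions, not code from the Kaldi sources.

// parametric-relu-example.cc (hypothetical, for illustration only)
#include <sstream>

#include "nnet/nnet-parametric-relu.h"

int main() {
  using namespace kaldi;
  using namespace kaldi::nnet1;

  // A 256 -> 256 ParametricRelu; the dimensions are arbitrary for this sketch.
  ParametricRelu prelu(256, 256);

  // Same tokens that InitData() parses; the values are illustrative.
  std::istringstream conf("<Alpha> 1.0 <Beta> 0.25 "
                          "<AlphaLearnRateCoef> 1.0 <BetaLearnRateCoef> 1.0");
  prelu.InitData(conf);

  // Forward pass on random data; Propagate() resizes 'out' and calls PropagateFnc().
  CuMatrix<BaseFloat> in(8, 256), out;
  in.SetRandn();
  prelu.Propagate(in, &out);

  KALDI_LOG << prelu.Info();  // per-neuron statistics of alpha/beta,
  return 0;
}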