nnet-sentence-averaging-component.h
// nnet/nnet-sentence-averaging-component.h

// Copyright 2013-2016 Brno University of Technology (Author: Karel Vesely)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_NNET_NNET_SENTENCE_AVERAGING_COMPONENT_H_
#define KALDI_NNET_NNET_SENTENCE_AVERAGING_COMPONENT_H_

#include <string>

#include "nnet/nnet-component.h"
#include "nnet/nnet-utils.h"
#include "cudamatrix/cu-math.h"

namespace kaldi {
namespace nnet1 {

/** SimpleSentenceAveragingComponent does not have a nested network;
 * it is intended to be used inside of a <ParallelComponent>.
 */
class SimpleSentenceAveragingComponent : public Component {
 public:
  SimpleSentenceAveragingComponent(int32 dim_in, int32 dim_out):
    Component(dim_in, dim_out),
    gradient_boost_(100.0),
    shrinkage_(0.0),
    only_summing_(false)
  { }

  ~SimpleSentenceAveragingComponent()
  { }

  Component* Copy() const {
    return new SimpleSentenceAveragingComponent(*this);
  }

  ComponentType GetType() const {
    return kSimpleSentenceAveragingComponent;
  }

  void InitData(std::istream &is) {
    // parse config
    std::string token;
    while (is >> std::ws, !is.eof()) {
      ReadToken(is, false, &token);
      if (token == "<GradientBoost>") ReadBasicType(is, false, &gradient_boost_);
      else if (token == "<Shrinkage>") ReadBasicType(is, false, &shrinkage_);
      else if (token == "<OnlySumming>") ReadBasicType(is, false, &only_summing_);
      else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                     << " (GradientBoost|Shrinkage|OnlySumming)";
    }
  }

  void ReadData(std::istream &is, bool binary) {
    bool end_loop = false;
    while (!end_loop && '<' == Peek(is, binary)) {
      int first_char = PeekToken(is, binary);
      switch (first_char) {
        case 'G': ExpectToken(is, binary, "<GradientBoost>");
          ReadBasicType(is, binary, &gradient_boost_);
          break;
        case 'S': ExpectToken(is, binary, "<Shrinkage>");
          ReadBasicType(is, binary, &shrinkage_);
          break;
        case 'O': ExpectToken(is, binary, "<OnlySumming>");
          // compatibility trick:
          // in some models 'only_summing_' was a float '0.0',
          // while now 'only_summing_' is a 'bool',
          try {
            ReadBasicType(is, binary, &only_summing_);
          } catch(const std::exception &e) {
            KALDI_WARN << "ERROR was handled by exception!";
            BaseFloat dummy_float;
            ReadBasicType(is, binary, &dummy_float);
          }
          break;
        case '!':
          ExpectToken(is, binary, "<!EndOfComponent>");
        default:
          end_loop = true;
      }
    }
  }

  void WriteData(std::ostream &os, bool binary) const {
    WriteToken(os, binary, "<GradientBoost>");
    WriteBasicType(os, binary, gradient_boost_);
    WriteToken(os, binary, "<Shrinkage>");
    WriteBasicType(os, binary, shrinkage_);
    WriteToken(os, binary, "<OnlySumming>");
    WriteBasicType(os, binary, only_summing_);
  }

  std::string Info() const {
    return std::string("\n  gradient-boost ") + ToString(gradient_boost_) +
           ", shrinkage: " + ToString(shrinkage_) +
           ", only summing: " + ToString(only_summing_);
  }
  std::string InfoGradient() const {
    return Info();
  }

  void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                    CuMatrixBase<BaseFloat> *out) {
    // get the average row-vector,
    average_row_.Resize(InputDim());
    if (only_summing_) {
      average_row_.AddRowSumMat(1.0, in, 0.0);
    } else {
      average_row_.AddRowSumMat(1.0/(in.NumRows()+shrinkage_), in, 0.0);
    }
    // copy it to the output,
    out->AddVecToRows(1.0, average_row_, 0.0);
  }

  void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                        const CuMatrixBase<BaseFloat> &out,
                        const CuMatrixBase<BaseFloat> &out_diff,
                        CuMatrixBase<BaseFloat> *in_diff) {
    // When averaging, a single input frame influenced all the output frames.
    // So the derivative w.r.t. a single input frame is the sum of the output
    // derivatives, scaled by the averaging constant 1/K.
    //
    // At the same time, all the input frames of the average influenced
    // all the output frames. So the loss derivative is the same for all
    // the input frames entering the averaging.
    //
    // getting the average output diff,
    average_diff_.Resize(OutputDim());
    if (only_summing_) {
      average_diff_.AddRowSumMat(1.0, out_diff, 0.0);
    } else {
      average_diff_.AddRowSumMat(1.0/(out_diff.NumRows()+shrinkage_), out_diff, 0.0);
    }
    // copy the derivative into the input diff (applying the gradient-boost!),
    in_diff->AddVecToRows(gradient_boost_, average_diff_, 0.0);
  }

 private:
  /// Auxiliary buffer for forward propagation (for the average vector),
  CuVector<BaseFloat> average_row_;

  /// Auxiliary buffer for back-propagation (for the average vector),
  CuVector<BaseFloat> average_diff_;

  /// Scalar applied on the gradient in backpropagation,
  BaseFloat gradient_boost_;

  /// Number of 'imaginary' zero-vectors in the average
  /// (shrinks the average vector for short sentences),
  BaseFloat shrinkage_;

  /// Removes the normalization term from the arithmetic mean (when true).
  bool only_summing_;
};
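
The comments in PropagateFnc and BackpropagateFnc above fully describe the behaviour of this component: every output frame is the (optionally shrunk or only-summed) average of the input frames, and every input frame receives the averaged output gradient multiplied by gradient_boost_. Below is a minimal standalone sketch of that arithmetic on plain std::vector data; the names and values are illustrative only and are not part of Kaldi.

#include <cstdio>
#include <vector>

int main() {
  const double gradient_boost = 100.0, shrinkage = 0.0;
  // 3 input frames of dimension 2,
  std::vector<std::vector<double> > in = {{1, 2}, {3, 4}, {5, 6}};

  // Forward pass: every output row is the same (optionally shrunk) average row.
  const double scale = 1.0 / (in.size() + shrinkage);
  std::vector<double> average_row(2, 0.0);
  for (const auto &row : in)
    for (size_t d = 0; d < 2; ++d) average_row[d] += scale * row[d];

  // Backward pass: every input frame receives the averaged output gradient,
  // multiplied by gradient_boost (the output gradient is all ones here).
  std::vector<std::vector<double> > out_diff(in.size(), std::vector<double>(2, 1.0));
  std::vector<double> average_diff(2, 0.0);
  for (const auto &row : out_diff)
    for (size_t d = 0; d < 2; ++d) average_diff[d] += row[d] / out_diff.size();

  for (size_t d = 0; d < 2; ++d)
    std::printf("out[%zu] = %g   in_diff[%zu] = %g\n",
                d, average_row[d], d, gradient_boost * average_diff[d]);
  return 0;
}

With the toy values above, every output row equals the average (3, 4), and every input-frame gradient equals 100 times the averaged output gradient.
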
/* Deprecated!!!, keeping it as Katka Zmolikova used it in JSALT 2015. */
class SentenceAveragingComponent : public UpdatableComponent {
 public:
  SentenceAveragingComponent(int32 dim_in, int32 dim_out):
    UpdatableComponent(dim_in, dim_out), learn_rate_factor_(100.0)
  { }

  ~SentenceAveragingComponent()
  { }

  Component* Copy() const { return new SentenceAveragingComponent(*this); }

  ComponentType GetType() const { return kSentenceAveragingComponent; }

  void InitData(std::istream &is) {
    // define options
    std::string nested_nnet_filename;
    std::string nested_nnet_proto;
    // parse config
    std::string token;
    while (is >> std::ws, !is.eof()) {
      ReadToken(is, false, &token);
      if (token == "<NestedNnetFilename>") ReadToken(is, false, &nested_nnet_filename);
      else if (token == "<NestedNnetProto>") ReadToken(is, false, &nested_nnet_proto);
      else if (token == "<LearnRateFactor>") ReadBasicType(is, false, &learn_rate_factor_);
      else KALDI_ERR << "Unknown token " << token << ", a typo in config?";
    }
    // initialize (read an already prepared nnet from file, or build it from a prototype),
    KALDI_ASSERT((nested_nnet_proto != "") ^ (nested_nnet_filename != ""));  // xor,
    if (nested_nnet_filename != "") nnet_.Read(nested_nnet_filename);
    if (nested_nnet_proto != "") nnet_.Init(nested_nnet_proto);
    // check the dims of the nested nnet,
    KALDI_ASSERT(InputDim() == nnet_.InputDim());
    KALDI_ASSERT(OutputDim() == nnet_.OutputDim() + InputDim());
  }

  void ReadData(std::istream &is, bool binary) {
    nnet_.Read(is, binary);
    KALDI_ASSERT(nnet_.InputDim() == InputDim());
    KALDI_ASSERT(nnet_.OutputDim() + InputDim() == OutputDim());
  }

  void WriteData(std::ostream &os, bool binary) const {
    nnet_.Write(os, binary);
  }

  int32 NumParams() const { return nnet_.NumParams(); }

  void GetGradient(VectorBase<BaseFloat>* gradient) const {
    KALDI_ERR << "Unimplemented!";
  }

  void GetParams(VectorBase<BaseFloat>* params) const {
    KALDI_ASSERT(params->Dim() == NumParams());
    Vector<BaseFloat> params_aux;
    nnet_.GetParams(&params_aux);
    params->CopyFromVec(params_aux);
  }

  void SetParams(const VectorBase<BaseFloat>& params) {
    KALDI_ERR << "Unimplemented!";
  }

  std::string Info() const {
    return std::string("nested_network {\n") + nnet_.Info() + "}\n";
  }

  std::string InfoGradient() const {
    return std::string("nested_gradient {\n") + nnet_.InfoGradient() + "}\n";
  }

  void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                    CuMatrixBase<BaseFloat> *out) {
    // Get the output of the nested network,
    CuMatrix<BaseFloat> out_nnet;
    nnet_.Propagate(in, &out_nnet);
    // Get the average row (averaging over the time axis):
    // the averaging corresponds to extracting a 'constant vector'
    // code for a single sentence,
    int32 num_inputs = in.NumCols(),
      nnet_outputs = nnet_.OutputDim(),
      num_frames = out_nnet.NumRows();

    CuVector<BaseFloat> average_row(nnet_outputs);
    average_row.AddRowSumMat(1.0/num_frames, out_nnet, 0.0);
    // Forward the sentence code along with the input features,
    out->ColRange(0, nnet_outputs).AddVecToRows(1.0, average_row, 0.0);
    out->ColRange(nnet_outputs, num_inputs).CopyFromMat(in);
  }

  void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                        const CuMatrixBase<BaseFloat> &out,
                        const CuMatrixBase<BaseFloat> &out_diff,
                        CuMatrixBase<BaseFloat> *in_diff) {
    if (in_diff == NULL) return;
    int32 num_inputs = in.NumCols(),
      nnet_outputs = nnet_.OutputDim();
    in_diff->CopyFromMat(out_diff.ColRange(nnet_outputs, num_inputs));
  }

  void Update(const CuMatrixBase<BaseFloat> &input,
              const CuMatrixBase<BaseFloat> &diff) {
    // get useful dims,
    int32 nnet_outputs = nnet_.OutputDim(),
      num_frames = diff.NumRows();
    // Pass the derivative into the nested network. The loss derivative is averaged:
    // a single frame from the nested network influenced all the frames in the main network,
    // so to get the derivative w.r.t. a single frame from the nested network we sum the
    // derivatives of all frames from the main network (and scale by the 1/Nframes constant).
    //
    // In fact, all the frames from the nested network influenced all the input frames of the
    // main nnet, so the loss derivative w.r.t. the nested network output is the same for all
    // frames in the sentence.
    CuVector<BaseFloat> average_diff(nnet_outputs);
    average_diff.AddRowSumMat(1.0 / num_frames, diff.ColRange(0, nnet_outputs), 0.0);
    CuMatrix<BaseFloat> nnet_out_diff(num_frames, nnet_outputs);
    nnet_out_diff.AddVecToRows(1.0, average_diff, 0.0);
    //
    nnet_.Backpropagate(nnet_out_diff, NULL);
  }

  void SetTrainOptions(const NnetTrainOptions &opts) {
    UpdatableComponent::SetTrainOptions(opts);
    // Pass the train options to the nested nnet (with a boosted learning rate),
    NnetTrainOptions o(opts);
    o.learn_rate *= learn_rate_factor_;
    nnet_.SetTrainOptions(o);
  }

 private:
  Nnet nnet_;
  BaseFloat learn_rate_factor_;
};
/* Deprecated */
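
For orientation, here is a standalone sketch (again illustrative only, not Kaldi code) of the output layout produced by PropagateFnc above: a per-sentence 'code' is obtained by averaging the nested network's output over time, broadcast to every frame, and concatenated with the original input features. BackpropagateFnc then passes only the feature columns of out_diff back to the main network, while Update averages the first nnet_outputs columns of the gradient and sends that into the nested network.

#include <cstdio>
#include <vector>

int main() {
  const int num_frames = 3, nnet_outputs = 2;
  // Input features, and a stand-in for nnet_.Propagate(in, &out_nnet).
  std::vector<std::vector<double> > in = {{10, 11}, {20, 21}, {30, 31}};
  std::vector<std::vector<double> > out_nnet = {{1, 0}, {2, 0}, {3, 3}};

  // Average the nested-network output over the time axis (the 'sentence code').
  std::vector<double> average_row(nnet_outputs, 0.0);
  for (const auto &row : out_nnet)
    for (int d = 0; d < nnet_outputs; ++d) average_row[d] += row[d] / num_frames;

  // Output layout: [ sentence code | input features ], identical code in every frame.
  for (int t = 0; t < num_frames; ++t)
    std::printf("frame %d: [ %g %g | %g %g ]\n", t,
                average_row[0], average_row[1], in[t][0], in[t][1]);
  return 0;
}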

}  // namespace nnet1
}  // namespace kaldi

#endif  // KALDI_NNET_NNET_SENTENCE_AVERAGING_COMPONENT_H_
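
For reference, the InitData tokens parsed above would normally come from nnet1 prototype lines. The lines below are a hedged example of what such lines might look like: the component markers follow the usual nnet1 convention, the dimensions, values, and the file name nested.proto are made up for illustration, and for SentenceAveragingComponent the output dim must equal the nested nnet's output dim plus the input dim.

<SimpleSentenceAveragingComponent> <InputDim> 100 <OutputDim> 100 <GradientBoost> 100.0 <Shrinkage> 0.0 <OnlySumming> F
<SentenceAveragingComponent> <InputDim> 40 <OutputDim> 140 <NestedNnetProto> nested.proto <LearnRateFactor> 100.0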