// nnet2/nnet-component.cc -- implementation file (Doxygen listing header removed).
1 // nnet2/nnet-component.cc
2 
3 // Copyright 2011-2012 Karel Vesely
4 // 2013-2014 Johns Hopkins University (author: Daniel Povey)
5 // 2013 Xiaohui Zhang
6 // 2014 Vijayaditya Peddinti
7 // 2014-2015 Guoguo Chen
8 
9 // See ../../COPYING for clarification regarding multiple authors
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
19 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
20 // MERCHANTABLITY OR NON-INFRINGEMENT.
21 // See the Apache 2 License for the specific language governing permissions and
22 // limitations under the License.
23 
24 #include <iterator>
25 #include <sstream>
26 #include "nnet2/nnet-component.h"
29 #include "util/stl-utils.h"
30 #include "util/text-utils.h"
31 #include "util/kaldi-io.h"
32 
33 namespace kaldi {
34 namespace nnet2 {
35 
36 // static
37 Component* Component::ReadNew(std::istream &is, bool binary) {
38  std::string token;
39  ReadToken(is, binary, &token); // e.g. "<SigmoidComponent>".
40  token.erase(0, 1); // erase "<".
41  token.erase(token.length()-1); // erase ">".
42  Component *ans = NewComponentOfType(token);
43  if (!ans)
44  KALDI_ERR << "Unknown component type " << token;
45  ans->Read(is, binary);
46  return ans;
47 }
48 
49 
50 // static
51 Component* Component::NewComponentOfType(const std::string &component_type) {
52  Component *ans = NULL;
53  if (component_type == "SigmoidComponent") {
54  ans = new SigmoidComponent();
55  } else if (component_type == "TanhComponent") {
56  ans = new TanhComponent();
57  } else if (component_type == "PowerComponent") {
58  ans = new PowerComponent();
59  } else if (component_type == "SoftmaxComponent") {
60  ans = new SoftmaxComponent();
61  } else if (component_type == "LogSoftmaxComponent") {
62  ans = new LogSoftmaxComponent();
63  } else if (component_type == "RectifiedLinearComponent") {
64  ans = new RectifiedLinearComponent();
65  } else if (component_type == "NormalizeComponent") {
66  ans = new NormalizeComponent();
67  } else if (component_type == "SoftHingeComponent") {
68  ans = new SoftHingeComponent();
69  } else if (component_type == "PnormComponent") {
70  ans = new PnormComponent();
71  } else if (component_type == "MaxoutComponent") {
72  ans = new MaxoutComponent();
73  } else if (component_type == "ScaleComponent") {
74  ans = new ScaleComponent();
75  } else if (component_type == "AffineComponent") {
76  ans = new AffineComponent();
77  } else if (component_type == "AffineComponentPreconditioned") {
79  } else if (component_type == "AffineComponentPreconditionedOnline") {
81  } else if (component_type == "SumGroupComponent") {
82  ans = new SumGroupComponent();
83  } else if (component_type == "BlockAffineComponent") {
84  ans = new BlockAffineComponent();
85  } else if (component_type == "BlockAffineComponentPreconditioned") {
87  } else if (component_type == "PermuteComponent") {
88  ans = new PermuteComponent();
89  } else if (component_type == "DctComponent") {
90  ans = new DctComponent();
91  } else if (component_type == "FixedLinearComponent") {
92  ans = new FixedLinearComponent();
93  } else if (component_type == "FixedAffineComponent") {
94  ans = new FixedAffineComponent();
95  } else if (component_type == "FixedScaleComponent") {
96  ans = new FixedScaleComponent();
97  } else if (component_type == "FixedBiasComponent") {
98  ans = new FixedBiasComponent();
99  } else if (component_type == "SpliceComponent") {
100  ans = new SpliceComponent();
101  } else if (component_type == "SpliceMaxComponent") {
102  ans = new SpliceMaxComponent();
103  } else if (component_type == "DropoutComponent") {
104  ans = new DropoutComponent();
105  } else if (component_type == "AdditiveNoiseComponent") {
106  ans = new AdditiveNoiseComponent();
107  } else if (component_type == "Convolutional1dComponent") {
108  ans = new Convolutional1dComponent();
109  } else if (component_type == "MaxpoolingComponent") {
110  ans = new MaxpoolingComponent();
111  }
112  return ans;
113 }
114 
115 // static
116 Component* Component::NewFromString(const std::string &initializer_line) {
117  std::istringstream istr(initializer_line);
118  std::string component_type; // e.g. "SigmoidComponent".
119  istr >> component_type >> std::ws;
120  std::string rest_of_line;
121  getline(istr, rest_of_line);
122  Component *ans = NewComponentOfType(component_type);
123  if (ans == NULL)
124  KALDI_ERR << "Bad initializer line (no such type of Component): "
125  << initializer_line;
126  ans->InitFromString(rest_of_line);
127  return ans;
128 }
129 
130 
131 // This is like ExpectToken but for two tokens, and it
132 // will either accept token1 and then token2, or just token2.
133 // This is useful in Read functions where the first token
134 // may already have been consumed.
135 static void ExpectOneOrTwoTokens(std::istream &is, bool binary,
136  const std::string &token1,
137  const std::string &token2) {
138  KALDI_ASSERT(token1 != token2);
139  std::string temp;
140  ReadToken(is, binary, &temp);
141  if (temp == token1) {
142  ExpectToken(is, binary, token2);
143  } else {
144  if (temp != token2) {
145  KALDI_ERR << "Expecting token " << token1 << " or " << token2
146  << " but got " << temp;
147  }
148  }
149 }
150 
151 
152 // static
153 bool ParseFromString(const std::string &name, std::string *string,
154  int32 *param) {
155  std::vector<std::string> split_string;
156  SplitStringToVector(*string, " \t", true,
157  &split_string);
158  std::string name_equals = name + "="; // the name and then the equals sign.
159  size_t len = name_equals.length();
160 
161  for (size_t i = 0; i < split_string.size(); i++) {
162  if (split_string[i].compare(0, len, name_equals) == 0) {
163  if (!ConvertStringToInteger(split_string[i].substr(len), param))
164  KALDI_ERR << "Bad option " << split_string[i];
165  *string = "";
166  // Set "string" to all the pieces but the one we used.
167  for (size_t j = 0; j < split_string.size(); j++) {
168  if (j != i) {
169  if (!string->empty()) *string += " ";
170  *string += split_string[j];
171  }
172  }
173  return true;
174  }
175  }
176  return false;
177 }
178 
179 bool ParseFromString(const std::string &name, std::string *string,
180  bool *param) {
181  std::vector<std::string> split_string;
182  SplitStringToVector(*string, " \t", true,
183  &split_string);
184  std::string name_equals = name + "="; // the name and then the equals sign.
185  size_t len = name_equals.length();
186 
187  for (size_t i = 0; i < split_string.size(); i++) {
188  if (split_string[i].compare(0, len, name_equals) == 0) {
189  std::string b = split_string[i].substr(len);
190  if (b.empty())
191  KALDI_ERR << "Bad option " << split_string[i];
192  if (b[0] == 'f' || b[0] == 'F') *param = false;
193  else if (b[0] == 't' || b[0] == 'T') *param = true;
194  else
195  KALDI_ERR << "Bad option " << split_string[i];
196  *string = "";
197  // Set "string" to all the pieces but the one we used.
198  for (size_t j = 0; j < split_string.size(); j++) {
199  if (j != i) {
200  if (!string->empty()) *string += " ";
201  *string += split_string[j];
202  }
203  }
204  return true;
205  }
206  }
207  return false;
208 }
209 
210 bool ParseFromString(const std::string &name, std::string *string,
211  BaseFloat *param) {
212  std::vector<std::string> split_string;
213  SplitStringToVector(*string, " \t", true,
214  &split_string);
215  std::string name_equals = name + "="; // the name and then the equals sign.
216  size_t len = name_equals.length();
217 
218  for (size_t i = 0; i < split_string.size(); i++) {
219  if (split_string[i].compare(0, len, name_equals) == 0) {
220  if (!ConvertStringToReal(split_string[i].substr(len), param))
221  KALDI_ERR << "Bad option " << split_string[i];
222  *string = "";
223  // Set "string" to all the pieces but the one we used.
224  for (size_t j = 0; j < split_string.size(); j++) {
225  if (j != i) {
226  if (!string->empty()) *string += " ";
227  *string += split_string[j];
228  }
229  }
230  return true;
231  }
232  }
233  return false;
234 }
235 
236 bool ParseFromString(const std::string &name, std::string *string,
237  std::string *param) {
238  std::vector<std::string> split_string;
239  SplitStringToVector(*string, " \t", true,
240  &split_string);
241  std::string name_equals = name + "="; // the name and then the equals sign.
242  size_t len = name_equals.length();
243 
244  for (size_t i = 0; i < split_string.size(); i++) {
245  if (split_string[i].compare(0, len, name_equals) == 0) {
246  *param = split_string[i].substr(len);
247 
248  // Set "string" to all the pieces but the one we used.
249  *string = "";
250  for (size_t j = 0; j < split_string.size(); j++) {
251  if (j != i) {
252  if (!string->empty()) *string += " ";
253  *string += split_string[j];
254  }
255  }
256  return true;
257  }
258  }
259  return false;
260 }
261 
262 bool ParseFromString(const std::string &name, std::string *string,
263  std::vector<int32> *param) {
264  std::vector<std::string> split_string;
265  SplitStringToVector(*string, " \t", true,
266  &split_string);
267  std::string name_equals = name + "="; // the name and then the equals sign.
268  size_t len = name_equals.length();
269 
270  for (size_t i = 0; i < split_string.size(); i++) {
271  if (split_string[i].compare(0, len, name_equals) == 0) {
272  if (!SplitStringToIntegers(split_string[i].substr(len), ":",
273  false, param))
274  KALDI_ERR << "Bad option " << split_string[i];
275  *string = "";
276  // Set "string" to all the pieces but the one we used.
277  for (size_t j = 0; j < split_string.size(); j++) {
278  if (j != i) {
279  if (!string->empty()) *string += " ";
280  *string += split_string[j];
281  }
282  }
283  return true;
284  }
285  }
286  return false;
287 }
288 
289 
291  PermuteComponent *ans = new PermuteComponent();
292  ans->reorder_ = reorder_;
293  return ans;
294 }
295 void PermuteComponent::Init(const std::vector<int32> &reorder) {
296  reorder_ = reorder;
297  KALDI_ASSERT(!reorder.empty());
298  std::vector<int32> indexes(reorder);
299  std::sort(indexes.begin(), indexes.end());
300  for (int32 i = 0; i < static_cast<int32>(indexes.size()); i++)
301  KALDI_ASSERT(i == indexes[i] && "Not a permutation");
302 }
303 
304 
305 std::string Component::Info() const {
306  std::stringstream stream;
307  stream << Type() << ", input-dim=" << InputDim()
308  << ", output-dim=" << OutputDim();
309  return stream.str();
310 }
311 
312 std::string UpdatableComponent::Info() const {
313  std::stringstream stream;
314  stream << Type() << ", input-dim=" << InputDim()
315  << ", output-dim=" << OutputDim() << ", learning-rate="
316  << LearningRate();
317  return stream.str();
318 }
319 
320 
322  KALDI_ASSERT(dim > 0);
323  dim_ = dim;
324  value_sum_.Resize(dim);
325  deriv_sum_.Resize(dim);
326  count_ = 0.0;
327 }
328 
330  const CuMatrixBase<BaseFloat> *deriv) {
331  KALDI_ASSERT(out_value.NumCols() == InputDim());
332  // Check we have the correct dimensions.
333  if (value_sum_.Dim() != InputDim() ||
334  (deriv != NULL && deriv_sum_.Dim() != InputDim())) {
335  std::lock_guard<std::mutex> lock(mutex_);
336  if (value_sum_.Dim() != InputDim()) {
337  value_sum_.Resize(InputDim());
338  count_ = 0.0;
339  }
340  if (deriv != NULL && deriv_sum_.Dim() != InputDim()) {
341  deriv_sum_.Resize(InputDim());
342  count_ = 0.0;
343  value_sum_.SetZero();
344  }
345  }
346  count_ += out_value.NumRows();
348  temp.AddRowSumMat(1.0, out_value, 0.0);
349  value_sum_.AddVec(1.0, temp);
350  if (deriv != NULL) {
351  temp.AddRowSumMat(1.0, *deriv, 0.0);
352  deriv_sum_.AddVec(1.0, temp);
353  }
354 }
355 
357  value_sum_.Scale(scale);
358  deriv_sum_.Scale(scale);
359  count_ *= scale;
360 }
361 
363  if (value_sum_.Dim() == 0 && other.value_sum_.Dim() != 0)
364  value_sum_.Resize(other.value_sum_.Dim());
365  if (deriv_sum_.Dim() == 0 && other.deriv_sum_.Dim() != 0)
366  deriv_sum_.Resize(other.deriv_sum_.Dim());
367  if (other.value_sum_.Dim() != 0)
368  value_sum_.AddVec(alpha, other.value_sum_);
369  if (other.deriv_sum_.Dim() != 0)
370  deriv_sum_.AddVec(alpha, other.deriv_sum_);
371  count_ += alpha * other.count_;
372 }
373 
374 void NonlinearComponent::Read(std::istream &is, bool binary) {
375  std::ostringstream ostr_beg, ostr_end;
376  ostr_beg << "<" << Type() << ">"; // e.g. "<SigmoidComponent>"
377  ostr_end << "</" << Type() << ">"; // e.g. "</SigmoidComponent>"
378  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<Dim>");
379  ReadBasicType(is, binary, &dim_); // Read dimension.
380  ExpectToken(is, binary, "<ValueSum>");
381  value_sum_.Read(is, binary);
382  ExpectToken(is, binary, "<DerivSum>");
383  deriv_sum_.Read(is, binary);
384  ExpectToken(is, binary, "<Count>");
385  ReadBasicType(is, binary, &count_);
386  ExpectToken(is, binary, ostr_end.str());
387 }
388 
389 void NonlinearComponent::Write(std::ostream &os, bool binary) const {
390  std::ostringstream ostr_beg, ostr_end;
391  ostr_beg << "<" << Type() << ">"; // e.g. "<SigmoidComponent>"
392  ostr_end << "</" << Type() << ">"; // e.g. "</SigmoidComponent>"
393  WriteToken(os, binary, ostr_beg.str());
394  WriteToken(os, binary, "<Dim>");
395  WriteBasicType(os, binary, dim_);
396  WriteToken(os, binary, "<ValueSum>");
397  value_sum_.Write(os, binary);
398  WriteToken(os, binary, "<DerivSum>");
399  deriv_sum_.Write(os, binary);
400  WriteToken(os, binary, "<Count>");
401  WriteBasicType(os, binary, count_);
402  WriteToken(os, binary, ostr_end.str());
403 }
404 
406  dim_(other.dim_), value_sum_(other.value_sum_), deriv_sum_(other.deriv_sum_),
407  count_(other.count_) { }
408 
409 void NonlinearComponent::InitFromString(std::string args) {
410  std::string orig_args(args);
411  int32 dim;
412  bool ok = ParseFromString("dim", &args, &dim);
413  if (!ok || !args.empty() || dim <= 0)
414  KALDI_ERR << "Invalid initializer for layer of type "
415  << Type() << ": \"" << orig_args << "\"";
416  Init(dim);
417 }
418 
419 void MaxoutComponent::Init(int32 input_dim, int32 output_dim) {
420  input_dim_ = input_dim;
421  output_dim_ = output_dim;
422  if (input_dim_ == 0)
423  input_dim_ = 10 * output_dim_; // default group size : 10
424  KALDI_ASSERT(input_dim_ > 0 && output_dim_ >= 0);
425  KALDI_ASSERT(input_dim_ % output_dim_ == 0);
426 }
427 
// Initializes from a config string: "output-dim=<n> [input-dim=<m>]".
// input-dim is optional (defaults to 10 * output-dim inside Init()).
void MaxoutComponent::InitFromString(std::string args) {
  std::string orig_args(args);
  int32 input_dim = 0;
  int32 output_dim = 0;
  bool ok = ParseFromString("output-dim", &args, &output_dim) &&
      ParseFromString("input-dim", &args, &input_dim);
  // NOTE(review): this log of the raw parsed values looks like leftover
  // debugging output -- consider removing or demoting to VLOG.
  KALDI_LOG << output_dim << " " << input_dim << " " << ok;
  if (!ok || !args.empty() || output_dim <= 0)
    KALDI_ERR << "Invalid initializer for layer of type "
              << Type() << ": \"" << orig_args << "\"";
  Init(input_dim, output_dim);
}
440 
441 
443  const ChunkInfo &out_info,
444  const CuMatrixBase<BaseFloat> &in,
445  CuMatrixBase<BaseFloat> *out) const {
446  in_info.CheckSize(in);
447  out_info.CheckSize(*out);
448  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
449  out->GroupMax(in);
450 }
451 
// Backward pass through the group-max; the gradient flows only to the
// input that achieved each group's maximum (presumably what
// GroupMaxDeriv computes -- confirm in the CuMatrix API).
void MaxoutComponent::Backprop(const ChunkInfo &, // in_info,
                               const ChunkInfo &, // out_info,
                               const CuMatrixBase<BaseFloat> &in_value,
                               const CuMatrixBase<BaseFloat> &out_value,
                               const CuMatrixBase<BaseFloat> &out_deriv,
                               Component *to_update,
                               CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);
  in_deriv->GroupMaxDeriv(in_value, out_value);
  // Scale each input's indicator by the gradient of its group's output.
  in_deriv->MulRowsGroupMat(out_deriv);
}
463 
// Deserializes the component; the opening tag may already have been
// consumed by Component::ReadNew, hence ExpectOneOrTwoTokens.
void MaxoutComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<MaxoutComponent>", "<InputDim>");
  ReadBasicType(is, binary, &input_dim_);
  ExpectToken(is, binary, "<OutputDim>");
  ReadBasicType(is, binary, &output_dim_);
  ExpectToken(is, binary, "</MaxoutComponent>");
}
471 
// Serializes in the format read back by MaxoutComponent::Read().
void MaxoutComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<MaxoutComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, input_dim_);
  WriteToken(os, binary, "<OutputDim>");
  WriteBasicType(os, binary, output_dim_);
  WriteToken(os, binary, "</MaxoutComponent>");
}
480 
481 std::string MaxoutComponent::Info() const {
482  std::stringstream stream;
483  stream << Type() << ", input-dim = " << input_dim_
484  << ", output-dim = " << output_dim_;
485  return stream.str();
486 }
487 
488 void PnormComponent::Init(int32 input_dim, int32 output_dim, BaseFloat p) {
489  input_dim_ = input_dim;
490  output_dim_ = output_dim;
491  if (input_dim_ == 0)
492  input_dim_ = 10 * output_dim_; // default group size : 10
493  p_ = p;
494  KALDI_ASSERT(input_dim_ > 0 && output_dim_ >= 0 && p_ >= 0);
495  KALDI_ASSERT(input_dim_ % output_dim_ == 0);
496 }
497 
// Initializes from a config string: "output-dim=<n> [input-dim=<m>] [p=<f>]".
// p is optional and defaults to 2; input-dim defaults inside Init().
void PnormComponent::InitFromString(std::string args) {
  std::string orig_args(args);
  int32 input_dim = 0;
  int32 output_dim = 0;
  BaseFloat p = 2;
  bool ok = ParseFromString("output-dim", &args, &output_dim) &&
      ParseFromString("input-dim", &args, &input_dim);
  ParseFromString("p", &args, &p);  // optional; return value deliberately ignored.
  if (!ok || !args.empty() || output_dim <= 0)
    KALDI_ERR << "Invalid initializer for layer of type "
              << Type() << ": \"" << orig_args << "\"";
  Init(input_dim, output_dim, p);
}
511 
512 
514  const ChunkInfo &out_info,
515  const CuMatrixBase<BaseFloat> &in,
516  CuMatrixBase<BaseFloat> *out) const {
517  in_info.CheckSize(in);
518  out_info.CheckSize(*out);
519  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
520 
521  out->GroupPnorm(in, p_);
522 }
523 
// Backward pass through the group p-norm, delegated to the matrix
// library's DiffGroupPnorm kernel.
void PnormComponent::Backprop(const ChunkInfo &,  // in_info,
                              const ChunkInfo &,  // out_info,
                              const CuMatrixBase<BaseFloat> &in_value,
                              const CuMatrixBase<BaseFloat> &out_value,
                              const CuMatrixBase<BaseFloat> &out_deriv,
                              Component *to_update,
                              // may be identical to "this".
                              CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);
  in_deriv->DiffGroupPnorm(in_value, out_value, out_deriv, p_);
}
535 
// Deserializes the component; the opening tag may already have been
// consumed by Component::ReadNew, hence ExpectOneOrTwoTokens.
void PnormComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<PnormComponent>", "<InputDim>");
  ReadBasicType(is, binary, &input_dim_);
  ExpectToken(is, binary, "<OutputDim>");
  ReadBasicType(is, binary, &output_dim_);
  ExpectToken(is, binary, "<P>");
  ReadBasicType(is, binary, &p_);
  ExpectToken(is, binary, "</PnormComponent>");
}
545 
// Serializes in the format read back by PnormComponent::Read().
void PnormComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<PnormComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, input_dim_);
  WriteToken(os, binary, "<OutputDim>");
  WriteBasicType(os, binary, output_dim_);
  WriteToken(os, binary, "<P>");
  WriteBasicType(os, binary, p_);
  WriteToken(os, binary, "</PnormComponent>");
}
556 
557 std::string PnormComponent::Info() const {
558  std::stringstream stream;
559  stream << Type() << ", input-dim = " << input_dim_
560  << ", output-dim = " << output_dim_
561  << ", p = " << p_;
562  return stream.str();
563 }
564 
565 
// Floor on the per-row mean-square value, preventing huge (or infinite)
// scaling factors when a row is (almost) all zeros.
const BaseFloat NormalizeComponent::kNormFloor = pow(2.0, -66);
// This component modifies the vector of activations by scaling it so that the
// root-mean-square equals 1.0.
569 
571  const ChunkInfo &out_info,
572  const CuMatrixBase<BaseFloat> &in,
573  CuMatrixBase<BaseFloat> *out) const {
574  cu::NormalizePerRow(in, BaseFloat(1), false, out);
575 }
576 
577 /*
578  A note on the derivative of NormalizeComponent...
579  let both row_in and row_out be vectors of dimension D.
580  Let p = row_in^T row_in / D, and let
581  f = 1 / sqrt(max(kNormFloor, p)), and we compute row_out as:
582 row_out = f row_in.
583  Suppose we have a quantity deriv_out which is the derivative
584  of the objective function w.r.t. row_out. We want to compute
585  deriv_in which is the derivative of the objective function w.r.t.
586  row_in. Let the objective function be F. One term is obvious: we have
587  deriv_in = f deriv_out + ....
588  next we have to take into account the derivative that gets back-propagated
589  through f. Obviously, dF/df = deriv_out^T row_in.
590  And df/dp = (p <= kNormFloor ? 0.0 : -0.5 p^{-1.5}) = (f == 1 / sqrt(kNormFloor) ? 0.0 : -0.5 f^3),
591  and dp/d(row_in) = 2/D row_in. [it's vector_valued].
592  So this term in dF/d(row_in) equals:
593  dF/df df/dp dp/d(row_in) = 2/D (f == 1 / sqrt(kNormFloor) ? 0.0 : -0.5 f^3) (deriv_out^T row_in) row_in
594  So
595  deriv_in = f deriv_out + (f == 1.0 ? 0.0 : -f^3 / D) (deriv_out^T row_in) row_in
596 
597 */
598 
600  const ChunkInfo &, // in_info,
601  const ChunkInfo &, // out_info,
602  const CuMatrixBase<BaseFloat> &in_value,
603  const CuMatrixBase<BaseFloat> &out_value,
604  const CuMatrixBase<BaseFloat> &out_deriv, Component *to_update,
605  // may be identical to "this".
606  CuMatrix<BaseFloat> *in_deriv) const {
607  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
608  cu::DiffNormalizePerRow(in_value, out_deriv, BaseFloat(1), false, in_deriv);
609 }
610 
612  const ChunkInfo &out_info,
613  const CuMatrixBase<BaseFloat> &in,
614  CuMatrixBase<BaseFloat> *out) const {
615  in_info.CheckSize(in);
616  out_info.CheckSize(*out);
617  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
618 
619  out->Sigmoid(in);
620 }
621 
// Backward pass through the sigmoid, using only out_value (the sigmoid's
// derivative is expressible from its output: y * (1 - y)).
void SigmoidComponent::Backprop(const ChunkInfo &, //in_info,
                                const ChunkInfo &, //out_info,
                                const CuMatrixBase<BaseFloat> &, //in_value,
                                const CuMatrixBase<BaseFloat> &out_value,
                                const CuMatrixBase<BaseFloat> &out_deriv,
                                Component *to_update, // may be identical to "this".
                                CuMatrix<BaseFloat> *in_deriv) const {
  // we ignore in_value; to_update is used only to accumulate stats.

  // The element by element equation would be:
  // in_deriv = out_deriv * out_value * (1.0 - out_value);
  // We can accomplish this via calls to the matrix library.

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  in_deriv->Set(1.0);
  in_deriv->AddMat(-1.0, out_value);
  // now in_deriv = 1.0 - out_value [element by element]
  in_deriv->MulElements(out_value);
  // now in_deriv = out_value * (1.0 - out_value) [element by element], i.e.
  // it contains the element-by-element derivative of the nonlinearity.
  if (to_update != NULL)
    dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
                                                              in_deriv);
  in_deriv->MulElements(out_deriv);
  // now in_deriv = out_deriv * out_value * (1.0 - out_value) [element by element]
}
648 
649 
651  const ChunkInfo &out_info,
652  const CuMatrixBase<BaseFloat> &in,
653  CuMatrixBase<BaseFloat> *out) const {
654  // Apply tanh function to each element of the output...
655  // the tanh function may be written as -1 + ( 2 / (1 + e^{-2 x})),
656  // which is a scaled and shifted sigmoid.
657 
658  in_info.CheckSize(in);
659  out_info.CheckSize(*out);
660  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
661  out->Tanh(in);
662 }
663 
// Backward pass through tanh, using only out_value (tanh's derivative is
// expressible from its output: 1 - y^2).
void TanhComponent::Backprop(const ChunkInfo &, //in_info,
                             const ChunkInfo &, //out_info,
                             const CuMatrixBase<BaseFloat> &, //in_value,
                             const CuMatrixBase<BaseFloat> &out_value,
                             const CuMatrixBase<BaseFloat> &out_deriv,
                             Component *to_update, // may be identical to "this".
                             CuMatrix<BaseFloat> *in_deriv) const {
  /*
    Note on the derivative of the tanh function:
    tanh'(x) = sech^2(x) = -(tanh(x)+1) (tanh(x)-1) = 1 - tanh^2(x)

    The element by element equation of what we're doing would be:
    in_deriv = out_deriv * (1.0 - out_value^2).
    We can accomplish this via calls to the matrix library. */

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  in_deriv->CopyFromMat(out_value);
  in_deriv->ApplyPow(2.0);
  in_deriv->Scale(-1.0);
  in_deriv->Add(1.0);
  // now in_deriv = (1.0 - out_value^2), the element-by-element derivative of
  // the nonlinearity.
  if (to_update != NULL)
    dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
                                                              in_deriv);
  in_deriv->MulElements(out_deriv);
}
691 
693  dim_ = dim;
694  power_ = power;
695  KALDI_ASSERT(dim > 0 && power >= 0);
696 }
697 
// Initializes from a config string: "dim=<n> [power=<f>]" (or
// "input-dim=<n>"); power defaults to 2.0.
void PowerComponent::InitFromString(std::string args) {
  std::string orig_args(args);
  int32 dim;
  BaseFloat power = 2.0;
  ParseFromString("power", &args, &power); // Optional.
  // Accept either "dim" or "input-dim" to specify the input dim.
  // "input-dim" is the canonical one; "dim" simplifies the testing code.
  bool ok = (ParseFromString("dim", &args, &dim) ||
             ParseFromString("input-dim", &args, &dim));
  if (!ok || !args.empty() || dim <= 0)
    KALDI_ERR << "Invalid initializer for layer of type "
              << Type() << ": \"" << orig_args << "\"";
  Init(dim, power);
}
712 
714  const ChunkInfo &out_info,
715  const CuMatrixBase<BaseFloat> &in,
716  CuMatrixBase<BaseFloat> *out) const {
717  in_info.CheckSize(in);
718  out_info.CheckSize(*out);
719  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
720 
721  // Apply power operation to each element of the input...
722  out->CopyFromMat(in);
723  out->ApplyPowAbs(power_);
724 }
725 
// Backward pass for out = |in|^p; in scalar terms
// in_deriv = p * |in_value|^(p-1) * sign-adjusted * out_deriv (the "true"
// argument to ApplyPowAbs presumably keeps the sign -- confirm in the
// CuMatrix API).
void PowerComponent::Backprop(const ChunkInfo &, //in_info,
                              const ChunkInfo &, //out_info,
                              const CuMatrixBase<BaseFloat> &in_value,
                              const CuMatrixBase<BaseFloat> &out_value,
                              const CuMatrixBase<BaseFloat> &out_deriv,
                              Component *to_update, // may be identical to "this".
                              CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols());
  // in scalar terms: in_deriv += p * in_value^(p-1) * out_deriv
  in_deriv->CopyFromMat(in_value);
  in_deriv->ApplyPowAbs(power_ - 1.0, true);
  in_deriv->Scale(power_);
  in_deriv->MulElements(out_deriv);
}
740 
// Deserializes the component.  Input and output dims are equal for this
// component, so both tokens are read into the single dim_ member.
void PowerComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<PowerComponent>", "<InputDim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<OutputDim>");
  ReadBasicType(is, binary, &dim_);  // same value; overwrites harmlessly.
  ExpectToken(is, binary, "<Power>");
  ReadBasicType(is, binary, &power_);
  ExpectToken(is, binary, "</PowerComponent>");
}
750 
// Serializes in the format read back by PowerComponent::Read(); dim_ is
// written for both <InputDim> and <OutputDim> since they are equal.
void PowerComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<PowerComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<OutputDim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<Power>");
  WriteBasicType(os, binary, power_);
  WriteToken(os, binary, "</PowerComponent>");
}
761 
762 std::string PowerComponent::Info() const {
763  std::stringstream stream;
764  stream << Type() << ", dim = " << dim_
765  << ", power = " << power_;
766  return stream.str();
767 }
768 
770  const ChunkInfo &out_info,
771  const CuMatrixBase<BaseFloat> &in,
772  CuMatrixBase<BaseFloat> *out) const {
773  // Apply rectified linear function (x >= 0 ? 1.0 : 0.0)
774  out->CopyFromMat(in);
775  out->ApplyFloor(0.0);
776 }
777 
779  const ChunkInfo &, //out_info,
780  const CuMatrixBase<BaseFloat> &, //in_value,
781  const CuMatrixBase<BaseFloat> &out_value,
782  const CuMatrixBase<BaseFloat> &out_deriv,
783  Component *to_update, // may be identical to "this".
784  CuMatrix<BaseFloat> *in_deriv) const {
785 
786  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols(),
787  kUndefined);
788  in_deriv->CopyFromMat(out_value);
789  in_deriv->ApplyHeaviside();
790  // Now in_deriv(i, j) equals (out_value(i, j) > 0.0 ? 1.0 : 0.0),
791  // which is the derivative of the nonlinearity (well, except at zero
792  // where it's undefined).
793  if (to_update != NULL)
794  dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
795  in_deriv);
796  in_deriv->MulElements(out_deriv);
797 }
798 
800  const ChunkInfo &out_info,
801  const CuMatrixBase<BaseFloat> &in,
802  CuMatrixBase<BaseFloat> *out) const {
803  in_info.CheckSize(in);
804  out_info.CheckSize(*out);
805  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
806  // Apply function x = log(1 + exp(x))
807  out->SoftHinge(in);
808 }
809 
// Backward pass through the soft-hinge: d/dx log(1 + exp(x)) is the
// sigmoid of x, so the derivative is computed from in_value.
void SoftHingeComponent::Backprop(const ChunkInfo &, //in_info,
                                  const ChunkInfo &, //out_info,
                                  const CuMatrixBase<BaseFloat> &in_value,
                                  const CuMatrixBase<BaseFloat> &out_value,
                                  const CuMatrixBase<BaseFloat> &out_deriv,
                                  Component *to_update, // may be identical to "this".
                                  CuMatrix<BaseFloat> *in_deriv) const {

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols(),
                   kUndefined);
  // note: d/dx: log(1 + exp(x)) = (exp(x) / (1 + exp(x)) = 1 / (1 + exp(-x)),
  // which is the sigmoid function.

  // if the output is y, then dy/dx = (exp(x) / (1 + exp(x)),
  // and using y = log(1 + exp(x)) -> exp(x) = exp(y) - 1, we have
  // dy/dx = (exp(y) - 1) / exp(y)


  in_deriv->Sigmoid(in_value);

  if (to_update != NULL)
    dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
                                                              in_deriv);
  in_deriv->MulElements(out_deriv);
}
835 
836 
838  const ChunkInfo &out_info,
839  const CuMatrixBase<BaseFloat> &in,
840  CuMatrixBase<BaseFloat> *out) const {
841  out->CopyFromMat(in);
842  out->Scale(scale_);
843 }
844 
// Backward pass: the gradient is simply scaled by the same constant.
void ScaleComponent::Backprop(const ChunkInfo &, //in_info,
                              const ChunkInfo &, //out_info,
                              const CuMatrixBase<BaseFloat> &, //in_value,
                              const CuMatrixBase<BaseFloat> &, //out_value,
                              const CuMatrixBase<BaseFloat> &out_deriv,
                              Component *, //to_update, // may be identical to "this".
                              CuMatrix<BaseFloat> *in_deriv) const {

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols(),
                   kUndefined);
  in_deriv->CopyFromMat(out_deriv);
  in_deriv->Scale(scale_);
}
858 
860  dim_ = dim;
861  scale_ = scale;
862  KALDI_ASSERT(dim_ > 0);
863  KALDI_ASSERT(scale_ != 0.0);
864 }
865 
// Initializes from a config string; both "dim" and "scale" are required.
void ScaleComponent::InitFromString(std::string args) {
  std::string orig_args(args);
  int32 dim;
  BaseFloat scale;
  if (!ParseFromString("dim", &args, &dim))
    KALDI_ERR << "Dimension not specified for ScaleComponent in config file";
  if (!ParseFromString("scale", &args, &scale))
    KALDI_ERR << "Scale not specified for ScaleComponent in config file";
  Init(dim, scale);
}
876 
// Serializes in the format read back by ScaleComponent::Read().
void ScaleComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<ScaleComponent>");
  WriteToken(os, binary, "<Dim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<Scale>");
  WriteBasicType(os, binary, scale_);
  WriteToken(os, binary, "</ScaleComponent>");
}
885 
// Deserializes the component; the opening tag may already have been
// consumed by Component::ReadNew, hence ExpectOneOrTwoTokens.
void ScaleComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<ScaleComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<Scale>");
  ReadBasicType(is, binary, &scale_);
  ExpectToken(is, binary, "</ScaleComponent>");
}
893 
894 std::string ScaleComponent::Info() const {
895  std::stringstream stream;
896  stream << Type() << ", dim=" << dim_ << ", scale=" << scale_;
897  return stream.str();
898 }
899 
// NOTE(review): the opening signature line (presumably
// "void SoftmaxComponent::Propagate(const ChunkInfo &in_info,") was lost
// in extraction; the remainder of the definition follows.
                                  const ChunkInfo &out_info,
                                  const CuMatrixBase<BaseFloat> &in,
                                  CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // Apply softmax function to each row of the output...
  // for that row, we do
  // x_i = exp(x_i) / sum_j exp(x_j).
  out->SoftMaxPerRow(in);

  // This floor on the output helps us deal with
  // almost-zeros in a way that doesn't lead to overflow.
  out->ApplyFloor(1.0e-20);
}
918 
// NOTE(review): the opening signature line (presumably
// "void SoftmaxComponent::Backprop(const ChunkInfo &in_info,") was lost
// in extraction; the remainder of the definition follows.
                                 const ChunkInfo &out_info,
                                 const CuMatrixBase<BaseFloat> &, //in_value,
                                 const CuMatrixBase<BaseFloat> &out_value,
                                 const CuMatrixBase<BaseFloat> &out_deriv,
                                 Component *to_update, // only thing updated is counts_.
                                 CuMatrix<BaseFloat> *in_deriv) const {
  /*
    Note on the derivative of the softmax function: let it be
      p_i = exp(x_i) / sum_i exp_i
    The [matrix-valued] Jacobian of this function is
      diag(p) - p p^T
    Let the derivative vector at the output be e, and at the input be
    d.  We have
      d = diag(p) e - p (p^T e).
      d_i = p_i e_i - p_i (p^T e).
  */
  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  in_deriv->DiffSoftmaxPerRow(out_value, out_deriv);

  // The SoftmaxComponent does not have any real trainable parameters, but
  // during the backprop we store some statistics on the average counts;
  // these may be used in mixing-up.
  if (to_update != NULL) {
    // NOTE(review): the dynamic_cast result is used without a NULL check;
    // to_update is assumed to always be a NonlinearComponent — confirm
    // callers guarantee this.
    NonlinearComponent *to_update_nonlinear =
        dynamic_cast<NonlinearComponent*>(to_update);
    to_update_nonlinear->UpdateStats(out_value);
  }
}
948 
// NOTE(review): the opening signature line (presumably
// "void LogSoftmaxComponent::Propagate(const ChunkInfo &in_info,") was lost
// in extraction; the remainder of the definition follows.
                                    const ChunkInfo &out_info,
                                    const CuMatrixBase<BaseFloat> &in,
                                    CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // Applies log softmax function to each row of the output. For each row, we do
  // x_i = x_i - log(sum_j exp(x_j))
  out->LogSoftMaxPerRow(in);

  // Just to be consistent with SoftmaxComponent::Propagate()
  out->ApplyFloor(Log(1.0e-20));
}
964 
// NOTE(review): the opening signature line (presumably
// "void LogSoftmaxComponent::Backprop(const ChunkInfo &in_info,") was lost
// in extraction; the remainder of the definition follows.
                                   const ChunkInfo &out_info,
                                   const CuMatrixBase<BaseFloat> &, //in_value,
                                   const CuMatrixBase<BaseFloat> &out_value,
                                   const CuMatrixBase<BaseFloat> &out_deriv,
                                   Component *to_update,
                                   CuMatrix<BaseFloat> *in_deriv) const {
  /*
    Let the output be y, then
      y_i = x_i - log(sum_i exp(x_i))
    where x_i is the input to the component.  The Jacobian matrix of this
    function is
      J = I - 1 exp(y^T)
    where 1 is a vector of ones.  Let the derivative vector at the output be e,
    and at the input be d, then we have
      d = e - exp(y) Sum(e)
      d_i = e_i - exp(y_i) Sum(e)
  */
  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  KALDI_ASSERT(SameDim(out_value, out_deriv) && SameDim(out_value, *in_deriv));

  in_deriv->DiffLogSoftmaxPerRow(out_value, out_deriv);

  // Updates stats.  NOTE(review): as in SoftmaxComponent::Backprop, the
  // dynamic_cast is not NULL-checked; to_update is assumed to be a
  // NonlinearComponent.
  if (to_update != NULL) {
    NonlinearComponent *to_update_nonlinear =
        dynamic_cast<NonlinearComponent*>(to_update);
    to_update_nonlinear->UpdateStats(out_value);
  }
}
995 
996 
// NOTE(review): the opening line (presumably
// "void AffineComponent::Scale(BaseFloat scale) {") was lost in extraction.
// Scales both the weight matrix and the bias vector by "scale".
  linear_params_.Scale(scale);
  bias_params_.Scale(scale);
}
1001 
1002 // virtual
1003 void AffineComponent::Resize(int32 input_dim, int32 output_dim) {
1004  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
1005  bias_params_.Resize(output_dim);
1006  linear_params_.Resize(output_dim, input_dim);
1007 }
1008 
1009 void AffineComponent::Add(BaseFloat alpha, const UpdatableComponent &other_in) {
1010  const AffineComponent *other =
1011  dynamic_cast<const AffineComponent*>(&other_in);
1012  KALDI_ASSERT(other != NULL);
1013  linear_params_.AddMat(alpha, other->linear_params_);
1014  bias_params_.AddVec(alpha, other->bias_params_);
1015 }
1016 
// NOTE(review): the opening line (presumably the copy constructor
// "AffineComponent::AffineComponent(const AffineComponent &component):")
// was lost in extraction; this is its member-initializer list.
    UpdatableComponent(component),
    linear_params_(component.linear_params_),
    bias_params_(component.bias_params_),
    is_gradient_(component.is_gradient_) { }
1022 
// NOTE(review): the opening line (a constructor taking
// "const CuMatrixBase<BaseFloat> &linear_params," first) was lost in
// extraction.  Constructs the component from explicit parameter values,
// requiring consistent, nonzero dimensions.
                                 const CuVectorBase<BaseFloat> &bias_params,
                                 BaseFloat learning_rate):
    UpdatableComponent(learning_rate),
    linear_params_(linear_params),
    bias_params_(bias_params) {
  KALDI_ASSERT(linear_params.NumRows() == bias_params.Dim()&&
               bias_params.Dim() != 0);
  is_gradient_ = false;  // by default this holds parameters, not a gradient.
}
1033 
1034 
1035 
1036 void AffineComponent::SetZero(bool treat_as_gradient) {
1037  if (treat_as_gradient) {
1038  SetLearningRate(1.0);
1039  }
1040  linear_params_.SetZero();
1041  bias_params_.SetZero();
1042  if (treat_as_gradient)
1043  is_gradient_ = true;
1044 }
1045 
// NOTE(review): the opening line (presumably
// "void AffineComponent::SetParams(const VectorBase<BaseFloat> &bias,") was
// lost in extraction.  Overwrites the parameters from CPU-side matrices,
// checking that the dimensions agree.
                                const MatrixBase<BaseFloat> &linear) {
  bias_params_ = bias;
  linear_params_ = linear;
  KALDI_ASSERT(bias_params_.Dim() == linear_params_.NumRows());
}
1052 
// NOTE(review): the opening line (presumably
// "void AffineComponent::PerturbParams(BaseFloat stddev) {") was lost in
// extraction.  Adds zero-mean Gaussian noise of standard deviation "stddev"
// to all parameters (used e.g. in gradient testing).
  CuMatrix<BaseFloat> temp_linear_params(linear_params_);
  temp_linear_params.SetRandn();
  linear_params_.AddMat(stddev, temp_linear_params);

  CuVector<BaseFloat> temp_bias_params(bias_params_);
  temp_bias_params.SetRandn();
  bias_params_.AddVec(stddev, temp_bias_params);
}
1062 
// Returns a one-line description including dimensions, the standard
// deviations of the parameters, and the learning rate.
std::string AffineComponent::Info() const {
  std::stringstream stream;
  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
      * static_cast<BaseFloat>(linear_params_.NumCols());
  // NOTE(review): a line was lost in extraction here — presumably the start
  // of the linear_stddev expression (a sqrt of the mean squared linear
  // parameter, matching the bias_stddev computation below); the fragment
  // "linear_params_size)," is its surviving tail.
  BaseFloat linear_stddev =
                linear_params_size),
      bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
                              bias_params_.Dim());
  stream << Type() << ", input-dim=" << InputDim()
         << ", output-dim=" << OutputDim()
         << ", linear-params-stddev=" << linear_stddev
         << ", bias-params-stddev=" << bias_stddev
         << ", learning-rate=" << LearningRate();
  return stream.str();
}
1079 
// NOTE(review): the opening line of Copy(), and the lines copying
// learning_rate_ and linear_params_ into "ans", were lost in extraction.
// Returns a deep copy of this component; the caller owns the result.
  AffineComponent *ans = new AffineComponent();
  ans->bias_params_ = bias_params_;
  ans->is_gradient_ = is_gradient_;
  return ans;
}
1088 
// NOTE(review): the opening line of DotProduct() and the line containing the
// TraceMatMat(...) term over the linear parameters were lost in extraction;
// the surviving "+ VecVec(...)" adds the bias-vector inner product.
  const AffineComponent *other =
      dynamic_cast<const AffineComponent*>(&other_in);
      + VecVec(bias_params_, other->bias_params_);
}
1095 
// NOTE(review): the opening line (presumably
// "void AffineComponent::Init(BaseFloat learning_rate,") was lost in
// extraction.  Randomly initializes the parameters: Gaussian weights scaled
// by param_stddev, Gaussian biases scaled by bias_stddev.
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev, BaseFloat bias_stddev) {
  UpdatableComponent::Init(learning_rate);
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
  linear_params_.SetRandn();  // sets to random normally distributed noise.
  linear_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
}
1108 
// NOTE(review): the opening line (presumably
// "void AffineComponent::Init(BaseFloat learning_rate,") was lost in
// extraction.  Initializes from a matrix on disk whose last column is the
// bias and whose remaining columns are the weight matrix.
                           std::string matrix_filename) {
  UpdatableComponent::Init(learning_rate);
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat);  // will abort on failure.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 input_dim = mat.NumCols() - 1, output_dim = mat.NumRows();
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  linear_params_.CopyFromMat(mat.Range(0, output_dim, 0, input_dim));
  bias_params_.CopyColFromMat(mat, input_dim);
}
1121 
1122 void AffineComponent::InitFromString(std::string args) {
1123  std::string orig_args(args);
1124  bool ok = true;
1125  BaseFloat learning_rate = learning_rate_;
1126  std::string matrix_filename;
1127  int32 input_dim = -1, output_dim = -1;
1128  ParseFromString("learning-rate", &args, &learning_rate); // optional.
1129  if (ParseFromString("matrix", &args, &matrix_filename)) {
1130  Init(learning_rate, matrix_filename);
1131  if (ParseFromString("input-dim", &args, &input_dim))
1132  KALDI_ASSERT(input_dim == InputDim() &&
1133  "input-dim mismatch vs. matrix.");
1134  if (ParseFromString("output-dim", &args, &output_dim))
1135  KALDI_ASSERT(output_dim == OutputDim() &&
1136  "output-dim mismatch vs. matrix.");
1137  } else {
1138  ok = ok && ParseFromString("input-dim", &args, &input_dim);
1139  ok = ok && ParseFromString("output-dim", &args, &output_dim);
1140  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
1141  bias_stddev = 1.0;
1142  ParseFromString("param-stddev", &args, &param_stddev);
1143  ParseFromString("bias-stddev", &args, &bias_stddev);
1144  Init(learning_rate, input_dim, output_dim,
1145  param_stddev, bias_stddev);
1146  }
1147  if (!args.empty())
1148  KALDI_ERR << "Could not process these elements in initializer: "
1149  << args;
1150  if (!ok)
1151  KALDI_ERR << "Bad initializer " << orig_args;
1152 }
1153 
1154 
// NOTE(review): the opening signature line (presumably
// "void AffineComponent::Propagate(const ChunkInfo &in_info,") was lost in
// extraction.  Computes out = in * linear_params_^T + bias (per row).
                                const ChunkInfo &out_info,
                                const CuMatrixBase<BaseFloat> &in,
                                CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // No need for asserts as they'll happen within the matrix operations.
  out->CopyRowsFromVec(bias_params_);  // copies bias_params_ to each row
  // of *out.
  out->AddMatMat(1.0, in, kNoTrans, linear_params_, kTrans, 1.0);
}
1168 
// NOTE(review): the opening line (presumably
// "void AffineComponent::UpdateSimple(const CuMatrixBase<BaseFloat> &in_value,")
// was lost in extraction.  Plain gradient accumulation, used when
// is_gradient_ is true (no preconditioning, no max-change).
                                   const CuMatrixBase<BaseFloat> &out_deriv) {
  bias_params_.AddRowSumMat(learning_rate_, out_deriv, 1.0);
  linear_params_.AddMatMat(learning_rate_, out_deriv, kTrans,
                           in_value, kNoTrans, 1.0);
}
1175 
1176 void AffineComponent::Backprop(const ChunkInfo &, //in_info,
1177  const ChunkInfo &, //out_info,
1178  const CuMatrixBase<BaseFloat> &in_value,
1179  const CuMatrixBase<BaseFloat> &, //out_value,
1180  const CuMatrixBase<BaseFloat> &out_deriv,
1181  Component *to_update_in, // may be identical to "this".
1182  CuMatrix<BaseFloat> *in_deriv) const {
1183  AffineComponent *to_update = dynamic_cast<AffineComponent*>(to_update_in);
1184  in_deriv->Resize(out_deriv.NumRows(), InputDim());
1185  // Propagate the derivative back to the input.
1186  in_deriv->AddMatMat(1.0, out_deriv, kNoTrans, linear_params_, kNoTrans,
1187  0.0);
1188 
1189  if (to_update != NULL) {
1190  // Next update the model (must do this 2nd so the derivatives we propagate
1191  // are accurate, in case this == to_update_in.)
1192  if (to_update->is_gradient_)
1193  to_update->UpdateSimple(in_value, out_deriv);
1194  else // the call below is to a virtual function that may be re-implemented
1195  to_update->Update(in_value, out_deriv); // by child classes.
1196  }
1197 }
1198 
// Deserializes the component, tolerating two older on-disk formats:
// one with an <AvgInput>/<AvgInputCount> section (discarded), and one
// without the <IsGradient> flag (defaulted to false).
void AffineComponent::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponent>"
  // might not see the "<AffineComponent>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  std::string tok;
  // back-compatibility code.  TODO: re-do this later.
  ReadToken(is, binary, &tok);
  if (tok == "<AvgInput>") { // discard the following.
    CuVector<BaseFloat> avg_input;
    avg_input.Read(is, binary);
    BaseFloat avg_input_count;
    ExpectToken(is, binary, "<AvgInputCount>");
    ReadBasicType(is, binary, &avg_input_count);
    ReadToken(is, binary, &tok);  // re-read; should now be <IsGradient> or end.
  }
  if (tok == "<IsGradient>") {
    ReadBasicType(is, binary, &is_gradient_);
    ExpectToken(is, binary, ostr_end.str());
  } else {
    // Oldest format: no <IsGradient> flag at all.
    is_gradient_ = false;
    KALDI_ASSERT(tok == ostr_end.str());
  }
}
1230 
// Serializes the component.  The token order must match Read(); the type
// name is embedded in the opening/closing tags so derived classes reuse
// their own Type().
void AffineComponent::Write(std::ostream &os, bool binary) const {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponent>"
  WriteToken(os, binary, ostr_beg.str());
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<IsGradient>");
  WriteBasicType(os, binary, is_gradient_);
  WriteToken(os, binary, ostr_end.str());
}
1246 
// NOTE(review): the opening lines of GetParameterDim(), Vectorize() and
// UnVectorize() were lost in extraction; the three fragments below are
// their bodies.  The flat parameter layout is: the weight matrix (row by
// row) followed by the bias vector.
  // Total parameter count: weights plus one bias per output row.
  return (InputDim() + 1) * OutputDim();
}
  params->Range(0, InputDim() * OutputDim()).CopyRowsFromMat(linear_params_);
  params->Range(InputDim() * OutputDim(),
                OutputDim()).CopyFromVec(bias_params_);
}
  linear_params_.CopyRowsFromVec(params.Range(0, InputDim() * OutputDim()));
  bias_params_.CopyFromVec(params.Range(InputDim() * OutputDim(),
                                        OutputDim()));
}
1260 
// NOTE(review): the opening line (presumably
// "void AffineComponent::LimitRank(int32 d,") and the declaration of the
// working matrix M (presumably a CPU copy of linear_params_) were lost in
// extraction.  Splits this affine transform into two components a, b whose
// product approximates it with rank <= d, via a truncated SVD.
                                AffineComponent **a, AffineComponent **b) const {
  KALDI_ASSERT(d <= InputDim());

  // We'll limit the rank of just the linear part, keeping the bias vector full.
  int32 rows = M.NumRows(), cols = M.NumCols(), rc_min = std::min(rows, cols);
  Vector<BaseFloat> s(rc_min);
  Matrix<BaseFloat> U(rows, rc_min), Vt(rc_min, cols);
  // Do the destructive svd M = U diag(s) V^T.  It actually outputs the transpose of V.
  M.DestructiveSvd(&s, &U, &Vt);
  SortSvd(&s, &U, &Vt);  // Sort the singular values from largest to smallest.
  BaseFloat old_svd_sum = s.Sum();
  // Keep only the d largest singular values/vectors.
  U.Resize(rows, d, kCopyData);
  s.Resize(d, kCopyData);
  Vt.Resize(d, cols, kCopyData);
  BaseFloat new_svd_sum = s.Sum();
  KALDI_LOG << "Reduced rank from "
            << rc_min << " to " << d << ", SVD sum reduced from "
            << old_svd_sum << " to " << new_svd_sum;

  // U.MulColsVec(s); // U <-- U diag(s)
  Vt.MulRowsVec(s);  // Vt <-- diag(s) Vt.

  // Copy() preserves derived-class settings (learning rate, etc.).
  *a = dynamic_cast<AffineComponent*>(this->Copy());
  *b = dynamic_cast<AffineComponent*>(this->Copy());

  // "a" maps input -> d dims with no bias; "b" maps d dims -> output with
  // the original bias.
  (*a)->bias_params_.Resize(d, kSetZero);
  (*a)->linear_params_ = Vt;

  (*b)->bias_params_ = this->bias_params_;
  (*b)->linear_params_ = U;
}
1294 
// NOTE(review): the opening line (presumably
// "Component* AffineComponent::CollapseWithNext(") was lost in extraction.
// Composes this affine transform with the following one into a single
// AffineComponent: W = W_next * W_this, b = W_next * b_this + b_next.
    const AffineComponent &next_component) const {
  AffineComponent *ans = dynamic_cast<AffineComponent*>(this->Copy());
  KALDI_ASSERT(ans != NULL);
  // Note: it's possible that "ans" is really of a derived type such
  // as AffineComponentPreconditioned, but this will still work.
  // the "copy" call will copy things like learning rates, "alpha" value
  // for preconditioned component, etc.
  ans->linear_params_.Resize(next_component.OutputDim(), InputDim());
  ans->bias_params_ = next_component.bias_params_;

  ans->linear_params_.AddMatMat(1.0, next_component.linear_params_, kNoTrans,
                                this->linear_params_, kNoTrans, 0.0);
  ans->bias_params_.AddMatVec(1.0, next_component.linear_params_, kNoTrans,
                              this->bias_params_, 1.0);
  return ans;
}
1312 
// NOTE(review): the opening line (presumably
// "Component* AffineComponent::CollapseWithNext(") was lost in extraction.
// Same composition as above but with a FixedAffineComponent following.
    const FixedAffineComponent &next_component) const {
  // If at least one was non-updatable, make the whole non-updatable.
  FixedAffineComponent *ans =
      dynamic_cast<FixedAffineComponent*>(next_component.Copy());
  KALDI_ASSERT(ans != NULL);
  ans->linear_params_.Resize(next_component.OutputDim(), InputDim());
  ans->bias_params_ = next_component.bias_params_;

  ans->linear_params_.AddMatMat(1.0, next_component.linear_params_, kNoTrans,
                                this->linear_params_, kNoTrans, 0.0);
  ans->bias_params_.AddMatVec(1.0, next_component.linear_params_, kNoTrans,
                              this->bias_params_, 1.0);
  return ans;
}
1328 
// NOTE(review): the opening line (presumably
// "Component* AffineComponent::CollapseWithNext(") was lost in extraction.
// Folds a following per-dimension fixed scaling into this component by
// scaling each output row of the weights and each bias element.
    const FixedScaleComponent &next_component) const {
  KALDI_ASSERT(this->OutputDim() == next_component.InputDim());
  AffineComponent *ans =
      dynamic_cast<AffineComponent*>(this->Copy());
  KALDI_ASSERT(ans != NULL);
  ans->linear_params_.MulRowsVec(next_component.scales_);
  ans->bias_params_.MulElements(next_component.scales_);

  return ans;
}
1340 
1341 
1342 
// NOTE(review): the opening line (presumably
// "Component* AffineComponent::CollapseWithPrevious(") was lost in
// extraction.  Composes a preceding FixedAffineComponent into this one:
// W = W_this * W_prev, b = W_this * b_prev + b_this.
    const FixedAffineComponent &prev_component) const {
  // If at least one was non-updatable, make the whole non-updatable.
  FixedAffineComponent *ans =
      dynamic_cast<FixedAffineComponent*>(prev_component.Copy());
  KALDI_ASSERT(ans != NULL);

  ans->linear_params_.Resize(this->OutputDim(), prev_component.InputDim());
  ans->bias_params_ = this->bias_params_;

  ans->linear_params_.AddMatMat(1.0, this->linear_params_, kNoTrans,
                                prev_component.linear_params_, kNoTrans, 0.0);
  ans->bias_params_.AddMatVec(1.0, this->linear_params_, kNoTrans,
                              prev_component.bias_params_, 1.0);
  return ans;
}
1359 
// Deserializes the component; tolerates an older format lacking the
// <MaxChange> field (defaulted to 0.0, which disables the max-change code).
void AffineComponentPreconditioned::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponentPreconditioned>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponentPreconditioned>"
  // might not see the "<AffineComponentPreconditioned>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "<Alpha>");
  ReadBasicType(is, binary, &alpha_);
  // todo: remove back-compat code.  Will just be:
  // ExpectToken(is, binary, "<MaxChange>");
  // ReadBasicType(is, binary, &max_change_);
  // ExpectToken(is, binary, ostr_end);
  // [end of function]
  std::string tok;
  ReadToken(is, binary, &tok);
  if (tok == "<MaxChange>") {
    ReadBasicType(is, binary, &max_change_);
    ExpectToken(is, binary, ostr_end.str());
  } else {
    max_change_ = 0.0;  // older format: no max-change stored.
    KALDI_ASSERT(tok == ostr_end.str());
  }
}
1389 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditioned::InitFromString(std::string args) {")
// was lost in extraction.  Parses config options ("alpha" and "max-change"
// in addition to the base-class ones) and initializes from either a matrix
// file or explicit dimensions.
  std::string orig_args(args);
  std::string matrix_filename;
  BaseFloat learning_rate = learning_rate_;
  BaseFloat alpha = 0.1, max_change = 0.0;
  int32 input_dim = -1, output_dim = -1;
  ParseFromString("learning-rate", &args, &learning_rate);  // optional.
  ParseFromString("alpha", &args, &alpha);
  ParseFromString("max-change", &args, &max_change);

  if (ParseFromString("matrix", &args, &matrix_filename)) {
    Init(learning_rate, alpha, max_change, matrix_filename);
    if (ParseFromString("input-dim", &args, &input_dim))
      KALDI_ASSERT(input_dim == InputDim() &&
                   "input-dim mismatch vs. matrix.");
    if (ParseFromString("output-dim", &args, &output_dim))
      KALDI_ASSERT(output_dim == OutputDim() &&
                   "output-dim mismatch vs. matrix.");
  } else {
    bool ok = true;
    ok = ok && ParseFromString("input-dim", &args, &input_dim);
    ok = ok && ParseFromString("output-dim", &args, &output_dim);
    BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
        bias_stddev = 1.0;
    ParseFromString("param-stddev", &args, &param_stddev);
    ParseFromString("bias-stddev", &args, &bias_stddev);
    // Note: ok is checked before Init so a missing dim gives the
    // informative "Bad initializer" error.
    if (!ok)
      KALDI_ERR << "Bad initializer " << orig_args;
    Init(learning_rate, input_dim, output_dim, param_stddev,
         bias_stddev, alpha, max_change);
  }
  if (!args.empty())
    KALDI_ERR << "Could not process these elements in initializer: "
              << args;
}
1425 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditioned::Init(BaseFloat learning_rate,") was
// lost in extraction.  Initializes from a matrix on disk whose last column
// is the bias, plus the preconditioning parameters.
                                         BaseFloat alpha, BaseFloat max_change,
                                         std::string matrix_filename) {
  UpdatableComponent::Init(learning_rate);
  alpha_ = alpha;
  max_change_ = max_change;
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat);  // will abort on failure.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 input_dim = mat.NumCols() - 1, output_dim = mat.NumRows();
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  linear_params_.CopyFromMat(mat.Range(0, output_dim, 0, input_dim));
  bias_params_.CopyColFromMat(mat, input_dim);
}
1441 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditioned::Init(") was lost in extraction.
// Random initialization plus the preconditioning parameters alpha and
// max_change.
    BaseFloat learning_rate,
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev, BaseFloat bias_stddev,
    BaseFloat alpha, BaseFloat max_change) {
  UpdatableComponent::Init(learning_rate);
  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
  linear_params_.SetRandn();  // sets to random normally distributed noise.
  linear_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
  alpha_ = alpha;
  KALDI_ASSERT(alpha_ > 0.0);
  max_change_ = max_change;  // Note: any value of max_change_ is valid, but
  // only values > 0.0 will actually activate the code.
}
1461 
1462 
// Serializes the component; token order must match Read().  Extends the
// AffineComponent format with <Alpha> and <MaxChange> (and omits
// <IsGradient>).
void AffineComponentPreconditioned::Write(std::ostream &os, bool binary) const {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponent>"
  WriteToken(os, binary, ostr_beg.str());
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<Alpha>");
  WriteBasicType(os, binary, alpha_);
  WriteToken(os, binary, "<MaxChange>");
  WriteBasicType(os, binary, max_change_);
  WriteToken(os, binary, ostr_end.str());
}
1480 
// NOTE(review): the opening line (presumably
// "std::string AffineComponentPreconditioned::Info() const {") and the
// first part of the linear_stddev expression were lost in extraction.
// Like AffineComponent::Info() but also reports alpha and max-change.
  std::stringstream stream;
  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
      * static_cast<BaseFloat>(linear_params_.NumCols());
  BaseFloat linear_stddev =
                linear_params_size),
      bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
                              bias_params_.Dim());
  stream << Type() << ", input-dim=" << InputDim()
         << ", output-dim=" << OutputDim()
         << ", linear-params-stddev=" << linear_stddev
         << ", bias-params-stddev=" << bias_stddev
         << ", learning-rate=" << LearningRate()
         << ", alpha=" << alpha_
         << ", max-change=" << max_change_;
  return stream.str();
}
1499 
// NOTE(review): the opening line of Copy() and the lines allocating "ans"
// and copying learning_rate_/linear_params_ were lost in extraction.
// Returns a deep copy including the preconditioning settings.
  ans->bias_params_ = bias_params_;
  ans->alpha_ = alpha_;
  ans->max_change_ = max_change_;
  ans->is_gradient_ = is_gradient_;
  return ans;
}
1510 
1511 
// NOTE(review): the opening line (presumably
// "BaseFloat AffineComponentPreconditioned::GetScalingFactor(") was lost in
// extraction.  Returns a factor in (0, 1] by which to scale this
// minibatch's update so that (an upper bound on) the parameter change does
// not exceed max_change_.
    const CuMatrix<BaseFloat> &in_value_precon,
    const CuMatrix<BaseFloat> &out_deriv_precon) {
  // Limits log spam: only the first 10 clippings are reported.
  static int scaling_factor_printed = 0;

  KALDI_ASSERT(in_value_precon.NumRows() == out_deriv_precon.NumRows());
  CuVector<BaseFloat> in_norm(in_value_precon.NumRows()),
      out_deriv_norm(in_value_precon.NumRows());
  in_norm.AddDiagMat2(1.0, in_value_precon, kNoTrans, 0.0);
  out_deriv_norm.AddDiagMat2(1.0, out_deriv_precon, kNoTrans, 0.0);
  // Get the actual l2 norms, not the squared l2 norm.
  in_norm.ApplyPow(0.5);
  out_deriv_norm.ApplyPow(0.5);
  BaseFloat sum = learning_rate_ * VecVec(in_norm, out_deriv_norm);
  // sum is the product of norms that we are trying to limit
  // to max_value_.
  // "sum == sum && sum - sum == 0.0" is a NaN/Inf check.
  KALDI_ASSERT(sum == sum && sum - sum == 0.0 &&
               "NaN in backprop");
  KALDI_ASSERT(sum >= 0.0);
  if (sum <= max_change_) return 1.0;
  else {
    BaseFloat ans = max_change_ / sum;
    if (scaling_factor_printed < 10) {
      KALDI_LOG << "Limiting step size to " << max_change_
                << " using scaling factor " << ans << ", for component index "
                << Index();
      scaling_factor_printed++;
    }
    return ans;
  }
}
1543 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditioned::Update(") was lost in extraction.
// Preconditioned update: the input (with an appended 1.0 column for the
// bias) and the output derivative are each multiplied by an approximate
// inverse-Fisher matrix before the gradient step.
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {
  CuMatrix<BaseFloat> in_value_temp;

  in_value_temp.Resize(in_value.NumRows(),
                       in_value.NumCols() + 1, kUndefined);
  in_value_temp.Range(0, in_value.NumRows(),
                      0, in_value.NumCols()).CopyFromMat(in_value);

  // Add the 1.0 at the end of each row "in_value_temp"
  in_value_temp.Range(0, in_value.NumRows(),
                      in_value.NumCols(), 1).Set(1.0);

  CuMatrix<BaseFloat> in_value_precon(in_value_temp.NumRows(),
                                      in_value_temp.NumCols(), kUndefined),
      out_deriv_precon(out_deriv.NumRows(),
                       out_deriv.NumCols(), kUndefined);
  // each row of in_value_precon will be that same row of
  // in_value, but multiplied by the inverse of a Fisher
  // matrix that has been estimated from all the other rows,
  // smoothed by some appropriate amount times the identity
  // matrix (this amount is proportional to \alpha).
  PreconditionDirectionsAlphaRescaled(in_value_temp, alpha_, &in_value_precon);
  PreconditionDirectionsAlphaRescaled(out_deriv, alpha_, &out_deriv_precon);

  BaseFloat minibatch_scale = 1.0;

  // max_change_ > 0.0 enables the step-size limiting code.
  if (max_change_ > 0.0)
    minibatch_scale = GetScalingFactor(in_value_precon, out_deriv_precon);

  // All columns except the last (the preconditioned "1.0" column).
  CuSubMatrix<BaseFloat> in_value_precon_part(in_value_precon,
                                              0, in_value_precon.NumRows(),
                                              0, in_value_precon.NumCols() - 1);
  // this "precon_ones" is what happens to the vector of 1's representing
  // offsets, after multiplication by the preconditioner.
  CuVector<BaseFloat> precon_ones(in_value_precon.NumRows());

  precon_ones.CopyColFromMat(in_value_precon, in_value_precon.NumCols() - 1);

  BaseFloat local_lrate = minibatch_scale * learning_rate_;
  bias_params_.AddMatVec(local_lrate, out_deriv_precon, kTrans,
                         precon_ones, 1.0);
  linear_params_.AddMatMat(local_lrate, out_deriv_precon, kTrans,
                           in_value_precon_part, kNoTrans, 1.0);
}
1591 
1592 
1593 // virtual
// virtual
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditionedOnline::Resize(") was lost in
// extraction.  Re-dimensions the parameters, clamping the preconditioner
// ranks below the new dimensions, and resets the preconditioners.
    int32 input_dim, int32 output_dim) {
  KALDI_ASSERT(input_dim > 1 && output_dim > 1);
  if (rank_in_ >= input_dim) rank_in_ = input_dim - 1;
  if (rank_out_ >= output_dim) rank_out_ = output_dim - 1;
  bias_params_.Resize(output_dim);
  linear_params_.Resize(output_dim, input_dim);
  // Assigning default-constructed objects resets the preconditioner state.
  OnlinePreconditioner temp;
  preconditioner_in_ = temp;
  preconditioner_out_ = temp;
  SetPreconditionerConfigs();
}
1606 
1607 
// Deserializes the component, tolerating two older formats: a single
// <Rank> (used for both in/out ranks) instead of <RankIn>/<RankOut>, and a
// missing <UpdatePeriod> (defaulted to 1).
void AffineComponentPreconditionedOnline::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">";
  ostr_end << "</" << Type() << ">";
  // might not see the "<AffineComponentPreconditionedOnline>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  std::string tok;
  ReadToken(is, binary, &tok);
  if (tok == "<Rank>") { // back-compatibility (temporary)
    ReadBasicType(is, binary, &rank_in_);
    rank_out_ = rank_in_;
  } else {
    KALDI_ASSERT(tok == "<RankIn>");
    ReadBasicType(is, binary, &rank_in_);
    ExpectToken(is, binary, "<RankOut>");
    ReadBasicType(is, binary, &rank_out_);
  }
  ReadToken(is, binary, &tok);
  if (tok == "<UpdatePeriod>") {
    ReadBasicType(is, binary, &update_period_);
    ExpectToken(is, binary, "<NumSamplesHistory>");
  } else {
    update_period_ = 1;  // older format: no update period stored.
    KALDI_ASSERT(tok == "<NumSamplesHistory>");
  }
  ReadBasicType(is, binary, &num_samples_history_);
  ExpectToken(is, binary, "<Alpha>");
  ReadBasicType(is, binary, &alpha_);
  ExpectToken(is, binary, "<MaxChangePerSample>");
  ReadBasicType(is, binary, &max_change_per_sample_);
  ExpectToken(is, binary, ostr_end.str());
  // Push the freshly-read settings into the two preconditioners.
  SetPreconditionerConfigs();
}
1647 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditionedOnline::InitFromString(std::string args) {")
// was lost in extraction.  Parses config options (ranks, update period,
// samples history, alpha, max-change-per-sample) and initializes from
// either a matrix file or explicit dimensions.
  std::string orig_args(args);
  bool ok = true;
  std::string matrix_filename;
  BaseFloat learning_rate = learning_rate_;
  BaseFloat num_samples_history = 2000.0, alpha = 4.0,
      max_change_per_sample = 0.1;
  int32 input_dim = -1, output_dim = -1, rank_in = 30, rank_out = 80,
      update_period = 1;
  ParseFromString("learning-rate", &args, &learning_rate);  // optional.
  ParseFromString("num-samples-history", &args, &num_samples_history);
  ParseFromString("alpha", &args, &alpha);
  ParseFromString("max-change-per-sample", &args, &max_change_per_sample);
  ParseFromString("rank-in", &args, &rank_in);
  ParseFromString("rank-out", &args, &rank_out);
  ParseFromString("update-period", &args, &update_period);

  if (ParseFromString("matrix", &args, &matrix_filename)) {
    Init(learning_rate, rank_in, rank_out, update_period,
         num_samples_history, alpha, max_change_per_sample,
         matrix_filename);
    if (ParseFromString("input-dim", &args, &input_dim))
      KALDI_ASSERT(input_dim == InputDim() &&
                   "input-dim mismatch vs. matrix.");
    if (ParseFromString("output-dim", &args, &output_dim))
      KALDI_ASSERT(output_dim == OutputDim() &&
                   "output-dim mismatch vs. matrix.");
  } else {
    ok = ok && ParseFromString("input-dim", &args, &input_dim);
    ok = ok && ParseFromString("output-dim", &args, &output_dim);
    BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
        bias_stddev = 1.0;
    ParseFromString("param-stddev", &args, &param_stddev);
    ParseFromString("bias-stddev", &args, &bias_stddev);
    Init(learning_rate, input_dim, output_dim, param_stddev,
         bias_stddev, rank_in, rank_out, update_period,
         num_samples_history, alpha, max_change_per_sample);
  }
  if (!args.empty())
    KALDI_ERR << "Could not process these elements in initializer: "
              << args;
  if (!ok)
    KALDI_ERR << "Bad initializer " << orig_args;
}
1692 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditionedOnline::SetPreconditionerConfigs() {")
// was lost in extraction.  Pushes the component's stored settings into the
// input- and output-side online preconditioners.
  preconditioner_in_.SetRank(rank_in_);
  preconditioner_in_.SetNumSamplesHistory(num_samples_history_);
  preconditioner_in_.SetAlpha(alpha_);
  preconditioner_in_.SetUpdatePeriod(update_period_);
  preconditioner_out_.SetRank(rank_out_);
  preconditioner_out_.SetNumSamplesHistory(num_samples_history_);
  preconditioner_out_.SetAlpha(alpha_);
  preconditioner_out_.SetUpdatePeriod(update_period_);
}
1703 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditionedOnline::Init(") was lost in
// extraction.  Initializes from a matrix file (last column = bias) plus the
// online-preconditioning settings.
    BaseFloat learning_rate, int32 rank_in, int32 rank_out,
    int32 update_period, BaseFloat num_samples_history, BaseFloat alpha,
    BaseFloat max_change_per_sample,
    std::string matrix_filename) {
  UpdatableComponent::Init(learning_rate);
  rank_in_ = rank_in;
  rank_out_ = rank_out;
  update_period_ = update_period;
  num_samples_history_ = num_samples_history;
  alpha_ = alpha;
  SetPreconditionerConfigs();
  KALDI_ASSERT(max_change_per_sample >= 0.0);
  max_change_per_sample_ = max_change_per_sample;
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat);  // will abort on failure.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 input_dim = mat.NumCols() - 1, output_dim = mat.NumRows();
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  linear_params_.CopyFromMat(mat.Range(0, output_dim, 0, input_dim));
  bias_params_.CopyColFromMat(mat, input_dim);
}
1727 
// NOTE(review): the opening line (a constructor converting a plain
// AffineComponent into the online-preconditioned type) was lost in
// extraction, as was a trailing line — presumably a call to
// SetPreconditionerConfigs() — before the closing brace.
    const AffineComponent &orig,
    int32 rank_in, int32 rank_out, int32 update_period,
    BaseFloat num_samples_history, BaseFloat alpha):
    max_change_per_sample_(0.1) {
  this->linear_params_ = orig.linear_params_;
  this->bias_params_ = orig.bias_params_;
  this->learning_rate_ = orig.learning_rate_;
  this->is_gradient_ = orig.is_gradient_;
  this->rank_in_ = rank_in;
  this->rank_out_ = rank_out;
  this->update_period_ = update_period;
  this->num_samples_history_ = num_samples_history;
  this->alpha_ = alpha;
}
1744 
// NOTE(review): the opening line (presumably
// "void AffineComponentPreconditionedOnline::Init(") was lost in
// extraction, as was a line — presumably SetPreconditionerConfigs() —
// between the alpha_ assignment and the final assertion.  Random
// initialization plus the online-preconditioning settings.
    BaseFloat learning_rate,
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev, BaseFloat bias_stddev,
    int32 rank_in, int32 rank_out, int32 update_period,
    BaseFloat num_samples_history, BaseFloat alpha,
    BaseFloat max_change_per_sample) {
  UpdatableComponent::Init(learning_rate);
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0 &&
               bias_stddev >= 0.0);
  linear_params_.SetRandn();  // sets to random normally distributed noise.
  linear_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
  rank_in_ = rank_in;
  rank_out_ = rank_out;
  update_period_ = update_period;
  num_samples_history_ = num_samples_history;
  alpha_ = alpha;
  KALDI_ASSERT(max_change_per_sample >= 0.0);
  max_change_per_sample_ = max_change_per_sample;
}
1770 
1771 
1772 void AffineComponentPreconditionedOnline::Write(std::ostream &os, bool binary) const {
1773  std::ostringstream ostr_beg, ostr_end;
1774  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponent>"
1775  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponent>"
1776  WriteToken(os, binary, ostr_beg.str());
1777  WriteToken(os, binary, "<LearningRate>");
1778  WriteBasicType(os, binary, learning_rate_);
1779  WriteToken(os, binary, "<LinearParams>");
1780  linear_params_.Write(os, binary);
1781  WriteToken(os, binary, "<BiasParams>");
1782  bias_params_.Write(os, binary);
1783  WriteToken(os, binary, "<RankIn>");
1784  WriteBasicType(os, binary, rank_in_);
1785  WriteToken(os, binary, "<RankOut>");
1786  WriteBasicType(os, binary, rank_out_);
1787  WriteToken(os, binary, "<UpdatePeriod>");
1788  WriteBasicType(os, binary, update_period_);
1789  WriteToken(os, binary, "<NumSamplesHistory>");
1790  WriteBasicType(os, binary, num_samples_history_);
1791  WriteToken(os, binary, "<Alpha>");
1792  WriteBasicType(os, binary, alpha_);
1793  WriteToken(os, binary, "<MaxChangePerSample>");
1795  WriteToken(os, binary, ostr_end.str());
1796 }
1797 
1799  std::stringstream stream;
1800  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
1801  * static_cast<BaseFloat>(linear_params_.NumCols());
1802  BaseFloat linear_stddev =
1804  linear_params_size),
1805  bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
1806  bias_params_.Dim());
1807  stream << Type() << ", input-dim=" << InputDim()
1808  << ", output-dim=" << OutputDim()
1809  << ", linear-params-stddev=" << linear_stddev
1810  << ", bias-params-stddev=" << bias_stddev
1811  << ", learning-rate=" << LearningRate()
1812  << ", rank-in=" << rank_in_
1813  << ", rank-out=" << rank_out_
1814  << ", num_samples_history=" << num_samples_history_
1815  << ", update_period=" << update_period_
1816  << ", alpha=" << alpha_
1817  << ", max-change-per-sample=" << max_change_per_sample_;
1818  return stream.str();
1819 }
1820 
1824  ans->rank_in_ = rank_in_;
1825  ans->rank_out_ = rank_out_;
1828  ans->alpha_ = alpha_;
1830  ans->bias_params_ = bias_params_;
1834  ans->is_gradient_ = is_gradient_;
1835  ans->SetPreconditionerConfigs();
1836  return ans;
1837 }
1838 
1839 
1840 
1842  const CuVectorBase<BaseFloat> &in_products,
1843  BaseFloat learning_rate_scale,
1844  CuVectorBase<BaseFloat> *out_products) {
1845  static int scaling_factor_printed = 0;
1846  int32 minibatch_size = in_products.Dim();
1847 
1848  out_products->MulElements(in_products);
1849  out_products->ApplyPow(0.5);
1850  BaseFloat prod_sum = out_products->Sum();
1851  BaseFloat tot_change_norm = learning_rate_scale * learning_rate_ * prod_sum,
1852  max_change_norm = max_change_per_sample_ * minibatch_size;
1853  // tot_change_norm is the product of norms that we are trying to limit
1854  // to max_value_.
1855  KALDI_ASSERT(tot_change_norm - tot_change_norm == 0.0 && "NaN in backprop");
1856  KALDI_ASSERT(tot_change_norm >= 0.0);
1857  if (tot_change_norm <= max_change_norm) return 1.0;
1858  else {
1859  BaseFloat factor = max_change_norm / tot_change_norm;
1860  if (scaling_factor_printed < 10) {
1861  KALDI_LOG << "Limiting step size using scaling factor "
1862  << factor << ", for component index " << Index();
1863  scaling_factor_printed++;
1864  }
1865  return factor;
1866  }
1867 }
1868 
1870  const CuMatrixBase<BaseFloat> &in_value,
1871  const CuMatrixBase<BaseFloat> &out_deriv) {
1872  CuMatrix<BaseFloat> in_value_temp;
1873 
1874  in_value_temp.Resize(in_value.NumRows(),
1875  in_value.NumCols() + 1, kUndefined);
1876  in_value_temp.Range(0, in_value.NumRows(),
1877  0, in_value.NumCols()).CopyFromMat(in_value);
1878 
1879  // Add the 1.0 at the end of each row "in_value_temp"
1880  in_value_temp.Range(0, in_value.NumRows(),
1881  in_value.NumCols(), 1).Set(1.0);
1882 
1883  CuMatrix<BaseFloat> out_deriv_temp(out_deriv);
1884 
1885  CuMatrix<BaseFloat> row_products(2,
1886  in_value.NumRows());
1887  CuSubVector<BaseFloat> in_row_products(row_products, 0),
1888  out_row_products(row_products, 1);
1889 
1890  // These "scale" values get will get multiplied into the learning rate (faster
1891  // than having the matrices scaled inside the preconditioning code).
1892  BaseFloat in_scale, out_scale;
1893 
1894  preconditioner_in_.PreconditionDirections(&in_value_temp, &in_row_products,
1895  &in_scale);
1896  preconditioner_out_.PreconditionDirections(&out_deriv_temp, &out_row_products,
1897  &out_scale);
1898 
1899  // "scale" is a scaling factor coming from the PreconditionDirections calls
1900  // (it's faster to have them output a scaling factor than to have them scale
1901  // their outputs).
1902  BaseFloat scale = in_scale * out_scale;
1903  BaseFloat minibatch_scale = 1.0;
1904 
1905  if (max_change_per_sample_ > 0.0)
1906  minibatch_scale = GetScalingFactor(in_row_products, scale,
1907  &out_row_products);
1908 
1909  CuSubMatrix<BaseFloat> in_value_precon_part(in_value_temp,
1910  0, in_value_temp.NumRows(),
1911  0, in_value_temp.NumCols() - 1);
1912  // this "precon_ones" is what happens to the vector of 1's representing
1913  // offsets, after multiplication by the preconditioner.
1914  CuVector<BaseFloat> precon_ones(in_value_temp.NumRows());
1915 
1916  precon_ones.CopyColFromMat(in_value_temp, in_value_temp.NumCols() - 1);
1917 
1918  BaseFloat local_lrate = scale * minibatch_scale * learning_rate_;
1919  bias_params_.AddMatVec(local_lrate, out_deriv_temp, kTrans,
1920  precon_ones, 1.0);
1921  linear_params_.AddMatMat(local_lrate, out_deriv_temp, kTrans,
1922  in_value_precon_part, kNoTrans, 1.0);
1923 }
1924 
1925 void BlockAffineComponent::SetZero(bool treat_as_gradient) {
1926  if (treat_as_gradient) {
1927  SetLearningRate(1.0);
1928  }
1929  linear_params_.SetZero();
1930  bias_params_.SetZero();
1931 }
1932 
1934  CuMatrix<BaseFloat> temp_linear_params(linear_params_);
1935  temp_linear_params.SetRandn();
1936  linear_params_.AddMat(stddev, temp_linear_params);
1937 
1938  CuVector<BaseFloat> temp_bias_params(bias_params_);
1939  temp_bias_params.SetRandn();
1940  bias_params_.AddVec(stddev, temp_bias_params);
1941 }
1942 
1944  const UpdatableComponent &other_in) const {
1945  const BlockAffineComponent *other =
1946  dynamic_cast<const BlockAffineComponent*>(&other_in);
1948  + VecVec(bias_params_, other->bias_params_);
1949 }
1950 
1955  ans->bias_params_ = bias_params_;
1956  ans->num_blocks_ = num_blocks_;
1957  return ans;
1958 }
1959 
1961  linear_params_.Scale(scale);
1962  bias_params_.Scale(scale);
1963 }
1964 
1966  const UpdatableComponent &other_in) {
1967  const BlockAffineComponent *other =
1968  dynamic_cast<const BlockAffineComponent*>(&other_in);
1969  KALDI_ASSERT(other != NULL);
1970  linear_params_.AddMat(alpha, other->linear_params_);
1971  bias_params_.AddVec(alpha, other->bias_params_);
1972 }
1973 
1975  const ChunkInfo &out_info,
1976  const CuMatrixBase<BaseFloat> &in,
1977  CuMatrixBase<BaseFloat> *out) const {
1978  in_info.CheckSize(in);
1979  out_info.CheckSize(*out);
1980  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
1981 
1982  // The matrix has a block structure where each matrix has input dim
1983  // (#rows) equal to input_block_dim. The blocks are stored in linear_params_
1984  // as [ M
1985  // N
1986  // O ] but we actually treat it as:
1987  // [ M 0 0
1988  // 0 N 0
1989  // 0 0 O ]
1990  int32 input_block_dim = linear_params_.NumCols(),
1991  output_block_dim = linear_params_.NumRows() / num_blocks_,
1992  num_frames = in.NumRows();
1993  KALDI_ASSERT(in.NumCols() == input_block_dim * num_blocks_);
1994  KALDI_ASSERT(out->NumCols() == output_block_dim * num_blocks_);
1995  KALDI_ASSERT(in.NumRows() == out->NumRows());
1996 
1997  out->CopyRowsFromVec(bias_params_); // copies bias_params_ to each row
1998  // of *out.
1999 
2000  for (int32 b = 0; b < num_blocks_; b++) {
2001  CuSubMatrix<BaseFloat> in_block(in, 0, num_frames,
2002  b * input_block_dim, input_block_dim),
2003  out_block(*out, 0, num_frames,
2004  b * output_block_dim, output_block_dim),
2005  param_block(linear_params_,
2006  b * output_block_dim, output_block_dim,
2007  0, input_block_dim);
2008  out_block.AddMatMat(1.0, in_block, kNoTrans, param_block, kTrans, 1.0);
2009  }
2010 }
2011 
2013  const CuMatrixBase<BaseFloat> &in_value,
2014  const CuMatrixBase<BaseFloat> &out_deriv) {
2015  int32 input_block_dim = linear_params_.NumCols(),
2016  output_block_dim = linear_params_.NumRows() / num_blocks_,
2017  num_frames = in_value.NumRows();
2018 
2019  bias_params_.AddRowSumMat(learning_rate_, out_deriv, 1.0);
2020  for (int32 b = 0; b < num_blocks_; b++) {
2021  CuSubMatrix<BaseFloat> in_value_block(in_value, 0, num_frames,
2022  b * input_block_dim,
2023  input_block_dim),
2024  out_deriv_block(out_deriv, 0, num_frames,
2025  b * output_block_dim, output_block_dim),
2026  param_block(linear_params_,
2027  b * output_block_dim, output_block_dim,
2028  0, input_block_dim);
2029  // Update the parameters.
2030  param_block.AddMatMat(learning_rate_, out_deriv_block, kTrans,
2031  in_value_block, kNoTrans, 1.0);
2032  }
2033 }
2034 
2036  const ChunkInfo &, //out_info,
2037  const CuMatrixBase<BaseFloat> &in_value,
2038  const CuMatrixBase<BaseFloat> &, //out_value,
2039  const CuMatrixBase<BaseFloat> &out_deriv,
2040  Component *to_update_in,
2041  CuMatrix<BaseFloat> *in_deriv) const {
2042 
2043  // This code mirrors the code in Propagate().
2044  int32 num_frames = in_value.NumRows();
2045  BlockAffineComponent *to_update = dynamic_cast<BlockAffineComponent*>(
2046  to_update_in);
2047  in_deriv->Resize(out_deriv.NumRows(), InputDim());
2048  int32 input_block_dim = linear_params_.NumCols(),
2049  output_block_dim = linear_params_.NumRows() / num_blocks_;
2050  KALDI_ASSERT(in_value.NumCols() == input_block_dim * num_blocks_);
2051  KALDI_ASSERT(out_deriv.NumCols() == output_block_dim * num_blocks_);
2052 
2053  for (int32 b = 0; b < num_blocks_; b++) {
2054  CuSubMatrix<BaseFloat> in_value_block(in_value, 0, num_frames,
2055  b * input_block_dim,
2056  input_block_dim),
2057  in_deriv_block(*in_deriv, 0, num_frames,
2058  b * input_block_dim, input_block_dim),
2059  out_deriv_block(out_deriv, 0, num_frames,
2060  b * output_block_dim, output_block_dim),
2061  param_block(linear_params_,
2062  b * output_block_dim, output_block_dim,
2063  0, input_block_dim);
2064 
2065  // Propagate the derivative back to the input.
2066  in_deriv_block.AddMatMat(1.0, out_deriv_block, kNoTrans,
2067  param_block, kNoTrans, 0.0);
2068  }
2069  if (to_update != NULL)
2070  to_update->Update(in_value, out_deriv);
2071 }
2072 
2073 
2075  int32 input_dim, int32 output_dim,
2076  BaseFloat param_stddev,
2077  BaseFloat bias_stddev,
2078  int32 num_blocks) {
2079  UpdatableComponent::Init(learning_rate);
2080  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
2081  KALDI_ASSERT(input_dim % num_blocks == 0 && output_dim % num_blocks == 0);
2082 
2083  linear_params_.Resize(output_dim, input_dim / num_blocks);
2084  bias_params_.Resize(output_dim);
2085 
2086  linear_params_.SetRandn(); // sets to random normally distributed noise.
2087  linear_params_.Scale(param_stddev);
2088  bias_params_.SetRandn();
2089  bias_params_.Scale(bias_stddev);
2090  num_blocks_ = num_blocks;
2091 }
2092 
2094  std::string orig_args(args);
2095  bool ok = true;
2096  BaseFloat learning_rate = learning_rate_;
2097  int32 input_dim = -1, output_dim = -1, num_blocks = 1;
2098  ParseFromString("learning-rate", &args, &learning_rate); // optional.
2099  ok = ok && ParseFromString("input-dim", &args, &input_dim);
2100  ok = ok && ParseFromString("output-dim", &args, &output_dim);
2101  ok = ok && ParseFromString("num-blocks", &args, &num_blocks);
2102  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
2103  bias_stddev = 1.0;
2104  ParseFromString("param-stddev", &args, &param_stddev);
2105  ParseFromString("bias-stddev", &args, &bias_stddev);
2106  if (!args.empty())
2107  KALDI_ERR << "Could not process these elements in initializer: "
2108  << args;
2109  if (!ok)
2110  KALDI_ERR << "Bad initializer " << orig_args;
2111  Init(learning_rate, input_dim, output_dim,
2112  param_stddev, bias_stddev, num_blocks);
2113 }
2114 
2115 
// Deserializes the component: learning rate, block count, then the stacked
// per-block weight matrix and the bias vector.  The token sequence must
// exactly mirror BlockAffineComponent::Write().
void BlockAffineComponent::Read(std::istream &is, bool binary) {
  // ExpectOneOrTwoTokens accepts the stream positioned either at the opening
  // "<BlockAffineComponent>" tag or directly at "<LearningRate>".
  ExpectOneOrTwoTokens(is, binary, "<BlockAffineComponent>", "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<NumBlocks>");
  ReadBasicType(is, binary, &num_blocks_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "</BlockAffineComponent>");
}
2127 
// Serializes the component in the token format consumed by Read():
// learning rate, number of blocks, stacked linear parameters, and bias,
// bracketed by <BlockAffineComponent> ... </BlockAffineComponent>.
void BlockAffineComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<BlockAffineComponent>");
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<NumBlocks>");
  WriteBasicType(os, binary, num_blocks_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "</BlockAffineComponent>");
}
2140 
2141 
2143  // Note: num_blocks_ should divide both InputDim() and OutputDim().
2144  return InputDim() * OutputDim() / num_blocks_;
2145 }
2146 
2148  int32 l = linear_params_.NumRows() * linear_params_.NumCols(),
2149  b = bias_params_.Dim();
2150  params->Range(0, l).CopyRowsFromMat(linear_params_);
2151  params->Range(l, b).CopyFromVec(bias_params_);
2152 }
2154  int32 l = linear_params_.NumRows() * linear_params_.NumCols(),
2155  b = bias_params_.Dim();
2156  linear_params_.CopyRowsFromVec(params.Range(0, l));
2157  bias_params_.CopyFromVec(params.Range(l, b));
2158 }
2159 
2160 
2162  int32 input_dim, int32 output_dim,
2163  BaseFloat param_stddev,
2164  BaseFloat bias_stddev,
2165  int32 num_blocks,
2166  BaseFloat alpha) {
2167  BlockAffineComponent::Init(learning_rate, input_dim, output_dim,
2168  param_stddev, bias_stddev, num_blocks);
2169  is_gradient_ = false;
2170  KALDI_ASSERT(alpha > 0.0);
2171  alpha_ = alpha;
2172 }
2173 
2175  std::string orig_args(args);
2176  bool ok = true;
2177  BaseFloat learning_rate = learning_rate_;
2178  BaseFloat alpha = 4.0;
2179  int32 input_dim = -1, output_dim = -1, num_blocks = 1;
2180  ParseFromString("learning-rate", &args, &learning_rate); // optional.
2181  ParseFromString("alpha", &args, &alpha);
2182  ok = ok && ParseFromString("input-dim", &args, &input_dim);
2183  ok = ok && ParseFromString("output-dim", &args, &output_dim);
2184  ok = ok && ParseFromString("num-blocks", &args, &num_blocks);
2185 
2186  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
2187  bias_stddev = 1.0;
2188  ParseFromString("param-stddev", &args, &param_stddev);
2189  ParseFromString("bias-stddev", &args, &bias_stddev);
2190  if (!args.empty())
2191  KALDI_ERR << "Could not process these elements in initializer: "
2192  << args;
2193  if (!ok)
2194  KALDI_ERR << "Bad initializer " << orig_args;
2195  Init(learning_rate, input_dim, output_dim,
2196  param_stddev, bias_stddev, num_blocks,
2197  alpha);
2198 }
2199 
2200 void BlockAffineComponentPreconditioned::SetZero(bool treat_as_gradient) {
2201  if (treat_as_gradient)
2202  is_gradient_ = true;
2203  BlockAffineComponent::SetZero(treat_as_gradient);
2204 }
2205 
// Deserializes the preconditioned variant: in addition to the base-class
// fields it reads the preconditioning constant alpha_ and the is_gradient_
// flag.  Token order must mirror BlockAffineComponentPreconditioned::Write().
void BlockAffineComponentPreconditioned::Read(std::istream &is, bool binary) {
  // Accepts the stream with or without the opening component tag.
  ExpectOneOrTwoTokens(is, binary, "<BlockAffineComponentPreconditioned>",
                       "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<NumBlocks>");
  ReadBasicType(is, binary, &num_blocks_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "<Alpha>");
  ReadBasicType(is, binary, &alpha_);
  ExpectToken(is, binary, "<IsGradient>");
  ReadBasicType(is, binary, &is_gradient_);
  ExpectToken(is, binary, "</BlockAffineComponentPreconditioned>");
}
2222 
2224  bool binary) const {
2225  WriteToken(os, binary, "<BlockAffineComponentPreconditioned>");
2226  WriteToken(os, binary, "<LearningRate>");
2227  WriteBasicType(os, binary, learning_rate_);
2228  WriteToken(os, binary, "<NumBlocks>");
2229  WriteBasicType(os, binary, num_blocks_);
2230  WriteToken(os, binary, "<LinearParams>");
2231  linear_params_.Write(os, binary);
2232  WriteToken(os, binary, "<BiasParams>");
2233  bias_params_.Write(os, binary);
2234  WriteToken(os, binary, "<Alpha>");
2235  WriteBasicType(os, binary, alpha_);
2236  WriteToken(os, binary, "<IsGradient>");
2237  WriteBasicType(os, binary, is_gradient_);
2238  WriteToken(os, binary, "</BlockAffineComponentPreconditioned>");
2239 }
2240 
2246  ans->bias_params_ = bias_params_;
2247  ans->num_blocks_ = num_blocks_;
2248  ans->alpha_ = alpha_;
2249  ans->is_gradient_ = is_gradient_;
2250  return ans;
2251 }
2252 
2254  const CuMatrixBase<BaseFloat> &in_value,
2255  const CuMatrixBase<BaseFloat> &out_deriv) {
2256  if (is_gradient_) {
2257  UpdateSimple(in_value, out_deriv);
2258  // does the baseline update with no preconditioning.
2259  return;
2260  }
2261  int32 input_block_dim = linear_params_.NumCols(),
2262  output_block_dim = linear_params_.NumRows() / num_blocks_,
2263  num_frames = in_value.NumRows();
2264 
2265  CuMatrix<BaseFloat> in_value_temp(num_frames, input_block_dim + 1, kUndefined),
2266  in_value_precon(num_frames, input_block_dim + 1, kUndefined);
2267  in_value_temp.Set(1.0); // so last row will have value 1.0.
2268  CuSubMatrix<BaseFloat> in_value_temp_part(in_value_temp, 0, num_frames,
2269  0, input_block_dim); // all but last 1.0
2270  CuSubMatrix<BaseFloat> in_value_precon_part(in_value_precon, 0, num_frames,
2271  0, input_block_dim);
2272  CuVector<BaseFloat> precon_ones(num_frames);
2273  CuMatrix<BaseFloat> out_deriv_precon(num_frames, output_block_dim, kUndefined);
2274 
2275  for (int32 b = 0; b < num_blocks_; b++) {
2276  CuSubMatrix<BaseFloat> in_value_block(in_value, 0, num_frames,
2277  b * input_block_dim,
2278  input_block_dim),
2279  out_deriv_block(out_deriv, 0, num_frames,
2280  b * output_block_dim, output_block_dim),
2281  param_block(linear_params_,
2282  b * output_block_dim, output_block_dim,
2283  0, input_block_dim);
2284  in_value_temp_part.CopyFromMat(in_value_block);
2285 
2287  &in_value_precon);
2289  &out_deriv_precon);
2290 
2291 
2292  // Update the parameters.
2293  param_block.AddMatMat(learning_rate_, out_deriv_precon, kTrans,
2294  in_value_precon_part, kNoTrans, 1.0);
2295  precon_ones.CopyColFromMat(in_value_precon, input_block_dim);
2296  bias_params_.Range(b * output_block_dim, output_block_dim).
2297  AddMatVec(learning_rate_, out_deriv_precon, kTrans,
2298  precon_ones, 1.0);
2299  }
2300 }
2301 
2302 
// Deserializes the column-permutation vector reorder_.
// Token order must mirror PermuteComponent::Write().
void PermuteComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<PermuteComponent>", "<Reorder>");
  ReadIntegerVector(is, binary, &reorder_);
  ExpectToken(is, binary, "</PermuteComponent>");
}
2308 
// Serializes the column-permutation vector in the format consumed by Read().
void PermuteComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<PermuteComponent>");
  WriteToken(os, binary, "<Reorder>");
  WriteIntegerVector(os, binary, reorder_);
  WriteToken(os, binary, "</PermuteComponent>");
}
2315 
2317  KALDI_ASSERT(dim > 0);
2318  reorder_.resize(dim);
2319  for (int32 i = 0; i < dim; i++) reorder_[i] = i;
2320  std::random_shuffle(reorder_.begin(), reorder_.end());
2321 }
2322 
2323 void PermuteComponent::InitFromString(std::string args) {
2324  std::string orig_args(args);
2325  int32 dim;
2326  bool ok = ParseFromString("dim", &args, &dim);
2327  if (!ok || !args.empty() || dim <= 0)
2328  KALDI_ERR << "Invalid initializer for layer of type "
2329  << Type() << ": \"" << orig_args << "\"";
2330  Init(dim);
2331 }
2332 
2334  const ChunkInfo &out_info,
2335  const CuMatrixBase<BaseFloat> &in,
2336  CuMatrixBase<BaseFloat> *out) const {
2337  in_info.CheckSize(in);
2338  out_info.CheckSize(*out);
2339  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
2340 
2341  std::vector<int32> reverse_reorder(reorder_.size());
2342  for (size_t i = 0; i < reorder_.size(); i++)
2343  reverse_reorder[reorder_[i]] = i;
2344  // Note: if we were actually using this component type we could make the
2345  // CuArray a member variable for efficiency.
2346  CuArray<int32> cu_reverse_reorder(reverse_reorder);
2347  out->CopyCols(in, cu_reverse_reorder);
2348 }
2349 
2350 void PermuteComponent::Backprop(const ChunkInfo &, //in_info,
2351  const ChunkInfo &, //out_info,
2352  const CuMatrixBase<BaseFloat> &in_value,
2353  const CuMatrixBase<BaseFloat> &out_value,
2354  const CuMatrixBase<BaseFloat> &out_deriv,
2355  Component *to_update,
2356  CuMatrix<BaseFloat> *in_deriv) const {
2357  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
2358  KALDI_ASSERT(out_deriv.NumCols() == OutputDim());
2359  // Note: if we were actually using this component type we could make the
2360  // CuArray a member variable for efficiency.
2361  CuArray<int32> cu_reorder(reorder_);
2362  in_deriv->CopyCols(out_deriv, cu_reorder);
2363 }
2364 
2365 void SumGroupComponent::Init(const std::vector<int32> &sizes) {
2366  KALDI_ASSERT(!sizes.empty());
2367  std::vector<Int32Pair> cpu_vec(sizes.size());
2368  std::vector<int32> reverse_cpu_vec;
2369  int32 cur_index = 0;
2370  for (size_t i = 0; i < sizes.size(); i++) {
2371  KALDI_ASSERT(sizes[i] > 0);
2372  cpu_vec[i].first = cur_index;
2373  cpu_vec[i].second = cur_index + sizes[i];
2374  cur_index += sizes[i];
2375  for (int32 j = cpu_vec[i].first; j < cpu_vec[i].second; j++)
2376  reverse_cpu_vec.push_back(i);
2377  }
2378  this->indexes_ = cpu_vec;
2379  this->reverse_indexes_ = reverse_cpu_vec;
2380  this->input_dim_ = cur_index;
2381  this->output_dim_ = sizes.size();
2382 }
2383 
2384 void SumGroupComponent::InitFromString(std::string args) {
2385  std::string orig_args(args);
2386  std::vector<int32> sizes;
2387  bool ok = ParseFromString("sizes", &args, &sizes);
2388 
2389  if (!ok || !args.empty() || sizes.empty())
2390  KALDI_ERR << "Invalid initializer for layer of type "
2391  << Type() << ": \"" << orig_args << "\"";
2392  this->Init(sizes);
2393 }
2394 
2396  SumGroupComponent *ans = new SumGroupComponent();
2397  ans->indexes_ = indexes_;
2398  ans->reverse_indexes_ = reverse_indexes_;
2399  ans->input_dim_ = input_dim_;
2400  ans->output_dim_ = output_dim_;
2401  return ans;
2402 }
2403 
// Deserializes the component from its list of group sizes and rebuilds the
// index structures via Init().
void SumGroupComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<SumGroupComponent>", "<Sizes>");
  std::vector<int32> sizes;
  ReadIntegerVector(is, binary, &sizes);

  std::string token;
  ReadToken(is, binary, &token);
  // Accepts "<SumGroupComponent>" as well as the proper closing tag --
  // presumably for back-compat with models written before the end token was
  // corrected; TODO(review): confirm against old model files.
  if (!(token == "<SumGroupComponent>" ||
        token == "</SumGroupComponent>")) {
    KALDI_ERR << "Expected </SumGroupComponent>, got " << token;
  }
  this->Init(sizes);
}
2417 
2418 void SumGroupComponent::GetSizes(std::vector<int32> *sizes) const {
2419  std::vector<Int32Pair> indexes;
2420  indexes_.CopyToVec(&indexes);
2421  sizes->resize(indexes.size());
2422  for (size_t i = 0; i < indexes.size(); i++) {
2423  (*sizes)[i] = indexes[i].second - indexes[i].first;
2424  if (i == 0) { KALDI_ASSERT(indexes[i].first == 0); }
2425  else { KALDI_ASSERT(indexes[i].first == indexes[i-1].second); }
2426  KALDI_ASSERT(indexes[i].second > indexes[i].first);
2427  (*sizes)[i] = indexes[i].second - indexes[i].first;
2428  }
2429 }
2430 
// Serializes the component as its list of group sizes (recomputed from
// indexes_ via GetSizes); Read() reconstructs everything else from them.
void SumGroupComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<SumGroupComponent>");
  WriteToken(os, binary, "<Sizes>");
  std::vector<int32> sizes;
  this->GetSizes(&sizes);
  WriteIntegerVector(os, binary, sizes);
  WriteToken(os, binary, "</SumGroupComponent>");
}
2439 
2441  const ChunkInfo &out_info,
2442  const CuMatrixBase<BaseFloat> &in,
2443  CuMatrixBase<BaseFloat> *out) const {
2444  in_info.CheckSize(in);
2445  out_info.CheckSize(*out);
2446  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
2447 
2448  out->SumColumnRanges(in, indexes_);
2449 }
2450 
2452  const ChunkInfo &out_info,
2453  const CuMatrixBase<BaseFloat> &, //in_value,
2454  const CuMatrixBase<BaseFloat> &, //out_value,
2455  const CuMatrixBase<BaseFloat> &out_deriv,
2456  Component *to_update, // may be identical to "this".
2457  CuMatrix<BaseFloat> *in_deriv) const {
2458  in_deriv->Resize(out_deriv.NumRows(), InputDim());
2459  in_deriv->CopyCols(out_deriv, reverse_indexes_);
2460 }
2461 
2462 
2463 std::string SpliceComponent::Info() const {
2464  std::stringstream stream;
2465  std::ostringstream os;
2466  std::copy(context_.begin(), context_.end(),
2467  std::ostream_iterator<int32>(os, " "));
2468  stream << Component::Info() << ", context=" << os.str();
2469  if (const_component_dim_ != 0)
2470  stream << ", const_component_dim=" << const_component_dim_;
2471 
2472  return stream.str();
2473 }
2474 
// Initializes the splicing component.
//   input_dim: dimension of each input frame (must be > 0).
//   context:   sorted, unique frame offsets to splice together; must span
//              offset 0 (front() <= 0 and back() >= 0).
//   const_component_dim: size of the trailing part of each input frame that
//              is not spliced (copied once per output frame instead); must
//              be in [0, input_dim).
void SpliceComponent::Init(int32 input_dim, std::vector<int32> context,
                           int32 const_component_dim) {
  input_dim_ = input_dim;
  const_component_dim_ = const_component_dim;
  context_ = context;
  // Validation happens after assignment; KALDI_ASSERT aborts on failure.
  KALDI_ASSERT(context_.size() > 0);
  KALDI_ASSERT(input_dim_ > 0 && context_.front() <= 0 && context_.back() >= 0);
  KALDI_ASSERT(IsSortedAndUniq(context));
  KALDI_ASSERT(const_component_dim_ >= 0 && const_component_dim_ < input_dim_);
}
2485 
2486 
2487 // e.g. args == "input-dim=10 left-context=2 right-context=2
// Parses splice configuration.  The context may be given either explicitly
// via "context=..." or as "left-context"/"right-context" bounds, which are
// expanded to the full integer range [-left_context, right_context].
// "const-component-dim" is optional and defaults to 0.
void SpliceComponent::InitFromString(std::string args) {
  std::string orig_args(args);  // kept intact for the error message.
  int32 input_dim, left_context, right_context;
  std::vector <int32> context;
  bool in_dim_ok = ParseFromString("input-dim", &args, &input_dim);
  bool context_ok = ParseFromString("context", &args, &context);
  // Note: && short-circuits -- if "left-context" is absent, "right-context"
  // is never parsed, and if present it stays in 'args' and triggers the
  // "!args.empty()" error below.
  bool left_right_context_ok = ParseFromString("left-context", &args,
                                               &left_context) &&
                               ParseFromString("right-context", &args,
                                               &right_context);
  int32 const_component_dim = 0;
  ParseFromString("const-component-dim", &args, &const_component_dim);

  if (!(in_dim_ok && (context_ok || left_right_context_ok)) ||
      !args.empty() || input_dim <= 0)
    KALDI_ERR << "Invalid initializer for layer of type "
              << Type() << ": \"" << orig_args << "\"";
  if (left_right_context_ok) {
    // Supplying both an explicit "context" and left/right bounds is invalid.
    KALDI_ASSERT(context.size() == 0);
    for (int32 i = -left_context; i <= right_context; i++)
      context.push_back(i);
  }
  Init(input_dim, context, const_component_dim);
}
2512 
2514  return (input_dim_ - const_component_dim_)
2515  * (context_.size())
2516  + const_component_dim_;
2517 }
2518 
2520  if (offsets_.empty()) { // if data is contiguous
2521  KALDI_ASSERT((offset <= last_offset_) && (offset >= first_offset_));
2522  return offset - first_offset_;
2523  } else {
2524  std::vector<int32>::const_iterator iter =
2525  std::lower_bound(offsets_.begin(), offsets_.end(), offset);
2526  // make sure offset is present in the vector
2527  KALDI_ASSERT(iter != offsets_.end() && *iter == offset);
2528  return static_cast<int32>(iter - offsets_.begin());
2529  }
2530 }
2531 
2533  if (offsets_.empty()) { // if data is contiguous
2534  int32 offset = index + first_offset_; // just offset by the first_offset_
2535  KALDI_ASSERT((offset <= last_offset_) && (offset >= first_offset_));
2536  return offset;
2537  } else {
2538  KALDI_ASSERT((index >= 0) && (index < offsets_.size()));
2539  return offsets_[index];
2540  }
2541 }
2542 
// Sanity-checks the internal consistency of this ChunkInfo; all failures
// abort via KALDI_ASSERT.
void ChunkInfo::Check() const {
  // Checking sanity of the ChunkInfo object
  KALDI_ASSERT((feat_dim_ > 0) && (num_chunks_ > 0));

  if (! offsets_.empty()) {
    // Non-contiguous chunk: the cached first/last offsets must match the
    // ends of the explicit offsets list.
    KALDI_ASSERT((first_offset_ == offsets_.front()) &&
                 (last_offset_ == offsets_.back()));
  } else {
    // Contiguous chunk: offsets are implicit in [first_offset_, last_offset_].
    KALDI_ASSERT((first_offset_ >= 0) && (last_offset_ >= first_offset_));
    // NOTE(review): offsets_ is empty in this branch, so offsets_.size() == 0
    // and the assert below is trivially true.  The original comment here
    // ("asserting the chunk is not contiguous, as offsets is not empty")
    // contradicts the branch condition -- this check may have been intended
    // for the non-empty branch above; confirm before relying on it.
    KALDI_ASSERT ( last_offset_ - first_offset_ + 1 > offsets_.size() );
  }
  KALDI_ASSERT(NumRows() % num_chunks_ == 0);

}
2558 
2560  KALDI_ASSERT((mat.NumRows() == NumRows()) && (mat.NumCols() == NumCols()));
2561 }
2562 
2563 /*
2564  * This method was used for debugging, make changes in nnet-component.h to
2565  * expose it
2566 void ChunkInfo::ToString() const {
2567  KALDI_LOG << "feat_dim " << feat_dim_;
2568  KALDI_LOG << "num_chunks " << num_chunks_;
2569  KALDI_LOG << "first_index " << first_offset_;
2570  KALDI_LOG << "last_index " << last_offset_;
2571  for (size_t i = 0; i < offsets_.size(); i++)
2572  KALDI_LOG << offsets_[i];
2573 }
2574 */
2575 
2576 
void SpliceComponent::Propagate(const ChunkInfo &in_info,
                                const ChunkInfo &out_info,
                                const CuMatrixBase<BaseFloat> &in,
                                CuMatrixBase<BaseFloat> *out) const {
  // Splices the input over the frame offsets in context_: column-block c of
  // each output row is the input row at (output offset + context_[c]).  The
  // last const_component_dim_ columns are copied once per frame, unspliced.

  // Check the inputs are correct and resize output
  in_info.Check();
  out_info.Check();
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  int32 in_chunk_size  = in_info.ChunkSize(),
        out_chunk_size = out_info.ChunkSize(),
        input_dim = in_info.NumCols();

  if (out_chunk_size <= 0)
    KALDI_ERR << "Splicing features: output will have zero dimension. "
              << "Probably a code error.";

  // 'indexes' is, for each index from 0 to context_.size() - 1,
  // then for each row of "out", the corresponding row of "in" that we copy from
  int32 num_splice = context_.size();
  std::vector<std::vector<int32> > indexes(num_splice);
  for (int32 c = 0; c < num_splice; c++)
    indexes[c].resize(out->NumRows());
  // const_component_dim_ != 0, "const_indexes" will be used to determine which
  // row of "in" we copy the last part of each row of "out" from (this part is
  // not subject to splicing, it's assumed constant for each frame of "input".
  int32 const_dim = const_component_dim_;
  std::vector<int32> const_indexes(const_dim == 0 ? 0 : out->NumRows());

  for (int32 chunk = 0; chunk < in_info.NumChunks(); chunk++) {
    if (chunk == 0) {
      // this branch could be used for all chunks in the matrix,
      // but is restricted to chunk 0 for efficiency reasons
      for (int32 c = 0; c < num_splice; c++) {
        for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
          int32 out_offset = out_info.GetOffset(out_index);
          int32 in_index = in_info.GetIndex(out_offset + context_[c]);
          indexes[c][chunk * out_chunk_size + out_index] =
              chunk * in_chunk_size + in_index;
        }
      }
    } else {  // just copy the indices from the previous chunk
              // and offset these by input chunk size
      for (int32 c = 0; c < num_splice; c++) {
        for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
          int32 last_value = indexes[c][(chunk-1) * out_chunk_size + out_index];
          indexes[c][chunk * out_chunk_size + out_index] =
              (last_value == -1 ? -1 : last_value + in_chunk_size);
        }
      }
    }
    if (const_dim != 0) {
      for (int32 out_index = 0; out_index < out_chunk_size; out_index++)
        const_indexes[chunk * out_chunk_size + out_index] =
            chunk * in_chunk_size + out_index;  // there is
      // an arbitrariness here; since we assume the const_component
      // is constant within a chunk, it doesn't matter from where we copy.
    }
  }

  // Do the actual copying on the device: one CopyRows() per context offset,
  // writing into the c'th column-block of the output.
  for (int32 c = 0; c < num_splice; c++) {
    int32 dim = input_dim - const_dim;  // dimension we
    // are splicing
    CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
                                   0, dim),
        out_part(*out, 0, out->NumRows(),
                 c * dim, dim);
    CuArray<int32> cu_indexes(indexes[c]);
    out_part.CopyRows(in_part, cu_indexes);
  }
  // Copy the trailing constant block (if any) once per output row.
  if (const_dim != 0) {
    CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
                                   in.NumCols() - const_dim, const_dim),
        out_part(*out, 0, out->NumRows(),
                 out->NumCols() - const_dim, const_dim);

    CuArray<int32> cu_const_indexes(const_indexes);
    out_part.CopyRows(in_part, cu_const_indexes);
  }
}
2661 
void SpliceComponent::Backprop(const ChunkInfo &in_info,
                               const ChunkInfo &out_info,
                               const CuMatrixBase<BaseFloat> &,  // in_value,
                               const CuMatrixBase<BaseFloat> &,  // out_value,
                               const CuMatrixBase<BaseFloat> &out_deriv,
                               Component *to_update,
                               CuMatrix<BaseFloat> *in_deriv) const {
  // Reverse of Propagate(): for each context offset, the derivative of the
  // c'th output column-block is routed back (summed) onto the input rows it
  // was spliced from.  No parameters, so "to_update" is unused.
  in_info.Check();
  out_info.Check();
  out_info.CheckSize(out_deriv);
  in_deriv->Resize(in_info.NumRows(), in_info.NumCols(), kUndefined);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
  int32 num_chunks = in_info.NumChunks();
  // rewrite backpropagate

  int32 out_chunk_size = out_info.ChunkSize(),
        in_chunk_size  = in_info.ChunkSize(),
        output_dim = out_deriv.NumCols(),
        input_dim = InputDim();

  KALDI_ASSERT(OutputDim() == output_dim);

  int32 num_splice = context_.size(),
        const_dim = const_component_dim_;
  // 'indexes' is, for each index from 0 to num_splice - 1,
  // then for each row of "in_deriv", the corresponding row of "out_deriv" that
  // we add, or -1 if.

  std::vector<std::vector<int32> > indexes(num_splice);
  // const_dim != 0, "const_indexes" will be used to determine which
  // row of "in" we copy the last part of each row of "out" from (this part is
  // not subject to splicing, it's assumed constant for each frame of "input".
  std::vector<int32> const_indexes(const_dim == 0 ? 0 : in_deriv->NumRows(), -1);

  for (int32 c = 0; c < indexes.size(); c++)
    indexes[c].resize(in_deriv->NumRows(), -1);  // set to -1 by default,
  // this gets interpreted by the CopyRows() code
  // as a signal to zero the output...

  int32 dim = input_dim - const_dim;  // dimension we are splicing
  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
    if (chunk == 0) {  // this branch can be taken for all chunks, but is not
                       // taken for efficiency reasons
      for (int32 c = 0; c < num_splice; c++) {
        for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
          int32 out_offset = out_info.GetOffset(out_index);
          int32 in_index = in_info.GetIndex(out_offset + context_[c]);
          indexes[c][chunk * in_chunk_size + in_index] =
              chunk * out_chunk_size + out_index;
        }
      }
    } else {  // just copy the indexes from the previous chunk
      for (int32 c = 0; c < num_splice; c++) {
        for (int32 in_index = 0; in_index < in_chunk_size; in_index++) {
          int32 last_value = indexes[c][(chunk-1) * in_chunk_size + in_index];
          indexes[c][chunk * in_chunk_size + in_index] =
              (last_value == -1 ? -1 : last_value + out_chunk_size);
        }
      }
    }
    // this code corresponds to the way the forward propagation works; see
    // comments there.
    if (const_dim != 0) {
      for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
        const_indexes[chunk * in_chunk_size + out_index] =
            chunk * out_chunk_size + out_index;
      }
    }
  }

  // Scratch matrix so contributions from different context offsets can be
  // accumulated into in_deriv (CopyRows overwrites, so only c == 0 writes
  // directly).
  CuMatrix<BaseFloat> temp_mat(in_deriv->NumRows(), dim, kUndefined);

  for (int32 c = 0; c < num_splice; c++) {
    CuArray<int32> cu_indexes(indexes[c]);
    int32 dim = input_dim - const_dim;  // dimension we
    // are splicing
    CuSubMatrix<BaseFloat> out_deriv_part(out_deriv, 0, out_deriv.NumRows(),
                                          c * dim, dim),
        in_deriv_part(*in_deriv, 0, in_deriv->NumRows(),
                      0, dim);
    if (c == 0) {
      in_deriv_part.CopyRows(out_deriv_part, cu_indexes);
    } else {
      temp_mat.CopyRows(out_deriv_part, cu_indexes);
      in_deriv_part.AddMat(1.0, temp_mat);
    }
  }
  // The constant block's derivative is copied straight back.
  if (const_dim != 0) {
    CuSubMatrix<BaseFloat> out_deriv_part(out_deriv, 0, out_deriv.NumRows(),
                                          out_deriv.NumCols() - const_dim,
                                          const_dim),
        in_deriv_part(*in_deriv, 0, in_deriv->NumRows(),
                      in_deriv->NumCols() - const_dim, const_dim);
    CuArray<int32> cu_const_indexes(const_indexes);
    in_deriv_part.CopyRows(out_deriv_part, cu_const_indexes);
  }
}
2759 
2761  SpliceComponent *ans = new SpliceComponent();
2762  ans->input_dim_ = input_dim_;
2763  ans->context_ = context_;
2764  ans->const_component_dim_ = const_component_dim_;
2765  return ans;
2766 }
2767 
void SpliceComponent::Read(std::istream &is, bool binary) {
  // Deserializes the component; tokens must appear in the exact order that
  // Write() emits them.  Two on-disk formats are accepted for back
  // compatibility: the old <LeftContext>/<RightContext> pair and the newer
  // <Context> integer-vector form.
  ExpectOneOrTwoTokens(is, binary, "<SpliceComponent>", "<InputDim>");
  ReadBasicType(is, binary, &input_dim_);
  std::string token;
  ReadToken(is, false, &token);  // NOTE(review): passes 'false' rather than
  // 'binary'; presumably tokens are whitespace-delimited in both modes --
  // confirm against ReadToken()'s contract.
  if (token == "<LeftContext>") {
    // Old format: expand [-left_context, right_context] into an explicit
    // list of contiguous offsets.
    int32 left_context=0, right_context=0;
    std::vector<int32> context;
    ReadBasicType(is, binary, &left_context);
    ExpectToken(is, binary, "<RightContext>");
    ReadBasicType(is, binary, &right_context);
    for (int32 i = -1 * left_context; i <= right_context; i++)
      context.push_back(i);
    context_ = context;
  } else if (token == "<Context>") {
    // New format: arbitrary (possibly non-contiguous) offsets.
    ReadIntegerVector(is, binary, &context_);
  } else {
    KALDI_ERR << "Unknown token" << token
              << ", the model might be corrupted";
  }
  ExpectToken(is, binary, "<ConstComponentDim>");
  ReadBasicType(is, binary, &const_component_dim_);
  ExpectToken(is, binary, "</SpliceComponent>");
}
2792 
void SpliceComponent::Write(std::ostream &os, bool binary) const {
  // Serializes in the newer <Context> format only (Read() still accepts the
  // old <LeftContext>/<RightContext> form).
  WriteToken(os, binary, "<SpliceComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, input_dim_);
  WriteToken(os, binary, "<Context>");
  WriteIntegerVector(os, binary, context_);
  WriteToken(os, binary, "<ConstComponentDim>");
  WriteBasicType(os, binary, const_component_dim_);
  WriteToken(os, binary, "</SpliceComponent>");
}
2803 
2804 
2805 std::string SpliceMaxComponent::Info() const {
2806  std::stringstream stream;
2807  std::ostringstream os;
2808  std::copy(context_.begin(), context_.end(),
2809  std::ostream_iterator<int32>(os, " "));
2810  stream << Component::Info() << ", context=" << os.str();
2811  return stream.str();
2812 }
2813 
void SpliceMaxComponent::Init(int32 dim,
                              std::vector<int32> context) {
  // Sets the feature dimension and the context offsets over which the max
  // is taken; the offsets must straddle 0 (front() <= 0 <= back()).
  dim_ = dim;
  context_ = context;
  KALDI_ASSERT(dim_ > 0 && context_.front() <= 0 && context_.back() >= 0);
}
2820 
2821 
2822 // e.g. args == "dim=10 left-context=2 right-context=2
2823 void SpliceMaxComponent::InitFromString(std::string args) {
2824  std::string orig_args(args);
2825  int32 dim, left_context, right_context;
2826  std::vector <int32> context;
2827  bool dim_ok = ParseFromString("dim", &args, &dim);
2828  bool context_ok = ParseFromString("context", &args, &context);
2829  bool left_right_context_ok = ParseFromString("left-context",
2830  &args, &left_context) &&
2831  ParseFromString("right-context", &args,
2832  &right_context);
2833 
2834  if (!(dim_ok && (context_ok || left_right_context_ok)) ||
2835  !args.empty() || dim <= 0)
2836  KALDI_ERR << "Invalid initializer for layer of type "
2837  << Type() << ": \"" << orig_args << "\"";
2838  if (left_right_context_ok) {
2839  KALDI_ASSERT(context.size() == 0);
2840  for (int32 i = -1 * left_context; i <= right_context; i++)
2841  context.push_back(i);
2842  }
2843  Init(dim, context);
2844 }
2845 
2846 
void SpliceMaxComponent::Propagate(const ChunkInfo &in_info,
                                   const ChunkInfo &out_info,
                                   const CuMatrixBase<BaseFloat> &in,
                                   CuMatrixBase<BaseFloat> *out) const {
  // Each output frame is the element-wise max over the input frames at the
  // offsets in context_ (same dimension in and out, unlike SpliceComponent).
  in_info.Check();
  out_info.Check();
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
  int32 in_chunk_size  = in_info.ChunkSize(),
        out_chunk_size = out_info.ChunkSize(),
        dim = in_info.NumCols();

  // Scratch matrix holding, for one context offset, the input rows aligned
  // with the output rows of the current chunk.
  CuMatrix<BaseFloat> input_chunk_part(out_chunk_size, dim);
  for (int32 chunk = 0; chunk < in_info.NumChunks(); chunk++) {
    CuSubMatrix<BaseFloat> input_chunk(in,
                                       chunk * in_chunk_size, in_chunk_size,
                                       0, dim),
        output_chunk(*out,
                     chunk * out_chunk_size,
                     out_chunk_size, 0, dim);
    for (int32 offset = 0; offset < context_.size(); offset++) {
      // computing the indices to copy into input_chunk_part from input_chunk
      // copy the rows of the input matrix which correspond to the current
      // context index
      std::vector<int32> input_chunk_inds(out_chunk_size);
      for (int32 i = 0; i < out_chunk_size; i++) {
        int32 out_chunk_ind = i;
        int32 out_chunk_offset =
            out_info.GetOffset(out_chunk_ind);
        input_chunk_inds[i] =
            in_info.GetIndex(out_chunk_offset + context_[offset]);
      }
      CuArray<int32> cu_chunk_inds(input_chunk_inds);
      input_chunk_part.CopyRows(input_chunk, cu_chunk_inds);
      if (offset == 0) {
        // First offset initializes the running maximum...
        output_chunk.CopyFromMat(input_chunk_part);
      } else {
        // ...subsequent offsets fold in via element-wise max.
        output_chunk.Max(input_chunk_part);
      }
    }
  }
}
2890 
void SpliceMaxComponent::Backprop(const ChunkInfo &in_info,
                                  const ChunkInfo &out_info,
                                  const CuMatrixBase<BaseFloat> &in_value,
                                  const CuMatrixBase<BaseFloat> &,  // out_value
                                  const CuMatrixBase<BaseFloat> &out_deriv,
                                  Component *to_update,
                                  CuMatrix<BaseFloat> *in_deriv) const {
  // Derivative of an element-wise max: for each (output frame, dimension),
  // the whole output derivative goes to the single input frame that achieved
  // the maximum (recomputed here from in_value).  Runs per-element on the
  // host-facing accessor, so this is not a fast path.
  in_info.Check();
  out_info.Check();
  in_info.CheckSize(in_value);
  out_info.CheckSize(out_deriv);
  in_deriv->Resize(in_info.NumRows(), in_info.NumCols());  // zero-initialized.
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  int32 out_chunk_size = out_info.ChunkSize(),
        in_chunk_size  = in_info.ChunkSize(),
        dim = out_deriv.NumCols();

  KALDI_ASSERT(dim == InputDim());

  for (int32 chunk = 0; chunk < in_info.NumChunks(); chunk++) {
    CuSubMatrix<BaseFloat> in_deriv_chunk(*in_deriv,
                                          chunk * in_chunk_size,
                                          in_chunk_size,
                                          0, dim),
        in_value_chunk(in_value,
                       chunk * in_chunk_size,
                       in_chunk_size,
                       0, dim),
        out_deriv_chunk(out_deriv,
                        chunk * out_chunk_size,
                        out_chunk_size,
                        0, dim);
    for (int32 r = 0; r < out_deriv_chunk.NumRows(); r++) {
      int32 out_chunk_ind = r;
      int32 out_chunk_offset =
          out_info.GetOffset(out_chunk_ind);

      for (int32 c = 0; c < dim; c++) {
        // Find which context offset produced the max for this (row, col).
        int32 in_r_max = -1;
        BaseFloat max_input = -std::numeric_limits<BaseFloat>::infinity();
        for (int32 context_ind = 0;
             context_ind < context_.size(); context_ind++) {
          int32 in_r =
              in_info.GetIndex(out_chunk_offset + context_[context_ind]);
          BaseFloat input = in_value_chunk(in_r, c);
          if (input > max_input) {
            max_input = input;
            in_r_max = in_r;
          }
        }
        KALDI_ASSERT(in_r_max != -1);
        // Accumulate: several output frames may share the same arg-max row.
        (*in_deriv)(in_r_max, c) += out_deriv_chunk(r, c);
      }
    }
  }
}
2948 
2951  ans->Init(dim_, context_);
2952  return ans;
2953 }
2954 
void SpliceMaxComponent::Read(std::istream &is, bool binary) {
  // Deserializes, accepting both the old <LeftContext>/<RightContext>
  // format and the newer <Context> vector (cf. SpliceComponent::Read).
  ExpectOneOrTwoTokens(is, binary, "<SpliceMaxComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);
  std::string token;
  ReadToken(is, false, &token);  // NOTE(review): 'false' instead of 'binary';
  // presumably tokens parse identically in both modes -- confirm.
  if (token == "<LeftContext>") {
    // Old format: expand the [-left, right] window into explicit offsets.
    int32 left_context = 0, right_context = 0;
    std::vector<int32> context;
    ReadBasicType(is, binary, &left_context);
    ExpectToken(is, binary, "<RightContext>");
    ReadBasicType(is, binary, &right_context);
    for (int32 i = -1 * left_context; i <= right_context; i++)
      context.push_back(i);
    context_ = context;
  } else if (token == "<Context>") {
    ReadIntegerVector(is, binary, &context_);
  } else {
    KALDI_ERR << "Unknown token" << token << ", the model might be corrupted";
  }
  ExpectToken(is, binary, "</SpliceMaxComponent>");
}
2976 
void SpliceMaxComponent::Write(std::ostream &os, bool binary) const {
  // Serializes in the newer <Context> format only.
  WriteToken(os, binary, "<SpliceMaxComponent>");
  WriteToken(os, binary, "<Dim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<Context>");
  WriteIntegerVector(os, binary, context_);
  WriteToken(os, binary, "</SpliceMaxComponent>");
}
2985 
2986 std::string DctComponent::Info() const {
2987  std::stringstream stream;
2988  stream << Component::Info() << ", dct_dim=" << dct_mat_.NumCols();
2989  if (dct_mat_.NumCols() != dct_mat_.NumRows())
2990  stream << ", dct_keep_dim=" << dct_mat_.NumRows();
2991 
2992  return stream.str();
2993 }
2994 
2995 void DctComponent::Init(int32 dim, int32 dct_dim, bool reorder, int32 dct_keep_dim) {
2996  int dct_keep_dim_ = (dct_keep_dim > 0) ? dct_keep_dim : dct_dim;
2997 
2998  KALDI_ASSERT(dim > 0 && dct_dim > 0);
2999  KALDI_ASSERT(dim % dct_dim == 0); // dct_dim must divide dim.
3000  KALDI_ASSERT(dct_dim >= dct_keep_dim_);
3001  dim_ = dim;
3002  dct_mat_.Resize(dct_keep_dim_, dct_dim);
3003  reorder_ = reorder;
3004  Matrix<BaseFloat> dct_mat(dct_keep_dim_, dct_dim);
3005  ComputeDctMatrix(&dct_mat);
3006  dct_mat_ = dct_mat;
3007 }
3008 
3009 
3010 
3011 void DctComponent::InitFromString(std::string args) {
3012  std::string orig_args(args);
3013  int32 dim, dct_dim, dct_keep_dim = 0;
3014  bool reorder = false;
3015 
3016  bool ok = ParseFromString("dim", &args, &dim);
3017  ok = ParseFromString("dct-dim", &args, &dct_dim) && ok;
3018  ok = ParseFromString("reorder", &args, &reorder) && ok;
3019  ParseFromString("dct-keep-dim", &args, &dct_keep_dim);
3020 
3021  if (!ok || !args.empty() || dim <= 0 || dct_dim <= 0 || dct_keep_dim < 0)
3022  KALDI_ERR << "Invalid initializer for layer of type "
3023  << Type() << ": \"" << orig_args << "\"";
3024  Init(dim, dct_dim, reorder, dct_keep_dim);
3025 }
3026 
void DctComponent::Reorder(CuMatrixBase<BaseFloat> *mat, bool reverse) const {
  // reorders into contiguous blocks of dize "dct_dim_", assuming that
  // such blocks were interlaced before. if reverse==true, does the
  // reverse.
  // Implementation: a per-row "transpose" permutation between the
  // interlaced and contiguous layouts, done element by element via the
  // host-facing accessors (so this is not a fast path).
  int32 dct_dim = dct_mat_.NumCols(),
      dct_keep_dim = dct_mat_.NumRows(),
      block_size_in = dim_ / dct_dim,
      block_size_out = dct_keep_dim;

  //This does not necesarily needs to be true anymore -- output must be reordered as well, but the dimension differs...
  //KALDI_ASSERT(mat->NumCols() == dim_);
  if (reverse) std::swap(block_size_in, block_size_out);

  CuVector<BaseFloat> temp(mat->NumCols());
  for (int32 i = 0; i < mat->NumRows(); i++) {
    CuSubVector<BaseFloat> row(*mat, i);
    // NOTE(review): num_blocks_in is set to block_size_out -- the naming is
    // confusing but the index arithmetic below is a consistent
    // block_size_out x block_size_in transpose of the row; confirm before
    // renaming anything here.
    int32 num_blocks_in = block_size_out;
    for (int32 b = 0; b < num_blocks_in; b++) {
      for (int32 j = 0; j < block_size_in; j++) {
        temp(j * block_size_out + b) = row(b * block_size_in + j);
      }
    }
    row.CopyFromVec(temp);
  }
}
3052 
void DctComponent::Propagate(const ChunkInfo &in_info,
                             const ChunkInfo &out_info,
                             const CuMatrixBase<BaseFloat> &in,
                             CuMatrixBase<BaseFloat> *out) const {
  // Applies the (possibly truncated) DCT independently to each dct_dim-sized
  // block of the input; if reorder_ is set, the blocks are interlaced in the
  // input and are de-interlaced first (and the output re-interlaced after).
  KALDI_ASSERT(in.NumCols() == InputDim());
  int32 dct_dim = dct_mat_.NumCols(),
        dct_keep_dim = dct_mat_.NumRows(),
        num_rows = in.NumRows(),
        num_chunks = dim_ / dct_dim;

  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(num_rows == out_info.NumRows());
  KALDI_ASSERT(num_chunks * dct_keep_dim == out_info.NumCols());

  CuMatrix<BaseFloat> in_tmp;
  if (reorder_) {
    in_tmp = in;  // work on a copy; "in" is const.
    Reorder(&in_tmp, false);
  }

  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
    // out_block = in_block * dct_mat_^T  (block-wise matrix product).
    CuSubMatrix<BaseFloat> in_mat(reorder_ ? in_tmp : in,
                                  0, num_rows, dct_dim * chunk, dct_dim),
        out_mat(*out,
                0, num_rows, dct_keep_dim * chunk, dct_keep_dim);

    out_mat.AddMatMat(1.0, in_mat, kNoTrans, dct_mat_, kTrans, 0.0);
  }
  if (reorder_)
    Reorder(out, true);
}
3085 
void DctComponent::Backprop(const ChunkInfo &,  //in_info,
                            const ChunkInfo &,  //out_info,
                            const CuMatrixBase<BaseFloat> &,  //in_value,
                            const CuMatrixBase<BaseFloat> &,  //out_value,
                            const CuMatrixBase<BaseFloat> &out_deriv,
                            Component *,  //to_update,
                            CuMatrix<BaseFloat> *in_deriv) const {
  // Backprop through the fixed linear DCT: multiply the output derivative by
  // the DCT matrix (no parameters to update).
  KALDI_ASSERT(out_deriv.NumCols() == OutputDim());

  int32 dct_dim = dct_mat_.NumCols(),
        dct_keep_dim = dct_mat_.NumRows(),
        num_chunks = dim_ / dct_dim,
        num_rows = out_deriv.NumRows();

  in_deriv->Resize(num_rows, dim_);

  CuMatrix<BaseFloat> out_deriv_tmp;
  if (reorder_) {
    out_deriv_tmp = out_deriv;  // de-interlace a copy; out_deriv is const.
    Reorder(&out_deriv_tmp, false);
  }
  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
    CuSubMatrix<BaseFloat> in_deriv_mat(*in_deriv,
                                        0, num_rows, dct_dim * chunk, dct_dim),
        out_deriv_mat(reorder_ ? out_deriv_tmp : out_deriv,
                      0, num_rows, dct_keep_dim * chunk, dct_keep_dim);

    // Note: in the reverse direction the DCT matrix is transposed. This is
    // normal when computing derivatives; the necessity for the transpose is
    // obvious if you consider what happens when the input and output dims
    // differ.
    in_deriv_mat.AddMatMat(1.0, out_deriv_mat, kNoTrans,
                           dct_mat_, kNoTrans, 0.0);
  }
  if (reorder_)
    Reorder(in_deriv, true);
}
3123 
3125  DctComponent *ans = new DctComponent();
3126  ans->dct_mat_ = dct_mat_;
3127  ans->dim_ = dim_;
3128  ans->reorder_ = reorder_;
3129  return ans;
3130 }
3131 
void DctComponent::Write(std::ostream &os, bool binary) const {
  // The DCT matrix itself is not stored; Read() reconstructs it from
  // <DctDim> and <DctKeepDim> (the matrix's column and row counts).
  WriteToken(os, binary, "<DctComponent>");
  WriteToken(os, binary, "<Dim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<DctDim>");
  int32 dct_dim = dct_mat_.NumCols();
  WriteBasicType(os, binary, dct_dim);
  WriteToken(os, binary, "<Reorder>");
  WriteBasicType(os, binary, reorder_);
  WriteToken(os, binary, "<DctKeepDim>");
  int32 dct_keep_dim = dct_mat_.NumRows();
  WriteBasicType(os, binary, dct_keep_dim);
  WriteToken(os, binary, "</DctComponent>");
}
3146 
void DctComponent::Read(std::istream &is, bool binary) {
  // Deserializes; <DctKeepDim> is optional for compatibility with models
  // written before that field existed (defaulting to dct_dim, i.e. keep all
  // coefficients).  The DCT matrix is regenerated via Init().
  ExpectOneOrTwoTokens(is, binary, "<DctComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);

  ExpectToken(is, binary, "<DctDim>");
  int32 dct_dim;
  ReadBasicType(is, binary, &dct_dim);

  ExpectToken(is, binary, "<Reorder>");
  ReadBasicType(is, binary, &reorder_);

  int32 dct_keep_dim = dct_dim;  // default when <DctKeepDim> is absent.
  std::string token;
  ReadToken(is, binary, &token);
  if (token == "<DctKeepDim>") {
    ReadBasicType(is, binary, &dct_keep_dim);
    ExpectToken(is, binary, "</DctComponent>");
  } else if (token != "</DctComponent>") {
    KALDI_ERR << "Expected token \"</DctComponent>\", got instead \""
              << token << "\".";
  }

  KALDI_ASSERT(dct_dim > 0 && dim_ > 0 && dim_ % dct_dim == 0);
  Init(dim_, dct_dim, reorder_, dct_keep_dim);
  //idct_mat_.Resize(dct_keep_dim, dct_dim);
  //ComputeDctMatrix(&dct_mat_);
}
3174 
3176  std::string orig_args = args;
3177  std::string filename;
3178  bool ok = ParseFromString("matrix", &args, &filename);
3179 
3180  if (!ok || !args.empty())
3181  KALDI_ERR << "Invalid initializer for layer of type "
3182  << Type() << ": \"" << orig_args << "\"";
3183 
3184  bool binary;
3185  Input ki(filename, &binary);
3186  CuMatrix<BaseFloat> mat;
3187  mat.Read(ki.Stream(), binary);
3188  KALDI_ASSERT(mat.NumRows() != 0);
3189  Init(mat);
3190 }
3191 
3192 
3193 std::string FixedLinearComponent::Info() const {
3194  std::stringstream stream;
3195  BaseFloat mat_size = static_cast<BaseFloat>(mat_.NumRows())
3196  * static_cast<BaseFloat>(mat_.NumCols()),
3197  mat_stddev = std::sqrt(TraceMatMat(mat_, mat_, kTrans) /
3198  mat_size);
3199  stream << Component::Info() << ", params-stddev=" << mat_stddev;
3200  return stream.str();
3201 }
3202 
void FixedLinearComponent::Propagate(const ChunkInfo &in_info,
                                     const ChunkInfo &out_info,
                                     const CuMatrixBase<BaseFloat> &in,
                                     CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // out = in * mat_^T (beta == 0.0, so this overwrites out).
  out->AddMatMat(1.0, in, kNoTrans, mat_, kTrans, 0.0);
}
3213 
void FixedLinearComponent::Backprop(const ChunkInfo &,  //in_info,
                                    const ChunkInfo &,  //out_info,
                                    const CuMatrixBase<BaseFloat> &,  //in_value,
                                    const CuMatrixBase<BaseFloat> &,  //out_value,
                                    const CuMatrixBase<BaseFloat> &out_deriv,
                                    Component *,  //to_update, // may be identical to "this".
                                    CuMatrix<BaseFloat> *in_deriv) const {
  // Fixed (non-trainable) transform: just propagate the derivative through
  // the linear map, in_deriv = out_deriv * mat_.
  in_deriv->Resize(out_deriv.NumRows(), mat_.NumCols());
  in_deriv->AddMatMat(1.0, out_deriv, kNoTrans, mat_, kNoTrans, 0.0);
}
3224 
3227  ans->Init(mat_);
3228  return ans;
3229 }
3230 
3231 
void FixedLinearComponent::Write(std::ostream &os, bool binary) const {
  // Serializes the fixed matrix between the component's begin/end markers.
  WriteToken(os, binary, "<FixedLinearComponent>");
  WriteToken(os, binary, "<CuMatrix>");
  mat_.Write(os, binary);
  WriteToken(os, binary, "</FixedLinearComponent>");
}
3238 
void FixedLinearComponent::Read(std::istream &is, bool binary) {
  // Mirror of Write(): markers around a single CuMatrix.
  ExpectOneOrTwoTokens(is, binary, "<FixedLinearComponent>", "<CuMatrix>");
  mat_.Read(is, binary);
  ExpectToken(is, binary, "</FixedLinearComponent>");
}
3244 
void FixedAffineComponent::Init(const CuMatrixBase<BaseFloat> &mat) {
  // "mat" holds the affine transform in augmented form: the last column is
  // the bias, the remaining columns are the linear part.
  KALDI_ASSERT(mat.NumCols() > 1);
  linear_params_ = mat.Range(0, mat.NumRows(),
                             0, mat.NumCols() - 1);
  bias_params_.Resize(mat.NumRows());
  bias_params_.CopyColFromMat(mat, mat.NumCols() - 1);
}
3252 
3253 
3255  std::string orig_args = args;
3256  std::string filename;
3257  bool ok = ParseFromString("matrix", &args, &filename);
3258 
3259  if (!ok || !args.empty())
3260  KALDI_ERR << "Invalid initializer for layer of type "
3261  << Type() << ": \"" << orig_args << "\"";
3262 
3263  bool binary;
3264  Input ki(filename, &binary);
3265  CuMatrix<BaseFloat> mat;
3266  mat.Read(ki.Stream(), binary);
3267  KALDI_ASSERT(mat.NumRows() != 0);
3268  Init(mat);
3269 }
3270 
3271 
std::string FixedAffineComponent::Info() const {
  // Reports the RMS ("stddev") of the fixed linear and bias parameters, for
  // sanity-checking a model.
  std::stringstream stream;
  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
      * static_cast<BaseFloat>(linear_params_.NumCols()),
      linear_params_stddev =
      std::sqrt(TraceMatMat(linear_params_,
                            linear_params_, kTrans) /
                linear_params_size),
      bias_params_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
                                     bias_params_.Dim());

  stream << Component::Info() << ", linear-params-stddev=" << linear_params_stddev
         << ", bias-params-stddev=" << bias_params_stddev;
  return stream.str();
}
3287 
void FixedAffineComponent::Propagate(const ChunkInfo &in_info,
                                     const ChunkInfo &out_info,
                                     const CuMatrixBase<BaseFloat> &in,
                                     CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // out = in * linear_params_^T, then add the bias to every row.
  out->AddMatMat(1.0, in, kNoTrans, linear_params_, kTrans, 0.0);
  out->AddVecToRows(1.0, bias_params_);
}
3299 
void FixedAffineComponent::Backprop(const ChunkInfo &,  //in_info,
                                    const ChunkInfo &,  //out_info,
                                    const CuMatrixBase<BaseFloat> &,  //in_value,
                                    const CuMatrixBase<BaseFloat> &,  //out_value,
                                    const CuMatrixBase<BaseFloat> &out_deriv,
                                    Component *,  //to_update, // may be identical to "this".
                                    CuMatrix<BaseFloat> *in_deriv) const {
  // Fixed parameters, nothing to update; the bias term has zero derivative
  // w.r.t. the input, so only the linear part is propagated back.
  in_deriv->Resize(out_deriv.NumRows(), linear_params_.NumCols());
  in_deriv->AddMatMat(1.0, out_deriv, kNoTrans, linear_params_, kNoTrans, 0.0);
}
3310 
3314  ans->bias_params_ = bias_params_;
3315  return ans;
3316 }
3317 
3318 
void FixedAffineComponent::Write(std::ostream &os, bool binary) const {
  // Serializes the linear matrix and bias vector in that order.
  WriteToken(os, binary, "<FixedAffineComponent>");
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "</FixedAffineComponent>");
}
3327 
void FixedAffineComponent::Read(std::istream &is, bool binary) {
  // Mirror of Write(): linear matrix then bias vector.
  ExpectOneOrTwoTokens(is, binary, "<FixedAffineComponent>", "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "</FixedAffineComponent>");
}
3335 
3336 
void FixedScaleComponent::Init(const CuVectorBase<BaseFloat> &scales) {
  // Stores the fixed per-dimension scaling factors; must be non-empty.
  KALDI_ASSERT(scales.Dim() != 0);
  scales_ = scales;
}
3341 
3342 void FixedScaleComponent::InitFromString(std::string args) {
3343  std::string orig_args = args;
3344  std::string filename;
3345  bool ok = ParseFromString("scales", &args, &filename);
3346 
3347  if (!ok || !args.empty())
3348  KALDI_ERR << "Invalid initializer for layer of type "
3349  << Type() << ": \"" << orig_args << "\"";
3350 
3351  CuVector<BaseFloat> vec;
3352  ReadKaldiObject(filename, &vec);
3353  Init(vec);
3354 }
3355 
3356 
3357 std::string FixedScaleComponent::Info() const {
3358  std::stringstream stream;
3359  BaseFloat scales_size = static_cast<BaseFloat>(scales_.Dim()),
3360  scales_mean = scales_.Sum() / scales_size,
3361  scales_stddev = std::sqrt(VecVec(scales_, scales_) / scales_size
3362  - (scales_mean * scales_mean));
3363  stream << Component::Info() << ", scales-mean=" << scales_mean
3364  << ", scales-stddev=" << scales_stddev;
3365  return stream.str();
3366 }
3367 
void FixedScaleComponent::Propagate(const ChunkInfo &in_info,
                                    const ChunkInfo &out_info,
                                    const CuMatrixBase<BaseFloat> &in,
                                    CuMatrixBase<BaseFloat> *out) const {
  // out = in with column j multiplied by scales_(j).
  out->CopyFromMat(in);
  out->MulColsVec(scales_);
}
3375 
void FixedScaleComponent::Backprop(const ChunkInfo &,  //in_info,
                                   const ChunkInfo &,  //out_info,
                                   const CuMatrixBase<BaseFloat> &,  //in_value,
                                   const CuMatrixBase<BaseFloat> &,  //out_value,
                                   const CuMatrixBase<BaseFloat> &out_deriv,
                                   Component *,  //to_update, // may be identical to "this".
                                   CuMatrix<BaseFloat> *in_deriv) const {
  // The transform is a fixed diagonal linear map, so the derivative is
  // scaled by the same per-column factors.
  *in_deriv = out_deriv;
  in_deriv->MulColsVec(scales_);
}
3386 
3389  ans->scales_ = scales_;
3390  return ans;
3391 }
3392 
3393 
void FixedScaleComponent::Write(std::ostream &os, bool binary) const {
  // Serializes the fixed scale vector between the component markers.
  WriteToken(os, binary, "<FixedScaleComponent>");
  WriteToken(os, binary, "<Scales>");
  scales_.Write(os, binary);
  WriteToken(os, binary, "</FixedScaleComponent>");
}
3400 
void FixedScaleComponent::Read(std::istream &is, bool binary) {
  // Mirror of Write().
  ExpectOneOrTwoTokens(is, binary, "<FixedScaleComponent>", "<Scales>");
  scales_.Read(is, binary);
  ExpectToken(is, binary, "</FixedScaleComponent>");
}
3406 
void FixedBiasComponent::Init(const CuVectorBase<BaseFloat> &bias) {
  // Stores the fixed bias vector; must be non-empty.
  KALDI_ASSERT(bias.Dim() != 0);
  bias_ = bias;
}
3411 
3412 void FixedBiasComponent::InitFromString(std::string args) {
3413  std::string orig_args = args;
3414  std::string filename;
3415  bool ok = ParseFromString("bias", &args, &filename);
3416 
3417  if (!ok || !args.empty())
3418  KALDI_ERR << "Invalid initializer for layer of type "
3419  << Type() << ": \"" << orig_args << "\"";
3420 
3421  CuVector<BaseFloat> vec;
3422  ReadKaldiObject(filename, &vec);
3423  Init(vec);
3424 }
3425 
3426 
3427 std::string FixedBiasComponent::Info() const {
3428  std::stringstream stream;
3429  BaseFloat bias_size = static_cast<BaseFloat>(bias_.Dim()),
3430  bias_mean = bias_.Sum() / bias_size,
3431  bias_stddev = std::sqrt(VecVec(bias_, bias_) / bias_size)
3432  - (bias_mean * bias_mean);
3433  stream << Component::Info() << ", bias-mean=" << bias_mean
3434  << ", bias-stddev=" << bias_stddev;
3435  return stream.str();
3436 }
3437 
void FixedBiasComponent::Propagate(const ChunkInfo &in_info,
                                   const ChunkInfo &out_info,
                                   const CuMatrixBase<BaseFloat> &in,
                                   CuMatrixBase<BaseFloat> *out) const {
  // out = in + bias_ (added to every row).
  out->CopyFromMat(in);
  out->AddVecToRows(1.0, bias_, 1.0);
}
3445 
3446 void FixedBiasComponent::Backprop(const ChunkInfo &, //in_info,
3447  const ChunkInfo &, //out_info,
3448  const CuMatrixBase<BaseFloat> &, //in_value,
3449  const CuMatrixBase<BaseFloat> &, //out_value,
3450  const CuMatrixBase<BaseFloat> &out_deriv,
3451  Component *, //to_update,
3452  CuMatrix<BaseFloat> *in_deriv) const {
3453  *in_deriv = out_deriv;
3454 }
3455 
3458  ans->bias_ = bias_;
3459  return ans;
3460 }
3461 
3462 
void FixedBiasComponent::Write(std::ostream &os, bool binary) const {
  // Serializes the fixed bias vector between the component markers.
  WriteToken(os, binary, "<FixedBiasComponent>");
  WriteToken(os, binary, "<Bias>");
  bias_.Write(os, binary);
  WriteToken(os, binary, "</FixedBiasComponent>");
}
3469 
void FixedBiasComponent::Read(std::istream &is, bool binary) {
  // Mirror of Write().
  ExpectOneOrTwoTokens(is, binary, "<FixedBiasComponent>", "<Bias>");
  bias_.Read(is, binary);
  ExpectToken(is, binary, "</FixedBiasComponent>");
}
3475 
3476 
3477 
3478 
3479 std::string DropoutComponent::Info() const {
3480  std::stringstream stream;
3481  stream << Component::Info() << ", dropout_proportion = "
3482  << dropout_proportion_ << ", dropout_scale = "
3483  << dropout_scale_;
3484  return stream.str();
3485 }
3486 
3487 void DropoutComponent::InitFromString(std::string args) {
3488  std::string orig_args(args);
3489  int32 dim;
3490  BaseFloat dropout_proportion = 0.5, dropout_scale = 0.0;
3491  bool ok = ParseFromString("dim", &args, &dim);
3492  ParseFromString("dropout-proportion", &args, &dropout_proportion);
3493  ParseFromString("dropout-scale", &args, &dropout_scale);
3494 
3495  if (!ok || !args.empty() || dim <= 0)
3496  KALDI_ERR << "Invalid initializer for layer of type DropoutComponent: \""
3497  << orig_args << "\"";
3498  Init(dim, dropout_proportion, dropout_scale);
3499 }
3500 
void DropoutComponent::Read(std::istream &is, bool binary) {
  // Mirror of Write(): dim, then scale, then proportion.
  ExpectOneOrTwoTokens(is, binary, "<DropoutComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<DropoutScale>");
  ReadBasicType(is, binary, &dropout_scale_);
  ExpectToken(is, binary, "<DropoutProportion>");
  ReadBasicType(is, binary, &dropout_proportion_);
  ExpectToken(is, binary, "</DropoutComponent>");
}
3510 
void DropoutComponent::Write(std::ostream &os, bool binary) const {
  // Serializes dim, scale and proportion in that order (Read() expects
  // exactly this order).
  WriteToken(os, binary, "<DropoutComponent>");
  WriteToken(os, binary, "<Dim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<DropoutScale>");
  WriteBasicType(os, binary, dropout_scale_);
  WriteToken(os, binary, "<DropoutProportion>");
  WriteBasicType(os, binary, dropout_proportion_);
  WriteToken(os, binary, "</DropoutComponent>");
}
3521 
3522 
void DropoutComponent::Init(int32 dim,
                            BaseFloat dropout_proportion,
                            BaseFloat dropout_scale){
  // dropout_proportion: fraction of units scaled by dropout_scale
  //   (conventionally 0.0, i.e. fully dropped).
  // dropout_scale: the factor applied to the "dropped" units; the remaining
  //   units are scaled up in Propagate() so the expected value stays ~1.0.
  dim_ = dim;
  dropout_proportion_ = dropout_proportion;
  dropout_scale_ = dropout_scale;
}
3530 
void DropoutComponent::Propagate(const ChunkInfo &in_info,
                                 const ChunkInfo &out_info,
                                 const CuMatrixBase<BaseFloat> &in,
                                 CuMatrixBase<BaseFloat> *out) const {
  // Randomly scales each element of "in": with probability dp by
  // "low_scale" (dropout_scale_, usually 0.0) and otherwise by "high_scale",
  // chosen so that the expected scale is 1.0.
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
  KALDI_ASSERT(in.NumCols() == this->InputDim());

  BaseFloat dp = dropout_proportion_;
  KALDI_ASSERT(dp < 1.0 && dp >= 0.0);
  KALDI_ASSERT(dropout_scale_ <= 1.0 && dropout_scale_ >= 0.0);

  BaseFloat low_scale = dropout_scale_,
      high_scale = (1.0 - (dp * low_scale)) / (1.0 - dp),
      average = (low_scale * dp) +
                (high_scale * (1.0 - dp));
  KALDI_ASSERT(fabs(average - 1.0) < 0.01);

  // This const_cast is only safe assuming you don't attempt
  // to use multi-threaded code with the GPU.
  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);


  out->Add(-dp);  // now, a proportion "dp" will be <0.0
  out->ApplyHeaviside();  // apply the function (x>0?1:0). Now, a proportion "dp" will
  // be zero and (1-dp) will be 1.0.
  if ((high_scale - low_scale) != 1.0)
    out->Scale(high_scale - low_scale);  // now, "dp" are 0 and (1-dp) are "high_scale-low_scale".
  if (low_scale != 0.0)
    out->Add(low_scale);  // now "dp" equal "low_scale" and (1.0-dp) equal "high_scale".

  // Apply the mask: out now holds per-element scales, multiply in the data.
  out->MulElements(in);
}
3565 
3566 void DropoutComponent::Backprop(const ChunkInfo &, //in_info,
3567  const ChunkInfo &, //out_info,
3568  const CuMatrixBase<BaseFloat> &in_value,
3569  const CuMatrixBase<BaseFloat> &out_value,
3570  const CuMatrixBase<BaseFloat> &out_deriv,
3571  Component *, //to_update
3572  CuMatrix<BaseFloat> *in_deriv) const {
3573  KALDI_ASSERT(SameDim(in_value, out_value) && SameDim(in_value, out_deriv));
3574  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
3575  in_deriv->SetMatMatDivMat(out_deriv, out_value, in_value);
3576 }
3577 
3579  return new DropoutComponent(dim_,
3580  dropout_proportion_,
3581  dropout_scale_);
3582 }
3583 
3585  std::string orig_args(args);
3586  int32 dim;
3587  BaseFloat stddev = 1.0;
3588  bool ok = ParseFromString("dim", &args, &dim);
3589  ParseFromString("stddev", &args, &stddev);
3590 
3591  if (!ok || !args.empty() || dim <= 0)
3592  KALDI_ERR << "Invalid initializer for layer of type AdditiveNoiseComponent: \""
3593  << orig_args << "\"";
3594  Init(dim, stddev);
3595 }
3596 
3597 void AdditiveNoiseComponent::Read(std::istream &is, bool binary) {
3598  ExpectOneOrTwoTokens(is, binary, "<AdditiveNoiseComponent>", "<Dim>");
3599  ReadBasicType(is, binary, &dim_);
3600  ExpectToken(is, binary, "<Stddev>");
3601  ReadBasicType(is, binary, &stddev_);
3602  ExpectToken(is, binary, "</AdditiveNoiseComponent>");
3603 }
3604 
3605 void AdditiveNoiseComponent::Write(std::ostream &os, bool binary) const {
3606  WriteToken(os, binary, "<AdditiveNoiseComponent>");
3607  WriteToken(os, binary, "<Dim>");
3608  WriteBasicType(os, binary, dim_);
3609  WriteToken(os, binary, "<Stddev>");
3610  WriteBasicType(os, binary, stddev_);
3611  WriteToken(os, binary, "</AdditiveNoiseComponent>");
3612 }
3613 
3615  dim_ = dim;
3616  stddev_ = stddev;
3617 }
3618 
3620  const ChunkInfo &out_info,
3621  const CuMatrixBase<BaseFloat> &in,
3622  CuMatrixBase<BaseFloat> *out) const {
3623  KALDI_ASSERT(in.NumCols() == this->InputDim());
3624  out->CopyFromMat(in);
3625  CuMatrix<BaseFloat> rand(in.NumRows(), in.NumCols());
3626  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(&rand);
3627  out->AddMat(stddev_, rand);
3628 }
3629 
3632  patch_dim_(0), patch_step_(0), patch_stride_(0),
3633  appended_conv_(false), is_gradient_(false) {}
3634 
3636  UpdatableComponent(component),
3637  filter_params_(component.filter_params_),
3638  bias_params_(component.bias_params_),
3639  appended_conv_(component.appended_conv_),
3640  is_gradient_(component.is_gradient_) {}
3641 
3643  const CuVectorBase<BaseFloat> &bias_params,
3644  BaseFloat learning_rate):
3645  UpdatableComponent(learning_rate),
3646  filter_params_(filter_params),
3647  bias_params_(bias_params) {
3648  KALDI_ASSERT(filter_params.NumRows() == bias_params.Dim() &&
3649  bias_params.Dim() != 0);
3650  appended_conv_ = false;
3651  is_gradient_ = false;
3652 }
3653 
3654 // aquire input dim
3656  int32 filter_dim = filter_params_.NumCols();
3657  int32 num_splice = filter_dim / patch_dim_;
3658  return patch_stride_ * num_splice;
3659 }
3660 
3661 // aquire output dim
3663  int32 num_filters = filter_params_.NumRows();
3664  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3665  return num_patches * num_filters;
3666 }
3667 
3668 // initialize the component using hyperparameters
3670  int32 input_dim, int32 output_dim,
3671  int32 patch_dim, int32 patch_step,
3672  int32 patch_stride, BaseFloat param_stddev,
3673  BaseFloat bias_stddev, bool appended_conv) {
3674  UpdatableComponent::Init(learning_rate);
3675  patch_dim_ = patch_dim;
3676  patch_step_ = patch_step;
3677  patch_stride_ = patch_stride;
3678  appended_conv_ = appended_conv;
3679  int32 num_splice = input_dim / patch_stride;
3680  int32 filter_dim = num_splice * patch_dim;
3681  int32 num_patches = 1 + (patch_stride - patch_dim) / patch_step;
3682  int32 num_filters = output_dim / num_patches;
3683  KALDI_ASSERT(input_dim % patch_stride == 0);
3684  KALDI_ASSERT((patch_stride - patch_dim) % patch_step == 0);
3685  KALDI_ASSERT(output_dim % num_patches == 0);
3686 
3687  filter_params_.Resize(num_filters, filter_dim);
3688  bias_params_.Resize(num_filters);
3689  KALDI_ASSERT(param_stddev >= 0.0 && bias_stddev >= 0.0);
3690  filter_params_.SetRandn();
3691  filter_params_.Scale(param_stddev);
3692  bias_params_.SetRandn();
3693  bias_params_.Scale(bias_stddev);
3694 }
3695 
3696 // initialize the component using predefined matrix file
3697 void Convolutional1dComponent::Init(BaseFloat learning_rate, int32 patch_dim,
3698  int32 patch_step, int32 patch_stride,
3699  std::string matrix_filename,
3700  bool appended_conv) {
3701  UpdatableComponent::Init(learning_rate);
3702  patch_dim_ = patch_dim;
3703  patch_step_ = patch_step;
3704  patch_stride_ = patch_stride;
3705  appended_conv_ = appended_conv;
3706  CuMatrix<BaseFloat> mat;
3707  ReadKaldiObject(matrix_filename, &mat);
3708  KALDI_ASSERT(mat.NumCols() >= 2);
3709  int32 filter_dim = mat.NumCols() - 1, num_filters = mat.NumRows();
3710  filter_params_.Resize(num_filters, filter_dim);
3711  bias_params_.Resize(num_filters);
3712  filter_params_.CopyFromMat(mat.Range(0, num_filters, 0, filter_dim));
3713  bias_params_.CopyColFromMat(mat, filter_dim);
3714 }
3715 
3716 // resize the component, setting the parameters to zero, while
3717 // leaving any other configuration values the same
3718 void Convolutional1dComponent::Resize(int32 input_dim, int32 output_dim) {
3719  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
3720  int32 num_splice = input_dim / patch_stride_;
3721  int32 filter_dim = num_splice * patch_dim_;
3722  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3723  int32 num_filters = output_dim / num_patches;
3724  KALDI_ASSERT(input_dim % patch_stride_ == 0);
3725  KALDI_ASSERT((patch_stride_ - patch_dim_) % patch_step_ == 0);
3726  KALDI_ASSERT(output_dim % num_patches == 0);
3727  filter_params_.Resize(num_filters, filter_dim);
3728  bias_params_.Resize(num_filters);
3729 }
3730 
3731 // display information about component
3732 std::string Convolutional1dComponent::Info() const {
3733  std::stringstream stream;
3734  BaseFloat filter_params_size = static_cast<BaseFloat>(filter_params_.NumRows())
3735  * static_cast<BaseFloat>(filter_params_.NumCols());
3736  BaseFloat filter_stddev =
3738  filter_params_size),
3739  bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
3740  bias_params_.Dim());
3741 
3742  int32 num_splice = InputDim() / patch_stride_;
3743  int32 filter_dim = num_splice * patch_dim_;
3744  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3745  int32 num_filters = OutputDim() / num_patches;
3746 
3747  stream << Type() << ", input-dim=" << InputDim()
3748  << ", output-dim=" << OutputDim()
3749  << ", num-splice=" << num_splice
3750  << ", num-patches=" << num_patches
3751  << ", num-filters=" << num_filters
3752  << ", filter-dim=" << filter_dim
3753  << ", filter-params-stddev=" << filter_stddev
3754  << ", bias-params-stddev=" << bias_stddev
3755  << ", appended-conv=" << appended_conv_
3756  << ", learning-rate=" << LearningRate();
3757  return stream.str();
3758 }
3759 
3760 // initialize the component using configuration file
3762  std::string orig_args(args);
3763  bool ok = true, appended_conv = false;
3764  BaseFloat learning_rate = learning_rate_;
3765  std::string matrix_filename;
3766  int32 input_dim = -1, output_dim = -1;
3767  int32 patch_dim = -1, patch_step = -1, patch_stride = -1;
3768  ParseFromString("learning-rate", &args, &learning_rate);
3769  ParseFromString("appended-conv", &args, &appended_conv);
3770  ok = ok && ParseFromString("patch-dim", &args, &patch_dim);
3771  ok = ok && ParseFromString("patch-step", &args, &patch_step);
3772  ok = ok && ParseFromString("patch-stride", &args, &patch_stride);
3773  if (ParseFromString("matrix", &args, &matrix_filename)) {
3774  // initialize from prefined parameter matrix
3775  Init(learning_rate, patch_dim, patch_step, patch_stride,
3776  matrix_filename, appended_conv);
3777  if (ParseFromString("input-dim", &args, &input_dim))
3778  KALDI_ASSERT(input_dim == InputDim() &&
3779  "input-dim mismatch vs. matrix.");
3780  if (ParseFromString("output-dim", &args, &output_dim))
3781  KALDI_ASSERT(output_dim == OutputDim() &&
3782  "output-dim mismatch vs. matrix.");
3783  } else {
3784  // initialize from configuration
3785  ok = ok && ParseFromString("input-dim", &args, &input_dim);
3786  ok = ok && ParseFromString("output-dim", &args, &output_dim);
3787  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim), bias_stddev = 1.0;
3788  ParseFromString("param-stddev", &args, &param_stddev);
3789  ParseFromString("bias-stddev", &args, &bias_stddev);
3790  Init(learning_rate, input_dim, output_dim, patch_dim,
3791  patch_step, patch_stride, param_stddev, bias_stddev, appended_conv);
3792  }
3793  if (!args.empty())
3794  KALDI_ERR << "Could not process these elements in initializer: " << args;
3795  if (!ok)
3796  KALDI_ERR << "Bad initializer " << orig_args;
3797 }
3798 
// propagation function

/*
   In Convolutional1dComponent, the filter is defined as
   $num-filters x $filter-dim, and the bias vector B has length
   $num-filters.  The propagation is
       Y = X o A' + B
   where "o" is executing matrix-matrix convolution, which consists of a
   group of vector-matrix convolutions.
   For instance, the convolution of X(t) and the i-th filter A(i) is
       Y(t,i) = X(t) o A'(i) + B(i)
   The convolution used here is valid convolution, meaning that the
   output of M o N is of dim |M| - |N| + 1, assuming M is not shorter
   than N.

   By default, input is arranged by
       x (time), y (channel), z (frequency)
   and output is arranged by
       x (time), y (frequency), z (channel).
   When appending a Convolutional1dComponent to another one, appended_conv_
   should be set true for the appended component.
*/
3820  const ChunkInfo &out_info,
3821  const CuMatrixBase<BaseFloat> &in,
3822  CuMatrixBase<BaseFloat> *out) const {
3823  in_info.CheckSize(in);
3824  out_info.CheckSize(*out);
3825  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
3826 
3827  // dims
3828  int32 num_splice = InputDim() / patch_stride_;
3829  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3830  int32 num_filters = filter_params_.NumRows();
3831  int32 num_frames = in.NumRows();
3832  int32 filter_dim = filter_params_.NumCols();
3833 
3838  CuMatrix<BaseFloat> patches(num_frames, filter_dim * num_patches, kUndefined);
3839  // column_map is indexed by the column-index of "patches",
3840  // and the value is the corresponding column-index of "in".
3841  std::vector<int32> column_map(filter_dim * num_patches);
3842 
3843  // build-up a column selection map
3844  for (int32 patch = 0, index = 0; patch < num_patches; patch++) {
3845  int32 fstride = patch * patch_step_;
3846  for (int32 splice = 0; splice < num_splice; splice++) {
3847  int32 cstride = splice * patch_stride_;
3848  for (int32 d = 0; d < patch_dim_; d++, index++) {
3849  if (appended_conv_)
3850  column_map[index] = (fstride + d) * num_splice + splice;
3851  else
3852  column_map[index] = fstride + cstride + d;
3853  }
3854  }
3855  }
3856  CuArray<int32> cu_cols(column_map);
3857  patches.CopyCols(in, cu_cols);
3858 
3859  //
3860  // compute filter activations
3861  //
3862 
3863  std::vector<CuSubMatrix<BaseFloat>* > tgt_batch, patch_batch, filter_params_batch;
3864 
3865  CuSubMatrix<BaseFloat>* filter_params_elem = new CuSubMatrix<BaseFloat>(
3866  filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols());
3867 
3868  // form batch in vector container
3869  for (int32 p = 0; p < num_patches; p++) {
3870  // form batch in vector container. for filter_params_batch, all elements
3871  // point to the same copy filter_params_elem
3872  tgt_batch.push_back(new CuSubMatrix<BaseFloat>(out->ColRange(p * num_filters,
3873  num_filters)));
3874  patch_batch.push_back(new CuSubMatrix<BaseFloat>(
3875  patches.ColRange(p * filter_dim, filter_dim)));
3876  filter_params_batch.push_back(filter_params_elem);
3877 
3878  tgt_batch[p]->AddVecToRows(1.0, bias_params_, 0.0); // add bias
3879  }
3880 
3881  // apply all filters
3882  AddMatMatBatched<BaseFloat>(1.0, tgt_batch, patch_batch, kNoTrans,
3883  filter_params_batch, kTrans, 1.0);
3884 
3885  // release memory
3886  delete filter_params_elem;
3887  for (int32 p = 0; p < num_patches; p++) {
3888  delete tgt_batch[p];
3889  delete patch_batch[p];
3890  }
3891 }
3892 
3893 // scale the parameters
3895  filter_params_.Scale(scale);
3896  bias_params_.Scale(scale);
3897 }
3898 
3899 // add another convolution component
3901  const Convolutional1dComponent *other =
3902  dynamic_cast<const Convolutional1dComponent*>(&other_in);
3903  KALDI_ASSERT(other != NULL);
3904  filter_params_.AddMat(alpha, other->filter_params_);
3905  bias_params_.AddVec(alpha, other->bias_params_);
3906 }
3907 
3908 /*
3909  This function does an operation similar to reversing a map,
3910  except it handles maps that are not one-to-one by outputting
3911  the reversed map as a vector of lists.
3912  @param[in] forward_indexes is a vector of int32, each of whose
3913  elements is between 0 and input_dim - 1.
3914  @param[in] input_dim. See definitions of forward_indexes and
3915  backward_indexes.
3916  @param[out] backward_indexes is a vector of dimension input_dim
3917  of lists, The list at (backward_indexes[i]) is a list
3918  of all indexes j such that forward_indexes[j] = i.
3919 */
3920 void Convolutional1dComponent::ReverseIndexes(const std::vector<int32> &forward_indexes,
3921  int32 input_dim,
3922  std::vector<std::vector<int32> > *backward_indexes) {
3923  int32 i, size = forward_indexes.size();
3924  int32 reserve_size = 2 + size / input_dim;
3925  backward_indexes->resize(input_dim);
3926  std::vector<std::vector<int32> >::iterator iter = backward_indexes->begin(),
3927  end = backward_indexes->end();
3928  for (; iter != end; ++iter)
3929  iter->reserve(reserve_size);
3930  for (int32 j = 0; j < forward_indexes.size(); j++) {
3931  i = forward_indexes[j];
3932  KALDI_ASSERT(i < input_dim);
3933  (*backward_indexes)[i].push_back(j);
3934  }
3935 }
3936 
3937 /*
3938  This function transforms a vector of lists into a list of vectors,
3939  padded with -1.
3940  @param[in] The input vector of lists. Let in.size() be D, and let
3941  the longest list length (i.e. the max of in[i].size()) be L.
3942  @param[out] The output list of vectors. The length of the list will
3943  be L, each vector-dimension will be D (i.e. out[i].size() == D),
3944  and if in[i] == j, then for some k we will have that
3945  out[k][j] = i. The output vectors are padded with -1
3946  where necessary if not all the input lists have the same side.
3947 */
3948 void Convolutional1dComponent::RearrangeIndexes(const std::vector<std::vector<int32> > &in,
3949  std::vector<std::vector<int32> > *out) {
3950  int32 D = in.size();
3951  int32 L = 0;
3952  for (int32 i = 0; i < D; i++)
3953  if (in[i].size() > L)
3954  L = in[i].size();
3955  out->resize(L);
3956  for (int32 i = 0; i < L; i++)
3957  (*out)[i].resize(D, -1);
3958  for (int32 i = 0; i < D; i++) {
3959  for (int32 j = 0; j < in[i].size(); j++) {
3960  (*out)[j][i] = in[i][j];
3961  }
3962  }
3963 }
3964 
3965 // back propagation function
3967  const ChunkInfo &out_info,
3968  const CuMatrixBase<BaseFloat> &in_value,
3969  const CuMatrixBase<BaseFloat> &out_value,
3970  const CuMatrixBase<BaseFloat> &out_deriv,
3971  Component *to_update_in,
3972  CuMatrix<BaseFloat> *in_deriv) const {
3973  in_deriv->Resize(out_deriv.NumRows(), InputDim());
3974  Convolutional1dComponent *to_update = dynamic_cast<Convolutional1dComponent*>(to_update_in);
3975  int32 num_splice = InputDim() / patch_stride_;
3976  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3977  int32 num_filters = filter_params_.NumRows();
3978  int32 num_frames = out_deriv.NumRows();
3979  int32 filter_dim = filter_params_.NumCols();
3980 
3986  CuMatrix<BaseFloat> patches_deriv(num_frames, filter_dim * num_patches, kSetZero);
3987 
3988  //
3989  // backpropagate to vector of matrices
3990  // (corresponding to position of a filter)
3991  //
3992  std::vector<CuSubMatrix<BaseFloat>* > patch_deriv_batch, out_deriv_batch,
3993  filter_params_batch;
3994 
3995  CuSubMatrix<BaseFloat>* filter_params_elem = new CuSubMatrix<BaseFloat>(
3996  filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols());
3997 
3998  // form batch in vector container
3999  for (int32 p = 0; p < num_patches; p++) {
4000  // form batch in vector container. for filter_params_batch, all elements
4001  // point to the same copy filter_params_elem
4002  patch_deriv_batch.push_back(new CuSubMatrix<BaseFloat>(patches_deriv.ColRange(
4003  p * filter_dim, filter_dim)));
4004  out_deriv_batch.push_back(new CuSubMatrix<BaseFloat>(out_deriv.ColRange(
4005  p * num_filters, num_filters)));
4006  filter_params_batch.push_back(filter_params_elem);
4007  }
4008  AddMatMatBatched<BaseFloat>(1.0, patch_deriv_batch, out_deriv_batch, kNoTrans,
4009  filter_params_batch, kNoTrans, 0.0);
4010 
4011  // release memory
4012  delete filter_params_elem;
4013  for (int32 p = 0; p < num_patches; p++) {
4014  delete patch_deriv_batch[p];
4015  delete out_deriv_batch[p];
4016  }
4017 
4018  // sum the derivatives into in_deriv
4019  std::vector<int32> column_map(filter_dim * num_patches);
4020  for (int32 patch = 0, index = 0; patch < num_patches; patch++) {
4021  int32 fstride = patch * patch_step_;
4022  for (int32 splice = 0; splice < num_splice; splice++) {
4023  int32 cstride = splice * patch_stride_;
4024  for (int32 d = 0; d < patch_dim_; d++, index++) {
4025  if (appended_conv_)
4026  column_map[index] = (fstride + d) * num_splice + splice;
4027  else
4028  column_map[index] = fstride + cstride + d;
4029  }
4030  }
4031  }
4032  std::vector<std::vector<int32> > reversed_column_map;
4033  ReverseIndexes(column_map, InputDim(), &reversed_column_map);
4034  std::vector<std::vector<int32> > rearranged_column_map;
4035  RearrangeIndexes(reversed_column_map, &rearranged_column_map);
4036  for (int32 p = 0; p < rearranged_column_map.size(); p++) {
4037  CuArray<int32> cu_cols(rearranged_column_map[p]);
4038  in_deriv->AddCols(patches_deriv, cu_cols);
4039  }
4040 
4041  if (to_update != NULL) {
4042  // Next update the model (must do this 2nd so the derivatives we propagate
4043  // are accurate, in case this == to_update_in.)
4044  to_update->Update(in_value, out_deriv);
4045  }
4046 }
4047 
4048 void Convolutional1dComponent::SetZero(bool treat_as_gradient) {
4049  if (treat_as_gradient) {
4050  SetLearningRate(1.0);
4051  }
4052  filter_params_.SetZero();
4053  bias_params_.SetZero();
4054  if (treat_as_gradient) {
4055  is_gradient_ = true;
4056  }
4057 }
4058 
4059 void Convolutional1dComponent::Read(std::istream &is, bool binary) {
4060  std::ostringstream ostr_beg, ostr_end;
4061  ostr_beg << "<" << Type() << ">"; // e.g. "<Convolutional1dComponent>"
4062  ostr_end << "</" << Type() << ">"; // e.g. "</Convolutional1dComponent>"
4063  // might not see the "<Convolutional1dComponent>" part because
4064  // of how ReadNew() works.
4065  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
4066  ReadBasicType(is, binary, &learning_rate_);
4067  ExpectToken(is, binary, "<PatchDim>");
4068  ReadBasicType(is, binary, &patch_dim_);
4069  ExpectToken(is, binary, "<PatchStep>");
4070  ReadBasicType(is, binary, &patch_step_);
4071  ExpectToken(is, binary, "<PatchStride>");
4072  ReadBasicType(is, binary, &patch_stride_);
4073  // back-compatibility
4074  std::string tok;
4075  ReadToken(is, binary, &tok);
4076  if (tok == "<AppendedConv>") {
4077  ReadBasicType(is, binary, &appended_conv_);
4078  ExpectToken(is, binary, "<FilterParams>");
4079  } else {
4080  appended_conv_ = false;
4081  KALDI_ASSERT(tok == "<FilterParams>");
4082  }
4083  filter_params_.Read(is, binary);
4084  ExpectToken(is, binary, "<BiasParams>");
4085  bias_params_.Read(is, binary);
4086  ReadToken(is, binary, &tok);
4087  if (tok == "<IsGradient>") {
4088  ReadBasicType(is, binary, &is_gradient_);
4089  ExpectToken(is, binary, ostr_end.str());
4090  } else {
4091  is_gradient_ = false;
4092  KALDI_ASSERT(tok == ostr_end.str());
4093  }
4094 }
4095 
4096 void Convolutional1dComponent::Write(std::ostream &os, bool binary) const {
4097  std::ostringstream ostr_beg, ostr_end;
4098  ostr_beg << "<" << Type() << ">"; // e.g. "<Convolutional1dComponent>"
4099  ostr_end << "</" << Type() << ">"; // e.g. "</Convolutional1dComponent>"
4100  WriteToken(os, binary, ostr_beg.str());
4101  WriteToken(os, binary, "<LearningRate>");
4102  WriteBasicType(os, binary, learning_rate_);
4103  WriteToken(os, binary, "<PatchDim>");
4104  WriteBasicType(os, binary, patch_dim_);
4105  WriteToken(os, binary, "<PatchStep>");
4106  WriteBasicType(os, binary, patch_step_);
4107  WriteToken(os, binary, "<PatchStride>");
4108  WriteBasicType(os, binary, patch_stride_);
4109  WriteToken(os, binary, "<AppendedConv>");
4110  WriteBasicType(os, binary, appended_conv_);
4111  WriteToken(os, binary, "<FilterParams>");
4112  filter_params_.Write(os, binary);
4113  WriteToken(os, binary, "<BiasParams>");
4114  bias_params_.Write(os, binary);
4115  WriteToken(os, binary, "<IsGradient>");
4116  WriteBasicType(os, binary, is_gradient_);
4117  WriteToken(os, binary, ostr_end.str());
4118 }
4119 
4121  const Convolutional1dComponent *other =
4122  dynamic_cast<const Convolutional1dComponent*>(&other_in);
4124  + VecVec(bias_params_, other->bias_params_);
4125 }
4126 
4130  ans->patch_dim_ = patch_dim_;
4131  ans->patch_step_ = patch_step_;
4134  ans->bias_params_ = bias_params_;
4136  ans->is_gradient_ = is_gradient_;
4137  return ans;
4138 }
4139 
4141  CuMatrix<BaseFloat> temp_filter_params(filter_params_);
4142  temp_filter_params.SetRandn();
4143  filter_params_.AddMat(stddev, temp_filter_params);
4144 
4145  CuVector<BaseFloat> temp_bias_params(bias_params_);
4146  temp_bias_params.SetRandn();
4147  bias_params_.AddVec(stddev, temp_bias_params);
4148 }
4149 
4151  const MatrixBase<BaseFloat> &filter) {
4152  bias_params_ = bias;
4153  filter_params_ = filter;
4154  KALDI_ASSERT(bias_params_.Dim() == filter_params_.NumRows());
4155 }
4156 
4158  return (filter_params_.NumCols() + 1) * filter_params_.NumRows();
4159 }
4160 
4161 // update parameters
4163  const CuMatrixBase<BaseFloat> &out_deriv) {
4164  // useful dims
4165  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
4166  int32 num_filters = filter_params_.NumRows();
4167  int32 filter_dim = filter_params_.NumCols();
4168  int32 num_frames = in_value.NumRows();
4169  int32 num_splice = InputDim() / patch_stride_;
4170  CuMatrix<BaseFloat> filters_grad;
4171  CuVector<BaseFloat> bias_grad;
4172 
4177  CuMatrix<BaseFloat> patches(num_frames, filter_dim * num_patches, kUndefined);
4178  std::vector<int32> column_map(filter_dim * num_patches);
4179  for (int32 patch = 0, index = 0; patch < num_patches; patch++) {
4180  int32 fstride = patch * patch_step_;
4181  for (int32 splice = 0; splice < num_splice; splice++) {
4182  int32 cstride = splice * patch_stride_;
4183  for (int32 d = 0; d < patch_dim_; d++, index++) {
4184  if (appended_conv_)
4185  column_map[index] = (fstride + d) * num_splice + splice;
4186  else
4187  column_map[index] = fstride + cstride + d;
4188  }
4189  }
4190  }
4191  CuArray<int32> cu_cols(column_map);
4192  patches.CopyCols(in_value, cu_cols);
4193 
4194  //
4195  // calculate the gradient
4196  //
4197  filters_grad.Resize(num_filters, filter_dim, kSetZero); // reset
4198  bias_grad.Resize(num_filters, kSetZero); // reset
4199 
4200  //
4201  // use all the patches
4202  //
4203 
4204  // create a single large matrix holding the smaller matrices
4205  // from the vector container filters_grad_batch along the rows
4206  CuMatrix<BaseFloat> filters_grad_blocks_batch(
4207  num_patches * filters_grad.NumRows(), filters_grad.NumCols());
4208 
4209  std::vector<CuSubMatrix<BaseFloat>* > filters_grad_batch, diff_patch_batch,
4210  patch_batch;
4211  for (int32 p = 0; p < num_patches; p++) {
4212  // form batch in vector container
4213  filters_grad_batch.push_back(new CuSubMatrix<BaseFloat>(
4214  filters_grad_blocks_batch.RowRange(
4215  p * filters_grad.NumRows(),
4216  filters_grad.NumRows())));
4217  diff_patch_batch.push_back(new CuSubMatrix<BaseFloat>(out_deriv.ColRange(
4218  p * num_filters, num_filters)));
4219  patch_batch.push_back(new CuSubMatrix<BaseFloat>(patches.ColRange(
4220  p * filter_dim, filter_dim)));
4221  }
4222 
4223  AddMatMatBatched<BaseFloat>(1.0, filters_grad_batch, diff_patch_batch,
4224  kTrans, patch_batch, kNoTrans, 1.0);
4225 
4226  // add the row blocks together to filters_grad
4227  filters_grad.AddMatBlocks(1.0, filters_grad_blocks_batch);
4228 
4229  // create a matrix holding the col blocks sum of out_deriv
4230  CuMatrix<BaseFloat> out_deriv_col_blocks_sum(out_deriv.NumRows(), num_filters);
4231 
4232  // add the col blocks together to out_deriv_col_blocks_sum
4233  out_deriv_col_blocks_sum.AddMatBlocks(1.0, out_deriv);
4234 
4235  bias_grad.AddRowSumMat(1.0, out_deriv_col_blocks_sum, 1.0);
4236 
4237  // release memory
4238  for (int32 p = 0; p < num_patches; p++) {
4239  delete filters_grad_batch[p];
4240  delete diff_patch_batch[p];
4241  delete patch_batch[p];
4242  }
4243 
4244  //
4245  // update
4246  //
4247  filter_params_.AddMat(learning_rate_, filters_grad);
4248  bias_params_.AddVec(learning_rate_, bias_grad);
4249 }
4250 
4251 void MaxpoolingComponent::Init(int32 input_dim, int32 output_dim,
4252  int32 pool_size, int32 pool_stride) {
4253  input_dim_ = input_dim;
4254  output_dim_ = output_dim;
4255  pool_size_ = pool_size;
4256  pool_stride_ = pool_stride;
4257 
4258  // sanity check
4259  // number of patches
4260  KALDI_ASSERT(input_dim_ % pool_stride_ == 0);
4261  int32 num_patches = input_dim_ / pool_stride_;
4262  // number of pools
4263  KALDI_ASSERT(num_patches % pool_size_ == 0);
4264  int32 num_pools = num_patches / pool_size_;
4265  // check output dim
4266  KALDI_ASSERT(output_dim_ == num_pools * pool_stride_);
4267 }
4268 
4269 void MaxpoolingComponent::InitFromString(std::string args) {
4270  std::string orig_args(args);
4271  int32 input_dim = 0;
4272  int32 output_dim = 0;
4273  int32 pool_size = -1, pool_stride = -1;
4274  bool ok = true;
4275 
4276  ok = ok && ParseFromString("input-dim", &args, &input_dim);
4277  ok = ok && ParseFromString("output-dim", &args, &output_dim);
4278  ok = ok && ParseFromString("pool-size", &args, &pool_size);
4279  ok = ok && ParseFromString("pool-stride", &args, &pool_stride);
4280 
4281  KALDI_LOG << output_dim << " " << input_dim << " " << ok;
4282  KALDI_LOG << "Pool: " << pool_size << " "
4283  << pool_stride << " " << ok;
4284  if (!ok || !args.empty() || output_dim <= 0)
4285  KALDI_ERR << "Invalid initializer for layer of type "
4286  << Type() << ": \"" << orig_args << "\"";
4287  Init(input_dim, output_dim, pool_size, pool_stride);
4288 }
4289 
4290 /*
4291  Input and output of maxpooling component is arranged as
4292  x (time), y (frequency), z (channel)
4293  for efficient pooling.
4294  */
4296  const ChunkInfo &out_info,
4297  const CuMatrixBase<BaseFloat> &in,
4298  CuMatrixBase<BaseFloat> *out) const {
4299  in_info.CheckSize(in);
4300  out_info.CheckSize(*out);
4301  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
4302  int32 num_patches = input_dim_ / pool_stride_;
4303  int32 num_pools = num_patches / pool_size_;
4304 
4305  // do the max-pooling
4306  for (int32 q = 0; q < num_pools; q++) {
4307  // get output buffer of the pool
4308  CuSubMatrix<BaseFloat> pool(out->ColRange(q * pool_stride_, pool_stride_));
4309  pool.Set(-1e20); // reset a large negative value
4310  for (int32 r = 0; r < pool_size_; r++) {
4311  // col-by-col block comparison pool
4312  int32 p = r + q * pool_size_;
4313  pool.Max(in.ColRange(p * pool_stride_, pool_stride_));
4314  }
4315  }
4316 }
4317 
4318 void MaxpoolingComponent::Backprop(const ChunkInfo &, // in_info,
4319  const ChunkInfo &, // out_info,
4320  const CuMatrixBase<BaseFloat> &in_value,
4321  const CuMatrixBase<BaseFloat> &out_value,
4322  const CuMatrixBase<BaseFloat> &out_deriv,
4323  Component *to_update,
4324  CuMatrix<BaseFloat> *in_deriv) const {
4325  int32 num_patches = input_dim_ / pool_stride_;
4326  int32 num_pools = num_patches / pool_size_;
4327  std::vector<int32> patch_summands(num_patches, 0);
4328  in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);
4329 
4330  for(int32 q = 0; q < num_pools; q++) {
4331  for(int32 r = 0; r < pool_size_; r++) {
4332  int32 p = r + q * pool_size_;
4333  CuSubMatrix<BaseFloat> in_p(in_value.ColRange(p * pool_stride_, pool_stride_));
4334  CuSubMatrix<BaseFloat> out_q(out_value.ColRange(q * pool_stride_, pool_stride_));
4335  CuSubMatrix<BaseFloat> tgt(in_deriv->ColRange(p * pool_stride_, pool_stride_));
4336  CuMatrix<BaseFloat> src(out_deriv.ColRange(q * pool_stride_, pool_stride_));
4337  // zero-out mask
4338  CuMatrix<BaseFloat> mask;
4339  in_p.EqualElementMask(out_q, &mask);
4340  src.MulElements(mask);
4341  tgt.AddMat(1.0, src);
4342  // summed deriv info
4343  patch_summands[p] += 1;
4344  }
4345  }
4346 
4347  // scale in_deriv of overlaped pools
4348  for(int32 p = 0; p < num_patches; p++) {
4349  CuSubMatrix<BaseFloat> tgt(in_deriv->ColRange(p * pool_stride_, pool_stride_));
4350  KALDI_ASSERT(patch_summands[p] > 0);
4351  tgt.Scale(1.0 / patch_summands[p]);
4352  }
4353 }
4354 
4355 void MaxpoolingComponent::Read(std::istream &is, bool binary) {
4356  ExpectOneOrTwoTokens(is, binary, "<MaxpoolingComponent>", "<InputDim>");
4357  ReadBasicType(is, binary, &input_dim_);
4358  ExpectToken(is, binary, "<OutputDim>");
4359  ReadBasicType(is, binary, &output_dim_);
4360  ExpectToken(is, binary, "<PoolSize>");
4361  ReadBasicType(is, binary, &pool_size_);
4362  ExpectToken(is, binary, "<PoolStride>");
4363  ReadBasicType(is, binary, &pool_stride_);
4364  ExpectToken(is, binary, "</MaxpoolingComponent>");
4365 }
4366 
4367 void MaxpoolingComponent::Write(std::ostream &os, bool binary) const {
4368  WriteToken(os, binary, "<MaxpoolingComponent>");
4369  WriteToken(os, binary, "<InputDim>");
4370  WriteBasicType(os, binary, input_dim_);
4371  WriteToken(os, binary, "<OutputDim>");
4372  WriteBasicType(os, binary, output_dim_);
4373  WriteToken(os, binary, "<PoolSize>");
4374  WriteBasicType(os, binary, pool_size_);
4375  WriteToken(os, binary, "<PoolStride>");
4376  WriteBasicType(os, binary, pool_stride_);
4377  WriteToken(os, binary, "</MaxpoolingComponent>");
4378 }
4379 
4380 std::string MaxpoolingComponent::Info() const {
4381  std::stringstream stream;
4382  stream << Type() << ", input-dim = " << input_dim_
4383  << ", output-dim = " << output_dim_
4384  << ", pool-size = " << pool_size_
4385  << ", pool-stride = " << pool_stride_;
4386  return stream.str();
4387 }
4388 
4389 } // namespace nnet2
4390 } // namespace kaldi
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
int32 OutputDim() const
Get size of output vectors.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void MulElements(const CuVectorBase< Real > &v)
Definition: cu-vector.cc:838
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual void Read(std::istream &is, bool binary)
We implement Read at this level as it just needs the Type().
virtual std::string Info() const
virtual void SetParams(const VectorBase< BaseFloat > &bias, const MatrixBase< BaseFloat > &linear)
This kind of Component is a base-class for things like sigmoid and softmax.
virtual void Read(std::istream &is, bool binary)
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Read(std::istream &is, bool binary)
virtual int32 InputDim() const
Get size of input vectors.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual int32 InputDim() const =0
Get size of input vectors.
CuVector< BaseFloat > bias_params_
void SoftHinge(const CuMatrixBase< Real > &src)
Apply the function y = log(1 + exp(x)), to each element.
Definition: cu-matrix.cc:1555
void ApplyPow(Real power)
Definition: cu-matrix.h:438
FixedAffineComponent is an affine transform that is supplied at network initialization time and is no...
virtual Component * Copy() const
Copy component (deep copy).
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev)
virtual void Read(std::istream &is, bool binary)
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual Component * Copy() const
Copy component (deep copy).
void Check() const
Checks that the data in the ChunkInfo is valid, and die if not.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual int32 OutputDim() const
Get size of output vectors.
void GroupMax(const CuMatrixBase< Real > &src)
Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j where G = x.NumCols() / y.NumCols() must be an integer.
Definition: cu-matrix.cc:1617
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
static void ReverseIndexes(const std::vector< int32 > &forward_indexes, int32 input_dim, std::vector< std::vector< int32 > > *backward_indexes)
virtual void Resize(int32 input_dim, int32 output_dim)
virtual void InitFromString(std::string args)
We implement InitFromString at this level.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Component * CollapseWithPrevious(const FixedAffineComponent &prev) const
virtual std::string Info() const
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
void Init(const CuVectorBase< BaseFloat > &scales)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual Component * Copy() const
Copy component (deep copy).
virtual void PerturbParams(BaseFloat stddev)
We introduce a new virtual function that only applies to class UpdatableComponent.
BaseFloat GetScalingFactor(const CuVectorBase< BaseFloat > &in_products, BaseFloat gamma_prod, CuVectorBase< BaseFloat > *out_products)
The following function is only called if max_change_per_sample_ > 0, it returns a scaling factor alph...
virtual void UpdateSimple(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
static Component * NewFromString(const std::string &initializer_line)
Initialize the Component from one line that will contain first the type, e.g.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void Init(int32 input_dim, int32 output_dim, int32 pool_size, int32 pool_stride)
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void CopyColFromMat(const CuMatrixBase< Real > &mat, MatrixIndexT col)
Definition: cu-vector.cc:103
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void Init(const CuMatrixBase< BaseFloat > &matrix)
std::vector< int32 > context_
virtual Component * Copy() const
Copy component (deep copy).
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
virtual void Add(BaseFloat alpha, const UpdatableComponent &other)
This new virtual function adds the parameters of another updatable component, times some constant...
Abstract class, basic element of the network: a box with defined inputs, outputs, and a transformation-functions interface.
void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void Init(int32 dim, int32 dct_dim, bool reorder, int32 keep_dct_dim=0)
Real Sum() const
Definition: cu-vector.cc:297
void ComputeDctMatrix(Matrix< Real > *M)
ComputeDctMatrix computes a matrix corresponding to the DCT, such that M * v equals the DCT of vector...
void Init(int32 input_dim, std::vector< int32 > context, int32 const_component_dim=0)
virtual void Read(std::istream &is, bool binary)
virtual void SetZero(bool treat_as_gradient)
Set parameters to zero, and if treat_as_gradient is true, we'll be treating this as a gradient so set...
virtual Component * Copy() const
Copy component (deep copy).
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual int32 OutputDim() const =0
Get size of output vectors.
void Add(BaseFloat alpha, const NonlinearComponent &other)
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:653
void Write(std::ostream &os, bool binary) const
Write component to stream.
void ApplyFloor(Real floor_val)
Definition: cu-matrix.h:451
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: cu-vector.cc:595
virtual void SetZero(bool treat_as_gradient)
Set parameters to zero, and if treat_as_gradient is true, we'll be treating this as a gradient so set...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Read(std::istream &is, bool binary)
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update_in, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void SetParams(const VectorBase< BaseFloat > &bias, const MatrixBase< BaseFloat > &filter)
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
Definition: cu-matrix.cc:954
virtual int32 GetParameterDim() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
PermuteComponent does a permutation of the dimensions (by default, a fixed random permutation...
virtual void Scale(BaseFloat scale)
This new virtual function scales the parameters by this amount.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301
This is a bit similar to dropout but adding (not multiplying) Gaussian noise with a given standard de...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Read(std::istream &is, bool binary)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
int32 GetParameterDim() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev, int32 num_blocks, BaseFloat alpha)
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
void AddCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indices)
Add column indices[r] of src to column r.
Definition: cu-matrix.cc:2701
virtual void Read(std::istream &is, bool binary)
virtual Component * Copy() const
Copy component (deep copy).
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void SetZero(bool treat_as_gradient)
Set parameters to zero, and if treat_as_gradient is true, we&#39;ll be treating this as a gradient so set...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void Scale(BaseFloat scale)
This new virtual function scales the parameters by this amount.
virtual void Read(std::istream &is, bool binary)
static const BaseFloat kNormFloor
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev, int32 num_blocks)
virtual std::string Info() const
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
Discrete cosine transform.
void DestructiveSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt)
Singular value decomposition Major limitations: For nonsquare matrices, we assume m>=n (NumRows >= Nu...
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void GetSizes(std::vector< int32 > *sizes) const
void UpdateStats(const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > *deriv=NULL)
void PreconditionDirections(CuMatrixBase< BaseFloat > *R, CuVectorBase< BaseFloat > *row_prod, BaseFloat *scale)
virtual void InitFromString(std::string args)
We implement InitFromString at this level.
void AddMatBlocks(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
This function is like AddMat (it does *this += alpha * src), except that it supports cases where *thi...
Definition: cu-matrix.cc:1119
FixedScaleComponent applies a fixed per-element scale; it's similar to the Rescale component in the n...
void Scale(Real value)
Definition: cu-matrix.cc:644
virtual void Read(std::istream &is, bool binary)=0
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
bool ParseFromString(const std::string &name, std::string *string, int32 *param)
Functions used in Init routines.
std::istream & Stream()
Definition: kaldi-io.cc:826
void Init(int32 dim, std::vector< int32 > context)
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev, int32 rank_in, int32 rank_out, int32 update_period, BaseFloat num_samples_history, BaseFloat alpha, BaseFloat max_change_per_sample)
virtual std::string Info() const
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
float BaseFloat
Definition: kaldi-types.h:29
virtual std::string Info() const
int32 GetOffset(int32 index) const
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimension do not match.
Definition: cu-vector.cc:1078
void Reorder(CuMatrixBase< BaseFloat > *mat, bool reverse) const
void Init(int32 input_dim, int32 output_dim, BaseFloat p)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void DiffNormalizePerRow(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_deriv, const Real target_rms, const bool add_log_stddev, CuMatrixBase< Real > *in_deriv)
Definition: cu-math.cc:349
void Max(const CuMatrixBase< Real > &A)
Do, elementwise, *this = max(*this, A).
Definition: cu-matrix.cc:715
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
Definition: io-funcs-inl.h:232
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261
double Log(double x)
Definition: kaldi-math.h:100
void Init(BaseFloat learning_rate)
void ApplyPowAbs(Real power, bool include_sign=false)
Definition: cu-matrix.h:443
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void Init(const CuMatrixBase< BaseFloat > &matrix)
matrix should be of size input-dim+1 to output-dim, last col is offset
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
Definition: cu-matrix.cc:1534
virtual Component * Copy() const
Copy component (deep copy).
void Add(Real value)
Definition: cu-matrix.cc:582
CuMatrix< BaseFloat > dct_mat_
virtual std::string Info() const
Take the absolute values of an input vector to a power.
virtual int32 InputDim() const
Get size of input vectors.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
Definition: cu-matrix.cc:1717
static Component * ReadNew(std::istream &is, bool binary)
Read component from stream.
BaseFloat learning_rate_
learning rate (0.0..0.01)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
This Component, if present, randomly zeroes half of the inputs and multiplies the other half by two...
void CopyRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies row r from row indexes[r] of src.
Definition: cu-matrix.cc:2678
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
ChunkInfo is a class whose purpose is to describe the structure of matrices holding features...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Read(std::istream &is, bool binary)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Add(BaseFloat alpha, const UpdatableComponent &other)
This new virtual function adds the parameters of another updatable component, times some constant...
void Resize(MatrixIndexT dim, MatrixResizeType t=kSetZero)
Allocate the memory.
Definition: cu-vector.cc:993
#define KALDI_ERR
Definition: kaldi-error.h:147
This is as SpliceComponent but outputs the max of any of the inputs (taking the max across time)...
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void SetZero(bool treat_as_gradient)
Set parameters to zero, and if treat_as_gradient is true, we&#39;ll be treating this as a gradient so set...
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
Definition: text-utils.cc:238
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void GroupPnorm(const CuMatrixBase< Real > &src, Real pow)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ (power)) ^ (1 / p) where G = x...
Definition: cu-matrix.cc:1576
void ApplyPow(Real power)
Definition: cu-vector.h:147
virtual int32 OutputDim() const
Get size of output vectors.
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
Definition: cu-matrix.cc:1291
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Read(std::istream &is, bool binary)
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
virtual Component * Copy() const
Copy component (deep copy).
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
virtual std::string Info() const
virtual void Read(std::istream &is, bool binary)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
void DiffSoftmaxPerRow(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the softmax function.
Definition: cu-matrix.cc:1868
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
void GroupMaxDeriv(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
Definition: cu-matrix.cc:874
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual std::string Info() const
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Here, "other" is a component of the same specific type.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
int32 InputDim() const
Get size of input vectors.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void PerturbParams(BaseFloat stddev)
We introduce a new virtual function that only applies to class UpdatableComponent.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
FixedBiasComponent applies a fixed per-element bias; it's similar to the AddShift component in the nn...
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual Component * Copy() const
Copy component (deep copy).
virtual int32 Index() const
Returns the index in the sequence of layers in the neural net; intended only to be used in debugging ...
virtual void UpdateSimple(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void Read(std::istream &is, bool binary)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
Real Sum() const
Returns sum of the elements.
void Init(int32 dim, BaseFloat noise_stddev)
virtual void Scale(BaseFloat scale)
This new virtual function scales the parameters by this amount.
int32 NumRows() const
Returns the number of rows that we expect the feature matrix to have.
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
int32 GetIndex(int32 offset) const
void PerturbParams(BaseFloat stddev)
We introduce a new virtual function that only applies to class UpdatableComponent.
virtual std::string Info() const
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual Component * Copy() const
Copy component (deep copy).
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual Component * Copy() const
Copy component (deep copy).
virtual std::string Info() const
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual int32 GetParameterDim() const
The following new virtual function returns the total dimension of the parameters in this class...
CuMatrix< BaseFloat > linear_params_
void Read(std::istream &is, bool binary)
void CheckSize(const CuMatrixBase< BaseFloat > &mat) const
Checks that the matrix has the size we expect, and die if not.
MaxPoolingComponent : Maxpooling component was firstly used in ConvNet for selecting an representativ...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual Component * Copy() const
Copy component (deep copy).
void MulRowsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = diag(scale) * (*this).
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
void MulColsVec(const CuVectorBase< Real > &scale)
scale i&#39;th column by scale[i]
Definition: cu-matrix.cc:765
void SumColumnRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, sets (*this)(r, c) to the sum src(r, j), where j ranges from indexes[c].first through indexes[c].second - 1.
Definition: cu-matrix.cc:2893
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:665
virtual Component * Copy() const
Copy component (deep copy).
void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
FixedLinearComponent is a linear transform that is supplied at network initialization time and is not...
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
void DiffLogSoftmaxPerRow(const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv)
Differentiate backward through the log softmax function.
Definition: cu-matrix.cc:1903
void DiffGroupPnorm(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv, Real power)
Differentiate backward through the GroupPnorm function.
Definition: cu-matrix.cc:841
virtual std::string Info() const
static Component * NewComponentOfType(const std::string &type)
Return a new Component of the given type e.g.
A class representing a vector.
Definition: kaldi-vector.h:406
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual std::string Info() const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
virtual std::string Type() const =0
void Read(std::istream &is, bool binary)
I/O functions.
Definition: cu-matrix.cc:494
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
BaseFloat GetScalingFactor(const CuMatrix< BaseFloat > &in_value_precon, const CuMatrix< BaseFloat > &out_deriv_precon)
The following function is only called if max_change_ > 0.
static void ExpectOneOrTwoTokens(std::istream &is, bool binary, const std::string &token1, const std::string &token2)
std::vector< int32 > reorder_
static void RearrangeIndexes(const std::vector< std::vector< int32 > > &in, std::vector< std::vector< int32 > > *out)
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
void Read(std::istream &is, bool binary)
I/O.
Definition: cu-vector.cc:963
Keywords: natural gradient descent, NG-SGD, naturalgradient.
virtual void Read(std::istream &is, bool binary)
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, int32 patch_dim, int32 patch_step, int32 patch_stride, BaseFloat param_stddev, BaseFloat bias_stddev, bool appended_conv)
virtual std::string Info() const
virtual void Resize(int32 input_dim, int32 output_dim)
CuVector< BaseFloat > bias_params_
virtual void Read(std::istream &is, bool binary)
virtual int32 InputDim() const
Get size of input vectors.
void MulRowsGroupMat(const CuMatrixBase< Real > &src)
divide each row into src.NumCols() groups, and then scale i&#39;th row&#39;s jth group of elements by src[i...
Definition: cu-matrix.cc:816
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Here, "other" is a component of the same specific type.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
Component * Copy() const
Copy component (deep copy).
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
Definition: io-funcs-inl.h:198
::MatrixDim Dim() const
Definition: cu-matrix.h:221
void Init(int32 input_dim, int32 output_dim)
virtual void InitFromString(std::string args)=0
Initialize, typically from a line of a config file.
Component * CollapseWithNext(const AffineComponent &next) const
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev, BaseFloat alpha, BaseFloat max_change)
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
void CopyCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies column r from column indexes[r] of src.
Definition: cu-matrix.cc:2656
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
virtual void Read(std::istream &is, bool binary)
void Init(int32 dim, BaseFloat scale)
void Init(const CuVectorBase< BaseFloat > &scales)
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Read(std::istream &is, bool binary)
We implement Read at this level as it just needs the Type().
virtual std::string Info() const
void Init(const std::vector< int32 > &sizes)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Add(BaseFloat alpha, const UpdatableComponent &other)
This new virtual function adds the parameters of another updatable component, times some constant...
CuMatrix< BaseFloat > linear_params_
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
Keywords for search: natural gradient, naturalgradient, NG-SGD.
void LogSoftMaxPerRow(const CuMatrixBase< Real > &src)
LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row...
Definition: cu-matrix.cc:1740
void SetLearningRate(BaseFloat lrate)
Sets the learning rate of gradient descent.
virtual std::string Type() const
void InitFromString(std::string args)
Initialize, typically from a line of a config file.
Convolutional1dComponent implements convolution over frequency axis.
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
virtual Component * Copy() const
Copy component (deep copy).
void NormalizePerRow(const CuMatrixBase< Real > &in, const Real target_rms, const bool add_log_stddev, CuMatrixBase< Real > *out)
Normalize nonlinearity modifies the vector of activations by scaling it so that the root-mean-square ...
Definition: cu-math.cc:280
virtual void Read(std::istream &is, bool binary)
void Init(int32 dim, BaseFloat power=2)
Splices a context window of frames together [over time].
void SetMatMatDivMat(const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const CuMatrixBase< Real > &C)
*this = a * b / c (by element; when c = 0, *this = a) *this can be an alias of a, b or c safely and g...
Definition: cu-matrix.cc:1206
virtual Component * Copy() const
Copy component (deep copy).
void Resize(int32 input_dim, int32 output_dim)
void Tanh(const CuMatrixBase< Real > &src)
Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src).
Definition: cu-matrix.cc:1786
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
CuVector< BaseFloat > bias_params_
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
#define KALDI_LOG
Definition: kaldi-error.h:153
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37
CuMatrix< BaseFloat > linear_params_
virtual void Read(std::istream &is, bool binary)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual std::string Info() const
void Set(Real value)
Definition: cu-matrix.cc:531
virtual void Read(std::istream &is, bool binary)
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void PreconditionDirectionsAlphaRescaled(const CuMatrixBase< BaseFloat > &R, double alpha, CuMatrixBase< BaseFloat > *P)
This wrapper for PreconditionDirections computes lambda using = /(N D) trace(R^T, R), and calls PreconditionDirections.
void EqualElementMask(const CuMatrixBase< Real > &mat, CuMatrix< Real > *mask) const
Definition: cu-matrix.cc:3429
BaseFloat LearningRate() const
Gets the learning rate of gradient descent.
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Here, "other" is a component of the same specific type.
virtual std::string Info() const
int32 NumCols() const
Returns the number of columns that we expect the feature matrix to have.
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50
virtual void Read(std::istream &is, bool binary)
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
Definition: cu-vector.cc:1277
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
void Init(int32 dim, BaseFloat dropout_proportion=0.5, BaseFloat dropout_scale=0.0)
dropout-proportion is the proportion that is dropped out, e.g.
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Read(std::istream &is, bool binary)
Vector for CUDA computing.
Definition: matrix-common.h:72
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void LimitRank(int32 dimension, AffineComponent **a, AffineComponent **b) const
This function is for getting a low-rank approximations of this AffineComponent by two AffineComponent...
virtual int32 OutputDim() const
Get size of output vectors.
Class UpdatableComponent is a Component which has trainable parameters and contains some global param...
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94