nnet-component.cc
Go to the documentation of this file.
1 // nnet2/nnet-component.cc
2 
3 // Copyright 2011-2012 Karel Vesely
4 // 2013-2014 Johns Hopkins University (author: Daniel Povey)
5 // 2013 Xiaohui Zhang
6 // 2014 Vijayaditya Peddinti
7 // 2014-2015 Guoguo Chen
8 
9 // See ../../COPYING for clarification regarding multiple authors
10 //
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 //
15 // http://www.apache.org/licenses/LICENSE-2.0
16 //
17 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
19 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
20 // MERCHANTABLITY OR NON-INFRINGEMENT.
21 // See the Apache 2 License for the specific language governing permissions and
22 // limitations under the License.
23 
24 #include <iterator>
25 #include <sstream>
26 #include "nnet2/nnet-component.h"
29 #include "util/stl-utils.h"
30 #include "util/text-utils.h"
31 #include "util/kaldi-io.h"
32 
33 namespace kaldi {
34 namespace nnet2 {
35 
36 // static
37 Component* Component::ReadNew(std::istream &is, bool binary) {
38  std::string token;
39  ReadToken(is, binary, &token); // e.g. "<SigmoidComponent>".
40  token.erase(0, 1); // erase "<".
41  token.erase(token.length()-1); // erase ">".
42  Component *ans = NewComponentOfType(token);
43  if (!ans)
44  KALDI_ERR << "Unknown component type " << token;
45  ans->Read(is, binary);
46  return ans;
47 }
48 
49 
50 // static
51 Component* Component::NewComponentOfType(const std::string &component_type) {
52  Component *ans = NULL;
53  if (component_type == "SigmoidComponent") {
54  ans = new SigmoidComponent();
55  } else if (component_type == "TanhComponent") {
56  ans = new TanhComponent();
57  } else if (component_type == "PowerComponent") {
58  ans = new PowerComponent();
59  } else if (component_type == "SoftmaxComponent") {
60  ans = new SoftmaxComponent();
61  } else if (component_type == "LogSoftmaxComponent") {
62  ans = new LogSoftmaxComponent();
63  } else if (component_type == "RectifiedLinearComponent") {
64  ans = new RectifiedLinearComponent();
65  } else if (component_type == "NormalizeComponent") {
66  ans = new NormalizeComponent();
67  } else if (component_type == "SoftHingeComponent") {
68  ans = new SoftHingeComponent();
69  } else if (component_type == "PnormComponent") {
70  ans = new PnormComponent();
71  } else if (component_type == "MaxoutComponent") {
72  ans = new MaxoutComponent();
73  } else if (component_type == "ScaleComponent") {
74  ans = new ScaleComponent();
75  } else if (component_type == "AffineComponent") {
76  ans = new AffineComponent();
77  } else if (component_type == "AffineComponentPreconditioned") {
79  } else if (component_type == "AffineComponentPreconditionedOnline") {
81  } else if (component_type == "SumGroupComponent") {
82  ans = new SumGroupComponent();
83  } else if (component_type == "BlockAffineComponent") {
84  ans = new BlockAffineComponent();
85  } else if (component_type == "BlockAffineComponentPreconditioned") {
87  } else if (component_type == "PermuteComponent") {
88  ans = new PermuteComponent();
89  } else if (component_type == "DctComponent") {
90  ans = new DctComponent();
91  } else if (component_type == "FixedLinearComponent") {
92  ans = new FixedLinearComponent();
93  } else if (component_type == "FixedAffineComponent") {
94  ans = new FixedAffineComponent();
95  } else if (component_type == "FixedScaleComponent") {
96  ans = new FixedScaleComponent();
97  } else if (component_type == "FixedBiasComponent") {
98  ans = new FixedBiasComponent();
99  } else if (component_type == "SpliceComponent") {
100  ans = new SpliceComponent();
101  } else if (component_type == "SpliceMaxComponent") {
102  ans = new SpliceMaxComponent();
103  } else if (component_type == "DropoutComponent") {
104  ans = new DropoutComponent();
105  } else if (component_type == "AdditiveNoiseComponent") {
106  ans = new AdditiveNoiseComponent();
107  } else if (component_type == "Convolutional1dComponent") {
108  ans = new Convolutional1dComponent();
109  } else if (component_type == "MaxpoolingComponent") {
110  ans = new MaxpoolingComponent();
111  }
112  return ans;
113 }
114 
115 // static
116 Component* Component::NewFromString(const std::string &initializer_line) {
117  std::istringstream istr(initializer_line);
118  std::string component_type; // e.g. "SigmoidComponent".
119  istr >> component_type >> std::ws;
120  std::string rest_of_line;
121  getline(istr, rest_of_line);
122  Component *ans = NewComponentOfType(component_type);
123  if (ans == NULL)
124  KALDI_ERR << "Bad initializer line (no such type of Component): "
125  << initializer_line;
126  ans->InitFromString(rest_of_line);
127  return ans;
128 }
129 
130 
131 // This is like ExpectToken but for two tokens, and it
132 // will either accept token1 and then token2, or just token2.
133 // This is useful in Read functions where the first token
134 // may already have been consumed.
135 static void ExpectOneOrTwoTokens(std::istream &is, bool binary,
136  const std::string &token1,
137  const std::string &token2) {
138  KALDI_ASSERT(token1 != token2);
139  std::string temp;
140  ReadToken(is, binary, &temp);
141  if (temp == token1) {
142  ExpectToken(is, binary, token2);
143  } else {
144  if (temp != token2) {
145  KALDI_ERR << "Expecting token " << token1 << " or " << token2
146  << " but got " << temp;
147  }
148  }
149 }
150 
151 
152 // static
153 bool ParseFromString(const std::string &name, std::string *string,
154  int32 *param) {
155  std::vector<std::string> split_string;
156  SplitStringToVector(*string, " \t", true,
157  &split_string);
158  std::string name_equals = name + "="; // the name and then the equals sign.
159  size_t len = name_equals.length();
160 
161  for (size_t i = 0; i < split_string.size(); i++) {
162  if (split_string[i].compare(0, len, name_equals) == 0) {
163  if (!ConvertStringToInteger(split_string[i].substr(len), param))
164  KALDI_ERR << "Bad option " << split_string[i];
165  *string = "";
166  // Set "string" to all the pieces but the one we used.
167  for (size_t j = 0; j < split_string.size(); j++) {
168  if (j != i) {
169  if (!string->empty()) *string += " ";
170  *string += split_string[j];
171  }
172  }
173  return true;
174  }
175  }
176  return false;
177 }
178 
179 bool ParseFromString(const std::string &name, std::string *string,
180  bool *param) {
181  std::vector<std::string> split_string;
182  SplitStringToVector(*string, " \t", true,
183  &split_string);
184  std::string name_equals = name + "="; // the name and then the equals sign.
185  size_t len = name_equals.length();
186 
187  for (size_t i = 0; i < split_string.size(); i++) {
188  if (split_string[i].compare(0, len, name_equals) == 0) {
189  std::string b = split_string[i].substr(len);
190  if (b.empty())
191  KALDI_ERR << "Bad option " << split_string[i];
192  if (b[0] == 'f' || b[0] == 'F') *param = false;
193  else if (b[0] == 't' || b[0] == 'T') *param = true;
194  else
195  KALDI_ERR << "Bad option " << split_string[i];
196  *string = "";
197  // Set "string" to all the pieces but the one we used.
198  for (size_t j = 0; j < split_string.size(); j++) {
199  if (j != i) {
200  if (!string->empty()) *string += " ";
201  *string += split_string[j];
202  }
203  }
204  return true;
205  }
206  }
207  return false;
208 }
209 
210 bool ParseFromString(const std::string &name, std::string *string,
211  BaseFloat *param) {
212  std::vector<std::string> split_string;
213  SplitStringToVector(*string, " \t", true,
214  &split_string);
215  std::string name_equals = name + "="; // the name and then the equals sign.
216  size_t len = name_equals.length();
217 
218  for (size_t i = 0; i < split_string.size(); i++) {
219  if (split_string[i].compare(0, len, name_equals) == 0) {
220  if (!ConvertStringToReal(split_string[i].substr(len), param))
221  KALDI_ERR << "Bad option " << split_string[i];
222  *string = "";
223  // Set "string" to all the pieces but the one we used.
224  for (size_t j = 0; j < split_string.size(); j++) {
225  if (j != i) {
226  if (!string->empty()) *string += " ";
227  *string += split_string[j];
228  }
229  }
230  return true;
231  }
232  }
233  return false;
234 }
235 
236 bool ParseFromString(const std::string &name, std::string *string,
237  std::string *param) {
238  std::vector<std::string> split_string;
239  SplitStringToVector(*string, " \t", true,
240  &split_string);
241  std::string name_equals = name + "="; // the name and then the equals sign.
242  size_t len = name_equals.length();
243 
244  for (size_t i = 0; i < split_string.size(); i++) {
245  if (split_string[i].compare(0, len, name_equals) == 0) {
246  *param = split_string[i].substr(len);
247 
248  // Set "string" to all the pieces but the one we used.
249  *string = "";
250  for (size_t j = 0; j < split_string.size(); j++) {
251  if (j != i) {
252  if (!string->empty()) *string += " ";
253  *string += split_string[j];
254  }
255  }
256  return true;
257  }
258  }
259  return false;
260 }
261 
262 bool ParseFromString(const std::string &name, std::string *string,
263  std::vector<int32> *param) {
264  std::vector<std::string> split_string;
265  SplitStringToVector(*string, " \t", true,
266  &split_string);
267  std::string name_equals = name + "="; // the name and then the equals sign.
268  size_t len = name_equals.length();
269 
270  for (size_t i = 0; i < split_string.size(); i++) {
271  if (split_string[i].compare(0, len, name_equals) == 0) {
272  if (!SplitStringToIntegers(split_string[i].substr(len), ":",
273  false, param))
274  KALDI_ERR << "Bad option " << split_string[i];
275  *string = "";
276  // Set "string" to all the pieces but the one we used.
277  for (size_t j = 0; j < split_string.size(); j++) {
278  if (j != i) {
279  if (!string->empty()) *string += " ";
280  *string += split_string[j];
281  }
282  }
283  return true;
284  }
285  }
286  return false;
287 }
288 
289 
291  PermuteComponent *ans = new PermuteComponent();
292  ans->reorder_ = reorder_;
293  return ans;
294 }
295 void PermuteComponent::Init(const std::vector<int32> &reorder) {
296  reorder_ = reorder;
297  KALDI_ASSERT(!reorder.empty());
298  std::vector<int32> indexes(reorder);
299  std::sort(indexes.begin(), indexes.end());
300  for (int32 i = 0; i < static_cast<int32>(indexes.size()); i++)
301  KALDI_ASSERT(i == indexes[i] && "Not a permutation");
302 }
303 
304 
305 std::string Component::Info() const {
306  std::stringstream stream;
307  stream << Type() << ", input-dim=" << InputDim()
308  << ", output-dim=" << OutputDim();
309  return stream.str();
310 }
311 
312 std::string UpdatableComponent::Info() const {
313  std::stringstream stream;
314  stream << Type() << ", input-dim=" << InputDim()
315  << ", output-dim=" << OutputDim() << ", learning-rate="
316  << LearningRate();
317  return stream.str();
318 }
319 
320 
322  KALDI_ASSERT(dim > 0);
323  dim_ = dim;
324  value_sum_.Resize(dim);
325  deriv_sum_.Resize(dim);
326  count_ = 0.0;
327 }
328 
330  const CuMatrixBase<BaseFloat> *deriv) {
331  KALDI_ASSERT(out_value.NumCols() == InputDim());
332  // Check we have the correct dimensions.
333  if (value_sum_.Dim() != InputDim() ||
334  (deriv != NULL && deriv_sum_.Dim() != InputDim())) {
335  std::lock_guard<std::mutex> lock(mutex_);
336  if (value_sum_.Dim() != InputDim()) {
337  value_sum_.Resize(InputDim());
338  count_ = 0.0;
339  }
340  if (deriv != NULL && deriv_sum_.Dim() != InputDim()) {
341  deriv_sum_.Resize(InputDim());
342  count_ = 0.0;
343  value_sum_.SetZero();
344  }
345  }
346  count_ += out_value.NumRows();
348  temp.AddRowSumMat(1.0, out_value, 0.0);
349  value_sum_.AddVec(1.0, temp);
350  if (deriv != NULL) {
351  temp.AddRowSumMat(1.0, *deriv, 0.0);
352  deriv_sum_.AddVec(1.0, temp);
353  }
354 }
355 
357  value_sum_.Scale(scale);
358  deriv_sum_.Scale(scale);
359  count_ *= scale;
360 }
361 
363  if (value_sum_.Dim() == 0 && other.value_sum_.Dim() != 0)
364  value_sum_.Resize(other.value_sum_.Dim());
365  if (deriv_sum_.Dim() == 0 && other.deriv_sum_.Dim() != 0)
366  deriv_sum_.Resize(other.deriv_sum_.Dim());
367  if (other.value_sum_.Dim() != 0)
368  value_sum_.AddVec(alpha, other.value_sum_);
369  if (other.deriv_sum_.Dim() != 0)
370  deriv_sum_.AddVec(alpha, other.deriv_sum_);
371  count_ += alpha * other.count_;
372 }
373 
374 void NonlinearComponent::Read(std::istream &is, bool binary) {
375  std::ostringstream ostr_beg, ostr_end;
376  ostr_beg << "<" << Type() << ">"; // e.g. "<SigmoidComponent>"
377  ostr_end << "</" << Type() << ">"; // e.g. "</SigmoidComponent>"
378  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<Dim>");
379  ReadBasicType(is, binary, &dim_); // Read dimension.
380  ExpectToken(is, binary, "<ValueSum>");
381  value_sum_.Read(is, binary);
382  ExpectToken(is, binary, "<DerivSum>");
383  deriv_sum_.Read(is, binary);
384  ExpectToken(is, binary, "<Count>");
385  ReadBasicType(is, binary, &count_);
386  ExpectToken(is, binary, ostr_end.str());
387 }
388 
389 void NonlinearComponent::Write(std::ostream &os, bool binary) const {
390  std::ostringstream ostr_beg, ostr_end;
391  ostr_beg << "<" << Type() << ">"; // e.g. "<SigmoidComponent>"
392  ostr_end << "</" << Type() << ">"; // e.g. "</SigmoidComponent>"
393  WriteToken(os, binary, ostr_beg.str());
394  WriteToken(os, binary, "<Dim>");
395  WriteBasicType(os, binary, dim_);
396  WriteToken(os, binary, "<ValueSum>");
397  value_sum_.Write(os, binary);
398  WriteToken(os, binary, "<DerivSum>");
399  deriv_sum_.Write(os, binary);
400  WriteToken(os, binary, "<Count>");
401  WriteBasicType(os, binary, count_);
402  WriteToken(os, binary, ostr_end.str());
403 }
404 
406  dim_(other.dim_), value_sum_(other.value_sum_), deriv_sum_(other.deriv_sum_),
407  count_(other.count_) { }
408 
409 void NonlinearComponent::InitFromString(std::string args) {
410  std::string orig_args(args);
411  int32 dim;
412  bool ok = ParseFromString("dim", &args, &dim);
413  if (!ok || !args.empty() || dim <= 0)
414  KALDI_ERR << "Invalid initializer for layer of type "
415  << Type() << ": \"" << orig_args << "\"";
416  Init(dim);
417 }
418 
419 void MaxoutComponent::Init(int32 input_dim, int32 output_dim) {
420  input_dim_ = input_dim;
421  output_dim_ = output_dim;
422  if (input_dim_ == 0)
423  input_dim_ = 10 * output_dim_; // default group size : 10
424  KALDI_ASSERT(input_dim_ > 0 && output_dim_ >= 0);
425  KALDI_ASSERT(input_dim_ % output_dim_ == 0);
426 }
427 
428 void MaxoutComponent::InitFromString(std::string args) {
429  std::string orig_args(args);
430  int32 input_dim = 0;
431  int32 output_dim = 0;
432  bool ok = ParseFromString("output-dim", &args, &output_dim) &&
433  ParseFromString("input-dim", &args, &input_dim);
434  KALDI_LOG << output_dim << " " << input_dim << " " << ok;
435  if (!ok || !args.empty() || output_dim <= 0)
436  KALDI_ERR << "Invalid initializer for layer of type "
437  << Type() << ": \"" << orig_args << "\"";
438  Init(input_dim, output_dim);
439 }
440 
441 
443  const ChunkInfo &out_info,
444  const CuMatrixBase<BaseFloat> &in,
445  CuMatrixBase<BaseFloat> *out) const {
446  in_info.CheckSize(in);
447  out_info.CheckSize(*out);
448  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
449  out->GroupMax(in);
450 }
451 
// Backward pass for maxout: the derivative flows only to the input that
// achieved the group maximum (via GroupMaxDeriv), scaled by the
// corresponding output derivative.
void MaxoutComponent::Backprop(const ChunkInfo &, // in_info,
                               const ChunkInfo &, // out_info,
                               const CuMatrixBase<BaseFloat> &in_value,
                               const CuMatrixBase<BaseFloat> &out_value,
                               const CuMatrixBase<BaseFloat> &out_deriv,
                               Component *to_update,
                               CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);
  // Sets in_deriv to the 0/1 indicator of where the max was achieved...
  in_deriv->GroupMaxDeriv(in_value, out_value);
  // ...then multiplies each group's entries by that group's output deriv.
  in_deriv->MulRowsGroupMat(out_deriv);
}
463 
// Deserializes input and output dimensions (the opening token may already
// have been consumed by the caller, hence ExpectOneOrTwoTokens).
void MaxoutComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<MaxoutComponent>", "<InputDim>");
  ReadBasicType(is, binary, &input_dim_);
  ExpectToken(is, binary, "<OutputDim>");
  ReadBasicType(is, binary, &output_dim_);
  ExpectToken(is, binary, "</MaxoutComponent>");
}
471 
// Serializes the component in the format that Read() expects.
void MaxoutComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<MaxoutComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, input_dim_);
  WriteToken(os, binary, "<OutputDim>");
  WriteBasicType(os, binary, output_dim_);
  WriteToken(os, binary, "</MaxoutComponent>");
}
480 
481 std::string MaxoutComponent::Info() const {
482  std::stringstream stream;
483  stream << Type() << ", input-dim = " << input_dim_
484  << ", output-dim = " << output_dim_;
485  return stream.str();
486 }
487 
488 void PnormComponent::Init(int32 input_dim, int32 output_dim, BaseFloat p) {
489  input_dim_ = input_dim;
490  output_dim_ = output_dim;
491  if (input_dim_ == 0)
492  input_dim_ = 10 * output_dim_; // default group size : 10
493  p_ = p;
494  KALDI_ASSERT(input_dim_ > 0 && output_dim_ >= 0 && p_ >= 0);
495  KALDI_ASSERT(input_dim_ % output_dim_ == 0);
496 }
497 
498 void PnormComponent::InitFromString(std::string args) {
499  std::string orig_args(args);
500  int32 input_dim = 0;
501  int32 output_dim = 0;
502  BaseFloat p = 2;
503  bool ok = ParseFromString("output-dim", &args, &output_dim) &&
504  ParseFromString("input-dim", &args, &input_dim);
505  ParseFromString("p", &args, &p);
506  if (!ok || !args.empty() || output_dim <= 0)
507  KALDI_ERR << "Invalid initializer for layer of type "
508  << Type() << ": \"" << orig_args << "\"";
509  Init(input_dim, output_dim, p);
510 }
511 
512 
514  const ChunkInfo &out_info,
515  const CuMatrixBase<BaseFloat> &in,
516  CuMatrixBase<BaseFloat> *out) const {
517  in_info.CheckSize(in);
518  out_info.CheckSize(*out);
519  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
520 
521  out->GroupPnorm(in, p_);
522 }
523 
// Backward pass for the p-norm nonlinearity; the per-group derivative is
// computed in one fused call (DiffGroupPnorm).
void PnormComponent::Backprop(const ChunkInfo &, // in_info,
                              const ChunkInfo &, // out_info,
                              const CuMatrixBase<BaseFloat> &in_value,
                              const CuMatrixBase<BaseFloat> &out_value,
                              const CuMatrixBase<BaseFloat> &out_deriv,
                              Component *to_update,
                              // may be identical to "this".
                              CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);
  in_deriv->DiffGroupPnorm(in_value, out_value, out_deriv, p_);
}
535 
// Deserializes dimensions and the exponent p (the opening token may
// already have been consumed by the caller).
void PnormComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<PnormComponent>", "<InputDim>");
  ReadBasicType(is, binary, &input_dim_);
  ExpectToken(is, binary, "<OutputDim>");
  ReadBasicType(is, binary, &output_dim_);
  ExpectToken(is, binary, "<P>");
  ReadBasicType(is, binary, &p_);
  ExpectToken(is, binary, "</PnormComponent>");
}
545 
// Serializes the component in the format that Read() expects.
void PnormComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<PnormComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, input_dim_);
  WriteToken(os, binary, "<OutputDim>");
  WriteBasicType(os, binary, output_dim_);
  WriteToken(os, binary, "<P>");
  WriteBasicType(os, binary, p_);
  WriteToken(os, binary, "</PnormComponent>");
}
556 
557 std::string PnormComponent::Info() const {
558  std::stringstream stream;
559  stream << Type() << ", input-dim = " << input_dim_
560  << ", output-dim = " << output_dim_
561  << ", p = " << p_;
562  return stream.str();
563 }
564 
565 
// Floor on the per-row mean-square value; prevents division by (near-)zero
// when normalizing rows that are all zeros or extremely small.
const BaseFloat NormalizeComponent::kNormFloor = pow(2.0, -66);
// This component modifies the vector of activations by scaling it so that the
// root-mean-square equals 1.0.
569 
571  const ChunkInfo &out_info,
572  const CuMatrixBase<BaseFloat> &in,
573  CuMatrixBase<BaseFloat> *out) const {
574  cu::NormalizePerRow(in, BaseFloat(1), false, out);
575 }
576 
577 /*
578  A note on the derivative of NormalizeComponent...
579  let both row_in and row_out be vectors of dimension D.
580  Let p = row_in^T row_in / D, and let
581  f = 1 / sqrt(max(kNormFloor, p)), and we compute row_out as:
582 row_out = f row_in.
583  Suppose we have a quantity deriv_out which is the derivative
584  of the objective function w.r.t. row_out. We want to compute
585  deriv_in which is the derivative of the objective function w.r.t.
586  row_in. Let the objective function be F. One term is obvious: we have
587  deriv_in = f deriv_out + ....
588  next we have to take into account the derivative that gets back-propagated
589  through f. Obviously, dF/df = deriv_out^T row_in.
590  And df/dp = (p <= kNormFloor ? 0.0 : -0.5 p^{-1.5}) = (f == 1 / sqrt(kNormFloor) ? 0.0 : -0.5 f^3),
591  and dp/d(row_in) = 2/D row_in. [it's vector_valued].
592  So this term in dF/d(row_in) equals:
593  dF/df df/dp dp/d(row_in) = 2/D (f == 1 / sqrt(kNormFloor) ? 0.0 : -0.5 f^3) (deriv_out^T row_in) row_in
594  So
595  deriv_in = f deriv_out + (f == 1.0 ? 0.0 : -f^3 / D) (deriv_out^T row_in) row_in
596 
597 */
598 
600  const ChunkInfo &, // in_info,
601  const ChunkInfo &, // out_info,
602  const CuMatrixBase<BaseFloat> &in_value,
603  const CuMatrixBase<BaseFloat> &out_value,
604  const CuMatrixBase<BaseFloat> &out_deriv, Component *to_update,
605  // may be identical to "this".
606  CuMatrix<BaseFloat> *in_deriv) const {
607  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
608  cu::DiffNormalizePerRow(in_value, out_deriv, BaseFloat(1), false, in_deriv);
609 }
610 
612  const ChunkInfo &out_info,
613  const CuMatrixBase<BaseFloat> &in,
614  CuMatrixBase<BaseFloat> *out) const {
615  in_info.CheckSize(in);
616  out_info.CheckSize(*out);
617  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
618 
619  out->Sigmoid(in);
620 }
621 
// Backward pass for the sigmoid; uses out_value only, since
// sigmoid'(x) = y * (1 - y) where y = sigmoid(x).
void SigmoidComponent::Backprop(const ChunkInfo &, //in_info,
                                const ChunkInfo &, //out_info,
                                const CuMatrixBase<BaseFloat> &, //in_value,
                                const CuMatrixBase<BaseFloat> &out_value,
                                const CuMatrixBase<BaseFloat> &out_deriv,
                                Component *to_update, // may be identical to "this".
                                CuMatrix<BaseFloat> *in_deriv) const {
  // we ignore in_value; to_update (if non-NULL) only accumulates stats.

  // The element by element equation would be:
  // in_deriv = out_deriv * out_value * (1.0 - out_value);
  // We can accomplish this via calls to the matrix library.

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  in_deriv->Set(1.0);
  in_deriv->AddMat(-1.0, out_value);
  // now in_deriv = 1.0 - out_value [element by element]
  in_deriv->MulElements(out_value);
  // now in_deriv = out_value * (1.0 - out_value) [element by element], i.e.
  // it contains the element-by-element derivative of the nonlinearity.
  // Stats must be accumulated BEFORE multiplying in out_deriv, since the
  // derivative stats are of the nonlinearity itself.
  if (to_update != NULL)
    dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
                                                              in_deriv);
  in_deriv->MulElements(out_deriv);
  // now in_deriv = out_deriv * out_value * (1.0 - out_value) [element by element]
}
648 
649 
651  const ChunkInfo &out_info,
652  const CuMatrixBase<BaseFloat> &in,
653  CuMatrixBase<BaseFloat> *out) const {
654  // Apply tanh function to each element of the output...
655  // the tanh function may be written as -1 + ( 2 / (1 + e^{-2 x})),
656  // which is a scaled and shifted sigmoid.
657 
658  in_info.CheckSize(in);
659  out_info.CheckSize(*out);
660  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
661  out->Tanh(in);
662 }
663 
// Backward pass for tanh; uses out_value only, since
// tanh'(x) = 1 - tanh^2(x).
void TanhComponent::Backprop(const ChunkInfo &, //in_info,
                             const ChunkInfo &, //out_info,
                             const CuMatrixBase<BaseFloat> &, //in_value,
                             const CuMatrixBase<BaseFloat> &out_value,
                             const CuMatrixBase<BaseFloat> &out_deriv,
                             Component *to_update, // may be identical to "this".
                             CuMatrix<BaseFloat> *in_deriv) const {
  /*
    Note on the derivative of the tanh function:
    tanh'(x) = sech^2(x) = -(tanh(x)+1) (tanh(x)-1) = 1 - tanh^2(x)

    The element by element equation of what we're doing would be:
    in_deriv = out_deriv * (1.0 - out_value^2).
    We can accomplish this via calls to the matrix library. */

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  in_deriv->CopyFromMat(out_value);
  in_deriv->ApplyPow(2.0);
  in_deriv->Scale(-1.0);
  in_deriv->Add(1.0);
  // now in_deriv = (1.0 - out_value^2), the element-by-element derivative of
  // the nonlinearity.
  // Stats must be accumulated BEFORE multiplying in out_deriv, since the
  // derivative stats are of the nonlinearity itself.
  if (to_update != NULL)
    dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
                                                              in_deriv);
  in_deriv->MulElements(out_deriv);
}
691 
693  dim_ = dim;
694  power_ = power;
695  KALDI_ASSERT(dim > 0 && power >= 0);
696 }
697 
698 void PowerComponent::InitFromString(std::string args) {
699  std::string orig_args(args);
700  int32 dim;
701  BaseFloat power = 2.0;
702  ParseFromString("power", &args, &power); // Optional.
703  // Accept either "dim" or "input-dim" to specify the input dim.
704  // "input-dim" is the canonical one; "dim" simplifies the testing code.
705  bool ok = (ParseFromString("dim", &args, &dim) ||
706  ParseFromString("input-dim", &args, &dim));
707  if (!ok || !args.empty() || dim <= 0)
708  KALDI_ERR << "Invalid initializer for layer of type "
709  << Type() << ": \"" << orig_args << "\"";
710  Init(dim, power);
711 }
712 
714  const ChunkInfo &out_info,
715  const CuMatrixBase<BaseFloat> &in,
716  CuMatrixBase<BaseFloat> *out) const {
717  in_info.CheckSize(in);
718  out_info.CheckSize(*out);
719  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
720 
721  // Apply power operation to each element of the input...
722  out->CopyFromMat(in);
723  out->ApplyPowAbs(power_);
724 }
725 
// Backward pass for the power nonlinearity.
void PowerComponent::Backprop(const ChunkInfo &, //in_info,
                              const ChunkInfo &, //out_info,
                              const CuMatrixBase<BaseFloat> &in_value,
                              const CuMatrixBase<BaseFloat> &out_value,
                              const CuMatrixBase<BaseFloat> &out_deriv,
                              Component *to_update, // may be identical to "this".
                              CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols());
  // in scalar terms: in_deriv += p * in_value^(p-1) * out_deriv
  in_deriv->CopyFromMat(in_value);
  // The "true" flag makes ApplyPowAbs keep the sign of in_value, i.e.
  // this computes sign(x) * |x|^(p-1), matching d/dx |x|^p up to the
  // factor p applied below.
  in_deriv->ApplyPowAbs(power_ - 1.0, true);
  in_deriv->Scale(power_);
  in_deriv->MulElements(out_deriv);
}
740 
// Deserializes the component.  Input and output dimensions are equal for
// this component, so both tokens are read into the single member dim_.
void PowerComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<PowerComponent>", "<InputDim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<OutputDim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<Power>");
  ReadBasicType(is, binary, &power_);
  ExpectToken(is, binary, "</PowerComponent>");
}
750 
// Serializes the component; dim_ is written for both <InputDim> and
// <OutputDim>, mirroring Read().
void PowerComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<PowerComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<OutputDim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<Power>");
  WriteBasicType(os, binary, power_);
  WriteToken(os, binary, "</PowerComponent>");
}
761 
762 std::string PowerComponent::Info() const {
763  std::stringstream stream;
764  stream << Type() << ", dim = " << dim_
765  << ", power = " << power_;
766  return stream.str();
767 }
768 
770  const ChunkInfo &out_info,
771  const CuMatrixBase<BaseFloat> &in,
772  CuMatrixBase<BaseFloat> *out) const {
773  // Apply rectified linear function (x >= 0 ? 1.0 : 0.0)
774  out->CopyFromMat(in);
775  out->ApplyFloor(0.0);
776 }
777 
779  const ChunkInfo &, //out_info,
780  const CuMatrixBase<BaseFloat> &, //in_value,
781  const CuMatrixBase<BaseFloat> &out_value,
782  const CuMatrixBase<BaseFloat> &out_deriv,
783  Component *to_update, // may be identical to "this".
784  CuMatrix<BaseFloat> *in_deriv) const {
785 
786  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols(),
787  kUndefined);
788  in_deriv->CopyFromMat(out_value);
789  in_deriv->ApplyHeaviside();
790  // Now in_deriv(i, j) equals (out_value(i, j) > 0.0 ? 1.0 : 0.0),
791  // which is the derivative of the nonlinearity (well, except at zero
792  // where it's undefined).
793  if (to_update != NULL)
794  dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
795  in_deriv);
796  in_deriv->MulElements(out_deriv);
797 }
798 
800  const ChunkInfo &out_info,
801  const CuMatrixBase<BaseFloat> &in,
802  CuMatrixBase<BaseFloat> *out) const {
803  in_info.CheckSize(in);
804  out_info.CheckSize(*out);
805  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
806  // Apply function x = log(1 + exp(x))
807  out->SoftHinge(in);
808 }
809 
// Backward pass for the soft-hinge: the derivative of log(1 + exp(x)) is
// the sigmoid of the input, computed here directly from in_value.
void SoftHingeComponent::Backprop(const ChunkInfo &, //in_info,
                                  const ChunkInfo &, //out_info,
                                  const CuMatrixBase<BaseFloat> &in_value,
                                  const CuMatrixBase<BaseFloat> &out_value,
                                  const CuMatrixBase<BaseFloat> &out_deriv,
                                  Component *to_update, // may be identical to "this".
                                  CuMatrix<BaseFloat> *in_deriv) const {

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols(),
                   kUndefined);
  // note: d/dx: log(1 + exp(x)) = (exp(x) / (1 + exp(x)) = 1 / (1 + exp(-x)),
  // which is the sigmoid function.

  // if the output is y, then dy/dx = (exp(x) / (1 + exp(x)),
  // and using y = log(1 + exp(x)) -> exp(x) = exp(y) - 1, we have
  // dy/dx = (exp(y) - 1) / exp(y)


  in_deriv->Sigmoid(in_value);

  // Stats are accumulated before multiplying in out_deriv.
  if (to_update != NULL)
    dynamic_cast<NonlinearComponent*>(to_update)->UpdateStats(out_value,
                                                              in_deriv);
  in_deriv->MulElements(out_deriv);
}
835 
836 
838  const ChunkInfo &out_info,
839  const CuMatrixBase<BaseFloat> &in,
840  CuMatrixBase<BaseFloat> *out) const {
841  out->CopyFromMat(in);
842  out->Scale(scale_);
843 }
844 
// Backward pass: the component is linear, so in_deriv = scale_ * out_deriv.
void ScaleComponent::Backprop(const ChunkInfo &, //in_info,
                              const ChunkInfo &, //out_info,
                              const CuMatrixBase<BaseFloat> &, //in_value,
                              const CuMatrixBase<BaseFloat> &, //out_value,
                              const CuMatrixBase<BaseFloat> &out_deriv,
                              Component *, //to_update, // may be identical to "this".
                              CuMatrix<BaseFloat> *in_deriv) const {

  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols(),
                   kUndefined);
  in_deriv->CopyFromMat(out_deriv);
  in_deriv->Scale(scale_);
}
858 
860  dim_ = dim;
861  scale_ = scale;
862  KALDI_ASSERT(dim_ > 0);
863  KALDI_ASSERT(scale_ != 0.0);
864 }
865 
866 void ScaleComponent::InitFromString(std::string args) {
867  std::string orig_args(args);
868  int32 dim;
869  BaseFloat scale;
870  if (!ParseFromString("dim", &args, &dim))
871  KALDI_ERR << "Dimension not specified for ScaleComponent in config file";
872  if (!ParseFromString("scale", &args, &scale))
873  KALDI_ERR << "Scale not specified for ScaleComponent in config file";
874  Init(dim, scale);
875 }
876 
// Serializes the component in the format that Read() expects.
void ScaleComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<ScaleComponent>");
  WriteToken(os, binary, "<Dim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<Scale>");
  WriteBasicType(os, binary, scale_);
  WriteToken(os, binary, "</ScaleComponent>");
}
885 
// Deserializes the component (the opening token may already have been
// consumed by the caller, hence ExpectOneOrTwoTokens).
void ScaleComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<ScaleComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<Scale>");
  ReadBasicType(is, binary, &scale_);
  ExpectToken(is, binary, "</ScaleComponent>");
}
893 
894 std::string ScaleComponent::Info() const {
895  std::stringstream stream;
896  stream << Type() << ", dim=" << dim_ << ", scale=" << scale_;
897  return stream.str();
898 }
899 
                                  const ChunkInfo &out_info,
                                  const CuMatrixBase<BaseFloat> &in,
                                  CuMatrixBase<BaseFloat> *out) const {
  // [continuation of SoftmaxComponent::Propagate; the first signature line
  // is outside this excerpt's view.]
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // Apply softmax function to each row of the output...
  // for that row, we do
  // x_i = exp(x_i) / sum_j exp(x_j).

  out->SoftMaxPerRow(in);

  // This floor on the output helps us deal with
  // almost-zeros in a way that doesn't lead to overflow
  // (e.g. when a log is later taken of the output).
  out->ApplyFloor(1.0e-20);
}
918 
920  const ChunkInfo &out_info,
921  const CuMatrixBase<BaseFloat> &, //in_value,
922  const CuMatrixBase<BaseFloat> &out_value,
923  const CuMatrixBase<BaseFloat> &out_deriv,
924  Component *to_update, // only thing updated is counts_.
925  CuMatrix<BaseFloat> *in_deriv) const {
926  /*
927  Note on the derivative of the softmax function: let it be
928  p_i = exp(x_i) / sum_i exp_i
929  The [matrix-valued] Jacobian of this function is
930  diag(p) - p p^T
931  Let the derivative vector at the output be e, and at the input be
932  d. We have
933  d = diag(p) e - p (p^T e).
934  d_i = p_i e_i - p_i (p^T e).
935  */
936  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
937  in_deriv->DiffSoftmaxPerRow(out_value, out_deriv);
938 
939  // The SoftmaxComponent does not have any real trainable parameters, but
940  // during the backprop we store some statistics on the average counts;
941  // these may be used in mixing-up.
942  if (to_update != NULL) {
943  NonlinearComponent *to_update_nonlinear =
944  dynamic_cast<NonlinearComponent*>(to_update);
945  to_update_nonlinear->UpdateStats(out_value);
946  }
947 }
948 
                                    const ChunkInfo &out_info,
                                    const CuMatrixBase<BaseFloat> &in,
                                    CuMatrixBase<BaseFloat> *out) const {
  // [continuation of LogSoftmaxComponent::Propagate; the first signature
  // line is outside this excerpt's view.]
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // Applies log softmax function to each row of the output. For each row, we do
  // x_i = x_i - log(sum_j exp(x_j))
  out->LogSoftMaxPerRow(in);

  // Just to be consistent with SoftmaxComponent::Propagate(),
  // which floors its output at 1.0e-20.
  out->ApplyFloor(Log(1.0e-20));
}
964 
                                   const ChunkInfo &out_info,
                                   const CuMatrixBase<BaseFloat> &, //in_value,
                                   const CuMatrixBase<BaseFloat> &out_value,
                                   const CuMatrixBase<BaseFloat> &out_deriv,
                                   Component *to_update,
                                   CuMatrix<BaseFloat> *in_deriv) const {
  // [continuation of LogSoftmaxComponent::Backprop; the first signature
  // line is outside this excerpt's view.]
  /*
    Let the output be y, then
      y_i = x_i - log(sum_i exp(x_i))
    where x_i is the input to the component. The Jacobian matrix of this
    function is
      J = I - 1 exp(y^T)
    where 1 is a vector of ones. Let the derivative vector at the output be e,
    and at the input be d, then we have
      d = e - exp(y) Sum(e)
      d_i = e_i - exp(y_i) Sum(e)
  */
  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  KALDI_ASSERT(SameDim(out_value, out_deriv) && SameDim(out_value, *in_deriv));

  in_deriv->DiffLogSoftmaxPerRow(out_value, out_deriv);

  // Updates stats (average activations; no trainable parameters here).
  if (to_update != NULL) {
    // NOTE(review): the dynamic_cast result is used without a NULL check;
    // this assumes to_update is always a NonlinearComponent — confirm.
    NonlinearComponent *to_update_nonlinear =
        dynamic_cast<NonlinearComponent*>(to_update);
    to_update_nonlinear->UpdateStats(out_value);
  }
}
995 
996 
  // [body of AffineComponent::Scale(BaseFloat scale); the signature line is
  // outside this excerpt's view.]  Scales both parameter matrices.
  linear_params_.Scale(scale);
  bias_params_.Scale(scale);
}
1001 
1002 // virtual
1003 void AffineComponent::Resize(int32 input_dim, int32 output_dim) {
1004  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
1005  bias_params_.Resize(output_dim);
1006  linear_params_.Resize(output_dim, input_dim);
1007 }
1008 
1009 void AffineComponent::Add(BaseFloat alpha, const UpdatableComponent &other_in) {
1010  const AffineComponent *other =
1011  dynamic_cast<const AffineComponent*>(&other_in);
1012  KALDI_ASSERT(other != NULL);
1013  linear_params_.AddMat(alpha, other->linear_params_);
1014  bias_params_.AddVec(alpha, other->bias_params_);
1015 }
1016 
1018  UpdatableComponent(component),
1019  linear_params_(component.linear_params_),
1020  bias_params_(component.bias_params_),
1021  is_gradient_(component.is_gradient_) { }
1022 
                                 const CuVectorBase<BaseFloat> &bias_params,
                                 BaseFloat learning_rate):
    // [continuation of a constructor taking (linear_params, bias_params,
    // learning_rate); its first signature line is outside this view.]
    UpdatableComponent(learning_rate),
    linear_params_(linear_params),
    bias_params_(bias_params) {
  // Row count of the weight matrix must match the bias dimension, and the
  // component must be non-empty.
  KALDI_ASSERT(linear_params.NumRows() == bias_params.Dim()&&
               bias_params.Dim() != 0);
  is_gradient_ = false;  // explicitly-provided params are a model, not a gradient.
}
1033 
1034 
1035 
1036 void AffineComponent::SetZero(bool treat_as_gradient) {
1037  if (treat_as_gradient) {
1038  SetLearningRate(1.0);
1039  }
1040  linear_params_.SetZero();
1041  bias_params_.SetZero();
1042  if (treat_as_gradient)
1043  is_gradient_ = true;
1044 }
1045 
                                const MatrixBase<BaseFloat> &linear) {
  // [continuation of AffineComponent::SetParams(bias, linear); the first
  // signature line is outside this excerpt's view.]  Overwrites both
  // parameter sets and checks their dimensions agree.
  bias_params_ = bias;
  linear_params_ = linear;
  KALDI_ASSERT(bias_params_.Dim() == linear_params_.NumRows());
}
1052 
  // [body of AffineComponent::PerturbParams(BaseFloat stddev); the signature
  // line is outside this excerpt's view.]  Adds Gaussian noise with the
  // given standard deviation to both parameter sets (used e.g. in
  // gradient-checking / model-perturbation code).
  CuMatrix<BaseFloat> temp_linear_params(linear_params_);
  temp_linear_params.SetRandn();
  linear_params_.AddMat(stddev, temp_linear_params);

  CuVector<BaseFloat> temp_bias_params(bias_params_);
  temp_bias_params.SetRandn();
  bias_params_.AddVec(stddev, temp_bias_params);
}
1062 
// Returns a human-readable summary: dims, parameter standard deviations
// and learning rate.
std::string AffineComponent::Info() const {
  std::stringstream stream;
  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
      * static_cast<BaseFloat>(linear_params_.NumCols());
  BaseFloat linear_stddev =
      // NOTE: the numerator of this expression (a line dividing the summed
      // squares of linear_params_ inside a sqrt) is outside this view.
                  linear_params_size),
      bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
                              bias_params_.Dim());
  stream << Type() << ", input-dim=" << InputDim()
         << ", output-dim=" << OutputDim()
         << ", linear-params-stddev=" << linear_stddev
         << ", bias-params-stddev=" << bias_stddev
         << ", learning-rate=" << LearningRate();
  return stream.str();
}
1079 
  // [fragment of AffineComponent::Copy(); the signature line and the lines
  // copying learning_rate_ and linear_params_ are outside this view.]
  AffineComponent *ans = new AffineComponent();
  ans->bias_params_ = bias_params_;
  ans->is_gradient_ = is_gradient_;
  return ans;
}
1088 
  // [fragment of AffineComponent::DotProduct(other_in); the signature line
  // and the TraceMatMat(...) term of the return expression are outside
  // this view.]  Computes the dot product of this component's parameters
  // with another's (used e.g. for measuring gradient-vs-parameter overlap).
  const AffineComponent *other =
      dynamic_cast<const AffineComponent*>(&other_in);
      + VecVec(bias_params_, other->bias_params_);
}
1095 
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev, BaseFloat bias_stddev) {
  // [continuation of AffineComponent::Init(learning_rate, ...); the first
  // signature line is outside this excerpt's view.]  Random-initializes
  // the parameters with the given standard deviations.
  UpdatableComponent::Init(learning_rate);
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
  linear_params_.SetRandn(); // sets to random normally distributed noise.
  linear_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
}
1108 
                           std::string matrix_filename) {
  // [continuation of AffineComponent::Init(learning_rate, matrix_filename);
  // the first signature line is outside this view.]  Loads parameters from
  // a matrix on disk whose last column is the bias and the rest the weights.
  UpdatableComponent::Init(learning_rate);
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat); // will abort on failure.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 input_dim = mat.NumCols() - 1, output_dim = mat.NumRows();
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  linear_params_.CopyFromMat(mat.Range(0, output_dim, 0, input_dim));
  bias_params_.CopyColFromMat(mat, input_dim);  // last column is the bias.
}
1121 
1122 void AffineComponent::InitFromString(std::string args) {
1123  std::string orig_args(args);
1124  bool ok = true;
1125  BaseFloat learning_rate = learning_rate_;
1126  std::string matrix_filename;
1127  int32 input_dim = -1, output_dim = -1;
1128  ParseFromString("learning-rate", &args, &learning_rate); // optional.
1129  if (ParseFromString("matrix", &args, &matrix_filename)) {
1130  Init(learning_rate, matrix_filename);
1131  if (ParseFromString("input-dim", &args, &input_dim))
1132  KALDI_ASSERT(input_dim == InputDim() &&
1133  "input-dim mismatch vs. matrix.");
1134  if (ParseFromString("output-dim", &args, &output_dim))
1135  KALDI_ASSERT(output_dim == OutputDim() &&
1136  "output-dim mismatch vs. matrix.");
1137  } else {
1138  ok = ok && ParseFromString("input-dim", &args, &input_dim);
1139  ok = ok && ParseFromString("output-dim", &args, &output_dim);
1140  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
1141  bias_stddev = 1.0;
1142  ParseFromString("param-stddev", &args, &param_stddev);
1143  ParseFromString("bias-stddev", &args, &bias_stddev);
1144  Init(learning_rate, input_dim, output_dim,
1145  param_stddev, bias_stddev);
1146  }
1147  if (!args.empty())
1148  KALDI_ERR << "Could not process these elements in initializer: "
1149  << args;
1150  if (!ok)
1151  KALDI_ERR << "Bad initializer " << orig_args;
1152 }
1153 
1154 
                                const ChunkInfo &out_info,
                                const CuMatrixBase<BaseFloat> &in,
                                CuMatrixBase<BaseFloat> *out) const {
  // [continuation of AffineComponent::Propagate; the first signature line
  // is outside this view.]  Computes *out = in * W^T + b (row-wise affine).
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // No need for asserts as they'll happen within the matrix operations.
  out->CopyRowsFromVec(bias_params_); // copies bias_params_ to each row
  // of *out.
  out->AddMatMat(1.0, in, kNoTrans, linear_params_, kTrans, 1.0);
}
1168 
                                   const CuMatrixBase<BaseFloat> &out_deriv) {
  // [continuation of AffineComponent::UpdateSimple(in_value, out_deriv);
  // the first signature line is outside this view.]  Plain (unpreconditioned)
  // gradient step: params += learning_rate * gradient.
  bias_params_.AddRowSumMat(learning_rate_, out_deriv, 1.0);
  linear_params_.AddMatMat(learning_rate_, out_deriv, kTrans,
                           in_value, kNoTrans, 1.0);
}
1175 
// Backprop for y = W x + b: propagates the derivative to the input and,
// if "to_update_in" is non-NULL, accumulates a parameter update into it.
void AffineComponent::Backprop(const ChunkInfo &, //in_info,
                               const ChunkInfo &, //out_info,
                               const CuMatrixBase<BaseFloat> &in_value,
                               const CuMatrixBase<BaseFloat> &, //out_value,
                               const CuMatrixBase<BaseFloat> &out_deriv,
                               Component *to_update_in, // may be identical to "this".
                               CuMatrix<BaseFloat> *in_deriv) const {
  // dynamic_cast of a NULL pointer yields NULL, so a NULL to_update_in
  // simply skips the update below.
  AffineComponent *to_update = dynamic_cast<AffineComponent*>(to_update_in);
  in_deriv->Resize(out_deriv.NumRows(), InputDim());
  // Propagate the derivative back to the input: in_deriv = out_deriv * W.
  in_deriv->AddMatMat(1.0, out_deriv, kNoTrans, linear_params_, kNoTrans,
                      0.0);

  if (to_update != NULL) {
    // Next update the model (must do this 2nd so the derivatives we propagate
    // are accurate, in case this == to_update_in.)
    if (to_update->is_gradient_)
      to_update->UpdateSimple(in_value, out_deriv);  // raw gradient accumulation.
    else // the call below is to a virtual function that may be re-implemented
      to_update->Update(in_value, out_deriv); // by child classes.
  }
}
1198 
// Deserializes the component; mirrors AffineComponent::Write(), with
// back-compatibility handling for an obsolete "<AvgInput>" section and an
// optional "<IsGradient>" token.
void AffineComponent::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponent>"
  // might not see the "<AffineComponent>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  std::string tok;
  // back-compatibility code.  TODO: re-do this later.
  ReadToken(is, binary, &tok);
  if (tok == "<AvgInput>") { // discard the following (obsolete statistics).
    CuVector<BaseFloat> avg_input;
    avg_input.Read(is, binary);
    BaseFloat avg_input_count;
    ExpectToken(is, binary, "<AvgInputCount>");
    ReadBasicType(is, binary, &avg_input_count);
    ReadToken(is, binary, &tok);  // re-read: next token decides below.
  }
  if (tok == "<IsGradient>") {
    ReadBasicType(is, binary, &is_gradient_);
    ExpectToken(is, binary, ostr_end.str());
  } else {
    // Older models lack <IsGradient>; default to false (a model, not a
    // gradient) and require the closing tag to be what we just read.
    is_gradient_ = false;
    KALDI_ASSERT(tok == ostr_end.str());
  }
}
1230 
// Serializes the component.  Token order defines the on-disk format and
// must stay in sync with AffineComponent::Read().
void AffineComponent::Write(std::ostream &os, bool binary) const {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponent>"
  WriteToken(os, binary, ostr_beg.str());
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<IsGradient>");
  WriteBasicType(os, binary, is_gradient_);
  WriteToken(os, binary, ostr_end.str());
}
1246 
  // [body of AffineComponent::GetParameterDim(); signature line outside this
  // view.]  Weight matrix entries plus one bias per output row.
  return (InputDim() + 1) * OutputDim();
}
  // [body of AffineComponent::Vectorize(params); signature line outside this
  // view.]  Lays out the weights row-major, followed by the biases.
  params->Range(0, InputDim() * OutputDim()).CopyRowsFromMat(linear_params_);
  params->Range(InputDim() * OutputDim(),
                OutputDim()).CopyFromVec(bias_params_);
}
  // [body of AffineComponent::UnVectorize(params); signature line outside
  // this view.]  Inverse of Vectorize(): weights first, then biases.
  linear_params_.CopyRowsFromVec(params.Range(0, InputDim() * OutputDim()));
  bias_params_.CopyFromVec(params.Range(InputDim() * OutputDim(),
                                        OutputDim()));
}
1260 
    AffineComponent **a, AffineComponent **b) const {
  // [continuation of AffineComponent::LimitRank(d, a, b); the first
  // signature line and the declaration of the working matrix "M" (a CPU
  // copy of linear_params_) are outside this excerpt's view.]
  // Splits this affine layer into two layers a then b whose product
  // approximates it with rank d, via a truncated SVD.
  KALDI_ASSERT(d <= InputDim());

  // We'll limit the rank of just the linear part, keeping the bias vector full.
  int32 rows = M.NumRows(), cols = M.NumCols(), rc_min = std::min(rows, cols);
  Vector<BaseFloat> s(rc_min);
  Matrix<BaseFloat> U(rows, rc_min), Vt(rc_min, cols);
  // Do the destructive svd M = U diag(s) V^T.  It actually outputs the transpose of V.
  M.DestructiveSvd(&s, &U, &Vt);
  SortSvd(&s, &U, &Vt); // Sort the singular values from largest to smallest.
  BaseFloat old_svd_sum = s.Sum();
  // Keep only the d largest singular values / directions.
  U.Resize(rows, d, kCopyData);
  s.Resize(d, kCopyData);
  Vt.Resize(d, cols, kCopyData);
  BaseFloat new_svd_sum = s.Sum();
  KALDI_LOG << "Reduced rank from "
            << rc_min <<  " to " << d << ", SVD sum reduced from "
            << old_svd_sum << " to " << new_svd_sum;

  // U.MulColsVec(s); // U <-- U diag(s)
  Vt.MulRowsVec(s);  // Vt <-- diag(s) Vt.

  // Copy() preserves derived-class settings (learning rates etc.).
  *a = dynamic_cast<AffineComponent*>(this->Copy());
  *b = dynamic_cast<AffineComponent*>(this->Copy());

  (*a)->bias_params_.Resize(d, kSetZero);  // first layer has no bias.
  (*a)->linear_params_ = Vt;

  (*b)->bias_params_ = this->bias_params_;  // original bias goes on layer b.
  (*b)->linear_params_ = U;
}
1294 
    const AffineComponent &next_component) const {
  // [continuation of AffineComponent::CollapseWithNext(next_component);
  // the first signature line is outside this view.]  Composes this affine
  // transform with the following one into a single AffineComponent:
  // y = W2 (W1 x + b1) + b2  =>  W = W2 W1, b = W2 b1 + b2.
  AffineComponent *ans = dynamic_cast<AffineComponent*>(this->Copy());
  KALDI_ASSERT(ans != NULL);
  // Note: it's possible that "ans" is really of a derived type such
  // as AffineComponentPreconditioned, but this will still work.
  // the "copy" call will copy things like learning rates, "alpha" value
  // for preconditioned component, etc.
  ans->linear_params_.Resize(next_component.OutputDim(), InputDim());
  ans->bias_params_ = next_component.bias_params_;

  ans->linear_params_.AddMatMat(1.0, next_component.linear_params_, kNoTrans,
                                this->linear_params_, kNoTrans, 0.0);
  ans->bias_params_.AddMatVec(1.0, next_component.linear_params_, kNoTrans,
                              this->bias_params_, 1.0);
  return ans;
}
1312 
    const FixedAffineComponent &next_component) const {
  // [continuation of CollapseWithNext(FixedAffineComponent); the first
  // signature line is outside this view.]  Same composition as above, but
  // with a non-updatable next component.
  // If at least one was non-updatable, make the whole non-updatable.
  FixedAffineComponent *ans =
      dynamic_cast<FixedAffineComponent*>(next_component.Copy());
  KALDI_ASSERT(ans != NULL);
  ans->linear_params_.Resize(next_component.OutputDim(), InputDim());
  ans->bias_params_ = next_component.bias_params_;

  ans->linear_params_.AddMatMat(1.0, next_component.linear_params_, kNoTrans,
                                this->linear_params_, kNoTrans, 0.0);
  ans->bias_params_.AddMatVec(1.0, next_component.linear_params_, kNoTrans,
                              this->bias_params_, 1.0);
  return ans;
}
1328 
    const FixedScaleComponent &next_component) const {
  // [continuation of CollapseWithNext(FixedScaleComponent); the first
  // signature line is outside this view.]  Folds a per-dimension scaling
  // into this affine transform: row i of W and b_i get multiplied by s_i.
  KALDI_ASSERT(this->OutputDim() == next_component.InputDim());
  AffineComponent *ans =
      dynamic_cast<AffineComponent*>(this->Copy());
  KALDI_ASSERT(ans != NULL);
  ans->linear_params_.MulRowsVec(next_component.scales_);
  ans->bias_params_.MulElements(next_component.scales_);

  return ans;
}
1340 
1341 
1342 
    const FixedAffineComponent &prev_component) const {
  // [continuation of CollapseWithPrevious(FixedAffineComponent); the first
  // signature line is outside this view.]  Composes a preceding fixed affine
  // transform into this one: y = W (W0 x + b0) + b => W' = W W0, b' = W b0 + b.
  // If at least one was non-updatable, make the whole non-updatable.
  FixedAffineComponent *ans =
      dynamic_cast<FixedAffineComponent*>(prev_component.Copy());
  KALDI_ASSERT(ans != NULL);

  ans->linear_params_.Resize(this->OutputDim(), prev_component.InputDim());
  ans->bias_params_ = this->bias_params_;

  ans->linear_params_.AddMatMat(1.0, this->linear_params_, kNoTrans,
                                prev_component.linear_params_, kNoTrans, 0.0);
  ans->bias_params_.AddMatVec(1.0, this->linear_params_, kNoTrans,
                              prev_component.bias_params_, 1.0);
  return ans;
}
1359 
// Deserializes the component; mirrors Write(), with back-compatibility for
// models written before the "<MaxChange>" field existed.
void AffineComponentPreconditioned::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponentPreconditioned>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponentPreconditioned>"
  // might not see the "<AffineComponentPreconditioned>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "<Alpha>");
  ReadBasicType(is, binary, &alpha_);
  // todo: remove back-compat code.  Will just be:
  // ExpectToken(is, binary, "<MaxChange>");
  // ReadBasicType(is, binary, &max_change_);
  // ExpectToken(is, binary, ostr_end);
  // [end of function]
  std::string tok;
  ReadToken(is, binary, &tok);
  if (tok == "<MaxChange>") {
    ReadBasicType(is, binary, &max_change_);
    ExpectToken(is, binary, ostr_end.str());
  } else {
    // Older model without <MaxChange>: 0.0 disables the max-change logic.
    max_change_ = 0.0;
    KALDI_ASSERT(tok == ostr_end.str());
  }
}
1389 
  // [body of AffineComponentPreconditioned::InitFromString(args); the
  // signature line is outside this view.]  Accepts either
  // "matrix=<rxfilename>" (load params; dims only checked) or explicit
  // "input-dim=... output-dim=...", plus optional learning-rate, alpha,
  // max-change, param-stddev, bias-stddev.
  std::string orig_args(args);
  std::string matrix_filename;
  BaseFloat learning_rate = learning_rate_;
  BaseFloat alpha = 0.1, max_change = 0.0;
  int32 input_dim = -1, output_dim = -1;
  ParseFromString("learning-rate", &args, &learning_rate); // optional.
  ParseFromString("alpha", &args, &alpha);
  ParseFromString("max-change", &args, &max_change);

  if (ParseFromString("matrix", &args, &matrix_filename)) {
    Init(learning_rate, alpha, max_change, matrix_filename);
    if (ParseFromString("input-dim", &args, &input_dim))
      KALDI_ASSERT(input_dim == InputDim() &&
                   "input-dim mismatch vs. matrix.");
    if (ParseFromString("output-dim", &args, &output_dim))
      KALDI_ASSERT(output_dim == OutputDim() &&
                   "output-dim mismatch vs. matrix.");
  } else {
    bool ok = true;
    ok = ok && ParseFromString("input-dim", &args, &input_dim);
    ok = ok && ParseFromString("output-dim", &args, &output_dim);
    BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
        bias_stddev = 1.0;
    ParseFromString("param-stddev", &args, &param_stddev);
    ParseFromString("bias-stddev", &args, &bias_stddev);
    if (!ok)
      KALDI_ERR << "Bad initializer " << orig_args;
    Init(learning_rate, input_dim, output_dim, param_stddev,
         bias_stddev, alpha, max_change);
  }
  if (!args.empty())
    KALDI_ERR << "Could not process these elements in initializer: "
              << args;
}
1425 
                                         BaseFloat alpha, BaseFloat max_change,
                                         std::string matrix_filename) {
  // [continuation of AffineComponentPreconditioned::Init(learning_rate,
  // alpha, max_change, matrix_filename); the first signature line is
  // outside this view.]  Loads params from disk; last column is the bias.
  UpdatableComponent::Init(learning_rate);
  alpha_ = alpha;
  max_change_ = max_change;
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat); // will abort on failure.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 input_dim = mat.NumCols() - 1, output_dim = mat.NumRows();
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  linear_params_.CopyFromMat(mat.Range(0, output_dim, 0, input_dim));
  bias_params_.CopyColFromMat(mat, input_dim);
}
1441 
    BaseFloat learning_rate,
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev, BaseFloat bias_stddev,
    BaseFloat alpha, BaseFloat max_change) {
  // [continuation of AffineComponentPreconditioned::Init(...); the first
  // signature line is outside this view.]  Random-initializes parameters
  // and stores the preconditioning smoothing constant and step-size limit.
  UpdatableComponent::Init(learning_rate);
  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
  linear_params_.SetRandn(); // sets to random normally distributed noise.
  linear_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
  alpha_ = alpha;
  KALDI_ASSERT(alpha_ > 0.0);
  max_change_ = max_change; // Note: any value of max_change_ is valid, but
  // only values > 0.0 will actually activate the code.
}
1461 
1462 
// Serializes the component; token order must stay in sync with Read().
void AffineComponentPreconditioned::Write(std::ostream &os, bool binary) const {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponentPreconditioned>"
  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponentPreconditioned>"
  WriteToken(os, binary, ostr_beg.str());
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<Alpha>");
  WriteBasicType(os, binary, alpha_);
  WriteToken(os, binary, "<MaxChange>");
  WriteBasicType(os, binary, max_change_);
  WriteToken(os, binary, ostr_end.str());
}
1480 
  // [body of AffineComponentPreconditioned::Info(); the signature line and
  // the numerator line of the linear_stddev expression are outside this
  // view.]  Human-readable summary including alpha and max-change.
  std::stringstream stream;
  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
      * static_cast<BaseFloat>(linear_params_.NumCols());
  BaseFloat linear_stddev =
                  linear_params_size),
      bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
                              bias_params_.Dim());
  stream << Type() << ", input-dim=" << InputDim()
         << ", output-dim=" << OutputDim()
         << ", linear-params-stddev=" << linear_stddev
         << ", bias-params-stddev=" << bias_stddev
         << ", learning-rate=" << LearningRate()
         << ", alpha=" << alpha_
         << ", max-change=" << max_change_;
  return stream.str();
}
1499 
  // [fragment of AffineComponentPreconditioned::Copy(); the signature line
  // and the lines allocating "ans" and copying learning_rate_ /
  // linear_params_ are outside this excerpt's view.]
  ans->bias_params_ = bias_params_;
  ans->alpha_ = alpha_;
  ans->max_change_ = max_change_;
  ans->is_gradient_ = is_gradient_;
  return ans;
}
1510 
1511 
    const CuMatrix<BaseFloat> &in_value_precon,
    const CuMatrix<BaseFloat> &out_deriv_precon) {
  // [continuation of GetScalingFactor(...); the first signature line is
  // outside this view.]  Returns a factor in (0, 1] that caps the size of
  // this minibatch's parameter change at max_change_.
  // NOTE(review): this function-local static is written without
  // synchronization — assumes single-threaded updates; confirm.
  static int scaling_factor_printed = 0;

  KALDI_ASSERT(in_value_precon.NumRows() == out_deriv_precon.NumRows());
  CuVector<BaseFloat> in_norm(in_value_precon.NumRows()),
      out_deriv_norm(in_value_precon.NumRows());
  in_norm.AddDiagMat2(1.0, in_value_precon, kNoTrans, 0.0);
  out_deriv_norm.AddDiagMat2(1.0, out_deriv_precon, kNoTrans, 0.0);
  // Get the actual l2 norms, not the squared l2 norm.
  in_norm.ApplyPow(0.5);
  out_deriv_norm.ApplyPow(0.5);
  BaseFloat sum = learning_rate_ * VecVec(in_norm, out_deriv_norm);
  // sum is the product of norms that we are trying to limit
  // to max_value_.
  KALDI_ASSERT(sum == sum && sum - sum == 0.0 &&
               "NaN in backprop");  // sum != sum detects NaN; sum - sum != 0 detects inf.
  KALDI_ASSERT(sum >= 0.0);
  if (sum <= max_change_) return 1.0;
  else {
    BaseFloat ans = max_change_ / sum;
    if (scaling_factor_printed < 10) {  // avoid flooding the log.
      KALDI_LOG << "Limiting step size to " << max_change_
                << " using scaling factor " << ans << ", for component index "
                << Index();
      scaling_factor_printed++;
    }
    return ans;
  }
}
1543 
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {
  // [continuation of AffineComponentPreconditioned::Update(...); the first
  // signature line is outside this view.]  Preconditioned gradient step:
  // both the (bias-augmented) input and the output derivative are
  // multiplied by smoothed inverse-Fisher matrices before the update.
  CuMatrix<BaseFloat> in_value_temp;

  in_value_temp.Resize(in_value.NumRows(),
                       in_value.NumCols() + 1, kUndefined);
  in_value_temp.Range(0, in_value.NumRows(),
                      0, in_value.NumCols()).CopyFromMat(in_value);

  // Add the 1.0 at the end of each row "in_value_temp"
  // (the extra column corresponds to the bias term).
  in_value_temp.Range(0, in_value.NumRows(),
                      in_value.NumCols(), 1).Set(1.0);

  CuMatrix<BaseFloat> in_value_precon(in_value_temp.NumRows(),
                                      in_value_temp.NumCols(), kUndefined),
      out_deriv_precon(out_deriv.NumRows(),
                       out_deriv.NumCols(), kUndefined);
  // each row of in_value_precon will be that same row of
  // in_value, but multiplied by the inverse of a Fisher
  // matrix that has been estimated from all the other rows,
  // smoothed by some appropriate amount times the identity
  // matrix (this amount is proportional to \alpha).
  PreconditionDirectionsAlphaRescaled(in_value_temp, alpha_, &in_value_precon);
  PreconditionDirectionsAlphaRescaled(out_deriv, alpha_, &out_deriv_precon);

  BaseFloat minibatch_scale = 1.0;

  // max_change_ > 0.0 activates the step-size limiting logic.
  if (max_change_ > 0.0)
    minibatch_scale = GetScalingFactor(in_value_precon, out_deriv_precon);


  CuSubMatrix<BaseFloat> in_value_precon_part(in_value_precon,
                                              0, in_value_precon.NumRows(),
                                              0, in_value_precon.NumCols() - 1);
  // this "precon_ones" is what happens to the vector of 1's representing
  // offsets, after multiplication by the preconditioner.
  CuVector<BaseFloat> precon_ones(in_value_precon.NumRows());

  precon_ones.CopyColFromMat(in_value_precon, in_value_precon.NumCols() - 1);

  BaseFloat local_lrate = minibatch_scale * learning_rate_;
  bias_params_.AddMatVec(local_lrate, out_deriv_precon, kTrans,
                         precon_ones, 1.0);
  linear_params_.AddMatMat(local_lrate, out_deriv_precon, kTrans,
                           in_value_precon_part, kNoTrans, 1.0);
}
1591 
1592 
1593 // virtual
    int32 input_dim, int32 output_dim) {
  // [continuation of AffineComponentPreconditionedOnline::Resize(...); the
  // first signature line is outside this view.]  Resizes the parameters
  // and resets the online preconditioners to freshly-constructed state.
  KALDI_ASSERT(input_dim > 1 && output_dim > 1);
  // The preconditioner ranks cannot equal or exceed the dims they act on.
  if (rank_in_ >= input_dim) rank_in_ = input_dim - 1;
  if (rank_out_ >= output_dim) rank_out_ = output_dim - 1;
  bias_params_.Resize(output_dim);
  linear_params_.Resize(output_dim, input_dim);
  OnlinePreconditioner temp;
  preconditioner_in_ = temp;   // assignment from a default-constructed
  preconditioner_out_ = temp;  // object clears accumulated state.
  SetPreconditionerConfigs();
}
1606 
1607 
// Deserializes the component; mirrors Write(), with back-compatibility for
// the old single "<Rank>" field and a missing "<UpdatePeriod>".
void AffineComponentPreconditionedOnline::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">";
  ostr_end << "</" << Type() << ">";
  // might not see the "<AffineComponentPreconditionedOnline>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  std::string tok;
  ReadToken(is, binary, &tok);
  if (tok == "<Rank>") {  // back-compatibility (temporary): one shared rank.
    ReadBasicType(is, binary, &rank_in_);
    rank_out_ = rank_in_;
  } else {
    KALDI_ASSERT(tok == "<RankIn>");
    ReadBasicType(is, binary, &rank_in_);
    ExpectToken(is, binary, "<RankOut>");
    ReadBasicType(is, binary, &rank_out_);
  }
  ReadToken(is, binary, &tok);
  if (tok == "<UpdatePeriod>") {
    ReadBasicType(is, binary, &update_period_);
    ExpectToken(is, binary, "<NumSamplesHistory>");
  } else {
    update_period_ = 1;  // default for older models lacking the field.
    KALDI_ASSERT(tok == "<NumSamplesHistory>");
  }
  ReadBasicType(is, binary, &num_samples_history_);
  ExpectToken(is, binary, "<Alpha>");
  ReadBasicType(is, binary, &alpha_);
  ExpectToken(is, binary, "<MaxChangePerSample>");
  ReadBasicType(is, binary, &max_change_per_sample_);
  ExpectToken(is, binary, ostr_end.str());
  // Push the freshly-read settings into the preconditioner objects.
  SetPreconditionerConfigs();
}
1647 
  // [body of AffineComponentPreconditionedOnline::InitFromString(args);
  // the signature line is outside this view.]  Accepts either
  // "matrix=<rxfilename>" or explicit dims, plus optional preconditioner
  // settings (rank-in/out, update-period, num-samples-history, alpha,
  // max-change-per-sample) and learning-rate.
  std::string orig_args(args);
  bool ok = true;
  std::string matrix_filename;
  BaseFloat learning_rate = learning_rate_;
  BaseFloat num_samples_history = 2000.0, alpha = 4.0,
      max_change_per_sample = 0.1;
  int32 input_dim = -1, output_dim = -1, rank_in = 30, rank_out = 80,
      update_period = 1;
  ParseFromString("learning-rate", &args, &learning_rate); // optional.
  ParseFromString("num-samples-history", &args, &num_samples_history);
  ParseFromString("alpha", &args, &alpha);
  ParseFromString("max-change-per-sample", &args, &max_change_per_sample);
  ParseFromString("rank-in", &args, &rank_in);
  ParseFromString("rank-out", &args, &rank_out);
  ParseFromString("update-period", &args, &update_period);

  if (ParseFromString("matrix", &args, &matrix_filename)) {
    Init(learning_rate, rank_in, rank_out, update_period,
         num_samples_history, alpha, max_change_per_sample,
         matrix_filename);
    if (ParseFromString("input-dim", &args, &input_dim))
      KALDI_ASSERT(input_dim == InputDim() &&
                   "input-dim mismatch vs. matrix.");
    if (ParseFromString("output-dim", &args, &output_dim))
      KALDI_ASSERT(output_dim == OutputDim() &&
                   "output-dim mismatch vs. matrix.");
  } else {
    ok = ok && ParseFromString("input-dim", &args, &input_dim);
    ok = ok && ParseFromString("output-dim", &args, &output_dim);
    BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
        bias_stddev = 1.0;
    ParseFromString("param-stddev", &args, &param_stddev);
    ParseFromString("bias-stddev", &args, &bias_stddev);
    Init(learning_rate, input_dim, output_dim, param_stddev,
         bias_stddev, rank_in, rank_out, update_period,
         num_samples_history, alpha, max_change_per_sample);
  }
  if (!args.empty())
    KALDI_ERR << "Could not process these elements in initializer: "
              << args;
  if (!ok)
    KALDI_ERR << "Bad initializer " << orig_args;
}
1692 
  // [body of SetPreconditionerConfigs(); the signature line is outside this
  // view.]  Pushes this component's stored settings into both online
  // preconditioner objects; call after any of these members change.
  preconditioner_in_.SetRank(rank_in_);
  preconditioner_in_.SetNumSamplesHistory(num_samples_history_);
  preconditioner_in_.SetAlpha(alpha_);
  preconditioner_in_.SetUpdatePeriod(update_period_);
  preconditioner_out_.SetRank(rank_out_);
  preconditioner_out_.SetNumSamplesHistory(num_samples_history_);
  preconditioner_out_.SetAlpha(alpha_);
  preconditioner_out_.SetUpdatePeriod(update_period_);
}
1703 
    BaseFloat learning_rate, int32 rank_in, int32 rank_out,
    int32 update_period, BaseFloat num_samples_history, BaseFloat alpha,
    BaseFloat max_change_per_sample,
    std::string matrix_filename) {
  // [continuation of Init(..., matrix_filename); the first signature line
  // is outside this view.]  Stores preconditioner settings and loads
  // parameters from disk; the matrix's last column is the bias.
  UpdatableComponent::Init(learning_rate);
  rank_in_ = rank_in;
  rank_out_ = rank_out;
  update_period_ = update_period;
  num_samples_history_ = num_samples_history;
  alpha_ = alpha;
  SetPreconditionerConfigs();
  KALDI_ASSERT(max_change_per_sample >= 0.0);
  max_change_per_sample_ = max_change_per_sample;
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat); // will abort on failure.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 input_dim = mat.NumCols() - 1, output_dim = mat.NumRows();
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  linear_params_.CopyFromMat(mat.Range(0, output_dim, 0, input_dim));
  bias_params_.CopyColFromMat(mat, input_dim);
}
1727 
    const AffineComponent &orig,
    int32 rank_in, int32 rank_out, int32 update_period,
    BaseFloat num_samples_history, BaseFloat alpha):
    // [continuation of a conversion constructor from a plain
    // AffineComponent; its first signature line is outside this view.]
    max_change_per_sample_(0.1) {
  this->linear_params_ = orig.linear_params_;
  this->bias_params_ = orig.bias_params_;
  this->learning_rate_ = orig.learning_rate_;
  this->is_gradient_ = orig.is_gradient_;
  this->rank_in_ = rank_in;
  this->rank_out_ = rank_out;
  this->update_period_ = update_period;
  this->num_samples_history_ = num_samples_history;
  this->alpha_ = alpha;
  // [one statement line here is outside this view — presumably the call
  // that configures the preconditioners; verify against the repository.]
}
1744 
    BaseFloat learning_rate,
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev, BaseFloat bias_stddev,
    int32 rank_in, int32 rank_out, int32 update_period,
    BaseFloat num_samples_history, BaseFloat alpha,
    BaseFloat max_change_per_sample) {
  // [continuation of Init(...); the first signature line is outside this
  // view.]  Random-initializes parameters and stores the online
  // preconditioner configuration.
  UpdatableComponent::Init(learning_rate);
  linear_params_.Resize(output_dim, input_dim);
  bias_params_.Resize(output_dim);
  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0 &&
               bias_stddev >= 0.0);
  linear_params_.SetRandn(); // sets to random normally distributed noise.
  linear_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
  rank_in_ = rank_in;
  rank_out_ = rank_out;
  update_period_ = update_period;
  num_samples_history_ = num_samples_history;
  alpha_ = alpha;
  // [one statement line here is outside this view — presumably the call
  // that configures the preconditioners; verify against the repository.]
  KALDI_ASSERT(max_change_per_sample >= 0.0);
  max_change_per_sample_ = max_change_per_sample;
}
1770 
1771 
1772 void AffineComponentPreconditionedOnline::Write(std::ostream &os, bool binary) const {
1773  std::ostringstream ostr_beg, ostr_end;
1774  ostr_beg << "<" << Type() << ">"; // e.g. "<AffineComponent>"
1775  ostr_end << "</" << Type() << ">"; // e.g. "</AffineComponent>"
1776  WriteToken(os, binary, ostr_beg.str());
1777  WriteToken(os, binary, "<LearningRate>");
1778  WriteBasicType(os, binary, learning_rate_);
1779  WriteToken(os, binary, "<LinearParams>");
1780  linear_params_.Write(os, binary);
1781  WriteToken(os, binary, "<BiasParams>");
1782  bias_params_.Write(os, binary);
1783  WriteToken(os, binary, "<RankIn>");
1784  WriteBasicType(os, binary, rank_in_);
1785  WriteToken(os, binary, "<RankOut>");
1786  WriteBasicType(os, binary, rank_out_);
1787  WriteToken(os, binary, "<UpdatePeriod>");
1788  WriteBasicType(os, binary, update_period_);
1789  WriteToken(os, binary, "<NumSamplesHistory>");
1790  WriteBasicType(os, binary, num_samples_history_);
1791  WriteToken(os, binary, "<Alpha>");
1792  WriteBasicType(os, binary, alpha_);
1793  WriteToken(os, binary, "<MaxChangePerSample>");
1795  WriteToken(os, binary, ostr_end.str());
1796 }
1797 
// NOTE(review): the declarator line (presumably
// "std::string AffineComponentPreconditionedOnline::Info() const {") and
// the first half of the 'linear_stddev' initializer (presumably
// "std::sqrt(TraceMatMat(linear_params_, linear_params_, kTrans) /") were
// lost in extraction; verify against the repository source.
// Returns a human-readable one-line description of the component's
// dimensions, parameter statistics and preconditioning configuration.
  std::stringstream stream;
  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
      * static_cast<BaseFloat>(linear_params_.NumCols());
  BaseFloat linear_stddev =
                            linear_params_size),
      bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
                              bias_params_.Dim());
  stream << Type() << ", input-dim=" << InputDim()
         << ", output-dim=" << OutputDim()
         << ", linear-params-stddev=" << linear_stddev
         << ", bias-params-stddev=" << bias_stddev
         << ", learning-rate=" << LearningRate()
         << ", rank-in=" << rank_in_
         << ", rank-out=" << rank_out_
         << ", num_samples_history=" << num_samples_history_
         << ", update_period=" << update_period_
         << ", alpha=" << alpha_
         << ", max-change-per-sample=" << max_change_per_sample_;
  return stream.str();
}
1820 
// NOTE(review): several lines of this Copy() implementation were lost in
// extraction — the declarator, the allocation of 'ans', and the copies of
// (at least) learning_rate_, update_period_, num_samples_history_,
// linear_params_ and max_change_per_sample_; verify against the
// repository source.  The visible tail copies the remaining fields,
// re-derives the preconditioner configuration, and returns the clone.
  ans->rank_in_ = rank_in_;
  ans->rank_out_ = rank_out_;
  ans->alpha_ = alpha_;
  ans->bias_params_ = bias_params_;
  ans->is_gradient_ = is_gradient_;
  ans->SetPreconditionerConfigs();
  return ans;
}
1838 
1839 
1840 
// NOTE(review): the declarator line (presumably "BaseFloat
// AffineComponentPreconditionedOnline::GetScalingFactor(") was lost in
// extraction; verify against the repository source.
// Returns a factor in (0, 1] by which the parameter update should be
// scaled so the total change norm stays within
// max_change_per_sample_ * minibatch_size; returns 1.0 when no limiting
// is needed.  Side effect: *out_products is overwritten with
// sqrt(in_products(i) * out_products(i)) elementwise.
    const CuVectorBase<BaseFloat> &in_products,
    BaseFloat learning_rate_scale,
    CuVectorBase<BaseFloat> *out_products) {
  static int scaling_factor_printed = 0;  // caps logging at 10 messages.
  int32 minibatch_size = in_products.Dim();

  // out_products(i) := sqrt(in_products(i) * out_products(i)): per-row
  // product of the input and output-derivative norms.
  out_products->MulElements(in_products);
  out_products->ApplyPow(0.5);
  BaseFloat prod_sum = out_products->Sum();
  BaseFloat tot_change_norm = learning_rate_scale * learning_rate_ * prod_sum,
      max_change_norm = max_change_per_sample_ * minibatch_size;
  // tot_change_norm is the product of norms that we are trying to limit
  // to max_value_.
  // x - x != 0 only for NaN (or +/-inf): cheap NaN/inf check.
  KALDI_ASSERT(tot_change_norm - tot_change_norm == 0.0 && "NaN in backprop");
  KALDI_ASSERT(tot_change_norm >= 0.0);
  if (tot_change_norm <= max_change_norm) return 1.0;
  else {
    BaseFloat factor = max_change_norm / tot_change_norm;
    if (scaling_factor_printed < 10) {
      KALDI_LOG << "Limiting step size using scaling factor "
                << factor << ", for component index " << Index();
      scaling_factor_printed++;
    }
    return factor;
  }
}
1868 
// NOTE(review): the declarator line (presumably "void
// AffineComponentPreconditionedOnline::Update(") was lost in extraction;
// verify against the repository source.
// Natural-gradient update: preconditions both the input activations
// (augmented with a trailing column of ones, so the bias is handled
// uniformly) and the output derivatives, optionally limits the step size
// via GetScalingFactor(), and applies the resulting outer-product update
// to bias_params_ and linear_params_.
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {
  CuMatrix<BaseFloat> in_value_temp;

  in_value_temp.Resize(in_value.NumRows(),
                       in_value.NumCols() + 1, kUndefined);
  in_value_temp.Range(0, in_value.NumRows(),
                      0, in_value.NumCols()).CopyFromMat(in_value);

  // Add the 1.0 at the end of each row "in_value_temp"
  in_value_temp.Range(0, in_value.NumRows(),
                      in_value.NumCols(), 1).Set(1.0);

  CuMatrix<BaseFloat> out_deriv_temp(out_deriv);

  // Both rows of 'row_products' live in one matrix so the two
  // PreconditionDirections calls can write into sub-vectors of it.
  CuMatrix<BaseFloat> row_products(2,
                                   in_value.NumRows());
  CuSubVector<BaseFloat> in_row_products(row_products, 0),
      out_row_products(row_products, 1);

  // These "scale" values will get multiplied into the learning rate (faster
  // than having the matrices scaled inside the preconditioning code).
  BaseFloat in_scale, out_scale;

  preconditioner_in_.PreconditionDirections(&in_value_temp, &in_row_products,
                                            &in_scale);
  preconditioner_out_.PreconditionDirections(&out_deriv_temp, &out_row_products,
                                             &out_scale);

  // "scale" is a scaling factor coming from the PreconditionDirections calls
  // (it's faster to have them output a scaling factor than to have them scale
  // their outputs).
  BaseFloat scale = in_scale * out_scale;
  BaseFloat minibatch_scale = 1.0;

  // max_change_per_sample_ == 0.0 disables step-size limiting.
  if (max_change_per_sample_ > 0.0)
    minibatch_scale = GetScalingFactor(in_row_products, scale,
                                       &out_row_products);

  CuSubMatrix<BaseFloat> in_value_precon_part(in_value_temp,
                                              0, in_value_temp.NumRows(),
                                              0, in_value_temp.NumCols() - 1);
  // this "precon_ones" is what happens to the vector of 1's representing
  // offsets, after multiplication by the preconditioner.
  CuVector<BaseFloat> precon_ones(in_value_temp.NumRows());

  precon_ones.CopyColFromMat(in_value_temp, in_value_temp.NumCols() - 1);

  BaseFloat local_lrate = scale * minibatch_scale * learning_rate_;
  bias_params_.AddMatVec(local_lrate, out_deriv_temp, kTrans,
                         precon_ones, 1.0);
  linear_params_.AddMatMat(local_lrate, out_deriv_temp, kTrans,
                           in_value_precon_part, kNoTrans, 1.0);
}
1924 
1925 void BlockAffineComponent::SetZero(bool treat_as_gradient) {
1926  if (treat_as_gradient) {
1927  SetLearningRate(1.0);
1928  }
1929  linear_params_.SetZero();
1930  bias_params_.SetZero();
1931 }
1932 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::PerturbParams(BaseFloat stddev) {") was lost in
// extraction; verify against the repository source.
// Adds Gaussian noise with standard deviation 'stddev' to every linear
// and bias parameter (used by gradient-checking / diagnostic code).
  CuMatrix<BaseFloat> temp_linear_params(linear_params_);
  temp_linear_params.SetRandn();
  linear_params_.AddMat(stddev, temp_linear_params);

  CuVector<BaseFloat> temp_bias_params(bias_params_);
  temp_bias_params.SetRandn();
  bias_params_.AddVec(stddev, temp_bias_params);
}
1942 
// NOTE(review): this DotProduct() definition is missing its declarator
// line and the line computing the linear-parameter term of the return
// value (presumably "return TraceMatMat(linear_params_,
// other->linear_params_, kTrans)"); verify against the repository source.
// Returns the inner product of this component's parameters with those of
// 'other_in' (which must be a BlockAffineComponent).
    const UpdatableComponent &other_in) const {
  const BlockAffineComponent *other =
      dynamic_cast<const BlockAffineComponent*>(&other_in);
      + VecVec(bias_params_, other->bias_params_);
}
1950 
// NOTE(review): the first lines of this Copy() implementation were lost
// in extraction — the declarator, the allocation of 'ans' and the copies
// of (at least) learning_rate_ and linear_params_; verify against the
// repository source.  The visible tail copies the remaining fields and
// returns the clone.
  ans->bias_params_ = bias_params_;
  ans->num_blocks_ = num_blocks_;
  return ans;
}
1959 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::Scale(BaseFloat scale) {") was lost in
// extraction.  Scales all parameters by 'scale'.
  linear_params_.Scale(scale);
  bias_params_.Scale(scale);
}
1964 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::Add(BaseFloat alpha,") was lost in extraction.
// Adds alpha times the parameters of 'other_in' (which must be a
// BlockAffineComponent) to this component's parameters.
    const UpdatableComponent &other_in) {
  const BlockAffineComponent *other =
      dynamic_cast<const BlockAffineComponent*>(&other_in);
  KALDI_ASSERT(other != NULL);
  linear_params_.AddMat(alpha, other->linear_params_);
  bias_params_.AddVec(alpha, other->bias_params_);
}
1973 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::Propagate(const ChunkInfo &in_info,") was lost in
// extraction; verify against the repository source.
// Forward pass: *out = bias (broadcast per row) + block-diagonal
// linear transform of 'in', one block per num_blocks_.
    const ChunkInfo &out_info,
    const CuMatrixBase<BaseFloat> &in,
    CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // The matrix has a block structure where each matrix has input dim
  // (#rows) equal to input_block_dim.  The blocks are stored in linear_params_
  // as [ M
  //      N
  //      O ] but we actually treat it as:
  // [ M 0 0
  //   0 N 0
  //   0 0 O ]
  int32 input_block_dim = linear_params_.NumCols(),
       output_block_dim = linear_params_.NumRows() / num_blocks_,
             num_frames = in.NumRows();
  KALDI_ASSERT(in.NumCols() == input_block_dim * num_blocks_);
  KALDI_ASSERT(out->NumCols() == output_block_dim * num_blocks_);
  KALDI_ASSERT(in.NumRows() == out->NumRows());

  out->CopyRowsFromVec(bias_params_); // copies bias_params_ to each row
  // of *out.

  // Each block b maps input columns [b*input_block_dim, ...) to output
  // columns [b*output_block_dim, ...) through its own sub-matrix of
  // linear_params_.
  for (int32 b = 0; b < num_blocks_; b++) {
    CuSubMatrix<BaseFloat> in_block(in, 0, num_frames,
                                    b * input_block_dim, input_block_dim),
        out_block(*out, 0, num_frames,
                  b * output_block_dim, output_block_dim),
        param_block(linear_params_,
                    b * output_block_dim, output_block_dim,
                    0, input_block_dim);
    out_block.AddMatMat(1.0, in_block, kNoTrans, param_block, kTrans, 1.0);
  }
}
2011 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::Update(") was lost in extraction.
// Plain gradient update: accumulates learning_rate_ times the gradient of
// the objective w.r.t. the bias and each per-block weight sub-matrix.
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {
  int32 input_block_dim = linear_params_.NumCols(),
       output_block_dim = linear_params_.NumRows() / num_blocks_,
             num_frames = in_value.NumRows();

  // Bias gradient is the per-column sum of the output derivatives.
  bias_params_.AddRowSumMat(learning_rate_, out_deriv, 1.0);
  for (int32 b = 0; b < num_blocks_; b++) {
    CuSubMatrix<BaseFloat> in_value_block(in_value, 0, num_frames,
                                          b * input_block_dim,
                                          input_block_dim),
        out_deriv_block(out_deriv, 0, num_frames,
                        b * output_block_dim, output_block_dim),
        param_block(linear_params_,
                    b * output_block_dim, output_block_dim,
                    0, input_block_dim);
    // Update the parameters.
    param_block.AddMatMat(learning_rate_, out_deriv_block, kTrans,
                          in_value_block, kNoTrans, 1.0);
  }
}
2034 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::Backprop(const ChunkInfo &, //in_info,") was lost
// in extraction.
// Backward pass: propagates out_deriv through each block's weight matrix
// into *in_deriv, then (if to_update is non-NULL) applies the parameter
// update on that component.
    const ChunkInfo &, //out_info,
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &, //out_value,
    const CuMatrixBase<BaseFloat> &out_deriv,
    Component *to_update_in,
    CuMatrix<BaseFloat> *in_deriv) const {

  // This code mirrors the code in Propagate().
  int32 num_frames = in_value.NumRows();
  // NULL if the caller does not want a parameter update.
  BlockAffineComponent *to_update = dynamic_cast<BlockAffineComponent*>(
      to_update_in);
  in_deriv->Resize(out_deriv.NumRows(), InputDim());
  int32 input_block_dim = linear_params_.NumCols(),
       output_block_dim = linear_params_.NumRows() / num_blocks_;
  KALDI_ASSERT(in_value.NumCols() == input_block_dim * num_blocks_);
  KALDI_ASSERT(out_deriv.NumCols() == output_block_dim * num_blocks_);

  for (int32 b = 0; b < num_blocks_; b++) {
    CuSubMatrix<BaseFloat> in_value_block(in_value, 0, num_frames,
                                          b * input_block_dim,
                                          input_block_dim),
        in_deriv_block(*in_deriv, 0, num_frames,
                       b * input_block_dim, input_block_dim),
        out_deriv_block(out_deriv, 0, num_frames,
                        b * output_block_dim, output_block_dim),
        param_block(linear_params_,
                    b * output_block_dim, output_block_dim,
                    0, input_block_dim);

    // Propagate the derivative back to the input.
    in_deriv_block.AddMatMat(1.0, out_deriv_block, kNoTrans,
                             param_block, kNoTrans, 0.0);
  }
  if (to_update != NULL)
    to_update->Update(in_value, out_deriv);
}
2072 
2073 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::Init(BaseFloat learning_rate,") was lost in
// extraction.
// Random initialization of a block-affine component: weights
// ~ N(0, param_stddev^2), biases ~ N(0, bias_stddev^2).  num_blocks must
// divide both input_dim and output_dim.
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev,
    BaseFloat bias_stddev,
    int32 num_blocks) {
  UpdatableComponent::Init(learning_rate);
  KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
  KALDI_ASSERT(input_dim % num_blocks == 0 && output_dim % num_blocks == 0);

  // Blocks are stacked vertically: each block is
  // (output_dim / num_blocks) x (input_dim / num_blocks).
  linear_params_.Resize(output_dim, input_dim / num_blocks);
  bias_params_.Resize(output_dim);

  linear_params_.SetRandn(); // sets to random normally distributed noise.
  linear_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
  num_blocks_ = num_blocks;
}
2092 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::InitFromString(std::string args) {") was lost in
// extraction.
// Parses a config line of the form
//   "input-dim=... output-dim=... num-blocks=... [learning-rate=...]
//    [param-stddev=...] [bias-stddev=...]"
// and calls Init(); any unrecognized tokens are an error.
  std::string orig_args(args);
  bool ok = true;
  BaseFloat learning_rate = learning_rate_;
  int32 input_dim = -1, output_dim = -1, num_blocks = 1;
  ParseFromString("learning-rate", &args, &learning_rate); // optional.
  ok = ok && ParseFromString("input-dim", &args, &input_dim);
  ok = ok && ParseFromString("output-dim", &args, &output_dim);
  ok = ok && ParseFromString("num-blocks", &args, &num_blocks);
  // Default parameter stddev is 1/sqrt(input_dim), a common fan-in scaling.
  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
      bias_stddev = 1.0;
  ParseFromString("param-stddev", &args, &param_stddev);
  ParseFromString("bias-stddev", &args, &bias_stddev);
  if (!args.empty())
    KALDI_ERR << "Could not process these elements in initializer: "
              << args;
  if (!ok)
    KALDI_ERR << "Bad initializer " << orig_args;
  Init(learning_rate, input_dim, output_dim,
       param_stddev, bias_stddev, num_blocks);
}
2114 
2115 
// Deserializes the component; the token sequence must exactly mirror
// what Write() produces, so the read order is fixed.
void BlockAffineComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<BlockAffineComponent>", "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<NumBlocks>");
  ReadBasicType(is, binary, &num_blocks_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "</BlockAffineComponent>");
}
2127 
// Serializes the component; the token sequence must exactly mirror what
// Read() expects, so the write order is fixed.
void BlockAffineComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<BlockAffineComponent>");
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<NumBlocks>");
  WriteBasicType(os, binary, num_blocks_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "</BlockAffineComponent>");
}
2140 
2141 
// NOTE(review): the declarator line (presumably "int32
// BlockAffineComponent::GetParameterDim() const {") was lost in
// extraction.  Returns the total number of trainable parameters: the
// weights of all blocks plus the bias (InputDim()*OutputDim()/num_blocks_
// counts both, since bias size equals OutputDim()... confirm against the
// header's definition of parameter counting).
  // Note: num_blocks_ should divide both InputDim() and OutputDim().
  return InputDim() * OutputDim() / num_blocks_;
}
2146 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::Vectorize(VectorBase<BaseFloat> *params) const {")
// was lost in extraction.
// Flattens the parameters into *params: linear weights (row-major) first,
// then the bias.  Must stay consistent with UnVectorize() below.
  int32 l = linear_params_.NumRows() * linear_params_.NumCols(),
      b = bias_params_.Dim();
  params->Range(0, l).CopyRowsFromMat(linear_params_);
  params->Range(l, b).CopyFromVec(bias_params_);
}
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponent::UnVectorize(const VectorBase<BaseFloat> &params) {")
// was lost in extraction.
// Inverse of Vectorize(): restores the weights (row-major) and bias from
// the flat parameter vector.
  int32 l = linear_params_.NumRows() * linear_params_.NumCols(),
      b = bias_params_.Dim();
  linear_params_.CopyRowsFromVec(params.Range(0, l));
  bias_params_.CopyFromVec(params.Range(l, b));
}
2159 
2160 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponentPreconditioned::Init(BaseFloat learning_rate,") was
// lost in extraction.
// Delegates the parameter initialization to the base class and stores the
// preconditioning smoothing constant alpha (must be positive).
    int32 input_dim, int32 output_dim,
    BaseFloat param_stddev,
    BaseFloat bias_stddev,
    int32 num_blocks,
    BaseFloat alpha) {
  BlockAffineComponent::Init(learning_rate, input_dim, output_dim,
                             param_stddev, bias_stddev, num_blocks);
  is_gradient_ = false;
  KALDI_ASSERT(alpha > 0.0);
  alpha_ = alpha;
}
2173 
// NOTE(review): the declarator line (presumably "void
// BlockAffineComponentPreconditioned::InitFromString(std::string args) {")
// was lost in extraction.
// Same config format as BlockAffineComponent::InitFromString, plus an
// optional "alpha=..." (default 4.0) for the preconditioner.
  std::string orig_args(args);
  bool ok = true;
  BaseFloat learning_rate = learning_rate_;
  BaseFloat alpha = 4.0;
  int32 input_dim = -1, output_dim = -1, num_blocks = 1;
  ParseFromString("learning-rate", &args, &learning_rate); // optional.
  ParseFromString("alpha", &args, &alpha);
  ok = ok && ParseFromString("input-dim", &args, &input_dim);
  ok = ok && ParseFromString("output-dim", &args, &output_dim);
  ok = ok && ParseFromString("num-blocks", &args, &num_blocks);

  // Default parameter stddev is 1/sqrt(input_dim), a common fan-in scaling.
  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
      bias_stddev = 1.0;
  ParseFromString("param-stddev", &args, &param_stddev);
  ParseFromString("bias-stddev", &args, &bias_stddev);
  if (!args.empty())
    KALDI_ERR << "Could not process these elements in initializer: "
              << args;
  if (!ok)
    KALDI_ERR << "Bad initializer " << orig_args;
  Init(learning_rate, input_dim, output_dim,
       param_stddev, bias_stddev, num_blocks,
       alpha);
}
2199 
2200 void BlockAffineComponentPreconditioned::SetZero(bool treat_as_gradient) {
2201  if (treat_as_gradient)
2202  is_gradient_ = true;
2203  BlockAffineComponent::SetZero(treat_as_gradient);
2204 }
2205 
// Deserializes the component; token order must exactly mirror Write().
void BlockAffineComponentPreconditioned::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<BlockAffineComponentPreconditioned>",
                       "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<NumBlocks>");
  ReadBasicType(is, binary, &num_blocks_);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "<Alpha>");
  ReadBasicType(is, binary, &alpha_);
  ExpectToken(is, binary, "<IsGradient>");
  ReadBasicType(is, binary, &is_gradient_);
  ExpectToken(is, binary, "</BlockAffineComponentPreconditioned>");
}
2222 
// NOTE(review): the first line of this Write()'s signature (presumably
// "void BlockAffineComponentPreconditioned::Write(std::ostream &os,") was
// lost in extraction.
// Serializes the component; token order must exactly mirror Read().
    bool binary) const {
  WriteToken(os, binary, "<BlockAffineComponentPreconditioned>");
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<NumBlocks>");
  WriteBasicType(os, binary, num_blocks_);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<Alpha>");
  WriteBasicType(os, binary, alpha_);
  WriteToken(os, binary, "<IsGradient>");
  WriteBasicType(os, binary, is_gradient_);
  WriteToken(os, binary, "</BlockAffineComponentPreconditioned>");
}
2240 
// NOTE(review): the first lines of this Copy() implementation were lost
// in extraction — the declarator, the allocation of 'ans' and the copies
// of (at least) learning_rate_ and linear_params_; verify against the
// repository source.  The visible tail copies the remaining fields and
// returns the clone.
  ans->bias_params_ = bias_params_;
  ans->num_blocks_ = num_blocks_;
  ans->alpha_ = alpha_;
  ans->is_gradient_ = is_gradient_;
  return ans;
}
2252 
// NOTE(review): this Update() is missing its declarator line and the two
// lines naming the preconditioning calls (only their trailing arguments
// "&in_value_precon" / "&out_deriv_precon" survived extraction); verify
// against the repository source before relying on this listing.
// Preconditioned block-affine update: for each block, preconditions the
// (bias-augmented) input and the output derivatives, then applies the
// outer-product update to that block's weights and bias range.  When
// is_gradient_ is set, falls back to the plain gradient update.
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {
  if (is_gradient_) {
    UpdateSimple(in_value, out_deriv);
    // does the baseline update with no preconditioning.
    return;
  }
  int32 input_block_dim = linear_params_.NumCols(),
      output_block_dim = linear_params_.NumRows() / num_blocks_,
      num_frames = in_value.NumRows();

  // The extra column holds 1.0 per row so the bias is preconditioned
  // together with the weights.
  CuMatrix<BaseFloat> in_value_temp(num_frames, input_block_dim + 1, kUndefined),
      in_value_precon(num_frames, input_block_dim + 1, kUndefined);
  in_value_temp.Set(1.0); // so last row will have value 1.0.
  CuSubMatrix<BaseFloat> in_value_temp_part(in_value_temp, 0, num_frames,
                                            0, input_block_dim); // all but last 1.0
  CuSubMatrix<BaseFloat> in_value_precon_part(in_value_precon, 0, num_frames,
                                              0, input_block_dim);
  CuVector<BaseFloat> precon_ones(num_frames);
  CuMatrix<BaseFloat> out_deriv_precon(num_frames, output_block_dim, kUndefined);

  for (int32 b = 0; b < num_blocks_; b++) {
    CuSubMatrix<BaseFloat> in_value_block(in_value, 0, num_frames,
                                          b * input_block_dim,
                                          input_block_dim),
        out_deriv_block(out_deriv, 0, num_frames,
                        b * output_block_dim, output_block_dim),
        param_block(linear_params_,
                    b * output_block_dim, output_block_dim,
                    0, input_block_dim);
    in_value_temp_part.CopyFromMat(in_value_block);

    // [extraction-lost line: preconditioning of in_value_temp into ...]
                                 &in_value_precon);
    // [extraction-lost line: preconditioning of out_deriv_block into ...]
                                 &out_deriv_precon);


    // Update the parameters.
    param_block.AddMatMat(learning_rate_, out_deriv_precon, kTrans,
                          in_value_precon_part, kNoTrans, 1.0);
    precon_ones.CopyColFromMat(in_value_precon, input_block_dim);
    bias_params_.Range(b * output_block_dim, output_block_dim).
        AddMatVec(learning_rate_, out_deriv_precon, kTrans,
                  precon_ones, 1.0);
  }
}
2301 
2302 
// Deserializes the column-permutation vector; token order mirrors Write().
void PermuteComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<PermuteComponent>", "<Reorder>");
  ReadIntegerVector(is, binary, &reorder_);
  ExpectToken(is, binary, "</PermuteComponent>");
}
2308 
// Serializes the column-permutation vector; token order mirrors Read().
void PermuteComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<PermuteComponent>");
  WriteToken(os, binary, "<Reorder>");
  WriteIntegerVector(os, binary, reorder_);
  WriteToken(os, binary, "</PermuteComponent>");
}
2315 
// NOTE(review): the declarator line (presumably "void
// PermuteComponent::Init(int32 dim) {") was lost in extraction.
// Initializes reorder_ to a uniformly random permutation of [0, dim).
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed
// in C++17; if the project moves past C++14 this needs std::shuffle with
// an explicit RNG (which also changes which rand() source seeds it).
  KALDI_ASSERT(dim > 0);
  reorder_.resize(dim);
  for (int32 i = 0; i < dim; i++) reorder_[i] = i;
  std::random_shuffle(reorder_.begin(), reorder_.end());
}
2322 
2323 void PermuteComponent::InitFromString(std::string args) {
2324  std::string orig_args(args);
2325  int32 dim;
2326  bool ok = ParseFromString("dim", &args, &dim);
2327  if (!ok || !args.empty() || dim <= 0)
2328  KALDI_ERR << "Invalid initializer for layer of type "
2329  << Type() << ": \"" << orig_args << "\"";
2330  Init(dim);
2331 }
2332 
// NOTE(review): the declarator line (presumably "void
// PermuteComponent::Propagate(const ChunkInfo &in_info,") was lost in
// extraction.
// Forward pass: output column reorder_[i] is input column i, implemented
// as a gather via the inverse permutation.
    const ChunkInfo &out_info,
    const CuMatrixBase<BaseFloat> &in,
    CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  // reverse_reorder is the inverse permutation: out column j comes from
  // in column reverse_reorder[j].
  std::vector<int32> reverse_reorder(reorder_.size());
  for (size_t i = 0; i < reorder_.size(); i++)
    reverse_reorder[reorder_[i]] = i;
  // Note: if we were actually using this component type we could make the
  // CuArray a member variable for efficiency.
  CuArray<int32> cu_reverse_reorder(reverse_reorder);
  out->CopyCols(in, cu_reverse_reorder);
}
2349 
2350 void PermuteComponent::Backprop(const ChunkInfo &, //in_info,
2351  const ChunkInfo &, //out_info,
2352  const CuMatrixBase<BaseFloat> &in_value,
2353  const CuMatrixBase<BaseFloat> &out_value,
2354  const CuMatrixBase<BaseFloat> &out_deriv,
2355  Component *to_update,
2356  CuMatrix<BaseFloat> *in_deriv) const {
2357  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
2358  KALDI_ASSERT(out_deriv.NumCols() == OutputDim());
2359  // Note: if we were actually using this component type we could make the
2360  // CuArray a member variable for efficiency.
2361  CuArray<int32> cu_reorder(reorder_);
2362  in_deriv->CopyCols(out_deriv, cu_reorder);
2363 }
2364 
2365 void SumGroupComponent::Init(const std::vector<int32> &sizes) {
2366  KALDI_ASSERT(!sizes.empty());
2367  std::vector<Int32Pair> cpu_vec(sizes.size());
2368  std::vector<int32> reverse_cpu_vec;
2369  int32 cur_index = 0;
2370  for (size_t i = 0; i < sizes.size(); i++) {
2371  KALDI_ASSERT(sizes[i] > 0);
2372  cpu_vec[i].first = cur_index;
2373  cpu_vec[i].second = cur_index + sizes[i];
2374  cur_index += sizes[i];
2375  for (int32 j = cpu_vec[i].first; j < cpu_vec[i].second; j++)
2376  reverse_cpu_vec.push_back(i);
2377  }
2378  this->indexes_ = cpu_vec;
2379  this->reverse_indexes_ = reverse_cpu_vec;
2380  this->input_dim_ = cur_index;
2381  this->output_dim_ = sizes.size();
2382 }
2383 
2384 void SumGroupComponent::InitFromString(std::string args) {
2385  std::string orig_args(args);
2386  std::vector<int32> sizes;
2387  bool ok = ParseFromString("sizes", &args, &sizes);
2388 
2389  if (!ok || !args.empty() || sizes.empty())
2390  KALDI_ERR << "Invalid initializer for layer of type "
2391  << Type() << ": \"" << orig_args << "\"";
2392  this->Init(sizes);
2393 }
2394 
// NOTE(review): the declarator line (presumably
// "Component* SumGroupComponent::Copy() const {") was lost in extraction.
// Returns a deep copy: all four members are copied by value.
  SumGroupComponent *ans = new SumGroupComponent();
  ans->indexes_ = indexes_;
  ans->reverse_indexes_ = reverse_indexes_;
  ans->input_dim_ = input_dim_;
  ans->output_dim_ = output_dim_;
  return ans;
}
2403 
// Deserializes the component from its list of group sizes.
void SumGroupComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<SumGroupComponent>", "<Sizes>");
  std::vector<int32> sizes;
  ReadIntegerVector(is, binary, &sizes);

  std::string token;
  ReadToken(is, binary, &token);
  // The opening tag is accepted in place of the closing tag here,
  // presumably for compatibility with an older serialization format —
  // confirm before tightening this check.
  if (!(token == "<SumGroupComponent>" ||
        token == "</SumGroupComponent>")) {
    KALDI_ERR << "Expected </SumGroupComponent>, got " << token;
  }
  this->Init(sizes);
}
2417 
2418 void SumGroupComponent::GetSizes(std::vector<int32> *sizes) const {
2419  std::vector<Int32Pair> indexes;
2420  indexes_.CopyToVec(&indexes);
2421  sizes->resize(indexes.size());
2422  for (size_t i = 0; i < indexes.size(); i++) {
2423  (*sizes)[i] = indexes[i].second - indexes[i].first;
2424  if (i == 0) { KALDI_ASSERT(indexes[i].first == 0); }
2425  else { KALDI_ASSERT(indexes[i].first == indexes[i-1].second); }
2426  KALDI_ASSERT(indexes[i].second > indexes[i].first);
2427  (*sizes)[i] = indexes[i].second - indexes[i].first;
2428  }
2429 }
2430 
2431 void SumGroupComponent::Write(std::ostream &os, bool binary) const {
2432  WriteToken(os, binary, "<SumGroupComponent>");
2433  WriteToken(os, binary, "<Sizes>");
2434  std::vector<int32> sizes;
2435  this->GetSizes(&sizes);
2436  WriteIntegerVector(os, binary, sizes);
2437  WriteToken(os, binary, "</SumGroupComponent>");
2438 }
2439 
// NOTE(review): the declarator line (presumably "void
// SumGroupComponent::Propagate(const ChunkInfo &in_info,") was lost in
// extraction.
// Forward pass: each output column is the sum of its group's input
// columns, per the ranges stored in indexes_.
    const ChunkInfo &out_info,
    const CuMatrixBase<BaseFloat> &in,
    CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  out->SumColumnRanges(in, indexes_);
}
2450 
// NOTE(review): the declarator line (presumably "void
// SumGroupComponent::Backprop(const ChunkInfo &in_info,") was lost in
// extraction.
// Backward pass: the derivative of a sum w.r.t. each summand is 1, so
// each input column's derivative is a copy of its group's output
// derivative (gathered via reverse_indexes_).  No trainable parameters.
    const ChunkInfo &out_info,
    const CuMatrixBase<BaseFloat> &, //in_value,
    const CuMatrixBase<BaseFloat> &, //out_value,
    const CuMatrixBase<BaseFloat> &out_deriv,
    Component *to_update, // may be identical to "this".
    CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(out_deriv.NumRows(), InputDim());
  in_deriv->CopyCols(out_deriv, reverse_indexes_);
}
2461 
2462 
2463 std::string SpliceComponent::Info() const {
2464  std::stringstream stream;
2465  std::ostringstream os;
2466  std::copy(context_.begin(), context_.end(),
2467  std::ostream_iterator<int32>(os, " "));
2468  stream << Component::Info() << ", context=" << os.str();
2469  if (const_component_dim_ != 0)
2470  stream << ", const_component_dim=" << const_component_dim_;
2471 
2472  return stream.str();
2473 }
2474 
// Initializes the splicing configuration.
// input_dim: dimension of each input frame (must be > 0).
// context: sorted, unique frame offsets to splice; must include or span 0
//   (front <= 0 and back >= 0).
// const_component_dim: trailing part of each input frame that is assumed
//   constant across a chunk and is copied once rather than spliced.
void SpliceComponent::Init(int32 input_dim, std::vector<int32> context,
                           int32 const_component_dim) {
  input_dim_ = input_dim;
  const_component_dim_ = const_component_dim;
  context_ = context;
  KALDI_ASSERT(context_.size() > 0);
  KALDI_ASSERT(input_dim_ > 0 && context_.front() <= 0 && context_.back() >= 0);
  KALDI_ASSERT(IsSortedAndUniq(context));
  KALDI_ASSERT(const_component_dim_ >= 0 && const_component_dim_ < input_dim_);
}
2485 
2486 
2487 // e.g. args == "input-dim=10 left-context=2 right-context=2
2488 void SpliceComponent::InitFromString(std::string args) {
2489  std::string orig_args(args);
2490  int32 input_dim, left_context, right_context;
2491  std::vector <int32> context;
2492  bool in_dim_ok = ParseFromString("input-dim", &args, &input_dim);
2493  bool context_ok = ParseFromString("context", &args, &context);
2494  bool left_right_context_ok = ParseFromString("left-context", &args,
2495  &left_context) &&
2496  ParseFromString("right-context", &args,
2497  &right_context);
2498  int32 const_component_dim = 0;
2499  ParseFromString("const-component-dim", &args, &const_component_dim);
2500 
2501  if (!(in_dim_ok && (context_ok || left_right_context_ok)) ||
2502  !args.empty() || input_dim <= 0)
2503  KALDI_ERR << "Invalid initializer for layer of type "
2504  << Type() << ": \"" << orig_args << "\"";
2505  if (left_right_context_ok) {
2506  KALDI_ASSERT(context.size() == 0);
2507  for (int32 i = -left_context; i <= right_context; i++)
2508  context.push_back(i);
2509  }
2510  Init(input_dim, context, const_component_dim);
2511 }
2512 
// NOTE(review): the declarator line (presumably
// "int32 SpliceComponent::OutputDim() const {") was lost in extraction.
// The spliced part of each frame is replicated once per context offset;
// the const part appears exactly once.
  return (input_dim_  - const_component_dim_)
      * (context_.size())
      + const_component_dim_;
}
2518 
// NOTE(review): the declarator line (presumably
// "int32 ChunkInfo::GetIndex(int32 offset) const {") was lost in
// extraction.
// Maps a frame offset to its row index within a chunk.  For contiguous
// chunks (offsets_ empty) this is a simple shift; otherwise it is a
// binary search into the explicit offsets list, which must contain the
// offset.
  if (offsets_.empty()) {  // if data is contiguous
    KALDI_ASSERT((offset <= last_offset_) && (offset >= first_offset_));
    return offset - first_offset_;
  } else {
    std::vector<int32>::const_iterator iter =
        std::lower_bound(offsets_.begin(), offsets_.end(), offset);
    // make sure offset is present in the vector
    KALDI_ASSERT(iter != offsets_.end() && *iter == offset);
    return static_cast<int32>(iter - offsets_.begin());
  }
}
2531 
// NOTE(review): the declarator line (presumably
// "int32 ChunkInfo::GetOffset(int32 index) const {") was lost in
// extraction.
// Inverse of GetIndex(): maps a row index within a chunk back to its
// frame offset.
  if (offsets_.empty()) {  // if data is contiguous
    int32 offset = index + first_offset_;  // just offset by the first_offset_
    KALDI_ASSERT((offset <= last_offset_) && (offset >= first_offset_));
    return offset;
  } else {
    KALDI_ASSERT((index >= 0) && (index < offsets_.size()));
    return offsets_[index];
  }
}
2542 
// Validates the internal consistency of this ChunkInfo.
void ChunkInfo::Check() const {
  // Checking sanity of the ChunkInfo object
  KALDI_ASSERT((feat_dim_ > 0) && (num_chunks_ > 0));

  if (! offsets_.empty()) {
    // Explicit (non-contiguous) offsets: first/last must agree with the
    // ends of the offsets list.
    KALDI_ASSERT((first_offset_ == offsets_.front()) &&
                 (last_offset_ == offsets_.back()));
  } else {
    KALDI_ASSERT((first_offset_ >= 0) && (last_offset_ >= first_offset_));
    // NOTE(review): the original comment here ("asserting the chunk is not
    // contiguous, as offsets is not empty") was wrong — in this branch
    // offsets_ IS empty (the contiguous case), so offsets_.size() == 0 and
    // the assert below only restates last_offset_ >= first_offset_.
    KALDI_ASSERT ( last_offset_ - first_offset_ + 1 > offsets_.size() );
  }
  KALDI_ASSERT(NumRows() % num_chunks_ == 0);

}
2558 
// NOTE(review): the declarator line (presumably "void
// ChunkInfo::CheckSize(const CuMatrixBase<BaseFloat> &mat) const {") was
// lost in extraction.  Asserts that 'mat' has exactly the dimensions this
// ChunkInfo describes.
  KALDI_ASSERT((mat.NumRows() ==  NumRows()) && (mat.NumCols() == NumCols()));
}
2562 
2563 /*
2564  * This method was used for debugging, make changes in nnet-component.h to
2565  * expose it
2566 void ChunkInfo::ToString() const {
2567  KALDI_LOG << "feat_dim " << feat_dim_;
2568  KALDI_LOG << "num_chunks " << num_chunks_;
2569  KALDI_LOG << "first_index " << first_offset_;
2570  KALDI_LOG << "last_index " << last_offset_;
2571  for (size_t i = 0; i < offsets_.size(); i++)
2572  KALDI_LOG << offsets_[i];
2573 }
2574 */
2575 
2576 
// NOTE(review): the declarator line (presumably "void
// SpliceComponent::Propagate(const ChunkInfo &in_info,") was lost in
// extraction.
// Forward pass: each output row is the concatenation, over the offsets in
// context_, of the corresponding input rows (first input_dim - const_dim
// columns), followed by one copy of the constant trailing const_dim
// columns.  Implemented as one CopyRows gather per context offset.
    const ChunkInfo &out_info,
    const CuMatrixBase<BaseFloat> &in,
    CuMatrixBase<BaseFloat> *out) const {

  // Check the inputs are correct and resize output
  in_info.Check();
  out_info.Check();
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());

  int32 in_chunk_size  = in_info.ChunkSize(),
        out_chunk_size = out_info.ChunkSize(),
        input_dim = in_info.NumCols();

  if (out_chunk_size <= 0)
    KALDI_ERR << "Splicing features: output will have zero dimension. "
              << "Probably a code error.";

  // 'indexes' is, for each index from 0 to context_.size() - 1,
  // then for each row of "out", the corresponding row of "in" that we copy from
  int32 num_splice = context_.size();
  std::vector<std::vector<int32> > indexes(num_splice);
  for (int32 c = 0; c < num_splice; c++)
    indexes[c].resize(out->NumRows());
  // const_component_dim_ != 0, "const_indexes" will be used to determine which
  // row of "in" we copy the last part of each row of "out" from (this part is
  // not subject to splicing, it's assumed constant for each frame of "input".
  int32 const_dim = const_component_dim_;
  std::vector<int32> const_indexes(const_dim == 0 ? 0 : out->NumRows());

  for (int32 chunk = 0; chunk < in_info.NumChunks(); chunk++) {
    if (chunk == 0) {
      // this branch could be used for all chunks in the matrix,
      // but is restricted to chunk 0 for efficiency reasons
      for (int32 c = 0; c < num_splice; c++) {
        for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
          int32 out_offset = out_info.GetOffset(out_index);
          int32 in_index = in_info.GetIndex(out_offset + context_[c]);
          indexes[c][chunk * out_chunk_size + out_index] =
              chunk * in_chunk_size + in_index;
        }
      }
    } else {  // just copy the indices from the previous chunk
              // and offset these by input chunk size
      for (int32 c = 0; c < num_splice; c++) {
        for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
          int32 last_value = indexes[c][(chunk-1) * out_chunk_size + out_index];
          indexes[c][chunk * out_chunk_size + out_index] =
              (last_value == -1 ? -1 : last_value + in_chunk_size);
        }
      }
    }
    if (const_dim != 0) {
      for (int32 out_index = 0; out_index < out_chunk_size; out_index++)
        const_indexes[chunk * out_chunk_size + out_index] =
            chunk * in_chunk_size + out_index;  // there is
      // an arbitrariness here; since we assume the const_component
      // is constant within a chunk, it doesn't matter from where we copy.
    }
  }

  // Gather: for each context position c, copy the selected input rows of
  // the spliced part into the c-th column band of the output.
  for (int32 c = 0; c < num_splice; c++) {
    int32 dim = input_dim - const_dim;  // dimension we
                                        // are splicing
    CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
                                   0, dim),
        out_part(*out, 0, out->NumRows(),
                 c * dim, dim);
    CuArray<int32> cu_indexes(indexes[c]);
    out_part.CopyRows(in_part, cu_indexes);
  }
  if (const_dim != 0) {
    // Copy the constant trailing columns once (not per context offset).
    CuSubMatrix<BaseFloat> in_part(in, 0, in.NumRows(),
                                   in.NumCols() - const_dim, const_dim),
        out_part(*out, 0, out->NumRows(),
                 out->NumCols() - const_dim, const_dim);

    CuArray<int32> cu_const_indexes(const_indexes);
    out_part.CopyRows(in_part, cu_const_indexes);
  }
}
2661 
2663  const ChunkInfo &out_info,
2664  const CuMatrixBase<BaseFloat> &, // in_value,
2665  const CuMatrixBase<BaseFloat> &, // out_value,
2666  const CuMatrixBase<BaseFloat> &out_deriv,
2667  Component *to_update,
2668  CuMatrix<BaseFloat> *in_deriv) const {
2669  in_info.Check();
2670  out_info.Check();
2671  out_info.CheckSize(out_deriv);
2672  in_deriv->Resize(in_info.NumRows(), in_info.NumCols(), kUndefined);
2673  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
2674  int32 num_chunks = in_info.NumChunks();
2675  // rewrite backpropagate
2676 
2677  int32 out_chunk_size = out_info.ChunkSize(),
2678  in_chunk_size = in_info.ChunkSize(),
2679  output_dim = out_deriv.NumCols(),
2680  input_dim = InputDim();
2681 
2682  KALDI_ASSERT(OutputDim() == output_dim);
2683 
2684  int32 num_splice = context_.size(),
2685  const_dim = const_component_dim_;
2686  // 'indexes' is, for each index from 0 to num_splice - 1,
2687  // then for each row of "in_deriv", the corresponding row of "out_deriv" that
2688  // we add, or -1 if.
2689 
2690  std::vector<std::vector<int32> > indexes(num_splice);
2691  // const_dim != 0, "const_indexes" will be used to determine which
2692  // row of "in" we copy the last part of each row of "out" from (this part is
2693  // not subject to splicing, it's assumed constant for each frame of "input".
2694  std::vector<int32> const_indexes(const_dim == 0 ? 0 : in_deriv->NumRows(), -1);
2695 
2696  for (int32 c = 0; c < indexes.size(); c++)
2697  indexes[c].resize(in_deriv->NumRows(), -1); // set to -1 by default,
2698  // this gets interpreted by the CopyRows() code
2699  // as a signal to zero the output...
2700 
2701  int32 dim = input_dim - const_dim; // dimension we are splicing
2702  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
2703  if (chunk == 0) { // this branch can be taken for all chunks, but is not
2704  // taken for efficiency reasons
2705  for (int32 c = 0; c < num_splice; c++) {
2706  for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
2707  int32 out_offset = out_info.GetOffset(out_index);
2708  int32 in_index = in_info.GetIndex(out_offset + context_[c]);
2709  indexes[c][chunk * in_chunk_size + in_index] =
2710  chunk * out_chunk_size + out_index;
2711  }
2712  }
2713  } else { // just copy the indexes from the previous chunk
2714  for (int32 c = 0; c < num_splice; c++) {
2715  for (int32 in_index = 0; in_index < in_chunk_size; in_index++) {
2716  int32 last_value = indexes[c][(chunk-1) * in_chunk_size + in_index];
2717  indexes[c][chunk * in_chunk_size + in_index] =
2718  (last_value == -1 ? -1 : last_value + out_chunk_size);
2719  }
2720  }
2721  }
2722  // this code corresponds to the way the forward propagation works; see
2723  // comments there.
2724  if (const_dim != 0) {
2725  for (int32 out_index = 0; out_index < out_chunk_size; out_index++) {
2726  const_indexes[chunk * in_chunk_size + out_index] =
2727  chunk * out_chunk_size + out_index;
2728  }
2729  }
2730  }
2731 
2732  CuMatrix<BaseFloat> temp_mat(in_deriv->NumRows(), dim, kUndefined);
2733 
2734  for (int32 c = 0; c < num_splice; c++) {
2735  CuArray<int32> cu_indexes(indexes[c]);
2736  int32 dim = input_dim - const_dim; // dimension we
2737  // are splicing
2738  CuSubMatrix<BaseFloat> out_deriv_part(out_deriv, 0, out_deriv.NumRows(),
2739  c * dim, dim),
2740  in_deriv_part(*in_deriv, 0, in_deriv->NumRows(),
2741  0, dim);
2742  if (c == 0) {
2743  in_deriv_part.CopyRows(out_deriv_part, cu_indexes);
2744  } else {
2745  temp_mat.CopyRows(out_deriv_part, cu_indexes);
2746  in_deriv_part.AddMat(1.0, temp_mat);
2747  }
2748  }
2749  if (const_dim != 0) {
2750  CuSubMatrix<BaseFloat> out_deriv_part(out_deriv, 0, out_deriv.NumRows(),
2751  out_deriv.NumCols() - const_dim,
2752  const_dim),
2753  in_deriv_part(*in_deriv, 0, in_deriv->NumRows(),
2754  in_deriv->NumCols() - const_dim, const_dim);
2755  CuArray<int32> cu_const_indexes(const_indexes);
2756  in_deriv_part.CopyRows(out_deriv_part, cu_const_indexes);
2757  }
2758 }
2759 
2761  SpliceComponent *ans = new SpliceComponent();
2762  ans->input_dim_ = input_dim_;
2763  ans->context_ = context_;
2764  ans->const_component_dim_ = const_component_dim_;
2765  return ans;
2766 }
2767 
2768 void SpliceComponent::Read(std::istream &is, bool binary) {
2769  ExpectOneOrTwoTokens(is, binary, "<SpliceComponent>", "<InputDim>");
2770  ReadBasicType(is, binary, &input_dim_);
2771  std::string token;
2772  ReadToken(is, false, &token);
2773  if (token == "<LeftContext>") {
2774  int32 left_context=0, right_context=0;
2775  std::vector<int32> context;
2776  ReadBasicType(is, binary, &left_context);
2777  ExpectToken(is, binary, "<RightContext>");
2778  ReadBasicType(is, binary, &right_context);
2779  for (int32 i = -1 * left_context; i <= right_context; i++)
2780  context.push_back(i);
2781  context_ = context;
2782  } else if (token == "<Context>") {
2783  ReadIntegerVector(is, binary, &context_);
2784  } else {
2785  KALDI_ERR << "Unknown token" << token
2786  << ", the model might be corrupted";
2787  }
2788  ExpectToken(is, binary, "<ConstComponentDim>");
2789  ReadBasicType(is, binary, &const_component_dim_);
2790  ExpectToken(is, binary, "</SpliceComponent>");
2791 }
2792 
2793 void SpliceComponent::Write(std::ostream &os, bool binary) const {
2794  WriteToken(os, binary, "<SpliceComponent>");
2795  WriteToken(os, binary, "<InputDim>");
2796  WriteBasicType(os, binary, input_dim_);
2797  WriteToken(os, binary, "<Context>");
2798  WriteIntegerVector(os, binary, context_);
2799  WriteToken(os, binary, "<ConstComponentDim>");
2800  WriteBasicType(os, binary, const_component_dim_);
2801  WriteToken(os, binary, "</SpliceComponent>");
2802 }
2803 
2804 
2805 std::string SpliceMaxComponent::Info() const {
2806  std::stringstream stream;
2807  std::ostringstream os;
2808  std::copy(context_.begin(), context_.end(),
2809  std::ostream_iterator<int32>(os, " "));
2810  stream << Component::Info() << ", context=" << os.str();
2811  return stream.str();
2812 }
2813 
2815  std::vector<int32> context) {
2816  dim_ = dim;
2817  context_ = context;
2818  KALDI_ASSERT(dim_ > 0 && context_.front() <= 0 && context_.back() >= 0);
2819 }
2820 
2821 
2822 // e.g. args == "dim=10 left-context=2 right-context=2"
2823 void SpliceMaxComponent::InitFromString(std::string args) {
2824  std::string orig_args(args);
2825  int32 dim, left_context, right_context;
2826  std::vector <int32> context;
2827  bool dim_ok = ParseFromString("dim", &args, &dim);
2828  bool context_ok = ParseFromString("context", &args, &context);
2829  bool left_right_context_ok = ParseFromString("left-context",
2830  &args, &left_context) &&
2831  ParseFromString("right-context", &args,
2832  &right_context);
2833 
2834  if (!(dim_ok && (context_ok || left_right_context_ok)) ||
2835  !args.empty() || dim <= 0)
2836  KALDI_ERR << "Invalid initializer for layer of type "
2837  << Type() << ": \"" << orig_args << "\"";
2838  if (left_right_context_ok) {
2839  KALDI_ASSERT(context.size() == 0);
2840  for (int32 i = -1 * left_context; i <= right_context; i++)
2841  context.push_back(i);
2842  }
2843  Init(dim, context);
2844 }
2845 
2846 
2848  const ChunkInfo &out_info,
2849  const CuMatrixBase<BaseFloat> &in,
2850  CuMatrixBase<BaseFloat> *out) const {
2851  in_info.Check();
2852  out_info.Check();
2853  in_info.CheckSize(in);
2854  out_info.CheckSize(*out);
2855  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
2856  int32 in_chunk_size = in_info.ChunkSize(),
2857  out_chunk_size = out_info.ChunkSize(),
2858  dim = in_info.NumCols();
2859 
2860  CuMatrix<BaseFloat> input_chunk_part(out_chunk_size, dim);
2861  for (int32 chunk = 0; chunk < in_info.NumChunks(); chunk++) {
2862  CuSubMatrix<BaseFloat> input_chunk(in,
2863  chunk * in_chunk_size, in_chunk_size,
2864  0, dim),
2865  output_chunk(*out,
2866  chunk * out_chunk_size,
2867  out_chunk_size, 0, dim);
2868  for (int32 offset = 0; offset < context_.size(); offset++) {
2869  // computing the indices to copy into input_chunk_part from input_chunk
2870  // copy the rows of the input matrix which correspond to the current
2871  // context index
2872  std::vector<int32> input_chunk_inds(out_chunk_size);
2873  for (int32 i = 0; i < out_chunk_size; i++) {
2874  int32 out_chunk_ind = i;
2875  int32 out_chunk_offset =
2876  out_info.GetOffset(out_chunk_ind);
2877  input_chunk_inds[i] =
2878  in_info.GetIndex(out_chunk_offset + context_[offset]);
2879  }
2880  CuArray<int32> cu_chunk_inds(input_chunk_inds);
2881  input_chunk_part.CopyRows(input_chunk, cu_chunk_inds);
2882  if (offset == 0) {
2883  output_chunk.CopyFromMat(input_chunk_part);
2884  } else {
2885  output_chunk.Max(input_chunk_part);
2886  }
2887  }
2888  }
2889 }
2890 
2892  const ChunkInfo &out_info,
2893  const CuMatrixBase<BaseFloat> &in_value,
2894  const CuMatrixBase<BaseFloat> &, // out_value
2895  const CuMatrixBase<BaseFloat> &out_deriv,
2896  Component *to_update,
2897  CuMatrix<BaseFloat> *in_deriv) const {
2898  in_info.Check();
2899  out_info.Check();
2900  in_info.CheckSize(in_value);
2901  out_info.CheckSize(out_deriv);
2902  in_deriv->Resize(in_info.NumRows(), in_info.NumCols());
2903  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
2904 
2905  int32 out_chunk_size = out_info.ChunkSize(),
2906  in_chunk_size = in_info.ChunkSize(),
2907  dim = out_deriv.NumCols();
2908 
2909  KALDI_ASSERT(dim == InputDim());
2910 
2911  for (int32 chunk = 0; chunk < in_info.NumChunks(); chunk++) {
2912  CuSubMatrix<BaseFloat> in_deriv_chunk(*in_deriv,
2913  chunk * in_chunk_size,
2914  in_chunk_size,
2915  0, dim),
2916  in_value_chunk(in_value,
2917  chunk * in_chunk_size,
2918  in_chunk_size,
2919  0, dim),
2920  out_deriv_chunk(out_deriv,
2921  chunk * out_chunk_size,
2922  out_chunk_size,
2923  0, dim);
2924  for (int32 r = 0; r < out_deriv_chunk.NumRows(); r++) {
2925  int32 out_chunk_ind = r;
2926  int32 out_chunk_offset =
2927  out_info.GetOffset(out_chunk_ind);
2928 
2929  for (int32 c = 0; c < dim; c++) {
2930  int32 in_r_max = -1;
2931  BaseFloat max_input = -std::numeric_limits<BaseFloat>::infinity();
2932  for (int32 context_ind = 0;
2933  context_ind < context_.size(); context_ind++) {
2934  int32 in_r =
2935  in_info.GetIndex(out_chunk_offset + context_[context_ind]);
2936  BaseFloat input = in_value_chunk(in_r, c);
2937  if (input > max_input) {
2938  max_input = input;
2939  in_r_max = in_r;
2940  }
2941  }
2942  KALDI_ASSERT(in_r_max != -1);
2943  (*in_deriv)(in_r_max, c) += out_deriv_chunk(r, c);
2944  }
2945  }
2946  }
2947 }
2948 
2951  ans->Init(dim_, context_);
2952  return ans;
2953 }
2954 
2955 void SpliceMaxComponent::Read(std::istream &is, bool binary) {
2956  ExpectOneOrTwoTokens(is, binary, "<SpliceMaxComponent>", "<Dim>");
2957  ReadBasicType(is, binary, &dim_);
2958  std::string token;
2959  ReadToken(is, false, &token);
2960  if (token == "<LeftContext>") {
2961  int32 left_context = 0, right_context = 0;
2962  std::vector<int32> context;
2963  ReadBasicType(is, binary, &left_context);
2964  ExpectToken(is, binary, "<RightContext>");
2965  ReadBasicType(is, binary, &right_context);
2966  for (int32 i = -1 * left_context; i <= right_context; i++)
2967  context.push_back(i);
2968  context_ = context;
2969  } else if (token == "<Context>") {
2970  ReadIntegerVector(is, binary, &context_);
2971  } else {
2972  KALDI_ERR << "Unknown token" << token << ", the model might be corrupted";
2973  }
2974  ExpectToken(is, binary, "</SpliceMaxComponent>");
2975 }
2976 
2977 void SpliceMaxComponent::Write(std::ostream &os, bool binary) const {
2978  WriteToken(os, binary, "<SpliceMaxComponent>");
2979  WriteToken(os, binary, "<Dim>");
2980  WriteBasicType(os, binary, dim_);
2981  WriteToken(os, binary, "<Context>");
2982  WriteIntegerVector(os, binary, context_);
2983  WriteToken(os, binary, "</SpliceMaxComponent>");
2984 }
2985 
2986 std::string DctComponent::Info() const {
2987  std::stringstream stream;
2988  stream << Component::Info() << ", dct_dim=" << dct_mat_.NumCols();
2989  if (dct_mat_.NumCols() != dct_mat_.NumRows())
2990  stream << ", dct_keep_dim=" << dct_mat_.NumRows();
2991 
2992  return stream.str();
2993 }
2994 
2995 void DctComponent::Init(int32 dim, int32 dct_dim, bool reorder, int32 dct_keep_dim) {
2996  int dct_keep_dim_ = (dct_keep_dim > 0) ? dct_keep_dim : dct_dim;
2997 
2998  KALDI_ASSERT(dim > 0 && dct_dim > 0);
2999  KALDI_ASSERT(dim % dct_dim == 0); // dct_dim must divide dim.
3000  KALDI_ASSERT(dct_dim >= dct_keep_dim_);
3001  dim_ = dim;
3002  dct_mat_.Resize(dct_keep_dim_, dct_dim);
3003  reorder_ = reorder;
3004  Matrix<BaseFloat> dct_mat(dct_keep_dim_, dct_dim);
3005  ComputeDctMatrix(&dct_mat);
3006  dct_mat_ = dct_mat;
3007 }
3008 
3009 
3010 
3011 void DctComponent::InitFromString(std::string args) {
3012  std::string orig_args(args);
3013  int32 dim, dct_dim, dct_keep_dim = 0;
3014  bool reorder = false;
3015 
3016  bool ok = ParseFromString("dim", &args, &dim);
3017  ok = ParseFromString("dct-dim", &args, &dct_dim) && ok;
3018  ok = ParseFromString("reorder", &args, &reorder) && ok;
3019  ParseFromString("dct-keep-dim", &args, &dct_keep_dim);
3020 
3021  if (!ok || !args.empty() || dim <= 0 || dct_dim <= 0 || dct_keep_dim < 0)
3022  KALDI_ERR << "Invalid initializer for layer of type "
3023  << Type() << ": \"" << orig_args << "\"";
3024  Init(dim, dct_dim, reorder, dct_keep_dim);
3025 }
3026 
3027 void DctComponent::Reorder(CuMatrixBase<BaseFloat> *mat, bool reverse) const {
3028  // reorders into contiguous blocks of dize "dct_dim_", assuming that
3029  // such blocks were interlaced before. if reverse==true, does the
3030  // reverse.
3031  int32 dct_dim = dct_mat_.NumCols(),
3032  dct_keep_dim = dct_mat_.NumRows(),
3033  block_size_in = dim_ / dct_dim,
3034  block_size_out = dct_keep_dim;
3035 
3036  //This does not necesarily needs to be true anymore -- output must be reordered as well, but the dimension differs...
3037  //KALDI_ASSERT(mat->NumCols() == dim_);
3038  if (reverse) std::swap(block_size_in, block_size_out);
3039 
3040  CuVector<BaseFloat> temp(mat->NumCols());
3041  for (int32 i = 0; i < mat->NumRows(); i++) {
3042  CuSubVector<BaseFloat> row(*mat, i);
3043  int32 num_blocks_in = block_size_out;
3044  for (int32 b = 0; b < num_blocks_in; b++) {
3045  for (int32 j = 0; j < block_size_in; j++) {
3046  temp(j * block_size_out + b) = row(b * block_size_in + j);
3047  }
3048  }
3049  row.CopyFromVec(temp);
3050  }
3051 }
3052 
3054  const ChunkInfo &out_info,
3055  const CuMatrixBase<BaseFloat> &in,
3056  CuMatrixBase<BaseFloat> *out) const {
3057  KALDI_ASSERT(in.NumCols() == InputDim());
3058  int32 dct_dim = dct_mat_.NumCols(),
3059  dct_keep_dim = dct_mat_.NumRows(),
3060  num_rows = in.NumRows(),
3061  num_chunks = dim_ / dct_dim;
3062 
3063  in_info.CheckSize(in);
3064  out_info.CheckSize(*out);
3065  KALDI_ASSERT(num_rows == out_info.NumRows());
3066  KALDI_ASSERT(num_chunks * dct_keep_dim == out_info.NumCols());
3067 
3068  CuMatrix<BaseFloat> in_tmp;
3069  if (reorder_) {
3070  in_tmp = in;
3071  Reorder(&in_tmp, false);
3072  }
3073 
3074  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
3075  CuSubMatrix<BaseFloat> in_mat(reorder_ ? in_tmp : in,
3076  0, num_rows, dct_dim * chunk, dct_dim),
3077  out_mat(*out,
3078  0, num_rows, dct_keep_dim * chunk, dct_keep_dim);
3079 
3080  out_mat.AddMatMat(1.0, in_mat, kNoTrans, dct_mat_, kTrans, 0.0);
3081  }
3082  if (reorder_)
3083  Reorder(out, true);
3084 }
3085 
3086 void DctComponent::Backprop(const ChunkInfo &, //in_info,
3087  const ChunkInfo &, //out_info,
3088  const CuMatrixBase<BaseFloat> &, //in_value,
3089  const CuMatrixBase<BaseFloat> &, //out_value,
3090  const CuMatrixBase<BaseFloat> &out_deriv,
3091  Component *, //to_update,
3092  CuMatrix<BaseFloat> *in_deriv) const {
3093  KALDI_ASSERT(out_deriv.NumCols() == OutputDim());
3094 
3095  int32 dct_dim = dct_mat_.NumCols(),
3096  dct_keep_dim = dct_mat_.NumRows(),
3097  num_chunks = dim_ / dct_dim,
3098  num_rows = out_deriv.NumRows();
3099 
3100  in_deriv->Resize(num_rows, dim_);
3101 
3102  CuMatrix<BaseFloat> out_deriv_tmp;
3103  if (reorder_) {
3104  out_deriv_tmp = out_deriv;
3105  Reorder(&out_deriv_tmp, false);
3106  }
3107  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
3108  CuSubMatrix<BaseFloat> in_deriv_mat(*in_deriv,
3109  0, num_rows, dct_dim * chunk, dct_dim),
3110  out_deriv_mat(reorder_ ? out_deriv_tmp : out_deriv,
3111  0, num_rows, dct_keep_dim * chunk, dct_keep_dim);
3112 
3113  // Note: in the reverse direction the DCT matrix is transposed. This is
3114  // normal when computing derivatives; the necessity for the transpose is
3115  // obvious if you consider what happens when the input and output dims
3116  // differ.
3117  in_deriv_mat.AddMatMat(1.0, out_deriv_mat, kNoTrans,
3118  dct_mat_, kNoTrans, 0.0);
3119  }
3120  if (reorder_)
3121  Reorder(in_deriv, true);
3122 }
3123 
3125  DctComponent *ans = new DctComponent();
3126  ans->dct_mat_ = dct_mat_;
3127  ans->dim_ = dim_;
3128  ans->reorder_ = reorder_;
3129  return ans;
3130 }
3131 
3132 void DctComponent::Write(std::ostream &os, bool binary) const {
3133  WriteToken(os, binary, "<DctComponent>");
3134  WriteToken(os, binary, "<Dim>");
3135  WriteBasicType(os, binary, dim_);
3136  WriteToken(os, binary, "<DctDim>");
3137  int32 dct_dim = dct_mat_.NumCols();
3138  WriteBasicType(os, binary, dct_dim);
3139  WriteToken(os, binary, "<Reorder>");
3140  WriteBasicType(os, binary, reorder_);
3141  WriteToken(os, binary, "<DctKeepDim>");
3142  int32 dct_keep_dim = dct_mat_.NumRows();
3143  WriteBasicType(os, binary, dct_keep_dim);
3144  WriteToken(os, binary, "</DctComponent>");
3145 }
3146 
3147 void DctComponent::Read(std::istream &is, bool binary) {
3148  ExpectOneOrTwoTokens(is, binary, "<DctComponent>", "<Dim>");
3149  ReadBasicType(is, binary, &dim_);
3150 
3151  ExpectToken(is, binary, "<DctDim>");
3152  int32 dct_dim;
3153  ReadBasicType(is, binary, &dct_dim);
3154 
3155  ExpectToken(is, binary, "<Reorder>");
3156  ReadBasicType(is, binary, &reorder_);
3157 
3158  int32 dct_keep_dim = dct_dim;
3159  std::string token;
3160  ReadToken(is, binary, &token);
3161  if (token == "<DctKeepDim>") {
3162  ReadBasicType(is, binary, &dct_keep_dim);
3163  ExpectToken(is, binary, "</DctComponent>");
3164  } else if (token != "</DctComponent>") {
3165  KALDI_ERR << "Expected token \"</DctComponent>\", got instead \""
3166  << token << "\".";
3167  }
3168 
3169  KALDI_ASSERT(dct_dim > 0 && dim_ > 0 && dim_ % dct_dim == 0);
3170  Init(dim_, dct_dim, reorder_, dct_keep_dim);
3171  //idct_mat_.Resize(dct_keep_dim, dct_dim);
3172  //ComputeDctMatrix(&dct_mat_);
3173 }
3174 
3176  std::string orig_args = args;
3177  std::string filename;
3178  bool ok = ParseFromString("matrix", &args, &filename);
3179 
3180  if (!ok || !args.empty())
3181  KALDI_ERR << "Invalid initializer for layer of type "
3182  << Type() << ": \"" << orig_args << "\"";
3183 
3184  bool binary;
3185  Input ki(filename, &binary);
3186  CuMatrix<BaseFloat> mat;
3187  mat.Read(ki.Stream(), binary);
3188  KALDI_ASSERT(mat.NumRows() != 0);
3189  Init(mat);
3190 }
3191 
3192 
3193 std::string FixedLinearComponent::Info() const {
3194  std::stringstream stream;
3195  BaseFloat mat_size = static_cast<BaseFloat>(mat_.NumRows())
3196  * static_cast<BaseFloat>(mat_.NumCols()),
3197  mat_stddev = std::sqrt(TraceMatMat(mat_, mat_, kTrans) /
3198  mat_size);
3199  stream << Component::Info() << ", params-stddev=" << mat_stddev;
3200  return stream.str();
3201 }
3202 
3204  const ChunkInfo &out_info,
3205  const CuMatrixBase<BaseFloat> &in,
3206  CuMatrixBase<BaseFloat> *out) const {
3207  in_info.CheckSize(in);
3208  out_info.CheckSize(*out);
3209  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
3210 
3211  out->AddMatMat(1.0, in, kNoTrans, mat_, kTrans, 0.0);
3212 }
3213 
3215  const ChunkInfo &, //out_info,
3216  const CuMatrixBase<BaseFloat> &, //in_value,
3217  const CuMatrixBase<BaseFloat> &, //out_value,
3218  const CuMatrixBase<BaseFloat> &out_deriv,
3219  Component *, //to_update, // may be identical to "this".
3220  CuMatrix<BaseFloat> *in_deriv) const {
3221  in_deriv->Resize(out_deriv.NumRows(), mat_.NumCols());
3222  in_deriv->AddMatMat(1.0, out_deriv, kNoTrans, mat_, kNoTrans, 0.0);
3223 }
3224 
3227  ans->Init(mat_);
3228  return ans;
3229 }
3230 
3231 
3232 void FixedLinearComponent::Write(std::ostream &os, bool binary) const {
3233  WriteToken(os, binary, "<FixedLinearComponent>");
3234  WriteToken(os, binary, "<CuMatrix>");
3235  mat_.Write(os, binary);
3236  WriteToken(os, binary, "</FixedLinearComponent>");
3237 }
3238 
3239 void FixedLinearComponent::Read(std::istream &is, bool binary) {
3240  ExpectOneOrTwoTokens(is, binary, "<FixedLinearComponent>", "<CuMatrix>");
3241  mat_.Read(is, binary);
3242  ExpectToken(is, binary, "</FixedLinearComponent>");
3243 }
3244 
3246  KALDI_ASSERT(mat.NumCols() > 1);
3247  linear_params_ = mat.Range(0, mat.NumRows(),
3248  0, mat.NumCols() - 1);
3249  bias_params_.Resize(mat.NumRows());
3250  bias_params_.CopyColFromMat(mat, mat.NumCols() - 1);
3251 }
3252 
3253 
3255  std::string orig_args = args;
3256  std::string filename;
3257  bool ok = ParseFromString("matrix", &args, &filename);
3258 
3259  if (!ok || !args.empty())
3260  KALDI_ERR << "Invalid initializer for layer of type "
3261  << Type() << ": \"" << orig_args << "\"";
3262 
3263  bool binary;
3264  Input ki(filename, &binary);
3265  CuMatrix<BaseFloat> mat;
3266  mat.Read(ki.Stream(), binary);
3267  KALDI_ASSERT(mat.NumRows() != 0);
3268  Init(mat);
3269 }
3270 
3271 
3272 std::string FixedAffineComponent::Info() const {
3273  std::stringstream stream;
3274  BaseFloat linear_params_size = static_cast<BaseFloat>(linear_params_.NumRows())
3275  * static_cast<BaseFloat>(linear_params_.NumCols()),
3276  linear_params_stddev =
3277  std::sqrt(TraceMatMat(linear_params_,
3279  linear_params_size),
3280  bias_params_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
3281  bias_params_.Dim());
3282 
3283  stream << Component::Info() << ", linear-params-stddev=" << linear_params_stddev
3284  << ", bias-params-stddev=" << bias_params_stddev;
3285  return stream.str();
3286 }
3287 
3289  const ChunkInfo &out_info,
3290  const CuMatrixBase<BaseFloat> &in,
3291  CuMatrixBase<BaseFloat> *out) const {
3292  in_info.CheckSize(in);
3293  out_info.CheckSize(*out);
3294  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
3295 
3296  out->AddMatMat(1.0, in, kNoTrans, linear_params_, kTrans, 0.0);
3297  out->AddVecToRows(1.0, bias_params_);
3298 }
3299 
3301  const ChunkInfo &, //out_info,
3302  const CuMatrixBase<BaseFloat> &, //in_value,
3303  const CuMatrixBase<BaseFloat> &, //out_value,
3304  const CuMatrixBase<BaseFloat> &out_deriv,
3305  Component *, //to_update, // may be identical to "this".
3306  CuMatrix<BaseFloat> *in_deriv) const {
3307  in_deriv->Resize(out_deriv.NumRows(), linear_params_.NumCols());
3308  in_deriv->AddMatMat(1.0, out_deriv, kNoTrans, linear_params_, kNoTrans, 0.0);
3309 }
3310 
3314  ans->bias_params_ = bias_params_;
3315  return ans;
3316 }
3317 
3318 
3319 void FixedAffineComponent::Write(std::ostream &os, bool binary) const {
3320  WriteToken(os, binary, "<FixedAffineComponent>");
3321  WriteToken(os, binary, "<LinearParams>");
3322  linear_params_.Write(os, binary);
3323  WriteToken(os, binary, "<BiasParams>");
3324  bias_params_.Write(os, binary);
3325  WriteToken(os, binary, "</FixedAffineComponent>");
3326 }
3327 
3328 void FixedAffineComponent::Read(std::istream &is, bool binary) {
3329  ExpectOneOrTwoTokens(is, binary, "<FixedAffineComponent>", "<LinearParams>");
3330  linear_params_.Read(is, binary);
3331  ExpectToken(is, binary, "<BiasParams>");
3332  bias_params_.Read(is, binary);
3333  ExpectToken(is, binary, "</FixedAffineComponent>");
3334 }
3335 
3336 
3338  KALDI_ASSERT(scales.Dim() != 0);
3339  scales_ = scales;
3340 }
3341 
3342 void FixedScaleComponent::InitFromString(std::string args) {
3343  std::string orig_args = args;
3344  std::string filename;
3345  bool ok = ParseFromString("scales", &args, &filename);
3346 
3347  if (!ok || !args.empty())
3348  KALDI_ERR << "Invalid initializer for layer of type "
3349  << Type() << ": \"" << orig_args << "\"";
3350 
3351  CuVector<BaseFloat> vec;
3352  ReadKaldiObject(filename, &vec);
3353  Init(vec);
3354 }
3355 
3356 
3357 std::string FixedScaleComponent::Info() const {
3358  std::stringstream stream;
3359  BaseFloat scales_size = static_cast<BaseFloat>(scales_.Dim()),
3360  scales_mean = scales_.Sum() / scales_size,
3361  scales_stddev = std::sqrt(VecVec(scales_, scales_) / scales_size
3362  - (scales_mean * scales_mean));
3363  stream << Component::Info() << ", scales-mean=" << scales_mean
3364  << ", scales-stddev=" << scales_stddev;
3365  return stream.str();
3366 }
3367 
3369  const ChunkInfo &out_info,
3370  const CuMatrixBase<BaseFloat> &in,
3371  CuMatrixBase<BaseFloat> *out) const {
3372  out->CopyFromMat(in);
3373  out->MulColsVec(scales_);
3374 }
3375 
3377  const ChunkInfo &, //out_info,
3378  const CuMatrixBase<BaseFloat> &, //in_value,
3379  const CuMatrixBase<BaseFloat> &, //out_value,
3380  const CuMatrixBase<BaseFloat> &out_deriv,
3381  Component *, //to_update, // may be identical to "this".
3382  CuMatrix<BaseFloat> *in_deriv) const {
3383  *in_deriv = out_deriv;
3384  in_deriv->MulColsVec(scales_);
3385 }
3386 
3389  ans->scales_ = scales_;
3390  return ans;
3391 }
3392 
3393 
3394 void FixedScaleComponent::Write(std::ostream &os, bool binary) const {
3395  WriteToken(os, binary, "<FixedScaleComponent>");
3396  WriteToken(os, binary, "<Scales>");
3397  scales_.Write(os, binary);
3398  WriteToken(os, binary, "</FixedScaleComponent>");
3399 }
3400 
3401 void FixedScaleComponent::Read(std::istream &is, bool binary) {
3402  ExpectOneOrTwoTokens(is, binary, "<FixedScaleComponent>", "<Scales>");
3403  scales_.Read(is, binary);
3404  ExpectToken(is, binary, "</FixedScaleComponent>");
3405 }
3406 
3408  KALDI_ASSERT(bias.Dim() != 0);
3409  bias_ = bias;
3410 }
3411 
3412 void FixedBiasComponent::InitFromString(std::string args) {
3413  std::string orig_args = args;
3414  std::string filename;
3415  bool ok = ParseFromString("bias", &args, &filename);
3416 
3417  if (!ok || !args.empty())
3418  KALDI_ERR << "Invalid initializer for layer of type "
3419  << Type() << ": \"" << orig_args << "\"";
3420 
3421  CuVector<BaseFloat> vec;
3422  ReadKaldiObject(filename, &vec);
3423  Init(vec);
3424 }
3425 
3426 
3427 std::string FixedBiasComponent::Info() const {
3428  std::stringstream stream;
3429  BaseFloat bias_size = static_cast<BaseFloat>(bias_.Dim()),
3430  bias_mean = bias_.Sum() / bias_size,
3431  bias_stddev = std::sqrt(VecVec(bias_, bias_) / bias_size)
3432  - (bias_mean * bias_mean);
3433  stream << Component::Info() << ", bias-mean=" << bias_mean
3434  << ", bias-stddev=" << bias_stddev;
3435  return stream.str();
3436 }
3437 
3439  const ChunkInfo &out_info,
3440  const CuMatrixBase<BaseFloat> &in,
3441  CuMatrixBase<BaseFloat> *out) const {
3442  out->CopyFromMat(in);
3443  out->AddVecToRows(1.0, bias_, 1.0);
3444 }
3445 
3446 void FixedBiasComponent::Backprop(const ChunkInfo &, //in_info,
3447  const ChunkInfo &, //out_info,
3448  const CuMatrixBase<BaseFloat> &, //in_value,
3449  const CuMatrixBase<BaseFloat> &, //out_value,
3450  const CuMatrixBase<BaseFloat> &out_deriv,
3451  Component *, //to_update,
3452  CuMatrix<BaseFloat> *in_deriv) const {
3453  *in_deriv = out_deriv;
3454 }
3455 
3458  ans->bias_ = bias_;
3459  return ans;
3460 }
3461 
3462 
3463 void FixedBiasComponent::Write(std::ostream &os, bool binary) const {
3464  WriteToken(os, binary, "<FixedBiasComponent>");
3465  WriteToken(os, binary, "<Bias>");
3466  bias_.Write(os, binary);
3467  WriteToken(os, binary, "</FixedBiasComponent>");
3468 }
3469 
3470 void FixedBiasComponent::Read(std::istream &is, bool binary) {
3471  ExpectOneOrTwoTokens(is, binary, "<FixedBiasComponent>", "<Bias>");
3472  bias_.Read(is, binary);
3473  ExpectToken(is, binary, "</FixedBiasComponent>");
3474 }
3475 
3476 
3477 
3478 
3479 std::string DropoutComponent::Info() const {
3480  std::stringstream stream;
3481  stream << Component::Info() << ", dropout_proportion = "
3482  << dropout_proportion_ << ", dropout_scale = "
3483  << dropout_scale_;
3484  return stream.str();
3485 }
3486 
3487 void DropoutComponent::InitFromString(std::string args) {
3488  std::string orig_args(args);
3489  int32 dim;
3490  BaseFloat dropout_proportion = 0.5, dropout_scale = 0.0;
3491  bool ok = ParseFromString("dim", &args, &dim);
3492  ParseFromString("dropout-proportion", &args, &dropout_proportion);
3493  ParseFromString("dropout-scale", &args, &dropout_scale);
3494 
3495  if (!ok || !args.empty() || dim <= 0)
3496  KALDI_ERR << "Invalid initializer for layer of type DropoutComponent: \""
3497  << orig_args << "\"";
3498  Init(dim, dropout_proportion, dropout_scale);
3499 }
3500 
3501 void DropoutComponent::Read(std::istream &is, bool binary) {
3502  ExpectOneOrTwoTokens(is, binary, "<DropoutComponent>", "<Dim>");
3503  ReadBasicType(is, binary, &dim_);
3504  ExpectToken(is, binary, "<DropoutScale>");
3505  ReadBasicType(is, binary, &dropout_scale_);
3506  ExpectToken(is, binary, "<DropoutProportion>");
3507  ReadBasicType(is, binary, &dropout_proportion_);
3508  ExpectToken(is, binary, "</DropoutComponent>");
3509 }
3510 
3511 void DropoutComponent::Write(std::ostream &os, bool binary) const {
3512  WriteToken(os, binary, "<DropoutComponent>");
3513  WriteToken(os, binary, "<Dim>");
3514  WriteBasicType(os, binary, dim_);
3515  WriteToken(os, binary, "<DropoutScale>");
3516  WriteBasicType(os, binary, dropout_scale_);
3517  WriteToken(os, binary, "<DropoutProportion>");
3518  WriteBasicType(os, binary, dropout_proportion_);
3519  WriteToken(os, binary, "</DropoutComponent>");
3520 }
3521 
3522 
3524  BaseFloat dropout_proportion,
3525  BaseFloat dropout_scale){
3526  dim_ = dim;
3527  dropout_proportion_ = dropout_proportion;
3528  dropout_scale_ = dropout_scale;
3529 }
3530 
3532  const ChunkInfo &out_info,
3533  const CuMatrixBase<BaseFloat> &in,
3534  CuMatrixBase<BaseFloat> *out) const {
3535  in_info.CheckSize(in);
3536  out_info.CheckSize(*out);
3537  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
3538  KALDI_ASSERT(in.NumCols() == this->InputDim());
3539 
3540  BaseFloat dp = dropout_proportion_;
3541  KALDI_ASSERT(dp < 1.0 && dp >= 0.0);
3542  KALDI_ASSERT(dropout_scale_ <= 1.0 && dropout_scale_ >= 0.0);
3543 
3544  BaseFloat low_scale = dropout_scale_,
3545  high_scale = (1.0 - (dp * low_scale)) / (1.0 - dp),
3546  average = (low_scale * dp) +
3547  (high_scale * (1.0 - dp));
3548  KALDI_ASSERT(fabs(average - 1.0) < 0.01);
3549 
3550  // This const_cast is only safe assuming you don't attempt
3551  // to use multi-threaded code with the GPU.
3552  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
3553 
3554 
3555  out->Add(-dp); // now, a proportion "dp" will be <0.0
3556  out->ApplyHeaviside(); // apply the function (x>0?1:0). Now, a proportion "dp" will
3557  // be zero and (1-dp) will be 1.0.
3558  if ((high_scale - low_scale) != 1.0)
3559  out->Scale(high_scale - low_scale); // now, "dp" are 0 and (1-dp) are "high_scale-low_scale".
3560  if (low_scale != 0.0)
3561  out->Add(low_scale); // now "dp" equal "low_scale" and (1.0-dp) equal "high_scale".
3562 
3563  out->MulElements(in);
3564 }
3565 
// Backprop for dropout.  The forward pass computed out = mask .* in
// (elementwise), so the derivative w.r.t. the input is the mask itself,
// which we recover elementwise as out_value / in_value; hence
// in_deriv = out_deriv .* out_value ./ in_value.
// Unused parameters (chunk infos, to_update) are intentionally anonymous.
void DropoutComponent::Backprop(const ChunkInfo &, //in_info,
                                const ChunkInfo &, //out_info,
                                const CuMatrixBase<BaseFloat> &in_value,
                                const CuMatrixBase<BaseFloat> &out_value,
                                const CuMatrixBase<BaseFloat> &out_deriv,
                                Component *, //to_update
                                CuMatrix<BaseFloat> *in_deriv) const {
  KALDI_ASSERT(SameDim(in_value, out_value) && SameDim(in_value, out_deriv));
  in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
  in_deriv->SetMatMatDivMat(out_deriv, out_value, in_value);
}
3577 
3579  return new DropoutComponent(dim_,
3580  dropout_proportion_,
3581  dropout_scale_);
3582 }
3583 
3585  std::string orig_args(args);
3586  int32 dim;
3587  BaseFloat stddev = 1.0;
3588  bool ok = ParseFromString("dim", &args, &dim);
3589  ParseFromString("stddev", &args, &stddev);
3590 
3591  if (!ok || !args.empty() || dim <= 0)
3592  KALDI_ERR << "Invalid initializer for layer of type AdditiveNoiseComponent: \""
3593  << orig_args << "\"";
3594  Init(dim, stddev);
3595 }
3596 
// Reads the component from a stream; token order mirrors Write().
// The opening tag may already have been consumed by Component::ReadNew(),
// hence ExpectOneOrTwoTokens.
void AdditiveNoiseComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<AdditiveNoiseComponent>", "<Dim>");
  ReadBasicType(is, binary, &dim_);
  ExpectToken(is, binary, "<Stddev>");
  ReadBasicType(is, binary, &stddev_);
  ExpectToken(is, binary, "</AdditiveNoiseComponent>");
}
3604 
// Writes the component to a stream; token order must stay in sync with
// Read(): <Dim> then <Stddev>.
void AdditiveNoiseComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<AdditiveNoiseComponent>");
  WriteToken(os, binary, "<Dim>");
  WriteBasicType(os, binary, dim_);
  WriteToken(os, binary, "<Stddev>");
  WriteBasicType(os, binary, stddev_);
  WriteToken(os, binary, "</AdditiveNoiseComponent>");
}
3613 
3615  dim_ = dim;
3616  stddev_ = stddev;
3617 }
3618 
3620  const ChunkInfo &out_info,
3621  const CuMatrixBase<BaseFloat> &in,
3622  CuMatrixBase<BaseFloat> *out) const {
3623  KALDI_ASSERT(in.NumCols() == this->InputDim());
3624  out->CopyFromMat(in);
3625  CuMatrix<BaseFloat> rand(in.NumRows(), in.NumCols());
3626  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(&rand);
3627  out->AddMat(stddev_, rand);
3628 }
3629 
3632  patch_dim_(0), patch_step_(0), patch_stride_(0),
3633  appended_conv_(false), is_gradient_(false) {}
3634 
3636  UpdatableComponent(component),
3637  filter_params_(component.filter_params_),
3638  bias_params_(component.bias_params_),
3639  appended_conv_(component.appended_conv_),
3640  is_gradient_(component.is_gradient_) {}
3641 
3643  const CuVectorBase<BaseFloat> &bias_params,
3644  BaseFloat learning_rate):
3645  UpdatableComponent(learning_rate),
3646  filter_params_(filter_params),
3647  bias_params_(bias_params) {
3648  KALDI_ASSERT(filter_params.NumRows() == bias_params.Dim() &&
3649  bias_params.Dim() != 0);
3650  appended_conv_ = false;
3651  is_gradient_ = false;
3652 }
3653 
3654 // aquire input dim
3656  int32 filter_dim = filter_params_.NumCols();
3657  int32 num_splice = filter_dim / patch_dim_;
3658  return patch_stride_ * num_splice;
3659 }
3660 
3661 // aquire output dim
3663  int32 num_filters = filter_params_.NumRows();
3664  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3665  return num_patches * num_filters;
3666 }
3667 
3668 // initialize the component using hyperparameters
3670  int32 input_dim, int32 output_dim,
3671  int32 patch_dim, int32 patch_step,
3672  int32 patch_stride, BaseFloat param_stddev,
3673  BaseFloat bias_stddev, bool appended_conv) {
3674  UpdatableComponent::Init(learning_rate);
3675  patch_dim_ = patch_dim;
3676  patch_step_ = patch_step;
3677  patch_stride_ = patch_stride;
3678  appended_conv_ = appended_conv;
3679  int32 num_splice = input_dim / patch_stride;
3680  int32 filter_dim = num_splice * patch_dim;
3681  int32 num_patches = 1 + (patch_stride - patch_dim) / patch_step;
3682  int32 num_filters = output_dim / num_patches;
3683  KALDI_ASSERT(input_dim % patch_stride == 0);
3684  KALDI_ASSERT((patch_stride - patch_dim) % patch_step == 0);
3685  KALDI_ASSERT(output_dim % num_patches == 0);
3686 
3687  filter_params_.Resize(num_filters, filter_dim);
3688  bias_params_.Resize(num_filters);
3689  KALDI_ASSERT(param_stddev >= 0.0 && bias_stddev >= 0.0);
3690  filter_params_.SetRandn();
3691  filter_params_.Scale(param_stddev);
3692  bias_params_.SetRandn();
3693  bias_params_.Scale(bias_stddev);
3694 }
3695 
// Initialize the component from a predefined parameter matrix read from
// disk.  Each row of the matrix corresponds to one filter: the first
// (NumCols() - 1) entries are the filter weights and the last column
// holds the bias for that filter.
void Convolutional1dComponent::Init(BaseFloat learning_rate, int32 patch_dim,
                                    int32 patch_step, int32 patch_stride,
                                    std::string matrix_filename,
                                    bool appended_conv) {
  UpdatableComponent::Init(learning_rate);
  patch_dim_ = patch_dim;
  patch_step_ = patch_step;
  patch_stride_ = patch_stride;
  appended_conv_ = appended_conv;
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat);
  // Need at least one weight column plus the trailing bias column.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 filter_dim = mat.NumCols() - 1, num_filters = mat.NumRows();
  filter_params_.Resize(num_filters, filter_dim);
  bias_params_.Resize(num_filters);
  filter_params_.CopyFromMat(mat.Range(0, num_filters, 0, filter_dim));
  bias_params_.CopyColFromMat(mat, filter_dim);
}
3715 
3716 // resize the component, setting the parameters to zero, while
3717 // leaving any other configuration values the same
3718 void Convolutional1dComponent::Resize(int32 input_dim, int32 output_dim) {
3719  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
3720  int32 num_splice = input_dim / patch_stride_;
3721  int32 filter_dim = num_splice * patch_dim_;
3722  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3723  int32 num_filters = output_dim / num_patches;
3724  KALDI_ASSERT(input_dim % patch_stride_ == 0);
3725  KALDI_ASSERT((patch_stride_ - patch_dim_) % patch_step_ == 0);
3726  KALDI_ASSERT(output_dim % num_patches == 0);
3727  filter_params_.Resize(num_filters, filter_dim);
3728  bias_params_.Resize(num_filters);
3729 }
3730 
3731 // display information about component
3732 std::string Convolutional1dComponent::Info() const {
3733  std::stringstream stream;
3734  BaseFloat filter_params_size = static_cast<BaseFloat>(filter_params_.NumRows())
3735  * static_cast<BaseFloat>(filter_params_.NumCols());
3736  BaseFloat filter_stddev =
3738  filter_params_size),
3739  bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
3740  bias_params_.Dim());
3741 
3742  int32 num_splice = InputDim() / patch_stride_;
3743  int32 filter_dim = num_splice * patch_dim_;
3744  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3745  int32 num_filters = OutputDim() / num_patches;
3746 
3747  stream << Type() << ", input-dim=" << InputDim()
3748  << ", output-dim=" << OutputDim()
3749  << ", num-splice=" << num_splice
3750  << ", num-patches=" << num_patches
3751  << ", num-filters=" << num_filters
3752  << ", filter-dim=" << filter_dim
3753  << ", filter-params-stddev=" << filter_stddev
3754  << ", bias-params-stddev=" << bias_stddev
3755  << ", appended-conv=" << appended_conv_
3756  << ", learning-rate=" << LearningRate();
3757  return stream.str();
3758 }
3759 
3760 // initialize the component using configuration file
3762  std::string orig_args(args);
3763  bool ok = true, appended_conv = false;
3764  BaseFloat learning_rate = learning_rate_;
3765  std::string matrix_filename;
3766  int32 input_dim = -1, output_dim = -1;
3767  int32 patch_dim = -1, patch_step = -1, patch_stride = -1;
3768  ParseFromString("learning-rate", &args, &learning_rate);
3769  ParseFromString("appended-conv", &args, &appended_conv);
3770  ok = ok && ParseFromString("patch-dim", &args, &patch_dim);
3771  ok = ok && ParseFromString("patch-step", &args, &patch_step);
3772  ok = ok && ParseFromString("patch-stride", &args, &patch_stride);
3773  if (ParseFromString("matrix", &args, &matrix_filename)) {
3774  // initialize from prefined parameter matrix
3775  Init(learning_rate, patch_dim, patch_step, patch_stride,
3776  matrix_filename, appended_conv);
3777  if (ParseFromString("input-dim", &args, &input_dim))
3778  KALDI_ASSERT(input_dim == InputDim() &&
3779  "input-dim mismatch vs. matrix.");
3780  if (ParseFromString("output-dim", &args, &output_dim))
3781  KALDI_ASSERT(output_dim == OutputDim() &&
3782  "output-dim mismatch vs. matrix.");
3783  } else {
3784  // initialize from configuration
3785  ok = ok && ParseFromString("input-dim", &args, &input_dim);
3786  ok = ok && ParseFromString("output-dim", &args, &output_dim);
3787  BaseFloat param_stddev = 1.0 / std::sqrt(input_dim), bias_stddev = 1.0;
3788  ParseFromString("param-stddev", &args, &param_stddev);
3789  ParseFromString("bias-stddev", &args, &bias_stddev);
3790  Init(learning_rate, input_dim, output_dim, patch_dim,
3791  patch_step, patch_stride, param_stddev, bias_stddev, appended_conv);
3792  }
3793  if (!args.empty())
3794  KALDI_ERR << "Could not process these elements in initializer: " << args;
3795  if (!ok)
3796  KALDI_ERR << "Bad initializer " << orig_args;
3797 }
3798 
3799 // propagation function
3800 
3801 /*
3802  In Convolution1dComponent, filter is defined $num-filters x $filter-dim,
3803  and bias vector B is defined by length $num-filters. The propatation is
3804  Y = X o A' + B
3805  where "o" is executing matrix-matrix convolution, which consists of a group
3806  of vector-matrix convolutions.
3807  For instance, the convolution of X(t) and the i-th filter A(i) is
3808  Y(t,i) = X(t) o A'(i) + B(i)
3809  The convolution used here is valid convolution. Meaning that the
3810  output of M o N is of dim |M| - |N| + 1, assuming M is not shorter then N.
3811 
3812  By default, input is arranged by
3813  x (time), y (channel), z(frequency)
3814  and output is arranged by
3815  x (time), y (frequency), z(channel).
3816  When appending convolutional1dcomponent, appended_conv_ should be
3817  set ture for the appended convolutional1dcomponent.
3818 */
3820  const ChunkInfo &out_info,
3821  const CuMatrixBase<BaseFloat> &in,
3822  CuMatrixBase<BaseFloat> *out) const {
3823  in_info.CheckSize(in);
3824  out_info.CheckSize(*out);
3825  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
3826 
3827  // dims
3828  int32 num_splice = InputDim() / patch_stride_;
3829  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3830  int32 num_filters = filter_params_.NumRows();
3831  int32 num_frames = in.NumRows();
3832  int32 filter_dim = filter_params_.NumCols();
3833 
3838  CuMatrix<BaseFloat> patches(num_frames, filter_dim * num_patches, kUndefined);
3839  // column_map is indexed by the column-index of "patches",
3840  // and the value is the corresponding column-index of "in".
3841  std::vector<int32> column_map(filter_dim * num_patches);
3842 
3843  // build-up a column selection map
3844  for (int32 patch = 0, index = 0; patch < num_patches; patch++) {
3845  int32 fstride = patch * patch_step_;
3846  for (int32 splice = 0; splice < num_splice; splice++) {
3847  int32 cstride = splice * patch_stride_;
3848  for (int32 d = 0; d < patch_dim_; d++, index++) {
3849  if (appended_conv_)
3850  column_map[index] = (fstride + d) * num_splice + splice;
3851  else
3852  column_map[index] = fstride + cstride + d;
3853  }
3854  }
3855  }
3856  CuArray<int32> cu_cols(column_map);
3857  patches.CopyCols(in, cu_cols);
3858 
3859  //
3860  // compute filter activations
3861  //
3862 
3863  std::vector<CuSubMatrix<BaseFloat>* > tgt_batch, patch_batch, filter_params_batch;
3864 
3865  CuSubMatrix<BaseFloat>* filter_params_elem = new CuSubMatrix<BaseFloat>(
3866  filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols());
3867 
3868  // form batch in vector container
3869  for (int32 p = 0; p < num_patches; p++) {
3870  // form batch in vector container. for filter_params_batch, all elements
3871  // point to the same copy filter_params_elem
3872  tgt_batch.push_back(new CuSubMatrix<BaseFloat>(out->ColRange(p * num_filters,
3873  num_filters)));
3874  patch_batch.push_back(new CuSubMatrix<BaseFloat>(
3875  patches.ColRange(p * filter_dim, filter_dim)));
3876  filter_params_batch.push_back(filter_params_elem);
3877 
3878  tgt_batch[p]->AddVecToRows(1.0, bias_params_, 0.0); // add bias
3879  }
3880 
3881  // apply all filters
3882  AddMatMatBatched<BaseFloat>(1.0, tgt_batch, patch_batch, kNoTrans,
3883  filter_params_batch, kTrans, 1.0);
3884 
3885  // release memory
3886  delete filter_params_elem;
3887  for (int32 p = 0; p < num_patches; p++) {
3888  delete tgt_batch[p];
3889  delete patch_batch[p];
3890  }
3891 }
3892 
3893 // scale the parameters
3895  filter_params_.Scale(scale);
3896  bias_params_.Scale(scale);
3897 }
3898 
3899 // add another convolution component
3901  const Convolutional1dComponent *other =
3902  dynamic_cast<const Convolutional1dComponent*>(&other_in);
3903  KALDI_ASSERT(other != NULL);
3904  filter_params_.AddMat(alpha, other->filter_params_);
3905  bias_params_.AddVec(alpha, other->bias_params_);
3906 }
3907 
3908 /*
3909  This function does an operation similar to reversing a map,
3910  except it handles maps that are not one-to-one by outputting
3911  the reversed map as a vector of lists.
3912  @param[in] forward_indexes is a vector of int32, each of whose
3913  elements is between 0 and input_dim - 1.
3914  @param[in] input_dim. See definitions of forward_indexes and
3915  backward_indexes.
3916  @param[out] backward_indexes is a vector of dimension input_dim
3917  of lists, The list at (backward_indexes[i]) is a list
3918  of all indexes j such that forward_indexes[j] = i.
3919 */
3920 void Convolutional1dComponent::ReverseIndexes(const std::vector<int32> &forward_indexes,
3921  int32 input_dim,
3922  std::vector<std::vector<int32> > *backward_indexes) {
3923  int32 i, size = forward_indexes.size();
3924  int32 reserve_size = 2 + size / input_dim;
3925  backward_indexes->resize(input_dim);
3926  std::vector<std::vector<int32> >::iterator iter = backward_indexes->begin(),
3927  end = backward_indexes->end();
3928  for (; iter != end; ++iter)
3929  iter->reserve(reserve_size);
3930  for (int32 j = 0; j < forward_indexes.size(); j++) {
3931  i = forward_indexes[j];
3932  KALDI_ASSERT(i < input_dim);
3933  (*backward_indexes)[i].push_back(j);
3934  }
3935 }
3936 
3937 /*
3938  This function transforms a vector of lists into a list of vectors,
3939  padded with -1.
3940  @param[in] The input vector of lists. Let in.size() be D, and let
3941  the longest list length (i.e. the max of in[i].size()) be L.
3942  @param[out] The output list of vectors. The length of the list will
3943  be L, each vector-dimension will be D (i.e. out[i].size() == D),
3944  and if in[i] == j, then for some k we will have that
3945  out[k][j] = i. The output vectors are padded with -1
3946  where necessary if not all the input lists have the same side.
3947 */
3948 void Convolutional1dComponent::RearrangeIndexes(const std::vector<std::vector<int32> > &in,
3949  std::vector<std::vector<int32> > *out) {
3950  int32 D = in.size();
3951  int32 L = 0;
3952  for (int32 i = 0; i < D; i++)
3953  if (in[i].size() > L)
3954  L = in[i].size();
3955  out->resize(L);
3956  for (int32 i = 0; i < L; i++)
3957  (*out)[i].resize(D, -1);
3958  for (int32 i = 0; i < D; i++) {
3959  for (int32 j = 0; j < in[i].size(); j++) {
3960  (*out)[j][i] = in[i][j];
3961  }
3962  }
3963 }
3964 
3965 // back propagation function
3967  const ChunkInfo &out_info,
3968  const CuMatrixBase<BaseFloat> &in_value,
3969  const CuMatrixBase<BaseFloat> &out_value,
3970  const CuMatrixBase<BaseFloat> &out_deriv,
3971  Component *to_update_in,
3972  CuMatrix<BaseFloat> *in_deriv) const {
3973  in_deriv->Resize(out_deriv.NumRows(), InputDim());
3974  Convolutional1dComponent *to_update = dynamic_cast<Convolutional1dComponent*>(to_update_in);
3975  int32 num_splice = InputDim() / patch_stride_;
3976  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
3977  int32 num_filters = filter_params_.NumRows();
3978  int32 num_frames = out_deriv.NumRows();
3979  int32 filter_dim = filter_params_.NumCols();
3980 
3986  CuMatrix<BaseFloat> patches_deriv(num_frames, filter_dim * num_patches, kSetZero);
3987 
3988  //
3989  // backpropagate to vector of matrices
3990  // (corresponding to position of a filter)
3991  //
3992  std::vector<CuSubMatrix<BaseFloat>* > patch_deriv_batch, out_deriv_batch,
3993  filter_params_batch;
3994 
3995  CuSubMatrix<BaseFloat>* filter_params_elem = new CuSubMatrix<BaseFloat>(
3996  filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols());
3997 
3998  // form batch in vector container
3999  for (int32 p = 0; p < num_patches; p++) {
4000  // form batch in vector container. for filter_params_batch, all elements
4001  // point to the same copy filter_params_elem
4002  patch_deriv_batch.push_back(new CuSubMatrix<BaseFloat>(patches_deriv.ColRange(
4003  p * filter_dim, filter_dim)));
4004  out_deriv_batch.push_back(new CuSubMatrix<BaseFloat>(out_deriv.ColRange(
4005  p * num_filters, num_filters)));
4006  filter_params_batch.push_back(filter_params_elem);
4007  }
4008  AddMatMatBatched<BaseFloat>(1.0, patch_deriv_batch, out_deriv_batch, kNoTrans,
4009  filter_params_batch, kNoTrans, 0.0);
4010 
4011  // release memory
4012  delete filter_params_elem;
4013  for (int32 p = 0; p < num_patches; p++) {
4014  delete patch_deriv_batch[p];
4015  delete out_deriv_batch[p];
4016  }
4017 
4018  // sum the derivatives into in_deriv
4019  std::vector<int32> column_map(filter_dim * num_patches);
4020  for (int32 patch = 0, index = 0; patch < num_patches; patch++) {
4021  int32 fstride = patch * patch_step_;
4022  for (int32 splice = 0; splice < num_splice; splice++) {
4023  int32 cstride = splice * patch_stride_;
4024  for (int32 d = 0; d < patch_dim_; d++, index++) {
4025  if (appended_conv_)
4026  column_map[index] = (fstride + d) * num_splice + splice;
4027  else
4028  column_map[index] = fstride + cstride + d;
4029  }
4030  }
4031  }
4032  std::vector<std::vector<int32> > reversed_column_map;
4033  ReverseIndexes(column_map, InputDim(), &reversed_column_map);
4034  std::vector<std::vector<int32> > rearranged_column_map;
4035  RearrangeIndexes(reversed_column_map, &rearranged_column_map);
4036  for (int32 p = 0; p < rearranged_column_map.size(); p++) {
4037  CuArray<int32> cu_cols(rearranged_column_map[p]);
4038  in_deriv->AddCols(patches_deriv, cu_cols);
4039  }
4040 
4041  if (to_update != NULL) {
4042  // Next update the model (must do this 2nd so the derivatives we propagate
4043  // are accurate, in case this == to_update_in.)
4044  to_update->Update(in_value, out_deriv);
4045  }
4046 }
4047 
4048 void Convolutional1dComponent::SetZero(bool treat_as_gradient) {
4049  if (treat_as_gradient) {
4050  SetLearningRate(1.0);
4051  }
4052  filter_params_.SetZero();
4053  bias_params_.SetZero();
4054  if (treat_as_gradient) {
4055  is_gradient_ = true;
4056  }
4057 }
4058 
// Reads the component from a stream; token order mirrors Write().
// Two tokens are optional for back-compatibility with older model files:
// <AppendedConv> and <IsGradient>, both of which default to false when
// absent.
void Convolutional1dComponent::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<Convolutional1dComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</Convolutional1dComponent>"
  // might not see the "<Convolutional1dComponent>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectToken(is, binary, "<PatchDim>");
  ReadBasicType(is, binary, &patch_dim_);
  ExpectToken(is, binary, "<PatchStep>");
  ReadBasicType(is, binary, &patch_step_);
  ExpectToken(is, binary, "<PatchStride>");
  ReadBasicType(is, binary, &patch_stride_);
  // back-compatibility: older files go straight to <FilterParams>
  // without an <AppendedConv> token.
  std::string tok;
  ReadToken(is, binary, &tok);
  if (tok == "<AppendedConv>") {
    ReadBasicType(is, binary, &appended_conv_);
    ExpectToken(is, binary, "<FilterParams>");
  } else {
    appended_conv_ = false;
    KALDI_ASSERT(tok == "<FilterParams>");
  }
  filter_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  // <IsGradient> is likewise optional; if absent the next token must be
  // the closing tag.
  ReadToken(is, binary, &tok);
  if (tok == "<IsGradient>") {
    ReadBasicType(is, binary, &is_gradient_);
    ExpectToken(is, binary, ostr_end.str());
  } else {
    is_gradient_ = false;
    KALDI_ASSERT(tok == ostr_end.str());
  }
}
4095 
// Writes the component to a stream; token order must stay in sync with
// Read().  Note that Write() always emits the optional <AppendedConv> and
// <IsGradient> tokens, even though Read() tolerates their absence.
void Convolutional1dComponent::Write(std::ostream &os, bool binary) const {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<Convolutional1dComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</Convolutional1dComponent>"
  WriteToken(os, binary, ostr_beg.str());
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<PatchDim>");
  WriteBasicType(os, binary, patch_dim_);
  WriteToken(os, binary, "<PatchStep>");
  WriteBasicType(os, binary, patch_step_);
  WriteToken(os, binary, "<PatchStride>");
  WriteBasicType(os, binary, patch_stride_);
  WriteToken(os, binary, "<AppendedConv>");
  WriteBasicType(os, binary, appended_conv_);
  WriteToken(os, binary, "<FilterParams>");
  filter_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<IsGradient>");
  WriteBasicType(os, binary, is_gradient_);
  WriteToken(os, binary, ostr_end.str());
}
4119 
4121  const Convolutional1dComponent *other =
4122  dynamic_cast<const Convolutional1dComponent*>(&other_in);
4124  + VecVec(bias_params_, other->bias_params_);
4125 }
4126 
4130  ans->patch_dim_ = patch_dim_;
4131  ans->patch_step_ = patch_step_;
4134  ans->bias_params_ = bias_params_;
4136  ans->is_gradient_ = is_gradient_;
4137  return ans;
4138 }
4139 
4141  CuMatrix<BaseFloat> temp_filter_params(filter_params_);
4142  temp_filter_params.SetRandn();
4143  filter_params_.AddMat(stddev, temp_filter_params);
4144 
4145  CuVector<BaseFloat> temp_bias_params(bias_params_);
4146  temp_bias_params.SetRandn();
4147  bias_params_.AddVec(stddev, temp_bias_params);
4148 }
4149 
4151  const MatrixBase<BaseFloat> &filter) {
4152  bias_params_ = bias;
4153  filter_params_ = filter;
4154  KALDI_ASSERT(bias_params_.Dim() == filter_params_.NumRows());
4155 }
4156 
4158  return (filter_params_.NumCols() + 1) * filter_params_.NumRows();
4159 }
4160 
4161 // update parameters
4163  const CuMatrixBase<BaseFloat> &out_deriv) {
4164  // useful dims
4165  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
4166  int32 num_filters = filter_params_.NumRows();
4167  int32 filter_dim = filter_params_.NumCols();
4168  int32 num_frames = in_value.NumRows();
4169  int32 num_splice = InputDim() / patch_stride_;
4170  CuMatrix<BaseFloat> filters_grad;
4171  CuVector<BaseFloat> bias_grad;
4172 
4177  CuMatrix<BaseFloat> patches(num_frames, filter_dim * num_patches, kUndefined);
4178  std::vector<int32> column_map(filter_dim * num_patches);
4179  for (int32 patch = 0, index = 0; patch < num_patches; patch++) {
4180  int32 fstride = patch * patch_step_;
4181  for (int32 splice = 0; splice < num_splice; splice++) {
4182  int32 cstride = splice * patch_stride_;
4183  for (int32 d = 0; d < patch_dim_; d++, index++) {
4184  if (appended_conv_)
4185  column_map[index] = (fstride + d) * num_splice + splice;
4186  else
4187  column_map[index] = fstride + cstride + d;
4188  }
4189  }
4190  }
4191  CuArray<int32> cu_cols(column_map);
4192  patches.CopyCols(in_value, cu_cols);
4193 
4194  //
4195  // calculate the gradient
4196  //
4197  filters_grad.Resize(num_filters, filter_dim, kSetZero); // reset
4198  bias_grad.Resize(num_filters, kSetZero); // reset
4199 
4200  //
4201  // use all the patches
4202  //
4203 
4204  // create a single large matrix holding the smaller matrices
4205  // from the vector container filters_grad_batch along the rows
4206  CuMatrix<BaseFloat> filters_grad_blocks_batch(
4207  num_patches * filters_grad.NumRows(), filters_grad.NumCols());
4208 
4209  std::vector<CuSubMatrix<BaseFloat>* > filters_grad_batch, diff_patch_batch,
4210  patch_batch;
4211  for (int32 p = 0; p < num_patches; p++) {
4212  // form batch in vector container
4213  filters_grad_batch.push_back(new CuSubMatrix<BaseFloat>(
4214  filters_grad_blocks_batch.RowRange(
4215  p * filters_grad.NumRows(),
4216  filters_grad.NumRows())));
4217  diff_patch_batch.push_back(new CuSubMatrix<BaseFloat>(out_deriv.ColRange(
4218  p * num_filters, num_filters)));
4219  patch_batch.push_back(new CuSubMatrix<BaseFloat>(patches.ColRange(
4220  p * filter_dim, filter_dim)));
4221  }
4222 
4223  AddMatMatBatched<BaseFloat>(1.0, filters_grad_batch, diff_patch_batch,
4224  kTrans, patch_batch, kNoTrans, 1.0);
4225 
4226  // add the row blocks together to filters_grad
4227  filters_grad.AddMatBlocks(1.0, filters_grad_blocks_batch);
4228 
4229  // create a matrix holding the col blocks sum of out_deriv
4230  CuMatrix<BaseFloat> out_deriv_col_blocks_sum(out_deriv.NumRows(), num_filters);
4231 
4232  // add the col blocks together to out_deriv_col_blocks_sum
4233  out_deriv_col_blocks_sum.AddMatBlocks(1.0, out_deriv);
4234 
4235  bias_grad.AddRowSumMat(1.0, out_deriv_col_blocks_sum, 1.0);
4236 
4237  // release memory
4238  for (int32 p = 0; p < num_patches; p++) {
4239  delete filters_grad_batch[p];
4240  delete diff_patch_batch[p];
4241  delete patch_batch[p];
4242  }
4243 
4244  //
4245  // update
4246  //
4247  filter_params_.AddMat(learning_rate_, filters_grad);
4248  bias_params_.AddVec(learning_rate_, bias_grad);
4249 }
4250 
4251 void MaxpoolingComponent::Init(int32 input_dim, int32 output_dim,
4252  int32 pool_size, int32 pool_stride) {
4253  input_dim_ = input_dim;
4254  output_dim_ = output_dim;
4255  pool_size_ = pool_size;
4256  pool_stride_ = pool_stride;
4257 
4258  // sanity check
4259  // number of patches
4260  KALDI_ASSERT(input_dim_ % pool_stride_ == 0);
4261  int32 num_patches = input_dim_ / pool_stride_;
4262  // number of pools
4263  KALDI_ASSERT(num_patches % pool_size_ == 0);
4264  int32 num_pools = num_patches / pool_size_;
4265  // check output dim
4266  KALDI_ASSERT(output_dim_ == num_pools * pool_stride_);
4267 }
4268 
4269 void MaxpoolingComponent::InitFromString(std::string args) {
4270  std::string orig_args(args);
4271  int32 input_dim = 0;
4272  int32 output_dim = 0;
4273  int32 pool_size = -1, pool_stride = -1;
4274  bool ok = true;
4275 
4276  ok = ok && ParseFromString("input-dim", &args, &input_dim);
4277  ok = ok && ParseFromString("output-dim", &args, &output_dim);
4278  ok = ok && ParseFromString("pool-size", &args, &pool_size);
4279  ok = ok && ParseFromString("pool-stride", &args, &pool_stride);
4280 
4281  KALDI_LOG << output_dim << " " << input_dim << " " << ok;
4282  KALDI_LOG << "Pool: " << pool_size << " "
4283  << pool_stride << " " << ok;
4284  if (!ok || !args.empty() || output_dim <= 0)
4285  KALDI_ERR << "Invalid initializer for layer of type "
4286  << Type() << ": \"" << orig_args << "\"";
4287  Init(input_dim, output_dim, pool_size, pool_stride);
4288 }
4289 
4290 /*
4291  Input and output of maxpooling component is arranged as
4292  x (time), y (frequency), z (channel)
4293  for efficient pooling.
4294  */
4296  const ChunkInfo &out_info,
4297  const CuMatrixBase<BaseFloat> &in,
4298  CuMatrixBase<BaseFloat> *out) const {
4299  in_info.CheckSize(in);
4300  out_info.CheckSize(*out);
4301  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
4302  int32 num_patches = input_dim_ / pool_stride_;
4303  int32 num_pools = num_patches / pool_size_;
4304 
4305  // do the max-pooling
4306  for (int32 q = 0; q < num_pools; q++) {
4307  // get output buffer of the pool
4308  CuSubMatrix<BaseFloat> pool(out->ColRange(q * pool_stride_, pool_stride_));
4309  pool.Set(-1e20); // reset a large negative value
4310  for (int32 r = 0; r < pool_size_; r++) {
4311  // col-by-col block comparison pool
4312  int32 p = r + q * pool_size_;
4313  pool.Max(in.ColRange(p * pool_stride_, pool_stride_));
4314  }
4315  }
4316 }
4317 
// Backprop for max-pooling.  The derivative flows only to the input
// elements that achieved the pooled maximum; where a patch belongs to
// several (overlapping) pools, its accumulated derivative is averaged over
// the number of pools it contributed to.
void MaxpoolingComponent::Backprop(const ChunkInfo &, // in_info,
                                   const ChunkInfo &, // out_info,
                                   const CuMatrixBase<BaseFloat> &in_value,
                                   const CuMatrixBase<BaseFloat> &out_value,
                                   const CuMatrixBase<BaseFloat> &out_deriv,
                                   Component *to_update,
                                   CuMatrix<BaseFloat> *in_deriv) const {
  int32 num_patches = input_dim_ / pool_stride_;
  int32 num_pools = num_patches / pool_size_;
  // patch_summands[p] counts how many pools patch p took part in.
  std::vector<int32> patch_summands(num_patches, 0);
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);

  for(int32 q = 0; q < num_pools; q++) {
    for(int32 r = 0; r < pool_size_; r++) {
      int32 p = r + q * pool_size_;  // index of the patch in pool q
      CuSubMatrix<BaseFloat> in_p(in_value.ColRange(p * pool_stride_, pool_stride_));
      CuSubMatrix<BaseFloat> out_q(out_value.ColRange(q * pool_stride_, pool_stride_));
      CuSubMatrix<BaseFloat> tgt(in_deriv->ColRange(p * pool_stride_, pool_stride_));
      CuMatrix<BaseFloat> src(out_deriv.ColRange(q * pool_stride_, pool_stride_));
      // zero-out mask: selects the elements where the input equals the
      // pooled output (i.e. the elements that were the max), so only they
      // receive the derivative.
      CuMatrix<BaseFloat> mask;
      in_p.EqualElementMask(out_q, &mask);
      src.MulElements(mask);
      tgt.AddMat(1.0, src);
      // summed deriv info: remember this patch got one more contribution
      patch_summands[p] += 1;
    }
  }

  // scale in_deriv of overlaped pools: average each patch's derivative
  // over the number of pools it belonged to.
  for(int32 p = 0; p < num_patches; p++) {
    CuSubMatrix<BaseFloat> tgt(in_deriv->ColRange(p * pool_stride_, pool_stride_));
    KALDI_ASSERT(patch_summands[p] > 0);
    tgt.Scale(1.0 / patch_summands[p]);
  }
}
4354 
// Reads the component from a stream; token order mirrors Write().
// The opening tag may already have been consumed by Component::ReadNew(),
// hence ExpectOneOrTwoTokens.
void MaxpoolingComponent::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<MaxpoolingComponent>", "<InputDim>");
  ReadBasicType(is, binary, &input_dim_);
  ExpectToken(is, binary, "<OutputDim>");
  ReadBasicType(is, binary, &output_dim_);
  ExpectToken(is, binary, "<PoolSize>");
  ReadBasicType(is, binary, &pool_size_);
  ExpectToken(is, binary, "<PoolStride>");
  ReadBasicType(is, binary, &pool_stride_);
  ExpectToken(is, binary, "</MaxpoolingComponent>");
}
4366 
// Writes the component to a stream; token order must stay in sync with
// Read(): <InputDim>, <OutputDim>, <PoolSize>, <PoolStride>.
void MaxpoolingComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<MaxpoolingComponent>");
  WriteToken(os, binary, "<InputDim>");
  WriteBasicType(os, binary, input_dim_);
  WriteToken(os, binary, "<OutputDim>");
  WriteBasicType(os, binary, output_dim_);
  WriteToken(os, binary, "<PoolSize>");
  WriteBasicType(os, binary, pool_size_);
  WriteToken(os, binary, "<PoolStride>");
  WriteBasicType(os, binary, pool_stride_);
  WriteToken(os, binary, "</MaxpoolingComponent>");
}
4379 
4380 std::string MaxpoolingComponent::Info() const {
4381  std::stringstream stream;
4382  stream << Type() << ", input-dim = " << input_dim_
4383  << ", output-dim = " << output_dim_
4384  << ", pool-size = " << pool_size_
4385  << ", pool-stride = " << pool_stride_;
4386  return stream.str();
4387 }
4388 
4389 } // namespace nnet2
4390 } // namespace kaldi
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
int32 OutputDim() const
Get size of output vectors.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void MulElements(const CuVectorBase< Real > &v)
Definition: cu-vector.cc:838
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual void Read(std::istream &is, bool binary)
We implement Read at this level as it just needs the Type().
virtual std::string Info() const
virtual void SetParams(const VectorBase< BaseFloat > &bias, const MatrixBase< BaseFloat > &linear)
This kind of Component is a base-class for things like sigmoid and softmax.
virtual void Read(std::istream &is, bool binary)
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Read(std::istream &is, bool binary)
virtual int32 InputDim() const
Get size of input vectors.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual int32 InputDim() const =0
Get size of input vectors.
CuVector< BaseFloat > bias_params_
void SoftHinge(const CuMatrixBase< Real > &src)
Apply the function y = log(1 + exp(x)), to each element.
Definition: cu-matrix.cc:1555
void ApplyPow(Real power)
Definition: cu-matrix.h:438
FixedAffineComponent is an affine transform that is supplied at network initialization time and is no...
virtual Component * Copy() const
Copy component (deep copy).
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev)
virtual void Read(std::istream &is, bool binary)
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual Component * Copy() const
Copy component (deep copy).
void Check() const
Checks that the data in the ChunkInfo is valid, and die if not.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual int32 OutputDim() const
Get size of output vectors.
void GroupMax(const CuMatrixBase< Real > &src)
Apply the function y(i) = max_{j = i*G}^{(i+1)*G-1} x_j, where G = x.NumCols() / y.NumCols() must be an integer.
Definition: cu-matrix.cc:1617
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
static void ReverseIndexes(const std::vector< int32 > &forward_indexes, int32 input_dim, std::vector< std::vector< int32 > > *backward_indexes)
virtual void Resize(int32 input_dim, int32 output_dim)
virtual void InitFromString(std::string args)
We implement InitFromString at this level.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Component * CollapseWithPrevious(const FixedAffineComponent &prev) const
virtual std::string Info() const
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
void Init(const CuVectorBase< BaseFloat > &scales)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual Component * Copy() const
Copy component (deep copy).
virtual void PerturbParams(BaseFloat stddev)
We introduce a new virtual function that only applies to class UpdatableComponent.
BaseFloat GetScalingFactor(const CuVectorBase< BaseFloat > &in_products, BaseFloat gamma_prod, CuVectorBase< BaseFloat > *out_products)
The following function is only called if max_change_per_sample_ > 0, it returns a scaling factor alph...
virtual void UpdateSimple(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
static Component * NewFromString(const std::string &initializer_line)
Initialize the Component from one line that will contain first the type, e.g.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void Init(int32 input_dim, int32 output_dim, int32 pool_size, int32 pool_stride)
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void CopyColFromMat(const CuMatrixBase< Real > &mat, MatrixIndexT col)
Definition: cu-vector.cc:103
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
void Init(const CuMatrixBase< BaseFloat > &matrix)
std::vector< int32 > context_
virtual Component * Copy() const
Copy component (deep copy).
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
virtual void Add(BaseFloat alpha, const UpdatableComponent &other)
This new virtual function adds the parameters of another updatable component, times some constant...
Abstract class, basic element of the network, it is a box with defined inputs, outputs, and transformation functions interface.
void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void Init(int32 dim, int32 dct_dim, bool reorder, int32 keep_dct_dim=0)
Real Sum() const
Definition: cu-vector.cc:297
void ComputeDctMatrix(Matrix< Real > *M)
ComputeDctMatrix computes a matrix corresponding to the DCT, such that M * v equals the DCT of vector...
void Init(int32 input_dim, std::vector< int32 > context, int32 const_component_dim=0)
virtual void Read(std::istream &is, bool binary)
virtual void SetZero(bool treat_as_gradient)
Set parameters to zero, and if treat_as_gradient is true, we'll be treating this as a gradient so set...
virtual Component * Copy() const
Copy component (deep copy).
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual int32 OutputDim() const =0
Get size of output vectors.
void Add(BaseFloat alpha, const NonlinearComponent &other)
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:653
void Write(std::ostream &os, bool binary) const
Write component to stream.
void ApplyFloor(Real floor_val)
Definition: cu-matrix.h:451
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: cu-vector.cc:595
virtual void SetZero(bool treat_as_gradient)
Set parameters to zero, and if treat_as_gradient is true, we'll be treating this as a gradient so set...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Read(std::istream &is, bool binary)
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update_in, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void SetParams(const VectorBase< BaseFloat > &bias, const MatrixBase< BaseFloat > &filter)
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
Definition: cu-matrix.cc:954
virtual int32 GetParameterDim() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
PermuteComponent does a permutation of the dimensions (by default, a fixed random permutation...
virtual void Scale(BaseFloat scale)
This new virtual function scales the parameters by this amount.
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301
This is a bit similar to dropout but adding (not multiplying) Gaussian noise with a given standard de...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Read(std::istream &is, bool binary)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
int32 GetParameterDim() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev, int32 num_blocks, BaseFloat alpha)
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
virtual void InitFromString(std::string args)
Initialize, typically from a line of a config file.
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
void AddCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indices)
Add column indices[r] of src to column r.
Definition: cu-matrix.cc:2701
virtual void Read(std::istream &is, bool binary)
virtual Component * Copy() const
Copy component (deep copy).
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void SetZero(bool treat_as_gradient)
Set parameters to zero, and if treat_as_gradient is true, we'll be treating this as a gradient so set...
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void Scale(BaseFloat scale)
This new virtual function scales the parameters by this amount.
virtual void Read(std::istream &is, bool binary)
static const BaseFloat kNormFloor
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev, int32 num_blocks)
virtual std::string Info() const
virtual void Propagate(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Perform forward pass propagation Input->Output.
Discrete cosine transform.
void DestructiveSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt)
Singular value decomposition Major limitations: For nonsquare matrices, we assume m>=n (NumRows >= Nu...
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void GetSizes(std::vector< int32 > *sizes) const
void UpdateStats(const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > *deriv=NULL)
void PreconditionDirections(CuMatrixBase< BaseFloat > *R, CuVectorBase< BaseFloat > *row_prod, BaseFloat *scale)
virtual void InitFromString(std::string args)
We implement InitFromString at this level.
void AddMatBlocks(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
This function is like AddMat (it does *this += alpha * src), except that it supports cases where *thi...
Definition: cu-matrix.cc:1119
FixedScaleComponent applies a fixed per-element scale; it's similar to the Rescale component in the n...
void Scale(Real value)
Definition: cu-matrix.cc:644
virtual void Read(std::istream &is, bool binary)=0
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix< BaseFloat > *in_deriv) const
Perform backward pass propagation of the derivative, and also either update the model (if to_update =...
bool ParseFromString(const std::string &name, std::string *string, int32 *param)
Functions used in Init routines.
std::istream & Stream()
Definition: kaldi-io.cc:826
void Init(int32 dim, std::vector< int32 > context)
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev, int32 rank_in, int32 rank_out, int32 update_period, BaseFloat num_samples_history, BaseFloat alpha, BaseFloat max_change_per_sample)
virtual std::string Info() const
virtual void Backprop(const ChunkInfo &in_info, const ChunkInfo &out_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, Component *to_update, CuMatrix<