// nnet3/convolution.cc

// Copyright      2017  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <iterator>
#include <sstream>
#include <iomanip>
#include "nnet3/convolution.h"
#include "nnet3/nnet-parse.h"
#include "nnet3/nnet-compile-utils.h"

namespace kaldi {
namespace nnet3 {
namespace time_height_convolution {


// This function, used in ConvolutionComputation::ComputeDerived(), reverses a
// column mapping that may not be unique.  'columns' is a column map in which
// each element is either -1 (meaning: copy a zero) or a number in the range
// [0, input_dim - 1].  The output 'backward_columns' is the reverse map, as a
// vector of maps: a single map would not suffice, because one input column
// may appear several times in 'columns'.
static void ReverseColumnMapping(
    const std::vector<int32> &columns,
    int32 input_dim,
    std::vector<std::vector<int32> > *backward_columns) {
  int32 columns_dim = columns.size();
  std::vector<std::vector<int32> > temp(input_dim);
  for (int32 i = 0; i < columns_dim; i++) {
    int32 j = columns[i];
    KALDI_ASSERT(j >= -1 && j < input_dim);
    if (j != -1)
      temp[j].push_back(i);
  }
  // 'max_overlap' is the largest number of times that some j >= 0 appears in
  // 'columns'.
  int32 max_overlap = 0;
  for (int32 j = 0; j < input_dim; j++)
    max_overlap = std::max(max_overlap,
                           static_cast<int32>(temp[j].size()));
  backward_columns->resize(max_overlap);
  for (int32 k = 0; k < max_overlap; k++) {
    (*backward_columns)[k].clear();
    (*backward_columns)[k].resize(input_dim, -1);
  }
  for (int32 j = 0; j < input_dim; j++) {
    for (int32 k = 0; k < static_cast<int32>(temp[j].size()); k++) {
      int32 i = temp[j][k];
      (*backward_columns)[k][j] = i;
    }
  }
}
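
// For example: with input_dim = 3 and columns = [ 2, 0, 2, -1 ], input column
// 2 is used twice, so max_overlap = 2 and we get two reverse maps:
//   backward_columns[0] = [ 1, -1, 0 ]
//   backward_columns[1] = [ -1, -1, 2 ]
// Applied one after another with AddCols(), these accumulate every copy of an
// input column without writing any destination column twice.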

// returns true if 'vec' is of the form
// [ n, n+1, n+2, .... ].
static bool VectorIsContiguous(const std::vector<int32> &vec) {
  KALDI_ASSERT(!vec.empty());
  int32 s = vec.size();
  for (int32 i = 0; i + 1 < s; i++)
    if (vec[i+1] != vec[i] + 1)
      return false;
  return true;
}
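
// e.g. VectorIsContiguous([ 4, 5, 6 ]) == true;
//      VectorIsContiguous([ 4, 6, 7 ]) == false.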


std::string ConvolutionModel::Info() const {
  std::ostringstream os;
  os << "num-filters-in=" << num_filters_in
     << ", num-filters-out=" << num_filters_out
     << ", height-in=" << height_in
     << ", height-out=" << height_out
     << ", height-subsample-out=" << height_subsample_out
     << ", {time,height}-offsets=[";
  for (size_t i = 0; i < offsets.size(); i++) {
    if (i > 0) os << ' ';
    os << offsets[i].time_offset << ',' << offsets[i].height_offset;
  }
  os << "], required-time-offsets=[";
  for (std::set<int32>::const_iterator iter = required_time_offsets.begin();
       iter != required_time_offsets.end(); ++iter) {
    if (iter != required_time_offsets.begin()) os << ',';
    os << *iter;
  }
  os << "], input-dim=" << InputDim() << ", output-dim=" << OutputDim();
  return os.str();
}

void ConvolutionModel::ComputeDerived() {
  { // compute all_time_offsets
    all_time_offsets.clear();
    for (std::vector<Offset>::const_iterator iter = offsets.begin();
         iter != offsets.end(); ++iter)
      all_time_offsets.insert(iter->time_offset);
  }
  { // compute time_offsets_modulus
    time_offsets_modulus = 0;
    std::set<int32>::iterator iter = all_time_offsets.begin();
    int32 cur_offset = *iter;
    for (++iter; iter != all_time_offsets.end(); ++iter) {
      int32 this_offset = *iter;
      time_offsets_modulus = Gcd(time_offsets_modulus,
                                 this_offset - cur_offset);
      cur_offset = this_offset;
    }
  }
}
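
// For example: time offsets { -3, 0, 3 } give time_offsets_modulus = 3, while
// { -1, 0, 2 } give Gcd(1, 2) = 1; with a single time offset the loop body
// never runs and time_offsets_modulus stays 0.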


bool ConvolutionModel::Check(bool check_heights_used,
                             bool allow_height_padding) const {
  if (num_filters_in <= 0 || num_filters_out <= 0 ||
      height_in <= 0 || height_out <= 0 ||
      height_subsample_out <= 0 || offsets.empty() ||
      required_time_offsets.empty()) {
    KALDI_WARN << "Convolution model fails basic check.";
    return false;
  }
  ConvolutionModel temp(*this);
  temp.ComputeDerived();
  if (!(temp == *this)) {
    KALDI_WARN << "Derived variables are incorrect.";
    return false;
  }
  // check that required_time_offsets is included in all_time_offsets.
  for (std::set<int32>::iterator iter = required_time_offsets.begin();
       iter != required_time_offsets.end(); ++iter) {
    if (all_time_offsets.count(*iter) == 0) {
      KALDI_WARN << "Required time offsets not a subset of all_time_offsets.";
      return false;
    }
  }

  std::vector<bool> h_in_used(height_in, false);
  std::vector<bool> offsets_used(offsets.size(), false);

  // check that in cases where we only have the minimum
  // required input (from required_time_offsets), each
  // height in the output is potentially nonzero.
  for (int32 h_out = 0; h_out < height_out * height_subsample_out;
       h_out += height_subsample_out) {
    bool some_input_available = false;
    for (size_t i = 0; i < offsets.size(); i++) {
      const Offset &offset = offsets[i];
      int32 h_in = h_out + offset.height_offset;
      if (h_in >= 0 && h_in < height_in) {
        offsets_used[i] = true;
        h_in_used[h_in] = true;
        if (required_time_offsets.count(offset.time_offset) != 0)
          some_input_available = true;
      } else {
        if (!allow_height_padding) {
          KALDI_WARN << "height padding not allowed but is required.";
          return false;
        }
      }
    }
    if (!some_input_available) {
      // none of the input pixels for this output pixel were available (at
      // least in the case where we only have the 'required' inputs on the
      // time dimension).
      std::ostringstream os;
      Write(os, false);
      KALDI_WARN << "for the " << (h_out / height_subsample_out)
                 << "'th output height, no input is available, if only "
                 "required time-indexes are available.";
      // We could later change this part of the validation code to accept
      // such models, if there is a legitimate use-case.
      return false;
    }
  }
  if (check_heights_used) {
    for (int32 h = 0; h < height_in; h++) {
      if (!h_in_used[h]) {
        KALDI_WARN << "The input at the " << h << "'th height is never used.";
        return false;
      }
    }
  }
  for (size_t i = 0; i < offsets_used.size(); i++) {
    if (!offsets_used[i]) {
      KALDI_WARN << "(time,height) offset (" << offsets[i].time_offset
                 << "," << offsets[i].height_offset
                 << ") of this computation is never used.";
      return false;
    }
  }
  return true;
}
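
// As an illustration, a 3x3 kernel with 'same' height padding and no
// subsampling could have height_subsample_out = 1, height_out = height_in,
// 'offsets' containing the nine (time_offset, height_offset) pairs with both
// members in { -1, 0, 1 }, and required_time_offsets = { -1, 0, 1 }; such a
// model passes Check(true, true), since height padding is allowed and every
// input height is reachable through the height-offset of 0.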


bool ConvolutionModel::operator == (const ConvolutionModel &other) const {
  return num_filters_in == other.num_filters_in &&
      num_filters_out == other.num_filters_out &&
      height_in == other.height_in &&
      height_out == other.height_out &&
      height_subsample_out == other.height_subsample_out &&
      offsets == other.offsets &&
      required_time_offsets == other.required_time_offsets &&
      all_time_offsets == other.all_time_offsets &&
      time_offsets_modulus == other.time_offsets_modulus;
}


void ConvolutionModel::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<ConvolutionModel>");
  WriteToken(os, binary, "<NumFiltersIn>");
  WriteBasicType(os, binary, num_filters_in);
  WriteToken(os, binary, "<NumFiltersOut>");
  WriteBasicType(os, binary, num_filters_out);
  WriteToken(os, binary, "<HeightIn>");
  WriteBasicType(os, binary, height_in);
  WriteToken(os, binary, "<HeightOut>");
  WriteBasicType(os, binary, height_out);
  WriteToken(os, binary, "<HeightSubsampleOut>");
  WriteBasicType(os, binary, height_subsample_out);
  WriteToken(os, binary, "<Offsets>");
  std::vector<std::pair<int32, int32> > pairs(offsets.size());
  for (size_t i = 0; i < offsets.size(); i++) {
    pairs[i].first = offsets[i].time_offset;
    pairs[i].second = offsets[i].height_offset;
  }
  WriteIntegerPairVector(os, binary, pairs);
  std::vector<int32> required_time_offsets_list(required_time_offsets.begin(),
                                                required_time_offsets.end());
  WriteToken(os, binary, "<RequiredTimeOffsets>");
  WriteIntegerVector(os, binary, required_time_offsets_list);
  WriteToken(os, binary, "</ConvolutionModel>");
}


void ConvolutionModel::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<ConvolutionModel>", "<NumFiltersIn>");
  ReadBasicType(is, binary, &num_filters_in);
  ExpectToken(is, binary, "<NumFiltersOut>");
  ReadBasicType(is, binary, &num_filters_out);
  ExpectToken(is, binary, "<HeightIn>");
  ReadBasicType(is, binary, &height_in);
  ExpectToken(is, binary, "<HeightOut>");
  ReadBasicType(is, binary, &height_out);
  ExpectToken(is, binary, "<HeightSubsampleOut>");
  ReadBasicType(is, binary, &height_subsample_out);
  ExpectToken(is, binary, "<Offsets>");
  std::vector<std::pair<int32, int32> > pairs;
  ReadIntegerPairVector(is, binary, &pairs);
  offsets.resize(pairs.size());
  for (size_t i = 0; i < offsets.size(); i++) {
    offsets[i].time_offset = pairs[i].first;
    offsets[i].height_offset = pairs[i].second;
  }
  std::vector<int32> required_time_offsets_list;
  ExpectToken(is, binary, "<RequiredTimeOffsets>");
  ReadIntegerVector(is, binary, &required_time_offsets_list);
  required_time_offsets.clear();
  required_time_offsets.insert(required_time_offsets_list.begin(),
                               required_time_offsets_list.end());
  ExpectToken(is, binary, "</ConvolutionModel>");
  ComputeDerived();
  KALDI_ASSERT(Check(false, true));
}


void ConvolutionComputation::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<ConvComputation>");
  WriteToken(os, binary, "<NumFiltersInOut>");
  WriteBasicType(os, binary, num_filters_in);
  WriteBasicType(os, binary, num_filters_out);
  WriteToken(os, binary, "<HeightInOut>");
  WriteBasicType(os, binary, height_in);
  WriteBasicType(os, binary, height_out);
  WriteToken(os, binary, "<NumTInOut>");
  WriteBasicType(os, binary, num_t_in);
  WriteBasicType(os, binary, num_t_out);
  WriteToken(os, binary, "<NumImages>");
  WriteBasicType(os, binary, num_images);
  WriteToken(os, binary, "<TempRowsCols>");
  WriteBasicType(os, binary, temp_rows);
  WriteBasicType(os, binary, temp_cols);
  int32 num_steps = steps.size();
  WriteToken(os, binary, "<NumSteps>");
  WriteBasicType(os, binary, num_steps);
  for (int32 s = 0; s < num_steps; s++) {
    const ConvolutionStep &step = steps[s];
    WriteToken(os, binary, "<TimeShift>");
    WriteBasicType(os, binary, step.input_time_shift);
    WriteToken(os, binary, "<ParamsStartCol>");
    WriteBasicType(os, binary, step.params_start_col);
    WriteToken(os, binary, "<HeightMap>");
    WriteIntegerVector(os, binary, step.height_map);
  }
  WriteToken(os, binary, "</ConvComputation>");
}


void ConvolutionComputation::Read(std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary, "<ConvComputation>", "<NumFiltersInOut>");
  ReadBasicType(is, binary, &num_filters_in);
  ReadBasicType(is, binary, &num_filters_out);
  ExpectToken(is, binary, "<HeightInOut>");
  ReadBasicType(is, binary, &height_in);
  ReadBasicType(is, binary, &height_out);
  ExpectToken(is, binary, "<NumTInOut>");
  ReadBasicType(is, binary, &num_t_in);
  ReadBasicType(is, binary, &num_t_out);
  ExpectToken(is, binary, "<NumImages>");
  ReadBasicType(is, binary, &num_images);
  ExpectToken(is, binary, "<TempRowsCols>");
  ReadBasicType(is, binary, &temp_rows);
  ReadBasicType(is, binary, &temp_cols);
  int32 num_steps;
  ExpectToken(is, binary, "<NumSteps>");
  ReadBasicType(is, binary, &num_steps);
  steps.resize(num_steps);
  for (int32 s = 0; s < num_steps; s++) {
    ConvolutionStep &step = steps[s];
    ExpectToken(is, binary, "<TimeShift>");
    ReadBasicType(is, binary, &step.input_time_shift);
    ExpectToken(is, binary, "<ParamsStartCol>");
    ReadBasicType(is, binary, &step.params_start_col);
    ExpectToken(is, binary, "<HeightMap>");
    ReadIntegerVector(is, binary, &step.height_map);
  }
  ExpectToken(is, binary, "</ConvComputation>");
  ComputeDerived();
  Check();
}


void ConvolutionComputation::Check() const {
  KALDI_ASSERT(num_filters_in > 0 && num_filters_out > 0 &&
               height_in > 0 && height_out > 0);
  KALDI_ASSERT(num_t_in >= num_t_out &&
               num_t_out > 0 && num_images > 0);
  KALDI_ASSERT((temp_rows == 0 && temp_cols == 0) ||
               (temp_rows <= num_t_out * num_images &&
                temp_cols > 0));
  KALDI_ASSERT(temp_rows % num_images == 0);
  bool temp_mat_required = false;
  int32 num_steps = steps.size();
  int32 num_extra_input_times = num_t_in - num_t_out,
      input_cols = num_filters_in * height_in,
      smallest_time_shift = 1000,
      largest_time_shift = 0;
  // check 'steps'
  for (int32 s = 0; s < num_steps; s++) {
    const ConvolutionStep &step = steps[s];
    KALDI_ASSERT(step.input_time_shift >= 0 &&
                 step.input_time_shift <= num_extra_input_times);
    if (step.input_time_shift < smallest_time_shift)
      smallest_time_shift = step.input_time_shift;
    if (step.input_time_shift > largest_time_shift)
      largest_time_shift = step.input_time_shift;
    KALDI_ASSERT(step.params_start_col >= 0 &&
                 step.params_start_col % num_filters_in == 0);
    if (s != 0) {
      KALDI_ASSERT(step.input_time_shift != steps[s-1].input_time_shift);
    }
    std::vector<int32> columns;
    step.columns.CopyToVec(&columns);
    KALDI_ASSERT(step.first_column == columns[0]);
    KALDI_ASSERT(step.columns.Dim() == step.height_map.size() * num_filters_in);
    bool all_negative = true;
    int32 temp_height = step.height_map.size();
    bool contiguous = true;
    for (int32 i = 0; i < temp_height; i++) {
      int32 h = step.height_map[i];
      KALDI_ASSERT(h >= -1 && h < height_in);
      if (i > 0 && step.height_map[i-1] != h-1)
        contiguous = false;
      if (h == -1) {
        contiguous = false;
        for (int32 f = 0; f < num_filters_in; f++) {
          KALDI_ASSERT(columns[i * num_filters_in + f] == -1);
        }
      } else {
        all_negative = false;
        for (int32 f = 0; f < num_filters_in; f++) {
          KALDI_ASSERT(columns[i * num_filters_in + f] ==
                       h * num_filters_in + f);
        }
      }
    }
    KALDI_ASSERT(contiguous == step.columns_are_contiguous);
    if (!contiguous || columns.size() != input_cols) {
      // we would need the temporary matrix.  Make sure the
      // temporary matrix is big enough.
      temp_mat_required = true;
      KALDI_ASSERT(columns.size() <= temp_cols);
    }
    KALDI_ASSERT(!all_negative);

    std::vector<int32> columns_reconstructed(columns.size(), -1);
    // reconstruct 'columns' from backward_columns as a way to
    // check that backward_columns is correct.
    // they are reverse-direction maps, but we may need
    // step.backward_columns.size() > 1 because of elements
    // in the input that are duplicated in the temp matrix.
    for (size_t k = 0; k < step.backward_columns.size(); k++) {
      std::vector<int32> backward_columns;
      step.backward_columns[k].CopyToVec(&backward_columns);
      KALDI_ASSERT(int32(backward_columns.size()) ==
                   num_filters_in * height_in);
      for (int32 l = 0; l < num_filters_in * height_in; l++) {
        int32 c = backward_columns[l];
        KALDI_ASSERT(c < int32(columns.size()));
        if (c != -1) {
          KALDI_ASSERT(columns_reconstructed[c] == -1);
          columns_reconstructed[c] = l;
        }
      }
    }
    KALDI_ASSERT(columns_reconstructed == columns);
  }
  // check that all rows of the input were used.
  KALDI_ASSERT(smallest_time_shift == 0 &&
               largest_time_shift == num_extra_input_times);

  // check that the temp matrix is only allocated if it is required.
  KALDI_ASSERT((temp_cols != 0) == temp_mat_required);
}


// Internal function called inside ConvolveForward.
// Note: the number of time steps covered may be different
// from that implied by cc.num_t_in and cc.num_t_out
// if the matrices are very large and we've broken the
// computation up into pieces to save memory.
static void ConvolveForwardInternal(
    const ConvolutionComputation &cc,
    const CuMatrixBase<BaseFloat> &input,
    const CuMatrixBase<BaseFloat> &params,
    CuMatrixBase<BaseFloat> *temp_mat,
    CuMatrixBase<BaseFloat> *output) {
  KALDI_ASSERT(temp_mat->Stride() == temp_mat->NumCols());

  // The num-rows of 'output' supersedes cc.num_t_out (they'll only be
  // different in cases where we are doing the computation in pieces
  // to save memory).
  int32 input_rows = input.NumRows(),
      output_rows = output->NumRows();

  KALDI_ASSERT(output_rows <= input_rows &&
               input_rows % cc.num_images == 0 &&
               output_rows % cc.num_images == 0);

  int32 num_steps = cc.steps.size();
  for (int32 s = 0; s < num_steps; s++) {
    const ConvolutionComputation::ConvolutionStep &step = cc.steps[s];
    int32 input_row_start = step.input_time_shift * cc.num_images;
    // note: 'input_part' will normally be almost all of 'input', perhaps
    // minus one or two time steps at the start or end.
    CuSubMatrix<BaseFloat> input_part(input,
                                      input_row_start, output_rows,
                                      0, input.NumCols());
    int32 temp_num_cols = step.columns.Dim(),
        param_cols = temp_num_cols / cc.height_out;
    CuSubMatrix<BaseFloat> params_part(params,
                                       0, params.NumRows(),
                                       step.params_start_col,
                                       param_cols);
    CuSubMatrix<BaseFloat> output_reshaped(
        output->Data(), output_rows * cc.height_out,
        cc.num_filters_out, cc.num_filters_out);
    if (!step.columns_are_contiguous ||
        temp_num_cols != input.NumCols()) {
      // In most cases we will take this branch, where we have to copy the
      // input to a temporary matrix.  (However, different steps may require
      // different num-cols of the temporary matrix, so we create sub-parts
      // of 'temp_mat'.)

      // We create the sub-matrix 'temp_mat_part' in a lower-level way, using
      // pointers, because we need to ensure that its num-cols and the stride
      // are the same (this is necessary so that we can do reshaping in
      // ConvolutionReshapedMultiply()).
      CuSubMatrix<BaseFloat> temp_mat_part(temp_mat->Data(),
                                           temp_mat->NumRows(),
                                           temp_num_cols, temp_num_cols);
      if (!step.columns_are_contiguous) {
        // we're doing a column mapping.
        temp_mat_part.CopyCols(input_part, step.columns);
      } else {
        // we're just taking a sub-matrix of the input matrix, but we still
        // need to make a copy because we need the stride == num-cols (so that
        // the reshaping will work).
        temp_mat_part.CopyFromMat(input_part.ColRange(step.first_column,
                                                      step.columns.Dim()));
      }
      CuSubMatrix<BaseFloat> temp_mat_part_reshaped(
          temp_mat_part.Data(), temp_mat_part.NumRows() * cc.height_out,
          temp_num_cols / cc.height_out, temp_num_cols / cc.height_out);

      output_reshaped.AddMatMat(1.0, temp_mat_part_reshaped, kNoTrans,
                                params_part, kTrans, 1.0);
    } else {
      CuSubMatrix<BaseFloat> input_reshaped(
          input_part.Data(), input_part.NumRows() * cc.height_out,
          input_part.NumCols() / cc.height_out,
          input_part.NumCols() / cc.height_out);

      output_reshaped.AddMatMat(1.0, input_reshaped, kNoTrans,
                                params_part, kTrans, 1.0);
    }
  }
}
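
// Note on the reshaping trick used above: 'temp_mat_part' has one row per
// (time, image) pair and temp_num_cols = cc.height_out * param_cols columns,
// with stride == num-cols; viewing the same memory as a matrix of size
// (rows * height_out) x param_cols gives each output height its own row, so a
// single AddMatMat() computes all (time, image, height) outputs at once.
// For example, 100 rows with height_out = 40 and param_cols = 64 are viewed
// as a 4000 x 64 matrix, multiplied by the 64-column 'params_part' transposed.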


void ConvolveForward(
    const ConvolutionComputation &cc,
    const CuMatrixBase<BaseFloat> &input,
    const CuMatrixBase<BaseFloat> &params,
    CuMatrixBase<BaseFloat> *output) {
  KALDI_ASSERT(input.NumCols() == input.Stride() &&
               output->NumCols() == output->Stride());
  KALDI_ASSERT(params.NumRows() == cc.num_filters_out);
  KALDI_ASSERT(output->NumRows() == cc.num_t_out * cc.num_images &&
               output->NumCols() == cc.height_out * cc.num_filters_out);
  // the input might need to be reshaped but we can check its total size.
  KALDI_ASSERT(input.NumRows() * input.NumCols() == cc.num_images *
               cc.num_t_in * cc.height_in * cc.num_filters_in);

  int32 input_rows = input.NumRows(),
      required_input_rows = cc.num_images * cc.num_t_in;

  // this if-statement handles reshaping the input and recursing if there
  // is subsampling.
  if (input_rows != required_input_rows) {
    if (input_rows % required_input_rows != 0)
      KALDI_ERR << "Input matrix has wrong size.";  // error in calling code.
    // input_rows is a multiple of required_input_rows.  Reshape the matrix;
    // we already checked that its Stride() == NumCols().
    int32 num_cols = input.NumCols(),
        multiple = input_rows / required_input_rows,
        new_num_cols = num_cols * multiple,
        new_stride = new_num_cols;
    CuSubMatrix<BaseFloat> input_reshaped(
        input.Data(), required_input_rows, new_num_cols, new_stride);
    ConvolveForward(cc, input_reshaped, params, output);
    return;
  }

  CuMatrix<BaseFloat> temp_mat(cc.temp_rows, cc.temp_cols,
                               kUndefined);

  // this if-statement handles breaking up the arguments
  // and the computation into row-ranges if the temporary
  // matrix would have been excessively large, and we've decided
  // to give it fewer rows than the output (this saves
  // memory).  normally we won't take this if-statement
  // so ignore it if you're trying to understand the framework.
  if (cc.temp_rows != 0 && cc.temp_rows != input_rows) {
    KALDI_ASSERT(cc.temp_rows % cc.num_images == 0);
    int32 num_time_steps_per_chunk = cc.temp_rows / cc.num_images;
    int32 num_extra_in = cc.num_t_in - cc.num_t_out;

    for (int32 t_start = 0; t_start < cc.num_t_out;
         t_start += num_time_steps_per_chunk) {
      int32 num_t_left = cc.num_t_out - t_start,
          this_num_t_out = std::min<int32>(num_t_left,
                                           num_time_steps_per_chunk),
          this_num_t_in = this_num_t_out + num_extra_in;
      CuSubMatrix<BaseFloat> input_part(input, t_start * cc.num_images,
                                        this_num_t_in * cc.num_images,
                                        0, input.NumCols());
      CuSubMatrix<BaseFloat> output_part(*output, t_start * cc.num_images,
                                         this_num_t_out * cc.num_images,
                                         0, output->NumCols());
      CuSubMatrix<BaseFloat> temp_part(temp_mat, 0,
                                       this_num_t_out * cc.num_images,
                                       0, temp_mat.NumCols());
      ConvolveForwardInternal(cc, input_part, params,
                              &temp_part, &output_part);
    }
    return;
  }
  ConvolveForwardInternal(cc, input, params, &temp_mat, output);
}
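
// Example of the shapes involved: with num_images = 64, num_t_in = 18,
// num_t_out = 16, height_in = 40, num_filters_in = 32, height_out = 40 and
// num_filters_out = 64, 'input' would be (64 * 18) x (40 * 32) and 'output'
// (64 * 16) x (40 * 64), with rows ordered first by t and then by image, and
// the filter index varying fastest within each row.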


// Internal function called inside ConvolveBackwardData.
// Note: the number of time steps covered may be different
// from that implied by cc.num_t_in and cc.num_t_out
// if the matrices are very large and we've broken the
// computation up into pieces to save memory.
// We require that temp_mat should not contain inf's
// or nan's on entry.
static void ConvolveBackwardDataInternal(
    const ConvolutionComputation &cc,
    const CuMatrixBase<BaseFloat> &params,
    const CuMatrixBase<BaseFloat> &output_deriv,
    CuMatrixBase<BaseFloat> *temp_mat,
    CuMatrixBase<BaseFloat> *input_deriv) {
  KALDI_ASSERT(temp_mat->Stride() == temp_mat->NumCols());

  // The num-rows of 'output_deriv' supersedes cc.num_t_out (they'll only be
  // different in cases where we are doing the computation in pieces
  // to save memory).
  int32 input_rows = input_deriv->NumRows(),
      output_rows = output_deriv.NumRows();

  KALDI_ASSERT(output_rows <= input_rows &&
               input_rows % cc.num_images == 0 &&
               output_rows % cc.num_images == 0);

  int32 num_steps = cc.steps.size();
  for (int32 s = 0; s < num_steps; s++) {
    const ConvolutionComputation::ConvolutionStep &step = cc.steps[s];
    int32 input_row_start = step.input_time_shift * cc.num_images;
    CuSubMatrix<BaseFloat> input_deriv_part(*input_deriv,
                                            input_row_start, output_rows,
                                            0, input_deriv->NumCols());
    int32 temp_num_cols = step.columns.Dim(),
        param_cols = temp_num_cols / cc.height_out;
    CuSubMatrix<BaseFloat> params_part(params,
                                       0, params.NumRows(),
                                       step.params_start_col,
                                       param_cols);
    CuSubMatrix<BaseFloat> output_deriv_reshaped(
        output_deriv.Data(), output_rows * cc.height_out,
        cc.num_filters_out, cc.num_filters_out);

    if (!step.columns_are_contiguous ||
        temp_num_cols != input_deriv->NumCols()) {
      // In most cases we will take this branch, where we have to propagate the
      // input-derivative via a temporary matrix.  (However, different steps
      // may require different num-cols of the temporary matrix, so we create
      // sub-parts of 'temp_mat'.)

      // We create the sub-matrix 'temp_mat_part' in a lower-level way, using
      // pointers, because we need to ensure that its num-cols and the stride
      // are the same (this is necessary so that we can do reshaping in
      // ConvolutionReshapedMultiply()).
      CuSubMatrix<BaseFloat> temp_mat_part(temp_mat->Data(),
                                           temp_mat->NumRows(),
                                           temp_num_cols, temp_num_cols),
          temp_mat_part_reshaped(
              temp_mat_part.Data(), temp_mat_part.NumRows() * cc.height_out,
              temp_num_cols / cc.height_out, temp_num_cols / cc.height_out);

      temp_mat_part_reshaped.AddMatMat(1.0, output_deriv_reshaped, kNoTrans,
                                       params_part, kNoTrans, 0.0);

      if (!step.columns_are_contiguous) {
        for (size_t i = 0; i < step.backward_columns.size(); i++) {
          input_deriv_part.AddCols(temp_mat_part, step.backward_columns[i]);
        }
      } else {
        // we're just adding over a sub-matrix of the input-derivative matrix;
        // the temporary matrix was needed so that stride == num-cols (so that
        // the reshaping would work).
        int32 num_cols = step.columns.Dim();
        input_deriv_part.ColRange(step.first_column,
                                  num_cols).AddMat(1.0, temp_mat_part);
      }
    } else {
      CuSubMatrix<BaseFloat> input_deriv_reshaped(
          input_deriv_part.Data(), input_deriv_part.NumRows() * cc.height_out,
          input_deriv_part.NumCols() / cc.height_out,
          input_deriv_part.NumCols() / cc.height_out);
      input_deriv_reshaped.AddMatMat(1.0, output_deriv_reshaped, kNoTrans,
                                     params_part, kNoTrans, 1.0);
    }
  }
}
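
// Note on the loop over 'backward_columns' above: each reverse map writes any
// destination column at most once, so an input column that was duplicated k
// times in the forward copy gets its derivative accumulated over k separate
// AddCols() calls (see ReverseColumnMapping()).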


void ConvolveBackwardData(
    const ConvolutionComputation &cc,
    const CuMatrixBase<BaseFloat> &params,
    const CuMatrixBase<BaseFloat> &output_deriv,
    CuMatrixBase<BaseFloat> *input_deriv) {
  KALDI_ASSERT(input_deriv->NumCols() == input_deriv->Stride() &&
               output_deriv.NumCols() == output_deriv.Stride());
  KALDI_ASSERT(params.NumRows() == cc.num_filters_out);
  KALDI_ASSERT(output_deriv.NumRows() == cc.num_t_out * cc.num_images &&
               output_deriv.NumCols() == cc.height_out * cc.num_filters_out);
  // the input might need to be reshaped but we can check its total size.
  KALDI_ASSERT(input_deriv->NumRows() * input_deriv->NumCols() ==
               cc.num_images * cc.num_t_in * cc.height_in * cc.num_filters_in);

  int32 input_rows = input_deriv->NumRows(),
      required_input_rows = cc.num_images * cc.num_t_in;

  // this if-statement handles reshaping the input and recursing if there
  // is subsampling.
  if (input_rows != required_input_rows) {
    if (input_rows % required_input_rows != 0)
      KALDI_ERR << "Input matrix has wrong size.";  // error in calling code.
    // input_rows is a multiple of required_input_rows.  Reshape the matrix;
    // we already checked that its Stride() == NumCols().
    int32 num_cols = input_deriv->NumCols(),
        multiple = input_rows / required_input_rows,
        new_num_cols = num_cols * multiple,
        new_stride = new_num_cols;
    CuSubMatrix<BaseFloat> input_deriv_reshaped(
        input_deriv->Data(), required_input_rows,
        new_num_cols, new_stride);
    ConvolveBackwardData(cc, params, output_deriv, &input_deriv_reshaped);
    return;
  }

  // the temp matrix is zeroed because ConvolveBackwardDataInternal() requires
  // that it contain no inf's or nan's on entry.
  CuMatrix<BaseFloat> temp_mat(cc.temp_rows, cc.temp_cols,
                               kSetZero);

  // this if-statement handles breaking up the arguments
  // and the computation into row-ranges if the temporary
  // matrix would have been excessively large, and we've decided
  // to give it fewer rows than the output (this saves
  // memory).  normally we won't take this if-statement
  // so ignore it if you're trying to understand the framework.
  if (cc.temp_rows != 0 && cc.temp_rows != input_rows) {
    KALDI_ASSERT(cc.temp_rows % cc.num_images == 0);
    int32 num_time_steps_per_chunk = cc.temp_rows / cc.num_images;
    int32 num_extra_in = cc.num_t_in - cc.num_t_out;

    for (int32 t_start = 0; t_start < cc.num_t_out;
         t_start += num_time_steps_per_chunk) {
      int32 num_t_left = cc.num_t_out - t_start,
          this_num_t_out = std::min<int32>(num_t_left,
                                           num_time_steps_per_chunk),
          this_num_t_in = this_num_t_out + num_extra_in;
      CuSubMatrix<BaseFloat> input_deriv_part(
          *input_deriv, t_start * cc.num_images,
          this_num_t_in * cc.num_images,
          0, input_deriv->NumCols());
      CuSubMatrix<BaseFloat> output_deriv_part(
          output_deriv, t_start * cc.num_images,
          this_num_t_out * cc.num_images,
          0, output_deriv.NumCols());
      CuSubMatrix<BaseFloat> temp_part(
          temp_mat, 0, this_num_t_out * cc.num_images,
          0, temp_mat.NumCols());
      ConvolveBackwardDataInternal(cc, params, output_deriv_part,
                                   &temp_part, &input_deriv_part);
    }
    return;
  }
  ConvolveBackwardDataInternal(cc, params, output_deriv,
                               &temp_mat, input_deriv);
}


// Internal function called inside ConvolveBackwardParams.
// Note: the number of time steps covered may be different
// from that implied by cc.num_t_in and cc.num_t_out
// if the matrices are very large and we've broken the
// computation up into pieces to save memory.
static void ConvolveBackwardParamsInternal(
    const ConvolutionComputation &cc,
    const CuMatrixBase<BaseFloat> &input,
    const CuMatrixBase<BaseFloat> &output_deriv,
    BaseFloat alpha,
    CuMatrixBase<BaseFloat> *temp_mat,
    CuMatrixBase<BaseFloat> *params_deriv) {
  KALDI_ASSERT(temp_mat->Stride() == temp_mat->NumCols());

  // The num-rows of 'output_deriv' supersedes cc.num_t_out (they'll only be
  // different in cases where we are doing the computation in pieces
  // to save memory).
  int32 input_rows = input.NumRows(),
      output_rows = output_deriv.NumRows();

  KALDI_ASSERT(output_rows <= input_rows &&
               input_rows % cc.num_images == 0 &&
               output_rows % cc.num_images == 0);

  int32 num_steps = cc.steps.size();
  for (int32 s = 0; s < num_steps; s++) {
    const ConvolutionComputation::ConvolutionStep &step = cc.steps[s];
    int32 input_row_start = step.input_time_shift * cc.num_images;
    // note: 'input_part' will normally be almost all of 'input', perhaps
    // minus one or two time steps at the start or end.
    CuSubMatrix<BaseFloat> input_part(input,
                                      input_row_start, output_rows,
                                      0, input.NumCols());
    int32 temp_num_cols = step.columns.Dim(),
        param_cols = temp_num_cols / cc.height_out;
    CuSubMatrix<BaseFloat> params_deriv_part(*params_deriv,
                                             0, params_deriv->NumRows(),
                                             step.params_start_col,
                                             param_cols);
    CuSubMatrix<BaseFloat> output_deriv_reshaped(
        output_deriv.Data(), output_rows * cc.height_out,
        cc.num_filters_out, cc.num_filters_out);
    if (!step.columns_are_contiguous ||
        temp_num_cols != input.NumCols()) {
      // In most cases we will take this branch, where we have to copy the
      // input to a temporary matrix.  (However, different steps may require
      // different num-cols of the temporary matrix, so we create sub-parts
      // of 'temp_mat'.)

      // We create the sub-matrix 'temp_mat_part' in a lower-level way, using
      // pointers, because we need to ensure that its num-cols and the stride
      // are the same (this is necessary so that we can do reshaping in
      // ConvolutionReshapedMultiply()).
      CuSubMatrix<BaseFloat> temp_mat_part(temp_mat->Data(),
                                           temp_mat->NumRows(),
                                           temp_num_cols, temp_num_cols);
      if (!step.columns_are_contiguous) {
        // we're doing a column mapping.
        temp_mat_part.CopyCols(input_part, step.columns);
      } else {
        // we're just taking a sub-matrix of the input matrix, but we still
        // need to make a copy because we need the stride == num-cols (so that
        // the reshaping will work).
        temp_mat_part.CopyFromMat(input_part.ColRange(step.first_column,
                                                      step.columns.Dim()));
      }
      CuSubMatrix<BaseFloat> temp_mat_part_reshaped(
          temp_mat_part.Data(), temp_mat_part.NumRows() * cc.height_out,
          temp_num_cols / cc.height_out, temp_num_cols / cc.height_out);

      params_deriv_part.AddMatMat(alpha, output_deriv_reshaped, kTrans,
                                  temp_mat_part_reshaped, kNoTrans, 1.0);
    } else {
      CuSubMatrix<BaseFloat> input_reshaped(
          input_part.Data(), input_part.NumRows() * cc.height_out,
          input_part.NumCols() / cc.height_out,
          input_part.NumCols() / cc.height_out);

      params_deriv_part.AddMatMat(alpha, output_deriv_reshaped, kTrans,
                                  input_reshaped, kNoTrans, 1.0);
    }
  }
}

void ConvolveBackwardParams(
    const ConvolutionComputation &cc,
    const CuMatrixBase<BaseFloat> &input,
    const CuMatrixBase<BaseFloat> &output_deriv,
    BaseFloat alpha,
    CuMatrixBase<BaseFloat> *params_deriv) {
  KALDI_ASSERT(input.NumCols() == input.Stride() &&
               output_deriv.NumCols() == output_deriv.Stride());
  KALDI_ASSERT(params_deriv->NumRows() == cc.num_filters_out);
  KALDI_ASSERT(output_deriv.NumRows() == cc.num_t_out * cc.num_images &&
               output_deriv.NumCols() == cc.height_out * cc.num_filters_out);
  // the input might need to be reshaped but we can check its total size.
  KALDI_ASSERT(input.NumRows() * input.NumCols() == cc.num_images *
               cc.num_t_in * cc.height_in * cc.num_filters_in);

  int32 input_rows = input.NumRows(),
      required_input_rows = cc.num_images * cc.num_t_in;

  // this if-statement handles reshaping the input and recursing if there
  // is subsampling.
  if (input_rows != required_input_rows) {
    if (input_rows % required_input_rows != 0)
      KALDI_ERR << "Input matrix has wrong size.";  // error in calling code.
    // input_rows is a multiple of required_input_rows.  Reshape the matrix;
    // we already checked that its Stride() == NumCols().
    int32 num_cols = input.NumCols(),
        multiple = input_rows / required_input_rows,
        new_num_cols = num_cols * multiple,
        new_stride = new_num_cols;
    CuSubMatrix<BaseFloat> input_reshaped(
        input.Data(), required_input_rows, new_num_cols, new_stride);
    ConvolveBackwardParams(cc, input_reshaped, output_deriv, alpha,
                           params_deriv);
    return;
  }

  CuMatrix<BaseFloat> temp_mat(cc.temp_rows, cc.temp_cols,
                               kUndefined);

  // this if-statement handles breaking up the arguments
  // and the computation into row-ranges if the temporary
  // matrix would have been excessively large, and we've decided
  // to give it fewer rows than the output (this saves
  // memory).  normally we won't take this if-statement
  // so ignore it if you're trying to understand the framework.
  if (cc.temp_rows != 0 && cc.temp_rows != input_rows) {
    KALDI_ASSERT(cc.temp_rows % cc.num_images == 0);
    int32 num_time_steps_per_chunk = cc.temp_rows / cc.num_images;
    int32 num_extra_in = cc.num_t_in - cc.num_t_out;

    for (int32 t_start = 0; t_start < cc.num_t_out;
         t_start += num_time_steps_per_chunk) {
      int32 num_t_left = cc.num_t_out - t_start,
          this_num_t_out = std::min<int32>(num_t_left,
                                           num_time_steps_per_chunk),
          this_num_t_in = this_num_t_out + num_extra_in;
      CuSubMatrix<BaseFloat> input_part(
          input, t_start * cc.num_images,
          this_num_t_in * cc.num_images,
          0, input.NumCols());
      CuSubMatrix<BaseFloat> output_deriv_part(
          output_deriv, t_start * cc.num_images,
          this_num_t_out * cc.num_images,
          0, output_deriv.NumCols());
      CuSubMatrix<BaseFloat> temp_part(temp_mat,
                                       0, this_num_t_out * cc.num_images,
                                       0, temp_mat.NumCols());
      ConvolveBackwardParamsInternal(cc, input_part, output_deriv_part,
                                     alpha, &temp_part, params_deriv);
    }
    return;
  }
  ConvolveBackwardParamsInternal(cc, input, output_deriv,
                                 alpha, &temp_mat, params_deriv);
}



void PadModelHeight(const ConvolutionModel &model,
                    ConvolutionModel *model_padded) {
  *model_padded = model;
  KALDI_ASSERT(!model.offsets.empty());
  int32 min_height_offset = model.offsets[0].height_offset,
      max_height_offset = model.offsets[0].height_offset,
      num_offsets = model.offsets.size();
  for (int32 i = 1; i < num_offsets; i++) {
    min_height_offset = std::min<int32>(min_height_offset,
                                        model.offsets[i].height_offset);
    max_height_offset = std::max<int32>(max_height_offset,
                                        model.offsets[i].height_offset);
  }
  int32 max_output_height = model.height_subsample_out * (model.height_out - 1),
      max_required_input = max_height_offset + max_output_height,
      min_required_input = min_height_offset + 0;
  int32 bottom_padding = -min_required_input,
      top_padding = max_required_input - (model.height_in - 1);
  if (bottom_padding < 0)
    bottom_padding = 0;
  if (top_padding < 0)
    top_padding = 0;
  model_padded->height_in += bottom_padding + top_padding;
  for (int32 i = 0; i < num_offsets; i++)
    model_padded->offsets[i].height_offset += bottom_padding;

  // The reason why we say 'allow_height_padding = false' below is obvious:
  // we've 'manually' padded by changing the model, so this modified model
  // should not require height padding.  The reason we set
  // 'check_heights_used = false' is a little less obvious: the very lowest
  // and highest heights should always be used, but there may, in unusual
  // models, be other heights that are not used.  We found this out in
  // random testing.
  KALDI_ASSERT(model_padded->Check(false, false));
}
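
// For example: if height_in = 40, height_out = 40, height_subsample_out = 1
// and the height offsets are { -1, 0, 1 }, then min_required_input = -1 and
// max_required_input = 40, so bottom_padding = top_padding = 1: the padded
// model has height_in = 42 and height offsets { 0, 1, 2 }.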


// Decides the size of the temporary matrix used by the convolution
// computation, writing it to computation->temp_rows and
// computation->temp_cols; takes the memory limit opts.max_memory_mb
// into account.
static void ComputeTempMatrixSize(const ConvolutionComputationOptions &opts,
                                  ConvolutionComputation *computation) {
  int32 temp_rows = 0, temp_cols = 0;
  for (size_t i = 0; i < computation->steps.size(); i++) {
    const ConvolutionComputation::ConvolutionStep &step = computation->steps[i];
    int32 height_map_size = step.height_map.size(),
        this_num_cols = height_map_size * computation->num_filters_in;
    bool columns_are_contiguous =
        (step.height_map[0] != -1 && VectorIsContiguous(step.height_map));
    bool need_temp_matrix = true;
    if (columns_are_contiguous && step.height_map[0] == 0 &&
        this_num_cols == computation->num_filters_in * computation->height_in) {
      // the only situation in which we wouldn't need the temporary matrix
      // for this step, is where the columns are all of the input matrix.
      need_temp_matrix = false;
    }
    if (need_temp_matrix && this_num_cols > temp_cols)
      temp_cols = this_num_cols;
  }
  if (temp_cols > 0) {
    // work out how many rows the temporary matrix should have, taking
    // into account the specified memory limit.
    temp_rows = computation->num_t_out * computation->num_images;
    BaseFloat num_megabytes = (4 * (temp_rows / 1000.0) * (temp_cols / 1000.0)),
        megabyte_limit = opts.max_memory_mb;
    // C++ rounds down; here, we want to round up so we add one.
    int32 ratio = 1.0 + num_megabytes / megabyte_limit;

    // divide the number of time steps into 'ratio' pieces that are as equal as
    // possible; round up when dividing, to make sure that new_temp_rows * ratio
    // >= temp_rows so that we don't have a small leftover piece.
    int32 new_num_t_out = (computation->num_t_out + ratio - 1) / ratio;
    temp_rows = new_num_t_out * computation->num_images;
    BaseFloat new_num_megabytes =
        (4 * (temp_rows / 1000.0) * (temp_cols / 1000.0));
    // make sure we're within the memory limit.
    if (new_num_megabytes > 1.01 * megabyte_limit) {
      KALDI_WARN << "Memory consumed in convolution is more than requested "
                 << "(maybe very long time sequence?)";
    }
  }
  computation->temp_rows = temp_rows;
  computation->temp_cols = temp_cols;
}
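
// For example: with num_t_out * num_images = 1500 rows and temp_cols = 4000,
// the full temporary matrix would take 4 * 1.5 * 4.0 = 24 megabytes (at 4
// bytes per BaseFloat); with opts.max_memory_mb = 20 this gives ratio =
// int32(1.0 + 24.0 / 20.0) = 2, so the computation would be done in chunks
// of ceil(num_t_out / 2) time steps.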

void UnPadModelHeight(const ConvolutionComputationOptions &opts,
                      const ConvolutionModel &model,
                      const ConvolutionModel &model_padded,
                      ConvolutionComputation *computation) {
  // First work out how much padding was done in PadModelHeight().
  int32 bottom_padding = (model_padded.offsets[0].height_offset -
                          model.offsets[0].height_offset),
      total_padding = model_padded.height_in - model.height_in,
      top_padding = total_padding - bottom_padding;

  int32 old_computation_height_in = computation->height_in;
  // The computation may have been built for the input appended over
  // several frames.  Check that it is for an input height that's a multiple
  // of the model input height.
  KALDI_ASSERT(old_computation_height_in % model_padded.height_in == 0 &&
               computation->height_out == model.height_out);

  // 'ratio' is the same ratio from AppendInputFrames(); it's the number
  // of input frames in 'model' and 'model_padded' that get appended
  // to form a single frame in the computation.
  int32 num_steps = computation->steps.size(),
      unpadded_input_height = model.height_in,
      padded_input_height = model_padded.height_in,
      ratio = old_computation_height_in / padded_input_height;

  computation->height_in = ratio * unpadded_input_height;
  for (int32 s = 0; s < num_steps; s++) {
    ConvolutionComputation::ConvolutionStep &step = computation->steps[s];
    int32 height_map_size = step.height_map.size();
    for (int32 i = 0; i < height_map_size; i++) {
      int32 c = step.height_map[i];
      KALDI_ASSERT(c >= 0);  // there should be no -1's in the padded computation.
      // below, h is the actual height in terms of the padded computation, and
      // m is an index that goes from zero to (num-appended-frames - 1).
      int32 h = c % padded_input_height,
          m = c / padded_input_height;
      KALDI_ASSERT(m < ratio);
      if (h < bottom_padding || h >= padded_input_height - top_padding) {
        step.height_map[i] = -1;
      } else {
        step.height_map[i] = (h - bottom_padding) + m * unpadded_input_height;
      }
    }
  }
  ComputeTempMatrixSize(opts, computation);
  computation->ComputeDerived();
  computation->Check();
}


void PadComputationInputTime(const ConvolutionModel &model,
                             ConvolutionComputationIo *io) {
  if (model.time_offsets_modulus == 0) {
    // this can only happen if model.all_time_offsets.size() == 1,
    // and no padding could be required here.  We return to avoid
    // special cases below in Gcd().
    return;
  }
  int32 min_time_offset = *model.all_time_offsets.begin(),
      max_time_offset = *model.all_time_offsets.rbegin();

  // it makes everything much simpler if we just enforce that the stride of the
  // input divides model.time_offsets_modulus and also the output stride.
  // (enforcing this may make the input stride smaller).  This may in certain
  // very odd cases cause us to require more inputs [actually 'blanks'] than
  // we really need, but it avoids a lot of careful thought.
  int32 old_t_step_in = io->t_step_in;
  io->t_step_in = Gcd(io->t_step_in, model.time_offsets_modulus);
  if (io->t_step_out != 0)
    io->t_step_in = Gcd(io->t_step_in, io->t_step_out);

  // to ensure that we cover all the original input points, now that
  // we changed the stride we may need to increase num_t_in.
  io->num_t_in = 1 + (old_t_step_in * (io->num_t_in - 1)) / io->t_step_in;

  // by 'desired' we mean usable as an input, not necessarily
  // required in the sense of 'required_time_offsets'.
  int32 first_desired_input_t = io->start_t_out + min_time_offset;
  if (first_desired_input_t < io->start_t_in) {
    KALDI_ASSERT((io->start_t_in - first_desired_input_t) %
                 io->t_step_in == 0);
    io->num_t_in += (io->start_t_in - first_desired_input_t) / io->t_step_in;
    io->start_t_in = first_desired_input_t;
  }

  int32 last_desired_input_t =
      io->start_t_out + (io->num_t_out - 1) * io->t_step_out + max_time_offset,
      last_input_t = io->start_t_in + (io->num_t_in - 1) * io->t_step_in;
  // if the following assert fails, it means we had provided more input than
  // was needed, which is not expected.  This could cause problems later, in
  // AppendInputFrames().
  KALDI_ASSERT(last_desired_input_t >= last_input_t);
  if (last_desired_input_t > last_input_t) {
    KALDI_ASSERT((last_desired_input_t - last_input_t) %
                 io->t_step_in == 0);
    io->num_t_in += (last_desired_input_t - last_input_t) / io->t_step_in;
  }
}
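
// For example: if the input comes with t_step_in = 3 but the model has time
// offsets { -1, 0, 1 } (so time_offsets_modulus = 1), t_step_in is reduced to
// 1 and num_t_in grows to cover the same span; then start_t_in is moved down
// and num_t_in increased as needed so that every output t can reach all
// offsets in 'all_time_offsets'.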

// returns i rounded down to a multiple of n,
// e.g. RoundDownToMultipleOf(3, 2) = 2,
// RoundDownToMultipleOf(-1, 3) = -3
static int32 RoundDownToMultipleOf(int32 i, int32 n) {
  return n * DivideRoundingDown(i, n);
}


// shifts all time-offsets in the model (in 'offsets[*].time_offset',
// 'required_time_offsets', 'all_time_offsets') by adding 'shift' to them.
static void ShiftAllTimeOffsets(int32 shift,
                                ConvolutionModel *model) {
  { // shift 'offsets'.
    std::vector<ConvolutionModel::Offset>::iterator
        iter = model->offsets.begin(),
        end = model->offsets.end();
    for (; iter != end; ++iter)
      iter->time_offset += shift;
  }
  std::set<int32> temp;
  std::set<int32>::const_iterator iter;
  for (iter = model->required_time_offsets.begin();
       iter != model->required_time_offsets.end(); ++iter)
    temp.insert(*iter + shift);
  model->required_time_offsets.swap(temp);
  temp.clear();
  for (iter = model->all_time_offsets.begin();
       iter != model->all_time_offsets.end(); ++iter)
    temp.insert(*iter + shift);
  model->all_time_offsets.swap(temp);
}


/*
  \brief This function has been broken out of 'AppendInputFrames()' for
  clarity.  It deals with appending input frames together, in cases where the
  input stride is smaller than the output stride.

  \param [in,out] io  The input object representing the I/O of the convolution.
         It may be modified slightly by this function, in two respects.
         Firstly, if we are going to be reshaping the input into
         an input with fewer frames of larger dimension, we need to
         make sure the number of frames in the input of 'io' is a
         multiple of the relevant ratio, so we pad with zeros.
         Also, we may modify the stride of 'io' in cases where there
         is exactly one frame.  This is for convenience of implementation
         and does not affect the frames represented.
  \param [out] io_appended  The output object representing the I/O of the
         possibly-frame-appended computation.  This may be the same
         as 'io', but it won't be if the input stride is smaller than
         the output stride; in that case we need to append the frames.
         Note: at exit, 'io' and 'io_appended' will really represent
         two different 'views' of the same data, via a reshaping.

  \return  Returns the integer ratio >= 1 between the num-cols of the
         'appended' features and the original features; this also
         equals the number of frames we append together.
*/
static int32 PrepareIoForAppending(ConvolutionComputationIo *io,
                                   ConvolutionComputationIo *io_appended) {
  // first make sure that the output has nonzero stride (it would only have
  // zero stride if there was only one output time index, which is unusual).
  // if there's only one output time index we can set the stride to whatever
  // we want without affecting the list of output indexes.
  int32 ratio;
  if (io->t_step_out == 0) {
    KALDI_ASSERT(io->num_t_out == 1);
    io->t_step_out = io->t_step_in;
  }
  if (io->t_step_out == io->t_step_in) {
    // there is nothing to do; the output and input strides are the same.
    *io_appended = *io;
    ratio = 1;
    return ratio;
  }
  // Now, we ensured in PadComputationInputTime that if the output stride is
  // nonzero, then the input stride must divide the output stride; and if the
  // output stride was zero then we would have set it to the input stride just
  // above; and if both were zero we would have returned above.  So we can just
  // assert that the input stride divides the output stride.
  KALDI_ASSERT(io->t_step_out % io->t_step_in == 0);
  ratio = io->t_step_out / io->t_step_in;
  // ratio says how many input indexes we have for each output index,
  // ignoring end effects.  It is the number of input indexes we will
  // append together and 'pretend' are a single frame.

  // record this ratio in the 'input' I/O object, which we are also
  // modifying to record the extra required padding.
  io->reorder_t_in = ratio;
  if (io->num_t_in % ratio != 0) {
    // Round up the number of input frames to the nearest multiple (via
    // zero-padding) so we get a whole number of appended input frames.
    io->num_t_in += ratio - (io->num_t_in % ratio);
  }

  // OK, from this point we create the output io object.
  *io_appended = *io;
  io_appended->reorder_t_in = 1;
  io_appended->t_step_in = io->t_step_out;
  io_appended->num_t_in /= ratio;
  return ratio;
}
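
// For example: with t_step_in = 1 and t_step_out = 3 (subsampling by 3),
// ratio = 3: 'io' gets reorder_t_in = 3 and num_t_in rounded up to a multiple
// of 3, while 'io_appended' describes the same data as num_t_in / 3 frames of
// three times the original dimension, with t_step_in = 3.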

void AppendInputFrames(const ConvolutionModel &model,
                       ConvolutionComputationIo *io,
                       ConvolutionModel *model_appended,
                       ConvolutionComputationIo *io_appended) {
  int32 ratio = PrepareIoForAppending(io, io_appended);

  if (ratio == 1) {
    // we are not doing any appending of frames.
    *model_appended = model;
    return;
  }

  // we also need the time-step of the output (which is also now the
  // time-step of the appended input).
  // We know that the time step is not zero, because in that case we would
  // have ratio == 1 and would have returned above.
  int32 time_step_out = io_appended->t_step_out;
  KALDI_ASSERT(time_step_out == io_appended->t_step_in && time_step_out != 0);
  int32 orig_time_step_in = io->t_step_in;
  KALDI_ASSERT(orig_time_step_in * ratio == time_step_out);

  // make sure the difference between first input and output frames is what we
  // expect, else something could go wrong here.
  int32 first_time_offset = *(model.all_time_offsets.begin());
  KALDI_ASSERT(io->start_t_in - io->start_t_out == first_time_offset);

  ConvolutionModel model_temp(model);
  // shift so that the first time offset is zero.  this makes
  // the model conversion easier.
  ShiftAllTimeOffsets(-first_time_offset, &model_temp);

  model_appended->num_filters_in = model.num_filters_in;
  model_appended->num_filters_out = model.num_filters_out;
  model_appended->height_in = ratio * model.height_in;
  model_appended->height_out = model.height_out;
  model_appended->height_subsample_out = model.height_subsample_out;
  int32 num_offsets = model_temp.offsets.size(),
      old_height = model.height_in;
  model_appended->offsets.resize(num_offsets);
  model_appended->all_time_offsets.clear();
  for (int32 i = 0; i < num_offsets; i++) {
    const ConvolutionModel::Offset &old_offset = model_temp.offsets[i];
    ConvolutionModel::Offset &new_offset = model_appended->offsets[i];
    // The following two lines are important!!  They are the core of how
    // we handle subsampling in this framework.
    new_offset.time_offset = RoundDownToMultipleOf(old_offset.time_offset,
                                                   time_step_out);
    KALDI_ASSERT((old_offset.time_offset - new_offset.time_offset) %
                 orig_time_step_in == 0);
    int32 row_offset = (old_offset.time_offset - new_offset.time_offset) /
        orig_time_step_in;
    new_offset.height_offset = old_offset.height_offset +
        row_offset * old_height;
    model_appended->all_time_offsets.insert(new_offset.time_offset);
  }

  // Because the 'appended' model will always be used after zero-padding on
  // the time axis, we can just pretend that all desired time-offsets are
  // required.  It's a kind of free error-checking.
  model_appended->required_time_offsets = model_appended->all_time_offsets;

  // Undo the time-shifting that we did before.
  ShiftAllTimeOffsets(first_time_offset, model_appended);

  model_appended->ComputeDerived();
  KALDI_ASSERT(model_appended->Check(false, false));
}
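
// Worked example, assuming orig_time_step_in = 1 and time_step_out = 3, with
// model.height_in = 40: a shifted time offset of 4 becomes
// new_offset.time_offset = RoundDownToMultipleOf(4, 3) = 3 with row_offset =
// (4 - 3) / 1 = 1, so new_offset.height_offset grows by 1 * 40: within the
// appended frame (three original frames stacked along the height axis), the
// offset now addresses the second original frame.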

void ConvolutionComputation::ComputeDerived() {
  KALDI_ASSERT(!steps.empty());

  int32 input_dim = height_in * num_filters_in;

  int32 largest_required_temp_cols = 0;
  for (std::vector<ConvolutionStep>::iterator iter = steps.begin();
       iter != steps.end(); ++iter) {
    ConvolutionStep &step = *iter;
    std::vector<int32> columns;
    int32 temp_height = step.height_map.size();
    columns.resize(temp_height * num_filters_in);
    for (int32 h = 0; h < temp_height; h++) {
      KALDI_ASSERT(step.height_map[h] >= -1 && step.height_map[h] < height_in);
      if (step.height_map[h] != -1) {
        for (int32 f = 0; f < num_filters_in; f++)
          columns[h * num_filters_in + f] =
              step.height_map[h] * num_filters_in + f;
      } else {
        for (int32 f = 0; f < num_filters_in; f++)
          columns[h * num_filters_in + f] = -1;
      }
    }
    step.columns.CopyFromVec(columns);
    std::vector<std::vector<int32> > backward_columns;
    ReverseColumnMapping(columns, input_dim, &backward_columns);
    step.backward_columns.resize(backward_columns.size());
    for (size_t i = 0; i < backward_columns.size(); i++)
      step.backward_columns[i].CopyFromVec(backward_columns[i]);

    // we could replace height_map with columns in the line below and get the
    // same answer, but it would be a little slower.
    step.columns_are_contiguous =
        (step.height_map[0] != -1 && VectorIsContiguous(step.height_map));
    step.first_column = columns[0];

    bool need_temp_matrix =
        !(step.columns_are_contiguous && step.height_map[0] == 0 &&
          step.height_map.size() == height_in);
    if (need_temp_matrix) {
      largest_required_temp_cols = std::max<int32>(
          largest_required_temp_cols, static_cast<int32>(columns.size()));
    }
  }
  KALDI_ASSERT(temp_cols == largest_required_temp_cols);
}


// returns true if the time value 't' is one of the
// time values available on the input of 'io'.
static bool TimeValueInInput(const ConvolutionComputationIo &io,
                             int32 t) {
  int32 t_step_in = std::max<int32>(1, io.t_step_in);
  return (t >= io.start_t_in &&
          t < io.start_t_in + (t_step_in * io.num_t_in) &&
          (t - io.start_t_in) % t_step_in == 0);
}

void CheckModelAndIo(const ConvolutionModel &model,
                     const ConvolutionComputationIo &io,
                     bool allow_extra_input) {
  KALDI_ASSERT(io.num_t_in > 0 && io.num_t_out > 0 &&
               !model.required_time_offsets.empty() &&
               !model.all_time_offsets.empty());
  if (!allow_extra_input) {
    KALDI_ASSERT(io.start_t_in >= io.start_t_out +
                 *model.all_time_offsets.begin());
    int32 last_t_in = io.start_t_in + io.t_step_in * (io.num_t_in - 1),
        last_t_out = io.start_t_out + io.t_step_out * (io.num_t_out - 1);
    KALDI_ASSERT(last_t_in <= last_t_out +
                 *model.all_time_offsets.rbegin());
  }

  std::set<int32> input_times_to_check;
  for (int32 n = 0; n < std::min(5, io.num_t_out); n++) {
    int32 t_out = io.start_t_out +
        RandInt(0, io.num_t_out - 1) * io.t_step_out;
    for (std::set<int32>::const_iterator iter =
             model.required_time_offsets.begin();
         iter != model.required_time_offsets.end();
         ++iter) {
      int32 offset = *iter;
      input_times_to_check.insert(t_out + offset);
    }
  }
  for (std::set<int32>::const_iterator iter = input_times_to_check.begin();
       iter != input_times_to_check.end(); ++iter) {
    int32 t = *iter;
    if (!TimeValueInInput(io, t)) {
      KALDI_ERR << "Error checking model and IO: time " << t
                << " is required but not in the input.";
    }
  }
}


void CompileConvolutionComputation(
    const ConvolutionModel &model,
    const std::vector<Index> &input_indexes,
    const std::vector<Index> &output_indexes,
    const ConvolutionComputationOptions &opts,
    ConvolutionComputation *computation,
    std::vector<Index> *input_indexes_modified,
    std::vector<Index> *output_indexes_modified) {

  // stage zero [preparing the input and output in a regular grid.]
  ConvolutionComputationIo io;
  GetComputationIo(input_indexes, output_indexes, &io);
1380  CheckModelAndIo(model, io, false);
1381 
1382  // stage 1.
1383  PadComputationInputTime(model, &io);
1384 
1385  CheckModelAndIo(model, io, false);
1386 
1387  // stage 2.
1388  ConvolutionModel model_padded;
1389  PadModelHeight(model, &model_padded);
1390 
1391  CheckModelAndIo(model_padded, io, false);
1392 
1393  // stage 3.
1394  ConvolutionModel model_appended;
1395  ConvolutionComputationIo io_appended;
1396  // make a 'fake' model and io for possibly-appended input frames. 'io' is
1397  // non-const because we may need to pad with a few extra frames.
1398  AppendInputFrames(model_padded, &io,
1399  &model_appended, &io_appended);
1400 
1401  CheckModelAndIo(model_appended, io_appended, true);
1402 
1403  // stage 4.
1404  MakeComputation(model_appended, io_appended, opts, computation);
1405 
1406  // 'reverse' of stage 2. [stage 3 kind of does its own
1407  // 'reverse' by modifying its input IO object.]
1408  // The computation is still specified for the appended input,
1409  // but the execution code can figure that out itself.
1410  UnPadModelHeight(opts, model, model_padded, computation);
1411 
1412  GetIndexesForComputation(io, input_indexes, output_indexes,
1413  input_indexes_modified, output_indexes_modified);
1414 }
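// A minimal usage sketch (illustrative; the surrounding setup and dimensions
// are assumptions, not taken from this file):
//
//   ConvolutionComputation computation;
//   std::vector<Index> input_modified, output_modified;
//   CompileConvolutionComputation(model, input_indexes, output_indexes,
//                                 opts, &computation,
//                                 &input_modified, &output_modified);
//   // The caller then arranges its input rows in the order given by
//   // 'input_modified' (and expects output rows in the order of
//   // 'output_modified') before calling
//   // ConvolveForward(computation, input, params, &output).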
1415 
1416 
1417 // Returns the greatest common divisor of the differences between the values in
1418 // 'vec', or zero if the vector has zero or one element. It is an error if
1419 // 'vec' has repeated elements (which could cause a crash in 'Gcd').
1420 static int32 FindGcdOfDifferences(std::vector<int32> &vec) {
1421  size_t size = vec.size();
1422  int32 ans = 0;
1423  for (size_t i = 0; i + 1 < size; i++) {
1424  int32 diff = vec[i+1] - vec[i];
1425  // diff should not be zero.
1426  ans = Gcd(ans, diff);
1427  }
1428  return ans;
1429 }
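// Illustration (hypothetical values): for vec = { 11, 15, 23 } the successive
// differences are 4 and 8, so the answer is Gcd(Gcd(0, 4), 8) = 4; for an
// empty or single-element vector the loop never runs and 0 is returned.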
1430 
1431 static void RegularizeTList(std::vector<int32> &t_values,
1432  int32 *start,
1433  int32 *step,
1434  int32 *num_values) {
1435  KALDI_ASSERT(!t_values.empty() && IsSortedAndUniq(t_values));
1436  *start = t_values[0];
1437  *step = FindGcdOfDifferences(t_values);
1438  if (*step == 0) {
1439  KALDI_ASSERT(t_values.size() == 1);
1440  *num_values = 1;
1441  } else {
1442  int32 last_value = t_values.back();
1443  *num_values = 1 + (last_value - *start) / *step;
1444  KALDI_ASSERT((last_value - *start) % *step == 0);
1445  }
1446 }
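// Illustration (hypothetical values): t_values = { 5, 9, 17 } yields
// *start = 5, *step = Gcd(4, 8) = 4 and *num_values = 1 + (17 - 5) / 4 = 4,
// i.e. the regular grid { 5, 9, 13, 17 }; the value 13, absent from the
// original list, is later marked as a gap by SetSomeIndexesBlank().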
1447 
1448 
1449 
1470 static void CreateIndexes(const std::vector<std::pair<int32, int32> > &n_x_pairs,
1471  int32 t_start, int32 t_step, int32 num_t_values,
1472  int32 reorder_t, std::vector<Index> *indexes) {
1473  KALDI_ASSERT(reorder_t >= 1 && num_t_values % reorder_t == 0 && t_step >= 0);
1474  if (t_step == 0) {
1475  KALDI_ASSERT(num_t_values == 1);
1476  t_step = 1;
1477  }
1478  int32 num_n_x_pairs = n_x_pairs.size();
1479  indexes->clear();
1480  indexes->reserve(num_n_x_pairs * num_t_values);
1481  int32 outer_t_step = t_step * reorder_t,
1482  t_end = t_start + (num_t_values * t_step);
1483  Index index;
1484  for (int32 t_block = t_start; t_block < t_end; t_block += outer_t_step) {
1485  for (int32 nx = 0; nx < num_n_x_pairs; nx++) {
1486  index.n = n_x_pairs[nx].first;
1487  index.x = n_x_pairs[nx].second;
1488  for (int32 t = t_block; t < t_block + outer_t_step; t += t_step) {
1489  index.t = t;
1490  indexes->push_back(index);
1491  }
1492  }
1493  }
1494  // we can remove the next assert after a while.
1495  KALDI_ASSERT(indexes->size() == num_n_x_pairs * num_t_values);
1496 }
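// Illustration (hypothetical values): with n_x_pairs = { (0,0), (1,0) },
// t_start = 0, t_step = 1, num_t_values = 4 and reorder_t = 2, the (n, t)
// sequence produced is
//   (0,0) (0,1) (1,0) (1,1) (0,2) (0,3) (1,2) (1,3),
// i.e. each (n, x) pair keeps blocks of reorder_t consecutive t values
// together, and the pairs cycle within each outer t block.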
1497 
1505 static void SetSomeIndexesBlank(const std::vector<Index> &ref_indexes,
1506  std::vector<Index> *indexes) {
1507  std::unordered_set<Index, IndexHasher> ref_set;
1508  for (std::vector<Index>::const_iterator iter = ref_indexes.begin();
1509  iter != ref_indexes.end(); ++iter)
1510  ref_set.insert(*iter);
1511 
1512  for (std::vector<Index>::iterator iter = indexes->begin();
1513  iter != indexes->end(); ++iter) {
1514  if (ref_set.count(*iter) == 0)
1515  iter->t = kNoTime;
1516  }
1517 }
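// Illustration: continuing the RegularizeTList example, if the padded grid
// contains an Index with t = 13 that never appeared in ref_indexes, its 't'
// is set to kNoTime here; downstream code can then treat that row as a
// placeholder whose value does not matter.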
1518 
1519 void GetComputationIo(
1520  const std::vector<Index> &input_indexes,
1521  const std::vector<Index> &output_indexes,
1522  ConvolutionComputationIo *io) {
1523  std::vector<std::pair<int32, int32> > n_x_pairs;
1524  GetNxList(input_indexes, &n_x_pairs);
1525  KALDI_ASSERT(!n_x_pairs.empty());
1526  io->num_images = n_x_pairs.size();
1527  if (GetVerboseLevel() >= 3) { // a debugging step.
1528  std::vector<std::pair<int32, int32> > n_x_pairs_2;
1529  GetNxList(output_indexes, &n_x_pairs_2);
1530  KALDI_ASSERT(n_x_pairs_2 == n_x_pairs);
1531  }
1532  std::vector<int32> t_values;
1533  GetTList(input_indexes, &t_values);
1534  RegularizeTList(t_values, &(io->start_t_in),
1535  &(io->t_step_in), &(io->num_t_in));
1536  GetTList(output_indexes, &t_values);
1537  RegularizeTList(t_values, &(io->start_t_out),
1538  &(io->t_step_out), &(io->num_t_out));
1539  io->reorder_t_in = 1;
1540 }
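// Note: io->reorder_t_in is initialized to 1 here; a later compilation stage
// (AppendInputFrames) may raise it above 1, which changes the input-index
// ordering that CreateIndexes() produces in GetIndexesForComputation().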
1541 
1542 
1543 void GetIndexesForComputation(
1544  const ConvolutionComputationIo &io,
1545  const std::vector<Index> &orig_input_indexes,
1546  const std::vector<Index> &orig_output_indexes,
1547  std::vector<Index> *input_indexes,
1548  std::vector<Index> *output_indexes) {
1549  std::unordered_set<Index, IndexHasher> input_set, output_set;
1550  for (std::vector<Index>::const_iterator iter = orig_input_indexes.begin();
1551  iter != orig_input_indexes.end(); ++iter)
1552  input_set.insert(*iter);
1553  for (std::vector<Index>::const_iterator iter = orig_output_indexes.begin();
1554  iter != orig_output_indexes.end(); ++iter)
1555  output_set.insert(*iter);
1556  std::vector<std::pair<int32, int32> > n_x_pairs;
1557  GetNxList(orig_input_indexes, &n_x_pairs);
1558  KALDI_ASSERT(n_x_pairs.size() == io.num_images);
1559  CreateIndexes(n_x_pairs, io.start_t_in, io.t_step_in, io.num_t_in,
1560  io.reorder_t_in, input_indexes);
1561  SetSomeIndexesBlank(orig_input_indexes, input_indexes);
1562  CreateIndexes(n_x_pairs, io.start_t_out, io.t_step_out, io.num_t_out,
1563  1, output_indexes);
1564  SetSomeIndexesBlank(orig_output_indexes, output_indexes);
1565 }
1566 
1567 
1568 void MakeComputation(const ConvolutionModel &model,
1569  ConvolutionComputationIo &io,
1570  const ConvolutionComputationOptions &opts,
1571  ConvolutionComputation *computation) {
1572  KALDI_ASSERT(io.t_step_in == io.t_step_out);
1573  computation->num_filters_in = model.num_filters_in;
1574  computation->num_filters_out = model.num_filters_out;
1575  computation->height_in = model.height_in;
1576  computation->height_out = model.height_out;
1577  computation->num_t_in = io.num_t_in;
1578  computation->num_t_out = io.num_t_out;
1579  computation->num_images = io.num_images;
1580  KALDI_ASSERT(io.reorder_t_in == 1);
1581  // first work out the steps of the computation, then
1582  // work out the dim of the temp matrix
1583 
1585  // Each distinct value of 'time_offset' in model.offsets
1586  // becomes one step of the computation.
1587 
1588  // if io.t_step_in was zero, use 1 (so divisions and the like will work as
1589  // expected).
1590  int32 t_step = std::max<int32>(1, io.t_step_in),
1591  num_t_extra = io.num_t_in - io.num_t_out;
1592 
1593  computation->steps.clear();
1594 
1595  int32 num_offsets = model.offsets.size(),
1596  cur_start_offset = 0, cur_end_offset = 0;
1597  for(; cur_start_offset < num_offsets; cur_start_offset = cur_end_offset) {
1598  cur_end_offset = cur_start_offset;
1599  while (cur_end_offset < num_offsets &&
1600  model.offsets[cur_end_offset].time_offset ==
1601  model.offsets[cur_start_offset].time_offset)
1602  cur_end_offset++;
1603  // we are processing the range of indexes into 'offsets'
1604  // from cur_start_offset to cur_end_offset - 1.
1605  int32 this_num_offsets = cur_end_offset - cur_start_offset;
1606  int32 time_offset = model.offsets[cur_start_offset].time_offset;
1607 
1608  ConvolutionComputation::ConvolutionStep step;
1609  // modified_time_offset will be used in working out the 'input_time_shift'
1610  // that determines which submatrix of the input matrix we'll use.
1611  // It equals the time-offset corrected for any time-difference between
1612  // the start of the output and of the input.
1613  int32 modified_time_offset = time_offset + io.start_t_out - io.start_t_in;
1614  KALDI_ASSERT(modified_time_offset >= 0 &&
1615  modified_time_offset % t_step == 0);
1616  step.input_time_shift = modified_time_offset / t_step;
1617  KALDI_ASSERT(step.input_time_shift <= num_t_extra);
1618  step.params_start_col = model.num_filters_in * cur_start_offset;
1619  step.height_map.clear();
1620  step.height_map.reserve(model.height_out * this_num_offsets);
1621  for (int32 h_out = 0;
1622  h_out < model.height_out * model.height_subsample_out;
1623  h_out += model.height_subsample_out) {
1624  for (int32 o = cur_start_offset; o < cur_end_offset; o++) {
1625  int32 this_height_offset = model.offsets[o].height_offset,
1626  h_in = h_out + this_height_offset;
1627  // by the time we call MakeComputation, the user should already have
1628  // called PadModelHeight, so there should be no need for zero padding on
1629  // the height axis, hence the following check. [we'll later modify the
1630  // resulting computation in UnPadModelHeight, and that's where
1631  // zero-padding gets taken account of.]
1632  KALDI_ASSERT(h_in >= 0 && h_in < model.height_in);
1633  step.height_map.push_back(h_in);
1634  }
1635  }
1636  computation->steps.push_back(step);
1637  }
1638  ComputeTempMatrixSize(opts, computation);
1639 }
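// Illustration (hypothetical values): for a 3x3 kernel, model.offsets holds
// nine (time_offset, height_offset) pairs such as
//   (-1,-1) (-1,0) (-1,1) (0,-1) (0,0) (0,1) (1,-1) (1,0) (1,1);
// the loop above groups them into three steps (time offsets -1, 0, 1), each
// with this_num_offsets = 3 and a height_map listing, for every output
// height, the input heights h_out - 1, h_out and h_out + 1.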
1640 
1641 
1642 void ConvolutionComputationIo::Write(std::ostream &os, bool binary) const {
1643  WriteToken(os, binary, "<ConvCompIo>");
1644  WriteBasicType(os, binary, num_images);
1645  WriteBasicType(os, binary, start_t_in);
1646  WriteBasicType(os, binary, t_step_in);
1647  WriteBasicType(os, binary, num_t_in);
1648  WriteBasicType(os, binary, start_t_out);
1649  WriteBasicType(os, binary, t_step_out);
1650  WriteBasicType(os, binary, num_t_out);
1651  WriteBasicType(os, binary, reorder_t_in);
1652  WriteToken(os, binary, "</ConvCompIo>");
1653 }
1654 
1655 
1656 void ConvolutionComputationIo::Read(std::istream &is, bool binary) {
1657  ExpectToken(is, binary, "<ConvCompIo>");
1658  ReadBasicType(is, binary, &num_images);
1659  ReadBasicType(is, binary, &start_t_in);
1660  ReadBasicType(is, binary, &t_step_in);
1661  ReadBasicType(is, binary, &num_t_in);
1662  ReadBasicType(is, binary, &start_t_out);
1663  ReadBasicType(is, binary, &t_step_out);
1664  ReadBasicType(is, binary, &num_t_out);
1665  ReadBasicType(is, binary, &reorder_t_in);
1666  ExpectToken(is, binary, "</ConvCompIo>");
1667 }
1668 
1669 } // namespace time_height_convolution
1670 } // namespace nnet3
1671 } // namespace kaldi