// nnet3/nnet-test-utils.cc

// Copyright 2015  Johns Hopkins University (author: Daniel Povey)
// Copyright 2015  Vijayaditya Peddinti
// Copyright 2016  Daniel Galvez

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <iterator>
#include <sstream>

#include "nnet3/nnet-test-utils.h"
#include "nnet3/nnet-utils.h"
27 namespace kaldi {
28 namespace nnet3 {
29 
30 
31 // A super-simple case that is just a single affine component, no nonlinearity,
32 // no splicing.
34  const NnetGenerationOptions &opts,
35  std::vector<std::string> *configs) {
36  std::ostringstream os;
37 
38  int32 input_dim = 10 + Rand() % 20,
39  output_dim = (opts.output_dim > 0 ?
40  opts.output_dim :
41  100 + Rand() % 200);
42 
43 
44  os << "component name=affine1 type=AffineComponent input-dim="
45  << input_dim << " output-dim=" << output_dim << std::endl;
46 
47  os << "input-node name=input dim=" << input_dim << std::endl;
48  os << "component-node name=affine1_node component=affine1 input=input\n";
49  os << "output-node name=output input=affine1_node\n";
50  configs->push_back(os.str());
51 }
52 
53 // A setup with context and an affine component, but no nonlinearity.
55  const NnetGenerationOptions &opts,
56  std::vector<std::string> *configs) {
57  std::ostringstream os;
58 
59  std::vector<int32> splice_context;
60  for (int32 i = -5; i < 4; i++)
61  if (Rand() % 3 == 0)
62  splice_context.push_back(i);
63  if (splice_context.empty())
64  splice_context.push_back(0);
65 
66  int32 input_dim = 10 + Rand() % 20,
67  spliced_dim = input_dim * splice_context.size(),
68  output_dim = (opts.output_dim > 0 ?
69  opts.output_dim :
70  100 + Rand() % 200);
71 
72  if (RandInt(0,1) == 0) {
73  // do it the traditional way with an AffineComponent and an Append() expression.
74  os << "component name=affine1 type=AffineComponent input-dim="
75  << spliced_dim << " output-dim=" << output_dim << std::endl;
76 
77  os << "input-node name=input dim=" << input_dim << std::endl;
78 
79  os << "component-node name=affine1_node component=affine1 input=Append(";
80  for (size_t i = 0; i < splice_context.size(); i++) {
81  int32 offset = splice_context[i];
82  os << "Offset(input, " << offset << ")";
83  if (i + 1 < splice_context.size())
84  os << ", ";
85  }
86  os << ")\n";
87  os << "output-node name=output input=affine1_node\n";
88  } else {
89  os << "component name=tdnn1 type=TdnnComponent input-dim="
90  << input_dim << " output-dim=" << output_dim
91  << " time-offsets=";
92  for (size_t i = 0; i < splice_context.size(); i++) {
93  if (i>0) os << ',';
94  os << splice_context[i];
95  }
96  os << " use-bias=" << (RandInt(0,1) == 0 ? "true":"false")
97  << " use-natural-gradient=" << (RandInt(0,1) == 0 ? "true":"false")
98  << std::endl;
99  os << "input-node name=input dim=" << input_dim << std::endl;
100  os << "component-node name=tdnn1_node component=tdnn1 input=input\n";
101  os << "output-node name=output input=tdnn1_node\n";
102  }
103  configs->push_back(os.str());
104 }
105 
106 
107 
108 // A simple case, just to get started.
109 // Generate a single config with one input, splicing, and one hidden layer.
110 // Also sometimes generate a part of the config that adds a new hidden layer.
112  const NnetGenerationOptions &opts,
113  std::vector<std::string> *configs) {
114  std::ostringstream os;
115 
116  std::vector<int32> splice_context;
117  for (int32 i = -5; i < 4; i++)
118  if (Rand() % 3 == 0)
119  splice_context.push_back(i);
120  if (splice_context.empty())
121  splice_context.push_back(0);
122 
123  int32 input_dim = 10 + Rand() % 20,
124  output_dim = (opts.output_dim > 0 ?
125  opts.output_dim :
126  100 + Rand() % 200),
127  hidden_dim = 40 + Rand() % 50;
128  int32 ivector_dim = 10 + Rand() % 20;
129  if (RandInt(0, 1) == 0 || !opts.allow_ivector)
130  ivector_dim = 0;
131  int32 spliced_dim = input_dim * splice_context.size() + ivector_dim;
132 
133  bool use_final_nonlinearity = (opts.allow_final_nonlinearity &&
134  RandInt(0, 1) == 0);
135  bool use_batch_norm = (RandInt(0, 1) == 0);
136 
137  os << "component name=affine1 type=NaturalGradientAffineComponent input-dim="
138  << spliced_dim << " output-dim=" << hidden_dim << std::endl;
139  os << "component name=relu1 type=RectifiedLinearComponent dim="
140  << hidden_dim << std::endl;
141  if (use_batch_norm) {
142  int32 block_dim = (hidden_dim % 2 == 0 ? hidden_dim / 2 : hidden_dim);
143  os << "component name=batch-norm type=BatchNormComponent dim="
144  << hidden_dim << " block-dim=" << block_dim
145  << " target-rms=2.0";
146  if (RandInt(0, 1) == 0)
147  os << " epsilon=3.0";
148  os << '\n';
149  }
150  os << "component name=final_affine type=NaturalGradientAffineComponent input-dim="
151  << hidden_dim << " output-dim=" << output_dim << std::endl;
152  if (use_final_nonlinearity) {
153  if (Rand() % 2 == 0) {
154  os << "component name=logsoftmax type=SoftmaxComponent dim="
155  << output_dim << std::endl;
156  } else {
157  os << "component name=logsoftmax type=LogSoftmaxComponent dim="
158  << output_dim << std::endl;
159  }
160  }
161  os << "input-node name=input dim=" << input_dim << std::endl;
162  if (ivector_dim != 0)
163  os << "input-node name=ivector dim=" << ivector_dim << std::endl;
164 
165  os << "component-node name=affine1_node component=affine1 input=Append(";
166  if (ivector_dim != 0)
167  os << "ReplaceIndex(ivector, t, 0), ";
168  for (size_t i = 0; i < splice_context.size(); i++) {
169  int32 offset = splice_context[i];
170  if (RandInt(0, 1) == 0) {
171  os << "Offset(input, " << offset << ")";
172  } else {
173  // testing the Scale() expression.
174  os << "Scale(-1, Offset(input, " << offset << "))";
175  }
176  if (i + 1 < splice_context.size())
177  os << ", ";
178  }
179  os << ")\n";
180  if (RandInt(0, 1) == 0) {
181  os << "component-node name=nonlin1 component=relu1 input=affine1_node\n";
182  } else if (RandInt(0, 1) == 0) {
183  os << "component-node name=nonlin1 component=relu1 input=Scale(-1.0, affine1_node)\n";
184  } else {
185  os << "component-node name=nonlin1 component=relu1 input=Sum(Const(1.0, "
186  << hidden_dim << "), Scale(-1.0, affine1_node))\n";
187  }
188  if (use_batch_norm) {
189  os << "component-node name=batch-norm component=batch-norm input=nonlin1\n";
190  os << "component-node name=final_affine component=final_affine input=batch-norm\n";
191  } else {
192  os << "component-node name=final_affine component=final_affine input=nonlin1\n";
193  }
194  if (use_final_nonlinearity) {
195  os << "component-node name=output_nonlin component=logsoftmax input=final_affine\n";
196  os << "output-node name=output input=output_nonlin\n";
197  } else {
198  os << "output-node name=output input=final_affine\n";
199  }
200  configs->push_back(os.str());
201 
202  if ((Rand() % 2) == 0) {
203  std::ostringstream os2;
204  os2 << "component name=affine2 type=NaturalGradientAffineComponent input-dim="
205  << hidden_dim << " output-dim=" << hidden_dim << std::endl;
206  os2 << "component name=relu2 type=RectifiedLinearComponent dim="
207  << hidden_dim << std::endl;
208  // regenerate the final_affine component when we add the new config.
209  os2 << "component name=final_affine type=NaturalGradientAffineComponent input-dim="
210  << hidden_dim << " output-dim=" << output_dim << std::endl;
211  os2 << "component-node name=affine2 component=affine2 input=nonlin1\n";
212  os2 << "component-node name=relu2 component=relu2 input=affine2\n";
213  os2 << "component-node name=final_affine component=final_affine input=relu2\n";
214  configs->push_back(os2.str());
215  }
216 }
217 
218 
220  const NnetGenerationOptions &opts,
221  std::vector<std::string> *configs) {
222  int32 input_dim = RandInt(10, 30),
223  input_period = RandInt(1, 3),
224  stats_period = input_period * RandInt(1, 3),
225  left_context = stats_period * RandInt(1, 10),
226  right_context = stats_period * RandInt(1, 10),
227  log_count_features = RandInt(0, 3);
228  BaseFloat variance_floor = RandInt(1, 10) * 1.0e-10;
229  bool output_stddevs = (RandInt(0, 1) == 0);
230 
231  int32 raw_stats_dim = 1 + input_dim + (output_stddevs ? input_dim : 0),
232  pooled_stats_dim = log_count_features + input_dim +
233  (output_stddevs ? input_dim : 0);
234  std::ostringstream os;
235  os << "input-node name=input dim=" << input_dim << std::endl;
236  os << "component name=statistics-extraction type=StatisticsExtractionComponent "
237  << "input-dim=" << input_dim << " input-period=" << input_period
238  << " output-period=" << stats_period << " include-variance="
239  << std::boolalpha << output_stddevs << "\n";
240 
241  os << "component name=statistics-pooling type=StatisticsPoolingComponent "
242  << "input-dim=" << raw_stats_dim << " input-period=" << stats_period
243  << " left-context=" << left_context << " right-context=" << right_context
244  << " num-log-count-features=" << log_count_features << " output-stddevs="
245  << std::boolalpha << output_stddevs << " variance-floor="
246  << variance_floor << "\n";
247 
248  os << "component name=affine type=AffineComponent "
249  << "input-dim=" << input_dim << " output-dim=" << pooled_stats_dim
250  << "\n";
251 
252  os << "component-node name=statistics-extraction component=statistics-extraction "
253  << "input=input\n";
254  os << "component-node name=statistics-pooling component=statistics-pooling "
255  << "input=statistics-extraction\n";
256  os << "component-node name=affine component=affine input=input\n";
257  os << "output-node name=output input=Sum(affine, Round(statistics-pooling, "
258  << stats_period << "))\n";
259  configs->push_back(os.str());
260 }
261 
262 // This generates a single config corresponding to an RNN.
264  const NnetGenerationOptions &opts,
265  std::vector<std::string> *configs) {
266  std::ostringstream os;
267 
268  std::vector<int32> splice_context;
269  for (int32 i = -5; i < 4; i++)
270  if (Rand() % 3 == 0)
271  splice_context.push_back(i);
272  if (splice_context.empty())
273  splice_context.push_back(0);
274 
275  int32 input_dim = 10 + Rand() % 20,
276  spliced_dim = input_dim * splice_context.size(),
277  output_dim = (opts.output_dim > 0 ?
278  opts.output_dim :
279  100 + Rand() % 200),
280  hidden_dim = 40 + Rand() % 50;
281  os << "component name=affine1 type=NaturalGradientAffineComponent input-dim="
282  << spliced_dim << " output-dim=" << hidden_dim << std::endl;
283  if (RandInt(0, 1) == 0) {
284  os << "component name=nonlin1 type=RectifiedLinearComponent dim="
285  << hidden_dim << std::endl;
286  } else {
287  os << "component name=nonlin1 type=TanhComponent dim="
288  << hidden_dim << std::endl;
289  }
290  os << "component name=recurrent_affine1 type=NaturalGradientAffineComponent input-dim="
291  << hidden_dim << " output-dim=" << hidden_dim << std::endl;
292  os << "component name=affine2 type=NaturalGradientAffineComponent input-dim="
293  << hidden_dim << " output-dim=" << output_dim << std::endl;
294  os << "component name=logsoftmax type=LogSoftmaxComponent dim="
295  << output_dim << std::endl;
296  os << "input-node name=input dim=" << input_dim << std::endl;
297 
298  os << "component-node name=affine1_node component=affine1 input=Append(";
299  for (size_t i = 0; i < splice_context.size(); i++) {
300  int32 offset = splice_context[i];
301  os << "Offset(input, " << offset << ")";
302  if (i + 1 < splice_context.size())
303  os << ", ";
304  }
305  os << ")\n";
306  os << "component-node name=recurrent_affine1 component=recurrent_affine1 "
307  "input=Offset(nonlin1, -1)\n";
308  os << "component-node name=nonlin1 component=nonlin1 "
309  "input=Sum(affine1_node, IfDefined(recurrent_affine1))\n";
310  os << "component-node name=affine2 component=affine2 input=nonlin1\n";
311  os << "component-node name=output_nonlin component=logsoftmax input=affine2\n";
312  os << "output-node name=output input=output_nonlin\n";
313  configs->push_back(os.str());
314 }
315 
316 
317 
318 // This generates a config sequence for what I *think* is a clockwork RNN, in
319 // that different parts operate at different speeds. The output layer is
320 // evaluated every frame, but the internal RNN layer is evaluated every 3
321 // frames.
323  const NnetGenerationOptions &opts,
324  std::vector<std::string> *configs) {
325  std::ostringstream os;
326 
327  std::vector<int32> splice_context;
328  for (int32 i = -5; i < 4; i++)
329  if (Rand() % 3 == 0)
330  splice_context.push_back(i);
331  if (splice_context.empty())
332  splice_context.push_back(0);
333 
334  int32 input_dim = 10 + Rand() % 20,
335  spliced_dim = input_dim * splice_context.size(),
336  output_dim = (opts.output_dim > 0 ?
337  opts.output_dim :
338  100 + Rand() % 200),
339  hidden_dim = 40 + Rand() % 50;
340  os << "component name=affine1 type=NaturalGradientAffineComponent input-dim="
341  << spliced_dim << " output-dim=" << hidden_dim << std::endl;
342  os << "component name=nonlin1 type=RectifiedLinearComponent dim="
343  << hidden_dim << std::endl;
344  os << "component name=recurrent_affine1 type=NaturalGradientAffineComponent input-dim="
345  << hidden_dim << " output-dim=" << hidden_dim << std::endl;
346  // the suffix _0, _1, _2 equals the index of the output-frame modulo 3; there
347  // are 3 versions of the final affine layer. There was a paper by Vincent
348  // Vanhoucke about something like this.
349  os << "component name=final_affine_0 type=NaturalGradientAffineComponent input-dim="
350  << hidden_dim << " output-dim=" << output_dim << std::endl;
351  os << "component name=final_affine_1 type=NaturalGradientAffineComponent input-dim="
352  << hidden_dim << " output-dim=" << output_dim << std::endl;
353  os << "component name=final_affine_2 type=NaturalGradientAffineComponent input-dim="
354  << hidden_dim << " output-dim=" << output_dim << std::endl;
355  os << "component name=logsoftmax type=LogSoftmaxComponent dim="
356  << output_dim << std::endl;
357  os << "input-node name=input dim=" << input_dim << std::endl;
358 
359  os << "component-node name=affine1_node component=affine1 input=Append(";
360  for (size_t i = 0; i < splice_context.size(); i++) {
361  int32 offset = splice_context[i];
362  os << "Offset(input, " << offset << ")";
363  if (i + 1 < splice_context.size())
364  os << ", ";
365  }
366  os << ")\n";
367  os << "component-node name=recurrent_affine1 component=recurrent_affine1 "
368  "input=Offset(nonlin1, -1)\n";
369  os << "component-node name=nonlin1 component=nonlin1 "
370  "input=Sum(affine1_node, IfDefined(recurrent_affine1))\n";
371  os << "component-node name=final_affine_0 component=final_affine_0 input=nonlin1\n";
372  os << "component-node name=final_affine_1 component=final_affine_1 input=Offset(nonlin1, -1)\n";
373  os << "component-node name=final_affine_2 component=final_affine_2 input=Offset(nonlin1, 1)\n";
374  os << "component-node name=output_nonlin component=logsoftmax input=Switch(final_affine_0, final_affine_1, final_affine_2)\n";
375  os << "output-node name=output input=output_nonlin\n";
376  configs->push_back(os.str());
377 }
378 
379 
380 
381 // This generates a single config corresponding to an LSTM.
382 // based on the equations in
383 // Sak et. al. "LSTM based RNN architectures for LVCSR", 2014
384 // We name the components based on the following equations (Eqs 7-15 in paper)
385 // i(t) = S(Wix * x(t) + Wir * r(t-1) + Wic * c(t-1) + bi)
386 // f(t) = S(Wfx * x(t) + Wfr * r(t-1) + Wfc * c(t-1) + bf)
387 // c(t) = {f(t) .* c(t-1)} + {i(t) .* g(Wcx * x(t) + Wcr * r(t-1) + bc)}
388 // o(t) = S(Wox * x(t) + Wor * r(t-1) + Woc * c(t) + bo)
389 // m(t) = o(t) .* h(c(t))
390 // r(t) = Wrm * m(t)
391 // p(t) = Wpm * m(t)
392 // y(t) = Wyr * r(t) + Wyp * p(t) + by
393 // where S : sigmoid
394 // matrix with feed-forward connections
395 // from the input x(t)
396 // W*x = [Wix^T, Wfx^T, Wcx^T, Wox^T]^T
397 
398 // matrix with recurrent (feed-back) connections
399 // from the output projection
400 // W*r = [Wir^T, Wfr^T, Wcr^T, Wor^T]^T
401 
402 // matrix to generate r(t) and p(t)
403 // m(t)
404 // W*m = [Wrm^T, Wpm^T]^T
405 // matrix to generate y(t)
406 // Wy* = [Wyr^T, Wyp^T]
407 
408 // Diagonal matrices with recurrent connections and feed-forward connections
409 // from the cell output c(t) since these can be both recurrent and
410 // feed-forward we dont combine the matrices
411 // Wic, Wfc, Woc
412 
413 
415  const NnetGenerationOptions &opts,
416  std::vector<std::string> *configs) {
417  std::ostringstream os;
418 
419  std::vector<int32> splice_context;
420  for (int32 i = -5; i < 4; i++)
421  if (Rand() % 3 == 0)
422  splice_context.push_back(i);
423  if (splice_context.empty())
424  splice_context.push_back(0);
425 
426  int32 input_dim = 10 + Rand() % 20,
427  spliced_dim = input_dim * splice_context.size(),
428  output_dim = (opts.output_dim > 0 ?
429  opts.output_dim :
430  100 + Rand() % 200),
431  cell_dim = 40 + Rand() % 50,
432  projection_dim = std::ceil(cell_dim / (Rand() % 10 + 1));
433 
434  os << "input-node name=input dim=" << input_dim << std::endl;
435 
436  // trainable cell value for start/end of file.
437  os << "component name=c0 type=ConstantComponent"
438  << " output-dim=" << cell_dim << std::endl;
439 
440 
441  // Parameter Definitions W*(* replaced by - to have valid names)
442  // Input gate control : Wi* matrices
443  os << "component name=Wi-xr type=NaturalGradientAffineComponent"
444  << " input-dim=" << spliced_dim + projection_dim
445  << " output-dim=" << cell_dim << std::endl;
446  os << "component name=Wic type=PerElementScaleComponent "
447  << " dim=" << cell_dim << std::endl;
448 
449  // Forget gate control : Wf* matrices
450  os << "component name=Wf-xr type=NaturalGradientAffineComponent"
451  << " input-dim=" << spliced_dim + projection_dim
452  << " output-dim=" << cell_dim << std::endl;
453  os << "component name=Wfc type=PerElementScaleComponent "
454  << " dim=" << cell_dim << std::endl;
455 
456  // Output gate control : Wo* matrices
457  os << "component name=Wo-xr type=NaturalGradientAffineComponent"
458  << " input-dim=" << spliced_dim + projection_dim
459  << " output-dim=" << cell_dim << std::endl;
460  os << "component name=Woc type=PerElementScaleComponent "
461  << " dim=" << cell_dim << std::endl;
462 
463  // Cell input matrices : Wc* matrices
464  os << "component name=Wc-xr type=NaturalGradientAffineComponent"
465  << " input-dim=" << spliced_dim + projection_dim
466  << " output-dim=" << cell_dim << std::endl;
467 
468 
469 
470  // projection matrices : Wrm and Wpm
471  os << "component name=W-m type=NaturalGradientAffineComponent "
472  << " input-dim=" << cell_dim
473  << " output-dim=" << 2 * projection_dim << std::endl;
474 
475  // Output : Wyr and Wyp
476  os << "component name=Wy- type=NaturalGradientAffineComponent "
477  << " input-dim=" << 2 * projection_dim
478  << " output-dim=" << cell_dim << std::endl;
479 
480  // Defining the diagonal matrices
481  // Defining the final affine transform
482  os << "component name=final_affine type=NaturalGradientAffineComponent "
483  << "input-dim=" << cell_dim << " output-dim=" << output_dim << std::endl;
484  os << "component name=logsoftmax type=LogSoftmaxComponent dim="
485  << output_dim << std::endl;
486 
487  // Defining the non-linearities
488  // declare a no-op component so that we can use a sum descriptor's output
489  // multiple times, and to make the config more readable given the equations
490  os << "component name=i type=SigmoidComponent dim="
491  << cell_dim << std::endl;
492  os << "component name=f type=SigmoidComponent dim="
493  << cell_dim << std::endl;
494  os << "component name=o type=SigmoidComponent dim="
495  << cell_dim << std::endl;
496  os << "component name=g type=TanhComponent dim="
497  << cell_dim << std::endl;
498  os << "component name=h type=TanhComponent dim="
499  << cell_dim << std::endl;
500  os << "component name=c1 type=ElementwiseProductComponent "
501  << " input-dim=" << 2 * cell_dim
502  << " output-dim=" << cell_dim << std::endl;
503  os << "component name=c2 type=ElementwiseProductComponent "
504  << " input-dim=" << 2 * cell_dim
505  << " output-dim=" << cell_dim << std::endl;
506  os << "component name=m type=ElementwiseProductComponent "
507  << " input-dim=" << 2 * cell_dim
508  << " output-dim=" << cell_dim << std::endl;
509 
510  // Defining the computations
511  std::ostringstream temp_string_stream;
512  for (size_t i = 0; i < splice_context.size(); i++) {
513  int32 offset = splice_context[i];
514  temp_string_stream << "Offset(input, " << offset << ")";
515  if (i + 1 < splice_context.size())
516  temp_string_stream << ", ";
517  }
518  std::string spliced_input = temp_string_stream.str();
519 
520  std::string c_tminus1 = "Sum(Failover(Offset(c1_t, -1), c0), IfDefined(Offset( c2_t, -1)))";
521 
522 
523  // c0. note: the input is never used as the component requires
524  // no input indexes; we just write itself as input to keep the
525  // structures happy.
526  os << "component-node name=c0 component=c0 input=c0\n";
527 
528  // i_t
529  os << "component-node name=i1 component=Wi-xr input=Append("
530  << spliced_input << ", IfDefined(Offset(r_t, -1)))\n";
531  os << "component-node name=i2 component=Wic "
532  << " input=" << c_tminus1 << std::endl;
533  os << "component-node name=i_t component=i input=Sum(i1, i2)\n";
534 
535  // f_t
536  os << "component-node name=f1 component=Wf-xr input=Append("
537  << spliced_input << ", IfDefined(Offset(r_t, -1)))\n";
538  os << "component-node name=f2 component=Wfc "
539  << " input=" << c_tminus1 << std::endl;
540  os << "component-node name=f_t component=f input=Sum(f1, f2)\n";
541 
542  // o_t
543  os << "component-node name=o1 component=Wo-xr input=Append("
544  << spliced_input << ", IfDefined(Offset(r_t, -1)))\n";
545  os << "component-node name=o2 component=Woc input=Sum(c1_t, c2_t)\n";
546  os << "component-node name=o_t component=o input=Sum(o1, o2)\n";
547 
548  // h_t
549  os << "component-node name=h_t component=h input=Sum(c1_t, c2_t)\n";
550 
551  // g_t
552  os << "component-node name=g1 component=Wc-xr input=Append("
553  << spliced_input << ", IfDefined(Offset(r_t, -1)))\n";
554  os << "component-node name=g_t component=g input=g1\n";
555 
556  // parts of c_t
557  os << "component-node name=c1_t component=c1 "
558  << " input=Append(f_t, " << c_tminus1 << ")\n";
559  os << "component-node name=c2_t component=c2 input=Append(i_t, g_t)\n";
560 
561  // m_t
562  os << "component-node name=m_t component=m input=Append(o_t, h_t)\n";
563 
564  // r_t and p_t
565  os << "component-node name=rp_t component=W-m input=m_t\n";
566  // Splitting outputs of Wy- node
567  os << "dim-range-node name=r_t input-node=rp_t dim-offset=0 "
568  << "dim=" << projection_dim << std::endl;
569 
570  // y_t
571  os << "component-node name=y_t component=Wy- input=rp_t\n";
572 
573  // Final affine transform
574  os << "component-node name=final_affine component=final_affine input=y_t\n";
575  os << "component-node name=posteriors component=logsoftmax input=final_affine\n";
576  os << "output-node name=output input=posteriors\n";
577  configs->push_back(os.str());
578 }
579 
581  const NnetGenerationOptions &opts,
582  std::vector<std::string> *configs) {
583  std::ostringstream os;
584 
585  std::vector<int32> splice_context;
586  for (int32 i = -5; i < 4; i++)
587  if (Rand() % 3 == 0)
588  splice_context.push_back(i);
589  if (splice_context.empty())
590  splice_context.push_back(0);
591 
592  int32 input_dim = 10 + Rand() % 20,
593  spliced_dim = input_dim * splice_context.size(),
594  output_dim = (opts.output_dim > 0 ?
595  opts.output_dim :
596  100 + Rand() % 200),
597  cell_dim = 40 + Rand() % 50,
598  projection_dim = std::ceil(cell_dim / (Rand() % 10 + 1));
599  int32 clipping_threshold = RandInt(6, 50),
600  zeroing_threshold = RandInt(1, 5),
601  zeroing_interval = RandInt(1, 5) * 10;
602  BaseFloat scale = 0.8 + 0.1*RandInt(0,3);
603 
604  os << "input-node name=input dim=" << input_dim << std::endl;
605 
606  // Parameter Definitions W*(* replaced by - to have valid names)
607  // Input gate control : Wi* matrices
608  os << "component name=Wi-xr type=NaturalGradientAffineComponent"
609  << " input-dim=" << spliced_dim + projection_dim
610  << " output-dim=" << cell_dim << std::endl;
611  os << "component name=Wic type=PerElementScaleComponent "
612  << " dim=" << cell_dim << std::endl;
613 
614  // Forget gate control : Wf* matrices
615  os << "component name=Wf-xr type=NaturalGradientAffineComponent"
616  << " input-dim=" << spliced_dim + projection_dim
617  << " output-dim=" << cell_dim << std::endl;
618  os << "component name=Wfc type=PerElementScaleComponent "
619  << " dim=" << cell_dim << std::endl;
620 
621  // Output gate control : Wo* matrices
622  os << "component name=Wo-xr type=NaturalGradientAffineComponent"
623  << " input-dim=" << spliced_dim + projection_dim
624  << " output-dim=" << cell_dim << std::endl;
625  os << "component name=Woc type=PerElementScaleComponent "
626  << " dim=" << cell_dim << std::endl;
627 
628  // Cell input matrices : Wc* matrices
629  os << "component name=Wc-xr type=NaturalGradientAffineComponent"
630  << " input-dim=" << spliced_dim + projection_dim
631  << " output-dim=" << cell_dim << std::endl;
632 
633 
634 
635  // projection matrices : Wrm and Wpm
636  os << "component name=W-m type=NaturalGradientAffineComponent "
637  << " input-dim=" << cell_dim
638  << " output-dim=" << 2 * projection_dim << std::endl;
639 
640  // Output : Wyr and Wyp
641  os << "component name=Wy- type=NaturalGradientAffineComponent "
642  << " input-dim=" << 2 * projection_dim
643  << " output-dim=" << cell_dim << std::endl;
644 
645  // Defining the diagonal matrices
646  // Defining the final affine transform
647  os << "component name=final_affine type=NaturalGradientAffineComponent "
648  << "input-dim=" << cell_dim << " output-dim=" << output_dim << std::endl;
649  os << "component name=logsoftmax type=LogSoftmaxComponent dim="
650  << output_dim << std::endl;
651 
652  // Defining the non-linearities
653  // declare a no-op component so that we can use a sum descriptor's output
654  // multiple times, and to make the config more readable given the equations
655  os << "component name=i type=SigmoidComponent dim="
656  << cell_dim << std::endl;
657  os << "component name=f type=SigmoidComponent dim="
658  << cell_dim << std::endl;
659  os << "component name=o type=SigmoidComponent dim="
660  << cell_dim << std::endl;
661  os << "component name=g type=TanhComponent dim="
662  << cell_dim << std::endl;
663  os << "component name=h type=TanhComponent dim="
664  << cell_dim << std::endl;
665  os << "component name=c1 type=ElementwiseProductComponent "
666  << " input-dim=" << 2 * cell_dim
667  << " output-dim=" << cell_dim << std::endl;
668  os << "component name=c2 type=ElementwiseProductComponent "
669  << " input-dim=" << 2 * cell_dim
670  << " output-dim=" << cell_dim << std::endl;
671  os << "component name=m type=ElementwiseProductComponent "
672  << " input-dim=" << 2 * cell_dim
673  << " output-dim=" << cell_dim << std::endl;
674  os << "component name=c type=BackpropTruncationComponent dim="
675  << cell_dim
676  << " scale=" << scale
677  << " clipping-threshold=" << clipping_threshold
678  << " zeroing-threshold=" << zeroing_threshold
679  << " zeroing-interval=" << zeroing_interval
680  << " recurrence-interval=1" << std::endl;
681  os << "component name=r type=BackpropTruncationComponent dim="
682  << projection_dim
683  << " scale=" << scale
684  << " clipping-threshold=" << clipping_threshold
685  << " zeroing-threshold=" << zeroing_threshold
686  << " zeroing-interval=" << zeroing_interval
687  << " recurrence-interval=1" << std::endl;
688 
689  // Defining the computations
690  std::ostringstream temp_string_stream;
691  for (size_t i = 0; i < splice_context.size(); i++) {
692  int32 offset = splice_context[i];
693  temp_string_stream << "Offset(input, " << offset << ")";
694  if (i + 1 < splice_context.size())
695  temp_string_stream << ", ";
696  }
697  std::string spliced_input = temp_string_stream.str();
698 
699  int32 offset = RandInt(-3, 3);
700  if (offset == 0)
701  offset = -1;
702 
703 
704  std::string c_tminus1;
705  {
706  std::ostringstream os_temp;
707  os_temp << "IfDefined(Offset(c_t, " << offset << "))";
708  c_tminus1 = os_temp.str();
709  }
710  os << "component-node name=c_t component=c input=Sum(c1_t, c2_t)\n";
711 
712  // i_t
713  os << "component-node name=i1 component=Wi-xr input=Append("
714  << spliced_input << ", IfDefined(Offset(r_t, " << offset << ")))\n";
715  os << "component-node name=i2 component=Wic "
716  << " input=" << c_tminus1 << std::endl;
717  os << "component-node name=i_t component=i input=Sum(i1, i2)\n";
718 
719  // f_t
720  os << "component-node name=f1 component=Wf-xr input=Append("
721  << spliced_input << ", IfDefined(Offset(r_t, " << offset << ")))\n";
722  os << "component-node name=f2 component=Wfc "
723  << " input=" << c_tminus1 << std::endl;
724  os << "component-node name=f_t component=f input=Sum(f1, f2)\n";
725 
726  // o_t
727  os << "component-node name=o1 component=Wo-xr input=Append("
728  << spliced_input << ", IfDefined(Offset(r_t, " << offset << ")))\n";
729  os << "component-node name=o2 component=Woc input=Sum(c1_t, c2_t)\n";
730  os << "component-node name=o_t component=o input=Sum(o1, o2)\n";
731 
732  // h_t
733  os << "component-node name=h_t component=h input=Sum(c1_t, c2_t)\n";
734 
735  // g_t
736  os << "component-node name=g1 component=Wc-xr input=Append("
737  << spliced_input << ", IfDefined(Offset(r_t, " << offset << ")))\n";
738  os << "component-node name=g_t component=g input=g1\n";
739 
740  // parts of c_t
741  os << "component-node name=c1_t component=c1 "
742  << " input=Append(f_t, " << c_tminus1 << ")\n";
743  os << "component-node name=c2_t component=c2 input=Append(i_t, g_t)\n";
744 
745  // m_t
746  os << "component-node name=m_t component=m input=Append(o_t, h_t)\n";
747 
748  // r_t and p_t
749  os << "component-node name=rp_t component=W-m input=m_t\n";
750  // Splitting outputs of Wy- node
751  os << "dim-range-node name=r_t_pretrunc input-node=rp_t dim-offset=0 "
752  << "dim=" << projection_dim << std::endl;
753  os << "component-node name=r_t component=r input=r_t_pretrunc\n";
754 
755  // y_t
756  os << "component-node name=y_t component=Wy- input=rp_t\n";
757 
758  // Final affine transform
759  os << "component-node name=final_affine component=final_affine input=y_t\n";
760  os << "component-node name=posteriors component=logsoftmax input=final_affine\n";
761  os << "output-node name=output input=posteriors\n";
762  configs->push_back(os.str());
763 }
764 
765 // This is a different LSTM config where computation is bunched according
766 // to inputs this is not complete, it is left here for future comparisons
768  const NnetGenerationOptions &opts,
769  std::vector<std::string> *configs) {
770  KALDI_ERR << "Not Implemented";
771  std::ostringstream os;
772 
773  std::vector<int32> splice_context;
774  for (int32 i = -5; i < 4; i++)
775  if (Rand() % 3 == 0)
776  splice_context.push_back(i);
777  if (splice_context.empty())
778  splice_context.push_back(0);
779 
780  int32 input_dim = 10 + Rand() % 20,
781  spliced_dim = input_dim * splice_context.size(),
782  output_dim = (opts.output_dim > 0 ?
783  opts.output_dim :
784  100 + Rand() % 200),
785  cell_dim = 40 + Rand() % 50,
786  projection_dim = std::ceil(cell_dim / (Rand() % 10 + 2));
787 
788  int32 offset = RandInt(-3, 3);
789  if (offset == 0)
790  offset = -1;
791 
792  os << "input-node name=input dim=" << input_dim << std::endl;
793  // Parameter Definitions W*(* replaced by - to have valid names)
794  os << "component name=W-x type=NaturalGradientAffineComponent input-dim="
795  << spliced_dim << " output-dim=" << 4 * cell_dim << std::endl;
796  os << "component name=W-r type=NaturalGradientAffineComponent input-dim="
797  << projection_dim << " output-dim=" << 4 * cell_dim << std::endl;
798  os << "component name=W-m type=NaturalGradientAffineComponent input-dim="
799  << cell_dim << " output-dim=" << 2 * projection_dim << std::endl;
800  os << "component name=Wyr type=NaturalGradientAffineComponent input-dim="
801  << projection_dim << " output-dim=" << cell_dim << std::endl;
802  os << "component name=Wyp type=NaturalGradientAffineComponent input-dim="
803  << projection_dim << " output-dim=" << cell_dim << std::endl;
804  // Defining the diagonal matrices
805  os << "component name=Wic type=PerElementScaleComponent "
806  << " dim=" << cell_dim << std::endl;
807  os << "component name=Wfc type=PerElementScaleComponent "
808  << " dim=" << cell_dim << std::endl;
809  os << "component name=Woc type=PerElementScaleComponent "
810  << " dim=" << cell_dim << std::endl;
811  // Defining the final affine transform
812  os << "component name=final_affine type=NaturalGradientAffineComponent "
813  << "input-dim=" << cell_dim << " output-dim=" << output_dim << std::endl;
814  os << "component name=logsoftmax type=LogSoftmaxComponent dim="
815  << output_dim << std::endl;
816 
817  // Defining the non-linearities
818  // declare a no-op component so that we can use a sum descriptor's output
819  // multiple times, and to make the config more readable given the equations
820  os << "component name=c_t type=NoOpComponent dim="
821  << cell_dim << std::endl;
822  os << "component name=i_t type=SigmoidComponent dim="
823  << cell_dim << std::endl;
824  os << "component name=f_t type=SigmoidComponent dim="
825  << cell_dim << std::endl;
826  os << "component name=o_t type=SigmoidComponent dim="
827  << cell_dim << std::endl;
828  os << "component name=g type=TanhComponent dim="
829  << cell_dim << std::endl;
830  os << "component name=h type=TanhComponent dim="
831  << cell_dim << std::endl;
832  os << "component name=f_t-c_tminus1 type=ElementwiseProductComponent "
833  << " input-dim=" << 2 * cell_dim
834  << " output-dim=" << cell_dim << std::endl;
835  os << "component name=i_t-g type=ElementwiseProductComponent "
836  << " input-dim=" << 2 * cell_dim
837  << " output-dim=" << cell_dim << std::endl;
838  os << "component name=m_t type=ElementwiseProductComponent "
839  << " input-dim=" << 2 * cell_dim
840  << " output-dim=" << cell_dim << std::endl;
841 
842 
843  // Defining the computations
844  os << "component-node name=W-x component=W-x input=Append(";
845  for (size_t i = 0; i < splice_context.size(); i++) {
846  int32 offset = splice_context[i];
847  os << "Offset(input, " << offset << ")";
848  if (i + 1 < splice_context.size())
849  os << ", ";
850  }
851  os << ")\n";
852 
853  os << "component-node name=W-r component=W-r input=IfDefined(Offset(r_t"
854  << offset << "))\n";
855  os << "component-node name=W-m component=W-m input=m_t \n";
856  os << "component-node name=Wic component=Wic input=IfDefined(Offset(c_t"
857  << offset << "))\n";
858  os << "component-node name=Wfc component=Wfc input=IfDefined(Offset(c_t"
859  << offset << "))\n";
860  os << "component-node name=Woc component=Woc input=c_t\n";
861 
862  // Splitting the outputs of W*m node
863  os << "dim-range-node name=r_t input-node=W-m dim-offset=0 "
864  << "dim=" << projection_dim << std::endl;
865  os << "dim-range-node name=p_t input-node=W-m dim-offset=" << projection_dim
866  << " dim=" << projection_dim << std::endl;
867 
868  // Splitting outputs of W*x node
869  os << "dim-range-node name=W_ix-x_t input-node=W-x dim-offset=0 "
870  << "dim=" << cell_dim << std::endl;
871  os << "dim-range-node name=W_fx-x_t input-node=W-x "
872  << "dim-offset=" << cell_dim << " dim="<<cell_dim << std::endl;
873  os << "dim-range-node name=W_cx-x_t input-node=W-x "
874  << "dim-offset=" << 2 * cell_dim << " dim="<<cell_dim << std::endl;
875  os << "dim-range-node name=W_ox-x_t input-node=W-x "
876  << "dim-offset=" << 3 * cell_dim << " dim="<<cell_dim << std::endl;
877 
878  // Splitting outputs of W*r node
879  os << "dim-range-node name=W_ir-r_tminus1 input-node=W-r dim-offset=0 "
880  << "dim=" << cell_dim << std::endl;
881  os << "dim-range-node name=W_fr-r_tminus1 input-node=W-r "
882  << "dim-offset=" << cell_dim << " dim="<<cell_dim << std::endl;
883  os << "dim-range-node name=W_cr-r_tminus1 input-node=W-r "
884  << "dim-offset=" << 2 * cell_dim << " dim="<<cell_dim << std::endl;
885  os << "dim-range-node name=W_or-r_tminus1 input-node=W-r "
886  << "dim-offset=" << 3 * cell_dim << " dim="<<cell_dim << std::endl;
887 
888  // Non-linear operations
889  os << "component-node name=c_t component=c_t input=Sum(f_t-c_tminus1, i_t-g)\n";
890  os << "component-node name=h component=h input=c_t\n";
891  os << "component-node name=i_t component=i_t input=Sum(W_ix-x_t, Sum(W_ir-r_tminus1, Wic))\n";
892  os << "component-node name=f_t component=f_t input=Sum(W_fx-x_t, Sum(W_fr-r_tminus1, Wfc))\n";
893  os << "component-node name=o_t component=o_t input=Sum(W_ox-x_t, Sum(W_or-r_tminus1, Woc))\n";
894  os << "component-node name=f_t-c_tminus1 component=f_t-c_tminus1 input=Append(f_t, Offset(c_t"
895  << offset << "))\n";
896  os << "component-node name=i_t-g component=i_t-g input=Append(i_t, g)\n";
897  os << "component-node name=m_t component=m_t input=Append(o_t, h)\n";
898 
899  os << "component-node name=g component=g input=Sum(W_cx-x_t, W_cr-r_tminus1)\n";
900 
901  // Final affine transform
902  os << "component-node name=Wyr component=Wyr input=r_t\n";
903  os << "component-node name=Wyp component=Wyp input=p_t\n";
904 
905  os << "component-node name=final_affine component=final_affine input=Sum(Wyr, Wyp)\n";
906 
907  os << "component-node name=posteriors component=logsoftmax input=final_affine\n";
908  os << "output-node name=output input=posteriors\n";
909 
910  configs->push_back(os.str());
911 }
912 
914  const NnetGenerationOptions &opts,
915  std::vector<std::string> *configs) {
916  std::ostringstream os;
917 
918 
919  int32 input_x_dim = 10 + Rand() % 20,
920  input_y_dim = 10 + Rand() % 20,
921  input_z_dim = 3 + Rand() % 10,
922  filt_x_dim = 1 + Rand() % input_x_dim,
923  filt_y_dim = 1 + Rand() % input_y_dim,
924  num_filters = 10 + Rand() % 20,
925  filt_x_step = (1 + Rand() % filt_x_dim),
926  filt_y_step = (1 + Rand() % filt_y_dim);
927  int32 remainder = (input_x_dim - filt_x_dim) % filt_x_step;
928  // adjusting input_x_dim to ensure divisibility
929  input_x_dim = input_x_dim - remainder;
930  remainder = (input_y_dim - filt_y_dim) % filt_y_step;
931  // adjusting input_x_dim to ensure divisibility
932  input_y_dim = input_y_dim - remainder;
933 
934  int32 input_vectorization = Rand() % 2;
935  std::string vectorization;
936  if (input_vectorization == 0) {
937  vectorization = "yzx";
938  } else {
939  vectorization = "zyx";
940  }
941 
942  os << "component name=conv type=ConvolutionComponent "
943  << " input-x-dim=" << input_x_dim
944  << " input-y-dim=" << input_y_dim
945  << " input-z-dim=" << input_z_dim
946  << " filt-x-dim=" << filt_x_dim
947  << " filt-y-dim=" << filt_y_dim
948  << " filt-x-step=" << filt_x_step
949  << " filt-y-step=" << filt_y_step
950  << " num-filters=" << num_filters
951  << " input-vectorization-order=" << vectorization
952  << std::endl;
953 
954  int32 conv_output_x_dim = (1 + (input_x_dim - filt_x_dim) / filt_x_step);
955  int32 conv_output_y_dim = (1 + (input_y_dim - filt_y_dim) / filt_y_step);
956  int32 conv_output_z_dim = num_filters;
957  int32 pool_x_size = 1 + Rand() % conv_output_x_dim;
958  int32 pool_y_size = 1 + Rand() % conv_output_y_dim;
959  int32 pool_z_size = 1 + Rand() % conv_output_z_dim;
960  int32 pool_x_step = 1;
961  int32 pool_y_step = 1;
962  int32 pool_z_step = 1;
963  do {
964  pool_x_step = (1 + Rand() % pool_x_size);
965  } while((conv_output_x_dim - pool_x_size) % pool_x_step);
966  do {
967  pool_y_step = (1 + Rand() % pool_y_size);
968  } while((conv_output_y_dim - pool_y_size) % pool_y_step);
969  do {
970  pool_z_step = (1 + Rand() % pool_z_size);
971  } while((conv_output_z_dim - pool_z_size) % pool_z_step);
972 
973  os << "component name=maxpooling type=MaxpoolingComponent "
974  << " input-x-dim=" << conv_output_x_dim
975  << " input-y-dim=" << conv_output_y_dim
976  << " input-z-dim=" << conv_output_z_dim
977  << " pool-x-size=" << pool_x_size
978  << " pool-y-size=" << pool_y_size
979  << " pool-z-size=" << pool_z_size
980  << " pool-x-step=" << pool_x_step
981  << " pool-y-step=" << pool_y_step
982  << " pool-z-step=" << pool_z_step
983  << std::endl;
984 
985  os << "input-node name=input dim=" << (input_x_dim * input_y_dim * input_z_dim) << std::endl;
986  os << "component-node name=conv_node component=conv input=input\n";
987  os << "component-node name=maxpooling_node component=maxpooling input=conv_node\n";
988  os << "output-node name=output input=conv_node\n";
989  configs->push_back(os.str());
990 }
991 
992 
993 
995  const NnetGenerationOptions &opts,
996  std::vector<std::string> *configs) {
997  std::ostringstream ss;
998 
999 
1000  int32 cur_height = RandInt(5, 15),
1001  cur_num_filt = RandInt(1, 3),
1002  num_layers = RandInt(0, 3);
1003  // note: generating zero layers is a bit odd but it exercises some code that
1004  // we otherwise wouldn't exercise.
1005 
1006 
1007  std::string cur_layer_descriptor = "input";
1008 
1009  { // input layer.
1010  ss << "input-node name=input dim=" << (cur_height * cur_num_filt)
1011  << std::endl;
1012  }
1013 
1014 
1015  for (int32 l = 0; l < num_layers; l++) {
1016  int32 next_num_filt = RandInt(1, 10);
1017 
1018  bool height_padding = (cur_height < 5 || RandInt(0, 1) == 0);
1019  int32 height_subsampling_factor = RandInt(1, 2);
1020  if (cur_height < 4) {
1021  // output height of 1 causes a problem with unused height-offsets,
1022  // so don't subsample in that case.
1023  height_subsampling_factor = 1;
1024  }
1025 
1026  int32 next_height = cur_height;
1027  if (!height_padding) {
1028  next_height -= 2; // the kernel will have height 3.
1029  }
1030  next_height = (next_height + height_subsampling_factor - 1) /
1031  height_subsampling_factor;
1032 
1033  if (next_height == cur_height && RandInt(0, 1) == 0) {
1034  // ensure that with sufficient frequency, we have the
1035  // same height and num-filt out; this enables ResNet-style
1036  // addition.
1037  next_num_filt = cur_num_filt;
1038  }
1039 
1040  std::string time_offsets, required_time_offsets;
1041  if (RandInt(0, 3) == 0) {
1042  time_offsets = "0";
1043  required_time_offsets = (RandInt(0, 1) == 0 ? "" : "0");
1044  } else if (RandInt(0, 1) == 0) {
1045  time_offsets = "-1,0,1";
1046  required_time_offsets = (RandInt(0, 1) == 0 ? "" : "-1");
1047  } else {
1048  time_offsets = "-2,0,2";
1049  required_time_offsets = (RandInt(0, 1) == 0 ? "" : "0");
1050  }
1051 
1052  ss << "component type=TimeHeightConvolutionComponent name=layer" << l << "-conv "
1053  << "num-filters-in=" << cur_num_filt
1054  << " num-filters-out=" << next_num_filt
1055  << " height-in=" << cur_height
1056  << " height-out=" << next_height
1057  << " height-offsets=" << (height_padding ? "-1,0,1" : "0,1,2")
1058  << " time-offsets=" << time_offsets;
1059 
1060  if (RandInt(0, 1) == 0) {
1061  // this limits the 'temp memory' usage to 100
1062  // bytes, which will test another code path where
1063  // it breaks up the temporary matrix into pieces
1064  ss << " max-memory-mb=1.0e-04";
1065  }
1066 
1067  if (height_subsampling_factor != 1 || RandInt(0, 1) == 0)
1068  ss << " height-subsample-out=" << height_subsampling_factor;
1069  if (required_time_offsets == "" && RandInt(0, 1) == 0) {
1070  required_time_offsets = time_offsets;
1071  // it should default to this, but we're exercising more of the config
1072  // parsing code this way.
1073  }
1074  if (required_time_offsets != "")
1075  ss << " required-time-offsets=" << required_time_offsets;
1076  if (RandInt(0, 1) == 0)
1077  ss << " param-stddev=0.1 bias-stddev=1";
1078  if (RandInt(0, 1) == 0)
1079  ss << " use-natural-gradient=false";
1080  if (RandInt(0, 1) == 0)
1081  ss << " rank-in=4";
1082  if (RandInt(0, 1) == 0)
1083  ss << " rank-out=4";
1084  if (RandInt(0, 1) == 0)
1085  ss << " alpha-in=2.0";
1086  if (RandInt(0, 1) == 0)
1087  ss << " alpha-out=2.0";
1088  ss << std::endl;
1089 
1090  ss << "component-node name=layer" << l << "-conv component=layer"
1091  << l << "-conv input=" << cur_layer_descriptor << std::endl;
1092 
1093  bool use_relu = false;
1094  if (use_relu) {
1095  ss << "component type=RectifiedLinearComponent name=layer" << l
1096  << "-relu dim=" << (next_height * next_num_filt) << std::endl;
1097  ss << "component-node name=layer" << l << "-relu component=layer"
1098  << l << "-relu input=layer" << l << "-conv" << std::endl;
1099  }
1100 
1101  std::ostringstream desc_ss;
1102  if (next_height == cur_height && next_num_filt == cur_num_filt
1103  && RandInt(0, 1) == 0) {
1104  desc_ss << "Sum(" << cur_layer_descriptor << ", layer" << l
1105  << (use_relu ? "-relu)" : "-conv)");
1106  } else {
1107  desc_ss << "layer" << l << (use_relu ? "-relu" : "-conv");
1108  }
1109 
1110  if (RandInt(0, 3) == 0) {
1111  std::ostringstream round_desc_ss;
1112  int32 modulus = RandInt(2, 3);
1113  round_desc_ss << "Round(" << desc_ss.str() << ", " << modulus << ")";
1114  cur_layer_descriptor = round_desc_ss.str();
1115  } else {
1116  cur_layer_descriptor = desc_ss.str();
1117  }
1118  cur_height = next_height;
1119  cur_num_filt = next_num_filt;
1120  }
1121 
1122  ss << "output-node name=output input=" << cur_layer_descriptor << std::endl;
1123 
1124 
1125  configs->push_back(ss.str());
1126 }
1127 
1128 
1129 
1131  const NnetGenerationOptions &opts,
1132  std::vector<std::string> *configs) {
1133  std::ostringstream ss;
1134 
1135 
1136  int32 input_dim = RandInt(100, 150),
1137  num_heads = RandInt(1, 2),
1138  key_dim = RandInt(20, 40),
1139  value_dim = RandInt(20, 40),
1140  time_stride = RandInt(1, 3),
1141  num_left_inputs = RandInt(1, 4),
1142  num_right_inputs = RandInt(0, 2),
1143  num_left_inputs_required = RandInt(0, num_left_inputs),
1144  num_right_inputs_required = RandInt(0, num_right_inputs);
1145  bool output_context = (RandInt(0, 1) == 0);
1146  int32 context_dim = (num_left_inputs + 1 + num_right_inputs),
1147  query_dim = key_dim + context_dim;
1148  int32 attention_input_dim = num_heads * (key_dim + value_dim + query_dim);
1149 
1150  std::string cur_layer_descriptor = "input";
1151 
1152  { // input layer.
1153  ss << "input-node name=input dim=" << input_dim
1154  << std::endl;
1155  }
1156 
1157  { // affine component
1158  ss << "component name=affine type=NaturalGradientAffineComponent input-dim="
1159  << input_dim << " output-dim=" << attention_input_dim << std::endl;
1160  ss << "component-node name=affine component=affine input=input"
1161  << std::endl;
1162  }
1163 
1164  { // attention component
1165  ss << "component-node name=attention component=attention input=affine"
1166  << std::endl;
1167  ss << "component name=attention type=RestrictedAttentionComponent"
1168  << " num-heads=" << num_heads << " key-dim=" << key_dim
1169  << " value-dim=" << value_dim << " time-stride=" << time_stride
1170  << " num-left-inputs=" << num_left_inputs << " num-right-inputs="
1171  << num_right_inputs << " num-left-inputs-required="
1172  << num_left_inputs_required << " num-right-inputs-required="
1173  << num_right_inputs_required
1174  << " output-context=" << (output_context ? "true" : "false")
1175  << (RandInt(0, 1) == 0 ? " key-scale=1.0" : "")
1176  << std::endl;
1177  }
1178 
1179  { // output
1180  ss << "output-node name=output input=attention" << std::endl;
1181  }
1182  configs->push_back(ss.str());
1183 }
1184 
1185 
1186 // generates a config sequence involving DistributeComponent.
1188  const NnetGenerationOptions &opts,
1189  std::vector<std::string> *configs) {
1190  int32 output_dim = (opts.output_dim > 0 ? opts.output_dim : 100);
1191  int32 x_expand = RandInt(1, 5), after_expand_dim = RandInt(10, 20),
1192  input_dim = x_expand * after_expand_dim;
1193  std::ostringstream os;
1194  os << "input-node name=input dim=" << input_dim << std::endl;
1195  os << "component name=distribute type=DistributeComponent input-dim="
1196  << input_dim << " output-dim=" << after_expand_dim << std::endl;
1197  os << "component-node name=distribute component=distribute input=input\n";
1198  os << "component name=affine type=AffineComponent input-dim="
1199  << after_expand_dim << " output-dim=" << output_dim << std::endl;
1200  os << "component-node name=affine component=affine input=distribute\n";
1201  os << "output-node name=output input=Sum(";
1202  for (int32 i = 0; i < x_expand; i++) {
1203  if (i > 0) os << ", ";
1204  os << "ReplaceIndex(affine, x, " << i << ")";
1205  }
1206  os << ")\n";
1207  configs->push_back(os.str());
1208 }
1209 
1214  std::vector<std::string> *configs) {
1215  int32 num_components = RandInt(1,5);
1216  int32 input_dim = 10 * RandInt(1,10);
1217  if (opts.output_dim > 0) {
1218  KALDI_WARN << "This function doesn't take a requested output_dim due to "
1219  "implementation complications.";
1220  }
1221  int32 max_rows_process = 512 + 512 * RandInt(1,3);
1222  std::ostringstream os;
1223  os << "component name=composite1 type=CompositeComponent max-rows-process="
1224  << max_rows_process << " num-components=" << num_components;
1225 
1226  int32 types_length = 3;
1227  std::string types[] = {"BlockAffineComponent",
1228  "RepeatedAffineComponent",
1229  "NaturalGradientRepeatedAffineComponent"};
1230  int32 last_output_dim = input_dim;
1231  // components within a composite component are indexed from 1.
1232  for(int32 i = 1; i <= num_components; i++) {
1233  os << " component" << i << "=";
1234  int32 rand_index = RandInt(0, types_length - 1);
1235  std::string rand_type = types[rand_index];
1236  os << "'type=" << rand_type << " input-dim=" << last_output_dim;
1237  int32 current_output_dim = 10 * RandInt(1,10);
1238  // must be a divisor or current_output_dim and last_output_dim
1239  int32 num_repeats = 10;
1240  os << " output-dim=" << current_output_dim;
1241  std::string repeats_string = (rand_type == "BlockAffineComponent") ? "num-blocks": "num-repeats";
1242  os << " " << repeats_string << "=" << num_repeats << "'";
1243  last_output_dim = current_output_dim;
1244  }
1245  os << std::endl << std::endl;
1246  os << "input-node name=input dim=" << input_dim << std::endl;
1247  os << "component-node name=composite1 component=composite1 input=input\n";
1248  os << "output-node name=output input=composite1\n";
1249  configs->push_back(os.str());
1250 }
1251 
1253  const NnetGenerationOptions &opts,
1254  std::vector<std::string> *configs) {
1255 start:
1256  int32 network_type = RandInt(0, 14);
1257  switch(network_type) {
1258  case 0:
1259  GenerateConfigSequenceSimplest(opts, configs);
1260  break;
1261  case 1:
1262  if (!opts.allow_context)
1263  goto start;
1264  GenerateConfigSequenceSimpleContext(opts, configs);
1265  break;
1266  case 2:
1267  if (!opts.allow_context || !opts.allow_nonlinearity)
1268  goto start;
1269  GenerateConfigSequenceSimple(opts, configs);
1270  break;
1271  case 3:
1272  if (!opts.allow_recursion || !opts.allow_context ||
1273  !opts.allow_nonlinearity)
1274  goto start;
1275  GenerateConfigSequenceRnn(opts, configs);
1276  break;
1277  case 4:
1278  if (!opts.allow_recursion || !opts.allow_context ||
1279  !opts.allow_nonlinearity)
1280  goto start;
1281  GenerateConfigSequenceRnnClockwork(opts, configs);
1282  break;
1283  case 5:
1284  if (!opts.allow_recursion || !opts.allow_context ||
1285  !opts.allow_nonlinearity)
1286  goto start;
1287  GenerateConfigSequenceLstm(opts, configs);
1288  break;
1289  case 6:
1290  if (!opts.allow_recursion || !opts.allow_context ||
1291  !opts.allow_nonlinearity)
1292  goto start;
1293  GenerateConfigSequenceLstm(opts, configs);
1294  break;
1295  case 7:
1296  if (!opts.allow_nonlinearity)
1297  goto start;
1298  GenerateConfigSequenceCnn(opts, configs);
1299  break;
1300  case 8:
1301  if (!opts.allow_use_of_x_dim)
1302  goto start;
1303  GenerateConfigSequenceDistribute(opts, configs);
1304  break;
1305  case 9:
1306  GenerateConfigSequenceCompositeBlock(opts, configs);
1307  break;
1308  case 10:
1309  if (!opts.allow_statistics_pooling)
1310  goto start;
1311  GenerateConfigSequenceStatistics(opts, configs);
1312  break;
1313  case 11:
1314  if (!opts.allow_recursion || !opts.allow_context ||
1315  !opts.allow_nonlinearity)
1316  goto start;
1318  break;
1319  // We're allocating more case statements to the most recently
1320  // added type of model, to give more thorough testing where
1321  // it's needed most.
1322  case 12:
1323  if (!opts.allow_nonlinearity || !opts.allow_context)
1324  goto start;
1325  GenerateConfigSequenceCnnNew(opts, configs);
1326  break;
1327  case 13: case 14:
1328  if (!opts.allow_nonlinearity || !opts.allow_context)
1329  goto start;
1331  break;
1332  default:
1333  KALDI_ERR << "Error generating config sequence.";
1334  }
1335  KALDI_ASSERT(!configs->empty());
1336 }
1337 
1339  const Nnet &nnet,
1340  ComputationRequest *request,
1341  std::vector<Matrix<BaseFloat> > *inputs) {
1342  KALDI_ASSERT(IsSimpleNnet(nnet));
1343 
1344  int32 left_context, right_context;
1345  ComputeSimpleNnetContext(nnet, &left_context, &right_context);
1346 
1347  int32 num_output_frames = 1 + Rand() % 10,
1348  output_start_frame = Rand() % 10,
1349  num_examples = 1 + Rand() % 4,
1350  output_end_frame = output_start_frame + num_output_frames,
1351  input_start_frame = output_start_frame - left_context - (Rand() % 3),
1352  input_end_frame = output_end_frame + right_context + (Rand() % 3),
1353  n_offset = Rand() % 2;
1354  bool need_deriv = (Rand() % 2 == 0);
1355  // make sure there are at least 3 frames of input available. this makes a
1356  // difference for our tests of statistics-pooling and statistics-extraction
1357  // component.
1358  if (input_end_frame < input_start_frame + 3)
1359  input_end_frame = input_start_frame + 3;
1360 
1361  request->inputs.clear();
1362  request->outputs.clear();
1363  inputs->clear();
1364 
1365  std::vector<Index> input_indexes, ivector_indexes, output_indexes;
1366  for (int32 n = n_offset; n < n_offset + num_examples; n++) {
1367  for (int32 t = input_start_frame; t < input_end_frame; t++)
1368  input_indexes.push_back(Index(n, t, 0));
1369  for (int32 t = output_start_frame; t < output_end_frame; t++)
1370  output_indexes.push_back(Index(n, t, 0));
1371  ivector_indexes.push_back(Index(n, 0, 0));
1372  }
1373  request->outputs.push_back(IoSpecification("output", output_indexes));
1374  if (need_deriv || (Rand() % 3 == 0))
1375  request->outputs.back().has_deriv = true;
1376  request->inputs.push_back(IoSpecification("input", input_indexes));
1377  if (need_deriv && (Rand() % 2 == 0))
1378  request->inputs.back().has_deriv = true;
1379  int32 input_dim = nnet.InputDim("input");
1380  KALDI_ASSERT(input_dim > 0);
1381  inputs->push_back(
1382  Matrix<BaseFloat>((input_end_frame - input_start_frame) * num_examples,
1383  input_dim));
1384  inputs->back().SetRandn();
1385  int32 ivector_dim = nnet.InputDim("ivector"); // may not exist.
1386  if (ivector_dim != -1) {
1387  request->inputs.push_back(IoSpecification("ivector", ivector_indexes));
1388  inputs->push_back(Matrix<BaseFloat>(num_examples, ivector_dim));
1389  inputs->back().SetRandn();
1390  if (need_deriv && (Rand() % 2 == 0))
1391  request->inputs.back().has_deriv = true;
1392  }
1393  if (Rand() % 2 == 0)
1394  request->need_model_derivative = need_deriv;
1395  if (Rand() % 2 == 0)
1396  request->store_component_stats = true;
1397 }
1398 
1399 
1400 static void GenerateRandomComponentConfig(std::string *component_type,
1401  std::string *config) {
1402 
1403  int32 n = RandInt(0, 37);
1404  BaseFloat learning_rate = 0.001 * RandInt(1, 100);
1405 
1406  std::ostringstream os;
1407  switch(n) {
1408  case 0: {
1409  *component_type = "PnormComponent";
1410  int32 output_dim = RandInt(1, 50), group_size = RandInt(1, 15),
1411  input_dim = output_dim * group_size;
1412  os << "input-dim=" << input_dim << " output-dim=" << output_dim;
1413  break;
1414  }
1415  case 1: {
1416  BaseFloat target_rms = (RandInt(1, 200) / 100.0);
1417  std::string add_log_stddev = (Rand() % 2 == 0 ? "True" : "False");
1418  *component_type = "NormalizeComponent";
1419 
1420  int32 block_dim = RandInt(2, 50), num_blocks = RandInt(1, 3),
1421  dim = block_dim * num_blocks;
1422  // avoid dim=1 because the derivatives would be zero, which
1423  // makes them hard to test.
1424  os << "dim=" << dim << " block-dim=" << block_dim
1425  << " target-rms=" << target_rms
1426  << " add-log-stddev=" << add_log_stddev;
1427  break;
1428  }
1429  case 2: {
1430  *component_type = "SigmoidComponent";
1431  os << "dim=" << RandInt(1, 50);
1432  break;
1433  }
1434  case 3: {
1435  *component_type = "TanhComponent";
1436  os << "dim=" << RandInt(1, 50);
1437  break;
1438  }
1439  case 4: {
1440  *component_type = "RectifiedLinearComponent";
1441  os << "dim=" << RandInt(1, 50);
1442  break;
1443  }
1444  case 5: {
1445  *component_type = "SoftmaxComponent";
1446  os << "dim=" << RandInt(1, 50);
1447  break;
1448  }
1449  case 6: {
1450  *component_type = "LogSoftmaxComponent";
1451  os << "dim=" << RandInt(1, 50);
1452  break;
1453  }
1454  case 7: {
1455  *component_type = "NoOpComponent";
1456  os << "dim=" << RandInt(1, 50);
1457  break;
1458  }
1459  case 8: {
1460  *component_type = "FixedAffineComponent";
1461  int32 input_dim = RandInt(1, 50), output_dim = RandInt(1, 50);
1462  os << "input-dim=" << input_dim << " output-dim=" << output_dim;
1463  break;
1464  }
1465  case 9: {
1466  *component_type = "AffineComponent";
1467  int32 input_dim = RandInt(1, 50), output_dim = RandInt(1, 50);
1468  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1469  << " learning-rate=" << learning_rate;
1470  break;
1471  }
1472  case 10: {
1473  *component_type = "NaturalGradientAffineComponent";
1474  int32 input_dim = RandInt(1, 50), output_dim = RandInt(1, 50);
1475  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1476  << " learning-rate=" << learning_rate;
1477  break;
1478  }
1479  case 11: {
1480  *component_type = "SumGroupComponent";
1481  std::vector<int32> sizes;
1482  int32 num_groups = RandInt(1, 50);
1483  os << "sizes=";
1484  for (int32 i = 0; i < num_groups; i++) {
1485  os << RandInt(1, 5);
1486  if (i + 1 < num_groups)
1487  os << ',';
1488  }
1489  break;
1490  }
1491  case 12: {
1492  *component_type = "FixedScaleComponent";
1493  os << "dim=" << RandInt(1, 100);
1494  break;
1495  }
1496  case 13: {
1497  *component_type = "FixedBiasComponent";
1498  os << "dim=" << RandInt(1, 100);
1499  break;
1500  }
1501  case 14: {
1502  *component_type = "NaturalGradientPerElementScaleComponent";
1503  os << "dim=" << RandInt(1, 100)
1504  << " learning-rate=" << learning_rate;
1505  break;
1506  }
1507  case 15: {
1508  *component_type = "PerElementScaleComponent";
1509  os << "dim=" << RandInt(1, 100)
1510  << " learning-rate=" << learning_rate;
1511  break;
1512  }
1513  case 16: {
1514  *component_type = "ElementwiseProductComponent";
1515  int32 output_dim = RandInt(1, 100), multiple = RandInt(2, 4),
1516  input_dim = output_dim * multiple;
1517  os << "input-dim=" << input_dim << " output-dim=" << output_dim;
1518  break;
1519  }
1520  case 17: {
1521  int32 input_vectorization = Rand() % 2;
1522  std::string vectorization;
1523  if (input_vectorization == 0) {
1524  vectorization = "yzx";
1525  } else {
1526  vectorization = "zyx";
1527  }
1528  *component_type = "ConvolutionComponent";
1529  int32 input_x_dim = 10 + Rand() % 20,
1530  input_y_dim = 10 + Rand() % 20,
1531  input_z_dim = 3 + Rand() % 10,
1532  filt_x_dim = 1 + Rand() % input_x_dim,
1533  filt_y_dim = 1 + Rand() % input_y_dim,
1534  num_filters = 1 + Rand() % 10,
1535  filt_x_step = (1 + Rand() % filt_x_dim),
1536  filt_y_step = (1 + Rand() % filt_y_dim);
1537  int32 remainder = (input_x_dim - filt_x_dim) % filt_x_step;
1538  // adjusting input_x_dim to ensure divisibility
1539  input_x_dim = input_x_dim - remainder;
1540  remainder = (input_y_dim - filt_y_dim) % filt_y_step;
1541  // adjusting input_x_dim to ensure divisibility
1542  input_y_dim = input_y_dim - remainder;
1543 
1544  os << "input-x-dim=" << input_x_dim
1545  << " input-y-dim=" << input_y_dim
1546  << " input-z-dim=" << input_z_dim
1547  << " filt-x-dim=" << filt_x_dim
1548  << " filt-y-dim=" << filt_y_dim
1549  << " filt-x-step=" << filt_x_step
1550  << " filt-y-step=" << filt_y_step
1551  << " num-filters=" << num_filters
1552  << " input-vectorization-order=" << vectorization
1553  << " learning-rate=" << learning_rate;
1554  break;
1555  // TODO : add test for file based initialization. But confirm how to write
1556  // a file which is not going to be overwritten by other components
1557  }
1558  case 18: {
1559  *component_type = "PermuteComponent";
1560  int32 input_dim = 10 + Rand() % 100;
1561  std::vector<int32> column_map(input_dim);
1562  for (int32 i = 0; i < input_dim; i++)
1563  column_map[i] = i;
1564  std::random_shuffle(column_map.begin(), column_map.end());
1565  std::ostringstream buffer;
1566  for (int32 i = 0; i < input_dim-1; i++)
1567  buffer << column_map[i] << ",";
1568  buffer << column_map.back();
1569  os << "column-map=" << buffer.str();
1570  break;
1571  }
1572  case 19: {
1573  *component_type = "PerElementOffsetComponent";
1574  std::string param_config = RandInt(0, 1)?
1575  " param-mean=0.0 param-stddev=0.0":
1576  " param-mean=1.0 param-stddev=1.0";
1577  int32 block_dim = RandInt(10, 20), dim = block_dim * RandInt(1, 2);
1578  os << "dim=" << dim << " block-dim=" << block_dim
1579  << " use-natural-gradient=" << (RandInt(0, 1) == 0 ? "true" : "false")
1580  << " learning-rate=" << learning_rate << param_config;
1581  break;
1582  }
1583  case 20: case 21: {
1584  *component_type = "CompositeComponent";
1585  int32 cur_dim = RandInt(20, 30), num_components = RandInt(1, 3),
1586  max_rows_process = RandInt(1, 30);
1587  os << "num-components=" << num_components
1588  << " max-rows-process=" << max_rows_process;
1589  std::vector<std::string> sub_configs;
1590  for (int32 i = 1; i <= num_components; i++) {
1591  if (RandInt(1, 3) == 1) {
1592  os << " component" << i << "='type=RectifiedLinearComponent dim="
1593  << cur_dim << "'";
1594  } else if (RandInt(1, 2) == 1) {
1595  os << " component" << i << "='type=TanhComponent dim="
1596  << cur_dim << "'";
1597  } else {
1598  int32 next_dim = RandInt(20, 30);
1599  os << " component" << i << "='type=AffineComponent input-dim="
1600  << cur_dim << " output-dim=" << next_dim << "'";
1601  cur_dim = next_dim;
1602  }
1603  }
1604  break;
1605  }
1606  case 22: {
1607  *component_type = "SumGroupComponent";
1608  int32 num_groups = RandInt(1, 50),
1609  input_dim = num_groups * RandInt(1, 15);
1610  os << "input-dim=" << input_dim << " output-dim=" << num_groups;
1611  break;
1612  }
1613  case 23: {
1614  *component_type = "RepeatedAffineComponent";
1615  int32 num_repeats = RandInt(1, 50),
1616  input_dim = num_repeats * RandInt(1, 15),
1617  output_dim = num_repeats * RandInt(1, 15);
1618  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1619  << " num-repeats=" << num_repeats;
1620  break;
1621  }
1622  case 24: {
1623  *component_type = "BlockAffineComponent";
1624  int32 num_blocks = RandInt(1, 50),
1625  input_dim = num_blocks * RandInt(1, 15),
1626  output_dim = num_blocks * RandInt(1, 15);
1627  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1628  << " num-blocks=" << num_blocks;
1629  break;
1630  }
1631  case 25: {
1632  *component_type = "NaturalGradientRepeatedAffineComponent";
1633  int32 num_repeats = RandInt(1, 50),
1634  input_dim = num_repeats * RandInt(1, 15),
1635  output_dim = num_repeats * RandInt(1, 15);
1636  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1637  << " num-repeats=" << num_repeats;
1638  break;
1639  }
1640  case 26: {
1641  *component_type = "MaxpoolingComponent";
1642  int32 input_x_dim = 5 + Rand() % 10,
1643  input_y_dim = 5 + Rand() % 10,
1644  input_z_dim = 5 + Rand() % 10;
1645  int32 pool_x_size = 1 + Rand() % input_x_dim,
1646  pool_y_size = 1 + Rand() % input_y_dim,
1647  pool_z_size = 1 + Rand() % input_z_dim;
1648  int32 pool_x_step = (1 + Rand() % pool_x_size),
1649  pool_y_step = (1 + Rand() % pool_y_size),
1650  pool_z_step = (1 + Rand() % pool_z_size);
1651  // adjusting input dim to ensure divisibility
1652  int32 remainder = (input_x_dim - pool_x_size) % pool_x_step;
1653  input_x_dim = input_x_dim - remainder;
1654  remainder = (input_y_dim - pool_y_size) % pool_y_step;
1655  input_y_dim = input_y_dim - remainder;
1656  remainder = (input_z_dim - pool_z_size) % pool_z_step;
1657  input_z_dim = input_z_dim - remainder;
1658  os << " input-x-dim=" << input_x_dim
1659  << " input-y-dim=" << input_y_dim
1660  << " input-z-dim=" << input_z_dim
1661  << " pool-x-size=" << pool_x_size
1662  << " pool-y-size=" << pool_y_size
1663  << " pool-z-size=" << pool_z_size
1664  << " pool-x-step=" << pool_x_step
1665  << " pool-y-step=" << pool_y_step
1666  << " pool-z-step=" << pool_z_step;
1667  break;
1668  }
1669  case 27: {
1670  *component_type = "ConstantFunctionComponent";
1671  int32 input_dim = RandInt(1, 50), output_dim = RandInt(1, 50);
1672  bool is_updatable = (RandInt(0, 1) == 0),
1673  use_natural_gradient = (RandInt(0, 1) == 0);
1674  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1675  << " learning-rate=" << learning_rate
1676  << " is-updatable=" << std::boolalpha << is_updatable
1677  << " use-natural-gradient=" << std::boolalpha << use_natural_gradient;
1678  break;
1679  }
1680  case 28: {
1681  *component_type = "ClipGradientComponent";
1682  os << "dim=" << RandInt(1, 50);
1683  os << " clipping-threshold=" << RandInt(1, 50)
1684  << " norm-based-clipping=" << (RandInt(0, 1) == 0 ? "false" : "true");
1685  if (RandInt(0, 1) == 1)
1686  os << " self-repair-scale="
1687  << (RandInt(0, 1) == 0 ? 0 : RandInt(1, 50));
1688  if (RandInt(0, 1) == 1)
1689  os << " self-repair-clipped-proportion-threshold=" << RandUniform();
1690  if (RandInt(0, 1) == 1)
1691  os << " self-repair-target=" << RandUniform();
1692  break;
1693  }
1694  case 29: {
1695  *component_type = "DropoutComponent";
1696  bool test_mode = (RandInt(0, 1) == 0);
1697  os << "dim=" << RandInt(1, 200)
1698  << " dropout-proportion=" << RandUniform() << " test-mode="
1699  << (test_mode ? "true" : "false");
1700  break;
1701  }
1702  case 30: {
1703  *component_type = "LstmNonlinearityComponent";
1704  // set self-repair scale to zero so the derivative tests will pass.
1705  os << "cell-dim=" << RandInt(1, 200)
1706  << " self-repair-scale=0.0";
1707  break;
1708  }
1709  // I think we'll get in the habit of allocating a larger number of case
1710  // labels to the most recently added component, so it gets tested more
1711  case 31: {
1712  *component_type = "BatchNormComponent";
1713  int32 block_dim = RandInt(1, 20), dim = block_dim * RandInt(1, 2);
1714  bool test_mode = (RandInt(0, 1) == 0);
1715  os << " dim=" << dim
1716  << " block-dim=" << block_dim << " target-rms="
1717  << RandInt(1, 4) << " test-mode="
1718  << (test_mode ? "true" : "false")
1719  << " epsilon=" << (RandInt(0, 1) == 0 ? "0.1" : "1.0");
1720  break;
1721  }
1722  case 32: {
1723  *component_type = "SumBlockComponent";
1724  BaseFloat scale = 0.5 * RandInt(1, 3);
1725  BaseFloat output_dim = RandInt(1, 10),
1726  input_dim = output_dim * RandInt(1, 3);
1727  os << "input-dim=" << input_dim
1728  << " output-dim=" << output_dim
1729  << " scale=" << scale;
1730  break;
1731  }
1732  case 33: {
1733  *component_type = "ScaleAndOffsetComponent";
1734  int32 block_dim = RandInt(10, 20),
1735  num_blocks = RandInt(1, 3),
1736  dim = block_dim * num_blocks;
1737  os << "dim=" << dim << " block-dim=" << block_dim
1738  << " use-natural-gradient="
1739  << (RandInt(0,1) == 0 ? "true" : "false");
1740  break;
1741  }
1742  case 34: {
1743  *component_type = "LinearComponent";
1744  int32 input_dim = RandInt(1, 50), output_dim = RandInt(1, 50);
1745  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1746  << " learning-rate=" << learning_rate;
1747  break;
1748  }
1749  case 35: {
1750  // This is not technically a SimpleComponent, but it behaves as one
1751  // if time-offsets=0.
1752  *component_type = "TdnnComponent";
1753  int32 input_dim = RandInt(1, 50), output_dim = RandInt(1, 50);
1754  os << "input-dim=" << input_dim << " output-dim=" << output_dim
1755  << " learning-rate=" << learning_rate << " time-offsets=0"
1756  << " use-natural-gradient=" << (RandInt(0,1) == 0 ? "true":"false")
1757  << " use-bias=" << (RandInt(0,1) == 0 ? "true":"false");
1758  break;
1759  }
1760  case 36: {
1761  *component_type = "GruNonlinearityComponent";
1762  int32 cell_dim = RandInt(10, 20);
1763  int32 recurrent_dim = (RandInt(0, 1) == 0 ?
1764  RandInt(5, cell_dim - 1) : cell_dim);
1765  os << "cell-dim=" << cell_dim
1766  << " recurrent-dim=" << recurrent_dim;
1767  break;
1768  }
1769  case 37: {
1770  *component_type = "OutputGruNonlinearityComponent";
1771  os << "cell-dim=" << RandInt(10, 20)
1772  << " learning-rate=" << learning_rate;
1773 
1774  break;
1775  }
1776  default:
1777  KALDI_ERR << "Error generating random component";
1778  }
1779  *config = os.str();
1780 }
1781 
1784  std::string component_type, config;
1785  GenerateRandomComponentConfig(&component_type, &config);
1786  ConfigLine config_line;
1787  if (!config_line.ParseLine(config))
1788  KALDI_ERR << "Bad config line " << config;
1789 
1790  Component *c = Component::NewComponentOfType(component_type);
1791  if (c == NULL)
1792  KALDI_ERR << "Invalid component type " << component_type;
1793  c->InitFromConfig(&config_line);
1794  if (config_line.HasUnusedValues()) {
1795  KALDI_ERR << "Config line " << config_line.WholeLine()
1796  << " has unused values: "
1797  << config_line.UnusedValues();
1798  }
1799  return c;
1800 }
1801 
1803  const Nnet &nnet2,
1804  BaseFloat threshold = 1.0e-05) {
1805  KALDI_ASSERT(nnet1.NumComponents() == nnet2.NumComponents());
1806  int32 num_components = nnet1.NumComponents();
1807  for (int32 c = 0; c < num_components; c++) {
1808  const Component *c1 = nnet1.GetComponent(c),
1809  *c2 = nnet2.GetComponent(c);
1810  KALDI_ASSERT(c1->Type() == c2->Type());
1811  if (c1->Properties() & kUpdatableComponent) {
1812  const UpdatableComponent *u1 = dynamic_cast<const UpdatableComponent*>(c1),
1813  *u2 = dynamic_cast<const UpdatableComponent*>(c2);
1814  KALDI_ASSERT(u1 != NULL && u2 != NULL);
1815  BaseFloat prod11 = u1->DotProduct(*u1), prod12 = u1->DotProduct(*u2),
1816  prod21 = u2->DotProduct(*u1), prod22 = u2->DotProduct(*u2);
1817  BaseFloat max_prod = std::max(std::max(prod11, prod12),
1818  std::max(prod21, prod22)),
1819  min_prod = std::min(std::min(prod11, prod12),
1820  std::min(prod21, prod22));
1821  if (max_prod - min_prod > threshold * max_prod) {
1822  KALDI_WARN << "Component '" << nnet1.GetComponentName(c)
1823  << "' differs in nnet1 versus nnet2: prod(11,12,21,22) = "
1824  << prod11 << ',' << prod12 << ',' << prod21 << ',' << prod22;
1825  return false;
1826  }
1827  }
1828  }
1829  return true;
1830 }
1831 
1833  int32 num_supervised_frames,
1834  int32 left_context,
1835  int32 right_context,
1836  int32 output_dim,
1837  int32 input_dim,
1838  int32 ivector_dim,
1839  NnetExample *example) {
1840  KALDI_ASSERT(num_supervised_frames > 0 && left_context >= 0 &&
1841  right_context >= 0 && output_dim > 0 && input_dim > 0
1842  && example != NULL);
1843  example->io.clear();
1844 
1845  int32 feature_t_begin = RandInt(0, 2);
1846  int32 num_feat_frames = left_context + right_context + num_supervised_frames;
1847  Matrix<BaseFloat> input_mat(num_feat_frames, input_dim);
1848  input_mat.SetRandn();
1849  NnetIo input_feat("input", feature_t_begin, input_mat);
1850  if (RandInt(0, 1) == 0)
1851  input_feat.features.Compress();
1852  example->io.push_back(input_feat);
1853 
1854  if (ivector_dim > 0) {
1855  // Create a feature for the iVectors. iVectors always have t=0 in the
1856  // current framework.
1857  Matrix<BaseFloat> ivector_mat(1, ivector_dim);
1858  ivector_mat.SetRandn();
1859  NnetIo ivector_feat("ivector", 0, ivector_mat);
1860  if (RandInt(0, 1) == 0)
1861  ivector_feat.features.Compress();
1862  example->io.push_back(ivector_feat);
1863  }
1864 
1865  { // set up the output supervision.
1866  Posterior labels(num_supervised_frames);
1867  for (int32 t = 0; t < num_supervised_frames; t++) {
1868  int32 num_labels = RandInt(1, 3);
1869  BaseFloat remaining_prob_mass = 1.0;
1870  for (int32 i = 0; i < num_labels; i++) {
1871  BaseFloat this_prob = (i+1 == num_labels ? 1.0 : RandUniform()) *
1872  remaining_prob_mass;
1873  remaining_prob_mass -= this_prob;
1874  labels[t].push_back(std::pair<int32, BaseFloat>(RandInt(0, output_dim-1),
1875  this_prob));
1876  }
1877  }
1878  int32 supervision_t_begin = feature_t_begin + left_context;
1879  NnetIo output_sup("output", output_dim, supervision_t_begin,
1880  labels);
1881  example->io.push_back(output_sup);
1882  }
1883 }
1884 
1886  const NnetExample &eg2,
1887  BaseFloat delta) {
1888  if (eg1.io.size() != eg2.io.size())
1889  return false;
1890  for (size_t i = 0; i < eg1.io.size(); i++) {
1891  NnetIo io1 = eg1.io[i], io2 = eg2.io[i];
1892  if (io1.name != io2.name || io1.indexes != io2.indexes)
1893  return false;
1894  Matrix<BaseFloat> feat1, feat2;
1895  io1.features.GetMatrix(&feat1);
1896  io2.features.GetMatrix(&feat2);
1897  if (!ApproxEqual(feat1, feat2, delta))
1898  return false;
1899  }
1900  return true;
1901 }
1902 
1903 
1904 } // namespace nnet3
1905 } // namespace kaldi
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:111
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 InputDim(const std::string &input_name) const
Definition: nnet-nnet.cc:669
void GenerateSimpleNnetTrainingExample(int32 num_supervised_frames, int32 left_context, int32 right_context, int32 output_dim, int32 input_dim, int32 ivector_dim, NnetExample *example)
Low-level function that generates an nnet training example.
const std::string WholeLine()
Definition: text-utils.h:230
bool ParseLine(const std::string &line)
Definition: text-utils.cc:343
bool store_component_stats
you should set need_component_stats to true if you need the average-activation and average-derivative...
void GenerateConfigSequenceLstm(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
bool NnetParametersAreIdentical(const Nnet &nnet1, const Nnet &nnet2, BaseFloat threshold=1.0e-05)
Used for testing that the updatable parameters in two networks are the same.
bool need_model_derivative
if need_model_derivative is true, then we'll be doing either model training or model-derivative compu...
void GetMatrix(Matrix< BaseFloat > *mat) const
Outputs the contents as a matrix.
Abstract base-class for neural-net components.
void GenerateConfigSequenceRnnClockwork(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequenceLstmType2(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
This file contains various routines that are useful in test code.
void GenerateConfigSequenceLstmWithTruncation(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequenceSimpleContext(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
kaldi::int32 int32
GeneralMatrix features
The features or labels.
Definition: nnet-example.h:46
std::vector< IoSpecification > inputs
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
Definition: nnet-example.h:42
static void GenerateRandomComponentConfig(std::string *component_type, std::string *config)
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
Definition: nnet-common.h:44
This file contains some miscellaneous functions dealing with class Nnet.
void GenerateConfigSequenceDistribute(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
std::string UnusedValues() const
returns e.g.
Definition: text-utils.cc:518
void ComputeExampleComputationRequestSimple(const Nnet &nnet, ComputationRequest *request, std::vector< Matrix< BaseFloat > > *inputs)
This function computes an example computation request, for testing purposes.
float BaseFloat
Definition: kaldi-types.h:29
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
Definition: posterior.h:42
void GenerateConfigSequenceRnn(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet.
Definition: nnet-utils.cc:146
virtual BaseFloat DotProduct(const UpdatableComponent &other) const =0
Computes dot-product between parameters of two instances of a Component.
virtual int32 Properties() const =0
Return bitmask of the component's properties.
struct rnnlm::@11::@12 n
void GenerateConfigSequenceCnn(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequenceCnnNew(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void SetRandn()
Sets to random values of a normal distribution.
#define KALDI_ERR
Definition: kaldi-error.h:147
bool ExampleApproxEqual(const NnetExample &eg1, const NnetExample &eg2, BaseFloat delta)
Returns true if the examples are approximately equal (only intended to be used in testing)...
#define KALDI_WARN
Definition: kaldi-error.h:150
void GenerateConfigSequenceStatistics(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
const std::string & GetComponentName(int32 component_index) const
returns individual component name.
Definition: nnet-nnet.cc:689
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
Component * GetComponent(int32 c)
Return component indexed c. Not a copy; not owned by caller.
Definition: nnet-nnet.cc:150
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
void GenerateConfigSequenceSimplest(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
virtual std::string Type() const =0
Returns a string such as "SigmoidComponent", describing the type of the object.
void GenerateConfigSequenceRestrictedAttention(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
bool IsSimpleNnet(const Nnet &nnet)
This function returns true if the nnet has the following properties: It has an output called "output"...
Definition: nnet-utils.cc:52
int32 NumComponents() const
Definition: nnet-nnet.h:124
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
Definition: text-utils.h:205
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::vector< IoSpecification > outputs
bool HasUnusedValues() const
Definition: text-utils.cc:510
std::string name
the name of the input in the neural net; in simple setups it will just be "input".
Definition: nnet-example.h:36
void GenerateConfigSequenceSimple(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequence(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
Generates a sequence of at least one config files, output as strings, where the first in the sequence...
std::vector< NnetIo > io
"io" contains the input and output.
Definition: nnet-example.h:116
static Component * NewComponentOfType(const std::string &type)
Returns a new Component of the given type e.g.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
void GenerateConfigSequenceCompositeBlock(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
Generate a config string with a composite component composed only of block affine, repeated affine, and natural gradient repeated affine components.
virtual void InitFromConfig(ConfigLine *cfl)=0
Initialize, from a ConfigLine object.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95
Component * GenerateRandomSimpleComponent()
Generates random simple component for testing.