35 std::vector<std::string> *configs) {
36 std::ostringstream os;
44 os <<
"component name=affine1 type=AffineComponent input-dim=" 45 << input_dim <<
" output-dim=" << output_dim << std::endl;
47 os <<
"input-node name=input dim=" << input_dim << std::endl;
48 os <<
"component-node name=affine1_node component=affine1 input=input\n";
49 os <<
"output-node name=output input=affine1_node\n";
50 configs->push_back(os.str());
56 std::vector<std::string> *configs) {
57 std::ostringstream os;
59 std::vector<int32> splice_context;
62 splice_context.push_back(
i);
63 if (splice_context.empty())
64 splice_context.push_back(0);
67 spliced_dim = input_dim * splice_context.size(),
74 os <<
"component name=affine1 type=AffineComponent input-dim=" 75 << spliced_dim <<
" output-dim=" << output_dim << std::endl;
77 os <<
"input-node name=input dim=" << input_dim << std::endl;
79 os <<
"component-node name=affine1_node component=affine1 input=Append(";
80 for (
size_t i = 0;
i < splice_context.size();
i++) {
81 int32 offset = splice_context[
i];
82 os <<
"Offset(input, " << offset <<
")";
83 if (
i + 1 < splice_context.size())
87 os <<
"output-node name=output input=affine1_node\n";
89 os <<
"component name=tdnn1 type=TdnnComponent input-dim=" 90 << input_dim <<
" output-dim=" << output_dim
92 for (
size_t i = 0;
i < splice_context.size();
i++) {
94 os << splice_context[
i];
96 os <<
" use-bias=" << (
RandInt(0,1) == 0 ?
"true":
"false")
97 <<
" use-natural-gradient=" << (
RandInt(0,1) == 0 ?
"true":
"false")
99 os <<
"input-node name=input dim=" << input_dim << std::endl;
100 os <<
"component-node name=tdnn1_node component=tdnn1 input=input\n";
101 os <<
"output-node name=output input=tdnn1_node\n";
103 configs->push_back(os.str());
113 std::vector<std::string> *configs) {
114 std::ostringstream os;
116 std::vector<int32> splice_context;
119 splice_context.push_back(
i);
120 if (splice_context.empty())
121 splice_context.push_back(0);
127 hidden_dim = 40 +
Rand() % 50;
131 int32 spliced_dim = input_dim * splice_context.size() + ivector_dim;
135 bool use_batch_norm = (
RandInt(0, 1) == 0);
137 os <<
"component name=affine1 type=NaturalGradientAffineComponent input-dim=" 138 << spliced_dim <<
" output-dim=" << hidden_dim << std::endl;
139 os <<
"component name=relu1 type=RectifiedLinearComponent dim=" 140 << hidden_dim << std::endl;
141 if (use_batch_norm) {
142 int32 block_dim = (hidden_dim % 2 == 0 ? hidden_dim / 2 : hidden_dim);
143 os <<
"component name=batch-norm type=BatchNormComponent dim=" 144 << hidden_dim <<
" block-dim=" << block_dim
145 <<
" target-rms=2.0";
147 os <<
" epsilon=3.0";
150 os <<
"component name=final_affine type=NaturalGradientAffineComponent input-dim=" 151 << hidden_dim <<
" output-dim=" << output_dim << std::endl;
152 if (use_final_nonlinearity) {
153 if (
Rand() % 2 == 0) {
154 os <<
"component name=logsoftmax type=SoftmaxComponent dim=" 155 << output_dim << std::endl;
157 os <<
"component name=logsoftmax type=LogSoftmaxComponent dim=" 158 << output_dim << std::endl;
161 os <<
"input-node name=input dim=" << input_dim << std::endl;
162 if (ivector_dim != 0)
163 os <<
"input-node name=ivector dim=" << ivector_dim << std::endl;
165 os <<
"component-node name=affine1_node component=affine1 input=Append(";
166 if (ivector_dim != 0)
167 os <<
"ReplaceIndex(ivector, t, 0), ";
168 for (
size_t i = 0;
i < splice_context.size();
i++) {
169 int32 offset = splice_context[
i];
171 os <<
"Offset(input, " << offset <<
")";
174 os <<
"Scale(-1, Offset(input, " << offset <<
"))";
176 if (
i + 1 < splice_context.size())
181 os <<
"component-node name=nonlin1 component=relu1 input=affine1_node\n";
182 }
else if (
RandInt(0, 1) == 0) {
183 os <<
"component-node name=nonlin1 component=relu1 input=Scale(-1.0, affine1_node)\n";
185 os <<
"component-node name=nonlin1 component=relu1 input=Sum(Const(1.0, " 186 << hidden_dim <<
"), Scale(-1.0, affine1_node))\n";
188 if (use_batch_norm) {
189 os <<
"component-node name=batch-norm component=batch-norm input=nonlin1\n";
190 os <<
"component-node name=final_affine component=final_affine input=batch-norm\n";
192 os <<
"component-node name=final_affine component=final_affine input=nonlin1\n";
194 if (use_final_nonlinearity) {
195 os <<
"component-node name=output_nonlin component=logsoftmax input=final_affine\n";
196 os <<
"output-node name=output input=output_nonlin\n";
198 os <<
"output-node name=output input=final_affine\n";
200 configs->push_back(os.str());
202 if ((
Rand() % 2) == 0) {
203 std::ostringstream os2;
204 os2 <<
"component name=affine2 type=NaturalGradientAffineComponent input-dim=" 205 << hidden_dim <<
" output-dim=" << hidden_dim << std::endl;
206 os2 <<
"component name=relu2 type=RectifiedLinearComponent dim=" 207 << hidden_dim << std::endl;
209 os2 <<
"component name=final_affine type=NaturalGradientAffineComponent input-dim=" 210 << hidden_dim <<
" output-dim=" << output_dim << std::endl;
211 os2 <<
"component-node name=affine2 component=affine2 input=nonlin1\n";
212 os2 <<
"component-node name=relu2 component=relu2 input=affine2\n";
213 os2 <<
"component-node name=final_affine component=final_affine input=relu2\n";
214 configs->push_back(os2.str());
221 std::vector<std::string> *configs) {
224 stats_period = input_period *
RandInt(1, 3),
225 left_context = stats_period *
RandInt(1, 10),
226 right_context = stats_period *
RandInt(1, 10),
227 log_count_features =
RandInt(0, 3);
229 bool output_stddevs = (
RandInt(0, 1) == 0);
231 int32 raw_stats_dim = 1 + input_dim + (output_stddevs ? input_dim : 0),
232 pooled_stats_dim = log_count_features + input_dim +
233 (output_stddevs ? input_dim : 0);
234 std::ostringstream os;
235 os <<
"input-node name=input dim=" << input_dim << std::endl;
236 os <<
"component name=statistics-extraction type=StatisticsExtractionComponent " 237 <<
"input-dim=" << input_dim <<
" input-period=" << input_period
238 <<
" output-period=" << stats_period <<
" include-variance=" 239 << std::boolalpha << output_stddevs <<
"\n";
241 os <<
"component name=statistics-pooling type=StatisticsPoolingComponent " 242 <<
"input-dim=" << raw_stats_dim <<
" input-period=" << stats_period
243 <<
" left-context=" << left_context <<
" right-context=" << right_context
244 <<
" num-log-count-features=" << log_count_features <<
" output-stddevs=" 245 << std::boolalpha << output_stddevs <<
" variance-floor=" 246 << variance_floor <<
"\n";
248 os <<
"component name=affine type=AffineComponent " 249 <<
"input-dim=" << input_dim <<
" output-dim=" << pooled_stats_dim
252 os <<
"component-node name=statistics-extraction component=statistics-extraction " 254 os <<
"component-node name=statistics-pooling component=statistics-pooling " 255 <<
"input=statistics-extraction\n";
256 os <<
"component-node name=affine component=affine input=input\n";
257 os <<
"output-node name=output input=Sum(affine, Round(statistics-pooling, " 258 << stats_period <<
"))\n";
259 configs->push_back(os.str());
265 std::vector<std::string> *configs) {
266 std::ostringstream os;
268 std::vector<int32> splice_context;
271 splice_context.push_back(
i);
272 if (splice_context.empty())
273 splice_context.push_back(0);
276 spliced_dim = input_dim * splice_context.size(),
280 hidden_dim = 40 +
Rand() % 50;
281 os <<
"component name=affine1 type=NaturalGradientAffineComponent input-dim=" 282 << spliced_dim <<
" output-dim=" << hidden_dim << std::endl;
284 os <<
"component name=nonlin1 type=RectifiedLinearComponent dim=" 285 << hidden_dim << std::endl;
287 os <<
"component name=nonlin1 type=TanhComponent dim=" 288 << hidden_dim << std::endl;
290 os <<
"component name=recurrent_affine1 type=NaturalGradientAffineComponent input-dim=" 291 << hidden_dim <<
" output-dim=" << hidden_dim << std::endl;
292 os <<
"component name=affine2 type=NaturalGradientAffineComponent input-dim=" 293 << hidden_dim <<
" output-dim=" << output_dim << std::endl;
294 os <<
"component name=logsoftmax type=LogSoftmaxComponent dim=" 295 << output_dim << std::endl;
296 os <<
"input-node name=input dim=" << input_dim << std::endl;
298 os <<
"component-node name=affine1_node component=affine1 input=Append(";
299 for (
size_t i = 0;
i < splice_context.size();
i++) {
300 int32 offset = splice_context[
i];
301 os <<
"Offset(input, " << offset <<
")";
302 if (
i + 1 < splice_context.size())
306 os <<
"component-node name=recurrent_affine1 component=recurrent_affine1 " 307 "input=Offset(nonlin1, -1)\n";
308 os <<
"component-node name=nonlin1 component=nonlin1 " 309 "input=Sum(affine1_node, IfDefined(recurrent_affine1))\n";
310 os <<
"component-node name=affine2 component=affine2 input=nonlin1\n";
311 os <<
"component-node name=output_nonlin component=logsoftmax input=affine2\n";
312 os <<
"output-node name=output input=output_nonlin\n";
313 configs->push_back(os.str());
324 std::vector<std::string> *configs) {
325 std::ostringstream os;
327 std::vector<int32> splice_context;
330 splice_context.push_back(
i);
331 if (splice_context.empty())
332 splice_context.push_back(0);
335 spliced_dim = input_dim * splice_context.size(),
339 hidden_dim = 40 +
Rand() % 50;
340 os <<
"component name=affine1 type=NaturalGradientAffineComponent input-dim=" 341 << spliced_dim <<
" output-dim=" << hidden_dim << std::endl;
342 os <<
"component name=nonlin1 type=RectifiedLinearComponent dim=" 343 << hidden_dim << std::endl;
344 os <<
"component name=recurrent_affine1 type=NaturalGradientAffineComponent input-dim=" 345 << hidden_dim <<
" output-dim=" << hidden_dim << std::endl;
349 os <<
"component name=final_affine_0 type=NaturalGradientAffineComponent input-dim=" 350 << hidden_dim <<
" output-dim=" << output_dim << std::endl;
351 os <<
"component name=final_affine_1 type=NaturalGradientAffineComponent input-dim=" 352 << hidden_dim <<
" output-dim=" << output_dim << std::endl;
353 os <<
"component name=final_affine_2 type=NaturalGradientAffineComponent input-dim=" 354 << hidden_dim <<
" output-dim=" << output_dim << std::endl;
355 os <<
"component name=logsoftmax type=LogSoftmaxComponent dim=" 356 << output_dim << std::endl;
357 os <<
"input-node name=input dim=" << input_dim << std::endl;
359 os <<
"component-node name=affine1_node component=affine1 input=Append(";
360 for (
size_t i = 0;
i < splice_context.size();
i++) {
361 int32 offset = splice_context[
i];
362 os <<
"Offset(input, " << offset <<
")";
363 if (
i + 1 < splice_context.size())
367 os <<
"component-node name=recurrent_affine1 component=recurrent_affine1 " 368 "input=Offset(nonlin1, -1)\n";
369 os <<
"component-node name=nonlin1 component=nonlin1 " 370 "input=Sum(affine1_node, IfDefined(recurrent_affine1))\n";
371 os <<
"component-node name=final_affine_0 component=final_affine_0 input=nonlin1\n";
372 os <<
"component-node name=final_affine_1 component=final_affine_1 input=Offset(nonlin1, -1)\n";
373 os <<
"component-node name=final_affine_2 component=final_affine_2 input=Offset(nonlin1, 1)\n";
374 os <<
"component-node name=output_nonlin component=logsoftmax input=Switch(final_affine_0, final_affine_1, final_affine_2)\n";
375 os <<
"output-node name=output input=output_nonlin\n";
376 configs->push_back(os.str());
416 std::vector<std::string> *configs) {
417 std::ostringstream os;
419 std::vector<int32> splice_context;
422 splice_context.push_back(
i);
423 if (splice_context.empty())
424 splice_context.push_back(0);
427 spliced_dim = input_dim * splice_context.size(),
431 cell_dim = 40 +
Rand() % 50,
432 projection_dim = std::ceil(cell_dim / (
Rand() % 10 + 1));
434 os <<
"input-node name=input dim=" << input_dim << std::endl;
437 os <<
"component name=c0 type=ConstantComponent" 438 <<
" output-dim=" << cell_dim << std::endl;
443 os <<
"component name=Wi-xr type=NaturalGradientAffineComponent" 444 <<
" input-dim=" << spliced_dim + projection_dim
445 <<
" output-dim=" << cell_dim << std::endl;
446 os <<
"component name=Wic type=PerElementScaleComponent " 447 <<
" dim=" << cell_dim << std::endl;
450 os <<
"component name=Wf-xr type=NaturalGradientAffineComponent" 451 <<
" input-dim=" << spliced_dim + projection_dim
452 <<
" output-dim=" << cell_dim << std::endl;
453 os <<
"component name=Wfc type=PerElementScaleComponent " 454 <<
" dim=" << cell_dim << std::endl;
457 os <<
"component name=Wo-xr type=NaturalGradientAffineComponent" 458 <<
" input-dim=" << spliced_dim + projection_dim
459 <<
" output-dim=" << cell_dim << std::endl;
460 os <<
"component name=Woc type=PerElementScaleComponent " 461 <<
" dim=" << cell_dim << std::endl;
464 os <<
"component name=Wc-xr type=NaturalGradientAffineComponent" 465 <<
" input-dim=" << spliced_dim + projection_dim
466 <<
" output-dim=" << cell_dim << std::endl;
471 os <<
"component name=W-m type=NaturalGradientAffineComponent " 472 <<
" input-dim=" << cell_dim
473 <<
" output-dim=" << 2 * projection_dim << std::endl;
476 os <<
"component name=Wy- type=NaturalGradientAffineComponent " 477 <<
" input-dim=" << 2 * projection_dim
478 <<
" output-dim=" << cell_dim << std::endl;
482 os <<
"component name=final_affine type=NaturalGradientAffineComponent " 483 <<
"input-dim=" << cell_dim <<
" output-dim=" << output_dim << std::endl;
484 os <<
"component name=logsoftmax type=LogSoftmaxComponent dim=" 485 << output_dim << std::endl;
490 os <<
"component name=i type=SigmoidComponent dim=" 491 << cell_dim << std::endl;
492 os <<
"component name=f type=SigmoidComponent dim=" 493 << cell_dim << std::endl;
494 os <<
"component name=o type=SigmoidComponent dim=" 495 << cell_dim << std::endl;
496 os <<
"component name=g type=TanhComponent dim=" 497 << cell_dim << std::endl;
498 os <<
"component name=h type=TanhComponent dim=" 499 << cell_dim << std::endl;
500 os <<
"component name=c1 type=ElementwiseProductComponent " 501 <<
" input-dim=" << 2 * cell_dim
502 <<
" output-dim=" << cell_dim << std::endl;
503 os <<
"component name=c2 type=ElementwiseProductComponent " 504 <<
" input-dim=" << 2 * cell_dim
505 <<
" output-dim=" << cell_dim << std::endl;
506 os <<
"component name=m type=ElementwiseProductComponent " 507 <<
" input-dim=" << 2 * cell_dim
508 <<
" output-dim=" << cell_dim << std::endl;
511 std::ostringstream temp_string_stream;
512 for (
size_t i = 0;
i < splice_context.size();
i++) {
513 int32 offset = splice_context[
i];
514 temp_string_stream <<
"Offset(input, " << offset <<
")";
515 if (
i + 1 < splice_context.size())
516 temp_string_stream <<
", ";
518 std::string spliced_input = temp_string_stream.str();
520 std::string c_tminus1 =
"Sum(Failover(Offset(c1_t, -1), c0), IfDefined(Offset( c2_t, -1)))";
526 os <<
"component-node name=c0 component=c0 input=c0\n";
529 os <<
"component-node name=i1 component=Wi-xr input=Append(" 530 << spliced_input <<
", IfDefined(Offset(r_t, -1)))\n";
531 os <<
"component-node name=i2 component=Wic " 532 <<
" input=" << c_tminus1 << std::endl;
533 os <<
"component-node name=i_t component=i input=Sum(i1, i2)\n";
536 os <<
"component-node name=f1 component=Wf-xr input=Append(" 537 << spliced_input <<
", IfDefined(Offset(r_t, -1)))\n";
538 os <<
"component-node name=f2 component=Wfc " 539 <<
" input=" << c_tminus1 << std::endl;
540 os <<
"component-node name=f_t component=f input=Sum(f1, f2)\n";
543 os <<
"component-node name=o1 component=Wo-xr input=Append(" 544 << spliced_input <<
", IfDefined(Offset(r_t, -1)))\n";
545 os <<
"component-node name=o2 component=Woc input=Sum(c1_t, c2_t)\n";
546 os <<
"component-node name=o_t component=o input=Sum(o1, o2)\n";
549 os <<
"component-node name=h_t component=h input=Sum(c1_t, c2_t)\n";
552 os <<
"component-node name=g1 component=Wc-xr input=Append(" 553 << spliced_input <<
", IfDefined(Offset(r_t, -1)))\n";
554 os <<
"component-node name=g_t component=g input=g1\n";
557 os <<
"component-node name=c1_t component=c1 " 558 <<
" input=Append(f_t, " << c_tminus1 <<
")\n";
559 os <<
"component-node name=c2_t component=c2 input=Append(i_t, g_t)\n";
562 os <<
"component-node name=m_t component=m input=Append(o_t, h_t)\n";
565 os <<
"component-node name=rp_t component=W-m input=m_t\n";
567 os <<
"dim-range-node name=r_t input-node=rp_t dim-offset=0 " 568 <<
"dim=" << projection_dim << std::endl;
571 os <<
"component-node name=y_t component=Wy- input=rp_t\n";
574 os <<
"component-node name=final_affine component=final_affine input=y_t\n";
575 os <<
"component-node name=posteriors component=logsoftmax input=final_affine\n";
576 os <<
"output-node name=output input=posteriors\n";
577 configs->push_back(os.str());
582 std::vector<std::string> *configs) {
583 std::ostringstream os;
585 std::vector<int32> splice_context;
588 splice_context.push_back(
i);
589 if (splice_context.empty())
590 splice_context.push_back(0);
593 spliced_dim = input_dim * splice_context.size(),
597 cell_dim = 40 +
Rand() % 50,
598 projection_dim = std::ceil(cell_dim / (
Rand() % 10 + 1));
600 zeroing_threshold =
RandInt(1, 5),
601 zeroing_interval =
RandInt(1, 5) * 10;
604 os <<
"input-node name=input dim=" << input_dim << std::endl;
608 os <<
"component name=Wi-xr type=NaturalGradientAffineComponent" 609 <<
" input-dim=" << spliced_dim + projection_dim
610 <<
" output-dim=" << cell_dim << std::endl;
611 os <<
"component name=Wic type=PerElementScaleComponent " 612 <<
" dim=" << cell_dim << std::endl;
615 os <<
"component name=Wf-xr type=NaturalGradientAffineComponent" 616 <<
" input-dim=" << spliced_dim + projection_dim
617 <<
" output-dim=" << cell_dim << std::endl;
618 os <<
"component name=Wfc type=PerElementScaleComponent " 619 <<
" dim=" << cell_dim << std::endl;
622 os <<
"component name=Wo-xr type=NaturalGradientAffineComponent" 623 <<
" input-dim=" << spliced_dim + projection_dim
624 <<
" output-dim=" << cell_dim << std::endl;
625 os <<
"component name=Woc type=PerElementScaleComponent " 626 <<
" dim=" << cell_dim << std::endl;
629 os <<
"component name=Wc-xr type=NaturalGradientAffineComponent" 630 <<
" input-dim=" << spliced_dim + projection_dim
631 <<
" output-dim=" << cell_dim << std::endl;
636 os <<
"component name=W-m type=NaturalGradientAffineComponent " 637 <<
" input-dim=" << cell_dim
638 <<
" output-dim=" << 2 * projection_dim << std::endl;
641 os <<
"component name=Wy- type=NaturalGradientAffineComponent " 642 <<
" input-dim=" << 2 * projection_dim
643 <<
" output-dim=" << cell_dim << std::endl;
647 os <<
"component name=final_affine type=NaturalGradientAffineComponent " 648 <<
"input-dim=" << cell_dim <<
" output-dim=" << output_dim << std::endl;
649 os <<
"component name=logsoftmax type=LogSoftmaxComponent dim=" 650 << output_dim << std::endl;
655 os <<
"component name=i type=SigmoidComponent dim=" 656 << cell_dim << std::endl;
657 os <<
"component name=f type=SigmoidComponent dim=" 658 << cell_dim << std::endl;
659 os <<
"component name=o type=SigmoidComponent dim=" 660 << cell_dim << std::endl;
661 os <<
"component name=g type=TanhComponent dim=" 662 << cell_dim << std::endl;
663 os <<
"component name=h type=TanhComponent dim=" 664 << cell_dim << std::endl;
665 os <<
"component name=c1 type=ElementwiseProductComponent " 666 <<
" input-dim=" << 2 * cell_dim
667 <<
" output-dim=" << cell_dim << std::endl;
668 os <<
"component name=c2 type=ElementwiseProductComponent " 669 <<
" input-dim=" << 2 * cell_dim
670 <<
" output-dim=" << cell_dim << std::endl;
671 os <<
"component name=m type=ElementwiseProductComponent " 672 <<
" input-dim=" << 2 * cell_dim
673 <<
" output-dim=" << cell_dim << std::endl;
674 os <<
"component name=c type=BackpropTruncationComponent dim=" 676 <<
" scale=" << scale
677 <<
" clipping-threshold=" << clipping_threshold
678 <<
" zeroing-threshold=" << zeroing_threshold
679 <<
" zeroing-interval=" << zeroing_interval
680 <<
" recurrence-interval=1" << std::endl;
681 os <<
"component name=r type=BackpropTruncationComponent dim=" 683 <<
" scale=" << scale
684 <<
" clipping-threshold=" << clipping_threshold
685 <<
" zeroing-threshold=" << zeroing_threshold
686 <<
" zeroing-interval=" << zeroing_interval
687 <<
" recurrence-interval=1" << std::endl;
690 std::ostringstream temp_string_stream;
691 for (
size_t i = 0;
i < splice_context.size();
i++) {
692 int32 offset = splice_context[
i];
693 temp_string_stream <<
"Offset(input, " << offset <<
")";
694 if (
i + 1 < splice_context.size())
695 temp_string_stream <<
", ";
697 std::string spliced_input = temp_string_stream.str();
704 std::string c_tminus1;
706 std::ostringstream os_temp;
707 os_temp <<
"IfDefined(Offset(c_t, " << offset <<
"))";
708 c_tminus1 = os_temp.str();
710 os <<
"component-node name=c_t component=c input=Sum(c1_t, c2_t)\n";
713 os <<
"component-node name=i1 component=Wi-xr input=Append(" 714 << spliced_input <<
", IfDefined(Offset(r_t, " << offset <<
")))\n";
715 os <<
"component-node name=i2 component=Wic " 716 <<
" input=" << c_tminus1 << std::endl;
717 os <<
"component-node name=i_t component=i input=Sum(i1, i2)\n";
720 os <<
"component-node name=f1 component=Wf-xr input=Append(" 721 << spliced_input <<
", IfDefined(Offset(r_t, " << offset <<
")))\n";
722 os <<
"component-node name=f2 component=Wfc " 723 <<
" input=" << c_tminus1 << std::endl;
724 os <<
"component-node name=f_t component=f input=Sum(f1, f2)\n";
727 os <<
"component-node name=o1 component=Wo-xr input=Append(" 728 << spliced_input <<
", IfDefined(Offset(r_t, " << offset <<
")))\n";
729 os <<
"component-node name=o2 component=Woc input=Sum(c1_t, c2_t)\n";
730 os <<
"component-node name=o_t component=o input=Sum(o1, o2)\n";
733 os <<
"component-node name=h_t component=h input=Sum(c1_t, c2_t)\n";
736 os <<
"component-node name=g1 component=Wc-xr input=Append(" 737 << spliced_input <<
", IfDefined(Offset(r_t, " << offset <<
")))\n";
738 os <<
"component-node name=g_t component=g input=g1\n";
741 os <<
"component-node name=c1_t component=c1 " 742 <<
" input=Append(f_t, " << c_tminus1 <<
")\n";
743 os <<
"component-node name=c2_t component=c2 input=Append(i_t, g_t)\n";
746 os <<
"component-node name=m_t component=m input=Append(o_t, h_t)\n";
749 os <<
"component-node name=rp_t component=W-m input=m_t\n";
751 os <<
"dim-range-node name=r_t_pretrunc input-node=rp_t dim-offset=0 " 752 <<
"dim=" << projection_dim << std::endl;
753 os <<
"component-node name=r_t component=r input=r_t_pretrunc\n";
756 os <<
"component-node name=y_t component=Wy- input=rp_t\n";
759 os <<
"component-node name=final_affine component=final_affine input=y_t\n";
760 os <<
"component-node name=posteriors component=logsoftmax input=final_affine\n";
761 os <<
"output-node name=output input=posteriors\n";
762 configs->push_back(os.str());
769 std::vector<std::string> *configs) {
771 std::ostringstream os;
773 std::vector<int32> splice_context;
776 splice_context.push_back(
i);
777 if (splice_context.empty())
778 splice_context.push_back(0);
781 spliced_dim = input_dim * splice_context.size(),
785 cell_dim = 40 +
Rand() % 50,
786 projection_dim = std::ceil(cell_dim / (
Rand() % 10 + 2));
792 os <<
"input-node name=input dim=" << input_dim << std::endl;
794 os <<
"component name=W-x type=NaturalGradientAffineComponent input-dim=" 795 << spliced_dim <<
" output-dim=" << 4 * cell_dim << std::endl;
796 os <<
"component name=W-r type=NaturalGradientAffineComponent input-dim=" 797 << projection_dim <<
" output-dim=" << 4 * cell_dim << std::endl;
798 os <<
"component name=W-m type=NaturalGradientAffineComponent input-dim=" 799 << cell_dim <<
" output-dim=" << 2 * projection_dim << std::endl;
800 os <<
"component name=Wyr type=NaturalGradientAffineComponent input-dim=" 801 << projection_dim <<
" output-dim=" << cell_dim << std::endl;
802 os <<
"component name=Wyp type=NaturalGradientAffineComponent input-dim=" 803 << projection_dim <<
" output-dim=" << cell_dim << std::endl;
805 os <<
"component name=Wic type=PerElementScaleComponent " 806 <<
" dim=" << cell_dim << std::endl;
807 os <<
"component name=Wfc type=PerElementScaleComponent " 808 <<
" dim=" << cell_dim << std::endl;
809 os <<
"component name=Woc type=PerElementScaleComponent " 810 <<
" dim=" << cell_dim << std::endl;
812 os <<
"component name=final_affine type=NaturalGradientAffineComponent " 813 <<
"input-dim=" << cell_dim <<
" output-dim=" << output_dim << std::endl;
814 os <<
"component name=logsoftmax type=LogSoftmaxComponent dim=" 815 << output_dim << std::endl;
820 os <<
"component name=c_t type=NoOpComponent dim=" 821 << cell_dim << std::endl;
822 os <<
"component name=i_t type=SigmoidComponent dim=" 823 << cell_dim << std::endl;
824 os <<
"component name=f_t type=SigmoidComponent dim=" 825 << cell_dim << std::endl;
826 os <<
"component name=o_t type=SigmoidComponent dim=" 827 << cell_dim << std::endl;
828 os <<
"component name=g type=TanhComponent dim=" 829 << cell_dim << std::endl;
830 os <<
"component name=h type=TanhComponent dim=" 831 << cell_dim << std::endl;
832 os <<
"component name=f_t-c_tminus1 type=ElementwiseProductComponent " 833 <<
" input-dim=" << 2 * cell_dim
834 <<
" output-dim=" << cell_dim << std::endl;
835 os <<
"component name=i_t-g type=ElementwiseProductComponent " 836 <<
" input-dim=" << 2 * cell_dim
837 <<
" output-dim=" << cell_dim << std::endl;
838 os <<
"component name=m_t type=ElementwiseProductComponent " 839 <<
" input-dim=" << 2 * cell_dim
840 <<
" output-dim=" << cell_dim << std::endl;
844 os <<
"component-node name=W-x component=W-x input=Append(";
845 for (
size_t i = 0;
i < splice_context.size();
i++) {
846 int32 offset = splice_context[
i];
847 os <<
"Offset(input, " << offset <<
")";
848 if (
i + 1 < splice_context.size())
853 os <<
"component-node name=W-r component=W-r input=IfDefined(Offset(r_t" 855 os <<
"component-node name=W-m component=W-m input=m_t \n";
856 os <<
"component-node name=Wic component=Wic input=IfDefined(Offset(c_t" 858 os <<
"component-node name=Wfc component=Wfc input=IfDefined(Offset(c_t" 860 os <<
"component-node name=Woc component=Woc input=c_t\n";
863 os <<
"dim-range-node name=r_t input-node=W-m dim-offset=0 " 864 <<
"dim=" << projection_dim << std::endl;
865 os <<
"dim-range-node name=p_t input-node=W-m dim-offset=" << projection_dim
866 <<
" dim=" << projection_dim << std::endl;
869 os <<
"dim-range-node name=W_ix-x_t input-node=W-x dim-offset=0 " 870 <<
"dim=" << cell_dim << std::endl;
871 os <<
"dim-range-node name=W_fx-x_t input-node=W-x " 872 <<
"dim-offset=" << cell_dim <<
" dim="<<cell_dim << std::endl;
873 os <<
"dim-range-node name=W_cx-x_t input-node=W-x " 874 <<
"dim-offset=" << 2 * cell_dim <<
" dim="<<cell_dim << std::endl;
875 os <<
"dim-range-node name=W_ox-x_t input-node=W-x " 876 <<
"dim-offset=" << 3 * cell_dim <<
" dim="<<cell_dim << std::endl;
879 os <<
"dim-range-node name=W_ir-r_tminus1 input-node=W-r dim-offset=0 " 880 <<
"dim=" << cell_dim << std::endl;
881 os <<
"dim-range-node name=W_fr-r_tminus1 input-node=W-r " 882 <<
"dim-offset=" << cell_dim <<
" dim="<<cell_dim << std::endl;
883 os <<
"dim-range-node name=W_cr-r_tminus1 input-node=W-r " 884 <<
"dim-offset=" << 2 * cell_dim <<
" dim="<<cell_dim << std::endl;
885 os <<
"dim-range-node name=W_or-r_tminus1 input-node=W-r " 886 <<
"dim-offset=" << 3 * cell_dim <<
" dim="<<cell_dim << std::endl;
889 os <<
"component-node name=c_t component=c_t input=Sum(f_t-c_tminus1, i_t-g)\n";
890 os <<
"component-node name=h component=h input=c_t\n";
891 os <<
"component-node name=i_t component=i_t input=Sum(W_ix-x_t, Sum(W_ir-r_tminus1, Wic))\n";
892 os <<
"component-node name=f_t component=f_t input=Sum(W_fx-x_t, Sum(W_fr-r_tminus1, Wfc))\n";
893 os <<
"component-node name=o_t component=o_t input=Sum(W_ox-x_t, Sum(W_or-r_tminus1, Woc))\n";
894 os <<
"component-node name=f_t-c_tminus1 component=f_t-c_tminus1 input=Append(f_t, Offset(c_t" 896 os <<
"component-node name=i_t-g component=i_t-g input=Append(i_t, g)\n";
897 os <<
"component-node name=m_t component=m_t input=Append(o_t, h)\n";
899 os <<
"component-node name=g component=g input=Sum(W_cx-x_t, W_cr-r_tminus1)\n";
902 os <<
"component-node name=Wyr component=Wyr input=r_t\n";
903 os <<
"component-node name=Wyp component=Wyp input=p_t\n";
905 os <<
"component-node name=final_affine component=final_affine input=Sum(Wyr, Wyp)\n";
907 os <<
"component-node name=posteriors component=logsoftmax input=final_affine\n";
908 os <<
"output-node name=output input=posteriors\n";
910 configs->push_back(os.str());
915 std::vector<std::string> *configs) {
916 std::ostringstream os;
920 input_y_dim = 10 +
Rand() % 20,
921 input_z_dim = 3 +
Rand() % 10,
922 filt_x_dim = 1 +
Rand() % input_x_dim,
923 filt_y_dim = 1 +
Rand() % input_y_dim,
924 num_filters = 10 +
Rand() % 20,
925 filt_x_step = (1 +
Rand() % filt_x_dim),
926 filt_y_step = (1 +
Rand() % filt_y_dim);
927 int32 remainder = (input_x_dim - filt_x_dim) % filt_x_step;
929 input_x_dim = input_x_dim - remainder;
930 remainder = (input_y_dim - filt_y_dim) % filt_y_step;
932 input_y_dim = input_y_dim - remainder;
935 std::string vectorization;
936 if (input_vectorization == 0) {
937 vectorization =
"yzx";
939 vectorization =
"zyx";
942 os <<
"component name=conv type=ConvolutionComponent " 943 <<
" input-x-dim=" << input_x_dim
944 <<
" input-y-dim=" << input_y_dim
945 <<
" input-z-dim=" << input_z_dim
946 <<
" filt-x-dim=" << filt_x_dim
947 <<
" filt-y-dim=" << filt_y_dim
948 <<
" filt-x-step=" << filt_x_step
949 <<
" filt-y-step=" << filt_y_step
950 <<
" num-filters=" << num_filters
951 <<
" input-vectorization-order=" << vectorization
954 int32 conv_output_x_dim = (1 + (input_x_dim - filt_x_dim) / filt_x_step);
955 int32 conv_output_y_dim = (1 + (input_y_dim - filt_y_dim) / filt_y_step);
956 int32 conv_output_z_dim = num_filters;
957 int32 pool_x_size = 1 +
Rand() % conv_output_x_dim;
958 int32 pool_y_size = 1 +
Rand() % conv_output_y_dim;
959 int32 pool_z_size = 1 +
Rand() % conv_output_z_dim;
960 int32 pool_x_step = 1;
961 int32 pool_y_step = 1;
962 int32 pool_z_step = 1;
964 pool_x_step = (1 +
Rand() % pool_x_size);
965 }
while((conv_output_x_dim - pool_x_size) % pool_x_step);
967 pool_y_step = (1 +
Rand() % pool_y_size);
968 }
while((conv_output_y_dim - pool_y_size) % pool_y_step);
970 pool_z_step = (1 +
Rand() % pool_z_size);
971 }
while((conv_output_z_dim - pool_z_size) % pool_z_step);
973 os <<
"component name=maxpooling type=MaxpoolingComponent " 974 <<
" input-x-dim=" << conv_output_x_dim
975 <<
" input-y-dim=" << conv_output_y_dim
976 <<
" input-z-dim=" << conv_output_z_dim
977 <<
" pool-x-size=" << pool_x_size
978 <<
" pool-y-size=" << pool_y_size
979 <<
" pool-z-size=" << pool_z_size
980 <<
" pool-x-step=" << pool_x_step
981 <<
" pool-y-step=" << pool_y_step
982 <<
" pool-z-step=" << pool_z_step
985 os <<
"input-node name=input dim=" << (input_x_dim * input_y_dim * input_z_dim) << std::endl;
986 os <<
"component-node name=conv_node component=conv input=input\n";
987 os <<
"component-node name=maxpooling_node component=maxpooling input=conv_node\n";
988 os <<
"output-node name=output input=conv_node\n";
989 configs->push_back(os.str());
996 std::vector<std::string> *configs) {
997 std::ostringstream ss;
1007 std::string cur_layer_descriptor =
"input";
1010 ss <<
"input-node name=input dim=" << (cur_height * cur_num_filt)
1015 for (
int32 l = 0; l < num_layers; l++) {
1018 bool height_padding = (cur_height < 5 ||
RandInt(0, 1) == 0);
1020 if (cur_height < 4) {
1023 height_subsampling_factor = 1;
1026 int32 next_height = cur_height;
1027 if (!height_padding) {
1030 next_height = (next_height + height_subsampling_factor - 1) /
1031 height_subsampling_factor;
1033 if (next_height == cur_height &&
RandInt(0, 1) == 0) {
1037 next_num_filt = cur_num_filt;
1040 std::string time_offsets, required_time_offsets;
1043 required_time_offsets = (
RandInt(0, 1) == 0 ?
"" :
"0");
1044 }
else if (
RandInt(0, 1) == 0) {
1045 time_offsets =
"-1,0,1";
1046 required_time_offsets = (
RandInt(0, 1) == 0 ?
"" :
"-1");
1048 time_offsets =
"-2,0,2";
1049 required_time_offsets = (
RandInt(0, 1) == 0 ?
"" :
"0");
1052 ss <<
"component type=TimeHeightConvolutionComponent name=layer" << l <<
"-conv " 1053 <<
"num-filters-in=" << cur_num_filt
1054 <<
" num-filters-out=" << next_num_filt
1055 <<
" height-in=" << cur_height
1056 <<
" height-out=" << next_height
1057 <<
" height-offsets=" << (height_padding ?
"-1,0,1" :
"0,1,2")
1058 <<
" time-offsets=" << time_offsets;
1064 ss <<
" max-memory-mb=1.0e-04";
1067 if (height_subsampling_factor != 1 ||
RandInt(0, 1) == 0)
1068 ss <<
" height-subsample-out=" << height_subsampling_factor;
1069 if (required_time_offsets ==
"" &&
RandInt(0, 1) == 0) {
1070 required_time_offsets = time_offsets;
1074 if (required_time_offsets !=
"")
1075 ss <<
" required-time-offsets=" << required_time_offsets;
1077 ss <<
" param-stddev=0.1 bias-stddev=1";
1079 ss <<
" use-natural-gradient=false";
1083 ss <<
" rank-out=4";
1085 ss <<
" alpha-in=2.0";
1087 ss <<
" alpha-out=2.0";
1090 ss <<
"component-node name=layer" << l <<
"-conv component=layer" 1091 << l <<
"-conv input=" << cur_layer_descriptor << std::endl;
1093 bool use_relu =
false;
1095 ss <<
"component type=RectifiedLinearComponent name=layer" << l
1096 <<
"-relu dim=" << (next_height * next_num_filt) << std::endl;
1097 ss <<
"component-node name=layer" << l <<
"-relu component=layer" 1098 << l <<
"-relu input=layer" << l <<
"-conv" << std::endl;
1101 std::ostringstream desc_ss;
1102 if (next_height == cur_height && next_num_filt == cur_num_filt
1104 desc_ss <<
"Sum(" << cur_layer_descriptor <<
", layer" << l
1105 << (use_relu ?
"-relu)" :
"-conv)");
1107 desc_ss <<
"layer" << l << (use_relu ?
"-relu" :
"-conv");
1111 std::ostringstream round_desc_ss;
1113 round_desc_ss <<
"Round(" << desc_ss.str() <<
", " << modulus <<
")";
1114 cur_layer_descriptor = round_desc_ss.str();
1116 cur_layer_descriptor = desc_ss.str();
1118 cur_height = next_height;
1119 cur_num_filt = next_num_filt;
1122 ss <<
"output-node name=output input=" << cur_layer_descriptor << std::endl;
1125 configs->push_back(ss.str());
1132 std::vector<std::string> *configs) {
1133 std::ostringstream ss;
1141 num_left_inputs =
RandInt(1, 4),
1142 num_right_inputs =
RandInt(0, 2),
1143 num_left_inputs_required =
RandInt(0, num_left_inputs),
1144 num_right_inputs_required =
RandInt(0, num_right_inputs);
1145 bool output_context = (
RandInt(0, 1) == 0);
1146 int32 context_dim = (num_left_inputs + 1 + num_right_inputs),
1147 query_dim = key_dim + context_dim;
1148 int32 attention_input_dim = num_heads * (key_dim + value_dim + query_dim);
1150 std::string cur_layer_descriptor =
"input";
1153 ss <<
"input-node name=input dim=" << input_dim
1158 ss <<
"component name=affine type=NaturalGradientAffineComponent input-dim=" 1159 << input_dim <<
" output-dim=" << attention_input_dim << std::endl;
1160 ss <<
"component-node name=affine component=affine input=input" 1165 ss <<
"component-node name=attention component=attention input=affine" 1167 ss <<
"component name=attention type=RestrictedAttentionComponent" 1168 <<
" num-heads=" << num_heads <<
" key-dim=" << key_dim
1169 <<
" value-dim=" << value_dim <<
" time-stride=" << time_stride
1170 <<
" num-left-inputs=" << num_left_inputs <<
" num-right-inputs=" 1171 << num_right_inputs <<
" num-left-inputs-required=" 1172 << num_left_inputs_required <<
" num-right-inputs-required=" 1173 << num_right_inputs_required
1174 <<
" output-context=" << (output_context ?
"true" :
"false")
1175 << (
RandInt(0, 1) == 0 ?
" key-scale=1.0" :
"")
1180 ss <<
"output-node name=output input=attention" << std::endl;
1182 configs->push_back(ss.str());
1189 std::vector<std::string> *configs) {
1192 input_dim = x_expand * after_expand_dim;
1193 std::ostringstream os;
1194 os <<
"input-node name=input dim=" << input_dim << std::endl;
1195 os <<
"component name=distribute type=DistributeComponent input-dim=" 1196 << input_dim <<
" output-dim=" << after_expand_dim << std::endl;
1197 os <<
"component-node name=distribute component=distribute input=input\n";
1198 os <<
"component name=affine type=AffineComponent input-dim=" 1199 << after_expand_dim <<
" output-dim=" << output_dim << std::endl;
1200 os <<
"component-node name=affine component=affine input=distribute\n";
1201 os <<
"output-node name=output input=Sum(";
1202 for (
int32 i = 0;
i < x_expand;
i++) {
1203 if (
i > 0) os <<
", ";
1204 os <<
"ReplaceIndex(affine, x, " <<
i <<
")";
1207 configs->push_back(os.str());
1214 std::vector<std::string> *configs) {
1218 KALDI_WARN <<
"This function doesn't take a requested output_dim due to " 1219 "implementation complications.";
1222 std::ostringstream os;
1223 os <<
"component name=composite1 type=CompositeComponent max-rows-process=" 1224 << max_rows_process <<
" num-components=" << num_components;
1226 int32 types_length = 3;
1227 std::string types[] = {
"BlockAffineComponent",
1228 "RepeatedAffineComponent",
1229 "NaturalGradientRepeatedAffineComponent"};
1230 int32 last_output_dim = input_dim;
1232 for(
int32 i = 1;
i <= num_components;
i++) {
1233 os <<
" component" <<
i <<
"=";
1235 std::string rand_type = types[rand_index];
1236 os <<
"'type=" << rand_type <<
" input-dim=" << last_output_dim;
1239 int32 num_repeats = 10;
1240 os <<
" output-dim=" << current_output_dim;
1241 std::string repeats_string = (rand_type ==
"BlockAffineComponent") ?
"num-blocks":
"num-repeats";
1242 os <<
" " << repeats_string <<
"=" << num_repeats <<
"'";
1243 last_output_dim = current_output_dim;
1245 os << std::endl << std::endl;
1246 os <<
"input-node name=input dim=" << input_dim << std::endl;
1247 os <<
"component-node name=composite1 component=composite1 input=input\n";
1248 os <<
"output-node name=output input=composite1\n";
1249 configs->push_back(os.str());
1254 std::vector<std::string> *configs) {
1257 switch(network_type) {
1333 KALDI_ERR <<
"Error generating config sequence.";
1344 int32 left_context, right_context;
1347 int32 num_output_frames = 1 +
Rand() % 10,
1348 output_start_frame =
Rand() % 10,
1349 num_examples = 1 +
Rand() % 4,
1350 output_end_frame = output_start_frame + num_output_frames,
1351 input_start_frame = output_start_frame - left_context - (
Rand() % 3),
1352 input_end_frame = output_end_frame + right_context + (
Rand() % 3),
1353 n_offset =
Rand() % 2;
1354 bool need_deriv = (
Rand() % 2 == 0);
1358 if (input_end_frame < input_start_frame + 3)
1359 input_end_frame = input_start_frame + 3;
1365 std::vector<Index> input_indexes, ivector_indexes, output_indexes;
1366 for (
int32 n = n_offset;
n < n_offset + num_examples;
n++) {
1367 for (
int32 t = input_start_frame; t < input_end_frame; t++)
1368 input_indexes.push_back(
Index(
n, t, 0));
1369 for (
int32 t = output_start_frame; t < output_end_frame; t++)
1370 output_indexes.push_back(
Index(
n, t, 0));
1371 ivector_indexes.push_back(
Index(
n, 0, 0));
1374 if (need_deriv || (
Rand() % 3 == 0))
1375 request->
outputs.back().has_deriv =
true;
1377 if (need_deriv && (
Rand() % 2 == 0))
1378 request->
inputs.back().has_deriv =
true;
1384 inputs->back().SetRandn();
1386 if (ivector_dim != -1) {
1389 inputs->back().SetRandn();
1390 if (need_deriv && (
Rand() % 2 == 0))
1391 request->
inputs.back().has_deriv =
true;
1393 if (
Rand() % 2 == 0)
1395 if (
Rand() % 2 == 0)
1401 std::string *config) {
1406 std::ostringstream os;
1409 *component_type =
"PnormComponent";
1411 input_dim = output_dim * group_size;
1412 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim;
1417 std::string add_log_stddev = (
Rand() % 2 == 0 ?
"True" :
"False");
1418 *component_type =
"NormalizeComponent";
1421 dim = block_dim * num_blocks;
1424 os <<
"dim=" << dim <<
" block-dim=" << block_dim
1425 <<
" target-rms=" << target_rms
1426 <<
" add-log-stddev=" << add_log_stddev;
1430 *component_type =
"SigmoidComponent";
1431 os <<
"dim=" <<
RandInt(1, 50);
1435 *component_type =
"TanhComponent";
1436 os <<
"dim=" <<
RandInt(1, 50);
1440 *component_type =
"RectifiedLinearComponent";
1441 os <<
"dim=" <<
RandInt(1, 50);
1445 *component_type =
"SoftmaxComponent";
1446 os <<
"dim=" <<
RandInt(1, 50);
1450 *component_type =
"LogSoftmaxComponent";
1451 os <<
"dim=" <<
RandInt(1, 50);
1455 *component_type =
"NoOpComponent";
1456 os <<
"dim=" <<
RandInt(1, 50);
1460 *component_type =
"FixedAffineComponent";
1462 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim;
1466 *component_type =
"AffineComponent";
1468 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1469 <<
" learning-rate=" << learning_rate;
1473 *component_type =
"NaturalGradientAffineComponent";
1475 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1476 <<
" learning-rate=" << learning_rate;
1480 *component_type =
"SumGroupComponent";
1481 std::vector<int32> sizes;
1484 for (
int32 i = 0;
i < num_groups;
i++) {
1486 if (
i + 1 < num_groups)
1492 *component_type =
"FixedScaleComponent";
1493 os <<
"dim=" <<
RandInt(1, 100);
1497 *component_type =
"FixedBiasComponent";
1498 os <<
"dim=" <<
RandInt(1, 100);
1502 *component_type =
"NaturalGradientPerElementScaleComponent";
1503 os <<
"dim=" <<
RandInt(1, 100)
1504 <<
" learning-rate=" << learning_rate;
1508 *component_type =
"PerElementScaleComponent";
1509 os <<
"dim=" <<
RandInt(1, 100)
1510 <<
" learning-rate=" << learning_rate;
1514 *component_type =
"ElementwiseProductComponent";
1516 input_dim = output_dim * multiple;
1517 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim;
1522 std::string vectorization;
1523 if (input_vectorization == 0) {
1524 vectorization =
"yzx";
1526 vectorization =
"zyx";
1528 *component_type =
"ConvolutionComponent";
1530 input_y_dim = 10 +
Rand() % 20,
1531 input_z_dim = 3 +
Rand() % 10,
1532 filt_x_dim = 1 +
Rand() % input_x_dim,
1533 filt_y_dim = 1 +
Rand() % input_y_dim,
1534 num_filters = 1 +
Rand() % 10,
1535 filt_x_step = (1 +
Rand() % filt_x_dim),
1536 filt_y_step = (1 +
Rand() % filt_y_dim);
1537 int32 remainder = (input_x_dim - filt_x_dim) % filt_x_step;
1539 input_x_dim = input_x_dim - remainder;
1540 remainder = (input_y_dim - filt_y_dim) % filt_y_step;
1542 input_y_dim = input_y_dim - remainder;
1544 os <<
"input-x-dim=" << input_x_dim
1545 <<
" input-y-dim=" << input_y_dim
1546 <<
" input-z-dim=" << input_z_dim
1547 <<
" filt-x-dim=" << filt_x_dim
1548 <<
" filt-y-dim=" << filt_y_dim
1549 <<
" filt-x-step=" << filt_x_step
1550 <<
" filt-y-step=" << filt_y_step
1551 <<
" num-filters=" << num_filters
1552 <<
" input-vectorization-order=" << vectorization
1553 <<
" learning-rate=" << learning_rate;
1559 *component_type =
"PermuteComponent";
1561 std::vector<int32> column_map(input_dim);
1562 for (
int32 i = 0;
i < input_dim;
i++)
1564 std::random_shuffle(column_map.begin(), column_map.end());
1565 std::ostringstream buffer;
1566 for (
int32 i = 0;
i < input_dim-1;
i++)
1567 buffer << column_map[
i] <<
",";
1568 buffer << column_map.back();
1569 os <<
"column-map=" << buffer.str();
1573 *component_type =
"PerElementOffsetComponent";
1574 std::string param_config =
RandInt(0, 1)?
1575 " param-mean=0.0 param-stddev=0.0":
1576 " param-mean=1.0 param-stddev=1.0";
1578 os <<
"dim=" << dim <<
" block-dim=" << block_dim
1579 <<
" use-natural-gradient=" << (
RandInt(0, 1) == 0 ?
"true" :
"false")
1580 <<
" learning-rate=" << learning_rate << param_config;
1584 *component_type =
"CompositeComponent";
1586 max_rows_process =
RandInt(1, 30);
1587 os <<
"num-components=" << num_components
1588 <<
" max-rows-process=" << max_rows_process;
1589 std::vector<std::string> sub_configs;
1590 for (
int32 i = 1;
i <= num_components;
i++) {
1592 os <<
" component" <<
i <<
"='type=RectifiedLinearComponent dim=" 1594 }
else if (
RandInt(1, 2) == 1) {
1595 os <<
" component" <<
i <<
"='type=TanhComponent dim=" 1599 os <<
" component" <<
i <<
"='type=AffineComponent input-dim=" 1600 << cur_dim <<
" output-dim=" << next_dim <<
"'";
1607 *component_type =
"SumGroupComponent";
1609 input_dim = num_groups *
RandInt(1, 15);
1610 os <<
"input-dim=" << input_dim <<
" output-dim=" << num_groups;
1614 *component_type =
"RepeatedAffineComponent";
1616 input_dim = num_repeats *
RandInt(1, 15),
1617 output_dim = num_repeats *
RandInt(1, 15);
1618 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1619 <<
" num-repeats=" << num_repeats;
1623 *component_type =
"BlockAffineComponent";
1625 input_dim = num_blocks *
RandInt(1, 15),
1626 output_dim = num_blocks *
RandInt(1, 15);
1627 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1628 <<
" num-blocks=" << num_blocks;
1632 *component_type =
"NaturalGradientRepeatedAffineComponent";
1634 input_dim = num_repeats *
RandInt(1, 15),
1635 output_dim = num_repeats *
RandInt(1, 15);
1636 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1637 <<
" num-repeats=" << num_repeats;
1641 *component_type =
"MaxpoolingComponent";
1643 input_y_dim = 5 +
Rand() % 10,
1644 input_z_dim = 5 +
Rand() % 10;
1645 int32 pool_x_size = 1 +
Rand() % input_x_dim,
1646 pool_y_size = 1 +
Rand() % input_y_dim,
1647 pool_z_size = 1 +
Rand() % input_z_dim;
1648 int32 pool_x_step = (1 +
Rand() % pool_x_size),
1649 pool_y_step = (1 +
Rand() % pool_y_size),
1650 pool_z_step = (1 +
Rand() % pool_z_size);
1652 int32 remainder = (input_x_dim - pool_x_size) % pool_x_step;
1653 input_x_dim = input_x_dim - remainder;
1654 remainder = (input_y_dim - pool_y_size) % pool_y_step;
1655 input_y_dim = input_y_dim - remainder;
1656 remainder = (input_z_dim - pool_z_size) % pool_z_step;
1657 input_z_dim = input_z_dim - remainder;
1658 os <<
" input-x-dim=" << input_x_dim
1659 <<
" input-y-dim=" << input_y_dim
1660 <<
" input-z-dim=" << input_z_dim
1661 <<
" pool-x-size=" << pool_x_size
1662 <<
" pool-y-size=" << pool_y_size
1663 <<
" pool-z-size=" << pool_z_size
1664 <<
" pool-x-step=" << pool_x_step
1665 <<
" pool-y-step=" << pool_y_step
1666 <<
" pool-z-step=" << pool_z_step;
1670 *component_type =
"ConstantFunctionComponent";
1672 bool is_updatable = (
RandInt(0, 1) == 0),
1673 use_natural_gradient = (
RandInt(0, 1) == 0);
1674 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1675 <<
" learning-rate=" << learning_rate
1676 <<
" is-updatable=" << std::boolalpha << is_updatable
1677 <<
" use-natural-gradient=" << std::boolalpha << use_natural_gradient;
1681 *component_type =
"ClipGradientComponent";
1682 os <<
"dim=" <<
RandInt(1, 50);
1683 os <<
" clipping-threshold=" <<
RandInt(1, 50)
1684 <<
" norm-based-clipping=" << (
RandInt(0, 1) == 0 ?
"false" :
"true");
1686 os <<
" self-repair-scale=" 1689 os <<
" self-repair-clipped-proportion-threshold=" <<
RandUniform();
1695 *component_type =
"DropoutComponent";
1696 bool test_mode = (
RandInt(0, 1) == 0);
1697 os <<
"dim=" <<
RandInt(1, 200)
1698 <<
" dropout-proportion=" <<
RandUniform() <<
" test-mode=" 1699 << (test_mode ?
"true" :
"false");
1703 *component_type =
"LstmNonlinearityComponent";
1705 os <<
"cell-dim=" <<
RandInt(1, 200)
1706 <<
" self-repair-scale=0.0";
1712 *component_type =
"BatchNormComponent";
1714 bool test_mode = (
RandInt(0, 1) == 0);
1715 os <<
" dim=" << dim
1716 <<
" block-dim=" << block_dim <<
" target-rms=" 1717 <<
RandInt(1, 4) <<
" test-mode=" 1718 << (test_mode ?
"true" :
"false")
1719 <<
" epsilon=" << (
RandInt(0, 1) == 0 ?
"0.1" :
"1.0");
1723 *component_type =
"SumBlockComponent";
1726 input_dim = output_dim *
RandInt(1, 3);
1727 os <<
"input-dim=" << input_dim
1728 <<
" output-dim=" << output_dim
1729 <<
" scale=" << scale;
1733 *component_type =
"ScaleAndOffsetComponent";
1736 dim = block_dim * num_blocks;
1737 os <<
"dim=" << dim <<
" block-dim=" << block_dim
1738 <<
" use-natural-gradient=" 1739 << (
RandInt(0,1) == 0 ?
"true" :
"false");
1743 *component_type =
"LinearComponent";
1745 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1746 <<
" learning-rate=" << learning_rate;
1752 *component_type =
"TdnnComponent";
1754 os <<
"input-dim=" << input_dim <<
" output-dim=" << output_dim
1755 <<
" learning-rate=" << learning_rate <<
" time-offsets=0" 1756 <<
" use-natural-gradient=" << (
RandInt(0,1) == 0 ?
"true":
"false")
1757 <<
" use-bias=" << (
RandInt(0,1) == 0 ?
"true":
"false");
1761 *component_type =
"GruNonlinearityComponent";
1764 RandInt(5, cell_dim - 1) : cell_dim);
1765 os <<
"cell-dim=" << cell_dim
1766 <<
" recurrent-dim=" << recurrent_dim;
1770 *component_type =
"OutputGruNonlinearityComponent";
1771 os <<
"cell-dim=" <<
RandInt(10, 20)
1772 <<
" learning-rate=" << learning_rate;
1777 KALDI_ERR <<
"Error generating random component";
1784 std::string component_type, config;
1788 KALDI_ERR <<
"Bad config line " << config;
1792 KALDI_ERR <<
"Invalid component type " << component_type;
1796 <<
" has unused values: " 1807 for (
int32 c = 0; c < num_components; c++) {
1813 *u2 = dynamic_cast<const UpdatableComponent*>(c2);
1816 prod21 = u2->DotProduct(*u1), prod22 = u2->DotProduct(*u2);
1817 BaseFloat max_prod = std::max(std::max(prod11, prod12),
1818 std::max(prod21, prod22)),
1819 min_prod = std::min(std::min(prod11, prod12),
1820 std::min(prod21, prod22));
1821 if (max_prod - min_prod > threshold * max_prod) {
1823 <<
"' differs in nnet1 versus nnet2: prod(11,12,21,22) = " 1824 << prod11 <<
',' << prod12 <<
',' << prod21 <<
',' << prod22;
1833 int32 num_supervised_frames,
1835 int32 right_context,
1840 KALDI_ASSERT(num_supervised_frames > 0 && left_context >= 0 &&
1841 right_context >= 0 && output_dim > 0 && input_dim > 0
1842 && example != NULL);
1843 example->
io.clear();
1846 int32 num_feat_frames = left_context + right_context + num_supervised_frames;
1849 NnetIo input_feat(
"input", feature_t_begin, input_mat);
1852 example->
io.push_back(input_feat);
1854 if (ivector_dim > 0) {
1859 NnetIo ivector_feat(
"ivector", 0, ivector_mat);
1862 example->
io.push_back(ivector_feat);
1866 Posterior labels(num_supervised_frames);
1867 for (
int32 t = 0; t < num_supervised_frames; t++) {
1870 for (
int32 i = 0;
i < num_labels;
i++) {
1872 remaining_prob_mass;
1873 remaining_prob_mass -= this_prob;
1874 labels[t].push_back(std::pair<int32, BaseFloat>(
RandInt(0, output_dim-1),
1878 int32 supervision_t_begin = feature_t_begin + left_context;
1879 NnetIo output_sup(
"output", output_dim, supervision_t_begin,
1881 example->
io.push_back(output_sup);
1888 if (eg1.
io.size() != eg2.
io.size())
1890 for (
size_t i = 0;
i < eg1.
io.size();
i++) {
1892 if (io1.
name != io2.name || io1.
indexes != io2.indexes)
1896 io2.features.GetMatrix(&feat2);
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 InputDim(const std::string &input_name) const
void GenerateSimpleNnetTrainingExample(int32 num_supervised_frames, int32 left_context, int32 right_context, int32 output_dim, int32 input_dim, int32 ivector_dim, NnetExample *example)
Low-level function that generates an nnet training example.
const std::string WholeLine()
bool ParseLine(const std::string &line)
bool store_component_stats
You should set store_component_stats to true if you need the average-activation and average-derivative...
bool allow_statistics_pooling
void GenerateConfigSequenceLstm(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
bool NnetParametersAreIdentical(const Nnet &nnet1, const Nnet &nnet2, BaseFloat threshold=1.0e-05)
Used for testing that the updatable parameters in two networks are the same.
bool need_model_derivative
if need_model_derivative is true, then we'll be doing either model training or model-derivative compu...
void GetMatrix(Matrix< BaseFloat > *mat) const
Outputs the contents as a matrix.
Abstract base-class for neural-net components.
void GenerateConfigSequenceRnnClockwork(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequenceLstmType2(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
This file contains various routines that are useful in test code.
void GenerateConfigSequenceLstmWithTruncation(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequenceSimpleContext(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
GeneralMatrix features
The features or labels.
std::vector< IoSpecification > inputs
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
static void GenerateRandomComponentConfig(std::string *component_type, std::string *config)
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
This file contains some miscellaneous functions dealing with class Nnet.
void GenerateConfigSequenceDistribute(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
std::string UnusedValues() const
returns e.g.
void ComputeExampleComputationRequestSimple(const Nnet &nnet, ComputationRequest *request, std::vector< Matrix< BaseFloat > > *inputs)
This function computes an example computation request, for testing purposes.
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
void GenerateConfigSequenceRnn(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet.
virtual BaseFloat DotProduct(const UpdatableComponent &other) const =0
Computes dot-product between parameters of two instances of a Component.
virtual int32 Properties() const =0
Return bitmask of the component's properties.
void GenerateConfigSequenceCnn(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequenceCnnNew(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void SetRandn()
Sets to random values of a normal distribution.
bool ExampleApproxEqual(const NnetExample &eg1, const NnetExample &eg2, BaseFloat delta)
Returns true if the examples are approximately equal (only intended to be used in testing)...
void GenerateConfigSequenceStatistics(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
const std::string & GetComponentName(int32 component_index) const
Returns the name of an individual component.
int Rand(struct RandomState *state)
Component * GetComponent(int32 c)
Return component indexed c. Not a copy; not owned by caller.
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
void GenerateConfigSequenceSimplest(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
virtual std::string Type() const =0
Returns a string such as "SigmoidComponent", describing the type of the object.
bool allow_final_nonlinearity
void GenerateConfigSequenceRestrictedAttention(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
bool IsSimpleNnet(const Nnet &nnet)
This function returns true if the nnet has the following properties: It has an output called "output"...
int32 NumComponents() const
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
#define KALDI_ASSERT(cond)
std::vector< IoSpecification > outputs
bool HasUnusedValues() const
std::string name
the name of the input in the neural net; in simple setups it will just be "input".
void GenerateConfigSequenceSimple(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
void GenerateConfigSequence(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
Generates a sequence of at least one config file, output as strings, where the first in the sequence...
std::vector< NnetIo > io
"io" contains the input and output.
static Component * NewComponentOfType(const std::string &type)
Returns a new Component of the given type e.g.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
Returns true if abs(a - b) <= relative_tolerance * (abs(a) + abs(b)).
void GenerateConfigSequenceCompositeBlock(const NnetGenerationOptions &opts, std::vector< std::string > *configs)
Generate a config string with a composite component composed only of block affine, repeated affine, and natural gradient repeated affine components.
virtual void InitFromConfig(ConfigLine *cfl)=0
Initialize, from a ConfigLine object.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Component * GenerateRandomSimpleComponent()
Generates a random simple component for testing.