45 bool ok = cfl->
GetValue(
"output-dim", &output_dim) &&
46 cfl->
GetValue(
"input-dim", &input_dim);
48 KALDI_ERR <<
"Invalid initializer for layer of type " 50 Init(input_dim, output_dim);
97 dropout_proportion_(other.dropout_proportion_),
98 dropout_per_frame_(other.dropout_per_frame_) { }
106 bool dropout_per_frame) {
115 bool dropout_per_frame =
false;
117 bool ok = cfl->
GetValue(
"dim", &dim) &&
118 cfl->
GetValue(
"dropout-proportion", &dropout_proportion);
119 cfl->
GetValue(
"dropout-per-frame", &dropout_per_frame);
125 dropout_proportion < 0.0 || dropout_proportion > 1.0)
126 KALDI_ERR <<
"Invalid initializer for layer of type " 128 Init(dim, dropout_proportion, dropout_per_frame);
132 std::ostringstream stream;
133 stream <<
Type() <<
", dim=" <<
dim_ 149 out->
Scale(1.0 - dropout);
171 tmp.ApplyHeaviside();
201 if (token ==
"<DropoutComponent>") {
210 if (token ==
"<DropoutPerFrame>") {
216 if (token ==
"<TestMode>") {
229 WriteToken(os, binary,
"<DropoutProportion>");
235 WriteToken(os, binary,
"</DropoutComponent>");
239 input_dim_ = input_dim;
240 output_dim_ = output_dim;
248 int32 output_dim = 0;
249 bool ok = cfl->
GetValue(
"output-dim", &output_dim) &&
250 cfl->
GetValue(
"input-dim", &input_dim);
252 KALDI_ERR <<
"Invalid initializer for layer of type " 254 Init(input_dim, output_dim);
262 int32 num_inputs = input_dim_ / output_dim_;
263 for (
int32 i = 0;
i < num_inputs;
i++) {
265 i * output_dim_, output_dim_);
283 NVTX_RANGE(
"ElementwiseProductComponent::Backprop");
284 if (!in_deriv)
return;
285 int32 num_inputs = input_dim_ / output_dim_;
286 for (
int32 i = 0;
i < num_inputs;
i++) {
291 for (
int32 j = 0;
j < num_inputs;
j++) {
309 ExpectToken(is, binary,
"</ElementwiseProductComponent>");
313 WriteToken(os, binary,
"<ElementwiseProductComponent>");
318 WriteToken(os, binary,
"</ElementwiseProductComponent>");
337 if (in_deriv != NULL) {
340 if (to_update != NULL) {
341 RepairGradients(out_value, in_deriv, to_update);
357 BaseFloat default_lower_threshold = 0.05;
365 if (self_repair_scale_ == 0.0 || count_ == 0.0 || deriv_sum_.Dim() !=
dim_ ||
370 KALDI_ASSERT(self_repair_scale_ > 0.0 && self_repair_scale_ < 0.1);
372 BaseFloat lower_threshold = (self_repair_lower_threshold_ == unset ?
373 default_lower_threshold :
374 self_repair_lower_threshold_) *
376 if (self_repair_upper_threshold_ != unset) {
377 KALDI_ERR <<
"Do not set the self-repair-upper-threshold for sigmoid " 378 <<
"components, it does nothing.";
385 thresholds_vec.
AddVec(-1.0, deriv_sum_);
386 thresholds_vec.
Add(lower_threshold);
413 in_deriv->
AddMatDiagVec(-2.0 * self_repair_scale_ / repair_probability,
414 out_value,
kNoTrans, thresholds_vec);
415 in_deriv->
AddVecToRows(self_repair_scale_ / repair_probability,
427 if (
RandInt(0, 1) == 0 && count_ != 0)
433 temp_deriv.AddMat(-1.0, out_value);
434 temp_deriv.MulElements(out_value);
435 StoreStatsInternal(out_value, &temp_deriv);
458 if (backprop_scale_ != 1.0)
459 in_deriv->
Scale(backprop_scale_);
463 backprop_scale_ = 1.0;
464 cfl->
GetValue(
"backprop-scale", &backprop_scale_);
467 KALDI_ERR <<
"Invalid initializer for layer of type " 473 std::ostringstream stream;
474 stream <<
Type() <<
", dim=" <<
dim_;
475 if (backprop_scale_ != 1.0)
476 stream <<
", backprop-scale=" << backprop_scale_;
496 backprop_scale_ = 1.0;
499 temp_vec.
Read(is, binary);
501 temp_vec.
Read(is, binary);
507 temp_vec.
Read(is, binary);
513 if (token[0] !=
'<') {
518 if (token ==
"<NumDimsSelfRepaired>") {
522 if (token ==
"<NumDimsProcessed>") {
548 if (token ==
"<SelfRepairClippedProportionThreshold>") {
549 ReadBasicType(is, binary, &self_repair_clipped_proportion_threshold_);
556 self_repair_clipped_proportion_threshold_ = 1.0;
557 self_repair_target_ = 0.0;
558 self_repair_scale_ = 0.0;
565 if (token ==
"<NumSelfRepaired>") {
569 ExpectToken(is, binary,
"</ClipGradientComponent>");
571 num_self_repaired_ = 0;
572 num_backpropped_ = 0;
578 WriteToken(os, binary,
"<ClipGradientComponent>");
581 WriteToken(os, binary,
"<ClippingThreshold>");
583 WriteToken(os, binary,
"<NormBasedClipping>");
585 WriteToken(os, binary,
"<SelfRepairClippedProportionThreshold>");
586 WriteBasicType(os, binary, self_repair_clipped_proportion_threshold_);
591 WriteToken(os, binary,
"<NumElementsClipped>");
593 WriteToken(os, binary,
"<NumElementsProcessed>");
599 WriteToken(os, binary,
"</ClipGradientComponent>");
603 std::ostringstream stream;
604 stream <<
Type() <<
", dim=" <<
dim_ 605 <<
", norm-based-clipping=" 606 << (norm_based_clipping_ ?
"true" :
"false")
607 <<
", clipping-threshold=" << clipping_threshold_
608 <<
", clipped-proportion=" 609 << (count_ > 0 ? static_cast<BaseFloat>(num_clipped_)/count_ : 0);
610 if (self_repair_scale_ != 0.0)
611 stream <<
", self-repair-clipped-proportion-threshold=" 612 << self_repair_clipped_proportion_threshold_
613 <<
", self-repair-target=" << self_repair_target_
614 <<
", self-repair-scale=" << self_repair_scale_;
620 bool norm_based_clipping,
621 BaseFloat self_repair_clipped_proportion_threshold,
626 int32 num_self_repaired,
627 int32 num_backpropped) {
629 self_repair_clipped_proportion_threshold >= 0.0 &&
630 self_repair_target >= 0.0 && self_repair_scale >= 0.0);
632 norm_based_clipping_ = norm_based_clipping;
633 clipping_threshold_ = clipping_threshold;
634 self_repair_clipped_proportion_threshold_ =
635 self_repair_clipped_proportion_threshold;
636 self_repair_target_ = self_repair_target;
637 self_repair_scale_ = self_repair_scale;
638 num_clipped_ = num_clipped;
640 num_self_repaired_ = num_self_repaired;
641 num_backpropped_ = num_backpropped;
646 bool ok = cfl->
GetValue(
"dim", &dim);
647 bool norm_based_clipping =
false;
649 BaseFloat self_repair_clipped_proportion_threshold = 0.01;
652 cfl->
GetValue(
"clipping-threshold", &clipping_threshold);
653 cfl->
GetValue(
"norm-based-clipping", &norm_based_clipping);
654 cfl->
GetValue(
"self-repair-clipped-proportion-threshold",
655 &self_repair_clipped_proportion_threshold);
657 &self_repair_target);
658 cfl->
GetValue(
"self-repair-scale", &self_repair_scale);
660 clipping_threshold < 0 || dim <= 0 ||
661 self_repair_clipped_proportion_threshold < 0.0 ||
662 self_repair_target < 0.0 || self_repair_scale < 0.0)
663 KALDI_ERR <<
"Invalid initializer for layer of type " 665 Init(dim, clipping_threshold, norm_based_clipping,
666 self_repair_clipped_proportion_threshold,
668 self_repair_scale, 0, 0, 0, 0);
689 NVTX_RANGE(
"ClipGradientComponent::Backprop");
697 if (clipping_threshold_ > 0) {
698 if (norm_based_clipping_) {
702 clipping_scales.
AddDiagMat2(pow(clipping_threshold_, -2), *in_deriv,
706 int32 num_not_scaled;
707 clipping_scales.ApplyFloor(1.0, &num_not_scaled);
710 if (num_not_scaled != clipping_scales.Dim()) {
711 clipping_scales.ApplyPow(-0.5);
715 if (to_update != NULL)
716 to_update->
num_clipped_ += (clipping_scales.Dim() - num_not_scaled);
718 if (to_update != NULL)
719 to_update->
count_ += clipping_scales.Dim();
724 in_deriv->
ApplyFloor(-1 * clipping_threshold_);
727 if (to_update != NULL) {
729 RepairGradients(debug_info, in_value, in_deriv, to_update);
731 }
else if (clipping_threshold_ == 0.0) {
745 const std::string &debug_info,
753 if (self_repair_clipped_proportion_threshold_ >= 1.0 ||
754 self_repair_scale_ == 0.0 || count_ == 0 ||
758 KALDI_ASSERT(self_repair_target_ >= 0.0 && self_repair_scale_ > 0.0);
761 (count_ > 0 ?
static_cast<BaseFloat>(num_clipped_) / count_ : 0);
764 if (clipped_proportion <= self_repair_clipped_proportion_threshold_)
771 KALDI_LOG <<
"ClipGradientComponent(node_name=" << debug_info
772 <<
")'s self-repair was activated as the first time at the " 774 <<
"-th call of Backprop() in this training job.";
788 repair_mat.
Add(-self_repair_target_);
796 in_deriv_norm_vec.ApplyPow(0.5);
797 double in_deriv_norm_sum = in_deriv_norm_vec.Sum();
798 BaseFloat magnitude = self_repair_scale_ * clipped_proportion *
799 (in_deriv_norm_sum / in_deriv_norm_vec.Dim());
803 repair_mat_norm_vec.ApplyPow(0.5);
804 double repair_mat_norm_sum = repair_mat_norm_vec.Sum();
806 if (repair_mat_norm_sum != 0.0)
807 scale = magnitude / (repair_mat_norm_sum / repair_mat_norm_vec.Dim());
812 in_deriv->
AddMat(-scale / repair_probability, repair_mat);
815 in_deriv_repaired_norm_vec.ApplyPow(0.5);
819 double in_deriv_repaired_norm_sum = in_deriv_repaired_norm_vec.Sum();
820 if (in_deriv_repaired_norm_sum != 0.0)
821 in_deriv->
Scale(in_deriv_norm_sum / in_deriv_repaired_norm_sum);
827 num_self_repaired_ = 0;
828 num_backpropped_ = 0;
833 num_clipped_ *= scale;
840 count_ += alpha * other->
count_;
873 if (self_repair_scale_ == 0.0 || count_ == 0.0 || deriv_sum_.Dim() !=
dim_ ||
878 KALDI_ASSERT(self_repair_scale_ > 0.0 && self_repair_scale_ < 0.1);
880 BaseFloat lower_threshold = (self_repair_lower_threshold_ == unset ?
881 default_lower_threshold :
882 self_repair_lower_threshold_) *
884 if (self_repair_upper_threshold_ != unset) {
885 KALDI_ERR <<
"Do not set the self-repair-upper-threshold for sigmoid " 886 <<
"components, it does nothing.";
893 thresholds_vec.
AddVec(-1.0, deriv_sum_);
894 thresholds_vec.
Add(lower_threshold);
917 in_deriv->
AddMatDiagVec(-self_repair_scale_ / repair_probability,
918 out_value,
kNoTrans, thresholds_vec);
931 if (in_deriv != NULL) {
932 in_deriv->
DiffTanh(out_value, out_deriv);
934 if (to_update != NULL) {
935 RepairGradients(out_value, in_deriv, to_update);
954 if (
RandInt(0, 1) == 0 && count_ != 0)
959 temp_deriv.
Scale(-1.0);
961 StoreStatsInternal(out_value, &temp_deriv);
975 const std::string &debug_info,
983 NVTX_RANGE(
"RectifiedLinearComponent::Backprop");
984 if (in_deriv != NULL) {
989 if (to_update != NULL) {
990 RepairGradients(in_deriv, to_update);
1001 int32 dim =
dim_, block_dim = block_dim_;
1002 BaseFloat default_lower_threshold = 0.05,
1003 default_upper_threshold = 0.95;
1008 if (self_repair_scale_ == 0.0 || count_ == 0.0 ||
1009 deriv_sum_.Dim() != dim)
1012 if (in_deriv->
NumCols() != block_dim) {
1014 int32 dim_multiple = dim / block_dim;
1016 in_deriv->
NumRows() * dim_multiple,
1017 block_dim, block_dim);
1018 RepairGradients(&in_deriv_reshaped, to_update);
1030 KALDI_ASSERT(self_repair_scale_ > 0.0 && self_repair_scale_ < 0.1);
1033 lower_threshold = (self_repair_lower_threshold_ == unset ?
1034 default_lower_threshold :
1035 self_repair_lower_threshold_) * count,
1036 upper_threshold = (self_repair_upper_threshold_ == unset ?
1037 default_upper_threshold :
1038 self_repair_upper_threshold_) *
count;
1043 thresholds_vec(0) = -lower_threshold;
1044 thresholds_vec(1) = -upper_threshold;
1048 if (block_dim == dim) {
1053 block_dim, block_dim);
1056 deriv_sum_dbl.AddRowSumMat(block_dim * 1.0 / dim, deriv_sum_mat);
1060 stats_mat.AddVecToCols(1.0, thresholds_vec, 1.0);
1063 stats_mat.ApplyHeaviside();
1072 row0.
AddVec(1.0, row1, 1.0);
1079 row0.
Scale(-self_repair_scale_ / repair_probability);
1091 if (
RandInt(0, 1) == 0 && count_ != 0)
1097 StoreStatsInternal(out_value, &temp_deriv);
1103 linear_params_.SetZero();
1104 bias_params_.SetZero();
1106 linear_params_.Scale(scale);
1107 bias_params_.Scale(scale);
1113 bias_params_.Resize(output_dim);
1114 linear_params_.Resize(output_dim, input_dim);
1127 linear_params_(component.linear_params_),
1128 bias_params_(component.bias_params_),
1129 orthonormal_constraint_(component.orthonormal_constraint_) { }
1139 bias_params.
Dim() != 0);
1160 std::ostringstream stream;
1170 return stream.str();
1189 KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
1209 std::string matrix_filename;
1210 int32 input_dim = -1, output_dim = -1;
1212 if (cfl->
GetValue(
"matrix", &matrix_filename)) {
1213 Init(matrix_filename);
1214 if (cfl->
GetValue(
"input-dim", &input_dim))
1216 "input-dim mismatch vs. matrix.");
1217 if (cfl->
GetValue(
"output-dim", &output_dim))
1219 "output-dim mismatch vs. matrix.");
1221 ok = ok && cfl->
GetValue(
"input-dim", &input_dim);
1222 ok = ok && cfl->
GetValue(
"output-dim", &output_dim);
1223 BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
1225 cfl->
GetValue(
"param-stddev", ¶m_stddev);
1226 cfl->
GetValue(
"bias-stddev", &bias_stddev);
1227 Init(input_dim, output_dim,
1228 param_stddev, bias_stddev);
1233 KALDI_ERR <<
"Could not process these elements in initializer: " 1279 if (to_update != NULL) {
1285 to_update->
Update(debug_info, in_value, out_deriv);
1302 ExpectToken(is, binary,
"<OrthonormalConstraint>");
1317 WriteToken(os, binary,
"<OrthonormalConstraint>");
1320 WriteToken(os, binary,
"</AffineComponent>");
1343 num_repeats_(component.num_repeats_) {}
1374 std::ostringstream stream;
1379 return stream.str();
1397 KALDI_ASSERT(input_dim % num_repeats == 0 && output_dim % num_repeats == 0);
1398 linear_params_.Resize(output_dim / num_repeats, input_dim / num_repeats);
1401 KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
1414 int32 input_dim = -1, output_dim = -1;
1416 ok = cfl->
GetValue(
"num-repeats", &num_repeats) && ok;
1417 ok = cfl->
GetValue(
"input-dim", &input_dim) && ok;
1418 ok = cfl->
GetValue(
"output-dim", &output_dim) && ok;
1420 "num-repeats must divide input-dim");
1422 "num-repeats must divide output-dim");
1423 BaseFloat param_stddev = 1.0 / std::sqrt(input_dim / num_repeats),
1424 bias_mean = 0.0, bias_stddev = 0.0;
1425 cfl->
GetValue(
"param-stddev", ¶m_stddev);
1426 cfl->
GetValue(
"bias-mean", &bias_mean);
1427 cfl->
GetValue(
"bias-stddev", &bias_stddev);
1428 Init(input_dim, output_dim,
1429 num_repeats, param_stddev, bias_mean, bias_stddev);
1431 KALDI_ERR <<
"Could not process these elements in initializer: " 1452 block_dim_in, block_dim_in),
1453 out_reshaped(out->
Data(), num_rows * num_repeats,
1454 block_dim_out, block_dim_out);
1458 out_reshaped.AddMatMat(1.0, in_reshaped,
kNoTrans,
1471 NVTX_RANGE(
"RepeatedAffineComponent::Backprop");
1485 num_rows = out_deriv.
NumRows(),
1490 num_rows * num_repeats,
1491 block_dim_in, block_dim_in),
1492 out_deriv_reshaped(out_deriv.
Data(),
1493 num_rows * num_repeats,
1494 block_dim_out, block_dim_out);
1501 if (to_update != NULL)
1502 to_update->
Update(in_value, out_deriv);
1513 num_rows = in_value.
NumRows(),
1518 num_rows * num_repeats,
1519 block_dim_in, block_dim_in),
1520 out_deriv_reshaped(out_deriv.
Data(),
1521 num_rows * num_repeats,
1522 block_dim_out, block_dim_out);
1528 out_deriv_reshaped);
1546 ExpectToken(is, binary, std::string(
"</") +
Type() + std::string(
">"));
1560 WriteToken(os, binary, std::string(
"</") +
Type() + std::string(
">"));
1586 if (rank_in > input_dim / 2)
1587 rank_in = input_dim / 2;
1590 preconditioner_in_.SetRank(rank_in);
1591 preconditioner_in_.SetUpdatePeriod(4);
1597 preconditioner_in_(other.preconditioner_in_) { }
1612 num_rows = in_value.
NumRows(),
1617 num_rows * num_repeats,
1618 block_dim_in, block_dim_in),
1619 out_deriv_reshaped(out_deriv.
Data(),
1620 num_rows * num_repeats,
1621 block_dim_out, block_dim_out);
1624 bias_deriv.AddRowSumMat(1.0, out_deriv_reshaped);
1628 deriv.
ColRange(0, block_dim_in).AddMatMat(
1629 1.0, out_deriv_reshaped,
kTrans,
1640 int32 num_bad_rows = 0;
1643 if (!(f - f == 0)) num_bad_rows++;
1645 KALDI_ERR <<
"Preonditioning failed, in_value sum is " 1646 << in_value.
Sum() <<
", out_deriv sum is " << out_deriv.
Sum()
1647 <<
", out_deriv has " << num_bad_rows <<
" bad rows.";
1652 bias_deriv.CopyColFromMat(deriv, block_dim_in);
1666 num_blocks_(other.num_blocks_) {}
1676 for(
int32 block_counter = 0; block_counter <
num_blocks_; block_counter++) {
1677 int32 row_offset = block_counter * num_rows_in_block;
1693 std::ostringstream stream;
1698 return stream.str();
1705 KALDI_ASSERT(input_dim > 0 && output_dim > 0 && num_blocks >= 1);
1706 KALDI_ASSERT(output_dim % num_blocks == 0 && input_dim % num_blocks == 0);
1707 const int32 num_columns_per_block = input_dim / num_blocks;
1710 KALDI_ASSERT(param_stddev >= 0.0 && bias_stddev >= 0.0);
1720 int32 input_dim = -1, output_dim = -1, num_blocks = -1;
1721 if(!cfl->
GetValue(
"input-dim", &input_dim) ||
1722 !cfl->
GetValue(
"output-dim", &output_dim) ||
1723 !cfl->
GetValue(
"num-blocks", &num_blocks))
1724 KALDI_ERR <<
"Invalid initializer for layer of type " 1727 BaseFloat param_stddev = 1.0 / std::sqrt(input_dim / num_blocks),
1728 bias_mean = 0.0, bias_stddev = 1.0;
1729 cfl->
GetValue(
"param-stddev", ¶m_stddev);
1730 cfl->
GetValue(
"bias-stddev", &bias_stddev);
1731 cfl->
GetValue(
"bias-mean", &bias_mean);
1734 KALDI_ERR <<
"Invalid initializer for layer of type " 1737 Init(input_dim, output_dim, num_blocks,
1738 param_stddev, bias_mean, bias_stddev);
1749 std::vector<CuSubMatrix<BaseFloat> *> in_batch, out_batch,
1750 linear_params_batch;
1751 for(
int block_counter = 0; block_counter <
num_blocks_; block_counter++) {
1754 num_cols_in_block));
1755 in_batch.push_back(in_block);
1759 num_rows_in_block));
1760 out_batch.push_back(out_block);
1764 num_rows_in_block));
1765 linear_params_batch.push_back(linear_params_block);
1767 AddMatMatBatched<BaseFloat>(1.0, out_batch, in_batch,
kNoTrans,
1768 linear_params_batch,
kTrans, 1.0);
1784 NVTX_RANGE(
"BlockAffineComponent::Backprop");
1795 std::vector<CuSubMatrix<BaseFloat> *> in_deriv_batch, out_deriv_batch, linear_params_batch;
1797 for(
int block_counter = 0; block_counter <
num_blocks_; block_counter++) {
1800 num_cols_in_block));
1801 in_deriv_batch.push_back(in_deriv_block);
1805 num_rows_in_block));
1806 out_deriv_batch.push_back(out_deriv_block);
1810 num_rows_in_block));
1811 linear_params_batch.push_back(linear_params_block);
1814 AddMatMatBatched<BaseFloat>(1.0, in_deriv_batch, out_deriv_batch,
kNoTrans,
1815 linear_params_batch,
kNoTrans, 1.0);
1822 if (to_update != NULL) {
1826 std::vector<CuSubMatrix<BaseFloat> *> in_value_batch,
1827 out_deriv_batch, linear_params_batch;
1829 for (
int block_counter = 0; block_counter <
num_blocks_; block_counter++) {
1832 num_cols_in_block));
1833 in_value_batch.push_back(in_value_block);
1837 num_rows_in_block));
1838 out_deriv_batch.push_back(out_deriv_block);
1842 num_rows_in_block));
1843 linear_params_batch.push_back(linear_params_block);
1847 linear_params_batch,
1912 ExpectToken(is, binary,
"</BlockAffineComponent>");
1923 WriteToken(os, binary,
"</BlockAffineComponent>");
1950 scales_.Scale(scale);
1959 scales_.AddVec(alpha, other->
scales_);
1965 scales_(component.scales_) { }
1970 scales_.AddVec(stddev, temp_scales);
1974 std::ostringstream stream;
1976 <<
", scales-min=" <<
scales_.Min()
1977 <<
", scales-max=" <<
scales_.Max();
1979 return stream.str();
2011 std::string vector_filename;
2014 if (cfl->
GetValue(
"vector", &vector_filename)) {
2015 Init(vector_filename);
2018 "input-dim mismatch vs. vector.");
2021 KALDI_ERR <<
"'dim' not provided in the config line.";
2022 BaseFloat param_mean = 1.0, param_stddev = 0.0;
2023 cfl->
GetValue(
"param-mean", ¶m_mean);
2024 cfl->
GetValue(
"param-stddev", ¶m_stddev);
2025 Init(dim, param_mean, param_stddev);
2028 KALDI_ERR <<
"Could not process these elements in initializer: " 2049 const std::string &debug_info,
2057 NVTX_RANGE(
"PerElementScaleComponent::Backprop");
2061 if (to_update != NULL) {
2067 to_update->
Update(debug_info, in_value, out_deriv);
2072 if (in_deriv->
Data() != out_deriv.
Data())
2088 ExpectToken(is, binary,
"</PerElementScaleComponent>");
2095 WriteToken(os, binary,
"</PerElementScaleComponent>");
2115 offsets_.Scale(scale);
2125 offsets_.AddVec(alpha, other->
offsets_);
2131 offsets_(component.offsets_),
2132 dim_(component.dim_),
2133 use_natural_gradient_(component.use_natural_gradient_),
2134 preconditioner_(component.preconditioner_) { }
2139 offsets_.AddVec(stddev, temp_offsets);
2143 std::ostringstream stream;
2145 <<
", offsets-min=" <<
offsets_.Min()
2146 <<
", offsets-max=" <<
offsets_.Max()
2147 <<
", block-dim=" <<
offsets_.Dim()
2148 <<
", use-natural-gradient=" 2151 return stream.str();
2167 std::string vector_filename;
2169 if (cfl->
GetValue(
"vector", &vector_filename)) {
2177 KALDI_ERR <<
"'dim' not provided in the config line.";
2180 BaseFloat param_mean = 0.0, param_stddev = 0.0;
2181 cfl->
GetValue(
"param-mean", ¶m_mean);
2182 cfl->
GetValue(
"param-stddev", ¶m_stddev);
2184 cfl->
GetValue(
"block-dim", &block_dim);
2185 if (block_dim <= 0 || dim_ % block_dim != 0)
2186 KALDI_ERR <<
"Invalid value block-dim=" << block_dim;
2195 KALDI_ERR <<
"Could not process these elements in initializer: " 2214 num_rows = out->
NumRows() * multiple;
2216 block_dim, block_dim);
2223 const std::string &debug_info,
2231 NVTX_RANGE(
"PerElementOffsetComponent::Backprop");
2235 if (in_deriv && in_deriv->
Data() != out_deriv.
Data()) {
2240 if (to_update != NULL) {
2247 block_stride = (multiple == 1 ? out_deriv.
Stride() : block_dim),
2248 num_rows = out_deriv.
NumRows() * multiple;
2251 block_dim, block_stride);
2255 out_deriv_reshaped);
2294 ExpectToken(is, binary,
"</PerElementOffsetComponent>");
2303 WriteToken(os, binary,
"<UseNaturalGradient>");
2305 WriteToken(os, binary,
"</PerElementOffsetComponent>");
2322 std::ostringstream stream;
2324 <<
", rank=" << scale_preconditioner_.GetRank();
2325 if (
dim_ != scales_.Dim())
2326 stream <<
", block-size=" << scales_.Dim();
2329 return stream.str();
2336 KALDI_ERR <<
"Dimension 'dim' must be specified and >0: " 2343 cfl->
GetValue(
"block-dim", &block_dim);
2344 if (block_dim <= 0 ||
dim_ % block_dim != 0) {
2348 scales_.Resize(block_dim);
2353 KALDI_ERR <<
"Could not process these elements in initializer: " 2355 offset_preconditioner_.SetRank(rank);
2356 scale_preconditioner_.SetRank(rank);
2359 offset_preconditioner_.SetUpdatePeriod(4);
2360 scale_preconditioner_.SetUpdatePeriod(4);
2368 scales_.Read(is, binary);
2376 scale_preconditioner_.SetRank(rank);
2377 offset_preconditioner_.SetRank(rank);
2378 ExpectToken(is, binary,
"</ScaleAndOffsetComponent>");
2386 scales_.Write(os, binary);
2389 WriteToken(os, binary,
"<UseNaturalGradient>");
2393 WriteToken(os, binary,
"</ScaleAndOffsetComponent>");
2401 scales_.Scale(scale);
2411 scales_.AddVec(alpha, other->
scales_);
2419 scales_(component.scales_),
2422 scale_preconditioner_(component.scale_preconditioner_),
2423 offset_preconditioner_(component.offset_preconditioner_) { }
2466 block_dim, block_dim),
2467 out_rearranged(out->
Data(), num_rows * multiple,
2468 block_dim, block_dim);
2488 const std::string &debug_info,
2496 NVTX_RANGE(
"ScaleAndOffsetComponent::Backprop");
2504 to_update, in_deriv);
2510 num_rows = out_value.
NumRows(),
2513 num_rows * multiple,
2514 block_dim, block_dim),
2515 out_deriv_rearranged(out_deriv.
Data(), num_rows * multiple,
2516 block_dim, block_dim);
2519 num_rows * multiple,
2520 block_dim, block_dim);
2522 out_deriv_rearranged, to_update,
2523 &in_deriv_rearranged);
2526 out_deriv_rearranged, to_update,
2536 const std::string &debug_info,
2549 &out_deriv_copy, &scale);
2564 in_value_reconstructed.
MulColsVec(scales_nonzero);
2572 &in_value_reconstructed, &scale);
2575 in_value_reconstructed);
2578 if (in_deriv->
Data() != out_deriv.
Data())
2593 std::ostringstream stream;
2597 <<
", is-updatable=" << std::boolalpha << is_updatable_
2598 <<
", use-natural-gradient=" << std::boolalpha
2601 return stream.str();
2624 const std::string &debug_info,
2632 NVTX_RANGE(
"ConstantFunctionComponent::Backprop");
2660 if (token ==
"<ConstantFunctionComponent>") {
2663 if (token ==
"<LearningRateFactor>") {
2669 if (token ==
"<IsGradient>") {
2675 if (token ==
"<LearningRate>") {
2681 if (token ==
"<InputDim>") {
2684 KALDI_ERR <<
"Expected token <InputDim>, got " 2693 ExpectToken(is, binary,
"</ConstantFunctionComponent>");
2704 WriteToken(os, binary,
"<UseNaturalGradient>");
2706 WriteToken(os, binary,
"</ConstantFunctionComponent>");
2735 output_.AddVec(stddev, temp_output);
2748 int32 output_dim = 0;
2750 bool ok = cfl->
GetValue(
"output-dim", &output_dim) &&
2754 BaseFloat output_mean = 0.0, output_stddev = 0.0;
2755 cfl->
GetValue(
"output-mean", &output_mean);
2756 cfl->
GetValue(
"output-stddev", &output_stddev);
2763 output.
Scale(output_stddev);
2764 output.
Add(output_mean);
2789 linear_params_.Read(is, binary);
2791 bias_params_.Read(is, binary);
2794 int32 rank_in, rank_out, update_period;
2801 ExpectToken(is, binary,
"<OrthonormalConstraint>");
2804 orthonormal_constraint_ = 0.0;
2813 preconditioner_in_.SetNumSamplesHistory(num_samples_history);
2814 preconditioner_out_.SetNumSamplesHistory(num_samples_history);
2815 preconditioner_in_.SetAlpha(alpha);
2816 preconditioner_out_.SetAlpha(alpha);
2817 preconditioner_in_.SetRank(rank_in);
2818 preconditioner_out_.SetRank(rank_out);
2819 preconditioner_in_.SetUpdatePeriod(update_period);
2820 preconditioner_out_.SetUpdatePeriod(update_period);
2847 if (token.find(
"NaturalGradientAffineComponent>") == std::string::npos)
2848 KALDI_ERR <<
"Expected <NaturalGradientAffineComponent> or " 2849 <<
"</NaturalGradientAffineComponent>, got " << token;
2858 bias_params.
Dim() != 0);
2869 std::string matrix_filename;
2875 if (cfl->
GetValue(
"matrix", &matrix_filename)) {
2884 if (cfl->
GetValue(
"input-dim", &input_dim))
2886 "input-dim mismatch vs. matrix.");
2887 if (cfl->
GetValue(
"output-dim", &output_dim))
2889 "output-dim mismatch vs. matrix.");
2891 int32 input_dim = -1, output_dim = -1;
2893 ok = ok && cfl->
GetValue(
"input-dim", &input_dim);
2894 ok = ok && cfl->
GetValue(
"output-dim", &output_dim);
2897 BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
2898 bias_stddev = 1.0, bias_mean = 0.0;
2899 cfl->
GetValue(
"param-stddev", ¶m_stddev);
2900 cfl->
GetValue(
"bias-stddev", &bias_stddev);
2901 cfl->
GetValue(
"bias-mean", &bias_mean);
2904 KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0 &&
2905 bias_stddev >= 0.0);
2919 int32 rank_in = -1, rank_out = -1,
2921 cfl->
GetValue(
"num-samples-history", &num_samples_history);
2923 cfl->
GetValue(
"rank-in", &rank_in);
2924 cfl->
GetValue(
"rank-out", &rank_out);
2925 cfl->
GetValue(
"update-period", &update_period);
2928 rank_in = std::min<int32>(20, (
InputDim() + 1) / 2);
2930 rank_out = std::min<int32>(80, (
OutputDim() + 1) / 2);
2942 KALDI_ERR <<
"Could not process these elements in initializer: " 2949 bool binary)
const {
2960 WriteToken(os, binary,
"<OrthonormalConstraint>");
2965 WriteToken(os, binary,
"<NumSamplesHistory>");
2969 WriteToken(os, binary,
"</NaturalGradientAffineComponent>");
2973 std::ostringstream stream;
2980 return stream.str();
2994 const std::string &debug_info,
3002 0, in_value.
NumCols()).CopyFromMat(in_value);
3006 in_value.
NumCols(), 1).Set(1.0);
3024 0, in_value_temp.
NumCols() - 1);
3036 in_value_precon_part,
kNoTrans, 1.0);
3073 params_.Read(is, binary);
3075 ExpectToken(is, binary,
"<OrthonormalConstraint>");
3084 int32 rank_in, rank_out, update_period;
3110 std::string matrix_filename;
3115 int32 input_dim = -1, output_dim = -1;
3116 if (cfl->
GetValue(
"matrix", &matrix_filename)) {
3119 if (cfl->
GetValue(
"input-dim", &input_dim))
3121 "input-dim mismatch vs. matrix.");
3122 if (cfl->
GetValue(
"output-dim", &output_dim))
3124 "output-dim mismatch vs. matrix.");
3126 ok = ok && cfl->
GetValue(
"input-dim", &input_dim);
3127 ok = ok && cfl->
GetValue(
"output-dim", &output_dim);
3130 BaseFloat param_stddev = 1.0 / std::sqrt(input_dim);
3131 cfl->
GetValue(
"param-stddev", ¶m_stddev);
3132 params_.Resize(output_dim, input_dim);
3133 KALDI_ASSERT(output_dim > 0 && input_dim > 0 && param_stddev >= 0.0);
3135 params_.Scale(param_stddev);
3138 int32 rank_in = -1, rank_out = -1, update_period = 4;
3140 num_samples_history = 2000.0;
3142 use_natural_gradient_ =
true;
3144 cfl->
GetValue(
"num-samples-history", &num_samples_history);
3146 cfl->
GetValue(
"rank-in", &rank_in);
3147 cfl->
GetValue(
"rank-out", &rank_out);
3148 cfl->
GetValue(
"update-period", &update_period);
3149 cfl->
GetValue(
"use-natural-gradient", &use_natural_gradient_);
3152 rank_in = std::min<int32>(20, (
InputDim() + 1) / 2);
3154 rank_out = std::min<int32>(80, (
OutputDim() + 1) / 2);
3169 KALDI_ERR <<
"Could not process these elements in initializer: " 3175 bool binary)
const {
3178 params_.Write(os, binary);
3180 WriteToken(os, binary,
"<OrthonormalConstraint>");
3183 WriteToken(os, binary,
"<UseNaturalGradient>");
3196 WriteToken(os, binary,
"<NumSamplesHistory>");
3200 WriteToken(os, binary,
"</LinearComponent>");
3204 std::ostringstream stream;
3213 stream <<
", use-natural-gradient=" 3214 << (use_natural_gradient_ ?
"true" :
"false")
3217 <<
", num-samples-history=" 3221 return stream.str();
3248 if (to_update != NULL) {
3260 to_update->
params_.AddMatMat(local_lrate, out_deriv_temp,
kTrans,
3278 params_(other.params_),
3280 use_natural_gradient_(other.use_natural_gradient_),
3299 if (scale == 0.0)
params_.SetZero();
3313 params_.AddMat(stddev, temp_params);
3324 params_.CopyRowsFromVec(params);
3345 std::ostringstream stream;
3349 return stream.str();
3355 bias_params_.Resize(mat.
NumRows());
3356 bias_params_.CopyColFromMat(mat, mat.
NumCols() - 1);
3360 std::string filename;
3363 if (cfl->
GetValue(
"matrix", &filename)) {
3365 KALDI_ERR <<
"Invalid initializer for layer of type " 3369 Input ki(filename, &binary);
3375 int32 input_dim = -1, output_dim = -1;
3376 if (!cfl->
GetValue(
"input-dim", &input_dim) ||
3378 KALDI_ERR <<
"Invalid initializer for layer of type " 3389 linear_params_(c.LinearParams()),
3390 bias_params_(c.BiasParams()) { }
3408 NVTX_RANGE(
"FixedAffineComponent::Backprop");
3424 WriteToken(os, binary,
"<FixedAffineComponent>");
3429 WriteToken(os, binary,
"</FixedAffineComponent>");
3437 ExpectToken(is, binary,
"</FixedAffineComponent>");
3442 std::vector<Int32Pair> cpu_vec(sizes.size());
3443 std::vector<int32> reverse_cpu_vec;
3444 int32 cur_index = 0;
3445 for (
size_t i = 0;
i < sizes.size();
i++) {
3447 cpu_vec[
i].first = cur_index;
3448 cpu_vec[
i].second = cur_index + sizes[
i];
3449 cur_index += sizes[
i];
3450 for (
int32 j = cpu_vec[
i].first;
j < cpu_vec[
i].second;
j++)
3451 reverse_cpu_vec.push_back(
i);
3453 this->indexes_ = cpu_vec;
3454 this->reverse_indexes_ = reverse_cpu_vec;
3455 this->input_dim_ = cur_index;
3456 this->output_dim_ = sizes.size();
3460 const int32 num_groups = output_dim;
3462 const int32 group_size = input_dim / num_groups;
3464 std::vector<Int32Pair> cpu_vec(num_groups);
3465 std::vector<int32> reverse_cpu_vec;
3466 int32 cur_index = 0;
3467 for (
size_t i = 0;
i < num_groups;
i++) {
3468 cpu_vec[
i].first = cur_index;
3469 cpu_vec[
i].second = cur_index + group_size;
3470 cur_index += group_size;
3471 for (
int32 j = cpu_vec[
i].first;
j < cpu_vec[
i].second;
j++)
3472 reverse_cpu_vec.push_back(
i);
3474 this->indexes_ = cpu_vec;
3475 this->reverse_indexes_ = reverse_cpu_vec;
3476 this->input_dim_ = input_dim;
3477 this->output_dim_ = num_groups;
3481 std::vector<int32> sizes;
3482 bool has_sizes = cfl->
GetValue(
"sizes", &sizes);
3485 KALDI_ERR <<
"Invalid initializer for layer of type " 3489 int32 input_dim = -1, output_dim = -1;
3490 if (!cfl->
GetValue(
"input-dim", &input_dim) ||
3492 KALDI_ERR <<
"Invalid initializer for layer of type " 3495 Init(input_dim, output_dim);
3510 std::vector<int32> sizes;
3515 if (!(token ==
"<SumGroupComponent>" ||
3516 token ==
"</SumGroupComponent>")) {
3517 KALDI_ERR <<
"Expected </SumGroupComponent>, got " << token;
3523 std::vector<Int32Pair> indexes;
3524 indexes_.CopyToVec(&indexes);
3525 sizes->resize(indexes.size());
3526 for (
size_t i = 0;
i < indexes.size();
i++) {
3527 (*sizes)[
i] = indexes[
i].second - indexes[
i].first;
3531 (*sizes)[
i] = indexes[
i].second - indexes[
i].first;
3536 WriteToken(os, binary,
"<SumGroupComponent>");
3538 std::vector<int32> sizes;
3539 this->GetSizes(&sizes);
3541 WriteToken(os, binary,
"</SumGroupComponent>");
3560 in_deriv->
CopyCols(out_deriv, reverse_indexes_);
3594 if (in_deriv == NULL)
3614 StoreStatsInternal(out_value, NULL);
3641 if (in_deriv == NULL)
3654 std::string filename;
3656 if (cfl->
GetValue(
"scales", &filename)) {
3658 KALDI_ERR <<
"Invalid initializer for layer of type " 3666 bool scale_is_set = cfl->
GetValue(
"scale", &scale);
3668 KALDI_ERR <<
"Invalid initializer for layer of type " 3683 std::ostringstream stream;
3686 return stream.str();
3718 WriteToken(os, binary,
"<FixedScaleComponent>");
3720 scales_.Write(os, binary);
3721 WriteToken(os, binary,
"</FixedScaleComponent>");
3726 scales_.Read(is, binary);
3727 ExpectToken(is, binary,
"</FixedScaleComponent>");
3736 std::string filename;
3738 if (cfl->
GetValue(
"bias", &filename)) {
3740 KALDI_ERR <<
"Invalid initializer for layer of type " 3748 KALDI_ERR <<
"Invalid initializer for layer of type " 3758 std::ostringstream stream;
3761 return stream.str();
3794 WriteToken(os, binary,
"<FixedBiasComponent>");
3796 bias_.Write(os, binary);
3797 WriteToken(os, binary,
"</FixedBiasComponent>");
3802 bias_.Read(is, binary);
3808 std::istream &is,
bool binary) {
3809 ReadUpdatableCommon(is, binary);
3811 scales_.Read(is, binary);
3814 int32 rank, update_period;
3817 preconditioner_.SetRank(rank);
3820 preconditioner_.SetUpdatePeriod(update_period);
3824 preconditioner_.SetNumSamplesHistory(num_samples_history);
3827 preconditioner_.SetAlpha(alpha);
3830 if (token ==
"<MaxChangePerMinibatch>") {
3837 KALDI_ASSERT(token ==
"</NaturalGradientPerElementScaleComponent>");
3841 bool binary)
const {
3842 WriteUpdatableCommon(os, binary);
3844 scales_.Write(os, binary);
3851 WriteToken(os, binary,
"<NumSamplesHistory>");
3852 WriteBasicType(os, binary, preconditioner_.GetNumSamplesHistory());
3855 WriteToken(os, binary,
"</NaturalGradientPerElementScaleComponent>");
3859 std::ostringstream stream;
3861 <<
", rank=" << preconditioner_.GetRank()
3862 <<
", update-period=" << preconditioner_.GetUpdatePeriod()
3863 <<
", num-samples-history=" << preconditioner_.GetNumSamplesHistory()
3864 <<
", alpha=" << preconditioner_.GetAlpha();
3865 return stream.str();
3874 BaseFloat num_samples_history = 2000.0, alpha = 4.0;
3876 cfl->
GetValue(
"update-period", &update_period);
3877 cfl->
GetValue(
"num-samples-history", &num_samples_history);
3879 InitLearningRatesFromConfig(cfl);
3880 std::string filename;
3882 if (cfl->
GetValue(
"scales", &filename)) {
3884 KALDI_ERR <<
"Invalid initializer for layer of type " 3886 Init(filename, rank, update_period, num_samples_history, alpha);
3889 BaseFloat param_mean = 1.0, param_stddev = 0.0;
3890 cfl->
GetValue(
"param-mean", &param_mean);
3891 cfl->
GetValue(
"param-stddev", &param_stddev);
3895 KALDI_ERR <<
"Invalid initializer for layer of type " 3899 Init(dim, param_mean, param_stddev, rank, update_period,
3900 num_samples_history, alpha);
3910 preconditioner_.SetRank(rank);
3911 preconditioner_.SetUpdatePeriod(update_period);
3912 preconditioner_.SetNumSamplesHistory(num_samples_history);
3913 preconditioner_.SetAlpha(alpha);
3917 std::string vector_filename,
3921 preconditioner_.SetRank(rank);
3922 preconditioner_.SetUpdatePeriod(update_period);
3923 preconditioner_.SetNumSamplesHistory(num_samples_history);
3924 preconditioner_.SetAlpha(alpha);
3931 preconditioner_(other.preconditioner_) { }
3941 const std::string &debug_info,
3955 scales_.AddVec(1.0, delta_scales);
3968 int32 dim = column_map_.Dim();
3970 std::vector<int32> reverse_column_map_cpu(dim, -1),
3971 column_map_cpu(dim);
3972 column_map_.CopyToVec(&column_map_cpu);
3974 int32 &dest = reverse_column_map_cpu[column_map_cpu[
i]];
3976 KALDI_ERR <<
"Column map does not represent a permutation.";
3979 reverse_column_map_.Resize(dim);
3980 reverse_column_map_.CopyFromVec(reverse_column_map_cpu);
4005 in_deriv->
CopyCols(out_deriv, reverse_column_map_);
4010 std::string column_map_str;
4011 ok = ok && cfl->
GetValue(
"column-map", &column_map_str);
4012 std::vector<int32> column_map;
4014 KALDI_ERR <<
"Bad initializer in PermuteComponent: column-map=" 4017 KALDI_ERR <<
"Could not process these elements in initializer: " 4020 KALDI_ERR <<
"Invalid initializer for layer of type " 4027 column_map_.CopyFromVec(column_map);
4028 ComputeReverseColumnMap();
4033 std::vector<int32> column_map;
4034 if (binary && is.peek() ==
'F') {
4037 float_map.
Read(is, binary);
4038 column_map.resize(float_map.
Dim());
4041 column_map[
i] =
static_cast<int32>(float_map(
i) + 0.5);
4047 column_map.back() = float_map.
Dim() - 1;
4051 column_map_.CopyFromVec(column_map);
4053 ComputeReverseColumnMap();
4057 WriteToken(os, binary,
"<PermuteComponent>");
4059 std::ostringstream buffer;
4060 std::vector<int32> column_map;
4061 column_map_.CopyToVec(&column_map);
4063 WriteToken(os, binary,
"</PermuteComponent>");
4067 std::ostringstream stream;
4068 stream <<
Type() <<
", dim=" << column_map_.Dim();
4069 stream <<
" , column-map=[ ";
4070 std::vector<int32> column_map(column_map_.Dim());
4071 column_map_.CopyToVec(&column_map);
4073 for (
size_t i = 0;
i < column_map.size() &&
i < max_size;
i++)
4074 stream << column_map[
i] <<
' ';
4075 if (static_cast<int32>(column_map.size()) > max_size)
4078 return stream.str();
4083 for (std::vector<Component*>::const_iterator iter = components_.begin(),
4084 end = components_.end(); iter != end; ++iter)
4093 return components_.front()->InputDim();
4099 return components_.back()->OutputDim();
4105 int32 last_component_properties = components_.back()->Properties(),
4106 first_component_properties = components_.front()->Properties();
4111 (last_component_properties &
4113 (first_component_properties &
4126 int32 num_components = components_.size();
4128 (i + 1 < num_components &&
4144 num_components = components_.size();
4145 if (max_rows_process_ > 0 && num_rows > max_rows_process_) {
4147 for (
int32 row_offset = 0; row_offset < num_rows;
4148 row_offset += max_rows_process_) {
4149 int32 this_num_rows = std::min<int32>(max_rows_process_,
4150 num_rows - row_offset);
4155 this->
Propagate(NULL, in_part, &out_part);
4159 std::vector<CuMatrix<BaseFloat> > intermediate_outputs(num_components - 1);
4160 for (
int32 i = 0;
i < num_components;
i++) {
4161 if (
i + 1 < num_components) {
4165 intermediate_outputs[
i].Resize(num_rows, components_[
i]->
OutputDim(),
4166 resize_type, GetStrideType(
i));
4169 intermediate_outputs[
i-1]);
4171 out : &(intermediate_outputs[
i]));
4172 void *memo = components_[
i]->Propagate(NULL, this_in, this_out);
4176 components_[
i]->DeleteMemo(memo);
4178 intermediate_outputs[
i-1].Resize(0, 0);
4185 int32 max_rows_process) {
4187 components_ = components;
4189 max_rows_process_ = max_rows_process;
4191 for (
size_t i = 0;
i < components_.size();
i++) {
4206 int32 max_rows_process;
4214 if (token ==
"<CompositeComponent>") {
4219 if (token ==
"<LearningRateFactor>") {
4225 if (token ==
"<IsGradient>") {
4231 if (token ==
"<LearningRate>") {
4235 if (token !=
"<MaxRowsProcess>") {
4236 KALDI_ERR <<
"Expected token <MaxRowsProcess>, got " 4242 int32 num_components;
4244 if (num_components < 0 || num_components > 100000)
4246 std::vector<Component*> components(num_components);
4247 for (
int32 i = 0;
i < num_components;
i++)
4248 components[
i] =
ReadNew(is, binary);
4249 Init(components, max_rows_process);
4258 for (
size_t i = 0;
i < components_.size();
i++)
4268 int32 num_components = components_.size();
4270 for (
int32 i = 0;
i < num_components;
i++)
4271 components_[
i]->
Write(os, binary);
4272 WriteToken(os, binary,
"</CompositeComponent>");
4290 num_components = components_.size();
4291 if (max_rows_process_ > 0 && num_rows > max_rows_process_) {
4294 for (
int32 row_offset = 0; row_offset < num_rows;
4295 row_offset += max_rows_process_) {
4296 bool have_output_value = (out_value.
NumRows() != 0);
4297 int32 this_num_rows = std::min<int32>(max_rows_process_,
4298 num_rows - row_offset);
4302 row_offset, this_num_rows,
4307 row_offset, this_num_rows,
4310 0, in_value.NumCols());
4312 row_offset, this_num_rows,
4313 0, out_deriv.NumCols());
4315 this->
Backprop(debug_info, NULL, in_value_part,
4318 out_deriv_part, NULL, to_update,
4319 in_deriv != NULL ? &in_deriv_part : NULL);
4327 std::vector<CuMatrix<BaseFloat> > intermediate_outputs(num_components);
4329 std::vector<CuMatrix<BaseFloat> > intermediate_derivs(num_components - 1);
4333 std::vector<void*> memos(num_components, NULL);
4335 int32 num_components_to_propagate = num_components;
4338 num_components_to_propagate--;
4339 if (num_components > 1) {
4344 int32 properties = components_[num_components - 2]->Properties(),
4345 next_properties = components_[num_components - 1]->Properties();
4348 num_components_to_propagate--;
4355 for (
int32 i = 0;
i < num_components_to_propagate;
i++) {
4359 intermediate_outputs[
i].Resize(num_rows, components_[
i]->
OutputDim(),
4360 resize_type, GetStrideType(
i));
4362 components_[
i]->Propagate(NULL,
4363 (
i == 0 ? in_value : intermediate_outputs[
i-1]),
4364 &(intermediate_outputs[
i]));
4367 for (
int32 i = num_components - 1;
i >= 0;
i--) {
4369 (
i == 0 ? in_value : intermediate_outputs[
i-1]),
4371 (
i == num_components - 1 ? out_value : intermediate_outputs[
i]);
4374 (to_update == NULL ? NULL :
4377 if (component_to_update != NULL &&
4379 component_to_update->
StoreStats(this_in_value, this_out_value, memos[i]);
4385 intermediate_derivs[i-1].Resize(num_rows, components_[i]->
InputDim(),
4386 resize_type, GetStrideType(i - 1));
4391 in_deriv == NULL)) {
4392 components_[
i]->Backprop(debug_info, NULL,
4393 this_in_value, this_out_value,
4394 (i + 1 == num_components ? out_deriv : intermediate_derivs[i]),
4395 memos[i], component_to_update,
4396 (i == 0 ? in_deriv : &(intermediate_derivs[i-1])));
4398 if (memos[i] != NULL)
4399 components_[
i]->DeleteMemo(memos[i]);
4406 std::ostringstream stream;
4407 stream <<
Type() <<
" ";
4408 for (
size_t i = 0;
i < components_.size();
i++) {
4409 if (
i > 0) stream <<
", ";
4410 stream <<
"sub-component" << (
i+1) <<
" = { " 4411 << components_[
i]->
Info() <<
" }";
4413 return stream.str();
4418 for (
size_t i = 0;
i < components_.size();
i++)
4419 components_[
i]->
Scale(scale);
4427 components_.size() &&
"Mismatching nnet topologies");
4428 for (
size_t i = 0;
i < components_.size();
i++)
4435 for (
size_t i = 0;
i < components_.size();
i++) {
4451 for (
size_t i = 0;
i < components_.size();
i++) {
4463 for (
size_t i = 0;
i < components_.size();
i++) {
4476 for (
size_t i = 0;
i < components_.size();
i++) {
4489 for (
size_t i = 0;
i < components_.size();
i++) {
4501 int32 cur_offset = 0;
4503 for (
size_t i = 0;
i < components_.size();
i++) {
4510 cur_offset += this_size;
4518 int32 cur_offset = 0;
4520 for (
size_t i = 0;
i < components_.size();
i++) {
4527 cur_offset += this_size;
4539 components_.size() &&
"Mismatching nnet topologies");
4541 for (
size_t i = 0.0;
i < components_.size();
i++) {
4556 for (
size_t i = 0;
i < components_.size();
i++) {
4568 std::vector<Component*> components(components_.size());
4569 for (
size_t i = 0;
i < components_.size();
i++)
4570 components[
i] = components_[
i]->
Copy();
4572 ans->
Init(components, max_rows_process_);
4579 int32 max_rows_process = 4096, num_components = -1;
4580 cfl->
GetValue(
"max-rows-process", &max_rows_process);
4581 if (!cfl->
GetValue(
"num-components", &num_components) ||
4583 KALDI_ERR <<
"Expected num-components to be defined in " 4584 <<
"CompositeComponent config line '" << cfl->
WholeLine() <<
"'";
4585 std::vector<Component*> components;
4586 for (
int32 i = 1;
i <= num_components;
i++) {
4587 std::ostringstream name_stream;
4588 name_stream <<
"component" <<
i;
4589 std::string component_config;
4590 if (!cfl->
GetValue(name_stream.str(), &component_config)) {
4592 KALDI_ERR <<
"Expected '" << name_stream.str() <<
"' to be defined in " 4593 <<
"CompositeComponent config line '" << cfl->
WholeLine() <<
"'";
4597 std::string component_type;
4599 if (!nested_line.
ParseLine(component_config) ||
4600 !nested_line.
GetValue(
"type", &component_type) ||
4604 KALDI_ERR <<
"Could not parse config line for '" << name_stream.str()
4605 <<
"(or undefined or bad component type [type=xxx]), in " 4606 <<
"CompositeComponent config line '" << cfl->
WholeLine() <<
"'";
4608 if(this_component->
Type() ==
"CompositeComponent") {
4610 delete this_component;
4613 KALDI_ERR <<
"Found CompositeComponent nested within CompositeComponent." 4614 <<
"Nested line: '" << nested_line.
WholeLine() <<
"'\n" 4615 <<
"Toplevel CompositeComponent line '" << cfl->
WholeLine()
4622 KALDI_ERR <<
"CompositeComponent contains disallowed component type: " 4625 components.push_back(this_component);
4628 KALDI_ERR <<
"Could not process these elements in initializer: " 4630 this->
Init(components, max_rows_process);
4634 KALDI_ASSERT(static_cast<size_t>(i) < components_.size());
4635 return components_[
i];
4639 KALDI_ASSERT(static_cast<size_t>(i) < components_.size());
4640 delete components_[
i];
4641 components_[
i] = component;
4646 input_dim_(other.input_dim_), output_dim_(other.output_dim_),
4647 scale_(other.scale_) { }
4654 KALDI_ERR <<
"input-dim and output-dim must both be provided.";
4660 KALDI_ERR <<
"Could not process these elements in initializer: " 4675 WriteToken(os, binary,
"<SumBlockComponent>");
4682 WriteToken(os, binary,
"</SumBlockComponent>");
4686 std::ostringstream stream;
4690 return stream.str();
4704 const std::string &debug_info,
ConstantFunctionComponent()
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
void Init(int32 dim, BaseFloat clipping_threshold, bool norm_based_clipping, BaseFloat self_repair_clipped_proportion_threshold, BaseFloat self_repair_target, BaseFloat self_repair_scale, int32 num_clipped, int32 count, int32 num_self_repaired, int32 num_backpropped)
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
CuVector< BaseFloat > offsets_
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
CuVector< BaseFloat > scales_
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
PerElementOffsetComponent()
MatrixIndexT Stride() const
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void PropagateInternal(const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void ApplyCeiling(Real ceiling_val)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void SetActualLearningRate(BaseFloat lrate)
Sets the learning rate directly, bypassing learning_rate_factor_.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
const std::string & FirstToken() const
OnlineNaturalGradient preconditioner_
virtual void Resize(int32 input_dim, int32 output_dim)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void SetUnderlyingLearningRate(BaseFloat lrate)
Sets the learning rate of gradient descent- gets multiplied by learning_rate_factor_.
OnlineNaturalGradient preconditioner_out_
bool use_natural_gradient_
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
void ApplyPow(Real power)
void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
const std::string WholeLine()
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
bool ParseLine(const std::string &line)
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void Init(int32 dim, BaseFloat param_mean, BaseFloat param_stddev, int32 rank, int32 update_period, BaseFloat num_samples_history, BaseFloat alpha)
void DeletePointers(std::vector< A *> *v)
Deletes any non-NULL pointers in the vector v, and sets the corresponding entries of v to NULL...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void SetAsGradient()
Sets is_gradient_ to true and sets learning_rate_ to 1, ignoring learning_rate_factor_.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
CuVector< BaseFloat > bias_params_
const CuSubVector< Real > Row(MatrixIndexT i) const
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
void Init(const CuVectorBase< BaseFloat > &scales)
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
OnlineNaturalGradient preconditioner_
void GetSizes(std::vector< int32 > *sizes) const
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
CuMatrix< BaseFloat > linear_params_
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void CopyColFromMat(const CuMatrixBase< Real > &mat, MatrixIndexT col)
Abstract base-class for neural-net components.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
bool use_natural_gradient_
virtual Component * Copy() const
Copies component (deep copy).
CuVector< BaseFloat > bias_params_
void SetUpdatePeriod(int32 update_period)
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
void SetNumSamplesHistory(BaseFloat num_samples_history)
CuMatrix< BaseFloat > linear_params_
void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
virtual Component * Copy() const
Copies component (deep copy).
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void Init(int32 input_dim, int32 output_dim)
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
void Init(const std::vector< int32 > &sizes)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
void AddMatDiagVec(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, CuVectorBase< Real > &v, Real beta=1.0)
CuVector< BaseFloat > offsets_
void InitLearningRatesFromConfig(ConfigLine *cfl)
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
void ApplyFloor(Real floor_val)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
bool use_natural_gradient_
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
void ComputeReverseColumnMap()
virtual Component * Copy() const
Copies component (deep copy).
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
BaseFloat Epsilon() const
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
Keywords for search: natural gradient, naturalgradient, NG-SGD.
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
void BackpropInternal(const std::string &debug_info, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, ScaleAndOffsetComponent *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overriden by components that use Natu...
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overriden by components that use Natu...
PermuteComponent changes the order of the columns (i.e.
CuVector< BaseFloat > bias_params_
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual Component * Copy() const
Copies component (deep copy).
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void RepairGradients(const CuMatrixBase< BaseFloat > &out_value, CuMatrixBase< BaseFloat > *in_deriv, SigmoidComponent *to_update) const
CuVector< BaseFloat > bias_
CompositeComponent is a component representing a sequence of [simple] components. ...
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void Init(int32 input_dim, int32 output_dim)
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void SetNaturalGradientConfigs()
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
FixedScaleComponent applies a fixed per-element scale; it's similar to the Rescale component in the n...
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
void CopyColFromVec(const CuVectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
This file contains declarations of components that are "simple", meaning they don't care about the in...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual Component * Copy() const
Copies component (deep copy).
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
OnlineNaturalGradient preconditioner_in_
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void ExpectOneOrTwoTokens(std::istream &is, bool binary, const std::string &token1, const std::string &token2)
This function is like ExpectToken but for two tokens, and it will either accept token1 and then token...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
virtual Component * Copy() const
Copies component (deep copy).
void Init(int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev)
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
void AddMatBlocks(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
This function is like AddMat (it does *this += alpha * src), except that it supports cases where *thi...
virtual Component * Copy() const
Copies component (deep copy).
NaturalGradientRepeatedAffineComponent()
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
CuArray< Int32Pair > indexes_
MatrixStrideType GetStrideType(int32 i) const
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
OnlineNaturalGradient preconditioner_in_
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
CuArray< int32 > column_map_
std::string UnusedValues() const
returns e.g.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
NaturalGradientPerElementScaleComponent is like PerElementScaleComponent but it uses a natural gradie...
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
virtual void FreezeNaturalGradient(bool freeze)
virtual
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimensions do not match.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overridden by components that use Natu...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual int32 InputDim() const
Returns input-dimension of this component.
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void ApplyPowAbs(Real power, bool include_sign=false)
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual Component * Copy() const
Copies component (deep copy).
virtual void SetNaturalGradientConfigs()
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual void UpdateSimple(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual BaseFloat DotProduct(const UpdatableComponent &other) const =0
Computes dot-product between parameters of two instances of a Component.
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
virtual int32 Properties() const =0
Return bitmask of the component's properties.
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
PerElementScaleComponent()
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void SetZero()
Math operations, some calling kernels.
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual Component * Copy() const
Copies component (deep copy).
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
double num_dims_self_repaired_
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
BaseFloat orthonormal_constraint_
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual int32 OutputDim() const
Returns output-dimension of this component.
SumGroupComponent is used to sum up groups of posteriors.
BaseFloat learning_rate_
learning rate (typically 0.0..0.01)
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
void Init(int32 input_dim, int32 output_dim, int32 num_blocks, BaseFloat param_stddev, BaseFloat bias_mean, BaseFloat bias_stddev)
void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual Component * Copy() const
Copies component (deep copy).
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
std::string ReadUpdatableCommon(std::istream &is, bool binary)
void CopyColsFromVec(const CuVectorBase< Real > &v)
Copies vector into matrix, column-by-column.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overridden by components that use Natu...
void Init(const CuVectorBase< BaseFloat > &scales)
CuArray< int32 > reverse_column_map_
std::vector< Component * > components_
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
static Component * ReadNew(std::istream &is, bool binary)
Read component from stream (works out its type). Dies on error.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
void GroupPnorm(const CuMatrixBase< Real > &src, Real pow)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ (power)) ^ (1 / power) where G = x...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void ApplyPow(Real power)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
void PreconditionDirections(CuMatrixBase< BaseFloat > *X, BaseFloat *scale)
This call implements the main functionality of this class.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
virtual Component * Copy() const
Copies component (deep copy).
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void Init(const std::vector< Component *> &components, int32 max_rows_process)
virtual int32 InputDim() const
Returns input-dimension of this component.
BaseFloat learning_rate_factor_
learning rate factor (normally 1.0, but can be set to another value so that when you call SetLear...
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
virtual Component * Copy() const
Copies component (deep copy).
This class is used for a piece of a CuMatrix.
virtual void SetActualLearningRate(BaseFloat lrate)
Sets the learning rate directly, bypassing learning_rate_factor_.
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
void Init(const CuMatrixBase< BaseFloat > &matrix)
matrix should be of size input-dim+1 to output-dim, last col is offset
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
void DiffSoftmaxPerRow(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the softmax function.
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
MatrixIndexT Dim() const
Returns the dimension of the vector.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void Scale(Real alpha)
Multiplies all elements by this constant.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
virtual int32 InputDim() const
Returns input-dimension of this component.
void Swap(OnlineNaturalGradient *other)
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void DiffTanh(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the tanh function.
void SetRandn()
Set vector to random normally-distributed noise.
virtual void SetAsGradient()
Sets is_gradient_ to true and sets learning_rate_ to 1, ignoring learning_rate_factor_.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void UpdateSimple(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
const Component * GetComponent(int32 i) const
Gets the ith component in this component.
virtual int32 Properties() const
Return bitmask of the component's properties.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void Init(int32 dim, BaseFloat dropout_proportion=0.0, bool dropout_per_frame=false)
void SetAlpha(BaseFloat alpha)
RepeatedAffineComponent()
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void Heaviside(const CuMatrixBase< Real > &src)
Set each element to the Heaviside function of the corresponding element of "src", which we define as ...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
void AddVec(Real alpha, const CuVectorBase< Real > &vec, Real beta=1.0)
void RepairGradients(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, CuMatrixBase< BaseFloat > *in_deriv, ClipGradientComponent *to_update) const
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
void DiffSigmoid(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the sigmoid function.
void MulColsVec(const CuVectorBase< Real > &scale)
scale i'th column by scale[i]
virtual Component * Copy() const
Copies component (deep copy).
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
const Real * Data() const
Return data pointer (const).
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
CuMatrix< BaseFloat > linear_params_
void SumColumnRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, sets (*this)(r, c) to the sum src(r, j), where j ranges from indexes[c].first through indexes[c].second - 1.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
virtual std::string Type() const =0
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual Component * Copy() const
Copies component (deep copy).
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
void StoreBackpropStats(const CuMatrixBase< BaseFloat > &out_deriv)
void EnsureNonzero(const CuMatrixBase< Real > &src, Real epsilon, CuMatrixBase< Real > *dest)
This function requires that src and dest have the same dimension and epsilon > 0. ...
OnlineNaturalGradient preconditioner_
CuMatrix< BaseFloat > params_
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
virtual void PerturbParams(BaseFloat stddev)=0
This function is to be used in testing.
Matrix for CUDA computing.
virtual Component * Copy() const
Copies component (deep copy).
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
CuArray< int32 > reverse_indexes_
MatrixIndexT NumCols() const
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
void DiffLogSoftmaxPerRow(const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv)
Differentiate backward through the log softmax function.
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
void DiffGroupPnorm(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv, Real power)
Differentiate backward through the GroupPnorm function.
virtual Component * Copy() const
Copies component (deep copy).
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
A class representing a vector.
double num_dims_processed_
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
void Init(int32 dim, BaseFloat param_mean, BaseFloat param_stddev)
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
bool is_gradient_
True if this component is to be treated as a gradient rather than as parameters.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
#define KALDI_ASSERT(cond)
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
PerElementScaleComponent scales each dimension of its input with a separate trainable scale; it's lik...
void WriteUpdatableCommon(std::ostream &is, bool binary) const
void Read(std::istream &is, bool binary)
I/O functions.
virtual void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
SumBlockComponent sums over blocks of its input: for instance, if you create one with the config "inp...
FixedBiasComponent applies a fixed per-element bias; it's similar to the AddShift component in the nn...
CuVector< BaseFloat > scales_
virtual void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void Read(std::istream &is, bool binary)
I/O.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void SetParams(const CuVectorBase< BaseFloat > &bias, const CuMatrixBase< BaseFloat > &linear)
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
bool HasUnusedValues() const
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void CopyRowsFromMat(const MatrixBase< Real > &M)
Performs a row stack of the matrix M.
bool GetValue(const std::string &key, std::string *value)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
CuMatrix< BaseFloat > linear_params_
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void SetComponent(int32 i, Component *component)
Sets the ith component.
OnlineNaturalGradient offset_preconditioner_
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void CopyCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies column r from column indexes[r] of src.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
BaseFloat orthonormal_constraint_
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
BaseFloat GetAlpha() const
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
NaturalGradientPerElementScaleComponent()
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
OnlineNaturalGradient scale_preconditioner_
void PrintParameterStats(std::ostringstream &os, const std::string &name, const CuVectorBase< BaseFloat > &params, bool include_mean)
Print to 'os' some information about the mean and standard deviation of some parameters, used in Info() functions in nnet-simple-component.cc.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
bool SameDimAndStride(const CuMatrixBase< Real > &M, const CuMatrixBase< Real > &N)
void RepairGradients(CuMatrixBase< BaseFloat > *in_deriv, RectifiedLinearComponent *to_update) const
void LogSoftMaxPerRow(const CuMatrixBase< Real > &src)
LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
MatrixIndexT NumRows() const
Dimensions.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
void Init(const std::vector< int32 > &column_map)
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
Provides a vector abstraction class.
virtual void Update(const std::string &debug_info, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void Add(Real c)
Add a constant to each element of a vector.
CuVector< BaseFloat > scales_
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual void SetUnderlyingLearningRate(BaseFloat lrate)
Sets the learning rate of gradient descent- gets multiplied by learning_rate_factor_.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
int32 GetUpdatePeriod() const
void SetMatMatDivMat(const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const CuMatrixBase< Real > &C)
*this = a * b / c (by element; when c = 0, *this = a) *this can be an alias of a, b or c safely and g...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
static Component * NewComponentOfType(const std::string &type)
Returns a new Component of the given type e.g.
void Tanh(const CuMatrixBase< Real > &src)
Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src).
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
bool use_natural_gradient_
CuRand< BaseFloat > random_generator_
virtual void Update(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_deriv)
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
void RepairGradients(const CuMatrixBase< BaseFloat > &out_value, CuMatrixBase< BaseFloat > *in_deriv, TanhComponent *to_update) const
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function, when called by an UpdatableComponent, adds the parameters of another updatabl...
OnlineNaturalGradient preconditioner_out_
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
BaseFloat LearningRate() const
Gets the learning rate to be used in gradient descent.
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
NaturalGradientAffineComponent()
void Read(std::istream &in, bool binary, bool add=false)
Read function using C++ streams.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
BaseFloat GetNumSamplesHistory() const
BaseFloat dropout_proportion_
dropout-proportion is the proportion that is dropped out, e.g.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
CuVector< BaseFloat > bias_params_
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
virtual void StoreStats(const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, void *memo)
This function may store stats on average activation values, and for some component types...
virtual void InitFromConfig(ConfigLine *cfl)=0
Initialize, from a ConfigLine object.
ScaleAndOffsetComponent()
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual Component * Copy() const
Copies component (deep copy).
void Init(int32 input_dim, int32 output_dim, int32 num_repeats, BaseFloat param_stddev, BaseFloat bias_mean, BaseFloat bias_stddev)
OnlineNaturalGradient preconditioner_in_
MatrixIndexT Dim() const
Dimensions.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Vector for CUDA computing.
CuVector< BaseFloat > output_
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
const Real * RowData(MatrixIndexT r) const
Get raw row pointer (const).
virtual void Scale(BaseFloat scale)
This virtual function, when called on an UpdatableComponent, scales the parameters by "scale" when c...
FixedAffineComponent is an affine transform that is supplied at network initialization time and is no...
virtual int32 InputDim() const
Returns input-dimension of this component.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
This class implements an affine transform using a block diagonal matrix e.g., one whose weight matrix...
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...