    input_x_dim_(0), input_y_dim_(0), input_z_dim_(0),
    filt_x_dim_(0), filt_y_dim_(0),
    filt_x_step_(0), filt_y_step_(0),
    input_vectorization_(kZyx) { }

  KALDI_ASSERT(filter_params.NumRows() == bias_params.Dim() &&
               bias_params.Dim() != 0);

  return num_x_steps * num_y_steps * num_filters;
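// Illustrative example (hypothetical numbers, not taken from this file): with
// a 10x10x3 input, a 3x3 filter and x/y steps of 1, num_x_steps and
// num_y_steps are both (10 - 3) / 1 + 1 = 8, so with 64 filters the
// component's OutputDim() would be 8 * 8 * 64 = 4096 per frame.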
  KALDI_ASSERT(param_stddev >= 0.0 && bias_stddev >= 0.0);

    std::string matrix_filename) {

  std::ostringstream stream;
  std::string matrix_filename;
  int32 input_x_dim = -1, input_y_dim = -1, input_z_dim = -1,
      filt_x_dim = -1, filt_y_dim = -1,
      filt_x_step = -1, filt_y_step = -1,
      num_filters = -1;
  std::string input_vectorization_order = "zyx";
  ok = ok && cfl->GetValue("input-x-dim", &input_x_dim);
  ok = ok && cfl->GetValue("input-y-dim", &input_y_dim);
  ok = ok && cfl->GetValue("input-z-dim", &input_z_dim);
  ok = ok && cfl->GetValue("filt-x-dim", &filt_x_dim);
  ok = ok && cfl->GetValue("filt-y-dim", &filt_y_dim);
  ok = ok && cfl->GetValue("filt-x-step", &filt_x_step);
  ok = ok && cfl->GetValue("filt-y-step", &filt_y_step);
  cfl->GetValue("input-vectorization-order", &input_vectorization_order);
  if (input_vectorization_order.compare("zyx") == 0) {
    input_vectorization = kZyx;
  } else if (input_vectorization_order.compare("yzx") == 0) {
    input_vectorization = kYzx;
  } else {
    KALDI_ERR << "Unknown or unsupported input vectorization order "
              << input_vectorization_order
              << " accepted candidates are 'yzx' and 'zyx'";
  }
  if (cfl->GetValue("matrix", &matrix_filename)) {
    Init(input_x_dim, input_y_dim, input_z_dim,
         filt_x_dim, filt_y_dim,
         filt_x_step, filt_y_step,
         input_vectorization, matrix_filename);
  } else {
    ok = ok && cfl->GetValue("num-filters", &num_filters);
    int32 filter_input_dim = filt_x_dim * filt_y_dim * input_z_dim;
    BaseFloat param_stddev = 1.0 / std::sqrt(filter_input_dim),
        bias_stddev = 1.0;
    cfl->GetValue("param-stddev", &param_stddev);
    cfl->GetValue("bias-stddev", &bias_stddev);
    Init(input_x_dim, input_y_dim, input_z_dim,
         filt_x_dim, filt_y_dim, filt_x_step, filt_y_step, num_filters,
         input_vectorization, param_stddev, bias_stddev);
  }
  KALDI_ERR << "Could not process these elements in initializer: "
            << cfl->UnusedValues();

  // YzxVectorIndex:
  return (input_y_dim * input_z_dim) * x + (input_y_dim) * z + y;

  // ZyxVectorIndex:
  return (input_y_dim * input_z_dim) * x + (input_z_dim) * y + z;
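// A sketch of how the two vectorization orders differ (values chosen purely
// for illustration): with input_y_dim = 3 and input_z_dim = 4, the element
// (x=0, y=1, z=2) maps to 4*1 + 2 = 6 under ZyxVectorIndex (z varies fastest)
// but to 3*2 + 1 = 7 under YzxVectorIndex (y varies fastest).  Both orders
// store whole (y,z)-planes contiguously for each x.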
  std::vector<int32> column_map(patches->NumCols());
  int32 column_map_size = column_map.size();
  for (int32 x_step = 0; x_step < num_x_steps; x_step++) {
    for (int32 y_step = 0; y_step < num_y_steps; y_step++) {
      int32 patch_number = x_step * num_y_steps + y_step;
      int32 patch_start_index = patch_number * filter_dim;
      for (int32 x = 0, index = patch_start_index; x < filt_x_dim; x++) {
        for (int32 y = 0; y < filt_y_dim; y++) {
          for (int32 z = 0; z < input_z_dim; z++, index++) {
            if (input_vectorization_ == kZyx) {
              column_map[index] = ZyxVectorIndex(x_step * filt_x_step + x,
                                                 y_step * filt_y_step + y, z,
                                                 input_x_dim, input_y_dim,
                                                 input_z_dim);
            } else if (input_vectorization_ == kYzx) {
              column_map[index] = YzxVectorIndex(x_step * filt_x_step + x,
                                                 y_step * filt_y_step + y, z,
                                                 input_x_dim, input_y_dim,
                                                 input_z_dim);
            }
               (*out).NumCols() == (num_filters * num_x_steps * num_y_steps));

  CuMatrix<BaseFloat> patches(num_frames,
                              num_x_steps * num_y_steps * filter_dim,
                              kUndefined);
  std::vector<CuSubMatrix<BaseFloat>* > tgt_batch, patch_batch,
      filter_params_batch;
  for (int32 x_step = 0; x_step < num_x_steps; x_step++) {
    for (int32 y_step = 0; y_step < num_y_steps; y_step++) {
      int32 patch_number = x_step * num_y_steps + y_step;
      tgt_batch.push_back(new CuSubMatrix<BaseFloat>(
          out->ColRange(patch_number * num_filters, num_filters)));
      patch_batch.push_back(new CuSubMatrix<BaseFloat>(
          patches.ColRange(patch_number * filter_dim, filter_dim)));
      filter_params_batch.push_back(filter_params_elem);
    }
  }
  AddMatMatBatched<BaseFloat>(1.0, tgt_batch, patch_batch,
                              kNoTrans, filter_params_batch,
                              kTrans, 1.0);
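// The convolution itself reduces to one batched GEMM: for each patch position
// p, the rows of patches.ColRange(p * filter_dim, filter_dim) hold the
// vectorized receptive field of every frame, so tgt_p += patch_p * W^T (W
// being filter_params_) evaluates all filters at that position for all frames
// at once.  beta = 1.0 preserves the bias contribution added to *out
// beforehand, and filter_params_elem is shared by every entry of
// filter_params_batch.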
  delete filter_params_elem;
  for (int32 p = 0; p < tgt_batch.size(); p++) {
    delete tgt_batch[p];
    delete patch_batch[p];
  }
void RearrangeIndexes(const std::vector<std::vector<int32> > &in,
                      std::vector<std::vector<int32> > *out) {
  int32 D = in.size();
  int32 L = 0;
  for (int32 i = 0; i < D; i++)
    if (in[i].size() > L)
      L = in[i].size();
  out->resize(L);
  for (int32 i = 0; i < L; i++)
    (*out)[i].resize(D, -1);
  for (int32 i = 0; i < D; i++) {
    for (int32 j = 0; j < in[i].size(); j++) {
      (*out)[j][i] = in[i][j];
    }
  }
}
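// Example of what RearrangeIndexes computes (hypothetical input): the jagged
// list {{0, 1}, {2}} (D = 2 lists, longest length L = 2) becomes
// {{0, 2}, {1, -1}}, i.e. out[j][i] = in[i][j], padded with -1.  The -1
// entries mark columns for which AddCols has nothing to add.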
  std::vector<std::vector<int32> > reverse_column_map(in_deriv->NumCols());
  int32 rev_col_map_size = reverse_column_map.size();
  for (int32 x_step = 0; x_step < num_x_steps; x_step++) {
    for (int32 y_step = 0; y_step < num_y_steps; y_step++) {
      int32 patch_number = x_step * num_y_steps + y_step;
      int32 patch_start_index = patch_number * filter_dim;
      for (int32 x = 0, index = patch_start_index; x < filt_x_dim; x++) {
        for (int32 y = 0; y < filt_y_dim; y++) {
          for (int32 z = 0; z < input_z_dim; z++, index++) {
            int32 vector_index;
            if (input_vectorization_ == kZyx) {
              vector_index = ZyxVectorIndex(x_step * filt_x_step + x,
                                            y_step * filt_y_step + y, z,
                                            input_x_dim, input_y_dim,
                                            input_z_dim);
            } else {
              vector_index = YzxVectorIndex(x_step * filt_x_step + x,
                                            y_step * filt_y_step + y, z,
                                            input_x_dim, input_y_dim,
                                            input_z_dim);
            }
            reverse_column_map[vector_index].push_back(index);
  std::vector<std::vector<int32> > rearranged_column_map;
  RearrangeIndexes(reverse_column_map, &rearranged_column_map);
  for (int32 p = 0; p < rearranged_column_map.size(); p++) {
    CuArray<int32> cu_cols(rearranged_column_map[p]);
    in_deriv->AddCols(in_deriv_patches, cu_cols);
  }
      num_frames = out_deriv.NumRows(),

               (num_filters * num_x_steps * num_y_steps));

  CuMatrix<BaseFloat> in_deriv_patches(num_frames,
                                       num_x_steps * num_y_steps * filter_dim,
                                       kSetZero);
  std::vector<CuSubMatrix<BaseFloat>* > patch_deriv_batch, out_deriv_batch,
      filter_params_batch;
  for (int32 x_step = 0; x_step < num_x_steps; x_step++) {
    for (int32 y_step = 0; y_step < num_y_steps; y_step++) {
      int32 patch_number = x_step * num_y_steps + y_step;
      patch_deriv_batch.push_back(new CuSubMatrix<BaseFloat>(
          in_deriv_patches.ColRange(patch_number * filter_dim, filter_dim)));
      out_deriv_batch.push_back(new CuSubMatrix<BaseFloat>(
          out_deriv.ColRange(patch_number * num_filters, num_filters)));
      filter_params_batch.push_back(filter_params_elem);
    }
  }
  AddMatMatBatched<BaseFloat>(1.0, patch_deriv_batch,
                              out_deriv_batch, kNoTrans,
                              filter_params_batch, kNoTrans, 0.0);
  if (to_update != NULL) {
    to_update->Update(debug_info, in_value, out_deriv, out_deriv_batch);
  }

  delete filter_params_elem;
  for (int32 p = 0; p < patch_deriv_batch.size(); p++) {
    delete patch_deriv_batch[p];
    delete out_deriv_batch[p];
  }
      num_frames = out_deriv.NumRows(),

               (num_filters * num_x_steps * num_y_steps));

  CuMatrix<BaseFloat> input_patches(num_frames,
                                    filter_dim * num_x_steps * num_y_steps,
                                    kUndefined);

  CuMatrix<BaseFloat> filters_grad_blocks_batch(
      num_x_steps * num_y_steps * filters_grad.NumRows(),
      filters_grad.NumCols());

  std::vector<CuSubMatrix<BaseFloat>* > filters_grad_batch, input_patch_batch;
  for (int32 x_step = 0; x_step < num_x_steps; x_step++) {
    for (int32 y_step = 0; y_step < num_y_steps; y_step++) {
      int32 patch_number = x_step * num_y_steps + y_step;
      filters_grad_batch.push_back(new CuSubMatrix<BaseFloat>(
          filters_grad_blocks_batch.RowRange(
              patch_number * filters_grad.NumRows(), filters_grad.NumRows())));
      input_patch_batch.push_back(new CuSubMatrix<BaseFloat>(
          input_patches.ColRange(patch_number * filter_dim, filter_dim)));
    }
  }
  AddMatMatBatched<BaseFloat>(1.0, filters_grad_batch, out_deriv_batch,
                              kTrans, input_patch_batch,
                              kNoTrans, 1.0);
  filters_grad.AddMatBlocks(1.0, filters_grad_blocks_batch);
  bias_grad.AddRowSumMat(1.0, out_deriv_col_blocks_sum, 1.0);

  for (int32 p = 0; p < input_patch_batch.size(); p++) {
    delete filters_grad_batch[p];
    delete input_patch_batch[p];
  }
  int32 input_vectorization;

  if (tok == "<IsGradient>") {

  ExpectToken(is, binary, "</ConvolutionComponent>");

  WriteToken(os, binary, "<InputVectorization>");

  WriteToken(os, binary, "</ConvolutionComponent>");
    pool_x_size_(component.pool_x_size_),
    pool_y_size_(component.pool_y_size_),
    pool_z_size_(component.pool_z_size_),
    pool_x_step_(component.pool_x_step_),
    pool_y_step_(component.pool_y_step_),
    pool_z_step_(component.pool_z_step_) { }

  return num_pools_x * num_pools_y * num_pools_z;
  KALDI_ERR << "Could not process these elements in initializer: "
            << cfl->UnusedValues();

  std::vector<int32> column_map(patches->NumCols());
  int32 column_map_size = column_map.size();
  for (int32 x_pool = 0; x_pool < num_pools_x; x_pool++) {
    for (int32 y_pool = 0; y_pool < num_pools_y; y_pool++) {
      for (int32 z_pool = 0; z_pool < num_pools_z; z_pool++, index++) {

  for (int32 q = 0; q < pool_size; q++)
    out->Max(patches.ColRange(q * num_pools, num_pools));
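// The pooling is a running elementwise maximum over pool_size column blocks.
// As an illustration (hypothetical values): with pool_size = 3 and a given
// output element seeing inputs {0.2, 0.9, 0.4} across the three blocks, the
// running maximum leaves 0.9 in that position of *out.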
  std::vector<std::vector<int32> > reverse_column_map(in_deriv->NumCols());
  int32 rev_col_map_size = reverse_column_map.size();

  for (int32 x_pool = 0; x_pool < num_pools_x; x_pool++) {
    for (int32 y_pool = 0; y_pool < num_pools_y; y_pool++) {
      for (int32 z_pool = 0; z_pool < num_pools_z; z_pool++, index++) {
        reverse_column_map[vector_index].push_back(index);
  std::vector<std::vector<int32> > rearranged_column_map;
  RearrangeIndexes(reverse_column_map, &rearranged_column_map);
  for (int32 p = 0; p < rearranged_column_map.size(); p++) {
    CuArray<int32> cu_cols(rearranged_column_map[p]);
    in_deriv->AddCols(in_deriv_patches, cu_cols);
  }
  for (int32 q = 0; q < pool_size; q++) {
    patches.ColRange(q * num_pools, num_pools).CopyFromMat(mask);
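// Gradient routing for max pooling: assuming mask was built with
// EqualElementMask against the pooled output and multiplied elementwise by the
// output derivative, it is nonzero exactly where an input equalled the pool's
// maximum, so only argmax positions receive gradient (ties share it).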
  WriteToken(os, binary, "<MaxpoolingComponent>");

  WriteToken(os, binary, "</MaxpoolingComponent>");

  std::ostringstream stream;
  int32 cell_dim = value_sum_.NumCols();
  return cell_dim * 5 + (use_dropout_ ? 3 : 0);

  int32 cell_dim = value_sum_.NumCols();
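// Layout note: the input to LstmNonlinearityComponent is five cell_dim-wide
// blocks (the pre-nonlinearity gate values i_t, f_t, c_t, o_t plus the
// previous cell state c_{t-1}); with use_dropout_, three extra per-frame
// dropout-mask values are appended, giving cell_dim * 5 + 3.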
  ReadUpdatableCommon(is, binary);
  params_.Read(is, binary);
  value_sum_.Read(is, binary);
  deriv_sum_.Read(is, binary);
  self_repair_config_.Read(is, binary);
  self_repair_total_.Read(is, binary);
  if (tok == "<UseDropout>") {
    ReadBasicType(is, binary, &use_dropout_);
  } else {
    use_dropout_ = false;
  }

  value_sum_.Scale(count_);
  deriv_sum_.Scale(count_);
  int32 cell_dim = params_.NumCols();
  self_repair_total_.Scale(count_ * cell_dim);

  InitNaturalGradient();

  ExpectToken(is, binary, "</LstmNonlinearityComponent>");
  WriteUpdatableCommon(os, binary);
  params_.Write(os, binary);

  value_avg.Scale(1.0 / count_);
  value_avg.Write(os, binary);

  deriv_avg.Scale(1.0 / count_);
  deriv_avg.Write(os, binary);

  WriteToken(os, binary, "<SelfRepairConfig>");
  self_repair_config_.Write(os, binary);

  int32 cell_dim = params_.NumCols();
  self_repair_prob.Scale(1.0 / (count_ * cell_dim));
  self_repair_prob.Write(os, binary);

  WriteToken(os, binary, "</LstmNonlinearityComponent>");
  std::ostringstream stream;
  int32 cell_dim = params_.NumCols();
  stream << UpdatableComponent::Info()
         << ", use-dropout=" << (use_dropout_ ? "true" : "false");

  stream << ", count=" << std::setprecision(3) << count_
         << std::setprecision(6);

  static const char *nonlin_names[] = { "i_t_sigmoid", "f_t_sigmoid",
                                        "c_t_tanh", "o_t_sigmoid",
                                        "m_t_tanh" };
  stream << ", " << nonlin_names[i] << "={";
  stream << " self-repair-lower-threshold=" << self_repair_config_(i)
         << ", self-repair-scale=" << self_repair_config_(i + 5);

  BaseFloat self_repaired_proportion =
      self_repair_total_(i) / (count_ * cell_dim);
  stream << ", self-repaired-proportion=" << self_repaired_proportion;

  CuVector<double> value_sum(value_sum_.Row(i)),
      deriv_sum(deriv_sum_.Row(i));
  value_avg.Scale(1.0 / count_);
  deriv_avg.Scale(1.0 / count_);

  return stream.str();
  value_sum_.SetZero();
  deriv_sum_.SetZero();
  self_repair_total_.SetZero();

  value_sum_.SetZero();
  deriv_sum_.SetZero();
  self_repair_total_.SetZero();

  params_.Scale(scale);
  value_sum_.Scale(scale);
  deriv_sum_.Scale(scale);
  self_repair_total_.Scale(scale);
  params_.AddMat(alpha, other->params_);
  count_ += alpha * other->count_;

  params_.AddMat(stddev, temp_params);

  return params_.NumRows() * params_.NumCols();

  params_.CopyRowsFromVec(params);
    const std::string &debug_info,

  NVTX_RANGE("LstmNonlinearityComponent::Backprop");

  if (to_update_in == NULL) {
                               deriv_sum_, self_repair_config_,

  int32 cell_dim = params_.NumCols();
                               deriv_sum_, self_repair_config_,
                               count_, in_deriv, &params_deriv,
                               &self_repair_total);
  self_repair_total_sum.AddColSumMat(1.0, self_repair_total, 0.0);
  to_update->count_ += static_cast<double>(in_value.NumRows());

      &params_deriv, &scale);
    params_(other.params_),
    use_dropout_(other.use_dropout_),
    value_sum_(other.value_sum_),
    deriv_sum_(other.deriv_sum_),
    self_repair_config_(other.self_repair_config_),
    self_repair_total_(other.self_repair_total_),
    count_(other.count_),
    preconditioner_(other.preconditioner_) { }
    int32 cell_dim, bool use_dropout,
    BaseFloat sigmoid_self_repair_threshold,

  KALDI_ASSERT(tanh_self_repair_threshold >= 0.0 &&
               tanh_self_repair_threshold <= 1.0 &&
               sigmoid_self_repair_threshold >= 0.0 &&
               sigmoid_self_repair_threshold <= 0.25 &&
               self_repair_scale >= 0.0 && self_repair_scale <= 0.1);
  bool use_dropout = false;
  BaseFloat tanh_self_repair_threshold = 0.2,
      sigmoid_self_repair_threshold = 0.05,
      self_repair_scale = 1.0e-05;
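// Self-repair background (summarizing the mechanism these defaults feed):
// when a unit's average derivative, accumulated in deriv_sum_, falls below
// the threshold (0.2 for tanh, 0.05 for sigmoid units), a small term of
// magnitude self_repair_scale is added to the backpropagated derivative to
// push the unit back toward its active range; self_repair_total_ tracks how
// often this fires.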
  ok = ok && cfl->GetValue("cell-dim", &cell_dim);
  cfl->GetValue("param-stddev", &param_stddev);
  cfl->GetValue("tanh-self-repair-threshold",
                &tanh_self_repair_threshold);
  cfl->GetValue("sigmoid-self-repair-threshold",
                &sigmoid_self_repair_threshold);
  cfl->GetValue("self-repair-scale", &self_repair_scale);
  cfl->GetValue("use-dropout", &use_dropout);
  KALDI_ERR << "Could not process these elements in initializer: "
            << cfl->UnusedValues();

  Init(cell_dim, use_dropout, param_stddev, tanh_self_repair_threshold,
       sigmoid_self_repair_threshold, self_repair_scale);

  KALDI_ERR << "Invalid initializer for layer of type "
            << Type() << ": \"" << cfl->WholeLine() << "\"";

int32 GruNonlinearityComponent::InputDim() const {
  if (recurrent_dim_ == cell_dim_) {
    return 4 * cell_dim_;
  } else {
    return 3 * cell_dim_ + 2 * recurrent_dim_;
  }
}

int32 GruNonlinearityComponent::OutputDim() const {
  return 2 * cell_dim_;
}
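// Dimension bookkeeping: the input is the concatenation
// (z_t, r_t, hpart_t, c_{t-1}, s_{t-1}) with block widths
// (cell_dim, recurrent_dim, cell_dim, cell_dim, recurrent_dim), giving
// 3 * cell_dim + 2 * recurrent_dim in general; when recurrent_dim ==
// cell_dim, c_{t-1} and s_{t-1} coincide and one block is dropped, which is
// why InputDim() collapses to 4 * cell_dim.  The output is (h_t, c_t), hence
// 2 * cell_dim.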
std::string GruNonlinearityComponent::Info() const {
  std::ostringstream stream;
  stream << UpdatableComponent::Info()
         << ", cell-dim=" << cell_dim_
         << ", recurrent-dim=" << recurrent_dim_;
  stream << ", self-repair-threshold=" << self_repair_threshold_
         << ", self-repair-scale=" << self_repair_scale_;
  stream << ", count=" << std::setprecision(3) << count_
         << std::setprecision(6);
  stream << ", self-repaired-proportion="

  stream << ", alpha=" << preconditioner_in_.GetAlpha()
         << ", rank-in=" << preconditioner_in_.GetRank()
         << ", rank-out=" << preconditioner_out_.GetRank()
         << ", update-period="
         << preconditioner_in_.GetUpdatePeriod();
  return stream.str();
void GruNonlinearityComponent::InitFromConfig(ConfigLine *cfl) {
  recurrent_dim_ = -1;
  self_repair_threshold_ = 0.2;
  self_repair_scale_ = 1.0e-05;
  if (!cfl->GetValue("cell-dim", &cell_dim_) || cell_dim_ <= 0)
    KALDI_ERR << "cell-dim > 0 is required for GruNonlinearityComponent.";

  BaseFloat param_stddev = 1.0 / std::sqrt(cell_dim_),
      alpha = 4.0;
  int32 rank_in = 20, rank_out = 80,
      update_period = 4;
  cfl->GetValue("recurrent-dim", &recurrent_dim_);
  cfl->GetValue("self-repair-threshold", &self_repair_threshold_);
  cfl->GetValue("self-repair-scale", &self_repair_scale_);
  cfl->GetValue("param-stddev", &param_stddev);
  cfl->GetValue("rank-in", &rank_in);
  cfl->GetValue("rank-out", &rank_out);
  cfl->GetValue("update-period", &update_period);
  if (recurrent_dim_ < 0)
    recurrent_dim_ = cell_dim_;
  if (recurrent_dim_ == 0 || recurrent_dim_ > cell_dim_)
    KALDI_ERR << "Invalid values for cell-dim and recurrent-dim";

  w_h_.Resize(cell_dim_, recurrent_dim_);
  w_h_.Scale(param_stddev);

  preconditioner_in_.SetAlpha(alpha);
  preconditioner_in_.SetRank(rank_in);
  preconditioner_in_.SetUpdatePeriod(update_period);
  preconditioner_out_.SetAlpha(alpha);
  preconditioner_out_.SetRank(rank_out);
  preconditioner_out_.SetUpdatePeriod(update_period);
void* GruNonlinearityComponent::Propagate(

      r_t(in, 0, num_rows, c, r),
      hpart_t(in, 0, num_rows, c + r, c),
      c_t1(in, 0, num_rows, c + r + c, c);

      c_t(*out, 0, num_rows, c, c);

  h_t.CopyFromMat(hpart_t);
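// The forward computation being assembled here (following the projected-GRU
// formulation this component implements) is, per frame:
//   h_t = tanh(hpart_t + W_h * (s_{t-1} .* r_t))
//   c_t = z_t .* c_{t-1} + (1 - z_t) .* h_t
// where .* is elementwise multiplication; h_t starts as a copy of hpart_t and
// the W_h * (s_{t-1} .* r_t) term is then added into it.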
void GruNonlinearityComponent::Backprop(
    const std::string &debug_info,

  NVTX_RANGE("GruNonlinearityComponent::Backprop");

               (in_deriv == NULL || SameDim(in_value, *in_deriv)) &&

  GruNonlinearityComponent *to_update =
      dynamic_cast<GruNonlinearityComponent*>(to_update_in);
      r_t(in_value, 0, num_rows, c, r),
      hpart_t(in_value, 0, num_rows, c + r, c),
      c_t1(in_value, 0, num_rows, c + r + c, c),
      s_t1(in_value, 0, num_rows, in_value.NumCols() - r, r);

      (in_deriv == NULL ? &in_value : in_deriv);

      r_t_deriv(*in_deriv_ptr, 0, num_rows, c, r),
      hpart_t_deriv(*in_deriv_ptr, 0, num_rows, c + r, c),
      c_t1_deriv(*in_deriv_ptr, 0, num_rows, c + r + c, c),
      s_t1_deriv(*in_deriv_ptr, 0, num_rows, in_value.NumCols() - r, r);

      c_t(out_value, 0, num_rows, c, c),
      c_t_deriv(out_deriv, 0, num_rows, c, c);
  h_t_deriv.AddMat(1.0, c_t_deriv);

  z_t_deriv.AddMatMatElements(-1.0, c_t_deriv, h_t, 1.0);
  z_t_deriv.AddMatMatElements(1.0, c_t_deriv, c_t1, 1.0);
  c_t1_deriv.AddMatMatElements(1.0, c_t_deriv, z_t, 1.0);
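// These three calls are the chain rule applied to
// c_t = z_t .* c_{t-1} + (1 - z_t) .* h_t:
//   d(z_t)     += c_t_deriv .* (c_{t-1} - h_t)   (split into the two calls)
//   d(c_{t-1}) += c_t_deriv .* z_t
// while the remaining d(h_t) += c_t_deriv .* (1 - z_t) term is accounted for
// in the h_t_deriv handling around it.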
  h_t_deriv.DiffTanh(h_t, h_t_deriv);

  to_update->TanhStatsAndSelfRepair(h_t, &h_t_deriv);

  to_update->UpdateParameters(sdotr, h_t_deriv);

  hpart_t_deriv.AddMat(1.0, h_t_deriv);

  r_t_deriv.AddMatMatElements(1.0, sdotr_deriv, s_t1, 1.0);
  s_t1_deriv.AddMatMatElements(1.0, sdotr_deriv, r_t, 1.0);
void GruNonlinearityComponent::TanhStatsAndSelfRepair(

  BaseFloat repair_and_stats_probability = 0.5;

  tanh_deriv.Scale(-1.0);
  tanh_deriv.Add(1.0);
  thresholds_vec.Add(self_repair_threshold_ * count_);

  h_t_deriv->AddMatDiagVec(-self_repair_scale_ / repair_and_stats_probability,
void GruNonlinearityComponent::UpdateParameters(

      out_deriv_temp(h_t_deriv);

  preconditioner_in_.PreconditionDirections(&in_value_temp, &in_scale);
  preconditioner_out_.PreconditionDirections(&out_deriv_temp, &out_scale);

  w_h_.AddMatMat(local_lrate, out_deriv_temp, kTrans,
                 in_value_temp, kNoTrans, 1.0);
void GruNonlinearityComponent::Read(std::istream &is, bool binary) {
  w_h_.Read(is, binary);

  BaseFloat alpha;
  int32 rank_in, rank_out, update_period;

  preconditioner_in_.SetRank(rank_in);
  preconditioner_out_.SetRank(rank_out);
  preconditioner_in_.SetAlpha(alpha);
  preconditioner_out_.SetAlpha(alpha);
  preconditioner_in_.SetUpdatePeriod(update_period);
  preconditioner_out_.SetUpdatePeriod(update_period);
  ExpectToken(is, binary, "</GruNonlinearityComponent>");
void GruNonlinearityComponent::Write(std::ostream &os, bool binary) const {
  w_h_.Write(os, binary);

  temp.Write(os, binary);

  temp.Write(os, binary);

  WriteToken(os, binary, "<SelfRepairThreshold>");

  BaseFloat alpha = preconditioner_in_.GetAlpha();
  int32 rank_in = preconditioner_in_.GetRank(),
      rank_out = preconditioner_out_.GetRank(),
      update_period = preconditioner_in_.GetUpdatePeriod();

  WriteToken(os, binary, "</GruNonlinearityComponent>");
void GruNonlinearityComponent::Scale(BaseFloat scale) {

void GruNonlinearityComponent::Add(BaseFloat alpha,
                                   const Component &other_in) {
  const GruNonlinearityComponent *other =
      dynamic_cast<const GruNonlinearityComponent*>(&other_in);
  w_h_.AddMat(alpha, other->w_h_);
  count_ += alpha * other->count_;
void GruNonlinearityComponent::ZeroStats() {

void GruNonlinearityComponent::Check() const {
               recurrent_dim_ <= cell_dim_ &&
               self_repair_threshold_ >= 0.0 &&
               self_repair_scale_ >= 0.0);
  KALDI_ASSERT(w_h_.NumRows() == cell_dim_ &&
               w_h_.NumCols() == recurrent_dim_);
  w_h_.AddMat(stddev, temp_params);

  const GruNonlinearityComponent *other =
      dynamic_cast<const GruNonlinearityComponent*>(&other_in);

  return w_h_.NumRows() * w_h_.NumCols();

void GruNonlinearityComponent::UnVectorize(
  w_h_.CopyRowsFromVec(params);

  preconditioner_in_.Freeze(freeze);
  preconditioner_out_.Freeze(freeze);
GruNonlinearityComponent::GruNonlinearityComponent(
    const GruNonlinearityComponent &other):
    cell_dim_(other.cell_dim_),
    recurrent_dim_(other.recurrent_dim_),
    self_repair_threshold_(other.self_repair_threshold_),
    self_repair_scale_(other.self_repair_scale_),
    preconditioner_in_(other.preconditioner_in_),
    preconditioner_out_(other.preconditioner_out_) {
int32 OutputGruNonlinearityComponent::InputDim() const {
  return 3 * cell_dim_;
}

int32 OutputGruNonlinearityComponent::OutputDim() const {
  return 2 * cell_dim_;
}
std::string OutputGruNonlinearityComponent::Info() const {
  std::ostringstream stream;
  stream << UpdatableComponent::Info()
         << ", cell-dim=" << cell_dim_;
  stream << ", self-repair-threshold=" << self_repair_threshold_
         << ", self-repair-scale=" << self_repair_scale_;
  stream << ", count=" << std::setprecision(3) << count_
         << std::setprecision(6);
  stream << ", self-repaired-proportion="

         << ", update-period="

  return stream.str();
}
void OutputGruNonlinearityComponent::InitFromConfig(ConfigLine *cfl) {
  self_repair_threshold_ = 0.2;
  self_repair_scale_ = 1.0e-05;

  if (!cfl->GetValue("cell-dim", &cell_dim_) || cell_dim_ <= 0)
    KALDI_ERR << "cell-dim > 0 is required for "
                 "OutputGruNonlinearityComponent.";
  BaseFloat param_mean = 0.0, param_stddev = 1.0,

  cfl->GetValue("self-repair-threshold", &self_repair_threshold_);
  cfl->GetValue("self-repair-scale", &self_repair_scale_);
  cfl->GetValue("param-mean", &param_mean);
  cfl->GetValue("param-stddev", &param_stddev);
  cfl->GetValue("update-period", &update_period);
  w_h_.Resize(cell_dim_);
  w_h_.Scale(param_stddev);
  w_h_.Add(param_mean);

void* OutputGruNonlinearityComponent::Propagate(

      hpart_t(in, 0, num_rows, c, c),
      c_t1(in, 0, num_rows, c + c, c);

      c_t(*out, 0, num_rows, c, c);
void OutputGruNonlinearityComponent::Backprop(
    const std::string &debug_info,

  NVTX_RANGE("OutputGruNonlinearityComponent::Backprop");

               (in_deriv == NULL || SameDim(in_value, *in_deriv)) &&

  OutputGruNonlinearityComponent *to_update =
      dynamic_cast<OutputGruNonlinearityComponent*>(to_update_in);
      hpart_t(in_value, 0, num_rows, c, c),
      c_t1(in_value, 0, num_rows, c + c, c);

      (in_deriv == NULL ? &in_value : in_deriv);

      hpart_t_deriv(*in_deriv_ptr, 0, num_rows, c, c),
      c_t1_deriv(*in_deriv_ptr, 0, num_rows, c + c, c);

      c_t(out_value, 0, num_rows, c, c),
      c_t_deriv(out_deriv, 0, num_rows, c, c);
  h_t_deriv.AddMat(1.0, c_t_deriv);

  z_t_deriv.AddMatMatElements(-1.0, c_t_deriv, h_t, 1.0);
  z_t_deriv.AddMatMatElements(1.0, c_t_deriv, c_t1, 1.0);

  h_t_deriv.DiffTanh(h_t, h_t_deriv);

  to_update->TanhStatsAndSelfRepair(h_t, &h_t_deriv);

  to_update->UpdateParameters(c_t1, h_t_deriv);

  hpart_t_deriv.AddMat(1.0, h_t_deriv);

  c_t1_deriv.AddMat(1.0, h_t_deriv);
void OutputGruNonlinearityComponent::TanhStatsAndSelfRepair(

  BaseFloat repair_and_stats_probability = 0.5;

  tanh_deriv.Scale(-1.0);
  tanh_deriv.Add(1.0);

  thresholds_vec.Add(self_repair_threshold_ * count_);

  h_t_deriv->AddMatDiagVec(-self_repair_scale_ / repair_and_stats_probability,
void OutputGruNonlinearityComponent::UpdateParameters(

  w_h_.AddVec(1.0, delta_w_h);

void OutputGruNonlinearityComponent::Read(std::istream &is, bool binary) {
  w_h_.Read(is, binary);

  int32 rank, update_period;

  ExpectToken(is, binary, "</OutputGruNonlinearityComponent>");
void OutputGruNonlinearityComponent::Write(std::ostream &os,
                                           bool binary) const {
  w_h_.Write(os, binary);

  temp.Write(os, binary);

  temp.Write(os, binary);

  WriteToken(os, binary, "<SelfRepairThreshold>");

  WriteToken(os, binary, "</OutputGruNonlinearityComponent>");
void OutputGruNonlinearityComponent::Scale(BaseFloat scale) {

void OutputGruNonlinearityComponent::Add(BaseFloat alpha,
                                         const Component &other_in) {
  const OutputGruNonlinearityComponent *other =
      dynamic_cast<const OutputGruNonlinearityComponent*>(&other_in);
  w_h_.AddVec(alpha, other->w_h_);
  count_ += alpha * other->count_;
void OutputGruNonlinearityComponent::ZeroStats() {

void OutputGruNonlinearityComponent::Check() const {
  KALDI_ASSERT(cell_dim_ > 0 &&
               self_repair_threshold_ >= 0.0 &&
               self_repair_scale_ >= 0.0);

  w_h_.AddVec(stddev, temp_params);

  const OutputGruNonlinearityComponent *other =
      dynamic_cast<const OutputGruNonlinearityComponent*>(&other_in);
  return VecVec(w_h_, other->w_h_);

void OutputGruNonlinearityComponent::UnVectorize(
  w_h_.CopyFromVec(params);
OutputGruNonlinearityComponent::OutputGruNonlinearityComponent(
    const OutputGruNonlinearityComponent &other):
    cell_dim_(other.cell_dim_),
    self_repair_threshold_(other.self_repair_threshold_),
    self_repair_scale_(other.self_repair_scale_),