32 std::vector<std::pair<int32, int32> > *out) {
33 in.
CopyToVec(
reinterpret_cast<std::vector<Int32Pair>*
>(out));
38 const std::vector<Int32Pair> *in_cast =
39 reinterpret_cast<const std::vector<Int32Pair>*
>(&in);
50 *input_index = output_index;
51 int32 output_x = output_index.
x, input_x;
53 input_x = output_x / num_blocks;
55 input_x = (output_x - num_blocks + 1) / num_blocks;
57 input_index->
x = input_x;
59 *block = output_x - (input_x * num_blocks);
65 const Index &output_index,
66 std::vector<Index> *desired_indexes)
const {
67 desired_indexes->resize(1);
74 const Index &output_index,
76 std::vector<Index> *used_inputs)
const {
79 if (!input_index_set(input_index))
83 used_inputs->push_back(input_index);
89 WriteToken(ostream, binary,
"<DistributeComponentPrecomputedIndexes>");
92 WriteToken(ostream, binary,
"</DistributeComponentPrecomputedIndexes>");
98 ExpectToken(istream, binary,
"</DistributeComponentPrecomputedIndexes>");
104 const std::vector<Index> &input_indexes,
105 const std::vector<Index> &output_indexes,
107 unordered_map<Index, int32, IndexHasher> index_to_input_dim;
108 int32 num_input_indexes = input_indexes.size(),
109 num_output_indexes = output_indexes.size();
110 for (
int32 i = 0;
i < num_input_indexes;
i++)
111 index_to_input_dim[input_indexes[
i]] =
i;
114 ans->
pairs.resize(output_indexes.size());
119 for (
int32 i = 0;
i < num_output_indexes;
i++) {
123 unordered_map<Index, int32, IndexHasher>::iterator iter =
124 index_to_input_dim.find(input_index);
125 if (iter == index_to_input_dim.end())
126 KALDI_ERR <<
"Input index not found (code error)";
127 int32 input_row = iter->second;
128 ans->
pairs[
i] = std::pair<int32,int32>(input_row, block_index * block_size);
137 int32 num_output_rows,
138 std::vector<const BaseFloat*> *input_pointers)
const {
141 KALDI_ASSERT(indexes != NULL &&
"Invalid pointer type");
143 input_pointers->resize(num_output_rows);
147 const BaseFloat **input_pointers_data = &((*input_pointers)[0]);
148 const std::pair<int32, int32> *pairs_data = &(indexes->
pairs[0]);
149 for (
int32 i = 0;
i < num_output_rows;
i++) {
150 input_pointers_data[
i] = input_data +
151 pairs_data[
i].first * input_stride +
152 pairs_data[
i].second;
158 int32 num_output_rows,
160 std::vector<BaseFloat*> *input_pointers)
const {
163 KALDI_ASSERT(indexes != NULL &&
"Invalid pointer type");
165 input_pointers->resize(num_output_rows);
169 BaseFloat **input_pointers_data = &((*input_pointers)[0]);
170 const std::pair<int32, int32> *pairs_data = &(indexes->
pairs[0]);
171 for (
int32 i = 0;
i < num_output_rows;
i++) {
172 input_pointers_data[
i] = input_data +
173 pairs_data[
i].first * input_stride +
174 pairs_data[
i].second;
186 std::vector<const BaseFloat*> input_pointers;
203 if (in_deriv == NULL)
return;
206 num_output_rows = out_deriv.
NumRows();
207 if (num_output_rows != in_deriv->
NumRows() * num_blocks) {
214 std::vector<BaseFloat*> input_pointers;
224 KALDI_ASSERT(input_dim > 0 && output_dim > 0 && input_dim % output_dim == 0);
229 int32 input_dim, output_dim;
230 bool ok = cfl->
GetValue(
"input-dim", &input_dim) &&
231 cfl->
GetValue(
"output-dim", &output_dim);
233 KALDI_ERR <<
"Invalid initializer for layer of type " 236 Init(input_dim, output_dim);
240 WriteToken(os, binary,
"<DistributeComponent>");
245 WriteToken(os, binary,
"</DistributeComponent>");
258 WriteToken(os, binary,
"<StatisticsExtractionComponentPrecomputedIndexes>");
260 std::vector<std::pair<int32, int32> > pairs_cpu;
264 counts.Write(os, binary);
266 std::vector<int32> backward_indexes_cpu;
267 backward_indexes.CopyToVec(&backward_indexes_cpu);
269 WriteToken(os, binary,
"</StatisticsExtractionComponentPrecomputedIndexes>");
274 "<StatisticsExtractionComponentPrecomputedIndexes>",
276 std::vector<std::pair<int32, int32> > pairs_cpu;
280 counts.Read(is, binary);
282 std::vector<int32> backward_indexes_cpu;
284 backward_indexes.CopyFromVec(backward_indexes_cpu);
285 ExpectToken(is, binary,
"</StatisticsExtractionComponentPrecomputedIndexes>");
291 const std::vector<Index> &input_indexes,
292 const std::vector<Index> &output_indexes,
293 bool need_backprop)
const {
294 int32 num_input_indexes = input_indexes.size(),
295 num_output_indexes = output_indexes.size();
301 invalid_pair.
first = -1;
303 std::vector<Int32Pair> forward_indexes_cpu(output_indexes.size(),
305 std::vector<int32> backward_indexes_cpu(input_indexes.size(), -1);
309 unordered_map<Index, int32, IndexHasher> index_to_input_pos;
310 for (
int32 i = 0;
i < num_input_indexes;
i++)
311 index_to_input_pos[input_indexes[
i]] =
i;
313 for (
int32 i = 0;
i < num_output_indexes;
i++) {
314 Index output_index = output_indexes[
i];
315 Index input_index(output_index);
317 t_start = output_period_ * (t / output_period_);
319 t_start -= output_period_;
320 int32 t_end = t_start + output_period_;
321 for (
int32 t = t_start; t < t_end; t += input_period_) {
323 unordered_map<Index, int32, IndexHasher>::iterator iter =
324 index_to_input_pos.find(input_index);
325 if (iter != index_to_input_pos.end()) {
326 int32 input_pos = iter->second;
327 if (forward_indexes_cpu[
i].first == -1) {
328 forward_indexes_cpu[
i].first = input_pos;
329 forward_indexes_cpu[
i].second = input_pos + 1;
335 forward_indexes_cpu[
i].second++;
336 counts_cpu(
i) += 1.0;
339 backward_indexes_cpu[input_pos] =
i;
344 for (
int32 i = 0;
i < num_input_indexes;
i++) {
355 input_dim_(-1), input_period_(1), output_period_(1),
356 include_variance_(true) { }
374 KALDI_ERR <<
"Could not process these elements in initializer: " 378 KALDI_ERR <<
"Invalid initializer for layer of type " 386 KALDI_ERR <<
"Invalid configuration of StatisticsExtractionComponent";
390 std::vector<Index> *input_indexes,
391 std::vector<Index> *output_indexes)
const {
392 std::sort(input_indexes->begin(), input_indexes->end(),
394 std::sort(output_indexes->begin(), output_indexes->end(),
400 const Index &output_index,
402 std::vector<Index> *used_inputs)
const {
403 Index input_index(output_index);
412 if (input_index_set(input_index))
417 used_inputs->clear();
421 if (input_index_set(input_index)) {
423 used_inputs->push_back(input_index);
432 const Index &output_index,
433 std::vector<Index> *desired_indexes)
const {
434 desired_indexes->clear();
435 Index input_index(output_index);
443 desired_indexes->push_back(input_index);
478 const std::string &debug_info,
486 NVTX_RANGE(
"StatisticsExtractionComponent::Backprop");
513 ExpectToken(is, binary,
"</StatisticsExtractionComponent>");
518 WriteToken(os, binary,
"<StatisticsExtractionComponent>");
527 WriteToken(os, binary,
"</StatisticsExtractionComponent>");
531 WriteToken(os, binary,
"<StatisticsPoolingComponentPrecomputedIndexes>");
533 std::vector<std::pair<int32, int32> > indexes_cpu;
539 WriteToken(os, binary,
"</StatisticsPoolingComponentPrecomputedIndexes>");
544 "<StatisticsPoolingComponentPrecomputedIndexes>",
546 std::vector<std::pair<int32, int32> > indexes_cpu;
552 ExpectToken(is, binary,
"</StatisticsPoolingComponentPrecomputedIndexes>");
558 cfl->
GetValue(
"left-context", &left_context_);
559 cfl->
GetValue(
"right-context", &right_context_);
560 cfl->
GetValue(
"num-log-count-features", &num_log_count_features_);
561 cfl->
GetValue(
"output-stddevs", &output_stddevs_);
562 cfl->
GetValue(
"variance-floor", &variance_floor_);
565 KALDI_ERR <<
"Could not process these elements in initializer: " 568 if (!ok ||
input_dim_ <= 0 || left_context_ + right_context_ <= 0 ||
569 num_log_count_features_ < 0)
570 KALDI_ERR <<
"Invalid initializer for layer of type " 577 num_log_count_features_(0), output_stddevs_(false),
578 variance_floor_(1.0e-10), require_direct_input_(false) { }
619 ExpectToken(is, binary,
"</StatisticsPoolingComponent>");
627 WriteToken(os, binary,
"<StatisticsPoolingComponent>");
636 WriteToken(os, binary,
"<NumLogCountFeatures>");
642 WriteToken(os, binary,
"</StatisticsPoolingComponent>");
646 std::vector<Index> *input_indexes,
647 std::vector<Index> *output_indexes)
const {
648 std::sort(input_indexes->begin(), input_indexes->end(),
650 std::sort(output_indexes->begin(), output_indexes->end(),
656 const Index &output_index,
657 std::vector<Index> *desired_indexes)
const {
658 desired_indexes->clear();
659 Index input_index(output_index);
660 int32 middle_t = output_index.
t,
666 desired_indexes->push_back(input_index);
672 const Index &output_index,
674 std::vector<Index> *used_inputs)
const {
676 used_inputs->clear();
683 Index input_index(output_index);
684 int32 output_t = output_index.
t,
690 if (input_index_set(input_index))
698 if (input_index_set(input_index)) {
700 used_inputs->push_back(input_index);
710 const std::vector<Index> &input_indexes,
711 const std::vector<Index> &output_indexes,
712 bool need_backprop)
const {
713 int32 num_input_indexes = input_indexes.size(),
714 num_output_indexes = output_indexes.size();
719 invalid_pair.
first = -1;
723 std::vector<Int32Pair> forward_indexes_cpu(num_output_indexes,
730 std::vector<Int32Pair> backward_indexes_cpu(num_input_indexes,
734 unordered_map<Index, int32, IndexHasher> index_to_input_pos;
735 for (
int32 i = 0;
i < num_input_indexes;
i++)
736 index_to_input_pos[input_indexes[
i]] =
i;
738 for (
int32 i = 0;
i < num_output_indexes;
i++) {
739 Index input_index(output_indexes[
i]);
740 int32 middle_t = input_index.
t,
745 unordered_map<Index, int32, IndexHasher>::iterator iter =
746 index_to_input_pos.find(input_index);
747 if (iter != index_to_input_pos.end()) {
748 int32 input_pos = iter->second;
749 if (forward_indexes_cpu[i].first == -1) {
750 forward_indexes_cpu[
i].first = input_pos;
751 forward_indexes_cpu[
i].second = input_pos + 1;
753 KALDI_ASSERT(forward_indexes_cpu[i].second == input_pos);
754 forward_indexes_cpu[
i].second++;
756 if (backward_indexes_cpu[input_pos].first == -1) {
757 backward_indexes_cpu[input_pos].first =
i;
758 backward_indexes_cpu[input_pos].second = i + 1;
760 KALDI_ASSERT(backward_indexes_cpu[input_pos].second == i);
761 backward_indexes_cpu[input_pos].second++;
767 for (
int32 i = 0;
i < num_input_indexes;
i++) {
815 variance(*out, 0, num_rows_out,
827 const std::string &debug_info,
835 NVTX_RANGE(
"StatisticsPoolingComponent::Backprop");
850 variance_deriv(out_deriv, 0, num_rows_out,
852 mean_value(out_value, 0, num_rows_out,
854 stddev_value(out_value, 0, num_rows_out,
860 variance_deriv.DivElements(stddev_value);
861 variance_deriv.Scale(0.5);
867 mean_deriv.AddMatMatElements(-2.0, mean_value, variance_deriv, 1.0);
906 if (tok ==
"<Scale>") {
928 ExpectToken(is, binary,
"</BackpropTruncationComponent>");
933 WriteToken(os, binary,
"<BackpropTruncationComponent>");
938 WriteToken(os, binary,
"<ClippingThreshold>");
944 WriteToken(os, binary,
"<RecurrenceInterval>");
946 WriteToken(os, binary,
"<NumElementsClipped>");
948 WriteToken(os, binary,
"<NumElementsZeroed>");
950 WriteToken(os, binary,
"<NumElementsProcessed>");
952 WriteToken(os, binary,
"<NumZeroingBoundaries>");
954 WriteToken(os, binary,
"</BackpropTruncationComponent>");
960 "<BackpropTruncationComponentPrecomputedIndexes>");
962 zeroing.Write(ostream, binary);
966 "</BackpropTruncationComponentPrecomputedIndexes>");
972 "<BackpropTruncationComponentPrecomputedIndexes>",
974 zeroing.Read(istream, binary);
978 "</BackpropTruncationComponentPrecomputedIndexes>");
982 std::ostringstream stream;
983 stream <<
Type() <<
", dim=" << dim_
984 <<
", scale=" << scale_
985 <<
", count=" << std::setprecision(3) << count_ << std::setprecision(6)
986 <<
", recurrence-interval=" << recurrence_interval_
987 <<
", clipping-threshold=" << clipping_threshold_
988 <<
", clipped-proportion=" 989 << (count_ > 0.0 ? num_clipped_ / count_ : 0)
990 <<
", zeroing-threshold=" << zeroing_threshold_
991 <<
", zeroing-interval=" << zeroing_interval_
992 <<
", zeroed-proportion=" 993 << (count_zeroing_boundaries_ > 0.0 ?
994 num_zeroed_ / count_zeroing_boundaries_ : 0)
995 <<
", count-zeroing-boundaries=" 996 <<
static_cast<int32>(count_zeroing_boundaries_);
1003 int32 recurrence_interval) {
1004 KALDI_ASSERT(clipping_threshold >= 0 && zeroing_threshold >= 0 &&
1005 scale > 0.0 && zeroing_interval > 0 &&
1006 recurrence_interval > 0 && dim > 0);
1009 clipping_threshold_ = clipping_threshold;
1010 zeroing_threshold_ = zeroing_threshold;
1011 zeroing_interval_ = zeroing_interval;
1012 recurrence_interval_ = recurrence_interval;
1016 count_zeroing_boundaries_ = 0.0;
1022 bool ok = cfl->
GetValue(
"dim", &dim);
1024 clipping_threshold = 30.0,
1025 zeroing_threshold = 15.0;
1026 int32 zeroing_interval = 20, recurrence_interval = 1;
1028 cfl->
GetValue(
"clipping-threshold", &clipping_threshold);
1029 cfl->
GetValue(
"zeroing-threshold", &zeroing_threshold);
1030 cfl->
GetValue(
"zeroing-interval", &zeroing_interval);
1031 cfl->
GetValue(
"recurrence-interval", &recurrence_interval);
1033 clipping_threshold < 0 || zeroing_threshold < 0 || zeroing_interval < 1 ||
1034 recurrence_interval < 1 || dim <= 0)
1035 KALDI_ERR <<
"Invalid initializer for layer of type " 1037 Init(dim, scale, clipping_threshold, zeroing_threshold,
1038 zeroing_interval, recurrence_interval);
1061 const std::vector<Index> &input_indexes,
1062 const std::vector<Index> &output_indexes,
1063 bool need_backprop)
const {
1064 int32 num_input_indexes = input_indexes.size(),
1065 num_output_indexes = output_indexes.size();
1069 for (
int32 i = 0;
i < num_output_indexes;
i++) {
1070 const int32 output_n = output_indexes[
i].n;
1071 const int32 output_t = output_indexes[
i].t;
1083 zeroing_cpu(
i) = -1.0;
1114 NVTX_RANGE(
"BackpropTruncationComponent::Backprop");
1123 in_deriv->
Scale(scale_);
1130 (clipping_threshold_ <= 0.0 ? 1.0e+10 : clipping_threshold_);
1134 clipping_scales.
AddDiagMat2(pow(clipping_threshold, -2), *in_deriv,
1138 int32 num_not_scaled;
1139 clipping_scales.ApplyFloor(1.0, &num_not_scaled);
1141 clipping_scales.ApplyPow(-0.5);
1143 if (to_update != NULL) {
1144 to_update->
num_clipped_ += (clipping_scales.Dim() - num_not_scaled);
1145 to_update->
count_ += clipping_scales.Dim();
1150 (zeroing_threshold_ <= 0.0 ? 1.0e+10 : zeroing_threshold_);
1155 zeroing_scales_vec.
Set(-pow(zeroing_threshold, 2));
1157 zeroing_scales_vec.AddDiagMat2(1.0, *in_deriv,
kNoTrans, 1.0);
1166 if (to_update != NULL) {
1170 zeroing_scales_vec.
Add(1.0);
1184 count_zeroing_boundaries_ = 0.0;
1192 count_zeroing_boundaries_ *= scale;
1193 num_clipped_ *= scale;
1194 num_zeroed_ *= scale;
1203 count_ += alpha * other->
count_;
1211 std::ostringstream stream;
1215 <<
", is-updatable=" << std::boolalpha << is_updatable_
1216 <<
", use-natural-gradient=" << std::boolalpha
1217 << use_natural_gradient_;
1219 return stream.str();
1224 use_natural_gradient_(true) { }
1242 const std::string &debug_info,
1278 if (token ==
"<ConstantComponent>") {
1281 if (token ==
"<LearningRateFactor>") {
1287 if (token ==
"<IsGradient>") {
1293 if (token ==
"<MaxChange>") {
1299 if (token ==
"<LearningRate>") {
1305 if (token !=
"<Output>") {
1306 KALDI_ERR <<
"Expected token <Output>, got " << token;
1322 WriteToken(os, binary,
"<UseNaturalGradient>");
1324 WriteToken(os, binary,
"</ConstantComponent>");
1353 output_.AddVec(stddev, temp_output);
1366 int32 output_dim = 0;
1368 bool ok = cfl->
GetValue(
"output-dim", &output_dim);
1371 BaseFloat output_mean = 0.0, output_stddev = 0.0;
1372 cfl->
GetValue(
"output-mean", &output_mean);
1373 cfl->
GetValue(
"output-stddev", &output_stddev);
1379 output.
Scale(output_stddev);
1380 output.
Add(output_mean);
1403 std::ostringstream stream;
1405 <<
", output-dim=" << output_dim_
1406 <<
", dropout-proportion=" << dropout_proportion_;
1408 stream <<
", continuous=true";
1409 return stream.str();
1413 output_dim_(-1), dropout_proportion_(0.5), continuous_(false) { }
1427 KALDI_ASSERT(dropout_proportion >= 0.0 && dropout_proportion <= 1.0);
1429 if (dropout_proportion == 0) {
1439 out->
Scale(dropout_proportion * 4.0);
1441 out->
Add(1.0 - (2.0 * dropout_proportion));
1447 out->
Set(1.0 - dropout_proportion);
1452 out->
Add(-dropout_proportion);
1465 temp.
Add(-dropout_proportion);
1467 temp.
Add(-1.0 + (2.0 * dropout_proportion));
1495 ExpectToken(is, binary,
"</DropoutMaskComponent>");
1500 WriteToken(os, binary,
"<DropoutMaskComponent>");
1503 WriteToken(os, binary,
"<DropoutProportion>");
1509 WriteToken(os, binary,
"</DropoutMaskComponent>");
1530 std::ostringstream stream;
1533 <<
", block-dim=" << block_dim_
1536 stream <<
", continuous=true";
1537 if (specaugment_max_proportion_ != 0)
1538 stream <<
", specaugment-max-proportion=" << specaugment_max_proportion_
1539 <<
", specaugment-max-regions=" << specaugment_max_regions_;
1540 if (time_period_ > 0)
1541 stream <<
", time-period=" << time_period_;
1542 return stream.str();
1546 dim_(-1), block_dim_(-1), time_period_(0),
1548 specaugment_max_proportion_(0.0),
1549 specaugment_max_regions_(1),
1586 num_rows_reshaped = num_rows * dim_multiple;
1597 const std::string &debug_info,
1605 NVTX_RANGE(
"GeneralDropoutComponent::Backprop");
1626 num_rows_reshaped = num_rows * dim_multiple;
1646 ExpectToken(is, binary,
"<SpecAugmentMaxProportion>");
1649 ExpectToken(is, binary,
"<SpecAugmentMaxRegions>");
1670 ExpectToken(is, binary,
"</GeneralDropoutComponent>");
1675 WriteToken(os, binary,
"<GeneralDropoutComponent>");
1682 WriteToken(os, binary,
"<DropoutProportion>");
1685 WriteToken(os, binary,
"<SpecAugmentMaxProportion>");
1688 WriteToken(os, binary,
"<SpecAugmentMaxRegions>");
1696 WriteToken(os, binary,
"</GeneralDropoutComponent>");
1730 KALDI_ERR <<
"Invalid config values: specaugment-max-proportion = " 1740 int32 num_mask_rows)
const {
1752 int32 specaugment_max_zeroed =
static_cast<int32>(
1754 for (
int32 seq = 0; seq < num_mask_rows; seq++) {
1758 int32 num_bins_zeroed =
RandInt(0, specaugment_max_zeroed);
1759 if (num_bins_zeroed != 0) {
1763 for (
int32 i = start_bin;
i < start_bin + num_bins_zeroed;
i++)
1764 this_mask(
i % num_freq_bins) = 0.0;
1773 int32 half_bin_size = num_freq_bins / 2,
1774 quarter_bin_size = half_bin_size / 2,
1775 start_bin =
RandInt(0, num_freq_bins - 1),
1776 end_bin = start_bin + half_bin_size;
1777 for (
int32 i = 0;
i < quarter_bin_size;
i++) {
1778 BaseFloat &a = this_mask((start_bin +
i) % num_freq_bins),
1779 &b = this_mask((end_bin -
i) % num_freq_bins);
1796 ans->
Add(-dropout_proportion);
1801 ans->
Scale(1.0 / (1.0 - dropout_proportion));
1803 ans->
Scale(dropout_proportion * 4.0);
1805 ans->
Add(1.0 - (2.0 * dropout_proportion));
1812 const std::vector<Index> &input_indexes,
1813 const std::vector<Index> &output_indexes,
1814 bool need_backprop)
const {
1821 std::vector<int32> indexes(size);
1830 for (int32
i = 0;
i < size;
i++) {
1831 int32
n = input_indexes[
i].n,
1834 std::pair<int32, int32> p(n, t);
1836 std::unordered_map<std::pair<int32,int32>,
int32,
1839 if (iter != m.end()) {
1840 indexes[
i] = iter->second;
1843 indexes[
i] = cur_row;
1849 if (multiple == 1) {
1853 std::vector<int32> repeated_indexes;
1854 repeated_indexes.reserve(size * multiple);
1855 for (int32
i = 0;
i < size;
i++) {
1856 int32 row = indexes[
i];
1857 for (int32
j = 0;
j < multiple;
j++)
1858 repeated_indexes.push_back(row);
1866 bool binary)
const {
1868 "<GeneralDropoutComponentPrecomputedIndexes>");
1872 indexes.Write(os, binary);
1874 "</GeneralDropoutComponentPrecomputedIndexes>");
1880 "<GeneralDropoutComponentPrecomputedIndexes>",
1884 indexes.Read(is, binary);
1886 "</GeneralDropoutComponentPrecomputedIndexes>");
1890 std::ostringstream stream;
1893 <<
", zeroed-proportion=" << zeroed_proportion_
1894 <<
", time-mask-max-frames=" << time_mask_max_frames_;
1895 return stream.str();
1899 dim_(-1), zeroed_proportion_(0.25),
1900 time_mask_max_frames_(10) { }
1932 const std::string &debug_info,
1940 NVTX_RANGE(
"SpecAugmentTimeMaskComponent::Backprop");
1972 ExpectToken(is, binary,
"</SpecAugmentTimeMaskComponent>");
1977 WriteToken(os, binary,
"<SpecAugmentTimeMaskComponent>");
1980 WriteToken(os, binary,
"<ZeroedProportion>");
1982 WriteToken(os, binary,
"<TimeMaskMaxFrames>");
1986 WriteToken(os, binary,
"</SpecAugmentTimeMaskComponent>");
2008 const std::vector<std::vector<int32> > &indexes = indexes_in.
indexes;
2009 int32 num_sequences = indexes.size();
2012 non_time_mask_max_frames = time_mask_max_frames * (1-z) / z;
2014 non_time_mask_max_frames > 0);
2017 for (
int32 s = 0; s < num_sequences; s++) {
2020 const std::vector<int32> this_row_indexes = indexes[s];
2021 int32 seq_length = this_row_indexes.size();
2025 while (t < seq_length) {
2030 int32 nonzeroed_length =
RandInt(1, non_time_mask_max_frames);
2031 for (; t < seq_length && nonzeroed_length > 0; t++, nonzeroed_length--)
2032 mask(this_row_indexes[t]) = 1.0;
2034 int32 zeroed_length =
RandInt(1, time_mask_max_frames);
2035 for (; t < seq_length && zeroed_length > 0; t++, zeroed_length--)
2036 mask(this_row_indexes[t]) = 0.0;
2044 const std::vector<Index> &input_indexes,
2045 const std::vector<Index> &output_indexes,
2046 bool need_backprop)
const {
2051 int32 size = input_indexes.size();
2057 std::vector<std::tuple<int32, int32, int32> > sort_indexes(size);
2059 std::unordered_set<int32> all_n_values;
2063 all_n_values.insert(n);
2064 std::get<0>(sort_indexes[
i]) = n;
2065 std::get<1>(sort_indexes[
i]) = input_indexes[
i].t;
2066 std::get<2>(sort_indexes[
i]) =
i;
2068 std::sort(sort_indexes.begin(), sort_indexes.end());
2072 int32 num_n_values = all_n_values.size(),
2074 cur_n_value = std::get<0>(sort_indexes[0]);
2075 ans->
indexes.resize(num_n_values);
2077 std::tuple<int32, int32, int32> &tp(sort_indexes[
i]);
2078 int32 n = std::get<0>(tp),
2079 row_index = std::get<2>(tp);
2081 if (n > cur_n_value) {
2086 ans->
indexes[n_idx].push_back(row_index);
2095 bool binary)
const {
2097 "<SpecAugmentTimeMaskComponentPrecomputedIndexes>");
2099 int32 size = indexes.size();
2105 "</SpecAugmentTimeMaskComponentPrecomputedIndexes>");
2111 "<SpecAugmentTimeMaskComponentPrecomputedIndexes>",
2116 indexes.resize(size);
2120 "</SpecAugmentTimeMaskComponentPrecomputedIndexes>");
2122 for (
auto v : indexes) tot_size += v.size();
std::vector< std::vector< int32 > > indexes
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
void MulElements(const CuVectorBase< Real > &v)
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
MatrixIndexT Stride() const
virtual void Read(std::istream &is, bool binary)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
static int32 DivideRoundingDown(int32 a, int32 b)
Returns a / b, rounding towards negative infinity in all cases.
void ApplyPow(Real power)
const std::string WholeLine()
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
void SetZero()
Math operations.
virtual void Read(std::istream &istream, bool binary)
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
BaseFloat zeroing_threshold_
void ComputeInputIndexAndBlock(const Index &output_index, Index *input_index, int32 *block) const
void WriteIntegerPairVector(std::ostream &os, bool binary, const std::vector< std::pair< T, T > > &v)
Function for writing STL vectors of pairs of integer types.
void CopyFromVec(const std::vector< T > &src)
This function resizes if needed.
BaseFloat dropout_proportion_
bool use_natural_gradient_
virtual int32 OutputDim() const
Returns output-dimension of this component.
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
void CopyToVec(std::vector< T > *dst) const
This function resizes *dst if needed.
int32 recurrence_interval_
static void CopyPairVector(const CuArray< Int32Pair > &in, std::vector< std::pair< int32, int32 > > *out)
void CopyColFromMat(const CuMatrixBase< Real > &mat, MatrixIndexT col)
Abstract base-class for neural-net components.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void AddRows(Real alpha, const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
virtual void Read(std::istream &is, bool binary)
void AddRowRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, do (*this)(r, c) += src(j, c), where j ranges from ind...
void Init(int32 dim, BaseFloat scale, BaseFloat clipping_threshold, BaseFloat zeroing_threshold, int32 zeroing_interval, int32 recurrence_interval)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
An abstract representation of a set of Indexes.
bool WithProb(BaseFloat prob, struct RandomState *state)
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
CuVector< BaseFloat > * GetMemo(const SpecAugmentTimeMaskComponentPrecomputedIndexes &indexes) const
void InitLearningRatesFromConfig(ConfigLine *cfl)
BaseFloat dropout_proportion_
void ApplyFloor(Real floor_val)
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual Component * Copy() const
Copies component (deep copy).
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Keywords for search: natural gradient, naturalgradient, NG-SGD.
void DivRowsVec(const CuVectorBase< Real > &div)
divide i'th row by scale[i]
void AddMatMatElements(const Real alpha, const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const Real beta)
*this = beta * *this + alpha * A .* B (.* element by element multiplication)
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
BaseFloat max_change_
configuration value for imposing max-change
virtual bool IsComputable(const MiscComputationInfo &misc_info, const Index &output_index, const IndexSet &input_index_set, std::vector< Index > *used_inputs) const
This function only does something interesting for non-simple Components, and it exists to make it pos...
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void ReadIntegerPairVector(std::istream &is, bool binary, std::vector< std::pair< T, T > > *v)
Function for reading STL vector of pairs of integer types.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
void CopyColFromVec(const CuVectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
virtual void PerturbParams(BaseFloat stddev)
This function is to be used in testing.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
SpecAugmentTimeMaskComponent implements the time part of SpecAugment.
void ExpectOneOrTwoTokens(std::istream &is, bool binary, const std::string &token1, const std::string &token2)
This function is like ExpectToken but for two tokens, and it will either accept token1 and then token...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
virtual Component * Copy() const
Copies component (deep copy).
CuMatrix< BaseFloat > * GetMemo(int32 num_mask_rows) const
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual BaseFloat DotProduct(const UpdatableComponent &other) const
Computes dot-product between parameters of two instances of a Component.
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
std::string UnusedValues() const
returns e.g.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
SpecAugmentTimeMaskComponent()
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
static void ExpectToken(const std::string &token, const std::string &what_we_are_parsing, const std::string **next_token)
virtual void Write(std::ostream &ostream, bool binary) const
CuArray< Int32Pair > forward_indexes
BaseFloat zeroed_proportion_
int32 time_mask_max_frames_
bool require_direct_input_
CuArray< Int32Pair > backward_indexes
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
int32 num_log_count_features_
void SetZero()
Math operations, some calling kernels.
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void MulRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) *= alpha * src.row(indexes[r]), where '*=' is elementwise multiplica...
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual void Scale(BaseFloat scale)
This virtual function, when called on an UpdatableComponent, scales the parameters by "scale" when c...
void CopyRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies row r from row indexes[r] of src.
BaseFloat learning_rate_
learning rate (typically 0.0..0.01)
BaseFloat variance_floor_
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
void PreconditionDirections(CuMatrixBase< BaseFloat > *X, BaseFloat *scale)
This call implements the main functionality of this class.
virtual void Scale(BaseFloat scale)
This virtual function, when called on an UpdatableComponent, scales the parameters by "scale" when c...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
BaseFloat learning_rate_factor_
learning rate factor (normally 1.0, but can be set to another value so that when you call SetLear...
This class is used for a piece of a CuMatrix.
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
void Scale(Real alpha)
Multiplies all elements by this constant.
int PeekToken(std::istream &is, bool binary)
PeekToken will return the first character of the next token, or -1 if end of file.
void Swap(OnlineNaturalGradient *other)
Real Sum() const
Returns sum of the elements.
void SetRandn()
Set vector to random normally-distributed noise.
StatisticsPoolingComponent()
double count_zeroing_boundaries_
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
virtual void Write(std::ostream &os, bool binary) const
int32 specaugment_max_regions_
virtual void Read(std::istream &is, bool binary)
const Real * Data() const
Return data pointer (const).
virtual void Write(std::ostream &ostream, bool binary) const
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
GeneralDropoutComponent implements dropout, including a continuous variant where the thing we multipl...
virtual void Write(std::ostream &os, bool binary) const
virtual void UnVectorize(const VectorBase< BaseFloat > &params)
Converts the parameters from vector form.
CuVector< BaseFloat > output_
Matrix for CUDA computing.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
MatrixIndexT NumCols() const
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
A class representing a vector.
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
virtual void ReorderIndexes(std::vector< Index > *input_indexes, std::vector< Index > *output_indexes) const
This function only does something interesting for non-simple Components.
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
BaseFloat clipping_threshold_
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
bool is_gradient_
True if this component is to be treated as a gradient rather than as parameters.
#define KALDI_ASSERT(cond)
void WriteUpdatableCommon(std::ostream &is, bool binary) const
virtual void GetInputIndexes(const MiscComputationInfo &misc_info, const Index &output_index, std::vector< Index > *desired_indexes) const
This function only does something interesting for non-simple Components.
void Init(int32 input_dim, int32 output_dim)
virtual void Read(std::istream &is, bool binary)
Read function (used after we know the type of the Component); accepts input that is missing the token...
Real * Data()
Returns a pointer to the start of the vector's data.
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
bool HasUnusedValues() const
bool GetValue(const std::string &key, std::string *value)
OnlineNaturalGradient preconditioner_
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
virtual void * Propagate(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, CuMatrixBase< BaseFloat > *out) const
Propagate function.
virtual void Read(std::istream &istream, bool binary)
CuVector< BaseFloat > zeroing
void CopyToRows(const CuArrayBase< Real *> &dst) const
For each row r of this matrix, copies it to the array of floats at the location given by dst[r]...
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *to_update, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
virtual Component * Copy() const
Copies component (deep copy).
void PrintParameterStats(std::ostringstream &os, const std::string &name, const CuVectorBase< BaseFloat > &params, bool include_mean)
Print to 'os' some information about the mean and standard deviation of some parameters, used in Info() functions in nnet-simple-component.cc.
virtual ComponentPrecomputedIndexes * PrecomputeIndexes(const MiscComputationInfo &misc_info, const std::vector< Index > &input_indexes, const std::vector< Index > &output_indexes, bool need_backprop) const
This function must return NULL for simple Components.
virtual Component * Copy() const
Copies component (deep copy).
MatrixIndexT NumRows() const
Dimensions.
Provides a vector abstraction class.
void Add(Real c)
Add a constant to each element of a vector.
MatrixIndexT Dim() const
Return the vector dimension.
virtual std::string Type() const
Returns a string such as "SigmoidComponent", describing the type of the object.
std::vector< std::pair< int32, int32 > > pairs
BaseFloat specaugment_max_proportion_
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
virtual std::string Info() const
Returns some text-form information about this component, for diagnostics.
CuRand< BaseFloat > random_generator_
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
virtual Component * Copy() const
Copies component (deep copy).
virtual void Write(std::ostream &os, bool binary) const
Write component to stream.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
void ComputeInputPointers(const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in, int32 num_output_rows, std::vector< const BaseFloat *> *input_pointers) const
This file contains declarations of components that are not "simple", meaning they care about the inde...
virtual void Write(std::ostream &os, bool binary) const
virtual void InitFromConfig(ConfigLine *cfl)
Initialize, from a ConfigLine object.
virtual void Backprop(const std::string &debug_info, const ComponentPrecomputedIndexes *indexes, const CuMatrixBase< BaseFloat > &in_value, const CuMatrixBase< BaseFloat > &out_value, const CuMatrixBase< BaseFloat > &out_deriv, void *memo, Component *, CuMatrixBase< BaseFloat > *in_deriv) const
Backprop function; depending on which of the arguments 'to_update' and 'in_deriv' are non-NULL...
void Set(Real)
Sets all elements to a specific value.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
GeneralDropoutComponent()
A hashing function-object for pairs of ints.