34 static void GetIoNames(
const std::vector<NnetExample> &src,
35 std::vector<std::string> *names_vec) {
36 std::set<std::string> names;
37 std::vector<NnetExample>::const_iterator iter = src.begin(), end = src.end();
38 for (; iter != end; ++iter) {
39 std::vector<NnetIo>::const_iterator iter2 = iter->io.begin(),
40 end2 = iter->io.end();
41 for (; iter2 != end2; ++iter2)
42 names.insert(iter2->name);
50 static void GetIoSizes(
const std::vector<NnetExample> &src,
51 const std::vector<std::string> &names,
52 std::vector<int32> *sizes) {
53 std::vector<int32> dims(names.size(), -1);
55 sizes->resize(names.size(), 0);
56 std::vector<std::string>::const_iterator names_begin = names.begin(),
57 names_end = names.end();
58 std::vector<NnetExample>::const_iterator iter = src.begin(), end = src.end();
59 for (; iter != end; ++iter) {
60 std::vector<NnetIo>::const_iterator iter2 = iter->io.begin(),
61 end2 = iter->io.end();
62 for (; iter2 != end2; ++iter2) {
64 std::vector<std::string>::const_iterator names_iter =
65 std::lower_bound(names_begin, names_end, io.
name);
67 int32 i = names_iter - names_begin;
71 }
else if (dims[i] != this_dim) {
72 KALDI_ERR <<
"Merging examples with inconsistent feature dims: " 73 << dims[
i] <<
" vs. " << this_dim <<
" for '" 78 (*sizes)[
i] += this_size;
88 static void MergeIo(
const std::vector<NnetExample> &src,
89 const std::vector<std::string> &names,
90 const std::vector<int32> &sizes,
94 int32 num_feats = names.size();
96 std::vector<int32> cur_size(num_feats, 0);
99 std::vector<std::vector<GeneralMatrix const*> > output_lists(num_feats);
102 merged_eg->
io.clear();
103 merged_eg->
io.resize(num_feats);
104 for (
int32 f = 0; f < num_feats; f++) {
106 int32 size = sizes[f];
112 std::vector<std::string>::const_iterator names_begin = names.begin(),
113 names_end = names.end();
114 std::vector<NnetExample>::const_iterator eg_iter = src.begin(),
116 for (
int32 n = 0; eg_iter != eg_end; ++eg_iter, ++
n) {
117 std::vector<NnetIo>::const_iterator io_iter = eg_iter->io.begin(),
118 io_end = eg_iter->io.end();
119 for (; io_iter != io_end; ++io_iter) {
120 const NnetIo &io = *io_iter;
121 std::vector<std::string>::const_iterator names_iter =
122 std::lower_bound(names_begin, names_end, io.
name);
125 int32 f = names_iter - names_begin;
127 int32 &this_offset = cur_size[f];
131 output_lists[f].push_back(&(io.
features));
134 NnetIo &output_io = merged_eg->
io[f];
136 output_io.
indexes.begin() + this_offset);
137 std::vector<Index>::iterator output_iter = output_io.
indexes.begin();
139 for (
int32 i = this_offset;
i < this_offset + this_size;
i++) {
143 "Merging already-merged egs? Not currentlysupported.");
144 output_iter[
i].n =
n;
146 this_offset += this_size;
150 for (
int32 f = 0; f < num_feats; f++) {
152 &(merged_eg->
io[f].features));
155 merged_eg->
io[f].features.Compress();
166 std::vector<std::string> io_names;
169 std::vector<int32> io_sizes;
171 MergeIo(src, io_names, io_sizes, compress, merged_eg);
175 const std::vector<std::string> &exclude_names,
179 std::vector<NnetIo>::iterator iter = eg->
io.begin(),
181 for (; iter != end; iter++) {
182 bool name_is_excluded =
false;
183 std::vector<std::string>::const_iterator
184 exclude_iter = exclude_names.begin(),
185 exclude_end = exclude_names.end();
186 for (; exclude_iter != exclude_end; ++exclude_iter) {
187 if (iter->name == *exclude_iter) {
188 name_is_excluded =
true;
192 if (!name_is_excluded) {
194 std::vector<Index>::iterator index_iter = iter->indexes.begin(),
195 index_end = iter->indexes.end();
196 for (; index_iter != index_end; ++index_iter)
197 index_iter->t += t_offset;
204 bool need_model_derivative,
205 bool store_component_stats,
208 request->
inputs.reserve(eg.
io.size());
213 for (
size_t i = 0;
i < eg.
io.size();
i++) {
215 const std::string &name = io.
name;
217 if (node_index == -1 ||
219 KALDI_ERR <<
"Nnet example has input or output named '" << name
220 <<
"', but no such input or output node is in the network.";
222 std::vector<IoSpecification> &dest =
224 dest.resize(dest.size() + 1);
231 if (request->
inputs.empty())
232 KALDI_ERR <<
"No inputs in computation request.";
234 KALDI_ERR <<
"No outputs in computation request.";
242 std::vector<unsigned char> char_vec(dim);
249 char_vec[
i] =
static_cast<unsigned char>(255.0 * value + 0.5);
254 vec.
Write(os, binary);
263 std::vector<unsigned char> char_vec;
265 int32 dim = char_vec.size();
269 data[
i] = scale * char_vec[
i];
271 vec->
Read(is, binary);
277 int32 *num_frames_overlap) {
278 if (*num_frames % frame_subsampling_factor != 0) {
279 int32 new_num_frames = frame_subsampling_factor *
280 (*num_frames / frame_subsampling_factor + 1);
281 KALDI_LOG <<
"Rounding up --num-frames=" << (*num_frames)
282 <<
" to a multiple of --frame-subsampling-factor=" 283 << frame_subsampling_factor
284 <<
", now --num-frames=" << new_num_frames;
285 *num_frames = new_num_frames;
287 if (*num_frames_overlap % frame_subsampling_factor != 0) {
288 int32 new_num_frames_overlap = frame_subsampling_factor *
289 (*num_frames_overlap / frame_subsampling_factor + 1);
290 KALDI_LOG <<
"Rounding up --num-frames-overlap=" << (*num_frames_overlap)
291 <<
" to a multiple of --frame-subsampling-factor=" 292 << frame_subsampling_factor
293 <<
", now --num-frames-overlap=" << new_num_frames_overlap;
294 *num_frames_overlap = new_num_frames_overlap;
296 if (*num_frames_overlap < 0 || *num_frames_overlap >= *num_frames) {
297 KALDI_ERR <<
"--num-frames-overlap=" << (*num_frames_overlap) <<
" < " 298 <<
"--num-frames=" << (*num_frames);
308 KALDI_ERR <<
"Invalid option (expected comma-separated list of integers): " 314 KALDI_ERR <<
"Invalid value --frame-subsampling-factor=" << m;
316 bool changed =
false;
322 if (value % m != 0) {
323 value = m * ((value / m) + 1);
329 std::ostringstream rounded_num_frames_str;
332 rounded_num_frames_str <<
',';
336 <<
" to multiples of --frame-subsampling-factor=" << m
337 <<
", to: " << rounded_num_frames_str.str();
344 total_num_utterances_(0), total_input_frames_(0),
345 total_frames_overlap_(0), total_num_chunks_(0),
346 total_frames_in_chunks_(0) {
349 KALDI_ERR <<
"You need to call ComputeDerived() on the " 350 "ExampleGenerationConfig().";
360 <<
"100 frames per second)";
364 output_percent_no_overlap = output_percent - overlap_percent;
366 KALDI_LOG <<
"Average chunk length was " << average_chunk_length
367 <<
" frames; overlap between adjacent chunks was " 368 << overlap_percent <<
"% of input length; length of output was " 369 << output_percent <<
"% of input length (minus overlap = " 370 << output_percent_no_overlap <<
"%).";
372 std::ostringstream os;
373 os << std::setprecision(4);
376 int32 chunk_size = iter->first,
377 num_frames = chunk_size * iter->second;
380 os << chunk_size <<
" = " << percent_of_total <<
"%";
382 KALDI_LOG <<
"Output frames are distributed among chunk-sizes as follows: " 389 const std::vector<int32> &split)
const {
394 KALDI_ASSERT(num_frames_overlap < principal_num_frames &&
395 "--num-frames-overlap value is too high");
396 float overlap_proportion = num_frames_overlap / principal_num_frames;
397 float ans = std::accumulate(split.begin(), split.end(),
int32(0));
398 for (
size_t i = 0;
i + 1 < split.size();
i++) {
399 float min_adjacent_chunk_length = std::min(split[
i], split[i + 1]),
400 overlap = overlap_proportion * min_adjacent_chunk_length;
453 std::vector<std::vector<int32> > splits;
471 std::vector<std::vector<float> > costs_for_length(
472 max_utterance_length + 1);
473 int32 num_splits = splits.size();
475 for (
int32 u = 0; u <= max_utterance_length; u++)
476 costs_for_length[u].reserve(num_splits);
478 for (
int32 s = 0; s < num_splits; s++) {
479 const std::vector<int32> &split = splits[s];
481 int32 max_chunk_size = *std::max_element(split.begin(), split.end());
482 for (
int32 u = 0; u <= max_utterance_length; u++) {
487 float c = (default_duration >
float(u) ? default_duration -
float(u) :
488 2.0 * (u - default_duration));
489 if (u < max_chunk_size)
491 c = std::numeric_limits<float>::max();
493 costs_for_length[u].push_back(c);
500 for (
int32 u = 0; u <= max_utterance_length; u++) {
501 const std::vector<float> &costs = costs_for_length[u];
502 float min_cost = *std::min_element(costs.begin(), costs.end());
503 if (min_cost == std::numeric_limits<float>::max()) {
510 float cost_threshold = 1.9999;
516 std::vector<int32> possible_splits;
517 std::vector<float>::const_iterator iter = costs.begin(), end = costs.end();
519 for (; iter != end; ++iter,++s)
520 if (*iter < min_cost + cost_threshold)
525 std::ostringstream os;
526 for (
int32 u = 0; u <= max_utterance_length; u++) {
529 std::vector<std::vector<int32 > >::const_iterator
533 while (iter1 != end1) {
534 std::vector<int32>::const_iterator iter2 = iter1->begin(),
536 while (iter2 != end2) {
539 if (iter2 != end2) os <<
",";
542 if (iter1 != end1) os <<
"/";
545 if (u < max_utterance_length) os <<
", ";
548 KALDI_VLOG(3) <<
"Utterance-length-to-splits map is: " << os.str();
554 int32 utterance_length,
555 int32 supervision_length,
556 int32 length_tolerance)
const {
558 expected_supervision_length = (utterance_length + sf - 1) / sf;
559 if (std::abs(supervision_length - expected_supervision_length)
560 <= length_tolerance) {
564 KALDI_WARN <<
"Supervision does not have expected length for utterance " 565 << utt <<
": expected length = " << utterance_length
566 <<
", got " << supervision_length;
568 KALDI_WARN <<
"Supervision does not have expected length for utterance " 569 << utt <<
": expected length = (" << utterance_length
570 <<
" + " << sf <<
" - 1) / " << sf <<
" = " 571 << expected_supervision_length
572 <<
", got: " << supervision_length
573 <<
" (note: --frame-subsampling-factor=" << sf <<
")";
581 int32 utterance_length, std::vector<int32> *chunk_sizes)
const {
589 num_primary_length_repeats = 0;
592 while (utterance_length > max_tabulated_length) {
593 utterance_length -= (primary_length - num_frames_overlap);
594 num_primary_length_repeats++;
597 const std::vector<std::vector<int32> > &possible_splits =
599 if (possible_splits.empty()) {
600 chunk_sizes->clear();
603 int32 num_possible_splits = possible_splits.size(),
604 randomly_chosen_split =
RandInt(0, num_possible_splits - 1);
605 *chunk_sizes = possible_splits[randomly_chosen_split];
606 for (
int32 i = 0;
i < num_primary_length_repeats;
i++)
607 chunk_sizes->push_back(primary_length);
609 std::sort(chunk_sizes->begin(), chunk_sizes->end());
611 std::reverse(chunk_sizes->begin(), chunk_sizes->end());
623 max_length = primary_length;
624 for (
int32 i = 0;
i < num_lengths;
i++) {
628 return 2 * max_length + primary_length;
652 for (
int32 i = 0;
i < num_lengths;
i++) {
653 for (
int32 j = 0;
j < num_lengths;
j++) {
654 std::vector<int32> vec;
662 splits_set.insert(vec);
664 vec.push_back(primary_length);
665 std::sort(vec.begin(), vec.end());
669 for (SetType::const_iterator iter = splits_set.begin();
670 iter != splits_set.end(); ++iter)
671 splits->push_back(*iter);
672 std::sort(splits->begin(), splits->end());
681 int32 size = vec->size();
689 int32 common_part = n / size,
690 remainder = n % size,
i;
691 for (i = 0; i < remainder; i++) {
692 (*vec)[
i] = common_part + 1;
694 for (; i < size; i++) {
695 (*vec)[
i] = common_part;
697 std::random_shuffle(vec->begin(), vec->end());
704 const std::vector<int32> &magnitudes,
705 std::vector<int32> *vec) {
706 KALDI_ASSERT(!vec->empty() && vec->size() == magnitudes.size());
707 int32 size = vec->size();
714 float total_magnitude = std::accumulate(magnitudes.begin(), magnitudes.end(),
719 std::vector<std::pair<float, int32> > partial_counts;
720 int32 total_count = 0;
722 float this_count = n *
float(magnitudes[
i]) / total_magnitude;
725 int32 this_whole_count =
static_cast<int32>(this_count),
726 this_partial_count = this_count - this_whole_count;
727 (*vec)[
i] = this_whole_count;
728 total_count += this_whole_count;
729 partial_counts.push_back(std::pair<float, int32>(-this_partial_count, i));
731 KALDI_ASSERT(total_count <= n && total_count + size >= n);
732 std::sort(partial_counts.begin(), partial_counts.end());
737 for(; total_count <
n; i++,total_count++) {
738 (*vec)[partial_counts[
i].second]++;
745 bool enforce_subsampling_factor,
746 const std::vector<int32> &chunk_sizes,
747 std::vector<int32> *gap_sizes)
const {
748 if (chunk_sizes.empty()) {
754 int32 utterance_length_reduced = (utterance_length + (sf - 1)) / sf;
755 std::vector<int32> chunk_sizes_reduced(chunk_sizes);
761 chunk_sizes_reduced, gap_sizes);
762 KALDI_ASSERT(gap_sizes->size() ==
static_cast<size_t>(size));
767 int32 num_chunks = chunk_sizes.size(),
768 total_of_chunk_sizes = std::accumulate(chunk_sizes.begin(),
771 total_gap = utterance_length - total_of_chunk_sizes;
772 gap_sizes->resize(num_chunks);
779 if (num_chunks == 1) {
782 KALDI_ERR <<
"Chunk size is " << chunk_sizes[0]
783 <<
" but utterance length is only " 788 std::vector<int32> magnitudes(num_chunks - 1),
789 overlaps(num_chunks - 1);
794 for (
int32 i = 0;
i + 1 < num_chunks;
i++) {
795 magnitudes[
i] = std::min<int32>(chunk_sizes[
i], chunk_sizes[
i + 1]);
798 for (
int32 i = 0;
i + 1 < num_chunks;
i++) {
807 for (
int32 i = 1;
i < num_chunks;
i++)
808 (*gap_sizes)[
i] = overlaps[
i-1];
812 std::vector<int32> gaps(num_chunks + 1);
816 for (
int32 i = 0;
i < num_chunks;
i++)
817 (*gap_sizes)[
i] = gaps[
i];
823 int32 utterance_length,
824 std::vector<ChunkTimeInfo> *chunk_info) {
835 (*chunk_info).push_back(*info);
837 std::vector<int32> chunk_sizes;
839 std::vector<int32> gaps(chunk_sizes.size());
840 GetGapSizes(utterance_length,
true, chunk_sizes, &gaps);
841 int32 num_chunks = chunk_sizes.size();
842 chunk_info->resize(num_chunks);
843 for (
int32 i = 0;
i < num_chunks;
i++) {
864 int32 utterance_length,
865 const std::vector<ChunkTimeInfo> &chunk_info) {
869 for (
size_t c = 0; c < chunk_info.size(); c++) {
870 int32 chunk_size = chunk_info[c].num_frames;
872 int32 last_chunk_end = chunk_info[c-1].first_frame +
873 chunk_info[c-1].num_frames;
874 if (last_chunk_end > chunk_info[c].first_frame)
890 int32 utterance_length,
891 std::vector<ChunkTimeInfo> *chunk_info)
const {
893 int32 num_output_frames = (utterance_length + sf - 1) / sf;
898 std::vector<int32>
count(num_output_frames, 0);
899 int32 num_chunks = chunk_info->size();
900 for (
int32 i = 0;
i < num_chunks;
i++) {
907 for (
int32 i = 0;
i < num_chunks;
i++) {
911 for (
int32 t = t_start;
920 int32 ans = 0, num_ranges = ranges.size();
921 for (
int32 i = 0;
i < num_ranges;
i++) {
922 int32 possible_ans = 0;
923 if (max_value >= ranges[
i].first) {
924 if (max_value >= ranges[
i].second)
925 possible_ans = ranges[
i].second;
927 possible_ans = max_value;
929 if (possible_ans > ans)
938 std::vector<std::string> split_str;
940 if (split_str.empty())
943 int_set->
ranges.resize(split_str.size());
944 for (
size_t i = 0;
i < split_str.size();
i++) {
945 std::vector<int32> split_range;
947 if (split_range.size() < 1 || split_range.size() > 2 ||
948 split_range[0] > split_range.back() || split_range[0] <= 0)
950 int_set->
ranges[
i].first = split_range[0];
951 int_set->
ranges[
i].second = split_range.back();
959 if (measure_output_frames !=
"deprecated") {
960 KALDI_WARN <<
"The --measure-output-frames option is deprecated " 961 "and will be ignored.";
963 if (discard_partial_minibatches !=
"deprecated") {
964 KALDI_WARN <<
"The --discard-partial-minibatches option is deprecated " 965 "and will be ignored.";
967 std::vector<std::string> minibatch_size_split;
969 if (minibatch_size_split.empty()) {
970 KALDI_ERR <<
"Invalid option --minibatch-size=" << minibatch_size;
973 rules.resize(minibatch_size_split.size());
974 for (
size_t i = 0;
i < minibatch_size_split.size();
i++) {
975 int32 &eg_size = rules[
i].first;
976 IntSet &int_set = rules[
i].second;
980 std::string &this_rule = minibatch_size_split[
i];
981 if (this_rule.find(
'=') != std::string::npos) {
982 std::vector<std::string> rule_split;
984 if (rule_split.size() != 2) {
985 KALDI_ERR <<
"Could not parse option --minibatch-size=" 989 !ParseIntSet(rule_split[1], &int_set))
990 KALDI_ERR <<
"Could not parse option --minibatch-size=" 994 if (minibatch_size_split.size() != 1) {
995 KALDI_ERR <<
"Could not parse option --minibatch-size=" 996 << minibatch_size <<
" (all rules must have " 997 <<
"eg-size specified if >1 rule)";
999 if (!ParseIntSet(this_rule, &int_set))
1000 KALDI_ERR <<
"Could not parse option --minibatch-size=" 1006 std::vector<int32> all_sizes(minibatch_size_split.size());
1007 for (
size_t i = 0;
i < minibatch_size_split.size();
i++)
1008 all_sizes[
i] = rules[
i].first;
1009 std::sort(all_sizes.begin(), all_sizes.end());
1011 KALDI_ERR <<
"Invalid --minibatch-size=" << minibatch_size
1012 <<
" (repeated example-sizes)";
1018 int32 num_available_egs,
1019 bool input_ended)
const {
1021 int32 num_rules = rules.size();
1023 KALDI_ERR <<
"You need to call ComputeDerived() before calling " 1025 int32 min_distance = std::numeric_limits<int32>::max(),
1026 closest_rule_index = 0;
1027 for (
int32 i = 0;
i < num_rules;
i++) {
1028 int32 distance = std::abs(size_of_eg - rules[
i].first);
1029 if (distance < min_distance) {
1030 min_distance = distance;
1031 closest_rule_index =
i;
1037 int32 largest_size = rules[closest_rule_index].second.largest_size;
1038 if (largest_size <= num_available_egs)
1039 return largest_size;
1043 int32 s = rules[closest_rule_index].second.LargestValueInRange(
1052 size_t structure_hash,
1053 int32 minibatch_size) {
1054 std::pair<int32, size_t> p(example_size, structure_hash);
1057 unordered_map<int32, int32> &h = stats_[p].minibatch_to_num_written;
1058 unordered_map<int32, int32>::iterator iter = h.find(minibatch_size);
1059 if (iter == h.end())
1060 h[minibatch_size] = 1;
1066 size_t structure_hash,
1067 int32 num_discarded) {
1068 std::pair<int32, size_t> p(example_size, structure_hash);
1069 stats_[p].num_discarded += num_discarded;
1074 PrintSpecificStats();
1075 PrintAggregateStats();
1080 int64 num_distinct_egs_types = 0,
1082 total_discarded_egs = 0,
1083 total_discarded_egs_size = 0,
1085 total_non_discarded_egs = 0,
1088 total_non_discarded_egs_size = 0,
1090 num_minibatches = 0,
1091 num_distinct_minibatch_types = 0;
1097 StatsType::const_iterator eg_iter = stats_.begin(), eg_end = stats_.end();
1099 for (; eg_iter != eg_end; ++eg_iter) {
1100 int32 eg_size = eg_iter->first.first;
1102 num_distinct_egs_types++;
1106 unordered_map<int32, int32>::const_iterator
1109 for (; mb_iter != mb_end; ++mb_iter) {
1110 int32 mb_size = mb_iter->first,
1111 num_written = mb_iter->second;
1112 num_distinct_minibatch_types++;
1113 num_minibatches += num_written;
1114 total_non_discarded_egs += num_written * mb_size;
1115 total_non_discarded_egs_size += num_written * mb_size * eg_size;
1120 int64 total_input_egs = total_discarded_egs + total_non_discarded_egs,
1121 total_input_egs_size =
1122 total_discarded_egs_size + total_non_discarded_egs_size;
1124 float avg_input_egs_size = total_input_egs_size * 1.0 / total_input_egs;
1125 float percent_discarded = total_discarded_egs * 100.0 / total_input_egs;
1128 float avg_minibatch_size = total_non_discarded_egs * 1.0 / num_minibatches;
1130 std::ostringstream os;
1131 os << std::setprecision(4);
1132 os <<
"Processed " << total_input_egs
1133 <<
" egs of avg. size " << avg_input_egs_size
1134 <<
" into " << num_minibatches <<
" minibatches, discarding " 1135 << percent_discarded <<
"% of egs. Avg minibatch size was " 1136 << avg_minibatch_size <<
", #distinct types of egs/minibatches " 1137 <<
"was " << num_distinct_egs_types <<
"/" 1138 << num_distinct_minibatch_types;
1143 KALDI_LOG <<
"Merged specific eg types as follows [format: <eg-size1>=" 1144 "{<mb-size1>-><num-minibatches1>,<mbsize2>-><num-minibatches2>.../d=<num-discarded>}" 1145 ",<egs-size2>={...},... (note,egs-size == number of input " 1146 "frames including context).";
1147 std::ostringstream os;
1152 SortedMapType stats;
1153 stats.insert(stats_.begin(), stats_.end());
1154 SortedMapType::const_iterator eg_iter = stats.begin(), eg_end = stats.end();
1155 for (; eg_iter != eg_end; ++eg_iter) {
1156 int32 eg_size = eg_iter->first.first;
1157 if (eg_iter != stats.begin())
1159 os << eg_size <<
"={";
1161 unordered_map<int32, int32>::const_iterator
1164 for (; mb_iter != mb_end; ++mb_iter) {
1165 int32 mb_size = mb_iter->first,
1166 num_written = mb_iter->second;
1169 os << mb_size <<
"->" << num_written;
1180 for (
size_t i = 0;
i < a.
io.size();
i++) {
1190 finished_(false), num_egs_written_(0),
1191 config_(config), writer_(writer) { }
1201 std::vector<NnetExample*> &vec =
eg_to_egs_[eg];
1204 num_available = vec.size();
1205 bool input_ended =
false;
1208 if (minibatch_size != 0) {
1211 std::vector<NnetExample*> vec_copy(vec);
1216 std::vector<NnetExample> egs_to_merge(minibatch_size);
1217 for (
int32 i = 0;
i < minibatch_size;
i++) {
1218 egs_to_merge[
i].Swap(vec_copy[
i]);
1229 size_t structure_hash = eg_hasher(egs[0]);
1230 int32 minibatch_size = egs.size();
1234 std::ostringstream key;
1245 std::vector<std::vector<NnetExample*> > all_egs;
1249 for (; iter != end; ++iter)
1250 all_egs.push_back(iter->second);
1253 for (
size_t i = 0;
i < all_egs.size();
i++) {
1254 int32 minibatch_size;
1255 std::vector<NnetExample*> &vec = all_egs[
i];
1258 bool input_ended =
true;
1259 while (!vec.empty() &&
1261 input_ended)) != 0) {
1264 std::vector<NnetExample> egs_to_merge(minibatch_size);
1265 for (
int32 i = 0;
i < minibatch_size;
i++) {
1266 egs_to_merge[
i].Swap(vec[
i]);
1269 vec.erase(vec.begin(), vec.begin() + minibatch_size);
1275 size_t structure_hash = eg_hasher(*(vec[0]));
1276 int32 num_discarded = vec.size();
1278 for (
int32 i = 0;
i < num_discarded;
i++)
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
void AcceptExample(NnetExample *a)
void CopySetToVector(const std::set< T > &s, std::vector< T > *v)
Copies the elements of a set to a vector.
void DiscardedExamples(int32 example_size, size_t structure_hash, int32 num_discarded)
Users call this function to inform this class that after processing all the data, for examples of ori...
void WriteMinibatch(const std::vector< NnetExample > &egs)
bool store_component_stats
you should set store_component_stats to true if you need the average-activation and average-derivative...
A hashing function-object for vectors.
bool LengthsMatch(const std::string &utt, int32 utterance_length, int32 supervision_length, int32 length_tolerance=0) const
int64 total_frames_in_chunks_
int32 total_num_utterances_
float DefaultDurationOfSplit(const std::vector< int32 > &split) const
bool need_model_derivative
if need_model_derivative is true, then we'll be doing either model training or model-derivative compu...
int32 frame_subsampling_factor
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '--verbose=' switch.
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary).
static void GetIoNames(const std::vector< NnetExample > &src, std::vector< std::string > *names_vec)
bool IsInputNode(int32 node) const
Returns true if this is an input node, meaning that it is of type kInput.
std::string num_frames_str
int32 MinibatchSize(int32 size_of_eg, int32 num_available_egs, bool input_ended) const
This function tells you what minibatch size should be used for this eg.
static void DistributeRandomlyUniform(int32 n, std::vector< int32 > *vec)
A templated class for writing objects to an archive or script file; see The Table concept...
GeneralMatrix features
The features or labels.
std::vector< std::vector< std::vector< int32 > > > splits_for_length_
void ShiftExampleTimes(int32 t_offset, const std::vector< std::string > &exclude_names, NnetExample *eg)
Shifts the time-index t of everything in the "eg" by adding "t_offset" to all "t" values...
std::vector< IoSpecification > inputs
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
void GetGapSizes(int32 utterance_length, bool enforce_subsampling_factor, const std::vector< int32 > &chunk_sizes, std::vector< int32 > *gap_sizes) const
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
const ExampleMergingConfig & config_
int32 left_context_initial
int32 LargestValueInRange(int32 max_value) const
void Write(const std::string &key, const T &value) const
NnetExampleWriter * writer_
int64 total_input_frames_
std::vector< int32 > num_frames
int32 MaxUtteranceLength() const
int64 total_frames_overlap_
unordered_map< int32, int32 > minibatch_to_num_written
This hashing object hashes just the structural aspects of the NnetExample without looking at the valu...
void SetOutputWeights(int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info) const
void PrintStats() const
Calling this will cause a log message with information about the examples to be printed.
MatrixIndexT NumCols() const
void WriteVectorAsChar(std::ostream &os, bool binary, const VectorBase< BaseFloat > &vec)
static bool ParseIntSet(const std::string &str, IntSet *int_set)
void GetChunkSizesForUtterance(int32 utterance_length, std::vector< int32 > *chunk_sizes) const
bool IsOutputNode(int32 node) const
Returns true if this is an output node, meaning that it is of type kDescriptor and is not directly fo...
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
void RoundUpNumFrames(int32 frame_subsampling_factor, int32 *num_frames, int32 *num_frames_overlap)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
static void GetIoSizes(const std::vector< NnetExample > &src, const std::vector< std::string > &names, std::vector< int32 > *sizes)
void AccStatsForUtterance(int32 utterance_length, const std::vector< ChunkTimeInfo > &chunk_info)
int32 GetNnetExampleSize(const NnetExample &a)
This function returns the 'size' of a nnet-example as defined for purposes of merging egs...
Real * Data()
Returns a pointer to the start of the vector's data.
void InitSplitForLength()
MatrixIndexT Dim() const
Returns the dimension of the vector.
int32 right_context_final
const ExampleGenerationConfig & config_
void WroteExample(int32 example_size, size_t structure_hash, int32 minibatch_size)
Users call this function to inform this class that one minibatch has been written aggregating 'miniba...
void AppendGeneralMatrixRows(const std::vector< const GeneralMatrix *> &src, GeneralMatrix *mat)
Appends all the matrix rows of a list of GeneralMatrixes, to get a single GeneralMatrix.
std::map< int32, int32 > chunk_size_to_count_
ExampleMergingStats stats_
static void DistributeRandomly(int32 n, const std::vector< int32 > &magnitudes, std::vector< int32 > *vec)
std::vector< Index > indexes
void PrintAggregateStats() const
A class representing a vector.
#define KALDI_ASSERT(cond)
std::vector< IoSpecification > outputs
MatrixIndexT NumRows() const
void InitSplits(std::vector< std::vector< int32 > > *splits) const
static void MergeIo(const std::vector< NnetExample > &src, const std::vector< std::string > &names, const std::vector< int32 > &sizes, bool compress, NnetExample *merged_eg)
void ReadVectorAsChar(std::istream &is, bool binary, Vector< BaseFloat > *vec)
ExampleMerger(const ExampleMergingConfig &config, NnetExampleWriter *writer)
void GetChunksForUtterance(int32 utterance_length, std::vector< ChunkTimeInfo > *chunk_info)
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
std::string name
the name of the input in the neural net; in simple setups it will just be "input".
struct ChunkTimeInfo is used by class UtteranceSplitter to output information about how we split an u...
int32 GetNodeIndex(const std::string &node_name) const
returns index associated with this node name, or -1 if no such index.
Provides a vector abstraction class.
void PrintSpecificStats() const
std::vector< NnetIo > io
"io" contains the input and output.
std::vector< std::pair< int32, int32 > > ranges
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
std::vector< BaseFloat > output_weights
void Read(std::istream &in, bool binary, bool add=false)
Read function using C++ streams.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
void ComputeDerived()
This function decodes 'num_frames_str' into 'num_frames', and ensures that the members of 'num_frames...
void GetComputationRequest(const Nnet &nnet, const NnetExample &eg, bool need_model_derivative, bool store_component_stats, ComputationRequest *request)
This function takes a NnetExample (which should already have been frame-selected, if desired...
void MergeExamples(const std::vector< NnetExample > &src, bool compress, NnetExample *merged_eg)
Merge a set of input examples into a single example (typically the size of "src" will be the minibatc...
UtteranceSplitter(const ExampleGenerationConfig &config)