36 const std::vector<std::vector<std::pair<int32, int32> > > &submat_lists,
37 std::unordered_map<int32,int32> *submat_counts,
38 std::vector<int32> *submats_with_large_counts) {
39 auto iter = submat_lists.begin(), end = submat_lists.end();
40 for (; iter != end; ++iter) {
41 std::vector<std::pair<int32, int32> >::const_iterator
42 iter2 = iter->begin(), end2 = iter->end();
43 for (; iter2 != end2; ++iter2) {
44 int32 submat_index = iter2->first;
46 std::unordered_map<int32,int32>::iterator
47 iter = submat_counts->find(submat_index);
48 if (iter == submat_counts->end())
49 (*submat_counts)[submat_index] = 1;
54 auto counts_iter = submat_counts->begin(),
55 counts_end = submat_counts->end();
56 size_t cutoff = submat_lists.size() / 2;
57 for (; counts_iter != counts_end; ++counts_iter)
58 if (counts_iter->second > cutoff)
59 submats_with_large_counts->push_back(counts_iter->first);
74 const std::vector<int32> &submats_to_separate,
75 const std::vector<std::vector<std::pair<int32, int32> > > &submat_lists,
76 std::vector<std::vector<std::pair<int32, int32> > > *reduced_submat_lists,
77 std::vector<std::vector<std::pair<int32, int32> > > *split_lists) {
78 KALDI_ASSERT(split_lists->empty() && !submats_to_separate.empty());
79 size_t num_to_separate = submats_to_separate.size(),
80 num_rows = submat_lists.size();
81 std::unordered_map<int32, size_t> submat_to_index;
82 reduced_submat_lists->clear();
83 reduced_submat_lists->resize(num_rows);
84 split_lists->resize(num_to_separate);
85 for (
size_t i = 0;
i < num_to_separate;
i++) {
86 (*split_lists)[
i].resize(num_rows, std::pair<int32, int32>(-1, -1));
87 int32 submat = submats_to_separate[
i];
88 submat_to_index[submat] =
i;
90 for (
size_t row = 0; row < submat_lists.size(); row++) {
91 std::vector<std::pair<int32, int32> >::const_iterator
92 iter = submat_lists[row].begin(), end = submat_lists[row].end();
93 std::vector<std::pair<int32, int32> >
94 &reduced_list = (*reduced_submat_lists)[row];
97 for (; iter != end; ++iter) {
98 int32 submat_index = iter->first;
99 std::unordered_map<int32, size_t>::const_iterator map_iter =
100 submat_to_index.find(submat_index);
101 if (map_iter == submat_to_index.end()) {
102 reduced_list.push_back(*iter);
105 size_t index = map_iter->second;
106 std::pair<int32,int32> &p = (*split_lists)[index][row];
110 reduced_list.push_back(*iter);
113 p.first = submat_index;
114 int32 src_row_index = iter->second;
115 p.second = src_row_index;
121 const std::vector<std::vector<std::pair<int32, int32> > > &submat_lists,
122 std::vector<std::vector<std::pair<int32, int32> > > *split_lists) {
123 size_t num_rows = submat_lists.size(),
124 num_output_lists = 0;
125 auto iter = submat_lists.begin(), end = submat_lists.end();
126 for (; iter != end; ++iter)
127 if (iter->size() > num_output_lists)
128 num_output_lists = iter->size();
129 split_lists->clear();
130 if (num_output_lists == 0)
132 else if (num_output_lists == 1) {
133 split_lists->resize(1);
134 std::vector<std::pair<int32, int32> > &list = (*split_lists)[0];
135 list.resize(num_rows, std::pair<int32, int32>(-1, -1));
136 for (
size_t i = 0;
i < num_rows;
i++) {
137 if (!submat_lists[
i].empty())
138 list[
i] = submat_lists[
i][0];
144 std::unordered_map<int32,int32> submat_counts;
145 std::vector<int32> submats_with_large_counts;
146 GetSubmatCounts(submat_lists, &submat_counts, &submats_with_large_counts);
147 if (!submats_with_large_counts.empty()) {
151 std::vector<std::vector<std::pair<int32, int32> > > reduced_submat_lists;
154 &reduced_submat_lists,
158 std::vector<std::vector<std::pair<int32, int32> > > reduced_split_lists;
160 size_t cur_num_lists = split_lists->size(),
161 num_extra_lists = reduced_split_lists.size(),
162 new_num_lists = cur_num_lists + num_extra_lists;
163 split_lists->resize(new_num_lists);
164 for (
size_t i = 0;
i < num_extra_lists;
i++)
165 (*split_lists)[cur_num_lists +
i].swap(reduced_split_lists[
i]);
171 split_lists->resize(num_output_lists);
172 for (
size_t i = 0;
i < num_output_lists;
i++)
173 (*split_lists)[
i].resize(num_rows, std::pair<int32, int32>(-1, -1));
174 for (
size_t row = 0; row < num_rows; row++) {
175 const std::vector<std::pair<int32, int32> > &this_list =
177 size_t this_list_size = submat_lists[row].size();
178 for (
size_t i = 0;
i < this_list_size;
i++) {
179 (*split_lists)[
i][row] = this_list[
i];
191 const std::vector<std::pair<int32, int32> > &location_vector,
193 std::vector<int32> *second_values) {
195 second_values->clear();
196 second_values->reserve(location_vector.size());
197 std::vector<std::pair<int32, int32> >::const_iterator iter;
198 for (iter = location_vector.begin(); iter < location_vector.end(); ++iter) {
199 if (iter->first != -1) {
200 if (*first_value == -1)
201 *first_value = iter->first;
202 if (iter->first != *first_value)
204 second_values->push_back(iter->second);
206 second_values->push_back(-1);
215 const std::vector<int32> &indexes,
216 std::vector<std::vector<int32> > *indexes_out) {
217 indexes_out->clear();
218 indexes_out->reserve(3);
219 if (indexes.empty())
return;
220 int32 max_value = *std::max_element(indexes.begin(), indexes.end());
221 if (max_value == -1)
return;
222 std::vector<int32> num_segments_seen(max_value + 1, 0);
223 int32 dim = indexes.size(), num_output_vectors = 0;
226 if (indexes[
i] == -1) {
230 int32 value = indexes[
i], start_index =
i;
231 for (;
i < dim && indexes[
i] == value;
i++);
236 int32 this_num_segments_seen = num_segments_seen[value]++;
237 if (this_num_segments_seen >= num_output_vectors) {
239 indexes_out->resize(++num_output_vectors);
240 indexes_out->back().resize(dim, -1);
242 std::vector<int32> &this_out_vec((*indexes_out)[this_num_segments_seen]);
243 std::vector<int32>::iterator iter = this_out_vec.begin() + start_index,
244 end = this_out_vec.begin() + end_index;
246 for (; iter != end; ++iter) *iter = value;
281 std::vector<std::vector<std::pair<int32, int32> > >* split_lists) {
282 split_lists->clear();
283 typedef unordered_map<std::pair<int32, int32>,
286 MapType pair_to_count;
287 int32 cur_num_lists = 0;
289 for (int32
i = 0;
i < list.size();
i++) {
290 if (list[
i].first == -1)
292 MapType::iterator iter = pair_to_count.find(list[
i]);
294 if (iter == pair_to_count.end())
295 pair_to_count[list[i]] = this_count = 1;
297 this_count = (++iter->second);
298 if (this_count > cur_num_lists) {
300 split_lists->resize(this_count);
301 split_lists->back().resize(list.size(),
302 std::pair<int32, int32>(-1, -1));
305 (*split_lists)[this_count-1][
i] = list[
i];
307 if (split_lists->size() == 0)
308 KALDI_ERR <<
"Input list has just dummy pairs";
312 const std::vector<std::vector<std::pair<int32, int32> > > &submat_lists,
313 std::vector<std::vector<std::pair<int32, int32> > > *split_lists) {
314 std::vector<std::vector<std::pair<int32, int32> > > split_lists_intermediate;
317 for (
size_t i = 0;
i < split_lists_intermediate.size();
i++) {
319 std::vector<int32> second_values;
321 &first_value, &second_values)) {
324 if (first_value == -1) {
328 std::vector<std::vector<int32> > second_values_split;
330 if (second_values_split.size() == 1) {
332 split_lists->push_back(split_lists_intermediate[i]);
334 for (
size_t j = 0;
j < second_values_split.size();
j++) {
335 split_lists->resize(split_lists->size() + 1);
336 const std::vector<int32> &input_list = second_values_split[
j];
337 std::vector<std::pair<int32, int32> > &output_list =
339 output_list.resize(input_list.size());
340 int32 size = input_list.size();
341 for (
int32 k = 0; k < size; k++) {
342 int32 row = input_list[k];
343 if (row == -1) output_list[k].first = -1;
344 else output_list[k].first = first_value;
345 output_list[k].second = row;
353 std::vector<std::vector<std::pair<int32, int32> > > new_split_lists;
356 for (
int32 j = 0;
j < new_split_lists.size();
j++) {
357 split_lists->push_back(new_split_lists[
j]);
370 const std::vector<int32> &indexes,
371 std::vector<std::pair<int32, int32> > *reverse_indexes) {
372 reverse_indexes->clear();
373 int32 num_indexes = indexes.size();
374 if (num_indexes == 0)
376 int32 num_input_indexes =
377 *std::max_element(indexes.begin(), indexes.end()) + 1;
379 if (num_input_indexes == 0) {
381 KALDI_WARN <<
"HasContiguousProperty called on vector of -1's.";
384 reverse_indexes->resize(num_input_indexes,
385 std::pair<int32,int32>(-1, -1));
389 for (
int32 i = 0;
i < num_indexes;
i++) {
391 if (j == -1)
continue;
393 std::pair<int32, int32> &pair = (*reverse_indexes)[
j];
394 if (pair.first == -1) {
398 pair.first = std::min(pair.first,
i);
399 pair.second = std::max(pair.second,
i + 1);
403 for (
int32 i = 0;
i < num_input_indexes;
i++) {
404 std::pair<int32, int32> pair = (*reverse_indexes)[
i];
405 if (pair.first != -1) {
406 for (
int32 j = pair.first;
j < pair.second;
j++)
417 std::vector<std::pair<int32, int32> > *pairs) {
421 for (std::vector<Index>::const_iterator iter = indexes.begin();
422 iter != indexes.end(); ++iter)
423 n_x_set.insert(std::pair<int32, int32>(iter->n, iter->x));
425 pairs->reserve(n_x_set.size());
427 iter = n_x_set.begin(); iter != n_x_set.end(); ++iter)
428 pairs->push_back(*iter);
429 std::sort(pairs->begin(), pairs->end());
435 std::vector<int32> *t_values) {
437 std::unordered_set<int32> t_set;
439 for (std::vector<Index>::const_iterator iter = indexes.begin();
440 iter != indexes.end(); ++iter)
442 t_set.insert(iter->t);
444 t_values->reserve(t_set.size());
445 for (std::unordered_set<int32>::iterator iter = t_set.begin();
446 iter != t_set.end(); ++iter)
447 t_values->push_back(*iter);
448 std::sort(t_values->begin(), t_values->end());
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void GetTList(const std::vector< Index > &indexes, std::vector< int32 > *t_values)
This function outputs a sorted, unique list of the 't' values that are encountered in the provided li...
bool HasContiguousProperty(const std::vector< int32 > &indexes, std::vector< std::pair< int32, int32 > > *reverse_indexes)
This function returns true if for each integer i != -1, all the indexes j at which indexes[j] == i ar...
bool ConvertToIndexes(const std::vector< std::pair< int32, int32 > > &location_vector, int32 *first_value, std::vector< int32 > *second_values)
If it is the case for some i >= 0 that all the .first elements of "location_vector" are either i or -...
void SeparateSubmatsWithLargeCounts(const std::vector< int32 > &submats_to_separate, const std::vector< std::vector< std::pair< int32, int32 > > > &submat_lists, std::vector< std::vector< std::pair< int32, int32 > > > *reduced_submat_lists, std::vector< std::vector< std::pair< int32, int32 > > > *split_lists)
This function, used in SplitLocations(), is used to make separate 'split lists' for certain high-coun...
void GetSubmatCounts(const std::vector< std::vector< std::pair< int32, int32 > > > &submat_lists, std::unordered_map< int32, int32 > *submat_counts, std::vector< int32 > *submats_with_large_counts)
Gets counts of submatrices (the 1st members of pairs) in submat_lists.
void SplitLocations(const std::vector< std::vector< std::pair< int32, int32 > > > &submat_lists, std::vector< std::vector< std::pair< int32, int32 > > > *split_lists)
The input to this function is a vector (indexed by matrix-row-index) of lists of pairs (submat_index...
void SplitPairList(std::vector< std::pair< int32, int32 > > &list, std::vector< std::vector< std::pair< int32, int32 > > > *split_lists)
This function splits a vector of pairs into a list of vectors of pairs.
void EnsureContiguousProperty(const std::vector< int32 > &indexes, std::vector< std::vector< int32 > > *indexes_out)
This function takes a vector of indexes and splits it up into as separate vectors of the same size...
void SplitLocationsBackward(const std::vector< std::vector< std::pair< int32, int32 > > > &submat_lists, std::vector< std::vector< std::pair< int32, int32 > > > *split_lists)
This function has the same interface as SplitLocations(); however, it ensures certain additional prop...
#define KALDI_ASSERT(cond)
void GetNxList(const std::vector< Index > &indexes, std::vector< std::pair< int32, int32 > > *pairs)
This function outputs a unique, lexicographically sorted list of the pairs of (n, x) values that are ...
A hashing function-object for pairs of ints.