nnet-compile-utils-test.cc
Go to the documentation of this file.
1 // nnet3/nnet-compile-utils-test.cc
2 
3 // Copyright 2015 Johns Hopkins University (author: Vijayaditya Peddinti)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "util/common-utils.h"
22 
23 namespace kaldi {
24 namespace nnet3 {
25 
26 struct ComparePair : public std::unary_function<std::pair<int32, int32>, bool>
27 {
28  explicit ComparePair(const std::pair<int32, int32> &correct_pair):
29  correct_pair_(correct_pair) {}
30  bool operator() (std::pair<int32, int32> const &arg)
31  { return (arg.first == correct_pair_.first) &&
32  (arg.second == correct_pair_.second); }
33  std::pair<int32, int32> correct_pair_;
34 };
35 
37  public std::unary_function<std::pair<int32, int32>, bool>
38 {
39  explicit PairIsEqualComparator(const std::pair<int32, int32> pair):
40  pair_(pair) {}
41  bool operator() (std::pair<int32, int32> const &arg)
42  {
43  if (pair_.first == arg.first)
44  return pair_.second == arg.second;
45  return false;
46  }
47  std::pair<int32, int32> pair_;
48 };
49 
51  std::vector<std::vector<std::pair<int32, int32> > > vec_vec_pair) {
52  std::ostringstream ostream;
53  for (int32 i = 0; i < vec_vec_pair.size(); i++) {
54  for (int32 j = 0; j < vec_vec_pair[i].size(); j++) {
55  ostream << "(" << vec_vec_pair[i][j].first << ","
56  << vec_vec_pair[i][j].second << ") ";
57  }
58  ostream << std::endl;
59  }
60  KALDI_LOG << ostream.str();
61 }
62 
63 // Function to check SplitLocationsBackward() method
64 // checks if the submat_lists and split_lists have the same non-dummy elements
65 // checks if the submat_lists are split into same first_element lists wherever
66 // possible
67 // checks if the split_lists satisfy either "unique contiguous segments"
68 // property or unique pairs property (see SplitLocationsBackward in
69 // nnet-compile-utils.h for more details)
70 void UnitTestSplitLocationsBackward(bool verbose) {
71  int32 minibatch_size = Rand() % 1024 + 100;
72  int32 num_submat_indexes = Rand() % 10 + 1;
73  int32 max_submat_list_size = Rand() % 10 + 1;
74  int32 min_num_kaddrows = Rand() % 2; // minimum number of kAddRows compatible
75  // lists expected in the final split lists. This value will be used to
76  // create input submat_lists so that this is guaranteed
77  max_submat_list_size = min_num_kaddrows + max_submat_list_size;
78 
79  std::vector<std::pair<int32, int32> > all_pairs;
80  all_pairs.reserve(minibatch_size * max_submat_list_size);
81  std::vector<std::vector<std::pair<int32, int32> > >
82  submat_lists(minibatch_size),
83  split_lists;
84  std::vector<int32> submat_indexes(num_submat_indexes);
85  for (int32 i = 0; i < num_submat_indexes; i++) {
86  submat_indexes[i] = Rand();
87  }
88 
89  // generating submat_lists
90  int32 max_generated_submat_list_size = 0;
91  for (int32 i = 0; i < minibatch_size; i++) {
92  int32 num_locations = Rand() % max_submat_list_size + 1;
93  max_generated_submat_list_size =
94  max_generated_submat_list_size < num_locations ?
95  num_locations : max_generated_submat_list_size;
96  submat_lists[i].reserve(num_locations);
97  for (int32 j = 0; j < num_locations; j++) {
98  if (j <= min_num_kaddrows && j < num_submat_indexes)
99  // since we need min_num_kaddrows in the split_lists we ensure that
100  // we add a pair with the same first element in all the submat_lists
101  submat_lists[i].push_back(std::make_pair(submat_indexes[j],
102  Rand() % minibatch_size));
103  submat_lists[i].push_back(
104  std::make_pair(submat_indexes[Rand() % num_submat_indexes],
105  Rand() % minibatch_size));
106  }
107  all_pairs.insert(all_pairs.end(), submat_lists[i].begin(),
108  submat_lists[i].end());
109  }
110 
111  SplitLocationsBackward(submat_lists, &split_lists);
112  // Checking split_lists has all the necessary properties
113  for (int32 i = 0; i < split_lists.size(); i++) {
114  int32 first_value;
115  std::vector<int32> second_values;
116  if (ConvertToIndexes(split_lists[i], &first_value, &second_values)) {
117  // checking for contiguity and uniqueness of .second elements
118  std::vector<int32> occurred_values;
119  int32 prev_value = -10; // using a negative value as all indices are > 0
120  for (int32 j = 0; j < second_values.size(); j++) {
121  if (second_values[j] == -1)
122  continue;
123  if (second_values[j] != prev_value) {
124  std::vector<int32>::iterator iter = std::find(occurred_values.begin(),
125  occurred_values.end(),
126  second_values[j]);
127  KALDI_ASSERT(iter == occurred_values.end());
128  }
129  }
130  } else {
131  std::vector<std::pair<int32, int32> > list_of_pairs;
132  // checking for uniques of elements in the list
133  for (int32 j = 0; j < split_lists[i].size(); j++) {
134  if (split_lists[i][j].first == -1)
135  continue;
136  std::vector<std::pair<int32, int32> >::const_iterator iter =
137  std::find_if(list_of_pairs.begin(), list_of_pairs.end(),
138  PairIsEqualComparator(split_lists[i][j]));
139  KALDI_ASSERT(iter == list_of_pairs.end());
140  list_of_pairs.push_back(split_lists[i][j]);
141  }
142  }
143  }
144  if (verbose) {
145  KALDI_LOG << "submat_list";
146  PrintVectorVectorPair(submat_lists);
147  KALDI_LOG << "split_lists";
148  PrintVectorVectorPair(split_lists);
149  KALDI_LOG << "===========================";
150  }
151  int32 num_kaddrows_in_output = 0;
152  int32 first_value;
153  std::vector<int32> second_values;
154  // ensure that elements in submat_lists are also present
155  // in split_lists
156  for (int32 i = 0 ; i < split_lists.size(); i++) {
157  second_values.clear();
158  if (ConvertToIndexes(split_lists[i], &first_value, &second_values)) {
159  // Checking if ConvertToIndexes did a proper conversion of the indexes
160  KALDI_ASSERT(second_values.size() == split_lists[i].size());
161  for (int32 j = 0; j < second_values.size(); j++) {
162  if (split_lists[i][j].first != -1)
163  KALDI_ASSERT((split_lists[i][j].first == first_value) &&
164  (split_lists[i][j].second == second_values[j]));
165  }
166  num_kaddrows_in_output++;
167  }
168  for (int32 j = 0; j < split_lists[i].size(); j++) {
169  if (split_lists[i][j].first == -1)
170  continue;
171  std::vector<std::pair<int32, int32> >::iterator iter =
172  std::find_if(all_pairs.begin(), all_pairs.end(),
173  ComparePair(split_lists[i][j]));
174  KALDI_ASSERT(iter != all_pairs.end());
175  all_pairs.erase(iter);
176  }
177  }
178  KALDI_ASSERT(all_pairs.size() == 0);
179  // ensure that there are at least as many kAddRows compatible split_lists as
180  // specified
181  KALDI_ASSERT(num_kaddrows_in_output >= min_num_kaddrows);
182 }
183 
184 
186  for (int32 k = 0; k < 10; k++) {
187  int32 size = RandInt(0, 5);
188  std::vector<int32> indexes(size);
189  for (int32 i = 0; i < size; i++)
190  indexes[i] = RandInt(-1, 4);
191  std::vector<std::pair<int32, int32> > reverse_indexes;
192  bool ans = HasContiguousProperty(indexes, &reverse_indexes);
193  if (!ans) { // doesn't have contiguous propety.
194  KALDI_LOG << "no.";
195  bool found_example = false;
196  for (int32 i = 0; i < size; i++) {
197  if (indexes[i] != -1) {
198  bool found_not_same = false;
199  for (int32 j = i + 1; j < size; j++) {
200  if (indexes[j] != indexes[i]) found_not_same = true;
201  else if (found_not_same) found_example = true; // found something like x y x.
202  }
203  }
204  }
205  KALDI_ASSERT(found_example);
206  } else {
207  KALDI_LOG << "yes.";
208  for (int32 i = 0; i < reverse_indexes.size(); i++) {
209  for (int32 j = reverse_indexes[i].first;
210  j < reverse_indexes[i].second; j++) {
211  KALDI_ASSERT(indexes[j] == i);
212  indexes[j] = -1;
213  }
214  }
215  for (int32 i = 0; i < size; i++) // make sure all indexes covered.
216  KALDI_ASSERT(indexes[i] == -1);
217  }
218  }
219 }
220 
221 
223  for (int32 k = 0; k < 10; k++) {
224  int32 size = RandInt(0, 5);
225  std::vector<int32> indexes(size);
226  for (int32 i = 0; i < size; i++)
227  indexes[i] = RandInt(-1, 4);
228  std::vector<std::pair<int32, int32> > reverse_indexes;
229  bool ans = HasContiguousProperty(indexes, &reverse_indexes);
230  if (ans) { // has contiguous property -> EnsureContiguousProperty should do
231  // nothing.
232  std::vector<std::vector<int32> > indexes_split;
233  EnsureContiguousProperty(indexes, &indexes_split);
234  if (indexes.size() == 0 ||
235  *std::max_element(indexes.begin(), indexes.end()) == -1) {
236  KALDI_ASSERT(indexes_split.size() == 0);
237  } else {
238  KALDI_ASSERT(indexes_split.size() == 1 &&
239  indexes_split[0] == indexes);
240  }
241  } else {
242  std::vector<std::vector<int32> > indexes_split;
243  EnsureContiguousProperty(indexes, &indexes_split);
244  KALDI_ASSERT(indexes_split.size() > 1);
245  for (int32 i = 0; i < indexes.size(); i++) {
246  int32 this_val = indexes[i];
247  bool found = (this_val == -1); // not looking for anything if
248  // this_val is -1.
249  for (int32 j = 0; j < indexes_split.size(); j++) {
250  if (found) {
251  KALDI_ASSERT(indexes_split[j][i] == -1);
252  } else {
253  if (indexes_split[j][i] == this_val) {
254  found = true;
255  } else {
256  KALDI_ASSERT(indexes_split[j][i] == -1);
257  }
258  }
259  }
260  KALDI_ASSERT(found);
261  for (int32 j = 0; j < indexes_split.size(); j++) {
262  KALDI_ASSERT(indexes_split[j].size() == indexes.size() &&
263  HasContiguousProperty(indexes_split[j], &reverse_indexes));
264  }
265  }
266  }
267  }
268 }
269 
270 
271 // Function to check SplitLocations() method
272 // checks if the submat_lists and split_lists have the same non-dummy elements
273 // checks if the submat_lists are split into same first_element lists wherever
274 // possible
275 void UnitTestSplitLocations(bool verbose) {
276  int32 minibatch_size = Rand() % 1024 + 100;
277  int32 num_submat_indexes = Rand() % 10 + 1;
278  int32 max_submat_list_size = Rand() % 10 + 1;
279  int32 min_num_kaddrows = Rand() % 2; // minimum number of kAddRows compatible
280  // lists expected in the final split lists. This value will be used to
281  // create input submat_lists so that this is guaranteed
282  max_submat_list_size = min_num_kaddrows + max_submat_list_size;
283 
284  std::vector<std::pair<int32, int32> > all_pairs;
285  all_pairs.reserve(minibatch_size * max_submat_list_size);
286  std::vector<std::vector<std::pair<int32, int32> > >
287  submat_lists(minibatch_size),
288  split_lists;
289  std::vector<int32> submat_indexes(num_submat_indexes);
290  for (int32 i = 0; i < num_submat_indexes; i++) {
291  submat_indexes[i] = Rand();
292  }
293 
294  // generating submat_lists
295  int32 max_generated_submat_list_size = 0;
296  for (int32 i = 0; i < minibatch_size; i++) {
297  int32 num_locations = Rand() % max_submat_list_size + 1;
298  max_generated_submat_list_size =
299  max_generated_submat_list_size < num_locations ?
300  num_locations : max_generated_submat_list_size;
301  submat_lists[i].reserve(num_locations);
302  for (int32 j = 0; j < num_locations; j++) {
303  // note from dan: I edited the following line to resolve a valgrind error
304  // but cannot really understand at this point what this code is doing.
305  if (j <= min_num_kaddrows && j < num_submat_indexes) {
306  // since we need min_num_kaddrows in the split_lists we ensure that
307  // we add a pair with the same first element in all the submat_lists
308  submat_lists[i].push_back(std::make_pair(submat_indexes[j],
309  Rand() % minibatch_size));
310  }
311  submat_lists[i].push_back(
312  std::make_pair(submat_indexes[Rand() % num_submat_indexes],
313  Rand() % minibatch_size));
314  }
315  all_pairs.insert(all_pairs.end(), submat_lists[i].begin(),
316  submat_lists[i].end());
317  }
318 
319  SplitLocations(submat_lists, &split_lists);
320  if (verbose) {
321  KALDI_LOG << "submat_list";
322  PrintVectorVectorPair(submat_lists);
323  KALDI_LOG << "split_lists";
324  PrintVectorVectorPair(split_lists);
325  KALDI_LOG << "===========================";
326  KALDI_LOG << split_lists.size();
327  }
328  int32 num_kaddrows_in_output = 0;
329  int32 first_value;
330  std::vector<int32> second_values;
331  // ensure that elements in submat_lists are also present
332  // in split_lists
333  for (int32 i = 0 ; i < split_lists.size(); i++) {
334  second_values.clear();
335  if (ConvertToIndexes(split_lists[i], &first_value, &second_values)) {
336  // Checking if ConvertToIndexes did a proper conversion of the indexes
337  for (int32 j = 0; j < second_values.size(); j++) {
338  if (split_lists[i][j].first != -1)
339  KALDI_ASSERT((split_lists[i][j].first == first_value) &&
340  (split_lists[i][j].second == second_values[j]));
341  }
342  num_kaddrows_in_output++;
343  }
344  for (int32 j = 0; j < split_lists[i].size(); j++) {
345  if (split_lists[i][j].first == -1)
346  continue;
347  std::vector<std::pair<int32, int32> >::iterator iter =
348  std::find_if(all_pairs.begin(), all_pairs.end(),
349  ComparePair(split_lists[i][j]));
350  KALDI_ASSERT(iter != all_pairs.end());
351  all_pairs.erase(iter);
352  }
353  }
354  KALDI_ASSERT(all_pairs.size() == 0);
355  // ensure that there are at least as many kAddRows compatible split_lists as
356  // specified
357  KALDI_ASSERT(num_kaddrows_in_output >= min_num_kaddrows);
358 }
359 
360 } // namespace nnet3
361 } // namespace kaldi
362 
363 int main() {
364  using namespace kaldi;
365  using namespace kaldi::nnet3;
366  bool verbose = false;
367  for (int32 loop = 0; loop < 10; loop++) {
368  UnitTestSplitLocations(verbose);
372  }
373  KALDI_LOG << "Tests passed.";
374  return 0;
375 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool HasContiguousProperty(const std::vector< int32 > &indexes, std::vector< std::pair< int32, int32 > > *reverse_indexes)
This function returns true if for each integer i != -1, all the indexes j at which indexes[j] == i ar...
bool ConvertToIndexes(const std::vector< std::pair< int32, int32 > > &location_vector, int32 *first_value, std::vector< int32 > *second_values)
If it is the case for some i >= 0 that all the .first elements of "location_vector" are either i or -...
kaldi::int32 int32
void SplitLocations(const std::vector< std::vector< std::pair< int32, int32 > > > &submat_lists, std::vector< std::vector< std::pair< int32, int32 > > > *split_lists)
The input to this function is a vector (indexed by matrix-row-index) of lists of pairs (submat_index...
ComparePair(const std::pair< int32, int32 > &correct_pair)
void UnitTestSplitLocations(bool verbose)
void EnsureContiguousProperty(const std::vector< int32 > &indexes, std::vector< std::vector< int32 > > *indexes_out)
This function takes a vector of indexes and splits it up into as separate vectors of the same size...
void PrintVectorVectorPair(std::vector< std::vector< std::pair< int32, int32 > > > vec_vec_pair)
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
void SplitLocationsBackward(const std::vector< std::vector< std::pair< int32, int32 > > > &submat_lists, std::vector< std::vector< std::pair< int32, int32 > > > *split_lists)
This function has the same interface as SplitLocations(); however, it ensures certain additional prop...
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
PairIsEqualComparator(const std::pair< int32, int32 > pair)
void UnitTestSplitLocationsBackward(bool verbose)
std::pair< int32, int32 > correct_pair_
void UnitTestEnsureContiguousProperty()
#define KALDI_LOG
Definition: kaldi-error.h:153
bool operator()(std::pair< int32, int32 > const &arg)
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95