nnet-randomizer-test.cc
Go to the documentation of this file.
1 // nnet/nnet-randomizer-test.cc
2 
3 // Copyright 2013 Brno University of Technology (author: Karel Vesely)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "nnet/nnet-randomizer.h"
21 
22 #include <numeric>
23 #include <vector>
24 #include <algorithm>
25 
26 using namespace kaldi;
27 using namespace kaldi::nnet1;
28 
30 
31 template<class Real>
32 static void InitRand(VectorBase<Real> *v) {
33  for (MatrixIndexT i = 0;i < v->Dim();i++)
34  (*v)(i) = RandGauss();
35 }
36 
37 template<class Real>
38 static void InitRand(MatrixBase<Real> *M) {
39  do {
40  for (MatrixIndexT i = 0;i < M->NumRows();i++)
41  for (MatrixIndexT j = 0;j < M->NumCols();j++)
42  (*M)(i, j) = RandGauss();
43  } while (M->NumRows() != 0 && M->Cond() > 100);
44 }
45 
46 
47 template<class Real>
48 static void AssertEqual(const VectorBase<Real> &A,
49  const VectorBase<Real> &B,
50  float tol = 0.001) {
51  KALDI_ASSERT(A.Dim() == B.Dim());
52  for (MatrixIndexT i = 0; i < A.Dim(); i++) {
53  KALDI_ASSERT(std::abs(A(i)-B(i)) < tol);
54  }
55 }
56 
57 
// Asserts that the ranges [begin1, end1) and [begin2, end2) have the same
// length, that the first range is non-empty, and that the two ranges hold
// equal elements position by position.  Failures are reported through
// KALDI_ASSERT.
template<class RandomAccessIterator>
static void AssertEqual(RandomAccessIterator begin1, RandomAccessIterator end1,
                        RandomAccessIterator begin2, RandomAccessIterator end2) {
  KALDI_ASSERT((end1 - begin1) == (end2 - begin2));
  KALDI_ASSERT(end1 > begin1);
  // Advance both cursors together until the first range is exhausted.
  while (begin1 < end1) {
    KALDI_ASSERT(*begin1 == *begin2);
    ++begin1;
    ++begin2;
  }
}
67 
68 
70 
74  r.Init(c);
75  const std::vector<int32>& m = r.Generate(5);
76  KALDI_ASSERT(m.size() == 5);
77  int32 sum_of_elems = std::accumulate(m.begin(), m.end(),0);
78  KALDI_ASSERT(sum_of_elems == 4 + 3 + 2 + 1 + 0);
79 }
80 
82  Matrix<BaseFloat> m(1111, 10);
83  InitRand(&m);
84  CuMatrix<BaseFloat> m2(m);
85  // config
87  c.randomizer_size = 1000;
88  c.minibatch_size = 100;
89  // randomizer
91  r.Init(c);
92  r.AddData(m2);
93  KALDI_ASSERT(r.IsFull());
94  // create vector with consecutive indices
95  std::vector<int32> mask(1111);
96  for (int32 i = 0; i < 1111; i++) {
97  mask[i] = i;
98  }
99  r.Randomize(mask); // no shuffling
100  // make sure we get same data we put to randomizer
101  int32 i=0;
102  for ( ; !r.Done(); r.Next(), i++) {
103  KALDI_LOG << i;
104  const CuMatrixBase<BaseFloat> &m3 = r.Value();
105  Matrix<BaseFloat> m4(m3.NumRows(), m3.NumCols());
106  m3.CopyToMat(&m4);
108  }
109  KALDI_ASSERT(i == 11); // 11 minibatches
110 
111  KALDI_LOG << "Filling for 2nd time";
112  // try to fill buffer one more time, and empty it
113  KALDI_ASSERT(!r.IsFull());
114  r.AddData(m2);
115  KALDI_ASSERT(r.IsFull());
116  KALDI_ASSERT(r.NumFrames() == 11 + 1111);
117  { // check last 11 rows were copied to the front in the buffer
118  const CuMatrixBase<BaseFloat> &m3 = r.Value();
119  Matrix<BaseFloat> m4(m3.NumRows(), m3.NumCols());
120  m3.CopyToMat(&m4);
121  AssertEqual(m4.RowRange(0, 11), m.RowRange(1100, 11));
122  }
123  KALDI_ASSERT(!r.Done());
124  for ( ; !r.Done(); r.Next(), i++) {
125  KALDI_LOG << i;
126  const CuMatrixBase<BaseFloat>& m3 = r.Value();
127  static_cast<const void>(m3); // variable no longer unused,
128  }
129  KALDI_ASSERT(i == 22); // 22 minibatches
130 }
131 
133  Vector<BaseFloat> v(1111);
134  InitRand(&v);
135  // config
137  c.randomizer_size = 1000;
138  c.minibatch_size = 100;
139  // randomizer
141  r.Init(c);
142  r.AddData(v);
143  KALDI_ASSERT(r.IsFull());
144  // create vector with consecutive indices
145  std::vector<int32> mask(1111);
146  for (int32 i = 0; i < 1111; i++) {
147  mask[i] = i;
148  }
149  r.Randomize(mask); // no shuffling
150  // make sure we get same data we put to randomizer
151  int32 i = 0;
152  for ( ; !r.Done(); r.Next(), i++) {
153  KALDI_LOG << i;
154  const VectorBase<BaseFloat> &v2 = r.Value();
156  }
157  KALDI_ASSERT(i == 11); // 11 minibatches
158 
159  KALDI_LOG << "Filling for 2nd time";
160  // try to fill buffer one more time, and empty it
161  KALDI_ASSERT(!r.IsFull());
162  r.AddData(v);
163  KALDI_ASSERT(r.IsFull());
164  KALDI_ASSERT(r.NumFrames() == 11 + 1111);
165  { // check last 11 rows were copied to the front in the buffer
166  const VectorBase<BaseFloat> &v2 = r.Value();
167  AssertEqual(v2.Range(0, 11), v.Range(1100, 11));
168  }
169  KALDI_ASSERT(!r.Done());
170  for ( ; !r.Done(); r.Next(), i++) {
171  KALDI_LOG << i;
172  const VectorBase<BaseFloat>& v2 = r.Value();
173  static_cast<const void>(v2); // variable no longer unused,
174  }
175  KALDI_ASSERT(i == 22); // 22 minibatches
176 }
177 
179  // prepare vector with some data,
180  std::vector<int32> v(1111);
181  for (int32 i = 0; i < v.size(); i++) {
182  v.at(i) = i;
183  }
184  std::random_shuffle(v.begin(), v.end());
185 
186  // config
188  c.randomizer_size = 1000;
189  c.minibatch_size = 100;
190  // randomizer
192  r.Init(c);
193  r.AddData(v);
194  KALDI_ASSERT(r.IsFull());
195  // create vector with consecutive indices
196  std::vector<int32> mask(1111);
197  for (int32 i = 0; i < 1111; i++) {
198  mask[i]=i;
199  }
200  r.Randomize(mask); // no shuffling
201  // make sure we get same data we put to randomizer
202  int32 i = 0;
203  for ( ; !r.Done(); r.Next(), i++) {
204  KALDI_LOG << i;
205  std::vector<int32> v2 = r.Value();
206  AssertEqual(v2.begin(),
207  v2.end(),
208  v.begin() + (i * c.minibatch_size),
209  v.begin() + ((i+1) * c.minibatch_size));
210  }
211  KALDI_ASSERT(i == 11); // 11 minibatches
212 
213  KALDI_LOG << "Filling for 2nd time";
214  // try to fill buffer one more time, and empty it
215  KALDI_ASSERT(!r.IsFull());
216  r.AddData(v);
217  KALDI_ASSERT(r.IsFull());
218  KALDI_ASSERT(r.NumFrames() == 11 + 1111);
219  { // check last 11 rows were copied to the front in the buffer
220  std::vector<int32> v2 = r.Value();
221  AssertEqual(v2.begin(), v2.begin()+11, v.begin()+1100, v.begin()+1100+11);
222  }
223  KALDI_ASSERT(!r.Done());
224  for ( ; !r.Done(); r.Next(), i++) {
225  KALDI_LOG << i;
226  std::vector<int32> v2 = r.Value();
227  }
228  KALDI_ASSERT(i == 22); // 22 minibatches
229 }
230 
231 
// Entry point of the test program.  As far as this listing shows, it writes
// "Tests succeeded." followed by a newline to std::cout.
// NOTE(review): the listing's embedded numbering jumps from 232 to 237, so
// any statements on the intervening lines are not visible here -- confirm
// against the real source file.
232 int main() {
237 
238  std::cout << "Tests succeeded.\n";
239 }
240 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool IsFull()
Returns true, when capacity is full.
void UnitTestMatrixRandomizer()
bool Done()
Returns true, if no more data for another mini-batch (after current one)
void Next()
Sets cursor to next mini-batch.
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
Definition: cu-matrix.cc:447
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Real Cond() const
Returns condition number by computing Svd.
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void Next()
Sets cursor to next mini-batch.
Generates randomly ordered vector of indices.
bool Done()
Returns true, if no more data for another mini-batch (after current one)
Randomizes elements of a vector according to a mask.
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void Init(const NnetDataRandomizerOptions &conf)
Set the randomizer parameters (size)
bool Done()
Returns true, if no more data for another mini-batch (after current one)
int32 randomizer_size
Maximum number of samples we have in memory.
const std::vector< T > & Value()
Returns matrix-window with next mini-batch.
static void InitRand(VectorBase< Real > *v)
int32 NumFrames()
Number of frames stored inside the Randomizer.
void Randomize(const std::vector< int32 > &mask)
Randomize matrix row-order using mask.
int32 NumFrames()
Number of frames stored inside the Randomizer.
void AddData(const Vector< BaseFloat > &v)
Add data to randomization buffer.
void Next()
Sets cursor to next mini-batch.
Configuration variables that affect how frame-level shuffling is done.
void UnitTestVectorRandomizer()
int32 MatrixIndexT
Definition: matrix-common.h:98
void Init(const NnetDataRandomizerOptions &conf)
Init, call srand.
void Init(const NnetDataRandomizerOptions &conf)
Set the randomizer parameters (size)
int32 NumFrames()
Number of frames stored inside the Randomizer.
void RandGauss(BaseFloat mu, BaseFloat sigma, CuMatrixBase< Real > *mat, struct RandomState *state=NULL)
Fill CuMatrix with random numbers (Gaussian distribution): mu = the mean value, sigma = standard devi...
Definition: nnet-utils.h:164
int main()
void UnitTestRandomizerMask()
void Randomize(const std::vector< int32 > &mask)
Randomize matrix row-order using mask.
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
void UnitTestStdVectorRandomizer()
const CuMatrixBase< BaseFloat > & Value()
Returns matrix-window with next mini-batch.
void Randomize(const std::vector< int32 > &mask)
Randomize matrix row-order using mask.
Shuffles rows of a matrix according to the indices in the mask.
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
const std::vector< int32 > & Generate(int32 mask_size)
Generate randomly ordered vector of integers 0..[mask_size - 1].
SubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
Definition: kaldi-matrix.h:209
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
bool IsFull()
Returns true, when capacity is full.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276
void Init(const NnetDataRandomizerOptions &conf)
Set the randomizer parameters (size)
void AddData(const std::vector< T > &v)
Add data to randomization buffer.
Randomizes elements of a vector according to a mask.
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddData(const CuMatrixBase< BaseFloat > &m)
Add data to randomization buffer.
bool IsFull()
Returns true, when capacity is full.
const Vector< BaseFloat > & Value()
Returns matrix-window with next mini-batch.
SubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Returns a sub-vector of a vector (a range of elements).
Definition: kaldi-vector.h:94