cu-rand-speed-test.cc
Go to the documentation of this file.
1 // cudamatrix/cu-rand-speed-test.cc
2 
3 // Copyright 2016 Brno University of Technology (author: Karel Vesely)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include <iostream>
22 #include <vector>
23 #include <cstdlib>
24 
25 #include "base/kaldi-common.h"
26 #include "util/common-utils.h"
27 #include "cudamatrix/cu-matrix.h"
28 #include "cudamatrix/cu-vector.h"
29 #include "cudamatrix/cu-rand.h"
30 
31 using namespace kaldi;
32 
33 
34 namespace kaldi {
35 
/// Returns a printable tag for the floating-point template argument:
/// "<double>" when sizeof(Real) is 8 bytes, "<float>" for any other size.
/// Used to label the log lines of the speed tests.
template<typename Real>
std::string NameOf() {
  return (sizeof(Real) == 8 ? "<double>" : "<float>");
}
40 
/// Converts 't' to its textual form by streaming it with operator<< into a
/// std::ostringstream (default stream formatting applies).
///
/// @param t  Any value that can be written to a std::ostream.
/// @return   The characters the stream produced for 't'.
template <typename T>
std::string ToString(const T& t) {
  std::ostringstream os;
  os << t;
  return os.str();
}
47 
48 template<typename Real>
49 std::string MeanVariance(const CuMatrixBase<Real>& m) {
50  std::ostringstream os;
51  Real mean = m.Sum() / (m.NumRows()*m.NumCols());
52  CuMatrix<Real> tmp(m);
53  tmp.Add(-mean);
54  tmp.ApplyPow(2.0);
55  Real var = tmp.Sum() / (tmp.NumRows()*tmp.NumCols());
56  return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var));
57 }
58 
59 template<typename Real>
60 std::string MeanVariance(const CuVectorBase<Real>& v) {
61  std::ostringstream os;
62  Real mean = v.Sum() / v.Dim();
63  CuVector<Real> tmp(v);
64  tmp.Add(-mean);
65  tmp.ApplyPow(2.0);
66  Real var = tmp.Sum() / tmp.Dim();
67  return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var));
68 }
69 
70 
71 template <typename Real>
73  Timer t;
74  CuRand<Real> rand;
75  CuMatrix<Real> m(249,1001, kUndefined);
76  for (int32 i = 0; i < iter; i++) {
77  rand.RandUniform(&m);
78  }
79  CuMatrix<Real> m2(256,1024, kUndefined);
80  for (int32 i = 0; i < iter; i++) {
81  rand.RandUniform(&m2);
82  }
83  // flops = number of generated random numbers per second,
84  Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed();
85  KALDI_LOG << __func__ << NameOf<Real>()
86  << " Speed was " << flops << " rand_elems/s. "
87  << "(debug " << MeanVariance(m) << ")";
88 }
89 
90 template <typename Real>
92  Timer t;
93  CuRand<Real> rand;
94  CuMatrix<Real> m(249,1001, kUndefined);
95  for (int32 i = 0; i < iter; i++) {
96  rand.RandUniform(dynamic_cast<CuMatrixBase<Real>*>(&m));
97  }
98  CuMatrix<Real> m2(256,1024, kUndefined);
99  for (int32 i = 0; i < iter; i++) {
100  rand.RandUniform(dynamic_cast<CuMatrixBase<Real>*>(&m2));
101  }
102  // flops = number of generated random numbers per second,
103  Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed();
104  KALDI_LOG << __func__ << NameOf<Real>()
105  << " Speed was " << flops << " rand_elems/s. "
106  << "(debug " << MeanVariance(m) << ")";
107 }
108 
109 template <typename Real>
111  Timer t;
112  CuRand<Real> rand;
113  CuMatrix<Real> m(249,1001, kUndefined);
114  for (int32 i = 0; i < iter; i++) {
115  rand.RandGaussian(&m);
116  }
117  CuMatrix<Real> m2(256,1024, kUndefined);
118  for (int32 i = 0; i < iter; i++) {
119  rand.RandGaussian(&m2);
120  }
121  // flops = number of generated random numbers per second,
122  Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed();
123  KALDI_LOG << __func__ << NameOf<Real>()
124  << " Speed was " << flops << " rand_elems/s. "
125  << "(debug " << MeanVariance(m) << ")";
126 }
127 
128 template <typename Real>
130  Timer t;
131  CuRand<Real> rand;
132  CuMatrix<Real> m(249,1001, kUndefined);
133  for (int32 i = 0; i < iter; i++) {
134  rand.RandGaussian(dynamic_cast<CuMatrixBase<Real>*>(&m));
135  }
136  CuMatrix<Real> m2(256,1024, kUndefined);
137  for (int32 i = 0; i < iter; i++) {
138  rand.RandGaussian(dynamic_cast<CuMatrixBase<Real>*>(&m2));
139  }
140  // flops = number of generated random numbers per second,
141  Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed();
142  KALDI_LOG << __func__ << NameOf<Real>()
143  << " Speed was " << flops << " rand_elems/s. "
144  << "(debug " << MeanVariance(m) << ")";
145 }
146 
147 template <typename Real>
149  Timer t;
150  CuRand<Real> rand;
151  CuVector<Real> v(2011, kUndefined);
152  for (int32 i = 0; i < iter; i++) {
153  rand.RandUniform(&v);
154  }
155  CuVector<Real> v2(2048, kUndefined);
156  for (int32 i = 0; i < iter; i++) {
157  rand.RandUniform(&v2);
158  }
159  // flops = number of generated random numbers per second,
160  Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed();
161  KALDI_LOG << __func__ << NameOf<Real>()
162  << " Speed was " << flops << " rand_elems/s. "
163  << "(debug " << MeanVariance(v) << ")";
164 }
165 
166 template <typename Real>
168  Timer t;
169  CuRand<Real> rand;
170  CuVector<Real> v(2011, kUndefined);
171  for (int32 i = 0; i < iter; i++) {
172  rand.RandGaussian(&v);
173  }
174  CuVector<Real> v2(2048, kUndefined);
175  for (int32 i = 0; i < iter; i++) {
176  rand.RandGaussian(&v2);
177  }
178  // flops = number of generated random numbers per second,
179  Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed();
180  KALDI_LOG << __func__ << NameOf<Real>()
181  << " Speed was " << flops << " rand_elems/s. "
182  << "(debug " << MeanVariance(v) << ")";
183 }
184 
185 } // namespace kaldi
186 
187 
188 int main() {
189  int32 iter = 10; // Be quick on CPU,
190 #if HAVE_CUDA == 1
191  for (int32 loop = 0; loop < 2; loop++) { // NO for loop if 'HAVE_CUDA != 1',
192  CuDevice::Instantiate().SetDebugStrideMode(true);
193  if ( loop == 0)
194  CuDevice::Instantiate().SelectGpuId("no");
195  else {
196  CuDevice::Instantiate().SelectGpuId("yes");
197  iter = 400; // GPUs are faster,
198  }
199 #endif
200  Timer t;
201  kaldi::CuRandUniformMatrixSpeedTest<float>(iter);
202  kaldi::CuRandUniformMatrixBaseSpeedTest<float>(iter);
203  kaldi::CuRandUniformVectorSpeedTest<float>(iter);
204  kaldi::CuRandGaussianMatrixSpeedTest<float>(iter);
205  kaldi::CuRandGaussianMatrixBaseSpeedTest<float>(iter);
206  kaldi::CuRandGaussianVectorSpeedTest<float>(iter);
207  fprintf(stderr, "---\n");
208 
209  kaldi::CuRandUniformMatrixSpeedTest<double>(iter);
210  kaldi::CuRandUniformMatrixBaseSpeedTest<double>(iter);
211  kaldi::CuRandUniformVectorSpeedTest<double>(iter);
212  kaldi::CuRandGaussianMatrixSpeedTest<double>(iter);
213  kaldi::CuRandGaussianMatrixBaseSpeedTest<double>(iter);
214  kaldi::CuRandGaussianVectorSpeedTest<double>(iter);
215  fprintf(stderr, "--- ELAPSED %fs.\n\n", t.Elapsed());
216 #if HAVE_CUDA == 1
217  } // No for loop if 'HAVE_CUDA != 1',
218  CuDevice::Instantiate().PrintProfile();
219 #endif
220  KALDI_LOG << "Tests succeeded.";
221 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void ApplyPow(Real power)
Definition: cu-matrix.h:438
std::string MeanVariance(const CuMatrixBase< Real > &m)
void RandUniform(CuMatrixBase< Real > *tgt)
Fill with uniform [0..1] floats.
Definition: cu-rand.cc:60
void Add(Real value)
Definition: cu-vector.cc:1157
Real Sum() const
Definition: cu-vector.cc:297
Real Sum() const
Definition: cu-matrix.cc:3012
std::string NameOf()
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void CuRandUniformMatrixSpeedTest(const int32 iter)
void RandGaussian(CuMatrixBase< Real > *tgt)
Fill with Normal random numbers.
Definition: cu-rand.cc:116
void CuRandGaussianMatrixBaseSpeedTest(const int32 iter)
void Add(Real value)
Definition: cu-matrix.cc:582
int main()
void ApplyPow(Real power)
Definition: cu-vector.h:147
void CuRandUniformVectorSpeedTest(const int32 iter)
void CuRandGaussianVectorSpeedTest(const int32 iter)
void CuRandUniformMatrixBaseSpeedTest(const int32 iter)
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
void CuRandGaussianMatrixSpeedTest(const int32 iter)
std::string ToString(const T &t)
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
#define KALDI_LOG
Definition: kaldi-error.h:153
double Elapsed() const
Returns time in seconds.
Definition: timer.h:74
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
Vector for CUDA computing.
Definition: matrix-common.h:72