cu-rand.cc
Go to the documentation of this file.
1 // cudamatrix/cu-rand.cc
2 
3 // Copyright 2016-2017 Brno University of Technology (author Karel Vesely)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "cudamatrix/cu-rand.h"
21 
22 namespace kaldi {
23 
24 #if HAVE_CUDA == 1
25 
28 template<typename Real>
29 curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, Real *ptr, size_t num);
30 //
31 template<>
32 curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, float *ptr, size_t num) {
33  return curandGenerateUniform(gen, ptr, num);
34 }
35 template<>
36 curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, double *ptr, size_t num) {
37  return curandGenerateUniformDouble(gen, ptr, num);
38 }
39 
41 template<typename Real>
42 curandStatus_t curandGenerateNormalWrap(
43  curandGenerator_t gen, Real *ptr, size_t num);
44 //
45 template<>
46 curandStatus_t curandGenerateNormalWrap<float>(
47  curandGenerator_t gen, float *ptr, size_t num) {
48  return curandGenerateNormal(gen, ptr, num, 0.0 /*mean*/, 1.0 /*stddev*/);
49 }
50 template<>
51 curandStatus_t curandGenerateNormalWrap<double>(
52  curandGenerator_t gen, double *ptr, size_t num) {
53  return curandGenerateNormalDouble(gen, ptr, num, 0.0 /*mean*/, 1.0 /*stddev*/);
54 }
56 #endif
57 
58 
59 template<typename Real>
61 #if HAVE_CUDA == 1
62  if (CuDevice::Instantiate().Enabled()) {
63  CuTimer tim;
64  // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
65  // so we should not use it to generate random numbers over whole stride.
66  // Use the option kStrideEqualNumCols to ensure consistency
67  // (because when memory is nearly exhausted, the stride of CudaMallocPitch
68  // may vary).
69  CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols(), kUndefined,
71  size_t s = static_cast<size_t>(tmp.NumRows()) * static_cast<size_t>(tmp.Stride());
72  CURAND_SAFE_CALL(curandGenerateUniformWrap(
73  GetCurandHandle(), tmp.Data(), s));
74  tgt->CopyFromMat(tmp);
75  CuDevice::Instantiate().AccuProfile(__func__, tim);
76  } else
77 #endif
78  {
79  tgt->Mat().SetRandUniform();
80  }
81 }
82 
83 template<typename Real>
85 #if HAVE_CUDA == 1
86  if (CuDevice::Instantiate().Enabled()) {
87  CuTimer tim;
88  // Here we don't need to use 'tmp' matrix,
89  size_t s = static_cast<size_t>(tgt->NumRows()) * static_cast<size_t>(tgt->Stride());
90  CURAND_SAFE_CALL(curandGenerateUniformWrap(
91  GetCurandHandle(), tgt->Data(), s));
92  CuDevice::Instantiate().AccuProfile(__func__, tim);
93  } else
94 #endif
95  {
96  tgt->Mat().SetRandUniform();
97  }
98 }
99 
100 template<typename Real>
102 #if HAVE_CUDA == 1
103  if (CuDevice::Instantiate().Enabled()) {
104  CuTimer tim;
105  CURAND_SAFE_CALL(curandGenerateUniformWrap(
106  GetCurandHandle(), tgt->Data(), tgt->Dim()));
107  CuDevice::Instantiate().AccuProfile(__func__, tim);
108  } else
109 #endif
110  {
111  tgt->Vec().SetRandUniform();
112  }
113 }
114 
115 template<typename Real>
117 #if HAVE_CUDA == 1
118  if (CuDevice::Instantiate().Enabled()) {
119  CuTimer tim;
120  // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
121  // so we should not use it to generate random numbers over whole stride.
122  // Also, we ensure to have 'even' number of elements for calling 'curand'
123  // by possibly adding one column. Even number of elements is required by
124  // curandGenerateUniform(), curandGenerateUniformDouble().
125  // Use the option kStrideEqualNumCols to ensure consistency
126  // (because when memory is nearly exhausted, the stride of CudaMallocPitch
127  // may vary).
128  MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1,
129  CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined,
131  CURAND_SAFE_CALL(curandGenerateNormalWrap(
132  GetCurandHandle(), tmp.Data(), tmp.NumRows()*tmp.Stride()));
133  tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
134  CuDevice::Instantiate().AccuProfile(__func__, tim);
135  } else
136 #endif
137  {
138  tgt->Mat().SetRandn();
139  }
140 }
141 
142 template<typename Real>
144 #if HAVE_CUDA == 1
145  if (CuDevice::Instantiate().Enabled()) {
146  CuTimer tim;
147  // Here we don't need to use 'tmp' matrix, if the number of elements is even,
148  MatrixIndexT num_elements = tgt->NumRows() * tgt->Stride();
149  if (0 == (num_elements % 2)) {
150  CURAND_SAFE_CALL(curandGenerateNormalWrap(
151  GetCurandHandle(), tgt->Data(), num_elements));
152  } else {
153  // We use 'tmp' matrix with one column added, this guarantees an even
154  // number of elements. Use the option kStrideEqualNumCols to ensure
155  // consistency (because when memory is nearly exhausted, the stride of
156  // CudaMallocPitch may vary).
157  MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1,
158  CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined,
160  CURAND_SAFE_CALL(curandGenerateNormalWrap(
161  GetCurandHandle(), tmp.Data(), tmp.NumRows() * tmp.Stride()));
162  tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
163  }
164  CuDevice::Instantiate().AccuProfile(__func__, tim);
165  } else
166 #endif
167  {
168  tgt->Mat().SetRandn();
169  }
170 }
171 
172 template<typename Real>
174 #if HAVE_CUDA == 1
175  if (CuDevice::Instantiate().Enabled()) {
176  CuTimer tim;
177  // To ensure 'even' number of elements, we use 'tmp' vector of even length.
178  // Even number of elements is required by 'curand' functions:
179  // curandGenerateUniform(), curandGenerateUniformDouble().
180  MatrixIndexT num_elements = tgt->Dim();
181  if (0 == (num_elements % 2)) {
182  CURAND_SAFE_CALL(curandGenerateNormalWrap(
183  GetCurandHandle(), tgt->Data(), tgt->Dim()));
184  } else {
185  MatrixIndexT dim_even = tgt->Dim() + (tgt->Dim() % 2); // + 0 or 1,
186  CuVector<Real> tmp(dim_even, kUndefined);
187  CURAND_SAFE_CALL(curandGenerateNormalWrap(
188  GetCurandHandle(), tmp.Data(), tmp.Dim()));
189  tgt->CopyFromVec(tmp.Range(0,tgt->Dim()));
190  }
191  CuDevice::Instantiate().AccuProfile(__func__, tim);
192  } else
193 #endif
194  {
195  tgt->Vec().SetRandn();
196  }
197 }
198 
200 template<typename Real>
202  CuMatrix<Real> tmp(probs.NumRows(), probs.NumCols());
203  this->RandUniform(&tmp); // [0..1]
204  tmp.Scale(-1.0); // [-1..0]
205  tmp.AddMat(1.0, probs); // [-1..+1]
206  states->Heaviside(tmp); // negative
207 }
208 
210 template<typename Real>
212  // Use the option kStrideEqualNumCols to ensure consistency (because when
213  // memory is nearly exhausted, the stride of CudaMallocPitch may vary).
214  CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols(),
216  this->RandGaussian(&tmp);
217  tgt->AddMat(gscale, tmp);
218 }
219 
220 // Explicit instantiation of CuRand for the two supported floating-point
// types, so the member-function definitions in this file are emitted for
// both float and double.
221 template class CuRand<float>;
222 template class CuRand<double>;
223 
224 } // namespace,
const MatrixBase< Real > & Mat() const
Definition: cu-matrix.h:755
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT Stride() const
Definition: cu-matrix.h:217
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
void RandUniform(CuMatrixBase< Real > *tgt)
Fill with uniform [0..1] floats,.
Definition: cu-rand.cc:60
void BinarizeProbs(const CuMatrix< Real > &probs, CuMatrix< Real > *states)
align probabilities to discrete 0/1 states (use uniform sampling),
Definition: cu-rand.cc:201
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
Definition: cu-matrix.cc:954
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void AddGaussNoise(CuMatrix< Real > *tgt, Real gscale=1.0)
add gaussian noise to each element,
Definition: cu-rand.cc:211
void RandGaussian(CuMatrixBase< Real > *tgt)
Fill with Normal random numbers,.
Definition: cu-rand.cc:116
int32 MatrixIndexT
Definition: matrix-common.h:98
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimension do not match.
Definition: cu-vector.cc:1078
void Heaviside(const CuMatrixBase< Real > &src)
Set each element to the Heaviside function of the corresponding element of "src", which we define as ...
Definition: cu-matrix.cc:2435
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
CuSubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
Definition: cu-vector.h:160
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: cu-vector.h:72
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
const Matrix< Real > & Mat() const
Definition: cu-matrix.h:879
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
Vector for CUDA computing.
Definition: matrix-common.h:72