CuRand< Real > Class Template Reference

#include <cu-common.h>

Public Member Functions

void SeedGpu ()
 
void RandUniform (CuMatrixBase< Real > *tgt)
 Fill with uniform [0..1] floats. More...
 
void RandUniform (CuMatrix< Real > *tgt)
 
void RandUniform (CuVectorBase< Real > *tgt)
 
void RandGaussian (CuMatrixBase< Real > *tgt)
 Fill with Normal random numbers. More...
 
void RandGaussian (CuMatrix< Real > *tgt)
 
void RandGaussian (CuVectorBase< Real > *tgt)
 
void BinarizeProbs (const CuMatrix< Real > &probs, CuMatrix< Real > *states)
 Align probabilities to discrete 0/1 states (using uniform sampling). More...
 
void AddGaussNoise (CuMatrix< Real > *tgt, Real gscale=1.0)
 Add Gaussian noise to each element. More...
 

Detailed Description

template<typename Real>
class kaldi::CuRand< Real >

Definition at line 152 of file cu-common.h.

Member Function Documentation

◆ AddGaussNoise()

void AddGaussNoise ( CuMatrix< Real > *  tgt,
Real  gscale = 1.0 
)

Add Gaussian noise to each element.

Fills a temporary matrix of the same dimensions as tgt with Gaussian random numbers and adds it to tgt, scaled by gscale.

Definition at line 211 of file cu-rand.cc.

Referenced by main(), and CuRand< float >::SeedGpu().

211  {
212  // Use the option kStrideEqualNumCols to ensure consistency (because when
213  // memory is nearly exhausted, the stride of CudaMallocPitch may vary).
214  CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols(),
215  kUndefined, kStrideEqualNumCols);
216  this->RandGaussian(&tmp);
217  tgt->AddMat(gscale, tmp);
218 }
void RandGaussian(CuMatrixBase< Real > *tgt)
Fill with Normal random numbers.
Definition: cu-rand.cc:116

◆ BinarizeProbs()

void BinarizeProbs ( const CuMatrix< Real > &  probs,
CuMatrix< Real > *  states 
)

Align probabilities to discrete 0/1 states (using uniform sampling).

Convert probabilities to binary values.

Definition at line 201 of file cu-rand.cc.

Referenced by main(), and CuRand< float >::SeedGpu().

201  {
202  CuMatrix<Real> tmp(probs.NumRows(), probs.NumCols());
203  this->RandUniform(&tmp); // [0..1]
204  tmp.Scale(-1.0); // [-1..0]
205  tmp.AddMat(1.0, probs); // [-1..+1]
206  states->Heaviside(tmp); // negative
207 }
void RandUniform(CuMatrixBase< Real > *tgt)
Fill with uniform [0..1] floats.
Definition: cu-rand.cc:60

◆ RandGaussian() [1/3]

void RandGaussian ( CuMatrixBase< Real > *  tgt)

Fill with Normal random numbers.

Definition at line 116 of file cu-rand.cc.

Referenced by kaldi::CuRandGaussianMatrixBaseSpeedTest(), kaldi::CuRandGaussianMatrixSpeedTest(), kaldi::CuRandGaussianVectorSpeedTest(), CuRand< float >::SeedGpu(), CuPackedMatrix< Real >::SetRandn(), CuVectorBase< float >::SetRandn(), and CuMatrixBase< float >::SetRandn().

116  {
117 #if HAVE_CUDA == 1
118  if (CuDevice::Instantiate().Enabled()) {
119  CuTimer tim;
120  // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
121  // so we should not use it to generate random numbers over whole stride.
122  // Also, we ensure to have 'even' number of elements for calling 'curand'
123  // by possibly adding one column. Even number of elements is required by
124  // curandGenerateNormal(), curandGenerateNormalDouble().
125  // Use the option kStrideEqualNumCols to ensure consistency
126  // (because when memory is nearly exhausted, the stride of CudaMallocPitch
127  // may vary).
128  MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1,
129  CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined,
130  kStrideEqualNumCols);
131  CURAND_SAFE_CALL(curandGenerateNormalWrap(
132  GetCurandHandle(), tmp.Data(), tmp.NumRows()*tmp.Stride()));
133  tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
134  CuDevice::Instantiate().AccuProfile(__func__, tim);
135  } else
136 #endif
137  {
138  tgt->Mat().SetRandn();
139  }
140 }
int32 MatrixIndexT
Definition: matrix-common.h:98

◆ RandGaussian() [2/3]

void RandGaussian ( CuMatrix< Real > *  tgt)

Definition at line 143 of file cu-rand.cc.

143  {
144 #if HAVE_CUDA == 1
145  if (CuDevice::Instantiate().Enabled()) {
146  CuTimer tim;
147  // Here we don't need to use 'tmp' matrix, if the number of elements is even,
148  MatrixIndexT num_elements = tgt->NumRows() * tgt->Stride();
149  if (0 == (num_elements % 2)) {
150  CURAND_SAFE_CALL(curandGenerateNormalWrap(
151  GetCurandHandle(), tgt->Data(), num_elements));
152  } else {
153  // We use 'tmp' matrix with one column added, this guarantees an even
154  // number of elements. Use the option kStrideEqualNumCols to ensure
155  // consistency (because when memory is nearly exhausted, the stride of
156  // CudaMallocPitch may vary).
157  MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2); // + 0 or 1,
158  CuMatrix<Real> tmp(tgt->NumRows(), num_cols_even, kUndefined,
159  kStrideEqualNumCols);
160  CURAND_SAFE_CALL(curandGenerateNormalWrap(
161  GetCurandHandle(), tmp.Data(), tmp.NumRows() * tmp.Stride()));
162  tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
163  }
164  CuDevice::Instantiate().AccuProfile(__func__, tim);
165  } else
166 #endif
167  {
168  tgt->Mat().SetRandn();
169  }
170 }
int32 MatrixIndexT
Definition: matrix-common.h:98

◆ RandGaussian() [3/3]

void RandGaussian ( CuVectorBase< Real > *  tgt)

Definition at line 173 of file cu-rand.cc.

173  {
174 #if HAVE_CUDA == 1
175  if (CuDevice::Instantiate().Enabled()) {
176  CuTimer tim;
177  // To ensure 'even' number of elements, we use 'tmp' vector of even length.
178  // Even number of elements is required by 'curand' functions:
179  // curandGenerateNormal(), curandGenerateNormalDouble().
180  MatrixIndexT num_elements = tgt->Dim();
181  if (0 == (num_elements % 2)) {
182  CURAND_SAFE_CALL(curandGenerateNormalWrap(
183  GetCurandHandle(), tgt->Data(), tgt->Dim()));
184  } else {
185  MatrixIndexT dim_even = tgt->Dim() + (tgt->Dim() % 2); // + 0 or 1,
186  CuVector<Real> tmp(dim_even, kUndefined);
187  CURAND_SAFE_CALL(curandGenerateNormalWrap(
188  GetCurandHandle(), tmp.Data(), tmp.Dim()));
189  tgt->CopyFromVec(tmp.Range(0,tgt->Dim()));
190  }
191  CuDevice::Instantiate().AccuProfile(__func__, tim);
192  } else
193 #endif
194  {
195  tgt->Vec().SetRandn();
196  }
197 }
int32 MatrixIndexT
Definition: matrix-common.h:98

◆ RandUniform() [1/3]

void RandUniform ( CuMatrixBase< Real > *  tgt)

Fill with uniform [0..1] floats.

Definition at line 60 of file cu-rand.cc.

Referenced by kaldi::CuRandUniformMatrixBaseSpeedTest(), kaldi::CuRandUniformMatrixSpeedTest(), kaldi::CuRandUniformVectorSpeedTest(), CuRand< float >::SeedGpu(), CuVectorBase< float >::SetRandUniform(), and CuMatrixBase< float >::SetRandUniform().

60  {
61 #if HAVE_CUDA == 1
62  if (CuDevice::Instantiate().Enabled()) {
63  CuTimer tim;
64  // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
65  // so we should not use it to generate random numbers over whole stride.
66  // Use the option kStrideEqualNumCols to ensure consistency
67  // (because when memory is nearly exhausted, the stride of CudaMallocPitch
68  // may vary).
69  CuMatrix<Real> tmp(tgt->NumRows(), tgt->NumCols(), kUndefined,
70  kStrideEqualNumCols);
71  size_t s = static_cast<size_t>(tmp.NumRows()) * static_cast<size_t>(tmp.Stride());
72  CURAND_SAFE_CALL(curandGenerateUniformWrap(
73  GetCurandHandle(), tmp.Data(), s));
74  tgt->CopyFromMat(tmp);
75  CuDevice::Instantiate().AccuProfile(__func__, tim);
76  } else
77 #endif
78  {
79  tgt->Mat().SetRandUniform();
80  }
81 }

◆ RandUniform() [2/3]

void RandUniform ( CuMatrix< Real > *  tgt)

Definition at line 84 of file cu-rand.cc.

84  {
85 #if HAVE_CUDA == 1
86  if (CuDevice::Instantiate().Enabled()) {
87  CuTimer tim;
88  // Here we don't need to use 'tmp' matrix,
89  size_t s = static_cast<size_t>(tgt->NumRows()) * static_cast<size_t>(tgt->Stride());
90  CURAND_SAFE_CALL(curandGenerateUniformWrap(
91  GetCurandHandle(), tgt->Data(), s));
92  CuDevice::Instantiate().AccuProfile(__func__, tim);
93  } else
94 #endif
95  {
96  tgt->Mat().SetRandUniform();
97  }
98 }

◆ RandUniform() [3/3]

void RandUniform ( CuVectorBase< Real > *  tgt)

Definition at line 101 of file cu-rand.cc.

101  {
102 #if HAVE_CUDA == 1
103  if (CuDevice::Instantiate().Enabled()) {
104  CuTimer tim;
105  CURAND_SAFE_CALL(curandGenerateUniformWrap(
106  GetCurandHandle(), tgt->Data(), tgt->Dim()));
107  CuDevice::Instantiate().AccuProfile(__func__, tim);
108  } else
109 #endif
110  {
111  tgt->Vec().SetRandUniform();
112  }
113 }

◆ SeedGpu()

void SeedGpu ( )
inline

Definition at line 34 of file cu-rand.h.

34  {
35  #if HAVE_CUDA == 1
36  CuDevice::Instantiate().SeedGpu();
37  #endif
38  }

The documentation for this class was generated from the following files: