34 using namespace kaldi;
39 template<
typename Real>
41 return (
sizeof(Real) == 8 ?
"<double>" :
"<float>");
51 for (;tim.
Elapsed() < time_in_secs; iter++) {
57 KALDI_LOG <<
"For CuVector::Softmax" << NameOf<Real>() <<
", for dim = " 58 << dim <<
", speed was " << gflops <<
" gigaflops.";
69 for (;tim.
Elapsed() < time_in_secs; iter++) {
75 KALDI_LOG <<
"For CuVector::Sum" << NameOf<Real>() <<
", for dim = " 76 << dim <<
", speed was " << gflops <<
" gigaflops.";
86 for (;tim.
Elapsed() < time_in_secs; iter++) {
93 KALDI_LOG <<
"For CuVector::CopyFromVec" << NameOf<Real>() <<
" to " 94 << NameOf<OtherReal>() <<
", for dim = " 95 << dim <<
", speed was " << gflops <<
" gigaflops.";
102 template<
typename Real>
void TestCuVectorSumChooseMinLength() {
104 for (
int dim = 100; dim < 1000000; dim = dim * 1.5 + 1 ) {
107 Real result = 0, result_cpu = 0;
112 for (; tim.
Elapsed() < time_in_secs; iter++) {
115 int dimGrid = n_blocks(M.
Dim(), dimBlock);
120 cuda_vec_sum(dimGrid, dimBlock, M.
Data(), ans.
Data(), M.
Dim(), 1);
121 CU_SAFE_CALL(cudaGetLastError());
123 result = ans_cpu.
Sum();
127 gflops = (fdim * iter) / (tim.
Elapsed() * 1.0e+09);
132 for (; tim.
Elapsed() < time_in_secs; iter++) {
134 result_cpu = M_cpu.
Sum();
138 gflops_cpu = (fdim * iter) / (tim.
Elapsed() * 1.0e+09);
140 KALDI_LOG <<
"CuVector::Sum" << NameOf<Real>() <<
", dim: " << dim
141 <<
", speed: GPU " << (gflops > gflops_cpu ?
">" :
"<")
142 <<
" CPU, GPU speed: " << gflops <<
" Gflops. CPU speed: " 143 << gflops_cpu <<
" Gflops. Result diff: " << (result - result_cpu);
155 for (;tim.
Elapsed() < time_in_secs; iter++) {
163 KALDI_LOG <<
"For CuVector::VecVecOne" << NameOf<Real>() <<
", for dim = " 164 << dim <<
", speed was " << gflops <<
" gigaflops.";
183 for (;tim.
Elapsed() < time_in_secs; iter++) {
189 KALDI_LOG <<
"For CuVector::AddDiagMatMat" << NameOf<Real>()
190 << (transN ==
kNoTrans ?
"[no-trans],":
"[trans],")
191 << (transO ==
kNoTrans ?
"[no-trans],":
"[trans],")
192 <<
" for dim = "<< dim <<
", speed was " << gflops <<
" gigaflops.";
199 int32 size = 1024 * 32;
208 for (; tim.
Elapsed() < time_in_secs; iter++) {
214 KALDI_LOG <<
"For CuVector::AddDiagMat2Shapes" << NameOf<Real>()
215 << (trans ==
kTrans ?
"[trans]" :
"[no-trans]") <<
", for dim = (" 216 << size / dim <<
", " << dim <<
"), speed was " << gflops
232 for (;tim.
Elapsed() < time_in_secs; iter++) {
238 KALDI_LOG <<
"For CuVector::AddDiagMat2" << NameOf<Real>()
239 << (trans ==
kTrans ?
"[trans]" :
"[no-trans]") <<
", for dim = " 240 << dim <<
", speed was " << gflops <<
" gigaflops.";
254 for (;tim.
Elapsed() < time_in_secs; iter++) {
260 KALDI_LOG <<
"For CuVector::AddRowSumMat" << NameOf<Real>()
261 << (trans ==
kTrans ?
"[trans]" :
"[no-trans]") <<
", for dim = " 262 << dim <<
", speed was " << gflops <<
" gigaflops.";
276 for (;tim.
Elapsed() < time_in_secs; iter++) {
282 KALDI_LOG <<
"For CuVector::AddColSumMat" << NameOf<Real>()
283 << (trans ==
kTrans ?
"[trans]" :
"[no-trans]") <<
", for dim = " 284 << dim <<
", speed was " << gflops <<
" gigaflops.";
292 Real threshold =
RandInt(-35000, 35000) / Real(100);
296 for (;tim.
Elapsed() < time_in_secs; iter++) {
303 KALDI_LOG <<
"For CuVector::ApplyFloor" << NameOf<Real>() <<
", for dim = " 304 << dim <<
", speed was " << gflops <<
" gigaflops.";
313 Real threshold =
RandInt(-35000, 35000) / Real(100);
317 for (;tim.
Elapsed() < time_in_secs; iter++) {
323 KALDI_LOG <<
"For CuVector::ApplyFloor (no count variety)" << NameOf<Real>()
324 <<
", for dim = " << dim <<
", speed was " << gflops
334 Real threshold =
RandInt(-35000, 35000) / Real(100);
338 for (;tim.
Elapsed() < time_in_secs; iter++) {
345 KALDI_LOG <<
"For CuVector::ApplyCeiling" << NameOf<Real>() <<
", for dim = " 346 << dim <<
", speed was " << gflops <<
" gigaflops.";
355 Real threshold =
RandInt(-35000, 35000) / Real(100);
359 for (;tim.
Elapsed() < time_in_secs; iter++) {
365 KALDI_LOG <<
"For CuVector::ApplyCeiling (no count variety)" << NameOf<Real>()
366 <<
", for dim = " << dim <<
", speed was " << gflops
380 transM != transN ? num_cols : num_rows);
387 for (;tim.
Elapsed() < time_in_secs; iter++) {
394 KALDI_LOG <<
"For CuVector::AddDiagMatMat" << NameOf<Real>()
395 << (transM ==
kNoTrans ?
"[no-trans],":
"[trans],")
396 << (transN ==
kNoTrans ?
"[no-trans],":
"[trans],")
397 <<
" for dim = "<< num_rows <<
", " << num_cols
398 <<
", speed was " << gflops <<
" gigaflops.";
403 const size_t a = 1 << 5;
404 const size_t b = 1 << 8;
405 for (
size_t i = a;
i <= b;
i *= 2) {
406 for (
size_t j = a;
j <= b;
j *= 2) {
407 if (
i *
j <= a * b) {
416 std::vector<int32> sizes;
417 for (
int i = 32;
i <= 1024;
i *= 2) {
420 int32 ns = sizes.size();
421 for (
int32 s = 0; s < ns; s++)
422 TestCuVectorSoftmax<Real>(sizes[s]);
424 TestCuVectorSumChooseMinLength<Real>();
426 for (
int32 s = 0; s < ns; s++)
427 TestCuVectorSum<Real>(sizes[s]);
428 for (
int32 s = 0; s < ns; s++)
429 TestCuVectorVecVecOne<Real>(sizes[s]);
430 for (
int32 s = 0; s < ns; s++)
431 TestCuVectorCopyFromVec<Real, float>(sizes[s]);
432 for (
int32 s = 0; s < ns; s++)
433 TestCuVectorCopyFromVec<Real, double>(sizes[s]);
434 for (
int32 s = 0; s < ns; s++) {
438 TestCuVectorAddDiagMatMat<Real>(sizes[s],
kTrans,
kTrans);
440 for (
int32 s = 0; s < ns; s++) {
441 TestCuVectorAddDiagMat2OnVariousShapes<Real>(sizes[s],
kNoTrans);
442 TestCuVectorAddDiagMat2OnVariousShapes<Real>(sizes[s],
kTrans);
444 for (
int32 s = 0; s < ns; s++) {
445 TestCuVectorAddDiagMat2<Real>(sizes[s],
kNoTrans);
446 TestCuVectorAddDiagMat2<Real>(sizes[s],
kTrans);
448 for (
int32 s = 0; s < ns; s++) {
449 TestCuVectorAddRowSumMat<Real>(sizes[s],
kNoTrans);
450 TestCuVectorAddRowSumMat<Real>(sizes[s],
kTrans);
452 for (
int32 s = 0; s < ns; s++) {
453 TestCuVectorAddColSumMat<Real>(sizes[s],
kNoTrans);
454 TestCuVectorAddColSumMat<Real>(sizes[s],
kTrans);
456 for (
int32 s = 0; s < ns; s++) {
457 TestCuVectorApplyFloor<Real>(sizes[s]);
458 TestCuVectorApplyFloorNoCount<Real>(sizes[s]);
460 for (
int32 s = 0; s < ns; s++) {
461 TestCuVectorApplyCeiling<Real>(sizes[s]);
462 TestCuVectorApplyCeilingNoCount<Real>(sizes[s]);
475 CuDevice::Instantiate().SelectGpuId(
"yes");
478 kaldi::CudaVectorSpeedTest<float>();
480 if (CuDevice::Instantiate().DoublePrecisionSupported()) {
481 kaldi::CudaVectorSpeedTest<double>();
483 KALDI_WARN <<
"Double precision not supported";
486 kaldi::CudaVectorSpeedTest<double>();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void TestCuVectorApplyFloor(int32 dim)
void ApplyCeiling(Real ceiling_val, MatrixIndexT *ceiled_count=NULL)
void TestCuVectorSoftmax(int32 dim)
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
void TestCuVectorSum(int32 dim)
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=NULL)
void TestCuVectorAddDiagMat2OnVariousShapes(int32 dim, MatrixTransposeType trans)
void TestCuVectorVecVecOne(int32 dim)
void AddDiagMatMat(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, const CuMatrixBase< Real > &N, MatrixTransposeType transN, Real beta=1.0)
Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoT...
void SetVerboseLevel(int32 i)
This should rarely be used, except by programs using Kaldi as a library; command-line programs set the ...
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimensions do not match.
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
void TestCuVectorAddDiagMat2(int32 dim, MatrixTransposeType trans)
void TestCuVectorApplyCeilingNoCount(int32 dim)
void TestCuVectorAddDiagMatMat(int32 dim, MatrixTransposeType transN, MatrixTransposeType transO)
void TestCuVectorCopyFromVec(int32 dim)
Real Sum() const
Returns sum of the elements.
A class representing a vector.
void TestCuVectorAddDiagMatMatShape(int32 num_rows, int32 num_cols, MatrixTransposeType transM, MatrixTransposeType transN)
Real * Data()
Returns a pointer to the start of the vector's data.
void CudaVectorSpeedTest()
void TestCuVectorApplyCeiling(int32 dim)
void TestCuVectorAddColSumMat(int32 dim, MatrixTransposeType trans)
void TestCuVectorApplyFloorNoCount(int32 dim)
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns the dot product of a and b.
double Elapsed() const
Returns time in seconds.
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
MatrixIndexT Dim() const
Dimensions.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
void TestCuVectorAddRowSumMat(int32 dim, MatrixTransposeType trans)