doc/cu-vector-speed-test_8cc_source.html

 // cudamatrix/cu-vector-speed-test.cc

 // Copyright      2013  Johns Hopkins University (author: Daniel Povey)
 //                2017  Daniel Galvez
 //           2016-2018  Shiyin Kang


 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.


 #include <iostream>
 #include <vector>
 #include <cstdlib>

 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "cudamatrix/cu-matrix.h"
 #include "cudamatrix/cu-vector.h"
 #include "cudamatrix/cu-math.h"

 using namespace kaldi;


 namespace kaldi {

 // Returns a printable tag for the element type used by a benchmark:
 // "<double>" when Real is 8 bytes wide, "<float>" for any other width.
 template<typename Real>
 std::string NameOf() {
   const bool is_eight_bytes = (sizeof(Real) == 8);
   if (is_eight_bytes)
     return "<double>";
   return "<float>";
 }

 // Benchmarks CuVector<Real>::ApplySoftMax() on a vector of length `dim`
 // filled by SetRandn().  The call is repeated until about 0.02 seconds have
 // elapsed; the logged figure is dim * iterations / (elapsed seconds * 1e9).
 template<typename Real>
 void TestCuVectorSoftmax(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> vec(dim);
   vec.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     vec.ApplySoftMax();
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::Softmax" << NameOf<Real>()
             << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::Sum() on a vector of length `dim` filled by
 // SetRandn().  The returned sum is discarded; only the call rate matters.
 // The logged figure is dim * iterations / (elapsed seconds * 1e9).
 template<typename Real>
 void TestCuVectorSum(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> vec(dim);
   vec.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     vec.Sum();
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::Sum" << NameOf<Real>()
             << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }

 // Benchmarks copying a CuVector<Real> of length `dim` into a
 // CuVector<OtherReal> via CopyFromVec().  A fresh destination vector is
 // constructed on every iteration, so its construction is part of the timed
 // region as well.
 template<typename Real, typename OtherReal>
 void TestCuVectorCopyFromVec(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> src(dim);
   src.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     CuVector<OtherReal> dst(dim);
     dst.CopyFromVec(src);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::CopyFromVec" << NameOf<Real>()
             << " to " << NameOf<OtherReal>()
             << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 #if HAVE_CUDA == 1
 // This test choose the min length of vectors to be reduced on GPU.
 // Smaller vector will be copied to RAM and reduced on CPU.
 template<typename Real> void TestCuVectorSumChooseMinLength() {
   BaseFloat time_in_secs = 0.02;
   for (int dim = 100; dim < 1000000; dim = dim * 1.5 + 1 ) {
     CuVector<Real> M(dim);
     BaseFloat gflops, gflops_cpu;
     Real result = 0, result_cpu = 0;
     M.SetRandn();
     {
       Timer tim;
       int32 iter = 0;
       for (; tim.Elapsed() < time_in_secs; iter++) {
         // Force GPU reduction
         int dimBlock = CU1DBLOCK;
         int dimGrid = n_blocks(M.Dim(), dimBlock);
         if (dimGrid > 256) {
           dimGrid = 256;
         }
         CuVector<Real> ans(dimGrid, kUndefined);
         cuda_vec_sum(dimGrid, dimBlock, M.Data(), ans.Data(), M.Dim(), 1);
         CU_SAFE_CALL(cudaGetLastError());
         Vector<Real> ans_cpu(ans);
         result = ans_cpu.Sum();
       }

       BaseFloat fdim = dim;
       gflops = (fdim * iter) / (tim.Elapsed() * 1.0e+09);
     }
     {
       Timer tim;
       int32 iter = 0;
       for (; tim.Elapsed() < time_in_secs; iter++) {
         Vector<Real> M_cpu(M);
         result_cpu = M_cpu.Sum();
       }

       BaseFloat fdim = dim;
       gflops_cpu = (fdim * iter) / (tim.Elapsed() * 1.0e+09);
     }
     KALDI_LOG << "CuVector::Sum" << NameOf<Real>() << ", dim: " << dim
               << ", speed: GPU " << (gflops > gflops_cpu ? ">" : "<")
               << " CPU, GPU speed: " << gflops << " Gflops. CPU speed: "
               << gflops_cpu << " Gflops. Result diff: " << (result - result_cpu);
   }
 }
 #endif

 // Benchmarks VecVec() between a SetRandn()-filled vector of length `dim` and
 // a vector of ones.  The ones vector is constructed and filled inside the
 // loop, so that setup is included in the measured time.
 template<typename Real>
 void TestCuVectorVecVecOne(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> vec(dim);
   vec.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     CuVector<Real> all_ones(dim);
     all_ones.Set(1.0);
     VecVec(vec, all_ones);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::VecVecOne" << NameOf<Real>()
             << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::AddDiagMatMat() with two square dim x dim
 // matrices and the given transpose settings (alpha = 1.0, beta = 1.0).
 // The logged figure is dim * dim * iterations / (elapsed seconds * 1e9).
 template<typename Real>
 void TestCuVectorAddDiagMatMat(int32 dim,
                                MatrixTransposeType transN,
                                MatrixTransposeType transO) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> diag(dim);
   diag.SetRandn();
   CuMatrix<Real> lhs(dim, dim);
   CuMatrix<Real> rhs(dim, dim);
   lhs.SetRandn();
   rhs.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     diag.AddDiagMatMat(1.0, lhs, transN, rhs, transO, 1.0);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops =
       (dim_f * dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   const char *lhs_tag = (transN == kNoTrans ? "[no-trans]," : "[trans],");
   const char *rhs_tag = (transO == kNoTrans ? "[no-trans]," : "[trans],");
   KALDI_LOG << "For CuVector::AddDiagMatMat" << NameOf<Real>()
             << lhs_tag << rhs_tag
             << " for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::AddDiagMat2() on matrices that all hold
 // 1024 * 32 elements but differ in shape: the matrix has (size / dim) rows
 // and `dim` columns.  The output vector has one entry per row when
 // trans == kNoTrans and one entry per column otherwise.
 template<typename Real>
 void TestCuVectorAddDiagMat2OnVariousShapes(int32 dim,
                                             MatrixTransposeType trans) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   const int32 size = 1024 * 32;        // total number of matrix elements
   const int32 num_rows = size / dim;

   CuVector<Real> diag(trans == kNoTrans ? num_rows : dim);
   diag.SetRandn();
   CuMatrix<Real> mat(num_rows, dim);
   mat.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     diag.AddDiagMat2(1.0, mat, trans, 0.0);
     ++num_iters;
   }

   const BaseFloat size_f = size;
   const BaseFloat gflops = (size_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   const char *trans_tag = (trans == kTrans ? "[trans]" : "[no-trans]");
   KALDI_LOG << "For CuVector::AddDiagMat2Shapes" << NameOf<Real>()
             << trans_tag << ", for dim = ("
             << num_rows << ", " << dim << "), speed was " << gflops
             << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::AddDiagMat2() on a square dim x dim matrix with
 // the given transpose setting (alpha = 1.0, beta = 0.0).
 // The logged figure is dim * dim * iterations / (elapsed seconds * 1e9).
 template<typename Real>
 void TestCuVectorAddDiagMat2(int32 dim, MatrixTransposeType trans) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> diag(dim);
   diag.SetRandn();
   CuMatrix<Real> mat(dim, dim);
   mat.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     diag.AddDiagMat2(1.0, mat, trans, 0.0);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops =
       (dim_f * dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   const char *trans_tag = (trans == kTrans ? "[trans]" : "[no-trans]");
   KALDI_LOG << "For CuVector::AddDiagMat2" << NameOf<Real>()
             << trans_tag << ", for dim = "
             << dim << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::AddRowSumMat() on a square dim x dim matrix
 // (alpha = 1.0, beta = 0.5).  The call is repeated until about 0.02 seconds
 // have elapsed; the logged figure is
 // dim * dim * iterations / (elapsed seconds * 1e9).
 //
 // `trans` is kept in the signature so existing callers continue to compile,
 // but it is ignored: AddRowSumMat() takes no transpose argument, so the
 // measured work is the same for either value.  For that reason the log line
 // carries no "[trans]"/"[no-trans]" tag, which would wrongly suggest that two
 // different code paths were being timed.
 template<typename Real>
 void TestCuVectorAddRowSumMat(int32 dim, MatrixTransposeType trans) {
   (void)trans;  // intentionally unused, see the comment above

   const BaseFloat time_in_secs = 0.02;
   CuVector<Real> v(dim);
   v.SetRandn();
   CuMatrix<Real> N(dim, dim);
   N.SetRandn();

   Timer tim;
   int32 iter = 0;
   for (; tim.Elapsed() < time_in_secs; iter++) {
     v.AddRowSumMat(1.0, N, 0.5);
   }

   const BaseFloat fdim = dim;
   const BaseFloat gflops = (fdim * fdim * iter) / (tim.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::AddRowSumMat" << NameOf<Real>()
             << ", for dim = " << dim << ", speed was " << gflops
             << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::AddColSumMat() on a square dim x dim matrix
 // (alpha = 1.0, beta = 0.5).  The call is repeated until about 0.02 seconds
 // have elapsed; the logged figure is
 // dim * dim * iterations / (elapsed seconds * 1e9).
 //
 // `trans` is kept in the signature so existing callers continue to compile,
 // but it is ignored: AddColSumMat() takes no transpose argument, so the
 // measured work is the same for either value.  For that reason the log line
 // carries no "[trans]"/"[no-trans]" tag, which would wrongly suggest that two
 // different code paths were being timed.
 template<typename Real>
 void TestCuVectorAddColSumMat(int32 dim, MatrixTransposeType trans) {
   (void)trans;  // intentionally unused, see the comment above

   const BaseFloat time_in_secs = 0.02;
   CuVector<Real> v(dim);
   v.SetRandn();
   CuMatrix<Real> N(dim, dim);
   N.SetRandn();

   Timer tim;
   int32 iter = 0;
   for (; tim.Elapsed() < time_in_secs; iter++) {
     v.AddColSumMat(1.0, N, 0.5);
   }

   const BaseFloat fdim = dim;
   const BaseFloat gflops = (fdim * fdim * iter) / (tim.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::AddColSumMat" << NameOf<Real>()
             << ", for dim = " << dim << ", speed was " << gflops
             << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::ApplyFloor() in the variant that also reports a
 // count through its pointer argument.  The floor value is drawn once as
 // RandInt(-35000, 35000) / 100 and reused for every iteration.
 template<typename Real>
 void TestCuVectorApplyFloor(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> vec(dim);
   vec.SetRandn();
   const Real floor_val = RandInt(-35000, 35000) / static_cast<Real>(100);

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     MatrixIndexT count_out;  // written by ApplyFloor; the value is not used
     vec.ApplyFloor(floor_val, &count_out);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::ApplyFloor" << NameOf<Real>()
             << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::ApplyFloor() with a null count pointer, i.e.
 // without asking for the count.  The floor value is drawn once as
 // RandInt(-35000, 35000) / 100 and reused for every iteration.
 template<typename Real>
 void TestCuVectorApplyFloorNoCount(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> vec(dim);
   vec.SetRandn();
   const Real floor_val = RandInt(-35000, 35000) / static_cast<Real>(100);

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     vec.ApplyFloor(floor_val, nullptr);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::ApplyFloor (no count variety)"
             << NameOf<Real>() << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::ApplyCeiling() in the variant that also reports
 // a count through its pointer argument.  The ceiling value is drawn once as
 // RandInt(-35000, 35000) / 100 and reused for every iteration.
 template<typename Real>
 void TestCuVectorApplyCeiling(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> vec(dim);
   vec.SetRandn();
   const Real ceiling_val = RandInt(-35000, 35000) / static_cast<Real>(100);

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     MatrixIndexT count_out;  // written by ApplyCeiling; the value is not used
     vec.ApplyCeiling(ceiling_val, &count_out);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::ApplyCeiling" << NameOf<Real>()
             << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::ApplyCeiling() with a null count pointer, i.e.
 // without asking for the count.  The ceiling value is drawn once as
 // RandInt(-35000, 35000) / 100 and reused for every iteration.
 template<typename Real>
 void TestCuVectorApplyCeilingNoCount(int32 dim) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   CuVector<Real> vec(dim);
   vec.SetRandn();
   const Real ceiling_val = RandInt(-35000, 35000) / static_cast<Real>(100);

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     vec.ApplyCeiling(ceiling_val, nullptr);
     ++num_iters;
   }

   const BaseFloat dim_f = dim;
   const BaseFloat gflops = (dim_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   KALDI_LOG << "For CuVector::ApplyCeiling (no count variety)"
             << NameOf<Real>() << ", for dim = " << dim
             << ", speed was " << gflops << " gigaflops.";
 }


 // Benchmarks CuVector<Real>::AddDiagMatMat() on non-square inputs.
 // M is num_rows x num_cols.  N is given the shape that makes the product of
 // the (optionally transposed) operands well-formed: num_cols x num_rows when
 // both transpose flags agree, num_rows x num_cols when they differ.  The
 // output vector has num_cols entries when M is transposed, num_rows otherwise.
 template<typename Real>
 void TestCuVectorAddDiagMatMatShape(int32 num_rows, int32 num_cols,
                                     MatrixTransposeType transM,
                                     MatrixTransposeType transN) {
   const BaseFloat time_budget = 0.02;  // seconds to keep iterating
   const bool same_trans = (transM == transN);
   const int32 n_rows = same_trans ? num_cols : num_rows;
   const int32 n_cols = same_trans ? num_rows : num_cols;

   CuVector<Real> diag(transM == kTrans ? num_cols : num_rows);
   diag.SetRandn();
   CuMatrix<Real> M(num_rows, num_cols);
   CuMatrix<Real> N(n_rows, n_cols);
   M.SetRandn();
   N.SetRandn();

   Timer timer;
   int32 num_iters = 0;
   while (timer.Elapsed() < time_budget) {
     diag.AddDiagMatMat(1.0, M, transM, N, transN, 1.0);
     ++num_iters;
   }

   const BaseFloat rows_f = num_rows;
   const BaseFloat cols_f = num_cols;
   const BaseFloat gflops =
       (rows_f * cols_f * num_iters) / (timer.Elapsed() * 1.0e+09);
   const char *m_tag = (transM == kNoTrans ? "[no-trans]," : "[trans],");
   const char *n_tag = (transN == kNoTrans ? "[no-trans]," : "[trans],");
   KALDI_LOG << "For CuVector::AddDiagMatMat" << NameOf<Real>()
             << m_tag << n_tag
             << " for dim = " << num_rows << ", " << num_cols
             << ", speed was " << gflops << " gigaflops.";
 }


 template<typename Real> void CudaVectorSpeedTest() {
   const size_t a = 1 << 5;
   const size_t b = 1 << 8;
   for (size_t i = a; i <= b; i *= 2) {
     for (size_t j = a; j <= b; j *= 2) {
       if (i * j <= a * b) {
         TestCuVectorAddDiagMatMatShape<Real>(i, j, kNoTrans, kNoTrans);
         TestCuVectorAddDiagMatMatShape<Real>(i, j, kNoTrans, kTrans);
         TestCuVectorAddDiagMatMatShape<Real>(i, j, kTrans, kNoTrans);
         TestCuVectorAddDiagMatMatShape<Real>(i, j, kTrans, kTrans);
       }
     }
   }

   std::vector<int32> sizes;
   for (int i = 32; i <= 1024; i *= 2) {
     sizes.push_back(i);
   }
   int32 ns = sizes.size();
   for (int32 s = 0; s < ns; s++)
     TestCuVectorSoftmax<Real>(sizes[s]);
 #if HAVE_CUDA == 1
   TestCuVectorSumChooseMinLength<Real>();
 #endif
   for (int32 s = 0; s < ns; s++)
     TestCuVectorSum<Real>(sizes[s]);
   for (int32 s = 0; s < ns; s++)
     TestCuVectorVecVecOne<Real>(sizes[s]);
   for (int32 s = 0; s < ns; s++)
     TestCuVectorCopyFromVec<Real, float>(sizes[s]);
   for (int32 s = 0; s < ns; s++)
     TestCuVectorCopyFromVec<Real, double>(sizes[s]);
   for (int32 s = 0; s < ns; s++) {
     TestCuVectorAddDiagMatMat<Real>(sizes[s], kNoTrans, kNoTrans);
     TestCuVectorAddDiagMatMat<Real>(sizes[s], kNoTrans, kTrans);
     TestCuVectorAddDiagMatMat<Real>(sizes[s], kTrans, kNoTrans);
     TestCuVectorAddDiagMatMat<Real>(sizes[s], kTrans, kTrans);
   }
   for (int32 s = 0; s < ns; s++) {
     TestCuVectorAddDiagMat2OnVariousShapes<Real>(sizes[s], kNoTrans);
     TestCuVectorAddDiagMat2OnVariousShapes<Real>(sizes[s], kTrans);
   }
   for (int32 s = 0; s < ns; s++) {
     TestCuVectorAddDiagMat2<Real>(sizes[s], kNoTrans);
     TestCuVectorAddDiagMat2<Real>(sizes[s], kTrans);
   }
   for (int32 s = 0; s < ns; s++) {
     TestCuVectorAddRowSumMat<Real>(sizes[s], kNoTrans);
     TestCuVectorAddRowSumMat<Real>(sizes[s], kTrans);
   }
   for (int32 s = 0; s < ns; s++) {
     TestCuVectorAddColSumMat<Real>(sizes[s], kNoTrans);
     TestCuVectorAddColSumMat<Real>(sizes[s], kTrans);
   }
   for (int32 s = 0; s < ns; s++) {
     TestCuVectorApplyFloor<Real>(sizes[s]);
     TestCuVectorApplyFloorNoCount<Real>(sizes[s]);
   }
   for (int32 s = 0; s < ns; s++) {
     TestCuVectorApplyCeiling<Real>(sizes[s]);
     TestCuVectorApplyCeilingNoCount<Real>(sizes[s]);
   }

 }


 } // namespace kaldi


 int main() {
   kaldi::SetVerboseLevel(1);
   //Select the GPU
 #if HAVE_CUDA == 1
   CuDevice::Instantiate().SelectGpuId("yes"); //-2 .. automatic selection
 #endif

   kaldi::CudaVectorSpeedTest<float>();
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().DoublePrecisionSupported()) {
     kaldi::CudaVectorSpeedTest<double>();
   } else {
     KALDI_WARN << "Double precision not supported";
   }
 #else
   kaldi::CudaVectorSpeedTest<double>();
 #endif
   KALDI_LOG << "Tests succeeded.";
 }

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::kUndefined
Definition: matrix-common.h:39

kaldi::TestCuVectorApplyFloor
void TestCuVectorApplyFloor(int32 dim)
Definition: cu-vector-speed-test.cc:288

kaldi::CuVectorBase::ApplySoftMax
void ApplySoftMax()
Definition: cu-vector.cc:334

rnnlm::j
int j
Definition: mikolov-rnnlm-lib.cc:66

kaldi::CuVector
Definition: matrix-common.h:74

kaldi::CuVectorBase::ApplyCeiling
void ApplyCeiling(Real ceiling_val, MatrixIndexT *ceiled_count=NULL)
Definition: cu-vector.h:143

kaldi::TestCuVectorSoftmax
void TestCuVectorSoftmax(int32 dim)
Definition: cu-vector-speed-test.cc:44

kaldi::CuVectorBase::Sum
Real Sum() const
Definition: cu-vector.cc:297

kaldi::CuVectorBase::Set
void Set(Real value)
Definition: cu-vector.cc:1135

kaldi::CuVectorBase::AddDiagMat2
void AddDiagMat2(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans)...
Definition: cu-vector.cc:595

kaldi::CuMatrixBase::SetRandn
void SetRandn()
Definition: cu-matrix.cc:3132

kaldi::NameOf
std::string NameOf()
Definition: matrix-lib-speed-test.cc:30

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

common-utils.h

cu-matrix.h

kaldi::TestCuVectorSum
void TestCuVectorSum(int32 dim)
Definition: cu-vector-speed-test.cc:62

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::CuVectorBase::ApplyFloor
void ApplyFloor(Real floor_val, MatrixIndexT *floored_count=NULL)
Definition: cu-vector.h:139

cu-vector.h

kaldi::TestCuVectorAddDiagMat2OnVariousShapes
void TestCuVectorAddDiagMat2OnVariousShapes(int32 dim, MatrixTransposeType trans)
Definition: cu-vector-speed-test.cc:196

kaldi::TestCuVectorVecVecOne
void TestCuVectorVecVecOne(int32 dim)
Definition: cu-vector-speed-test.cc:148

kaldi::CuVectorBase::AddDiagMatMat
void AddDiagMatMat(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, const CuMatrixBase< Real > &N, MatrixTransposeType transN, Real beta=1.0)
Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoT...
Definition: cu-vector.cc:611

kaldi::kTrans
Definition: matrix-common.h:33

kaldi::SetVerboseLevel
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
Definition: kaldi-error.h:64

kaldi::MatrixIndexT
int32 MatrixIndexT
Definition: matrix-common.h:98

kaldi::CuVectorBase::CopyFromVec
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimension do not match.
Definition: cu-vector.cc:1078

float

kaldi::CuVectorBase::AddColSumMat
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
Definition: cu-vector.cc:1298

kaldi::TestCuVectorAddDiagMat2
void TestCuVectorAddDiagMat2(int32 dim, MatrixTransposeType trans)
Definition: cu-vector-speed-test.cc:222

kaldi::TestCuVectorApplyCeilingNoCount
void TestCuVectorApplyCeilingNoCount(int32 dim)
Definition: cu-vector-speed-test.cc:351

CU1DBLOCK
#define CU1DBLOCK
Definition: cu-matrixdim.h:57

kaldi::kNoTrans
Definition: matrix-common.h:34

KALDI_WARN
#define KALDI_WARN
Definition: kaldi-error.h:150

kaldi::TestCuVectorAddDiagMatMat
void TestCuVectorAddDiagMatMat(int32 dim, MatrixTransposeType transN, MatrixTransposeType transO)
Definition: cu-vector-speed-test.cc:170

kaldi::TestCuVectorCopyFromVec
void TestCuVectorCopyFromVec(int32 dim)
Definition: cu-vector-speed-test.cc:79

kaldi::VectorBase::Sum
Real Sum() const
Returns sum of the elements.
Definition: kaldi-vector.cc:688

kaldi::Timer
Definition: timer.h:63

kaldi::CuVectorBase::SetRandn
void SetRandn()
Definition: cu-vector.cc:281

rnnlm::i
int i
Definition: mikolov-rnnlm-lib.cc:66

cu-math.h

kaldi::Vector
A class representing a vector.
Definition: kaldi-vector.h:406

kaldi::TestCuVectorAddDiagMatMatShape
void TestCuVectorAddDiagMatMatShape(int32 num_rows, int32 num_cols, MatrixTransposeType transM, MatrixTransposeType transN)
Definition: cu-vector-speed-test.cc:372

kaldi::MatrixTransposeType
MatrixTransposeType
Definition: matrix-common.h:32

kaldi::CuVectorBase::Data
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: cu-vector.h:72

kaldi::CudaVectorSpeedTest
void CudaVectorSpeedTest()
Definition: cu-vector-speed-test.cc:402

kaldi::TestCuVectorApplyCeiling
void TestCuVectorApplyCeiling(int32 dim)
Definition: cu-vector-speed-test.cc:330

kaldi::TestCuVectorAddColSumMat
void TestCuVectorAddColSumMat(int32 dim, MatrixTransposeType trans)
Definition: cu-vector-speed-test.cc:266

kaldi::TestCuVectorApplyFloorNoCount
void TestCuVectorApplyFloorNoCount(int32 dim)
Definition: cu-vector-speed-test.cc:309

KALDI_LOG
#define KALDI_LOG
Definition: kaldi-error.h:153

kaldi::VecVec
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37

kaldi::Timer::Elapsed
double Elapsed() const
Returns time in seconds.
Definition: timer.h:74

kaldi-common.h

kaldi::CuVectorBase::AddRowSumMat
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
Definition: cu-vector.cc:1277

kaldi::CuVectorBase::Dim
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

kaldi::RandInt
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95

main
int main()
Definition: cu-vector-speed-test.cc:471

kaldi::TestCuVectorAddRowSumMat
void TestCuVectorAddRowSumMat(int32 dim, MatrixTransposeType trans)
Definition: cu-vector-speed-test.cc:244