doc/cu-sp-matrix_8cc_source.html

 // cudamatrix/cu-sp-matrix.cc

 // Copyright      2013  Karel Vesely
 //           2014-2015  Johns Hopkins University (author: Daniel Povey)

 // See ../../COPYING for clarification regarding multiple authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //  http://www.apache.org/licenses/LICENSE-2.0
 //
 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 // MERCHANTABLITY OR NON-INFRINGEMENT.
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

 #if HAVE_CUDA == 1
 #include <cuda_runtime_api.h>
 #include <cublas_v2.h>
 #endif

 #include "base/timer.h"
 #include "cudamatrix/cu-common.h"
 #include "cudamatrix/cu-vector.h"
 #include "cudamatrix/cu-device.h"
 #include "cudamatrix/cu-kernels.h"
 #include "cudamatrix/cu-math.h"
 #include "cudamatrix/cu-sp-matrix.h"
 #include "cudamatrix/cu-matrix.h"
 #include "cudamatrix/cublas-wrappers.h"

 namespace kaldi {

 // Fills this packed symmetric matrix from the square matrix M.
 //
 //  M          source matrix; must be square with the same dimension as *this
 //             (checked by KALDI_ASSERT).
 //  copy_type  selects how M is reduced to packed form.  On the GPU path
 //             kTakeMean, kTakeLower and kTakeUpper dispatch to the
 //             cuda_take_mean / cuda_take_lower / cuda_take_upper kernels;
 //             kTakeMeanAndCheck is rejected with KALDI_ERR.
 //
 // A zero-dimensional matrix is a no-op.  When no CUDA device is enabled (or
 // the code is built without CUDA) the work is delegated to the CPU SpMatrix.
 template<typename Real>
 void CuSpMatrix<Real>::CopyFromMat(const CuMatrixBase<Real> &M,
                                    SpCopyType copy_type) {
   KALDI_ASSERT(this->num_rows_ == M.NumRows() &&
                this->num_rows_ == M.NumCols());
   if (this->num_rows_ == 0)
     return;
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuTimer tim;
     // Non-zero here: the empty case returned above.
     MatrixIndexT D = this->NumRows();
     switch (copy_type) {
       case kTakeMeanAndCheck:
         // NOTE(review): no break follows; KALDI_ERR is presumably
         // non-returning, otherwise control continues into kTakeMean.
         KALDI_ERR << "kTakeMeanAndCheck not supported!";
       // The grid/block dimensions have been very roughly tuned for the
       // individual cases.
       case kTakeMean:
         {
           dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
           dim3 dimGrid(n_blocks(D, CU2DBLOCK), n_blocks(D, CU2DBLOCK));
           cuda_take_mean(dimGrid, dimBlock, M.Data(), this->data_, M.Dim());
           // Kernel launches do not report errors directly; pick up any
           // launch failure here.
           CU_SAFE_CALL(cudaGetLastError());
         }
         break;
       case kTakeLower:
         {
           // Cap the block size at the matrix dimension for small matrices.
           int32 block_size = std::min(CU1DBLOCK, D);
           dim3 dimBlock(1, block_size);
           dim3 dimGrid(D, n_blocks(D, block_size));
           cuda_take_lower(dimGrid, dimBlock, M.Data(), this->data_, M.Dim());
           CU_SAFE_CALL(cudaGetLastError());
         }
         break;
       case kTakeUpper:
         {
           dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
           dim3 dimGrid(n_blocks(D, CU2DBLOCK), n_blocks(D, CU2DBLOCK));
           cuda_take_upper(dimGrid, dimBlock, M.Data(), this->data_, M.Dim());
           CU_SAFE_CALL(cudaGetLastError());
         }
         break;
       default:
         // An assertion on a string literal is always true, so an unknown
         // copy_type must be reported as an explicit error instead.
         KALDI_ERR << "Invalid argument to CuSpMatrix::CopyFromMat";
     }
     CuDevice::Instantiate().AccuProfile("CuSpMatrix::CopyFromMat(from CuMatrixBase)", tim);
   } else
 #endif
   {
     Mat().CopyFromMat(M.Mat(), copy_type);
   }
 }

 // Inverts this matrix in place.
 //
 // With an enabled CUDA device the packed matrix is expanded to a full square
 // CuMatrix, inverted with SymInvertPosDef(), and packed back via CopyFromMat()
 // using its default copy type.  Otherwise the CPU SpMatrix::Invert() is used.
 template<typename Real>
 void CuSpMatrix<Real>::Invert() {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     // Work on a full (unpacked) copy of the matrix.
     CuMatrix<Real> full(this->num_rows_, this->num_rows_);
     full.CopyFromSp(*this);
     full.SymInvertPosDef();
     // Store the inverse back into packed form.
     this->CopyFromMat(full);
   } else
 #endif
   {
     // No GPU available: fall back to the CPU-based SpMatrix inversion.
     Mat().Invert();
   }
 }

 // Rank-one update of this matrix using the vector v, scaled by alpha
 // (performed by cublas_spr on the GPU path).
 //
 //  alpha  scaling factor applied to the update.
 //  v      vector whose dimension must equal NumRows() (checked by
 //         KALDI_ASSERT).
 //
 // On the GPU path a zero-dimensional matrix is a no-op.  Without an enabled
 // CUDA device the CPU SpMatrix::AddVec2() is used.
 template<typename Real>
 void CuSpMatrix<Real>::AddVec2(const Real alpha, const CuVectorBase<Real> &v) {
   KALDI_ASSERT(v.Dim() == this->NumRows());
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     if (this->num_rows_ == 0) return;
     CuTimer tim;

     // No custom kernel is launched here, so no grid/block setup is needed;
     // the whole update is a single cuBLAS packed rank-one call.
     CUBLAS_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(),
                                 1, this->Data()));

     CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddVec2", tim);
   } else
 #endif
   {
     Mat().AddVec2(alpha, v.Vec());
   }
 }

 // Symmetric rank-k update of this matrix from M, scaled by alpha, with the
 // existing contents scaled by beta (performed by cublas_syrk on the GPU path).
 //
 //  alpha   scaling factor for the product term.
 //  M       input matrix; NumRows() of *this must equal M.NumRows() when
 //          transM == kNoTrans, or M.NumCols() when transM == kTrans
 //          (checked by KALDI_ASSERT).
 //  transM  whether M is used transposed.
 //  beta    scaling factor applied to the current contents of *this.
 //
 // On the GPU path a zero-dimensional matrix is a no-op, and alpha == 0 reduces
 // to scaling *this by beta.  Without an enabled CUDA device the CPU
 // SpMatrix::AddMat2() is used.
 template<typename Real>
 void CuSpMatrix<Real>::AddMat2(const Real alpha, const CuMatrixBase<Real> &M,
                                MatrixTransposeType transM, const Real beta) {
   KALDI_ASSERT((transM == kNoTrans && this->NumRows() == M.NumRows())
                || (transM == kTrans && this->NumRows() == M.NumCols()));

 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     if (this->num_rows_ == 0) return;
     CuTimer tim;
     MatrixIndexT this_dim = this->NumRows(),
         m_other_dim = (transM == kNoTrans ? M.NumCols() : M.NumRows());

     // With alpha == 0 the product term vanishes; only the beta scaling remains.
     if (alpha == 0.0) {
       if (beta != 1.0) this->Scale(beta);
       return;
     }

     // The transpose flag is deliberately inverted relative to transM.
     // cuBLAS is column-major; presumably M is stored row-major, so cuBLAS
     // sees M transposed -- TODO confirm against CuMatrixBase's layout.
     cublasOperation_t trans = (transM == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T);

     // syrk operates on a full (strided) matrix, so work on an unpacked copy
     // and pack the result back afterwards.
     CuMatrix<Real> tmp_mat(*this);
     // Check the cuBLAS status, as is done for cublas_spr in AddVec2().
     CUBLAS_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, this_dim, m_other_dim, alpha, M.Data(),
                                  M.Stride(), beta, tmp_mat.Data(), tmp_mat.Stride()));
     this->CopyFromMat(tmp_mat, kTakeLower);

     CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddMat2", tim);
   } else
 #endif
   {
     Mat().AddMat2(alpha, M.Mat(), transM, beta);
   }
 }

 // Returns trace(A * B) for two packed symmetric matrices of equal dimension
 // (checked by KALDI_ASSERT).  The element types of A and B may differ; the
 // result has A's element type.
 //
 // On the GPU path the value is computed from two dot products: one over the
 // complete packed storage of both matrices and one over their diagonals.
 // Without an enabled CUDA device the CPU TraceSpSp() is used.
 template<typename Real, typename OtherReal>
 Real TraceSpSp(const CuSpMatrix<Real> &A, const CuSpMatrix<OtherReal> &B) {
   KALDI_ASSERT(A.NumRows() == B.NumRows());
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     if (A.NumRows() == 0) return 0.0;
     MatrixIndexT dim = A.NumRows();
     // Number of elements held in the packed storage.
     MatrixIndexT packed_size = dim * (dim+1) / 2;

     // Diagonals of both matrices, extracted into their own vectors.
     CuVector<Real> diag_a(dim, kUndefined);
     CuVector<OtherReal> diag_b(dim, kUndefined);
     diag_a.CopyDiagFromPacked(A);
     diag_b.CopyDiagFromPacked(B);

     // Views over the whole packed storage of each matrix.
     CuSubVector<Real> packed_a(A.Data(), packed_size);
     CuSubVector<OtherReal> packed_b(B.Data(), packed_size);

     // Doubling the packed dot product accounts for both triangles; we then
     // subtract VecVec(diag_a, diag_b) to remove the double-counting on the
     // diagonal.
     return 2.0 * VecVec(packed_a, packed_b) - VecVec(diag_a, diag_b);
   } else
 #endif
   {
     return TraceSpSp(A.Mat(), B.Mat());
   }
 }
 // Explicit instantiations for every float/double combination.
 template
 float TraceSpSp(const CuSpMatrix<float> &A, const CuSpMatrix<float> &B);
 template
 float TraceSpSp(const CuSpMatrix<float> &A, const CuSpMatrix<double> &B);
 template
 double TraceSpSp(const CuSpMatrix<double> &A, const CuSpMatrix<float> &B);
 template
 double TraceSpSp(const CuSpMatrix<double> &A, const CuSpMatrix<double> &B);


 // Returns true if this matrix and B are approximately equal, i.e. if the
 // Frobenius norm of their difference is at most tol times the larger of the
 // two matrices' Frobenius norms.  Both matrices must have the same dimension
 // (checked by KALDI_ASSERT).
 template<typename Real>
 bool CuSpMatrix<Real>::ApproxEqual(const CuSpMatrix<Real> &B, Real tol) const {
   KALDI_ASSERT(this->NumRows() == B.NumRows());
   // delta = *this - B
   CuSpMatrix<Real> delta(*this);
   delta.AddSp(-1.0, B);
   Real norm_this = this->FrobeniusNorm();
   Real norm_other = B.FrobeniusNorm();
   Real norm_delta = delta.FrobeniusNorm();
   // Relative comparison against the larger of the two norms.
   return (norm_delta <= tol * std::max(norm_this, norm_other));
 }

 template<typename Real>
 bool CuSpMatrix<Real>::IsUnit(Real tol) const {
   // Tests whether this matrix is close to the identity matrix I.
   // Writing S for *this, the condition actually evaluated below is
   //   trace(S * S) + dim - 2 * trace(S) <= tol * dim,  with dim = NumRows().
   // The left-hand side equals trace((S - I)(S - I)), which is the *squared*
   // Frobenius norm of (S - I); trace(S * S) is obtained from TraceSpSp(S, S).
   //
   // NOTE(review): the derivation this was based on started from
   //   FrobeniusNorm(S - I) <= tol * NumRows()
   // and then dropped the square root without squaring the right-hand side,
   // so the tolerance is applied to the squared norm.  Confirm this is the
   // intended definition before relying on the exact threshold.

   // Note: we could do this more efficiently still, by slightly changing the
   // definition of IsUnit and getting rid of the extra stuff inside TraceSpSp
   // that corrects for the diagonal being counted twice.
   return (TraceSpSp(*this, *this) + this->NumRows() - 2.0 * this->Trace() <=
           tol * this->NumRows());
 }

 // Copy assignment: resizes this matrix to the dimension of `in` (leaving the
 // contents undefined) and then copies the packed data across.
 template <class Real>
 CuSpMatrix<Real>& CuSpMatrix<Real>::operator = (const CuSpMatrix<Real> &in) {
   // kUndefined: the data is overwritten by the copy right below.
   this->Resize(in.NumRows(), kUndefined);
   this->CopyFromPacked(in);

   return *this;
 }

 // Explicit instantiations of CuSpMatrix for the two element types provided
 // by this translation unit (single and double precision).
 template class CuSpMatrix<float>;
 template class CuSpMatrix<double>;


 } // namespace
kaldi::CuMatrixBase::Mat
const MatrixBase< Real > & Mat() const
Definition: cu-matrix.h:755

kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20

kaldi::CuMatrixBase::Stride
MatrixIndexT Stride() const
Definition: cu-matrix.h:217

kaldi::CuSpMatrix::operator=
CuSpMatrix< Real > & operator=(const CuSpMatrix< Real > &in)
Definition: cu-sp-matrix.cc:226

kaldi::kUndefined
Definition: matrix-common.h:39

kaldi::CuVectorBase::CopyDiagFromPacked
void CopyDiagFromPacked(const CuPackedMatrix< Real > &M)
Extracts the diagonal of a packed matrix M; works for Sp or Tp.
Definition: cu-vector.cc:1177

kaldi::CuVector
Definition: matrix-common.h:74

kaldi::CuPackedMatrix::NumRows
MatrixIndexT NumRows() const
Definition: cu-packed-matrix.h:135

cu-device.h

cublas-wrappers.h

kaldi::int32
kaldi::int32 int32
Definition: online-tcp-source.cc:27

cu-matrix.h

kaldi::CuMatrix
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71

kaldi::CuSpMatrix::Invert
void Invert()
Note: the CuMatrix version of the Invert() function will only work for positive definite matrices; it...
Definition: cu-sp-matrix.cc:93

kaldi::CuSpMatrix
Definition: matrix-common.h:76

cu-vector.h

data_
uint64 data_
Definition: arpa-lm-compiler.cc:108

timer.h

kaldi::SpCopyType
SpCopyType
Definition: matrix-common.h:49

kaldi::CuSubVector
Definition: matrix-common.h:73

kaldi::kTrans
Definition: matrix-common.h:33

kaldi::CuSpMatrix::Mat
const SpMatrix< Real > & Mat() const
Definition: cu-sp-matrix.h:132

kaldi::kTakeMeanAndCheck
Definition: matrix-common.h:53

kaldi::CuMatrixBase::SymInvertPosDef
void SymInvertPosDef()
Inversion for positive definite symmetric matrices.
Definition: cu-matrix.cc:2111

cu-sp-matrix.h

kaldi::MatrixIndexT
int32 MatrixIndexT
Definition: matrix-common.h:98

kaldi::CuMatrixBase::CopyFromSp
void CopyFromSp(const CuSpMatrix< Real > &M)
Definition: cu-matrix.cc:360

kaldi::kTakeMean
Definition: matrix-common.h:52

cu-kernels.h

KALDI_ERR
#define KALDI_ERR
Definition: kaldi-error.h:147

CU1DBLOCK
#define CU1DBLOCK
Definition: cu-matrixdim.h:57

kaldi::kNoTrans
Definition: matrix-common.h:34

kaldi::TraceSpSp
double TraceSpSp(const SpMatrix< double > &A, const SpMatrix< double > &B)
Definition: sp-matrix.cc:326

CU2DBLOCK
#define CU2DBLOCK
Definition: cu-matrixdim.h:61

kaldi::CuSpMatrix::IsUnit
bool IsUnit(Real tol=0.001) const
Definition: cu-sp-matrix.cc:210

kaldi::CuSpMatrix::FrobeniusNorm
Real FrobeniusNorm() const
Definition: cu-sp-matrix.h:79

kaldi::CuSpMatrix::CopyFromMat
void CopyFromMat(const CuMatrixBase< Real > &orig, SpCopyType copy_type=kTakeLower)
Definition: cu-sp-matrix.cc:39

kaldi::CuMatrixBase::Data
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746

cu-math.h

kaldi::CuMatrixBase
Matrix for CUDA computing.
Definition: matrix-common.h:69

kaldi::CuPackedMatrix::Data
Real * Data()
Definition: cu-packed-matrix.h:113

kaldi::CuMatrixBase::NumCols
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216

kaldi::CuVectorBase::Vec
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

KALDI_ASSERT
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

cu-common.h

kaldi::MatrixTransposeType
MatrixTransposeType
Definition: matrix-common.h:32

kaldi::CuVectorBase::Data
Real * Data()
Returns a pointer to the start of the vector's data.
Definition: cu-vector.h:72

kaldi::CuMatrixBase::Dim
::MatrixDim Dim() const
Definition: cu-matrix.h:221

kaldi::kTakeLower
Definition: matrix-common.h:50

kaldi::CuMatrixBase::NumRows
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215

kaldi::VecVec
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
Definition: kaldi-vector.cc:37

kaldi::CuSpMatrix::AddMat2
void AddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
Definition: cu-sp-matrix.cc:130

rnnlm::d
double d
Definition: mikolov-rnnlm-lib.cc:64

kaldi::CuVectorBase::Dim
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

kaldi::CuVectorBase
Vector for CUDA computing.
Definition: matrix-common.h:72

kaldi::kTakeUpper
Definition: matrix-common.h:51

kaldi::CuSpMatrix::AddVec2
void AddVec2(const Real alpha, const CuVectorBase< Real > &v)
Definition: cu-sp-matrix.cc:108

kaldi::CuSpMatrix::ApproxEqual
bool ApproxEqual(const CuSpMatrix< Real > &other, Real tol=0.001) const
Definition: cu-sp-matrix.cc:200