22 #include <cuda_runtime_api.h>    23 #include <cublas_v2.h>    38 template<
typename Real>
    42                this->num_rows_ == M.
NumCols());
    43   if (this->num_rows_ == 0)
    46   if (CuDevice::Instantiate().Enabled()) {
    53         KALDI_ERR << 
"kTakeMeanAndCheck not supported!";
    60           cuda_take_mean(dimGrid, dimBlock, M.
Data(), this->
data_, M.
Dim());
    61           CU_SAFE_CALL(cudaGetLastError());
    67           dim3 dimBlock(1, block_size);
    68           dim3 dimGrid(D, n_blocks(D, block_size));
    69           cuda_take_lower(dimGrid, dimBlock, M.
Data(), this->
data_, M.
Dim());
    70           CU_SAFE_CALL(cudaGetLastError());
    77           cuda_take_upper(dimGrid, dimBlock, M.
Data(), this->
data_, M.
Dim());
    78           CU_SAFE_CALL(cudaGetLastError());
    82         KALDI_ASSERT(
"Invalid argument to CuSpMatrix::CopyFromMat");
    84     CuDevice::Instantiate().AccuProfile(
"CuSpMatrix::CopyFromMat(from CuMatrixBase)", tim);
    88     Mat().CopyFromMat(M.
Mat(), copy_type);
    92 template<
typename Real>
    95   if (CuDevice::Instantiate().Enabled()) {
    99     this->CopyFromMat(mat);
   107 template<
typename Real>
   111   if (CuDevice::Instantiate().Enabled()) {
   112     if (this->num_rows_ == 0) 
return;
   114     size_t nr = this->num_rows_;
   118     CUBLAS_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.
Data(),
   121     CuDevice::Instantiate().AccuProfile(
"CuSpMatrix::AddVec2", tim);
   125     Mat().AddVec2(alpha, v.
Vec());
   129 template<
typename Real>
   136   if (CuDevice::Instantiate().Enabled()) {
   137     if (this->num_rows_ == 0) 
return;
   142     if (this_dim == 0) 
return;
   144       if (beta != 1.0) this->Scale(beta);
   148     cublasOperation_t trans = (transM == 
kTrans ? CUBLAS_OP_N : CUBLAS_OP_T);
   151     cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, this_dim, m_other_dim, alpha, M.
Data(),
   155     CuDevice::Instantiate().AccuProfile(
"CuSpMatrix::AddMat2", tim);
   159     Mat().AddMat2(alpha, M.
Mat(), transM, beta);
   167 template<
typename Real, 
typename OtherReal>
   171   if (CuDevice::Instantiate().Enabled()) {
   172     if (A.
NumRows() == 0) 
return 0.0;
   176     Adiag.CopyDiagFromPacked(A);
   199 template<
typename Real>
   205       d = diff.FrobeniusNorm();
   206   return (
d <= tol * std::max(a, b));
   209 template<
typename Real>
   221   return (
TraceSpSp(*
this, *
this) + this->NumRows() - 2.0 * this->Trace() <=
   222           tol * this->NumRows());
   225 template <
class Real>
   228   this->CopyFromPacked(in);
 const MatrixBase< Real > & Mat() const
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
MatrixIndexT Stride() const
 
CuSpMatrix< Real > & operator=(const CuSpMatrix< Real > &in)
 
void CopyDiagFromPacked(const CuPackedMatrix< Real > &M)
Extracts the diagonal of a packed matrix M; works for Sp or Tp. 
 
MatrixIndexT NumRows() const
 
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
 
void Invert()
Note: the CuMatrix version of the Invert() function will only work for positive definite matrices; it...
 
const SpMatrix< Real > & Mat() const
 
void SymInvertPosDef()
Inversion for positive definite symmetric matrices. 
 
void CopyFromSp(const CuSpMatrix< Real > &M)
 
double TraceSpSp(const SpMatrix< double > &A, const SpMatrix< double > &B)
 
bool IsUnit(Real tol=0.001) const
 
Real FrobeniusNorm() const
 
void CopyFromMat(const CuMatrixBase< Real > &orig, SpCopyType copy_type=kTakeLower)
 
const Real * Data() const
Return data pointer (const). 
 
Matrix for CUDA computing. 
 
MatrixIndexT NumCols() const
 
const VectorBase< Real > & Vec() const
 
#define KALDI_ASSERT(cond)
 
Real * Data()
Returns a pointer to the start of the vector's data. 
 
MatrixIndexT NumRows() const
Dimensions. 
 
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns the dot product of vectors a and b. 
 
void AddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
 
MatrixIndexT Dim() const
Dimensions. 
 
Vector for CUDA computing. 
 
void AddVec2(const Real alpha, const CuVectorBase< Real > &v)
 
bool ApproxEqual(const CuSpMatrix< Real > &other, Real tol=0.001) const