22 #include <cuda_runtime_api.h> 23 #include <cublas_v2.h> 38 template<
typename Real>
42 this->num_rows_ == M.
NumCols());
43 if (this->num_rows_ == 0)
46 if (CuDevice::Instantiate().Enabled()) {
53 KALDI_ERR <<
"kTakeMeanAndCheck not supported!";
60 cuda_take_mean(dimGrid, dimBlock, M.
Data(), this->
data_, M.
Dim());
61 CU_SAFE_CALL(cudaGetLastError());
67 dim3 dimBlock(1, block_size);
68 dim3 dimGrid(D, n_blocks(D, block_size));
69 cuda_take_lower(dimGrid, dimBlock, M.
Data(), this->
data_, M.
Dim());
70 CU_SAFE_CALL(cudaGetLastError());
77 cuda_take_upper(dimGrid, dimBlock, M.
Data(), this->
data_, M.
Dim());
78 CU_SAFE_CALL(cudaGetLastError());
82 KALDI_ASSERT(
"Invalid argument to CuSpMatrix::CopyFromMat");
84 CuDevice::Instantiate().AccuProfile(
"CuSpMatrix::CopyFromMat(from CuMatrixBase)", tim);
88 Mat().CopyFromMat(M.
Mat(), copy_type);
92 template<
typename Real>
95 if (CuDevice::Instantiate().Enabled()) {
99 this->CopyFromMat(mat);
107 template<
typename Real>
111 if (CuDevice::Instantiate().Enabled()) {
112 if (this->num_rows_ == 0)
return;
114 size_t nr = this->num_rows_;
118 CUBLAS_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.
Data(),
121 CuDevice::Instantiate().AccuProfile(
"CuSpMatrix::AddVec2", tim);
125 Mat().AddVec2(alpha, v.
Vec());
129 template<
typename Real>
136 if (CuDevice::Instantiate().Enabled()) {
137 if (this->num_rows_ == 0)
return;
142 if (this_dim == 0)
return;
144 if (beta != 1.0) this->Scale(beta);
148 cublasOperation_t trans = (transM ==
kTrans ? CUBLAS_OP_N : CUBLAS_OP_T);
151 cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, this_dim, m_other_dim, alpha, M.
Data(),
155 CuDevice::Instantiate().AccuProfile(
"CuSpMatrix::AddMat2", tim);
159 Mat().AddMat2(alpha, M.
Mat(), transM, beta);
167 template<
typename Real,
typename OtherReal>
171 if (CuDevice::Instantiate().Enabled()) {
172 if (A.
NumRows() == 0)
return 0.0;
176 Adiag.CopyDiagFromPacked(A);
199 template<
typename Real>
205 d = diff.FrobeniusNorm();
206 return (
d <= tol * std::max(a, b));
209 template<
typename Real>
221 return (
TraceSpSp(*
this, *
this) + this->NumRows() - 2.0 * this->Trace() <=
222 tol * this->NumRows());
225 template <
class Real>
228 this->CopyFromPacked(in);
const MatrixBase< Real > & Mat() const
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
MatrixIndexT Stride() const
CuSpMatrix< Real > & operator=(const CuSpMatrix< Real > &in)
void CopyDiagFromPacked(const CuPackedMatrix< Real > &M)
Extracts the diagonal of a packed matrix M; works for Sp or Tp.
MatrixIndexT NumRows() const
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void Invert()
Note: the CuMatrix version of the Invert() function will only work for positive definite matrices; it...
const SpMatrix< Real > & Mat() const
void SymInvertPosDef()
Inversion for positive definite symmetric matrices.
void CopyFromSp(const CuSpMatrix< Real > &M)
double TraceSpSp(const SpMatrix< double > &A, const SpMatrix< double > &B)
bool IsUnit(Real tol=0.001) const
Real FrobeniusNorm() const
void CopyFromMat(const CuMatrixBase< Real > &orig, SpCopyType copy_type=kTakeLower)
const Real * Data() const
Return data pointer (const).
Matrix for CUDA computing.
MatrixIndexT NumCols() const
const VectorBase< Real > & Vec() const
#define KALDI_ASSERT(cond)
Real * Data()
Returns a pointer to the start of the vector's data.
MatrixIndexT NumRows() const
Dimensions.
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between v1 and v2.
void AddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
MatrixIndexT Dim() const
Dimensions.
Vector for CUDA computing.
void AddVec2(const Real alpha, const CuVectorBase< Real > &v)
bool ApproxEqual(const CuSpMatrix< Real > &other, Real tol=0.001) const