#include <matrix-common.h>

Collaboration diagram for CuSparseMatrix< Real >:

Public Member Functions
MatrixIndexT	NumRows () const

MatrixIndexT	NumCols () const

MatrixIndexT	NumElements () const

template<typename OtherReal >
void	CopyToMat (CuMatrixBase< OtherReal > *dest, MatrixTransposeType trans=kNoTrans) const

Real	Sum () const

Real	FrobeniusNorm () const

CuSparseMatrix< Real > &	operator= (const SparseMatrix< Real > &smat)
	Copy from CPU-based matrix. More...

CuSparseMatrix< Real > &	operator= (const CuSparseMatrix< Real > &smat)
	Copy from possibly-GPU-based matrix. More...

template<typename OtherReal >
void	CopyFromSmat (const SparseMatrix< OtherReal > &smat)
	Copy from CPU-based matrix. More...

void	CopyFromSmat (const CuSparseMatrix< Real > &smat, MatrixTransposeType trans=kNoTrans)
	Copy from GPU-based matrix, supporting transposition. More...

void	SelectRows (const CuArray< int32 > &row_indexes, const CuSparseMatrix< Real > &smat_other)
	Select a subset of the rows of a CuSparseMatrix. More...

template<typename OtherReal >
void	CopyToSmat (SparseMatrix< OtherReal > *smat) const
	Copy to CPU-based matrix. More...

void	CopyElementsToVec (CuVectorBase< Real > *vec) const
	Copy elements to CuVector. More...

void	Swap (SparseMatrix< Real > *smat)
	Swap with CPU-based matrix. More...

void	Swap (CuSparseMatrix< Real > *smat)
	Swap with possibly-CPU-based matrix. More...

void	SetRandn (BaseFloat zero_prob)
	Sets the matrix to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob and otherwise normally distributed; mostly for purposes of testing. More...

void	Write (std::ostream &os, bool binary) const

void	Read (std::istream &is, bool binary)

	CuSparseMatrix ()
	Default constructor. More...

	CuSparseMatrix (const SparseMatrix< Real > &smat)
	Constructor from CPU-based sparse matrix. More...

	CuSparseMatrix (const CuSparseMatrix< Real > &smat, MatrixTransposeType trans=kNoTrans)
	Constructor from GPU-based sparse matrix (supports transposition). More...

	CuSparseMatrix (const CuArray< int32 > &indexes, int32 dim, MatrixTransposeType trans=kNoTrans)
	Constructor from an array of indexes. More...

	CuSparseMatrix (const CuArray< int32 > &indexes, const CuVectorBase< Real > &weights, int32 dim, MatrixTransposeType trans=kNoTrans)
	Constructor from an array of indexes and an array of weights; requires indexes.Dim() == weights.Dim(). More...

	~CuSparseMatrix ()

Protected Member Functions
const SparseMatrix< Real > &	Smat () const

SparseMatrix< Real > &	Smat ()

void	Resize (const MatrixIndexT num_rows, const MatrixIndexT num_cols, const MatrixIndexT nnz, MatrixResizeType resize_type=kSetZero)
	Users of this class won't normally have to use Resize. More...

const Real *	CsrVal () const
	Returns pointer to the data array of length nnz_ that holds all nonzero values in zero-based CSR format. More...

Real *	CsrVal ()

const int *	CsrRowPtr () const
	Returns pointer to the integer array of length NumRows()+1 that holds indices of the first nonzero element in the i-th row, while the last entry contains nnz_, as zero-based CSR format is used. More...

int *	CsrRowPtr ()

const int *	CsrColIdx () const
	Returns pointer to the integer array of length nnz_ that contains the column indices of the corresponding elements in array CsrVal() More...

int *	CsrColIdx ()

Private Member Functions
void	Destroy ()

Private Attributes
std::vector< SparseVector< Real > >	cpu_rows_

MatrixIndexT	num_rows_

MatrixIndexT	num_cols_

MatrixIndexT	nnz_

int *	csr_row_ptr_col_idx_

Real *	csr_val_

Friends
class	CuMatrixBase< float >

class	CuMatrixBase< double >

class	CuMatrixBase< Real >

class	CuVectorBase< float >

class	CuVectorBase< double >

class	CuVectorBase< Real >

Real	TraceMatSmat (const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType trans)

Detailed Description

template<class Real>
class kaldi::CuSparseMatrix< Real >

Definition at line 78 of file matrix-common.h.

Constructor & Destructor Documentation

◆ CuSparseMatrix() [1/5]

CuSparseMatrix ( )

inline

Default constructor.

Definition at line 123 of file cu-sparse-matrix.h.

Referenced by CuSparseMatrix< Real >::CuSparseMatrix().

                    :
       num_rows_(0), num_cols_(0), nnz_(0), csr_row_ptr_col_idx_(NULL), csr_val_(
           NULL) {
   }

◆ CuSparseMatrix() [2/5]

CuSparseMatrix ( const SparseMatrix< Real > & smat )

inline explicit

Constructor from CPU-based sparse matrix.

Definition at line 129 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::CopyFromSmat().

                                                           :
       num_rows_(0), num_cols_(0), nnz_(0), csr_row_ptr_col_idx_(NULL), csr_val_(
       NULL) {
     this->CopyFromSmat(smat);
   }

◆ CuSparseMatrix() [3/5]

CuSparseMatrix	(	const CuSparseMatrix< Real > &	smat,
		MatrixTransposeType	trans = `kNoTrans`
	)

inline

Constructor from GPU-based sparse matrix (supports transposition).

Definition at line 136 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CuSparseMatrix(), and kaldi::kNoTrans.

                                :
       num_rows_(0), num_cols_(0), nnz_(0), csr_row_ptr_col_idx_(NULL), csr_val_(
       NULL) {
     this->CopyFromSmat(smat, trans);
   }

◆ CuSparseMatrix() [4/5]

CuSparseMatrix	(	const CuArray< int32 > &	indexes,
		int32	dim,
		MatrixTransposeType	trans = `kNoTrans`
	)

Constructor from an array of indexes.

If trans == kNoTrans, construct a sparse matrix with num-rows == indexes.Dim() and num-cols == 'dim'. 'indexes' is expected to contain elements in the range [0, dim - 1]. Each row 'i' of *this after calling the constructor will contain a single element at column-index indexes[i] with value 1.0.

If trans == kTrans, the result will be the transpose of the sparse matrix described above.

Definition at line 162 of file cu-sparse-matrix.cc.

References CuArrayBase< T >::CopyFromArray(), CuArrayBase< T >::CopyToVec(), CuSparseMatrix< Real >::CsrColIdx(), CuSparseMatrix< Real >::CsrRowPtr(), CuSparseMatrix< Real >::CsrVal(), CuArrayBase< T >::Dim(), kaldi::kTrans, kaldi::kUndefined, CuSparseMatrix< Real >::NumElements(), CuSparseMatrix< Real >::NumRows(), CuSparseMatrix< Real >::Resize(), CuArrayBase< T >::Sequence(), CuVectorBase< Real >::Set(), CuSparseMatrix< Real >::Smat(), and CuSparseMatrix< Real >::Swap().

                                                                 :
     num_rows_(0), num_cols_(0), nnz_(0), csr_row_ptr_col_idx_(NULL), csr_val_(
     NULL) {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Resize(indexes.Dim(), dim, indexes.Dim(), kUndefined);
     if (NumElements() == 0) {
       return;
     }
     CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
     row_ptr.Sequence(0);
     CuSubArray<int> col_idx(CsrColIdx(), NumElements());
     col_idx.CopyFromArray(indexes);
     CuSubVector<Real> val(CsrVal(), NumElements());
     val.Set(1);
 
     if (trans == kTrans) {
       CuSparseMatrix<Real> tmp(*this, kTrans);
       this->Swap(&tmp);
     }
   } else
 #endif
   {
     std::vector<int32> idx(indexes.Dim());
     indexes.CopyToVec(&idx);
     SparseMatrix<Real> tmp(idx, dim, trans);
     Smat().Swap(&tmp);
   }
 }

◆ CuSparseMatrix() [5/5]

CuSparseMatrix	(	const CuArray< int32 > &	indexes,
		const CuVectorBase< Real > &	weights,
		int32	dim,
		MatrixTransposeType	trans = `kNoTrans`
	)

Constructor from an array of indexes and an array of weights; requires indexes.Dim() == weights.Dim().

If trans == kNoTrans, construct a sparse matrix with num-rows == indexes.Dim() and num-cols == 'dim'. 'indexes' is expected to contain elements in the range [0, dim - 1]. Each row 'i' of *this after calling the constructor will contain a single element at column-index indexes[i] with value weights[i]. If trans == kTrans, the result will be the transpose of the sparse matrix described above.

Definition at line 194 of file cu-sparse-matrix.cc.

                                                                            :
     num_rows_(0), num_cols_(0), nnz_(0), csr_row_ptr_col_idx_(NULL), csr_val_(
     NULL) {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Resize(indexes.Dim(), dim, indexes.Dim(), kUndefined);
     if (NumElements() == 0) {
       return;
     }
     CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
     row_ptr.Sequence(0);
     CuSubArray<int> col_idx(CsrColIdx(), NumElements());
     col_idx.CopyFromArray(indexes);
     CuSubVector<Real> val(CsrVal(), NumElements());
     val.CopyFromVec(weights);
 
     if (trans == kTrans) {
       CuSparseMatrix<Real> tmp(*this, kTrans);
       this->Swap(&tmp);
     }
   } else
 #endif
   {
     std::vector<int32> idx(indexes.Dim());
     indexes.CopyToVec(&idx);
     SparseMatrix<Real> tmp(idx, weights.Vec(), dim, trans);
     Smat().Swap(&tmp);
   }
 }

◆ ~CuSparseMatrix()

~CuSparseMatrix ( )

inline

Definition at line 170 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::Destroy().

                     {
     Destroy();
   }

Member Function Documentation

◆ CopyElementsToVec()

void CopyElementsToVec ( CuVectorBase< Real > * vec ) const

Copy elements to CuVector.

It is the caller's responsibility to resize <*vec>.

Definition at line 452 of file cu-sparse-matrix.cc.

References CuVectorBase< Real >::CopyFromVec(), CuSparseMatrix< Real >::CsrVal(), CuVectorBase< Real >::Dim(), KALDI_ASSERT, CuSparseMatrix< Real >::NumElements(), CuSparseMatrix< Real >::Smat(), and CuVectorBase< Real >::Vec().

                                                                           {
   KALDI_ASSERT(vec != NULL);
   KALDI_ASSERT(this->NumElements() == vec->Dim());
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuSubVector<Real> val(CsrVal(), NumElements());
     vec->CopyFromVec(val);
   } else
 #endif
   {
     Smat().CopyElementsToVec(&(vec->Vec()));
   }
 }

◆ CopyFromSmat() [1/2]

template<typename OtherReal> void CopyFromSmat ( const SparseMatrix< OtherReal > & smat )

Copy from CPU-based matrix.

We will add the transpose option later when it is necessary. Resizes *this as needed.

Definition at line 326 of file cu-sparse-matrix.cc.

Referenced by CuSparseMatrix< Real >::CuSparseMatrix(), and CuSparseMatrix< Real >::operator=().

                                                                            {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Resize(smat.NumRows(), smat.NumCols(), smat.NumElements(), kUndefined);
     if (NumElements() == 0) {
       return;
     }
     std::vector<int> row_ptr(NumRows() + 1);
     std::vector<int> col_idx(NumElements());
     Vector<Real> val(NumElements(), kUndefined);
 
     int n = 0;
     for (int32 i = 0; i < smat.NumRows(); ++i) {
       row_ptr[i] = n;
       for (int32 j = 0; j < (smat.Data() + i)->NumElements(); ++j, ++n) {
         col_idx[n] = ((smat.Data() + i)->Data() + j)->first;
         val(n) = static_cast<Real>(((smat.Data() + i)->Data() + j)->second);
       }
     }
     row_ptr[NumRows()] = n;
     KALDI_ASSERT(n == NumElements());
 
     CuSubArray<int> cu_row_ptr(CsrRowPtr(), NumRows() + 1);
     cu_row_ptr.CopyFromVec(row_ptr);
     CuSubArray<int> cu_col_idx(CsrColIdx(), NumElements());
     cu_col_idx.CopyFromVec(col_idx);
     CuSubVector<Real> cu_val(CsrVal(), NumElements());
     cu_val.CopyFromVec(val);
   } else
 #endif
   {
     this->Smat().CopyFromSmat(smat);
   }
 }

◆ CopyFromSmat() [2/2]

void CopyFromSmat	(	const CuSparseMatrix< Real > &	smat,
		MatrixTransposeType	trans = `kNoTrans`
	)

Copy from GPU-based matrix, supporting transposition.

Resizes *this as needed.

Definition at line 370 of file cu-sparse-matrix.cc.

References CuArrayBase< T >::CopyFromArray(), CuVectorBase< Real >::CopyFromVec(), CuSparseMatrix< Real >::csr_row_ptr_col_idx_, CuSparseMatrix< Real >::CsrColIdx(), CuSparseMatrix< Real >::CsrRowPtr(), CuSparseMatrix< Real >::CsrVal(), kaldi::kNoTrans, kaldi::kUndefined, CuSparseMatrix< Real >::NumCols(), CuSparseMatrix< Real >::NumElements(), CuSparseMatrix< Real >::NumRows(), CuSparseMatrix< Real >::Resize(), and CuSparseMatrix< Real >::Smat().

                                                                    {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     if (trans == kNoTrans) {
       Resize(smat.NumRows(), smat.NumCols(), smat.NumElements(), kUndefined);
 
       CuSubVector<Real> val_to(CsrVal(), NumElements());
       CuSubVector<Real> val_from(smat.CsrVal(), smat.NumElements());
       val_to.CopyFromVec(val_from);
 
       CuSubArray<int> idx_to(csr_row_ptr_col_idx_,
                              NumRows() + 1 + NumElements());
       CuSubArray<int> idx_from(smat.csr_row_ptr_col_idx_,
                                smat.NumRows() + 1 + smat.NumElements());
       idx_to.CopyFromArray(idx_from);
 
     } else {
       Resize(smat.NumCols(), smat.NumRows(), smat.NumElements(), kUndefined);
       CuTimer tim;
 
       CUSPARSE_SAFE_CALL(
           cusparse_csr2csc(GetCusparseHandle(), smat.NumRows(), smat.NumCols(),
                            smat.NumElements(), smat.CsrVal(), smat.CsrRowPtr(),
                            smat.CsrColIdx(), CsrVal(), CsrColIdx(), CsrRowPtr(),
                            CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO));
 
       CuDevice::Instantiate().AccuProfile(__func__, tim);
     }
   } else
 #endif
   {
     Smat().CopyFromSmat(smat.Smat(), trans);
   }
 }

◆ CopyToMat()

template<typename OtherReal> void CopyToMat	(	CuMatrixBase< OtherReal > *	dest,
		MatrixTransposeType	trans = `kNoTrans`
	)		const

Definition at line 622 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::CsrColIdx(), CuSparseMatrix< Real >::CsrRowPtr(), CuSparseMatrix< Real >::CsrVal(), CU1DBLOCK, CuMatrixBase< Real >::Data(), CuMatrixBase< Real >::Dim(), KALDI_ASSERT, kaldi::kNoTrans, CuMatrixBase< Real >::Mat(), CuSparseMatrix< Real >::NumCols(), CuMatrixBase< Real >::NumCols(), CuSparseMatrix< Real >::NumElements(), CuSparseMatrix< Real >::NumRows(), CuMatrixBase< Real >::NumRows(), CuMatrixBase< Real >::SetZero(), and CuSparseMatrix< Real >::Smat().

Referenced by kaldi::nnet3::ComputeObjectiveFunction(), CuMatrixBase< float >::CopyFromGeneralMat(), GeneralMatrix::CopyToMat(), kaldi::UnitTestCuSparseMatrixConstructFromIndexes(), kaldi::UnitTestCuSparseMatrixCopyToSmat(), kaldi::UnitTestCuSparseMatrixFrobeniusNorm(), kaldi::UnitTestCuSparseMatrixSelectRowsAndTranspose(), kaldi::UnitTestCuSparseMatrixSum(), kaldi::UnitTestCuSparseMatrixSwap(), and kaldi::UnitTestCuSparseMatrixTraceMatSmat().

                                                                       {
   if (trans == kNoTrans) {
     KALDI_ASSERT(M->NumRows() == NumRows() && M->NumCols() == NumCols());
   } else {
     KALDI_ASSERT(M->NumRows() == NumCols() && M->NumCols() == NumRows());
   }
   M->SetZero();
   if (NumElements() == 0) {
     return;
   }
 
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuTimer tim;
 
     // We use warpSize threads per row to access only the nnz elements.
     // Every CU1DBLOCK/warpSize rows share one thread block.
     // 1D grid to cover all rows.
     const int warpSize = 32;
     dim3 dimBlock(warpSize, CU1DBLOCK / warpSize);
     dim3 dimGrid(n_blocks(NumRows(), dimBlock.y));
 
     if (trans == kNoTrans) {
       cuda_copy_from_smat(dimGrid, dimBlock, M->Data(), M->Dim(), CsrRowPtr(),
                           CsrColIdx(), CsrVal());
     } else {
       cuda_copy_from_smat_trans(dimGrid, dimBlock, M->Data(), M->Dim(),
                                 CsrRowPtr(), CsrColIdx(), CsrVal());
     }
     CU_SAFE_CALL(cudaGetLastError());
     CuDevice::Instantiate().AccuProfile(__func__, tim);
   } else
 #endif
   {
     Smat().CopyToMat(&(M->Mat()), trans);
   }
 }

◆ CopyToSmat()

template<typename OtherReal> void CopyToSmat ( SparseMatrix< OtherReal > * smat ) const

Copy to CPU-based matrix.

We will add the transpose option later when it is necessary.

Definition at line 408 of file cu-sparse-matrix.cc.

References SparseMatrix< Real >::CopyFromSmat(), CuArrayBase< T >::CopyToVec(), CuVectorBase< Real >::CopyToVec(), CuSparseMatrix< Real >::csr_row_ptr_col_idx_, CuSparseMatrix< Real >::CsrVal(), rnnlm::i, rnnlm::j, KALDI_ASSERT, kaldi::kUndefined, rnnlm::n, CuSparseMatrix< Real >::num_cols_, CuSparseMatrix< Real >::NumElements(), CuSparseMatrix< Real >::NumRows(), SparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::Smat(), and SparseMatrix< Real >::Swap().

Referenced by CuSparseMatrix< Real >::Swap(), kaldi::UnitTestCuSparseMatrixCopyToSmat(), and CuSparseMatrix< Real >::Write().

                                                                            {
   // Copies *this into the CPU-side sparse matrix *smat (which must be
   // non-NULL), converting element values to OtherReal.  In the CUDA path the
   // CSR arrays are copied to host memory and regrouped into one
   // (column, value) list per row; otherwise the copy is delegated to
   // SparseMatrix::CopyFromSmat().
   KALDI_ASSERT(smat != NULL);
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     // A matrix with no rows is copied as an empty 0 x 0 matrix.
     if (NumRows() == 0) {
       smat->Resize(0, 0);
       return;
     }
     // csr_row_ptr_col_idx_ is one buffer: the first NumRows() + 1 entries are
     // the row pointers, followed by NumElements() column indices.  Copy both
     // parts to the host in a single transfer.
     CuSubArray<int> idx(csr_row_ptr_col_idx_, NumRows() + 1 + NumElements());
     std::vector<int> idx_cpu;
     idx.CopyToVec(&idx_cpu);
 
     // Copy the nonzero values to the host as OtherReal.
     CuSubVector<Real> val(CsrVal(), NumElements());
     Vector<OtherReal> val_cpu(NumElements(), kUndefined);
     val.CopyToVec(&val_cpu);
 
     std::vector<std::vector<std::pair<MatrixIndexT, OtherReal> > > pairs(
         NumRows());
     // 'n' runs once over all nonzeros; row i owns the elements up to (but not
     // including) row pointer i + 1.
     int n = 0;
     for (int i = 0; i < NumRows(); ++i) {
       for (; n < idx_cpu[i + 1]; ++n) {
         // Column indices start right after the NumRows() + 1 row pointers.
         const MatrixIndexT j = idx_cpu[NumRows() + 1 + n];
         pairs[i].push_back( { j, val_cpu(n) });
       }
     }
     // Every nonzero must have been assigned to exactly one row.
     KALDI_ASSERT(n == NumElements());
     SparseMatrix<OtherReal> tmp(num_cols_, pairs);
     smat->Swap(&tmp);
   } else
 #endif
   {
     smat->CopyFromSmat(this->Smat());
   }
 }

◆ CsrColIdx() [1/2]

const int* CsrColIdx ( ) const

inline protected

Returns pointer to the integer array of length nnz_ that contains the column indices of the corresponding elements in array CsrVal()

Definition at line 211 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::csr_row_ptr_col_idx_, and CuSparseMatrix< Real >::num_rows_.

Referenced by CuMatrixBase< float >::AddMatSmat(), CuMatrixBase< float >::AddSmat(), CuMatrixBase< float >::AddSmatMat(), CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToMat(), CuSparseMatrix< Real >::CuSparseMatrix(), CuSparseMatrix< Real >::SelectRows(), and kaldi::TraceMatSmat().

                                {
     return csr_row_ptr_col_idx_ + num_rows_ + 1;
   }

◆ CsrColIdx() [2/2]

int* CsrColIdx ( )

inline protected

Definition at line 214 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::csr_row_ptr_col_idx_, CuSparseMatrix< Real >::Destroy(), and CuSparseMatrix< Real >::num_rows_.

                    {
     return csr_row_ptr_col_idx_ + num_rows_ + 1;
   }

◆ CsrRowPtr() [1/2]

const int* CsrRowPtr ( ) const

inline protected

Returns pointer to the integer array of length NumRows()+1 that holds indices of the first nonzero element in the i-th row, while the last entry contains nnz_, as zero-based CSR format is used.

Definition at line 202 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::csr_row_ptr_col_idx_.

Referenced by CuMatrixBase< float >::AddMatSmat(), CuMatrixBase< float >::AddSmat(), CuMatrixBase< float >::AddSmatMat(), CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToMat(), CuSparseMatrix< Real >::CuSparseMatrix(), CuSparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::SelectRows(), and kaldi::TraceMatSmat().

                                {
     return csr_row_ptr_col_idx_;
   }

◆ CsrRowPtr() [2/2]

int* CsrRowPtr ( )

inline protected

Definition at line 205 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::csr_row_ptr_col_idx_.

                    {
     return csr_row_ptr_col_idx_;
   }

◆ CsrVal() [1/2]

const Real* CsrVal ( ) const

inline protected

Returns pointer to the data array of length nnz_ that holds all nonzero values in zero-based CSR format.

Definition at line 192 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::csr_val_.

Referenced by CuMatrixBase< float >::AddMatSmat(), CuMatrixBase< float >::AddSmat(), CuMatrixBase< float >::AddSmatMat(), CuSparseMatrix< Real >::CopyElementsToVec(), CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToMat(), CuSparseMatrix< Real >::CopyToSmat(), CuSparseMatrix< Real >::CuSparseMatrix(), CuSparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::SelectRows(), and kaldi::TraceMatSmat().

                              {
     return csr_val_;
   }

◆ CsrVal() [2/2]

Real* CsrVal ( )

inline protected

Definition at line 195 of file cu-sparse-matrix.h.

References CuSparseMatrix< Real >::csr_val_.

                  {
     return csr_val_;
   }

◆ Destroy()

void Destroy ( )

private

Definition at line 301 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::csr_row_ptr_col_idx_, CuSparseMatrix< Real >::csr_val_, CuSparseMatrix< Real >::nnz_, CuSparseMatrix< Real >::num_cols_, CuSparseMatrix< Real >::num_rows_, and CuSparseMatrix< Real >::Smat().

Referenced by CuSparseMatrix< Real >::CsrColIdx(), CuSparseMatrix< Real >::Resize(), and CuSparseMatrix< Real >::~CuSparseMatrix().

                                    {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuTimer tim;
     if (csr_row_ptr_col_idx_) {
       CuDevice::Instantiate().Free(csr_row_ptr_col_idx_);
     }
     if (csr_val_) {
       CuDevice::Instantiate().Free(csr_val_);
     }
     num_rows_ = 0;
     num_cols_ = 0;
     nnz_ = 0;
     csr_row_ptr_col_idx_ = NULL;
     csr_val_ = NULL;
     CuDevice::Instantiate().AccuProfile(__func__, tim);
   } else
 #endif
   {
     Smat().Resize(0, 0);
   }
 }

◆ FrobeniusNorm()

Real FrobeniusNorm ( ) const

Definition at line 97 of file cu-sparse-matrix.cc.

References CuVectorBase< Real >::Norm().

Referenced by kaldi::UnitTestCuSparseMatrixFrobeniusNorm().

                                                {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuSubVector<Real> element_vec(CsrVal(), NumElements());
     return element_vec.Norm(2);
   } else
 #endif
   {
     return Smat().FrobeniusNorm();
   }
 }

◆ NumCols()

MatrixIndexT NumCols ( ) const

Definition at line 58 of file cu-sparse-matrix.cc.

Referenced by CuMatrixBase< float >::AddMatSmat(), CuMatrixBase< float >::AddSmat(), CuMatrixBase< float >::AddSmatMat(), CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToMat(), CuSparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::SelectRows(), kaldi::TraceMatSmat(), kaldi::UnitTestCuSparseMatrixConstructFromIndexes(), kaldi::UnitTestCuSparseMatrixSelectRowsAndTranspose(), and kaldi::UnitTestCuSparseMatrixSwap().

                                                  {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     return num_cols_;
   } else
 #endif
   {
     return Smat().NumCols();
   }
 }

◆ NumElements()

MatrixIndexT NumElements ( ) const

Definition at line 70 of file cu-sparse-matrix.cc.

Referenced by CuMatrixBase< float >::AddMatSmat(), CuMatrixBase< float >::AddSmatMat(), CuSparseMatrix< Real >::CopyElementsToVec(), CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToMat(), CuSparseMatrix< Real >::CopyToSmat(), CuSparseMatrix< Real >::CuSparseMatrix(), CuSparseMatrix< Real >::Resize(), and kaldi::TraceMatSmat().

                                                      {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     return nnz_;
   } else
 #endif
   {
     return Smat().NumElements();
   }
 }

◆ NumRows()

MatrixIndexT NumRows ( ) const

Definition at line 46 of file cu-sparse-matrix.cc.

Referenced by CuMatrixBase< float >::AddMatSmat(), CuMatrixBase< float >::AddSmat(), CuMatrixBase< float >::AddSmatMat(), CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToMat(), CuSparseMatrix< Real >::CopyToSmat(), CuSparseMatrix< Real >::CuSparseMatrix(), CuSparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::SelectRows(), kaldi::TraceMatSmat(), kaldi::UnitTestCuSparseMatrixConstructFromIndexes(), kaldi::UnitTestCuSparseMatrixSelectRowsAndTranspose(), and kaldi::UnitTestCuSparseMatrixSwap().

                                                  {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     return num_rows_;
   } else
 #endif
   {
     return Smat().NumRows();
   }
 }

◆ operator=() [1/2]

CuSparseMatrix< Real > & operator= ( const SparseMatrix< Real > & smat )

Copy from CPU-based matrix.

Definition at line 227 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::CopyFromSmat().

                                     {
   this->CopyFromSmat(smat);
   return *this;
 }

◆ operator=() [2/2]

CuSparseMatrix< Real > & operator= ( const CuSparseMatrix< Real > & smat )

Copy from possibly-GPU-based matrix.

Definition at line 234 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::CopyFromSmat(), and kaldi::kNoTrans.

                                       {
   this->CopyFromSmat(smat, kNoTrans);
   return *this;
 }

◆ Read()

void Read	(	std::istream &	is,
		bool	binary
	)

Definition at line 514 of file cu-sparse-matrix.cc.

References SparseMatrix< Real >::Read(), and CuSparseMatrix< Real >::Swap().

                                                            {
   SparseMatrix<Real> tmp;
   tmp.Read(is, binary);
   this->Swap(&tmp);
 }

◆ Resize()

void Resize	(	const MatrixIndexT	num_rows,
		const MatrixIndexT	num_cols,
		const MatrixIndexT	nnz,
		MatrixResizeType	resize_type = `kSetZero`
	)

protected

Users of this class won't normally have to use Resize.

'nnz' should be determined beforehand when calling this API.

Definition at line 241 of file cu-sparse-matrix.cc.

Referenced by CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CuSparseMatrix(), and CuSparseMatrix< Real >::Smat().

                                                                 {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     KALDI_ASSERT(resize_type == kSetZero || resize_type == kUndefined);
 
     if (num_rows == NumRows() && num_cols == NumCols()
         && nnz == NumElements()) {
       if (resize_type == kSetZero) {
         CuSubVector<Real> val(CsrVal(), NumElements());
         val.Set(0);
       }
       return;
     }
 
     Destroy();
 
     CuTimer tim;
 
     if (num_rows * num_cols == 0) {
       KALDI_ASSERT(num_rows == 0);
       KALDI_ASSERT(num_cols == 0);
       KALDI_ASSERT(nnz == 0);
       num_rows_ = 0;
       num_cols_ = 0;
       nnz_ = 0;
       csr_row_ptr_col_idx_ = static_cast<int*>(CuDevice::Instantiate().Malloc(
           1 * sizeof(int)));
       csr_val_ = NULL;
     } else {
       KALDI_ASSERT(num_rows > 0);
       KALDI_ASSERT(num_cols > 0);
       KALDI_ASSERT(nnz >= 0 && nnz <= num_rows * static_cast<int64>(num_cols));
 
       num_rows_ = num_rows;
       num_cols_ = num_cols;
       nnz_ = nnz;
       csr_row_ptr_col_idx_ = static_cast<int*>(CuDevice::Instantiate().Malloc(
           (num_rows + 1 + nnz) * sizeof(int)));
       csr_val_ = static_cast<Real*>(CuDevice::Instantiate().Malloc(
           nnz * sizeof(Real)));
       CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
       row_ptr.Set(nnz);
       if (resize_type == kSetZero) {
         CuSubVector<Real> val(CsrVal(), NumElements());
         val.Set(0);
       }
     }
 
     CuDevice::Instantiate().AccuProfile(__func__, tim);
   } else
 #endif
   {
     Smat().Resize(num_rows, num_cols, resize_type);
   }
 }

◆ SelectRows()

void SelectRows	(	const CuArray< int32 > &	row_indexes,
		const CuSparseMatrix< Real > &	smat_other
	)

Select a subset of the rows of a CuSparseMatrix.

Sets *this to only the rows of 'smat_other' that are listed in 'row_indexes'. 'row_indexes' must satisfy 0 <= row_indexes[i] < smat_other.NumRows().

Definition at line 110 of file cu-sparse-matrix.cc.

References CuArrayBase< T >::CopyToVec(), CuSparseMatrix< Real >::CsrColIdx(), CuSparseMatrix< Real >::CsrRowPtr(), CuSparseMatrix< Real >::CsrVal(), CU1DBLOCK, CuArrayBase< T >::Data(), CuArrayBase< T >::Dim(), rnnlm::i, kaldi::kUndefined, CuSparseMatrix< Real >::NumCols(), CuSparseMatrix< Real >::NumRows(), and CuSparseMatrix< Real >::Smat().

Referenced by kaldi::UnitTestCuSparseMatrixSelectRowsAndTranspose().

                                                                               {
   // Sets *this to the rows of 'smat_other' listed in 'row_indexes', in the
   // order given.  The entries of 'row_indexes' are used directly as indices
   // and are not range-checked here.
   // NOTE(review): in the CUDA path Resize() may free this object's buffers
   // before the kernel reads smat_other's buffers, so calling this with
   // smat_other == *this looks unsafe -- confirm with callers.
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuTimer tim;
 
     // Calculate nnz and row_ptr before copying selected col_idx and val.
     // We do this on CPU for now. We will move this part to GPU if mem copy
     // becomes a bottleneck here.
     std::vector<int32> row_indexes_cpu(row_indexes.Dim());
     row_indexes.CopyToVec(&row_indexes_cpu);
     CuSubArray<int> other_row_ptr(smat_other.CsrRowPtr(),
                                   smat_other.NumRows() + 1);
     std::vector<int> other_row_ptr_cpu(smat_other.NumRows() + 1);
     other_row_ptr.CopyToVec(&other_row_ptr_cpu);
     int nnz = 0;
     std::vector<int> row_ptr_cpu(row_indexes_cpu.size() + 1);
     for (int i = 0; i < row_indexes_cpu.size(); ++i) {
       // Output row i starts at the running count; its length is the length
       // of the selected source row (difference of adjacent row pointers).
       row_ptr_cpu[i] = nnz;
       nnz += other_row_ptr_cpu[row_indexes_cpu[i] + 1]
           - other_row_ptr_cpu[row_indexes_cpu[i]];
     }
     // Final row pointer holds the total number of nonzeros.
     row_ptr_cpu[row_indexes_cpu.size()] = nnz;
 
     // Allocate the output and upload its row pointers; column indices and
     // values are filled in by the kernel launched below.
     Resize(row_indexes.Dim(), smat_other.NumCols(), nnz, kUndefined);
     CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
     row_ptr.CopyFromVec(row_ptr_cpu);
 
     // We use warpSize threads per row to access only the nnz elements.
     // Every CU1DBLOCK/warpSize rows share one thread block.
     // 1D grid to cover all selected rows.
     const int warpSize = 32;
     dim3 dimBlock(warpSize, CU1DBLOCK / warpSize);
     dim3 dimGrid(n_blocks(row_indexes.Dim(), dimBlock.y));
 
     cuda_select_rows(dimGrid, dimBlock, CsrRowPtr(), CsrColIdx(), CsrVal(),
                      row_indexes.Data(), row_indexes.Dim(),
                      smat_other.CsrRowPtr(), smat_other.CsrColIdx(),
                      smat_other.CsrVal());
 
     CU_SAFE_CALL(cudaGetLastError());
     CuDevice::Instantiate().AccuProfile(__func__, tim);
   } else
 #endif
   {
     // CPU path: copy the indexes to a std::vector and delegate to
     // SparseMatrix::SelectRows().
     std::vector<int32> row_indexes_cpu(row_indexes.Dim());
     row_indexes.CopyToVec(&row_indexes_cpu);
     Smat().SelectRows(row_indexes_cpu, smat_other.Smat());
   }
 }

◆ SetRandn()

void SetRandn ( BaseFloat zero_prob )

Sets the matrix to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob and otherwise normally distributed; mostly for purposes of testing.

Definition at line 497 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::num_cols_, CuSparseMatrix< Real >::num_rows_, SparseMatrix< Real >::SetRandn(), and CuSparseMatrix< Real >::Swap().

                                                        {
   if (num_rows_ == 0)
     return;
   // Use the CPU function for the moment, not efficient...
   SparseMatrix<Real> tmp(num_rows_, num_cols_);
   tmp.SetRandn(zero_prob);
   Swap(&tmp);
 }

◆ Smat() [1/2]

const SparseMatrix<Real>& Smat ( ) const

inline protected

Definition at line 178 of file cu-sparse-matrix.h.

Referenced by CuMatrixBase< float >::AddMatSmat(), CuMatrixBase< float >::AddSmat(), CuMatrixBase< float >::AddSmatMat(), CuSparseMatrix< Real >::CopyElementsToVec(), CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToMat(), CuSparseMatrix< Real >::CopyToSmat(), CuSparseMatrix< Real >::CuSparseMatrix(), CuSparseMatrix< Real >::Destroy(), CuSparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::SelectRows(), CuSparseMatrix< Real >::Swap(), and kaldi::TraceMatSmat().

                                                 {
     return *(reinterpret_cast<const SparseMatrix<Real>*>(this));
   }

◆ Smat() [2/2]

SparseMatrix<Real>& Smat ( )

inline protected

Definition at line 181 of file cu-sparse-matrix.h.

References kaldi::kSetZero, and CuSparseMatrix< Real >::Resize().

                                     {
     return *(reinterpret_cast<SparseMatrix<Real>*>(this));
   }

◆ Sum()

Real Sum ( ) const

Definition at line 82 of file cu-sparse-matrix.cc.

References CuVectorBase< Real >::Sum().

Referenced by kaldi::nnet3::ComputeObjectiveFunction(), and kaldi::UnitTestCuSparseMatrixSum().

                                      {
   if (NumElements() == 0)
     return 0;
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuSubVector<Real> sum_vec(CsrVal(), NumElements());
     return sum_vec.Sum();
   } else
 #endif
   {
     return Smat().Sum();
   }
 }

◆ Swap() [1/2]

void Swap ( SparseMatrix< Real > * smat )

Swap with CPU-based matrix.

Definition at line 467 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::CopyToSmat(), and CuSparseMatrix< Real >::Smat().

Referenced by CuSparseMatrix< Real >::CuSparseMatrix(), CuSparseMatrix< Real >::Read(), CuSparseMatrix< Real >::SetRandn(), and kaldi::UnitTestCuSparseMatrixSwap().

                                                         {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     CuSparseMatrix<Real> tmp(*smat);
     Swap(&tmp);
     tmp.CopyToSmat(smat);
   } else
 #endif
   {
     Smat().Swap(smat);
   }
 }

◆ Swap() [2/2]

void Swap ( CuSparseMatrix< Real > * smat )

Swap with possibly-CPU-based matrix.

Definition at line 481 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::csr_row_ptr_col_idx_, CuSparseMatrix< Real >::csr_val_, CuSparseMatrix< Real >::nnz_, CuSparseMatrix< Real >::num_cols_, CuSparseMatrix< Real >::num_rows_, CuSparseMatrix< Real >::Smat(), and kaldi::swap().

                                                           {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     std::swap(num_rows_, smat->num_rows_);
     std::swap(num_cols_, smat->num_cols_);
     std::swap(nnz_, smat->nnz_);
     std::swap(csr_row_ptr_col_idx_, smat->csr_row_ptr_col_idx_);
     std::swap(csr_val_, smat->csr_val_);
   } else
 #endif
   {
     Smat().Swap(&(smat->Smat()));
   }
 }

◆ Write()

void Write	(	std::ostream &	os,
		bool	binary
	)		const

Definition at line 507 of file cu-sparse-matrix.cc.

References CuSparseMatrix< Real >::CopyToSmat(), and SparseMatrix< Real >::Write().

                                                                   {
   SparseMatrix<Real> tmp;
   CopyToSmat(&tmp);
   tmp.Write(os, binary);
 }

Friends And Related Function Documentation

◆ CuMatrixBase< double >

friend class CuMatrixBase< double >

friend

Definition at line 51 of file cu-sparse-matrix.h.

◆ CuMatrixBase< float >

friend class CuMatrixBase< float >

friend

Definition at line 50 of file cu-sparse-matrix.h.

◆ CuMatrixBase< Real >

friend class CuMatrixBase< Real >

friend

Definition at line 52 of file cu-sparse-matrix.h.

◆ CuVectorBase< double >

friend class CuVectorBase< double >

friend

Definition at line 54 of file cu-sparse-matrix.h.

◆ CuVectorBase< float >

friend class CuVectorBase< float >

friend

Definition at line 53 of file cu-sparse-matrix.h.

◆ CuVectorBase< Real >

friend class CuVectorBase< Real >

friend

Definition at line 55 of file cu-sparse-matrix.h.

◆ TraceMatSmat

Real TraceMatSmat	(	const CuMatrixBase< Real > &	A,
		const CuSparseMatrix< Real > &	B,
		MatrixTransposeType	trans
	)

friend

Definition at line 524 of file cu-sparse-matrix.cc.

Referenced by kaldi::TraceMatSmat().

                                              {
   // Computes the trace of the product of dense A and sparse B, with `trans`
   // selecting whether B is used transposed.  Uses CUDA kernels when the
   // device is enabled and the CPU TraceMatSmat otherwise.

   // Early exits: a zero-column A (B is asserted to have zero columns too) or
   // a B with no stored elements gives a trace of zero.
   if (A.NumCols() == 0) {
     KALDI_ASSERT(B.NumCols() == 0);
     return 0.0;
   }
   if (B.NumElements() == 0) {
     return 0.0;
   }
   Real result = 0;
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     // Shape checks are done here for the CUDA path only.
     // NOTE(review): the CPU TraceMatSmat presumably validates shapes itself
     // -- confirm.
     if (trans == kTrans) {
       KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols());
     } else {
       KALDI_ASSERT(A.NumCols() == B.NumRows() && A.NumRows() == B.NumCols());
     }
 
     // The Sum() method in CuVector handles a bunch of logic, we use that to
     // compute the trace.
     // sum_vec has one slot per stored element of B.
     // NOTE(review): presumably each kernel writes one per-element product
     // into it -- confirm against the kernel definitions.
     CuVector<Real> sum_vec(B.NumElements());
     // Timer handed to AccuProfile() below.
     CuTimer tim;
 
     // We use warpSize threads per row to access only the nnz elements.
     // Every CU1DBLOCK/warpSize rows share one thread block.
     // 1D grid to cover all rows of B.
     const int warpSize = 32;
     dim3 dimBlock(warpSize, CU1DBLOCK / warpSize);
     dim3 dimGrid(n_blocks(B.NumRows(), dimBlock.y));
 
     // Pick the kernel variant that matches `trans`.
     if (trans == kNoTrans) {
       cuda_trace_mat_smat(dimGrid, dimBlock, A.Data(), A.Dim(), B.CsrRowPtr(),
                           B.CsrColIdx(), B.CsrVal(), sum_vec.Data());
     } else {
       cuda_trace_mat_smat_trans(dimGrid, dimBlock, A.Data(), A.Dim(),
                                 B.CsrRowPtr(), B.CsrColIdx(), B.CsrVal(),
                                 sum_vec.Data());
     }
     // Reduce the per-element buffer to the final scalar.
     result = sum_vec.Sum();
     CuDevice::Instantiate().AccuProfile(__func__, tim);
   } else
 #endif
   {
     // CUDA not compiled in, or not enabled: use the CPU implementation.
     result = TraceMatSmat(A.Mat(), B.Smat(), trans);
   }
   return result;
 }

Member Data Documentation

◆ cpu_rows_

std::vector<SparseVector<Real> > cpu_rows_

private

Definition at line 224 of file cu-sparse-matrix.h.

◆ csr_row_ptr_col_idx_

int* csr_row_ptr_col_idx_

private

Definition at line 243 of file cu-sparse-matrix.h.

Referenced by CuSparseMatrix< Real >::CopyFromSmat(), CuSparseMatrix< Real >::CopyToSmat(), CuSparseMatrix< Real >::CsrColIdx(), CuSparseMatrix< Real >::CsrRowPtr(), CuSparseMatrix< Real >::Destroy(), CuSparseMatrix< Real >::Resize(), and CuSparseMatrix< Real >::Swap().

◆ csr_val_

Real* csr_val_

private

Definition at line 246 of file cu-sparse-matrix.h.

Referenced by CuSparseMatrix< Real >::CsrVal(), CuSparseMatrix< Real >::Destroy(), CuSparseMatrix< Real >::Resize(), and CuSparseMatrix< Real >::Swap().

◆ nnz_

MatrixIndexT nnz_

private

Definition at line 239 of file cu-sparse-matrix.h.

Referenced by CuSparseMatrix< Real >::Destroy(), CuSparseMatrix< Real >::Resize(), and CuSparseMatrix< Real >::Swap().

◆ num_cols_

MatrixIndexT num_cols_

private

Definition at line 236 of file cu-sparse-matrix.h.

Referenced by CuSparseMatrix< Real >::CopyToSmat(), CuSparseMatrix< Real >::Destroy(), CuSparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::SetRandn(), and CuSparseMatrix< Real >::Swap().

◆ num_rows_

MatrixIndexT num_rows_

private

Definition at line 235 of file cu-sparse-matrix.h.

Referenced by CuSparseMatrix< Real >::CsrColIdx(), CuSparseMatrix< Real >::Destroy(), CuSparseMatrix< Real >::Resize(), CuSparseMatrix< Real >::SetRandn(), and CuSparseMatrix< Real >::Swap().

The documentation for this class was generated from the following files: cu-sparse-matrix.h and cu-sparse-matrix.cc.

Public Member Functions

Protected Member Functions

Private Member Functions

Private Attributes

Friends

Detailed Description

template<class Real> class kaldi::CuSparseMatrix< Real >

Constructor & Destructor Documentation

◆ CuSparseMatrix() [1/5]

◆ CuSparseMatrix() [2/5]

◆ CuSparseMatrix() [3/5]

◆ CuSparseMatrix() [4/5]

◆ CuSparseMatrix() [5/5]

◆ ~CuSparseMatrix()

Member Function Documentation

◆ CopyElementsToVec()

◆ CopyFromSmat() [1/2]

◆ CopyFromSmat() [2/2]

◆ CopyToMat()

◆ CopyToSmat()

◆ CsrColIdx() [1/2]

◆ CsrColIdx() [2/2]

◆ CsrRowPtr() [1/2]

◆ CsrRowPtr() [2/2]

◆ CsrVal() [1/2]

◆ CsrVal() [2/2]

◆ Destroy()

◆ FrobeniusNorm()

◆ NumCols()

◆ NumElements()

◆ NumRows()

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ Read()

◆ Resize()

◆ SelectRows()

◆ SetRandn()

◆ Smat() [1/2]

◆ Smat() [2/2]

◆ Sum()

◆ Swap() [1/2]

◆ Swap() [2/2]

◆ Write()

Friends And Related Function Documentation

◆ CuMatrixBase< double >

◆ CuMatrixBase< float >

◆ CuMatrixBase< Real >

◆ CuVectorBase< double >

◆ CuVectorBase< float >

◆ CuVectorBase< Real >

◆ TraceMatSmat

Member Data Documentation

◆ cpu_rows_

◆ csr_row_ptr_col_idx_

◆ csr_val_

◆ nnz_

◆ num_cols_

◆ num_rows_

template<class Real>
class kaldi::CuSparseMatrix< Real >