25 #include <cuda_runtime_api.h> 26 #include <cublas_v2.h> 45 template <
typename Real>
48 if (CuDevice::Instantiate().Enabled()) {
53 return Smat().NumRows();
57 template <
typename Real>
60 if (CuDevice::Instantiate().Enabled()) {
65 return Smat().NumCols();
69 template <
typename Real>
72 if (CuDevice::Instantiate().Enabled()) {
77 return Smat().NumElements();
81 template <
typename Real>
83 if (NumElements() == 0)
86 if (CuDevice::Instantiate().Enabled()) {
96 template <
typename Real>
99 if (CuDevice::Instantiate().Enabled()) {
101 return element_vec.
Norm(2);
105 return Smat().FrobeniusNorm();
109 template<
typename Real>
113 if (CuDevice::Instantiate().Enabled()) {
119 std::vector<int32> row_indexes_cpu(row_indexes.
Dim());
123 std::vector<int> other_row_ptr_cpu(smat_other.
NumRows() + 1);
124 other_row_ptr.
CopyToVec(&other_row_ptr_cpu);
126 std::vector<int> row_ptr_cpu(row_indexes_cpu.size() + 1);
127 for (
int i = 0;
i < row_indexes_cpu.size(); ++
i) {
128 row_ptr_cpu[
i] = nnz;
129 nnz += other_row_ptr_cpu[row_indexes_cpu[
i] + 1]
130 - other_row_ptr_cpu[row_indexes_cpu[
i]];
132 row_ptr_cpu[row_indexes_cpu.size()] = nnz;
136 row_ptr.CopyFromVec(row_ptr_cpu);
141 const int warpSize = 32;
142 dim3 dimBlock(warpSize,
CU1DBLOCK / warpSize);
143 dim3 dimGrid(n_blocks(row_indexes.
Dim(), dimBlock.y));
145 cuda_select_rows(dimGrid, dimBlock, CsrRowPtr(), CsrColIdx(), CsrVal(),
146 row_indexes.
Data(), row_indexes.
Dim(),
150 CU_SAFE_CALL(cudaGetLastError());
151 CuDevice::Instantiate().AccuProfile(__func__, tim);
155 std::vector<int32> row_indexes_cpu(row_indexes.
Dim());
157 Smat().SelectRows(row_indexes_cpu, smat_other.
Smat());
161 template<
typename Real>
164 num_rows_(0), num_cols_(0), nnz_(0), csr_row_ptr_col_idx_(NULL), csr_val_(
167 if (CuDevice::Instantiate().Enabled()) {
186 std::vector<int32> idx(indexes.
Dim());
193 template<
typename Real>
200 if (CuDevice::Instantiate().Enabled()) {
219 std::vector<int32> idx(indexes.
Dim());
226 template <
typename Real>
233 template <
typename Real>
240 template<
typename Real>
246 if (CuDevice::Instantiate().Enabled()) {
262 if (num_rows * num_cols == 0) {
275 KALDI_ASSERT(nnz >= 0 && nnz <= num_rows * static_cast<int64>(num_cols));
281 (num_rows + 1 + nnz) *
sizeof(
int)));
282 csr_val_ =
static_cast<Real*
>(CuDevice::Instantiate().Malloc(
283 nnz *
sizeof(Real)));
292 CuDevice::Instantiate().AccuProfile(__func__, tim);
296 Smat().Resize(num_rows, num_cols, resize_type);
300 template<
typename Real>
303 if (CuDevice::Instantiate().Enabled()) {
309 CuDevice::Instantiate().Free(
csr_val_);
316 CuDevice::Instantiate().AccuProfile(__func__, tim);
324 template<
typename Real>
325 template<
typename OtherReal>
328 if (CuDevice::Instantiate().Enabled()) {
333 std::vector<int> row_ptr(
NumRows() + 1);
341 col_idx[
n] = ((smat.
Data() +
i)->Data() +
j)->first;
342 val(n) =
static_cast<Real
>(((smat.
Data() +
i)->Data() +
j)->second);
357 this->
Smat().CopyFromSmat(smat);
369 template<
typename Real>
373 if (CuDevice::Instantiate().Enabled()) {
392 cusparse_csr2csc(GetCusparseHandle(), smat.
NumRows(), smat.
NumCols(),
395 CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO));
397 CuDevice::Instantiate().AccuProfile(__func__, tim);
402 Smat().CopyFromSmat(smat.
Smat(), trans);
406 template<
typename Real>
407 template<
typename OtherReal>
411 if (CuDevice::Instantiate().Enabled()) {
417 std::vector<int> idx_cpu;
424 std::vector<std::vector<std::pair<MatrixIndexT, OtherReal> > > pairs(
428 for (; n < idx_cpu[
i + 1]; ++
n) {
430 pairs[
i].push_back( {
j, val_cpu(n) });
451 template<
typename Real>
456 if (CuDevice::Instantiate().Enabled()) {
462 Smat().CopyElementsToVec(&(vec->
Vec()));
466 template <
typename Real>
469 if (CuDevice::Instantiate().Enabled()) {
480 template<
typename Real>
483 if (CuDevice::Instantiate().Enabled()) {
496 template<
typename Real>
506 template<
typename Real>
510 tmp.
Write(os, binary);
513 template<
typename Real>
516 tmp.
Read(is, binary);
523 template <
typename Real>
536 if (CuDevice::Instantiate().Enabled()) {
551 const int warpSize = 32;
552 dim3 dimBlock(warpSize,
CU1DBLOCK / warpSize);
553 dim3 dimGrid(n_blocks(B.
NumRows(), dimBlock.y));
559 cuda_trace_mat_smat_trans(dimGrid, dimBlock, A.
Data(), A.
Dim(),
563 result = sum_vec.Sum();
564 CuDevice::Instantiate().AccuProfile(__func__, tim);
585 if (CuDevice::Instantiate().Enabled()) {
609 KALDI_ERR <<
"Invalid GeneralMatrix type.";
620 template <
typename Real>
621 template <
typename OtherReal>
635 if (CuDevice::Instantiate().Enabled()) {
641 const int warpSize = 32;
642 dim3 dimBlock(warpSize,
CU1DBLOCK / warpSize);
643 dim3 dimGrid(n_blocks(
NumRows(), dimBlock.y));
649 cuda_copy_from_smat_trans(dimGrid, dimBlock, M->
Data(), M->
Dim(),
652 CU_SAFE_CALL(cudaGetLastError());
653 CuDevice::Instantiate().AccuProfile(__func__, tim);
657 Smat().CopyToMat(&(M->
Mat()), trans);
685 if (CuDevice::Instantiate().Enabled()) {
687 cu_mat->
AddMat(alpha, cu_copy);
691 cu_mat->
Mat().AddMat(alpha, mat_);
696 if (CuDevice::Instantiate().Enabled()) {
698 cu_mat->
AddSmat(alpha, cu_smat, trans);
702 cu_mat->
Mat().AddSmat(alpha, smat_, trans);
708 if (CuDevice::Instantiate().Enabled()) {
710 cu_mat->
AddMat(alpha, cu_mat_copy, trans);
714 cu_mat->
Mat().AddMat(alpha, mat, trans);
718 KALDI_ERR <<
"Invalid GeneralMatrix type.";
const MatrixBase< Real > & Mat() const
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
MatrixIndexT NumRows() const
const Real * CsrVal() const
Returns pointer to the data array of length nnz_ that holds all nonzero values in zero-based CSR form...
void CopyToMat(CuMatrixBase< OtherReal > *dest, MatrixTransposeType trans=kNoTrans) const
void Resize(const MatrixIndexT num_rows, const MatrixIndexT num_cols, const MatrixIndexT nnz, MatrixResizeType resize_type=kSetZero)
Users of this class won't normally have to use Resize.
void Swap(SparseMatrix< Real > *smat)
Swap with CPU-based matrix.
void CopyToVec(std::vector< T > *dst) const
This function resizes *dst if needed.
void CopyFromSmat(const SparseMatrix< OtherReal > &smat)
Copy from CPU-based matrix.
Real FrobeniusNorm() const
void SelectRows(const CuArray< int32 > &row_indexes, const CuSparseMatrix< Real > &smat_other)
Select a subset of the rows of a CuSparseMatrix.
void Read(std::istream &is, bool binary)
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
void AddSmat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A.
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
const T * Data() const
Get raw pointer.
void Write(std::ostream &os, bool binary) const
void CopyFromArray(const CuArrayBase< T > &src)
The caller is responsible for ensuring that the dimension of *this equals that of src.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void AddToMat(BaseFloat alpha, MatrixBase< BaseFloat > *mat, MatrixTransposeType trans=kNoTrans) const
Adds alpha times *this to mat.
MatrixIndexT NumCols() const
SparseVector< Real > * Data()
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimensions do not match.
void Read(std::istream &os, bool binary)
const int * CsrColIdx() const
Returns pointer to the integer array of length nnz_ that contains the column indices of the correspon...
void Swap(Matrix< Real > *mat)
void CopyFromSmat(const SparseMatrix< OtherReal > &other, MatrixTransposeType trans=kNoTrans)
Copies data from another sparse matrix.
void SetZero()
Math operations, some calling kernels.
const SparseMatrix< Real > & Smat() const
void Set(const T &value)
Set to a constant value.
void CopyElementsToVec(CuVectorBase< Real > *vec) const
Copy elements to CuVector.
MatrixIndexT NumElements() const
int * csr_row_ptr_col_idx_
CuSparseMatrix< Real > & operator=(const SparseMatrix< Real > &smat)
Copy from CPU-based matrix.
void CopyToMat(MatrixBase< BaseFloat > *mat, MatrixTransposeType trans=kNoTrans) const
Copies contents, regardless of type, to "mat", which must be correctly sized.
void CopyToSmat(SparseMatrix< OtherReal > *smat) const
Copy to CPU-based matrix.
const Real * Data() const
Return data pointer (const).
CuSparseMatrix()
Default constructor.
const int * CsrRowPtr() const
Returns pointer to the integer array of length NumRows()+1 that holds indices of the first nonzero el...
Matrix for CUDA computing.
MatrixIndexT NumCols() const
A class representing a vector.
void SetRandn(BaseFloat zero_prob)
Sets up to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob an...
friend Real TraceMatSmat(const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType trans)
const VectorBase< Real > & Vec() const
#define KALDI_ASSERT(cond)
void CopyFromVec(const std::vector< T > &src)
The caller is responsible for ensuring that the dimension of *this equals that of src.
MatrixIndexT NumRows() const
void Swap(SparseMatrix< Real > *other)
void Sequence(const T base)
Fill with the sequence [base ...
MatrixIndexT NumRows() const
Dimensions.
MatrixIndexT Dim() const
Return the vector dimension.
void CopyToVec(VectorBase< OtherReal > *dst) const
void Write(std::ostream &os, bool binary) const
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero)
Resizes the matrix; analogous to Matrix::Resize().
MatrixIndexT NumCols() const
MatrixIndexT Dim() const
Dimensions.
Vector for CUDA computing.
void SetRandn(BaseFloat zero_prob)
Sets up to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob an...
MatrixIndexT NumElements() const