cu-sparse-matrix.cc
Go to the documentation of this file.
1 // cudamatrix/cu-sparse-matrix.cc
2 
3 // Copyright 2015 Guoguo Chen
4 // 2015 Johns Hopkins University (author: Daniel Povey)
5 // 2017 Shiyin Kang
6 
7 
8 // See ../../COPYING for clarification regarding multiple authors
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
18 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
19 // MERCHANTABLITY OR NON-INFRINGEMENT.
20 // See the Apache 2 License for the specific language governing permissions and
21 // limitations under the License.
22 
23 
24 #if HAVE_CUDA == 1
25 #include <cuda_runtime_api.h>
26 #include <cublas_v2.h>
27 #endif
28 
29 #include <utility>
30 #include <vector>
31 
32 #include "base/timer.h"
33 #include "cudamatrix/cu-common.h"
34 #include "cudamatrix/cu-vector.h"
35 #include "cudamatrix/cu-matrix.h"
36 #include "cudamatrix/cu-device.h"
37 #include "cudamatrix/cu-kernels.h"
38 #include "cudamatrix/cu-array.h"
39 #include "cudamatrix/cu-math.h"
42 
43 namespace kaldi {
44 
// Returns the number of rows: the cached num_rows_ member when the CUDA
// device is enabled, otherwise the row count of the CPU-side Smat().
// NOTE(review): the signature line (listing line 46) is missing from this
// extract; presumably CuSparseMatrix<Real>::NumRows() const -- confirm.
45 template <typename Real>
47 #if HAVE_CUDA == 1
48  if (CuDevice::Instantiate().Enabled()) {
49  return num_rows_;
50  } else
51 #endif
52  {
53  return Smat().NumRows();
54  }
55 }
56 
// Returns the number of columns: the cached num_cols_ member when the CUDA
// device is enabled, otherwise the column count of the CPU-side Smat().
// NOTE(review): the signature line (listing line 58) is missing from this
// extract; presumably CuSparseMatrix<Real>::NumCols() const -- confirm.
57 template <typename Real>
59 #if HAVE_CUDA == 1
60  if (CuDevice::Instantiate().Enabled()) {
61  return num_cols_;
62  } else
63 #endif
64  {
65  return Smat().NumCols();
66  }
67 }
68 
// Returns the number of stored (nonzero) elements: the cached nnz_ member
// when the CUDA device is enabled, otherwise Smat().NumElements().
// NOTE(review): the signature line (listing line 70) is missing from this
// extract; presumably CuSparseMatrix<Real>::NumElements() const -- confirm.
69 template <typename Real>
71 #if HAVE_CUDA == 1
72  if (CuDevice::Instantiate().Enabled()) {
73  return nnz_;
74  } else
75 #endif
76  {
77  return Smat().NumElements();
78  }
79 }
80 
// Returns the sum of all stored elements.
// An empty matrix (NumElements() == 0) yields 0 without touching the device.
// NOTE(review): the signature line (listing line 82) is missing from this
// extract; the CPU branch delegates to Smat().Sum(), so this is presumably
// CuSparseMatrix<Real>::Sum() const -- confirm.
81 template <typename Real>
83  if (NumElements() == 0)
84  return 0;
85 #if HAVE_CUDA == 1
86  if (CuDevice::Instantiate().Enabled()) {
 // View the CSR value array as a vector of NumElements() entries and reuse
 // the vector reduction.
87  CuSubVector<Real> sum_vec(CsrVal(), NumElements());
88  return sum_vec.Sum();
89  } else
90 #endif
91  {
92  return Smat().Sum();
93  }
94 }
95 
// Returns the 2-norm of the stored elements (GPU path: Norm(2) over the CSR
// value array; CPU path: Smat().FrobeniusNorm()).
// NOTE(review): the signature line (listing line 97) is missing from this
// extract; presumably CuSparseMatrix<Real>::FrobeniusNorm() const -- confirm.
// NOTE(review): there is no early return for an empty matrix here; on the GPU
// path a CuSubVector of dimension NumElements() is built over CsrVal() even
// when that is 0 -- TODO confirm that Norm() handles a zero-length vector.
96 template <typename Real>
98 #if HAVE_CUDA == 1
99  if (CuDevice::Instantiate().Enabled()) {
100  CuSubVector<Real> element_vec(CsrVal(), NumElements());
101  return element_vec.Norm(2);
102  } else
103 #endif
104  {
105  return Smat().FrobeniusNorm();
106  }
107 }
108 
// Selects a subset of the rows of smat_other into *this: output row i is row
// row_indexes[i] of smat_other.
// GPU path: the new row-pointer array and nnz are computed on the host from
// copies of row_indexes and smat_other's row pointers, *this is resized to
// (row_indexes.Dim() x smat_other.NumCols(), nnz), and cuda_select_rows is
// launched with the destination and source CSR arrays.
// CPU path: delegates to Smat().SelectRows().
// NOTE(review): the first signature line (listing line 110) is missing from
// this extract; presumably
// void CuSparseMatrix<Real>::SelectRows(const CuArray<int32> &row_indexes, ...
// NOTE(review): entries of row_indexes are used as indices into
// other_row_ptr_cpu without a visible range check -- confirm callers
// guarantee 0 <= index < smat_other.NumRows().
109 template<typename Real>
111  const CuSparseMatrix<Real> &smat_other) {
112 #if HAVE_CUDA == 1
113  if (CuDevice::Instantiate().Enabled()) {
114  CuTimer tim;
115 
116  // Calculate nnz and row_ptr before copying selected col_idx and val.
117  // We do this on CPU for now. We will move this part to GPU if mem copy
118  // becomes a bottleneck here.
119  std::vector<int32> row_indexes_cpu(row_indexes.Dim());
120  row_indexes.CopyToVec(&row_indexes_cpu);
121  CuSubArray<int> other_row_ptr(smat_other.CsrRowPtr(),
122  smat_other.NumRows() + 1);
123  std::vector<int> other_row_ptr_cpu(smat_other.NumRows() + 1);
124  other_row_ptr.CopyToVec(&other_row_ptr_cpu);
125  int nnz = 0;
126  std::vector<int> row_ptr_cpu(row_indexes_cpu.size() + 1);
 // Running (exclusive) prefix sum of the selected rows' lengths.
127  for (int i = 0; i < row_indexes_cpu.size(); ++i) {
128  row_ptr_cpu[i] = nnz;
129  nnz += other_row_ptr_cpu[row_indexes_cpu[i] + 1]
130  - other_row_ptr_cpu[row_indexes_cpu[i]];
131  }
132  row_ptr_cpu[row_indexes_cpu.size()] = nnz;
133 
134  Resize(row_indexes.Dim(), smat_other.NumCols(), nnz, kUndefined);
135  CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
136  row_ptr.CopyFromVec(row_ptr_cpu);
137 
138  // We use warpSize threads per row to access only the nnz elements.
139  // Every CU1DBLOCK/warpSize rows share one thread block.
140  // 1D grid to cover all selected rows.
141  const int warpSize = 32;
142  dim3 dimBlock(warpSize, CU1DBLOCK / warpSize);
143  dim3 dimGrid(n_blocks(row_indexes.Dim(), dimBlock.y));
144 
145  cuda_select_rows(dimGrid, dimBlock, CsrRowPtr(), CsrColIdx(), CsrVal(),
146  row_indexes.Data(), row_indexes.Dim(),
147  smat_other.CsrRowPtr(), smat_other.CsrColIdx(),
148  smat_other.CsrVal());
149 
 // Surface launch-configuration errors from the kernel call above.
150  CU_SAFE_CALL(cudaGetLastError());
151  CuDevice::Instantiate().AccuProfile(__func__, tim);
152  } else
153 #endif
154  {
155  std::vector<int32> row_indexes_cpu(row_indexes.Dim());
156  row_indexes.CopyToVec(&row_indexes_cpu);
157  Smat().SelectRows(row_indexes_cpu, smat_other.Smat());
158  }
159 }
160 
161 template<typename Real>
163  MatrixTransposeType trans) :
164  num_rows_(0), num_cols_(0), nnz_(0), csr_row_ptr_col_idx_(NULL), csr_val_(
165  NULL) {
166 #if HAVE_CUDA == 1
167  if (CuDevice::Instantiate().Enabled()) {
168  Resize(indexes.Dim(), dim, indexes.Dim(), kUndefined);
169  if (NumElements() == 0) {
170  return;
171  }
172  CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
173  row_ptr.Sequence(0);
174  CuSubArray<int> col_idx(CsrColIdx(), NumElements());
175  col_idx.CopyFromArray(indexes);
177  val.Set(1);
178 
179  if (trans == kTrans) {
180  CuSparseMatrix<Real> tmp(*this, kTrans);
181  this->Swap(&tmp);
182  }
183  } else
184 #endif
185  {
186  std::vector<int32> idx(indexes.Dim());
187  indexes.CopyToVec(&idx);
188  SparseMatrix<Real> tmp(idx, dim, trans);
189  Smat().Swap(&tmp);
190  }
191 }
192 
193 template<typename Real>
195  const CuVectorBase<Real> &weights,
196  int32 dim, MatrixTransposeType trans) :
198  NULL) {
199 #if HAVE_CUDA == 1
200  if (CuDevice::Instantiate().Enabled()) {
201  Resize(indexes.Dim(), dim, indexes.Dim(), kUndefined);
202  if (NumElements() == 0) {
203  return;
204  }
205  CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
206  row_ptr.Sequence(0);
207  CuSubArray<int> col_idx(CsrColIdx(), NumElements());
208  col_idx.CopyFromArray(indexes);
210  val.CopyFromVec(weights);
211 
212  if (trans == kTrans) {
213  CuSparseMatrix<Real> tmp(*this, kTrans);
214  this->Swap(&tmp);
215  }
216  } else
217 #endif
218  {
219  std::vector<int32> idx(indexes.Dim());
220  indexes.CopyToVec(&idx);
221  SparseMatrix<Real> tmp(idx, weights.Vec(), dim, trans);
222  Smat().Swap(&tmp);
223  }
224 }
225 
// Assignment from a CPU-based sparse matrix: copies smat into *this via
// CopyFromSmat() and returns *this.
// NOTE(review): the first signature line (listing line 227) is missing from
// this extract; presumably CuSparseMatrix<Real>::operator=( -- confirm.
226 template <typename Real>
228  const SparseMatrix<Real> &smat) {
229  this->CopyFromSmat(smat);
230  return *this;
231 }
232 
// Assignment from another CuSparseMatrix: copies smat (untransposed, kNoTrans)
// into *this via CopyFromSmat() and returns *this.
// NOTE(review): the first signature line (listing line 234) is missing from
// this extract; presumably CuSparseMatrix<Real>::operator=( -- confirm.
233 template <typename Real>
235  const CuSparseMatrix<Real> &smat) {
236  this->CopyFromSmat(smat, kNoTrans);
237  return *this;
238 }
239 
240 template<typename Real>
242  const MatrixIndexT num_cols,
243  const MatrixIndexT nnz,
244  MatrixResizeType resize_type) {
245 #if HAVE_CUDA == 1
246  if (CuDevice::Instantiate().Enabled()) {
247  KALDI_ASSERT(resize_type == kSetZero || resize_type == kUndefined);
248 
249  if (num_rows == NumRows() && num_cols == NumCols()
250  && nnz == NumElements()) {
251  if (resize_type == kSetZero) {
253  val.Set(0);
254  }
255  return;
256  }
257 
258  Destroy();
259 
260  CuTimer tim;
261 
262  if (num_rows * num_cols == 0) {
263  KALDI_ASSERT(num_rows == 0);
264  KALDI_ASSERT(num_cols == 0);
265  KALDI_ASSERT(nnz == 0);
266  num_rows_ = 0;
267  num_cols_ = 0;
268  nnz_ = 0;
269  csr_row_ptr_col_idx_ = static_cast<int*>(CuDevice::Instantiate().Malloc(
270  1 * sizeof(int)));
271  csr_val_ = NULL;
272  } else {
273  KALDI_ASSERT(num_rows > 0);
274  KALDI_ASSERT(num_cols > 0);
275  KALDI_ASSERT(nnz >= 0 && nnz <= num_rows * static_cast<int64>(num_cols));
276 
277  num_rows_ = num_rows;
278  num_cols_ = num_cols;
279  nnz_ = nnz;
280  csr_row_ptr_col_idx_ = static_cast<int*>(CuDevice::Instantiate().Malloc(
281  (num_rows + 1 + nnz) * sizeof(int)));
282  csr_val_ = static_cast<Real*>(CuDevice::Instantiate().Malloc(
283  nnz * sizeof(Real)));
284  CuSubArray<int> row_ptr(CsrRowPtr(), NumRows() + 1);
285  row_ptr.Set(nnz);
286  if (resize_type == kSetZero) {
288  val.Set(0);
289  }
290  }
291 
292  CuDevice::Instantiate().AccuProfile(__func__, tim);
293  } else
294 #endif
295  {
296  Smat().Resize(num_rows, num_cols, resize_type);
297  }
298 }
299 
// Releases the matrix storage and resets it to the empty 0 x 0 state.
// GPU path: frees csr_row_ptr_col_idx_ and csr_val_ if they are non-NULL, then
// zeroes the dimensions/nnz and NULLs both pointers.
// CPU path: resizes the CPU-side Smat() to 0 x 0.
// NOTE(review): the signature line (listing line 301) is missing from this
// extract; presumably void CuSparseMatrix<Real>::Destroy() -- confirm.
300 template<typename Real>
302 #if HAVE_CUDA == 1
303  if (CuDevice::Instantiate().Enabled()) {
304  CuTimer tim;
305  if (csr_row_ptr_col_idx_) {
306  CuDevice::Instantiate().Free(csr_row_ptr_col_idx_);
307  }
308  if (csr_val_) {
309  CuDevice::Instantiate().Free(csr_val_);
310  }
311  num_rows_ = 0;
312  num_cols_ = 0;
313  nnz_ = 0;
314  csr_row_ptr_col_idx_ = NULL;
315  csr_val_ = NULL;
316  CuDevice::Instantiate().AccuProfile(__func__, tim);
317  } else
318 #endif
319  {
320  Smat().Resize(0, 0);
321  }
322 }
323 
324 template<typename Real>
325 template<typename OtherReal>
327 #if HAVE_CUDA == 1
328  if (CuDevice::Instantiate().Enabled()) {
329  Resize(smat.NumRows(), smat.NumCols(), smat.NumElements(), kUndefined);
330  if (NumElements() == 0) {
331  return;
332  }
333  std::vector<int> row_ptr(NumRows() + 1);
334  std::vector<int> col_idx(NumElements());
336 
337  int n = 0;
338  for (int32 i = 0; i < smat.NumRows(); ++i) {
339  row_ptr[i] = n;
340  for (int32 j = 0; j < (smat.Data() + i)->NumElements(); ++j, ++n) {
341  col_idx[n] = ((smat.Data() + i)->Data() + j)->first;
342  val(n) = static_cast<Real>(((smat.Data() + i)->Data() + j)->second);
343  }
344  }
345  row_ptr[NumRows()] = n;
346  KALDI_ASSERT(n == NumElements());
347 
348  CuSubArray<int> cu_row_ptr(CsrRowPtr(), NumRows() + 1);
349  cu_row_ptr.CopyFromVec(row_ptr);
350  CuSubArray<int> cu_col_idx(CsrColIdx(), NumElements());
351  cu_col_idx.CopyFromVec(col_idx);
352  CuSubVector<Real> cu_val(CsrVal(), NumElements());
353  cu_val.CopyFromVec(val);
354  } else
355 #endif
356  {
357  this->Smat().CopyFromSmat(smat);
358  }
359 }
360 template
362 template
364 template
366 template
368 
369 template<typename Real>
371  MatrixTransposeType trans) {
372 #if HAVE_CUDA == 1
373  if (CuDevice::Instantiate().Enabled()) {
374  if (trans == kNoTrans) {
375  Resize(smat.NumRows(), smat.NumCols(), smat.NumElements(), kUndefined);
376 
377  CuSubVector<Real> val_to(CsrVal(), NumElements());
378  CuSubVector<Real> val_from(smat.CsrVal(), smat.NumElements());
379  val_to.CopyFromVec(val_from);
380 
382  NumRows() + 1 + NumElements());
384  smat.NumRows() + 1 + smat.NumElements());
385  idx_to.CopyFromArray(idx_from);
386 
387  } else {
388  Resize(smat.NumCols(), smat.NumRows(), smat.NumElements(), kUndefined);
389  CuTimer tim;
390 
391  CUSPARSE_SAFE_CALL(
392  cusparse_csr2csc(GetCusparseHandle(), smat.NumRows(), smat.NumCols(),
393  smat.NumElements(), smat.CsrVal(), smat.CsrRowPtr(),
394  smat.CsrColIdx(), CsrVal(), CsrColIdx(), CsrRowPtr(),
395  CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO));
396 
397  CuDevice::Instantiate().AccuProfile(__func__, tim);
398  }
399  } else
400 #endif
401  {
402  Smat().CopyFromSmat(smat.Smat(), trans);
403  }
404 }
405 
406 template<typename Real>
407 template<typename OtherReal>
409  KALDI_ASSERT(smat != NULL);
410 #if HAVE_CUDA == 1
411  if (CuDevice::Instantiate().Enabled()) {
412  if (NumRows() == 0) {
413  smat->Resize(0, 0);
414  return;
415  }
417  std::vector<int> idx_cpu;
418  idx.CopyToVec(&idx_cpu);
419 
422  val.CopyToVec(&val_cpu);
423 
424  std::vector<std::vector<std::pair<MatrixIndexT, OtherReal> > > pairs(
425  NumRows());
426  int n = 0;
427  for (int i = 0; i < NumRows(); ++i) {
428  for (; n < idx_cpu[i + 1]; ++n) {
429  const MatrixIndexT j = idx_cpu[NumRows() + 1 + n];
430  pairs[i].push_back( { j, val_cpu(n) });
431  }
432  }
433  KALDI_ASSERT(n == NumElements());
435  smat->Swap(&tmp);
436  } else
437 #endif
438  {
439  smat->CopyFromSmat(this->Smat());
440  }
441 }
442 template
444 template
446 template
448 template
450 
451 template<typename Real>
453  KALDI_ASSERT(vec != NULL);
454  KALDI_ASSERT(this->NumElements() == vec->Dim());
455 #if HAVE_CUDA == 1
456  if (CuDevice::Instantiate().Enabled()) {
458  vec->CopyFromVec(val);
459  } else
460 #endif
461  {
462  Smat().CopyElementsToVec(&(vec->Vec()));
463  }
464 }
465 
// Swaps contents with a CPU-based sparse matrix.
// GPU path: this is not a pointer swap -- a temporary CuSparseMatrix is built
// from *smat, swapped with *this, and the old contents of *this (now held by
// the temporary) are copied back into *smat.
// CPU path: swaps the underlying SparseMatrix objects directly.
// NOTE(review): the signature line (listing line 467) is missing from this
// extract; presumably
// void CuSparseMatrix<Real>::Swap(SparseMatrix<Real> *smat) -- confirm.
466 template <typename Real>
468 #if HAVE_CUDA == 1
469  if (CuDevice::Instantiate().Enabled()) {
470  CuSparseMatrix<Real> tmp(*smat);
471  Swap(&tmp);
472  tmp.CopyToSmat(smat);
473  } else
474 #endif
475  {
476  Smat().Swap(smat);
477  }
478 }
479 
480 template<typename Real>
482 #if HAVE_CUDA == 1
483  if (CuDevice::Instantiate().Enabled()) {
484  std::swap(num_rows_, smat->num_rows_);
485  std::swap(num_cols_, smat->num_cols_);
486  std::swap(nnz_, smat->nnz_);
488  std::swap(csr_val_, smat->csr_val_);
489  } else
490 #endif
491  {
492  Smat().Swap(&(smat->Smat()));
493  }
494 }
495 
496 template<typename Real>
498  if (num_rows_ == 0)
499  return;
500  // Use the CPU function for the moment, not efficient...
502  tmp.SetRandn(zero_prob);
503  Swap(&tmp);
504 }
505 
// Writes the matrix to `os` (binary or text mode per `binary`) by first
// copying it into a CPU-based SparseMatrix and delegating to its Write().
506 template<typename Real>
507 void CuSparseMatrix<Real>::Write(std::ostream &os, bool binary) const {
508  SparseMatrix<Real> host_copy;
509  this->CopyToSmat(&host_copy);
510  host_copy.Write(os, binary);
511 }
512 
// Reads the matrix from `is` (binary or text mode per `binary`): the data is
// parsed into a CPU-based SparseMatrix, which is then swapped into *this.
513 template<typename Real>
514 void CuSparseMatrix<Real>::Read(std::istream &is, bool binary) {
515  SparseMatrix<Real> host_staging;
516  host_staging.Read(is, binary);
517  Swap(&host_staging);
518 }
519 
// Explicit instantiations of the class template for single and double
// precision.
520 template class CuSparseMatrix<float>;
521 template class CuSparseMatrix<double>;
522 
// Trace of the product of dense matrix A and sparse matrix B.
// The dimension checks below require A and B to have the same shape when
// trans == kTrans, and A to have the shape of B-transposed otherwise
// (presumably tr(A B^T) and tr(A B) respectively -- confirm against the
// header documentation).
// Returns 0 when A has no columns or B has no stored elements.
// NOTE(review): the first signature line (listing line 524) is missing from
// this extract; presumably Real TraceMatSmat(const CuMatrixBase<Real> &A, ...
523 template <typename Real>
525  const CuSparseMatrix<Real> &B,
526  MatrixTransposeType trans) {
527  if (A.NumCols() == 0) {
528  KALDI_ASSERT(B.NumCols() == 0);
529  return 0.0;
530  }
531  if (B.NumElements() == 0) {
532  return 0.0;
533  }
534  Real result = 0;
535 #if HAVE_CUDA == 1
536  if (CuDevice::Instantiate().Enabled()) {
537  if (trans == kTrans) {
538  KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols());
539  } else {
540  KALDI_ASSERT(A.NumCols() == B.NumRows() && A.NumRows() == B.NumCols());
541  }
542 
543  // The Sum() method in CuVector handles a bunch of logic, we use that to
544  // compute the trace.
 // Scratch vector with one slot per stored element of B; the kernel fills
 // it and Sum() below reduces it to the scalar result.
545  CuVector<Real> sum_vec(B.NumElements());
546  CuTimer tim;
547 
548  // We use warpSize threads per row to access only the nnz elements.
549  // Every CU1DBLOCK/warpSize rows share one thread block.
550  // 1D grid to cover all rows of B.
551  const int warpSize = 32;
552  dim3 dimBlock(warpSize, CU1DBLOCK / warpSize);
553  dim3 dimGrid(n_blocks(B.NumRows(), dimBlock.y));
554 
555  if (trans == kNoTrans) {
556  cuda_trace_mat_smat(dimGrid, dimBlock, A.Data(), A.Dim(), B.CsrRowPtr(),
557  B.CsrColIdx(), B.CsrVal(), sum_vec.Data());
558  } else {
559  cuda_trace_mat_smat_trans(dimGrid, dimBlock, A.Data(), A.Dim(),
560  B.CsrRowPtr(), B.CsrColIdx(), B.CsrVal(),
561  sum_vec.Data());
562  }
563  result = sum_vec.Sum();
564  CuDevice::Instantiate().AccuProfile(__func__, tim);
565  } else
566 #endif
567  {
568  result = TraceMatSmat(A.Mat(), B.Smat(), trans);
569  }
570  return result;
571 }
572 
// Explicit instantiations of TraceMatSmat for float and double.
573 template
574 float TraceMatSmat(const CuMatrixBase<float> &A,
575  const CuSparseMatrix<float> &B,
576  MatrixTransposeType trans);
577 template
578 double TraceMatSmat(const CuMatrixBase<double> &A,
579  const CuSparseMatrix<double> &B,
580  MatrixTransposeType trans);
581 
583  MatrixTransposeType trans) const {
 // Copies the contents of this GeneralMatrix, whatever its storage type, into
 // *cu_mat, which must already have the correct size (transposed size when
 // trans == kTrans). `trans` is honored for every storage type.
 // With CUDA enabled the data is moved to the device per type; otherwise the
 // CPU overload is called on cu_mat->Mat().
 // NOTE(review): the first signature line (listing line 582) is missing from
 // this extract; presumably
 // void GeneralMatrix::CopyToMat(CuMatrixBase<BaseFloat> *cu_mat, ...
584 #if HAVE_CUDA == 1
585  if (CuDevice::Instantiate().Enabled()) {
586  switch (Type()) {
587  case kFullMatrix: {
 // Forward `trans` so a transposed copy is honored for full matrices as
 // well, consistent with the sparse and compressed cases below and with
 // the CPU fallback.
588  cu_mat->CopyFromMat(mat_, trans);
589  break;
590  }
591  case kSparseMatrix: {
592  CuSparseMatrix<BaseFloat> smat(smat_);
593  smat.CopyToMat(cu_mat, trans);
594  break;
595  }
596  case kCompressedMatrix: {
 // Decompress on the host first, then transfer.
597  Matrix<BaseFloat> mat(cmat_);
598  if (trans == kNoTrans) {
599  cu_mat->CopyFromMat(mat);
600  break;
601  } else {
602  CuMatrix<BaseFloat> temp_cu;
603  temp_cu.Swap(&mat);
604  cu_mat->CopyFromMat(temp_cu, kTrans);
605  break;
606  }
607  }
608  default:
609  KALDI_ERR << "Invalid GeneralMatrix type.";
610  }
611  return;
612  } else
613 #endif
614  {
615  CopyToMat(&(cu_mat->Mat()), trans);
616  }
617 }
618 
619 
// Expands this sparse matrix into the dense matrix *M (optionally transposed).
// *M must already have matching dimensions (swapped when trans != kNoTrans);
// it is zeroed first, and an empty sparse matrix then leaves it all-zero.
// GPU path: launches cuda_copy_from_smat / cuda_copy_from_smat_trans over the
// CSR arrays. CPU path: delegates to Smat().CopyToMat().
// NOTE(review): the first signature line (listing line 622) is missing from
// this extract; presumably
// void CuSparseMatrix<Real>::CopyToMat(CuMatrixBase<OtherReal> *M, ...
620 template <typename Real>
621 template <typename OtherReal>
623  MatrixTransposeType trans) const {
624  if (trans == kNoTrans) {
625  KALDI_ASSERT(M->NumRows() == NumRows() && M->NumCols() == NumCols());
626  } else {
627  KALDI_ASSERT(M->NumRows() == NumCols() && M->NumCols() == NumRows());
628  }
629  M->SetZero();
630  if (NumElements() == 0) {
631  return;
632  }
633 
634 #if HAVE_CUDA == 1
635  if (CuDevice::Instantiate().Enabled()) {
636  CuTimer tim;
637 
638  // We use warpSize threads per row to access only the nnz elements.
639  // Every CU1DBLOCK/warpSize rows share one thread block.
640  // 1D grid to cover all rows.
641  const int warpSize = 32;
642  dim3 dimBlock(warpSize, CU1DBLOCK / warpSize);
643  dim3 dimGrid(n_blocks(NumRows(), dimBlock.y));
644 
645  if (trans == kNoTrans) {
646  cuda_copy_from_smat(dimGrid, dimBlock, M->Data(), M->Dim(), CsrRowPtr(),
647  CsrColIdx(), CsrVal());
648  } else {
649  cuda_copy_from_smat_trans(dimGrid, dimBlock, M->Data(), M->Dim(),
650  CsrRowPtr(), CsrColIdx(), CsrVal());
651  }
 // Surface launch-configuration errors from the kernel call above.
652  CU_SAFE_CALL(cudaGetLastError());
653  CuDevice::Instantiate().AccuProfile(__func__, tim);
654  } else
655 #endif
656  {
657  Smat().CopyToMat(&(M->Mat()), trans);
658  }
659 }
660 
661 // Instantiate the template above.
662 template
664  MatrixTransposeType trans) const;
665 
666 template
668  MatrixTransposeType trans) const;
669 
670 template
672  MatrixTransposeType trans) const;
673 
674 template
676  MatrixTransposeType trans) const;
677 
678 
680  CuMatrixBase<BaseFloat> *cu_mat,
681  MatrixTransposeType trans) const {
 // Adds alpha times this GeneralMatrix (optionally transposed) to *cu_mat.
 // Each storage type is handled separately; with CUDA enabled the operand is
 // first copied to the device, otherwise the CPU matrix behind cu_mat->Mat()
 // is updated directly. `trans` is forwarded for every storage type.
 // NOTE(review): the first signature line (listing line 679) is missing from
 // this extract; presumably void GeneralMatrix::AddToMat(BaseFloat alpha, ...
682  switch (Type()) {
683  case kFullMatrix: {
684 #if HAVE_CUDA == 1
685  if (CuDevice::Instantiate().Enabled()) {
686  CuMatrix<BaseFloat> cu_copy(mat_);
 // Forward `trans`, consistent with the sparse and compressed cases.
687  cu_mat->AddMat(alpha, cu_copy, trans);
688  break;
689  }
690 #endif
691  cu_mat->Mat().AddMat(alpha, mat_, trans);
692  break;
693  }
694  case kSparseMatrix: {
695 #if HAVE_CUDA == 1
696  if (CuDevice::Instantiate().Enabled()) {
697  CuSparseMatrix<BaseFloat> cu_smat(smat_);
698  cu_mat->AddSmat(alpha, cu_smat, trans);
699  break;
700  }
701 #endif
702  cu_mat->Mat().AddSmat(alpha, smat_, trans);
703  break;
704  }
705  case kCompressedMatrix: {
 // Decompress on the host first; the result is used by both paths below.
706  Matrix<BaseFloat> mat(cmat_);
707 #if HAVE_CUDA == 1
708  if (CuDevice::Instantiate().Enabled()) {
709  CuMatrix<BaseFloat> cu_mat_copy(mat);
710  cu_mat->AddMat(alpha, cu_mat_copy, trans);
711  break;
712  }
713 #endif
714  cu_mat->Mat().AddMat(alpha, mat, trans);
715  break;
716  }
717  default:
718  KALDI_ERR << "Invalid GeneralMatrix type.";
719  }
720 }
721 
722 
723 
724 } // namespace kaldi
const MatrixBase< Real > & Mat() const
Definition: cu-matrix.h:755
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumRows() const
const Real * CsrVal() const
Returns pointer to the data array of length nnz_ that holds all nonzero values in zero-based CSR form...
MatrixResizeType
Definition: matrix-common.h:37
void CopyToMat(CuMatrixBase< OtherReal > *dest, MatrixTransposeType trans=kNoTrans) const
void Resize(const MatrixIndexT num_rows, const MatrixIndexT num_cols, const MatrixIndexT nnz, MatrixResizeType resize_type=kSetZero)
Users of this class won't normally have to use Resize.
void Swap(SparseMatrix< Real > *smat)
Swap with CPU-based matrix.
void CopyToVec(std::vector< T > *dst) const
This function resizes *dst if needed.
Definition: cu-array-inl.h:177
void CopyFromSmat(const SparseMatrix< OtherReal > &smat)
Copy from CPU-based matrix.
Real Sum() const
Definition: cu-vector.cc:297
void Set(Real value)
Definition: cu-vector.cc:1135
void SelectRows(const CuArray< int32 > &row_indexes, const CuSparseMatrix< Real > &smat_other)
Select a subset of the rows of a CuSparseMatrix.
void Read(std::istream &is, bool binary)
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
void AddSmat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A.
Definition: cu-matrix.cc:985
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
Definition: cu-matrix.cc:954
const T * Data() const
Get raw pointer.
Definition: cu-array.h:52
void Write(std::ostream &os, bool binary) const
void CopyFromArray(const CuArrayBase< T > &src)
The caller is responsible to ensure dim is equal between *this and src.
Definition: cu-array-inl.h:157
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void AddToMat(BaseFloat alpha, MatrixBase< BaseFloat > *mat, MatrixTransposeType trans=kNoTrans) const
Adds alpha times *this to mat.
MatrixIndexT NumCols() const
SparseVector< Real > * Data()
int32 MatrixIndexT
Definition: matrix-common.h:98
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimension do not match.
Definition: cu-vector.cc:1078
void Read(std::istream &os, bool binary)
const int * CsrColIdx() const
Returns pointer to the integer array of length nnz_ that contains the column indices of the correspon...
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
void CopyFromSmat(const SparseMatrix< OtherReal > &other, MatrixTransposeType trans=kNoTrans)
Copies data from another sparse matrix.
void SetZero()
Math operations, some calling kernels.
Definition: cu-matrix.cc:509
const SparseMatrix< Real > & Smat() const
struct rnnlm::@11::@12 n
void Set(const T &value)
Set to a constant value.
Definition: cu-array-inl.h:234
void CopyElementsToVec(CuVectorBase< Real > *vec) const
Copy elements to CuVector.
#define KALDI_ERR
Definition: kaldi-error.h:147
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT NumElements() const
CuSparseMatrix< Real > & operator=(const SparseMatrix< Real > &smat)
Copy from CPU-based matrix.
void CopyToMat(MatrixBase< BaseFloat > *mat, MatrixTransposeType trans=kNoTrans) const
Copies contents, regardless of type, to "mat", which must be correctly sized.
void CopyToSmat(SparseMatrix< OtherReal > *smat) const
Copy to CPU-based matrix.
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
CuSparseMatrix()
Default constructor.
const int * CsrRowPtr() const
Returns pointer to the integer array of length NumRows()+1 that holds indices of the first nonzero el...
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
A class representing a vector.
Definition: kaldi-vector.h:406
void SetRandn(BaseFloat zero_prob)
Sets up to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob an...
friend Real TraceMatSmat(const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType trans)
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void CopyFromVec(const std::vector< T > &src)
The caller is responsible to ensure dim is equal between *this and src.
Definition: cu-array-inl.h:100
MatrixIndexT NumRows() const
Real Norm(Real p)
Definition: cu-vector.cc:193
MatrixTransposeType
Definition: matrix-common.h:32
::MatrixDim Dim() const
Definition: cu-matrix.h:221
void Swap(SparseMatrix< Real > *other)
void Sequence(const T base)
Fill with the sequence [base ...
Definition: cu-array-inl.h:244
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
MatrixIndexT Dim() const
Return the vector dimension.
Definition: cu-array.h:49
void CopyToVec(VectorBase< OtherReal > *dst) const
Definition: cu-vector.cc:938
void Write(std::ostream &os, bool binary) const
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero)
Resizes the matrix; analogous to Matrix::Resize().
MatrixIndexT NumCols() const
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69
Vector for CUDA computing.
Definition: matrix-common.h:72
void SetRandn(BaseFloat zero_prob)
Sets up to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob an...
MatrixIndexT NumElements() const