cu-tp-matrix.cc
Go to the documentation of this file.
1 // cudamatrix/cu-tp-matrix.cc
2 
3 // Copyright 2009-2013 Karel Vesely
4 // 2014-2015 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #if HAVE_CUDA==1
22 #include <cuda_runtime_api.h>
23 #include <cublas_v2.h>
24 #endif
25 
26 #include "base/timer.h"
27 #include "cudamatrix/cu-common.h"
28 #include "cudamatrix/cu-vector.h"
29 #include "cudamatrix/cu-device.h"
30 #include "cudamatrix/cu-kernels.h"
31 #include "cudamatrix/cu-math.h"
32 #include "cudamatrix/cu-matrix.h"
36 
37 namespace kaldi {
38 
39 template<typename Real>
41  CuPackedMatrix<Real>(orig.NumRows(), kUndefined) {
42  KALDI_ASSERT(orig.NumRows() == orig.NumCols());
43  this->CopyFromMat(orig, trans);
44 }
45 
46 
47 template<typename Real>
49 #if HAVE_CUDA==1
50  if (CuDevice::Instantiate().Enabled()) {
51  CuMatrix<Real> tmp(orig);
52  tmp.Cholesky();
53  this->CopyFromMat(tmp, kNoTrans);
54  } else
55 #endif
56  {
57  this->Mat().Cholesky(orig.Mat());
58  }
59 }
60 
61 
62 template<typename Real>
64 #if HAVE_CUDA==1
65  if (CuDevice::Instantiate().Enabled()) {
66  if (this->num_rows_ == 0) return;
67  CuTimer tim;
68  int dimBlock(CU2DBLOCK);
69  int dimGrid(n_blocks(this->NumRows(), CU2DBLOCK));
70  CuMatrix<Real> tmp(this->NumRows(), this->NumRows());
71  int dim = this->NumRows();
72  Real alpha = 1.0;
73  cuda_set_diag(dimGrid, dimBlock, tmp.Data(), alpha, tmp.Dim());
74  CU_SAFE_CALL(cudaGetLastError());
75  CuMatrix<Real> tmp2(dim, dim);
76  tmp2.CopyFromTp(*this);
77  CUBLAS_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride,
78  tmp.Data(), tmp.Dim().stride));
79  this->CopyFromMat(tmp, kNoTrans);
80  CuDevice::Instantiate().AccuProfile(__func__, tim);
81  } else
82 #endif
83  {
84  Mat().Invert();
85  }
86 }
87 
88 template<typename Real>
90  MatrixTransposeType Trans) {
91 #if HAVE_CUDA==1
92  if (CuDevice::Instantiate().Enabled()) {
93  MatrixIndexT num_rows = this->num_rows_;
94  KALDI_ASSERT(num_rows == M.NumRows() && this->num_rows_ == M.NumCols());
95  if (num_rows == 0) return;
96  CuTimer tim;
97  dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
98  dim3 dimGrid(n_blocks(num_rows, CU2DBLOCK), n_blocks(num_rows, CU2DBLOCK));
99  if (Trans == kNoTrans) {
100  cuda_take_lower(dimGrid, dimBlock, M.Data(), this->data_, M.Dim());
101  } else {
102  cuda_take_upper(dimGrid, dimBlock, M.Data(), this->data_, M.Dim());
103  }
104  CU_SAFE_CALL(cudaGetLastError());
105  CuDevice::Instantiate().AccuProfile(__func__, tim);
106  } else
107 #endif
108  {
109  Mat().CopyFromMat(M.Mat(), Trans);
110  }
111 }
112 
113 template<class Real>
115  this->Resize(cu.NumRows());
116  this->CopyFromMat(cu);
117 }
118 template TpMatrix<float>::TpMatrix(const CuTpMatrix<float> &cu);
120 
121 template<class Real>
123  other.CopyToPacked(this);
124 }
125 // instantiate the template above.
126 template void TpMatrix<float>::CopyFromMat(const CuTpMatrix<float> &other);
127 template void TpMatrix<double>::CopyFromMat(const CuTpMatrix<double> &other);
128 
129 template <class Real>
131  this->Resize(in.NumRows(), kUndefined);
132  this->CopyFromPacked(in);
133  return *this;
134 }
135 
136 
137 template class CuTpMatrix<float>;
138 template class CuTpMatrix<double>;
139 
140 } // namespace kaldi
const MatrixBase< Real > & Mat() const
Definition: cu-matrix.h:755
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumRows() const
void CopyToPacked(PackedMatrix< Real > *dst) const
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void Cholesky(const CuSpMatrix< Real > &Orig)
Definition: cu-tp-matrix.cc:48
const SpMatrix< Real > & Mat() const
Definition: cu-sp-matrix.h:132
int32 MatrixIndexT
Definition: matrix-common.h:98
#define CU2DBLOCK
Definition: cu-matrixdim.h:61
void Cholesky(CuMatrixBase< Real > *inv_cholesky=NULL)
This function sets *this to the Cholesky factor of *this (i.e.
Definition: cu-matrix.cc:1987
CuTpMatrix< Real > & operator=(const CuTpMatrix< Real > &in)
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
Matrix for CUDA computing.
Definition: matrix-common.h:69
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void CopyFromTp(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:280
Matrix for CUDA computing.
Definition: matrix-common.h:75
MatrixTransposeType
Definition: matrix-common.h:32
::MatrixDim Dim() const
Definition: cu-matrix.h:221
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Set packed matrix to a specified size (can be zero).
void CopyFromMat(const MatrixBase< Real > &M, MatrixTransposeType Trans=kNoTrans)
CopyFromMat copies the lower triangle of M into *this (or the upper triangle, if Trans == kTrans)...
Definition: tp-matrix.cc:117
int32_cuda stride
Definition: cu-matrixdim.h:49
void CopyFromMat(const CuMatrixBase< Real > &M, MatrixTransposeType Trans=kNoTrans)
Definition: cu-tp-matrix.cc:89
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
const TpMatrix< Real > & Mat() const
Definition: cu-tp-matrix.h:80
void CopyFromPacked(const CuPackedMatrix< Real > &src)