24 #include <cuda_runtime_api.h>    25 #include <cublas_v2.h>    39 template<
typename Real>
    45   if (this->num_rows_ == rows) {
    46     if (resize_type == 
kSetZero) this->SetZero();
    50   if (this->num_rows_ != 0)
    52   if (rows == 0) 
return;
    54   CuDevice &device = CuDevice::Instantiate();
    55   if (device.Enabled()) {
    57     this->num_rows_ = rows;
    58     size_t nr = 
static_cast<size_t>(num_rows_),
    59         num_bytes = ((nr * (nr+1)) / 2) * 
sizeof(Real);
    60     this->
data_ = 
static_cast<Real*
>(device.Malloc(num_bytes));
    62     if (resize_type == 
kSetZero) this->SetZero();
    63     device.AccuProfile(
"CuPackedMatrix::Resize", tim);
    74 template<
typename Real>
    84 template<
typename Real>
    87   if (CuDevice::Instantiate().Enabled()) {
    88     if (this->
data_ != NULL) {
    89       CuDevice::Instantiate().Free(this->
data_);
   100 template<
typename Real>
   103   if (CuDevice::Instantiate().Enabled()) {
   104     if (this->num_rows_ == 0) {
   108         CopyFromPacked(*mat);
   124         this->CopyToPacked(mat);
   136 template<
typename Real>
   140   if (CuDevice::Instantiate().Enabled()) {
   141     if (num_rows_ == 0) 
return; 
   143     size_t nr = 
static_cast<size_t>(num_rows_),
   144         num_bytes = ((nr * (nr+1)) / 2) * 
sizeof(Real);
   147       cudaMemcpyAsync(
data_, src.
data_, num_bytes, cudaMemcpyDeviceToDevice,
   148                       cudaStreamPerThread));
   149     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::CopyFromPacked1",
   154     Mat().CopyFromPacked(src.
Mat());
   158 template<
typename Real>
   162   if (CuDevice::Instantiate().Enabled()) {
   163     if (num_rows_ == 0) 
return; 
   166                                  cudaMemcpyHostToDevice, cudaStreamPerThread));
   167     CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
   168     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::CopyFromPacked2", tim);
   172     Mat().CopyFromPacked(src);
   177 template<
typename Real>
   182   if (CuDevice::Instantiate().Enabled()) {
   183     if (num_rows_ == 0) 
return; 
   185     size_t nr = 
static_cast<size_t>(num_rows_),
   186       num_bytes = ((nr * (nr+1)) / 2) * 
sizeof(Real);
   188     CU_SAFE_CALL(cudaMemcpyAsync(dst->
data_, 
data_, num_bytes,
   189                                  cudaMemcpyDeviceToHost, cudaStreamPerThread));
   190     CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
   191     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::CopyToPackedD2H", tim);
   230 template<
typename Real>
   233   temp.
Read(is, binary);
   238 template<
typename Real>
   241   this->CopyToPacked(&temp);
   242   temp.
Write(os, binary);
   245 template<
typename Real>
   248   if (CuDevice::Instantiate().Enabled()) {
   250     size_t nr = 
static_cast<size_t>(num_rows_),
   251       num_bytes = ((nr * (nr+1)) / 2) * 
sizeof(Real);
   253     CU_SAFE_CALL(cudaMemsetAsync(reinterpret_cast<void*>(this->
data_), 0, 
   254           num_bytes, cudaStreamPerThread));
   255     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::SetZero", tim);
   263 template<
typename Real>
   267   if (CuDevice::Instantiate().Enabled()) {
   268     if (num_rows_ == 0) 
return 0.0;
   275     result = Mat().Trace();
   280 template<
typename Real>
   283   if (CuDevice::Instantiate().Enabled()) {
   284     if (num_rows_ == 0) 
return;
   287     int dimGrid(n_blocks(NumRows(),
CU1DBLOCK));
   288     cuda_set_diag_packed(dimGrid,dimBlock,
data_,alpha,num_rows_);
   289     CU_SAFE_CALL(cudaGetLastError());
   290     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::SetDiag", tim);
   294     Mat().SetDiag(alpha);
   298 template<
typename Real>
   301   if (CuDevice::Instantiate().Enabled()) {
   303     size_t nr = 
static_cast<size_t>(num_rows_),
   304         num_elements = ((nr * (nr+1)) / 2);
   305     CUBLAS_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, 
data_, 1));
   307     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::Scale", tim);
   315 template<
typename Real>
   318   if (CuDevice::Instantiate().Enabled()) {
   321     int dimGrid(n_blocks(NumRows(),
CU1DBLOCK));
   322     cuda_scale_diag_packed(dimGrid,dimBlock,
data_,alpha,num_rows_);
   323     CU_SAFE_CALL(cudaGetLastError());
   324     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::ScaleDiag", tim);
   328     Mat().ScaleDiag(alpha);
   332 template<
typename Real>
   336   if (CuDevice::Instantiate().Enabled()) {
   337     if (num_rows_ == 0) 
return;
   339     size_t nr = num_rows_,
   340         sz = (nr * (nr + 1)) / 2;
   341     cublas_axpy(GetCublasHandle(), sz, alpha, M.
Data(), 1, 
data_, 1);
   342     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::AddPacked", tim);
   346     Mat().AddPacked(alpha, M.
Mat());
   350 template<
typename Real>
   353   if (CuDevice::Instantiate().Enabled()) {
   354     if (num_rows_ == 0) 
return;
   357     int dimGrid(n_blocks(NumRows(),
CU1DBLOCK));
   358     cuda_add_diag_packed(dimGrid,dimBlock,
data_,r,num_rows_);
   359     CU_SAFE_CALL(cudaGetLastError());
   360     CuDevice::Instantiate().AccuProfile(
"CuPackedMatrix::AddToDiag", tim);
   369 template<
typename Real>
   372   if (CuDevice::Instantiate().Enabled()) {
   385 template<
typename Real>
   386 std::ostream &operator << (std::ostream &out, const CuPackedMatrix<Real> &mat) {
   388   mat.CopyToPacked(&temp);
   395 std::ostream &operator << (std::ostream &out, const CuPackedMatrix<float> &mat);
   397 std::ostream &operator << (std::ostream &out, const CuPackedMatrix<double> &mat);
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
Packed symmetric matrix class.
 
void CopyDiagFromPacked(const CuPackedMatrix< Real > &M)
Extracts the diagonal of a packed matrix M; works for Sp or Tp. 
 
const PackedMatrix< Real > & Mat() const
 
void Swap(CuPackedMatrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap. 
 
void Read(std::istream &in, bool binary, bool add=false)
 
void Write(std::ostream &out, bool binary) const
 
void AddPacked(const Real alpha, const CuPackedMatrix< Real > &M)
 
void SetDiag(Real alpha)
Sets every diagonal element to alpha.
 
MatrixIndexT NumRows() const
 
void CopyToPacked(PackedMatrix< Real > *dst) const
 
void AddToDiag(Real r)
Adds r to every diagonal element.
 
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
 
MatrixIndexT NumRows() const
 
void SetRandn()
Sets the matrix to random values drawn from a normal distribution.
 
void RandGaussian(CuMatrixBase< Real > *tgt)
Fills the target matrix with normally distributed random numbers.
 
Packed matrix: base class for triangular and symmetric matrices. 
 
void Read(std::istream &in, bool binary)
 
void Swap(PackedMatrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap. 
 
#define KALDI_MEMALIGN_FREE(x)
 
void SetUnit()
Sets the matrix to the unit matrix.
 
void ScaleDiag(Real alpha)
 
void CopyFromPacked(const PackedMatrix< OtherReal > &orig)
 
size_t SizeInBytes() const
 
#define KALDI_ASSERT(cond)
 
Matrix for CUDA computing. 
 
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Set packed matrix to a specified size (can be zero). 
 
void Write(std::ostream &out, bool binary) const
 
void CopyFromPacked(const CuPackedMatrix< Real > &src)
 
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Set packed matrix to a specified size (can be zero).