CuPackedMatrix< Real > Class Template Reference

Matrix for CUDA computing. More...

#include <matrix-common.h>

Inheritance diagram for CuPackedMatrix< Real >:
Collaboration diagram for CuPackedMatrix< Real >:

Public Member Functions

 CuPackedMatrix ()
 
 CuPackedMatrix (MatrixIndexT r, MatrixResizeType resize_type=kSetZero)
 
 CuPackedMatrix (const PackedMatrix< Real > &orig)
 
 CuPackedMatrix (const CuPackedMatrix< Real > &orig)
 
void SetZero ()
 Set to zero. More...
 
void SetUnit ()
 Set to unit matrix. More...
 
void SetRandn ()
 Set to random values of a normal distribution. More...
 
void SetDiag (Real alpha)
 Set the diagonal value to alpha. More...
 
void AddToDiag (Real r)
 Add this quantity to the diagonal of the matrix. More...
 
void Scale (Real alpha)
 
void ScaleDiag (Real alpha)
 
Real Trace () const
 
 ~CuPackedMatrix ()
 
void Resize (MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
 Set packed matrix to a specified size (can be zero). More...
 
void CopyFromPacked (const CuPackedMatrix< Real > &src)
 
void CopyFromPacked (const PackedMatrix< Real > &src)
 
void CopyToPacked (PackedMatrix< Real > *dst) const
 
void Read (std::istream &in, bool binary)
 
void Write (std::ostream &out, bool binary) const
 
void Destroy ()
 
void Swap (CuPackedMatrix< Real > *other)
 Swaps the contents of *this and *other. Shallow swap. More...
 
void Swap (PackedMatrix< Real > *other)
 Swaps the contents of *this and *other. More...
 
Real * Data ()
 
const Real * Data () const
 
Real operator() (MatrixIndexT r, MatrixIndexT c) const
 
MatrixIndexT NumRows () const
 
MatrixIndexT NumCols () const
 
size_t SizeInBytes () const
 Returns size in bytes of the data held by the matrix. More...
 

Protected Member Functions

const PackedMatrix< Real > & Mat () const
 
PackedMatrix< Real > & Mat ()
 
void AddPacked (const Real alpha, const CuPackedMatrix< Real > &M)
 

Protected Attributes

Real * data_
 
MatrixIndexT num_rows_
 

Private Member Functions

PackedMatrix< Real > & operator= (const PackedMatrix< Real > &other)
 

Friends

class CuMatrixBase< Real >
 
class CuVectorBase< Real >
 
class CuSubMatrix< Real >
 
class CuRand< Real >
 

Detailed Description

template<typename Real>
class kaldi::CuPackedMatrix< Real >

Matrix for CUDA computing.

Packed CUDA matrix: base class for triangular and symmetric matrices on a GPU card. Does the computation on the CUDA card when CUDA is compiled in and we have a suitable GPU (CuDevice::Instantiate().Enabled() == true); otherwise, does it on the CPU.

Definition at line 75 of file matrix-common.h.

Constructor & Destructor Documentation

◆ CuPackedMatrix() [1/4]

CuPackedMatrix ( )
inline

Definition at line 60 of file cu-packed-matrix.h.

60 : data_(NULL), num_rows_(0) {}

◆ CuPackedMatrix() [2/4]

CuPackedMatrix ( MatrixIndexT  r,
MatrixResizeType  resize_type = kSetZero 
)
inline explicit

Definition at line 62 of file cu-packed-matrix.h.

References CuPackedMatrix< Real >::Resize().

63  :
64  data_(NULL), num_rows_(0) { Resize(r, resize_type); }
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Set packed matrix to a specified size (can be zero).

◆ CuPackedMatrix() [3/4]

CuPackedMatrix ( const PackedMatrix< Real > &  orig)
inline explicit

Definition at line 66 of file cu-packed-matrix.h.

References CuPackedMatrix< Real >::CopyFromPacked(), kaldi::kUndefined, PackedMatrix< Real >::num_rows_, and CuPackedMatrix< Real >::Resize().

66  : data_(NULL), num_rows_(0) {
67  Resize(orig.num_rows_, kUndefined);
68  CopyFromPacked(orig);
69  }
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Set packed matrix to a specified size (can be zero).
void CopyFromPacked(const CuPackedMatrix< Real > &src)

◆ CuPackedMatrix() [4/4]

◆ ~CuPackedMatrix()

Member Function Documentation

◆ AddPacked()

void AddPacked ( const Real  alpha,
const CuPackedMatrix< Real > &  M 
)
protected

Definition at line 333 of file cu-packed-matrix.cc.

References CuPackedMatrix< Real >::Data(), data_, KALDI_ASSERT, CuPackedMatrix< Real >::Mat(), and CuPackedMatrix< Real >::NumRows().

Referenced by CuSpMatrix< Real >::AddSp().

333  {
334  KALDI_ASSERT(num_rows_ == M.NumRows());
335 #if HAVE_CUDA == 1
336  if (CuDevice::Instantiate().Enabled()) {
337  if (num_rows_ == 0) return;
338  CuTimer tim;
339  size_t nr = num_rows_,
340  sz = (nr * (nr + 1)) / 2;
341  cublas_axpy(GetCublasHandle(), sz, alpha, M.Data(), 1, data_, 1);
342  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::AddPacked", tim);
343  } else
344 #endif
345  {
346  Mat().AddPacked(alpha, M.Mat());
347  }
348 }
const PackedMatrix< Real > & Mat() const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ AddToDiag()

void AddToDiag ( Real  r)

Add this quantity to the diagonal of the matrix.

Definition at line 351 of file cu-packed-matrix.cc.

References CU1DBLOCK, and data_.

Referenced by CuPackedMatrix< Real >::CuPackedMatrix(), kaldi::UnitTestCuPackedMatrixAddToDiag(), kaldi::UnitTestCuSpMatrixAddToDiag(), kaldi::UnitTestCuSpMatrixInvert(), and kaldi::UnitTestCuSpMatrixSetUnit().

351  {
352 #if HAVE_CUDA == 1
353  if (CuDevice::Instantiate().Enabled()) {
354  if (num_rows_ == 0) return;
355  CuTimer tim;
356  int dimBlock(CU1DBLOCK);
357  int dimGrid(n_blocks(NumRows(),CU1DBLOCK));
358  cuda_add_diag_packed(dimGrid,dimBlock,data_,r,num_rows_);
359  CU_SAFE_CALL(cudaGetLastError());
360  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::AddToDiag", tim);
361  } else
362 #endif
363  {
364  // TODO
365  Mat().AddToDiag(r);
366  }
367 }
const PackedMatrix< Real > & Mat() const
MatrixIndexT NumRows() const
#define CU1DBLOCK
Definition: cu-matrixdim.h:57

◆ CopyFromPacked() [1/2]

void CopyFromPacked ( const CuPackedMatrix< Real > &  src)

Definition at line 137 of file cu-packed-matrix.cc.

References data_, CuPackedMatrix< Real >::data_, KALDI_ASSERT, CuPackedMatrix< Real >::Mat(), and CuPackedMatrix< Real >::NumRows().

Referenced by CuSpMatrix< Real >::CopyFromSp(), CuTpMatrix< Real >::CopyFromTp(), CuPackedMatrix< Real >::CuPackedMatrix(), CuTpMatrix< Real >::operator=(), kaldi::UnitTestCuPackedMatrixCopy(), and CuPackedMatrix< Real >::~CuPackedMatrix().

137  {
138  KALDI_ASSERT(src.NumRows() == num_rows_);
139 #if HAVE_CUDA == 1
140  if (CuDevice::Instantiate().Enabled()) {
141  if (num_rows_ == 0) return; // Nothing to do.
142  CuTimer tim;
143  size_t nr = static_cast<size_t>(num_rows_),
144  num_bytes = ((nr * (nr+1)) / 2) * sizeof(Real);
145 
146  CU_SAFE_CALL(
147  cudaMemcpyAsync(data_, src.data_, num_bytes, cudaMemcpyDeviceToDevice,
148  cudaStreamPerThread));
149  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::CopyFromPacked1",
150  tim);
151  } else
152 #endif
153  {
154  Mat().CopyFromPacked(src.Mat());
155  }
156 }
const PackedMatrix< Real > & Mat() const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyFromPacked() [2/2]

void CopyFromPacked ( const PackedMatrix< Real > &  src)

Definition at line 159 of file cu-packed-matrix.cc.

References data_, PackedMatrix< Real >::data_, KALDI_ASSERT, PackedMatrix< Real >::NumRows(), and PackedMatrix< Real >::SizeInBytes().

159  {
160  KALDI_ASSERT(src.NumRows() == num_rows_);
161 #if HAVE_CUDA == 1
162  if (CuDevice::Instantiate().Enabled()) {
163  if (num_rows_ == 0) return; // Nothing to do.
164  CuTimer tim;
165  CU_SAFE_CALL(cudaMemcpyAsync(data_, src.data_, src.SizeInBytes(),
166  cudaMemcpyHostToDevice, cudaStreamPerThread));
167  CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
168  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::CopyFromPacked2", tim);
169  } else
170 #endif
171  {
172  Mat().CopyFromPacked(src);
173  //memcpy(data_, src.Data(), SizeInBytes());
174  }
175 }
const PackedMatrix< Real > & Mat() const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyToPacked()

void CopyToPacked ( PackedMatrix< Real > *  dst) const

Definition at line 178 of file cu-packed-matrix.cc.

References PackedMatrix< Real >::CopyFromPacked(), data_, PackedMatrix< Real >::data_, KALDI_ASSERT, and PackedMatrix< Real >::NumRows().

Referenced by TpMatrix< float >::CopyFromMat(), CuSpMatrix< Real >::CopyToSp(), kaldi::UnitTestCuPackedMatrixCopy(), and CuPackedMatrix< Real >::~CuPackedMatrix().

178  {
179  KALDI_ASSERT(dst->NumRows() == NumRows());
180 
181 #if HAVE_CUDA == 1
182  if (CuDevice::Instantiate().Enabled()) {
183  if (num_rows_ == 0) return; // Nothing to do.
184  CuTimer tim;
185  size_t nr = static_cast<size_t>(num_rows_),
186  num_bytes = ((nr * (nr+1)) / 2) * sizeof(Real);
187 
188  CU_SAFE_CALL(cudaMemcpyAsync(dst->data_, data_, num_bytes,
189  cudaMemcpyDeviceToHost, cudaStreamPerThread));
190  CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
191  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::CopyToPackedD2H", tim);
192  } else
193 #endif
194  {
195  //memcpy(data_, dst->Data(), SizeInBytes());
196  dst->CopyFromPacked(Mat());
197  }
198 }
const PackedMatrix< Real > & Mat() const
MatrixIndexT NumRows() const
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Data() [1/2]

◆ Data() [2/2]

const Real* Data ( ) const
inline

Definition at line 114 of file cu-packed-matrix.h.

References CuPackedMatrix< Real >::data_.

114 { return data_; }

◆ Destroy()

void Destroy ( )

Definition at line 85 of file cu-packed-matrix.cc.

References data_, and KALDI_MEMALIGN_FREE.

Referenced by CuPackedMatrix< Real >::~CuPackedMatrix().

85  {
86 #if HAVE_CUDA == 1
87  if (CuDevice::Instantiate().Enabled()) {
88  if (this->data_ != NULL) {
89  CuDevice::Instantiate().Free(this->data_);
90  }
91  } else
92 #endif
93  {
94  if (this->data_ != NULL) KALDI_MEMALIGN_FREE(this->data_);
95  }
96  this->data_ = NULL;
97  this->num_rows_ = 0;
98 }
#define KALDI_MEMALIGN_FREE(x)
Definition: kaldi-utils.h:60

◆ Mat() [1/2]

const PackedMatrix<Real>& Mat ( ) const
inline protected

Definition at line 150 of file cu-packed-matrix.h.

Referenced by CuPackedMatrix< Real >::AddPacked(), CuVectorBase< float >::CopyDiagFromPacked(), and CuPackedMatrix< Real >::CopyFromPacked().

150  {
151  return *(reinterpret_cast<const PackedMatrix<Real>* >(this));
152  }

◆ Mat() [2/2]

PackedMatrix<Real>& Mat ( )
inline protected

Definition at line 153 of file cu-packed-matrix.h.

153  {
154  return *(reinterpret_cast<PackedMatrix<Real>* >(this));
155  }

◆ NumCols()

MatrixIndexT NumCols ( ) const
inline

◆ NumRows()

◆ operator()()

Real operator() ( MatrixIndexT  r,
MatrixIndexT  c 
) const
inline

Definition at line 116 of file cu-packed-matrix.h.

References CuPackedMatrix< Real >::data_, KALDI_ASSERT, CuPackedMatrix< Real >::num_rows_, and kaldi::swap().

116  {
117  if (static_cast<UnsignedMatrixIndexT>(c) >
118  static_cast<UnsignedMatrixIndexT>(r))
119  std::swap(c, r);
120  KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
121  static_cast<UnsignedMatrixIndexT>(this->num_rows_));
122 #if HAVE_CUDA == 1
123  if (CuDevice::Instantiate().Enabled()) {
124  Real value;
125  CU_SAFE_CALL(cudaMemcpyAsync(&value, this->data_ + (r * (r+1)) / 2 + c,
126  sizeof(Real), cudaMemcpyDeviceToHost,
127  cudaStreamPerThread));
128  CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
129  return value;
130  } else
131 #endif
132  return this->data_[(r * (r+1)) / 2 + c];
133  }
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ operator=()

PackedMatrix<Real>& operator= ( const PackedMatrix< Real > &  other)
private

◆ Read()

void Read ( std::istream &  in,
bool  binary 
)

Definition at line 231 of file cu-packed-matrix.cc.

References PackedMatrix< Real >::Read().

Referenced by kaldi::UnitTestCuSpMatrixIO(), kaldi::UnitTestCuTpMatrixIO(), and CuPackedMatrix< Real >::~CuPackedMatrix().

231  {
232  PackedMatrix<Real> temp;
233  temp.Read(is, binary);
234  Destroy();
235  Swap(&temp);
236 }
void Swap(CuPackedMatrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap.

◆ Resize()

void Resize ( MatrixIndexT  nRows,
MatrixResizeType  resize_type = kSetZero 
)

Set packed matrix to a specified size (can be zero).

The value of the new data depends on resize_type:
- if kSetZero, the new data will be zero;
- if kUndefined, the new data will be undefined;
- if kCopyData, the new data will be the same as the old data in any shared positions, and zero elsewhere.

This function takes time proportional to the number of data elements. Note: the implementation listed below asserts that resize_type is kSetZero or kUndefined, so kCopyData is not currently supported by this class.

Definition at line 40 of file cu-packed-matrix.cc.

References data_, KALDI_ASSERT, kaldi::kSetZero, and kaldi::kUndefined.

Referenced by CuPackedMatrix< Real >::CuPackedMatrix(), CuTpMatrix< Real >::operator=(), CuSpMatrix< Real >::Resize(), TpMatrix< float >::TpMatrix(), and CuPackedMatrix< Real >::~CuPackedMatrix().

41  {
42  // This code does not currently support the other resize_type options.
43  KALDI_ASSERT(resize_type == kSetZero || resize_type == kUndefined);
44 
45  if (this->num_rows_ == rows) {
46  if (resize_type == kSetZero) this->SetZero();
47  return;
48  }
49 
50  if (this->num_rows_ != 0)
51  this->Destroy();
52  if (rows == 0) return;
53 #if HAVE_CUDA == 1
54  CuDevice &device = CuDevice::Instantiate();
55  if (device.Enabled()) {
56  CuTimer tim;
57  this->num_rows_ = rows;
58  size_t nr = static_cast<size_t>(num_rows_),
59  num_bytes = ((nr * (nr+1)) / 2) * sizeof(Real);
60  this->data_ = static_cast<Real*>(device.Malloc(num_bytes));
61 
62  if (resize_type == kSetZero) this->SetZero();
63  device.AccuProfile("CuPackedMatrix::Resize", tim);
64  } else
65 #endif
66  { // Let the initializer of SpMatrix<Real> handle the allocation,
67  // and then just do Swap which will switch the pointers.
68  // This wastes a few instructions but is simple to code.
69  SpMatrix<Real> mat(rows, resize_type);
70  this->Swap(&mat);
71  }
72 }
void Swap(CuPackedMatrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Scale()

void Scale ( Real  alpha)

Definition at line 299 of file cu-packed-matrix.cc.

References data_.

Referenced by CuPackedMatrix< Real >::CuPackedMatrix(), and kaldi::UnitTestCuPackedMatrixScale().

299  {
300 #if HAVE_CUDA == 1
301  if (CuDevice::Instantiate().Enabled()) {
302  CuTimer tim;
303  size_t nr = static_cast<size_t>(num_rows_),
304  num_elements = ((nr * (nr+1)) / 2);
305  CUBLAS_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1));
306 
307  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::Scale", tim);
308  } else
309 #endif
310  {
311  Mat().Scale(alpha);
312  }
313 }
const PackedMatrix< Real > & Mat() const

◆ ScaleDiag()

void ScaleDiag ( Real  alpha)

Definition at line 316 of file cu-packed-matrix.cc.

References CU1DBLOCK, and data_.

Referenced by CuPackedMatrix< Real >::CuPackedMatrix(), kaldi::UnitTestCuPackedMatrixScaleDiag(), and kaldi::nnet2::UnitTestPreconditionDirections().

316  {
317 #if HAVE_CUDA == 1
318  if (CuDevice::Instantiate().Enabled()) {
319  CuTimer tim;
320  int dimBlock(CU1DBLOCK);
321  int dimGrid(n_blocks(NumRows(),CU1DBLOCK));
322  cuda_scale_diag_packed(dimGrid,dimBlock,data_,alpha,num_rows_);
323  CU_SAFE_CALL(cudaGetLastError());
324  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::ScaleDiag", tim);
325  } else
326 #endif
327  {
328  Mat().ScaleDiag(alpha);
329  }
330 }
const PackedMatrix< Real > & Mat() const
MatrixIndexT NumRows() const
#define CU1DBLOCK
Definition: cu-matrixdim.h:57

◆ SetDiag()

void SetDiag ( Real  alpha)

Set the diagonal value to alpha.

Definition at line 281 of file cu-packed-matrix.cc.

References CU1DBLOCK, and data_.

Referenced by CuPackedMatrix< Real >::CuPackedMatrix().

281  {
282 #if HAVE_CUDA == 1
283  if (CuDevice::Instantiate().Enabled()) {
284  if (num_rows_ == 0) return;
285  CuTimer tim;
286  int dimBlock(CU1DBLOCK);
287  int dimGrid(n_blocks(NumRows(),CU1DBLOCK));
288  cuda_set_diag_packed(dimGrid,dimBlock,data_,alpha,num_rows_);
289  CU_SAFE_CALL(cudaGetLastError());
290  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::SetDiag", tim);
291  } else
292 #endif
293  {
294  Mat().SetDiag(alpha);
295  }
296 }
const PackedMatrix< Real > & Mat() const
MatrixIndexT NumRows() const
#define CU1DBLOCK
Definition: cu-matrixdim.h:57

◆ SetRandn()

◆ SetUnit()

void SetUnit ( )

Set to unit matrix.

Definition at line 370 of file cu-packed-matrix.cc.

Referenced by CuPackedMatrix< Real >::CuPackedMatrix(), kaldi::UnitTestCuPackedMatrixSetUnit(), and kaldi::nnet2::UnitTestPreconditionDirections().

370  {
371 #if HAVE_CUDA == 1
372  if (CuDevice::Instantiate().Enabled()) {
373  this->SetZero();
374  this->SetDiag(1.0);
375  } else
376 #endif
377  {
378  Mat().SetUnit();
379  }
380 }
const PackedMatrix< Real > & Mat() const
void SetDiag(Real alpha)
Set the diagonal value to alpha.

◆ SetZero()

void SetZero ( )

Definition at line 246 of file cu-packed-matrix.cc.

References data_.

Referenced by CuPackedMatrix< Real >::CuPackedMatrix(), and kaldi::UnitTestCuSpMatrixSetUnit().

246  {
247  #if HAVE_CUDA == 1
248  if (CuDevice::Instantiate().Enabled()) {
249  CuTimer tim;
250  size_t nr = static_cast<size_t>(num_rows_),
251  num_bytes = ((nr * (nr+1)) / 2) * sizeof(Real);
252 
253  CU_SAFE_CALL(cudaMemsetAsync(reinterpret_cast<void*>(this->data_), 0,
254  num_bytes, cudaStreamPerThread));
255  CuDevice::Instantiate().AccuProfile("CuPackedMatrix::SetZero", tim);
256  } else
257  #endif
258  {
259  Mat().SetZero();
260  }
261 }
const PackedMatrix< Real > & Mat() const

◆ SizeInBytes()

size_t SizeInBytes ( ) const
inline

Returns size in bytes of the data held by the matrix.

Definition at line 139 of file cu-packed-matrix.h.

References CuPackedMatrix< Real >::num_rows_.

139  {
140  size_t nr = static_cast<size_t>(num_rows_),
141  num_bytes = ((nr * (nr+1)) / 2) * sizeof(Real);
142  return num_bytes;
143  }

◆ Swap() [1/2]

void Swap ( CuPackedMatrix< Real > *  other)

Swaps the contents of *this and *other. Shallow swap.

Referenced by CuPackedMatrix< Real >::~CuPackedMatrix().

◆ Swap() [2/2]

void Swap ( PackedMatrix< Real > *  other)

Swaps the contents of *this and *other.

Definition at line 101 of file cu-packed-matrix.cc.

References PackedMatrix< Real >::data_, kaldi::kUndefined, PackedMatrix< Real >::num_rows_, PackedMatrix< Real >::Resize(), PackedMatrix< Real >::Swap(), and kaldi::swap().

101  {
102 #if HAVE_CUDA == 1
103  if (CuDevice::Instantiate().Enabled()) {
104  if (this->num_rows_ == 0) {
105  if (mat->num_rows_ != 0) {
106  // *this is empty, but mat is nonempty.
107  Resize(mat->num_rows_, kUndefined);
108  CopyFromPacked(*mat);
109  mat->Resize(0);
110  }
111  // else both are empty.
112  } else { // *this is nonempty.
113  if (mat->num_rows_ != 0) {
114  // Both *this and *mat are nonempty. Recurse to simpler cases.
115  // this could be done more efficiently in the case where
116  // the size does not change.
117  PackedMatrix<Real> temp;
118  this->Swap(&temp); // now temp is full, *this is empty.
119  mat->Swap(&temp); // now mat has data from *this, temp has
120  // data from mat.
121  this->Swap(&temp); // copy data in mat to *this, which is now empty.
122  } else { // *this is full but *mat is empty.
123  mat->Resize(this->num_rows_, kUndefined);
124  this->CopyToPacked(mat);
125  this->Destroy();
126  }
127  }
128  } else
129 #endif
130  {
131  std::swap(mat->data_, this->data_);
132  std::swap(mat->num_rows_, this->num_rows_);
133  }
134 }
void Swap(CuPackedMatrix< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
void CopyToPacked(PackedMatrix< Real > *dst) const
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
void Resize(MatrixIndexT nRows, MatrixResizeType resize_type=kSetZero)
Set packed matrix to a specified size (can be zero).
void CopyFromPacked(const CuPackedMatrix< Real > &src)

◆ Trace()

Real Trace ( ) const

Definition at line 264 of file cu-packed-matrix.cc.

References CuVectorBase< Real >::CopyDiagFromPacked(), kaldi::kUndefined, and CuVectorBase< Real >::Sum().

Referenced by CuPackedMatrix< Real >::CuPackedMatrix(), kaldi::UnitTestCuPackedMatrixTrace(), kaldi::UnitTestCuSpMatrixInvert(), and kaldi::UnitTestTrace().

264  {
265  Real result = 0.0;
266 #if HAVE_CUDA == 1
267  if (CuDevice::Instantiate().Enabled()) {
268  if (num_rows_ == 0) return 0.0;
269  CuVector<Real> tmp(num_rows_, kUndefined);
270  tmp.CopyDiagFromPacked(*this);
271  return tmp.Sum();
272  } else
273 #endif
274  {
275  result = Mat().Trace();
276  }
277  return result;
278 }
const PackedMatrix< Real > & Mat() const

◆ Write()

void Write ( std::ostream &  out,
bool  binary 
) const

Definition at line 239 of file cu-packed-matrix.cc.

References kaldi::kUndefined, and PackedMatrix< Real >::Write().

Referenced by kaldi::UnitTestCuSpMatrixIO(), kaldi::UnitTestCuTpMatrixIO(), and CuPackedMatrix< Real >::~CuPackedMatrix().

239  {
240  PackedMatrix<Real> temp(this->num_rows_, kUndefined);
241  this->CopyToPacked(&temp);
242  temp.Write(os, binary);
243 }
void CopyToPacked(PackedMatrix< Real > *dst) const

Friends And Related Function Documentation

◆ CuMatrixBase< Real >

friend class CuMatrixBase< Real >
friend

Definition at line 55 of file cu-packed-matrix.h.

◆ CuRand< Real >

friend class CuRand< Real >
friend

Definition at line 58 of file cu-packed-matrix.h.

◆ CuSubMatrix< Real >

friend class CuSubMatrix< Real >
friend

Definition at line 57 of file cu-packed-matrix.h.

◆ CuVectorBase< Real >

friend class CuVectorBase< Real >
friend

Definition at line 56 of file cu-packed-matrix.h.

Member Data Documentation

◆ data_

◆ num_rows_


The documentation for this class was generated from the following files: