CuMatrix< Real > Class Template Reference

This class represents a matrix that's stored on the GPU if we have one, and in memory if not. More...

#include <matrix-common.h>

Inheritance diagram for CuMatrix< Real >:
Collaboration diagram for CuMatrix< Real >:

Public Member Functions

 CuMatrix ()
 
 CuMatrix (MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
 Constructor with memory initialisation. More...
 
 CuMatrix (const CuMatrix< Real > &other, MatrixTransposeType trans=kNoTrans)
 
 CuMatrix (const CuBlockMatrix< Real > &other, MatrixTransposeType trans=kNoTrans)
 
 CuMatrix (const CuMatrixBase< Real > &other, MatrixTransposeType trans=kNoTrans)
 
template<typename OtherReal >
 CuMatrix (const MatrixBase< OtherReal > &other, MatrixTransposeType trans=kNoTrans)
 
 CuMatrix (const CuSpMatrix< Real > &M)
 Copy constructor taking SpMatrix... More...
 
template<typename OtherReal >
 CuMatrix (const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
 Copy constructor taking TpMatrix... More...
 
template<typename OtherReal >
 CuMatrix (const CuMatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
 Copy constructor: as above, but from another type. More...
 
CuMatrix< Real > & operator= (const CuMatrixBase< Real > &other)
 
CuMatrix< Real > & operator= (const CuMatrix< Real > &other)
 
CuMatrix< Real > & operator= (const MatrixBase< Real > &other)
 
void Transpose ()
 
void Resize (MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
 Allocate the memory. More...
 
void Swap (Matrix< Real > *mat)
 
void Swap (CuMatrix< Real > *mat)
 
template<typename OtherReal >
void Swap (CuMatrix< OtherReal > *mat)
 
void Read (std::istream &is, bool binary)
 I/O functions. More...
 
 ~CuMatrix ()
 Destructor. More...
 
const Matrix< Real > & Mat () const
 
Matrix< Real > & Mat ()
 
void CompObjfAndDeriv (const std::vector< MatrixElement< Real > > &elements, const CuMatrix< Real > &A, Real *tot_objf, Real *tot_weight)
 Here, A is interpreted as a matrix of probabilities, and "elements" as a list of posteriors (possibly zero-one), and "*this" as a matrix of derivatives w.r.t. More...
 
- Public Member Functions inherited from CuMatrixBase< Real >
void CopyCols (const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
 Copies column r from column indexes[r] of src. More...
 
void AddCols (const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indices)
 Add column indices[r] of src to column r. More...
 
void CopyRows (const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
 Copies row r from row indexes[r] of src. More...
 
void CopyRows (const CuArrayBase< const Real *> &src)
 Copies row r of this matrix from an array of floats at the location given by src[r], where src[r] is assumed to be obtained from the RowData() function of another CuMatrix, or from CuVector::Data() (the point is: the data it points to should be on the GPU if we're using a GPU, and on a CPU otherwise). More...
 
void CopyToRows (const CuArrayBase< Real *> &dst) const
 For each row r of this matrix, copies it to the array of floats at the location given by dst[r], where dst[r] is assumed to be obtained from the RowData() function of another CuMatrix, or from CuVector::Data() (i.e. More...
 
void AddRows (Real alpha, const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
 Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). More...
 
void MulRows (const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
 Does for each row r, this.Row(r) *= src.row(indexes[r]), where '*=' is elementwise multiplication. More...
 
void AddRows (Real alpha, const CuArrayBase< const Real *> &src)
 Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as the beginning of a region of memory representing a vector of floats, of the same length as this.NumCols(). More...
 
void AddToRows (Real alpha, const CuArrayBase< MatrixIndexT > &indexes, CuMatrixBase< Real > *dst) const
 For each row i of *this, adds alpha * this->Row(i) to dst->Row(indexes(i)) if indexes(i) >= 0, else does nothing. More...
 
void AddToRows (Real alpha, const CuArrayBase< Real *> &dst) const
 For each row r of this matrix, adds it (times alpha) to the array of floats at the location given by dst[r], where dst[r] is assumed to be obtained from the RowData() function of another CuMatrix, or from CuVector::Data() (i.e. More...
 
void SumColumnRanges (const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
 For each row r of this and for each column c, sets (*this)(r, c) to the sum src(r, j), where j ranges from indexes[c].first through indexes[c].second - 1. More...
 
void AddRowRanges (const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
 For each row r of this and for each column c, do (*this)(r, c) += src(j, c), where j ranges from indexes[r].first through indexes[r].second - 1. More...
 
void AddToDiag (Real value)
 Adds "value" to the diagonal elements of the matrix. More...
 
MatrixIndexT NumRows () const
 Dimensions. More...
 
MatrixIndexT NumCols () const
 
MatrixIndexT Stride () const
 
::MatrixDim Dim () const
 
Real FrobeniusNorm () const
 
bool IsUnit (Real tol=0.001) const
 
bool ApproxEqual (const CuMatrixBase< Real > &other, float tol=0.01) const
 True if ((*this)-other).FrobeniusNorm() <= tol * this->FrobeniusNorm() More...
 
MatrixIndexT SizeInBytes () const
 Get size of matrix in bytes. More...
 
template<typename OtherReal >
void CopyFromMat (const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
 
void CopyFromGeneralMat (const GeneralMatrix &src, MatrixTransposeType trans=kNoTrans)
 
void CopyFromMat (const MatrixBase< Real > &src, MatrixTransposeType trans=kNoTrans)
 
void CopyFromSp (const CuSpMatrix< Real > &M)
 
template<typename OtherReal >
void CopyFromTp (const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
 
void CopyRangeFromMatClamped (const CuMatrixBase< Real > &src, int32_t start_range, int32_t end_range, int32_t clamp_low, int32_t clamp_high)
 
template<typename OtherReal >
void CopyFromMat (const CuMatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
 
template<typename OtherReal >
void CopyToMat (MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
 
void CopyRowsFromVec (const CuVectorBase< Real > &v)
 This function has two modes of operation. More...
 
void CopyRowsFromVec (const VectorBase< Real > &v)
 Version of CopyRowsFromVec() that takes a CPU-based vector. More...
 
void CopyColsFromVec (const CuVectorBase< Real > &v)
 Copies vector into matrix, column-by-column. More...
 
void CopyColFromVec (const CuVectorBase< Real > &v, const MatrixIndexT col)
 Copy vector into specific column of matrix. More...
 
void Sigmoid (const CuMatrixBase< Real > &src)
 Set each element to the sigmoid of the corresponding element of "src": element by element, x = 1 / (1 + exp(-x)) More...
 
void Heaviside (const CuMatrixBase< Real > &src)
 Set each element to the Heaviside function of the corresponding element of "src", which we define as the function (x > 0 ? 1.0 : 0.0) [note: in general, there are different ways to deal with the situation when x==0. More...
 
void Exp (const CuMatrixBase< Real > &src)
 
void Log (const CuMatrixBase< Real > &src)
 
void Pow (const CuMatrixBase< Real > &src, Real power)
 
void PowAbs (const CuMatrixBase< Real > &src, Real power, bool include_sign=false)
 Apply power to the absolute value of each element. More...
 
void Floor (const CuMatrixBase< Real > &src, Real floor_val)
 
void Ceiling (const CuMatrixBase< Real > &src, Real ceiling_val)
 
void ExpLimited (const CuMatrixBase< Real > &src, Real lower_limit, Real upper_limit)
 This is equivalent to running: Floor(src, lower_limit); Ceiling(src, upper_limit); Exp(src) More...
 
void ExpSpecial (const CuMatrixBase< Real > &src)
 For each element x of the matrix, set it to (x < 0 ? exp(x) : x + 1). More...
 
void SoftMaxPerRow (const CuMatrixBase< Real > &src)
 Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow. More...
 
void LogSoftMaxPerRow (const CuMatrixBase< Real > &src)
 LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row, with attention to avoiding overflow or underflow. More...
 
void SoftHinge (const CuMatrixBase< Real > &src)
 Apply the function y = log(1 + exp(x)), to each element. More...
 
void GroupPnorm (const CuMatrixBase< Real > &src, Real pow)
 Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ p) ^ (1 / p), where p is the 'pow' argument and G = x.NumCols() / y.NumCols() must be an integer. More...
 
void DiffGroupPnorm (const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv, Real power)
 Differentiate backward through the GroupPnorm function. More...
 
void GroupMax (const CuMatrixBase< Real > &src)
 Apply the function y(i) = max_{j = i*G}^{(i+1)*G-1} x_j, where G = x.NumCols() / y.NumCols() must be an integer. More...
 
void GroupMaxDeriv (const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &output)
 Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax function above (i.e. More...
 
void ParametricRelu (const CuMatrixBase< Real > &src, const CuVectorBase< Real > &alpha, const CuVectorBase< Real > &beta)
 Compute the parametric rectified linear unit function; element by element, *this = src * (src > 0 ? alpha : beta) More...
 
void DiffParametricRelu (const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff, const CuVectorBase< Real > &alpha, const CuVectorBase< Real > &beta)
 Differentiate backward through the parametric relu function. More...
 
void Tanh (const CuMatrixBase< Real > &src)
 Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src). More...
 
void DiffSigmoid (const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
 Differentiate backward through the sigmoid function. More...
 
void DiffTanh (const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
 Differentiate backward through the tanh function. More...
 
void DiffSoftmaxPerRow (const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
 Differentiate backward through the softmax function. More...
 
void DiffLogSoftmaxPerRow (const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv)
 Differentiate backward through the log softmax function. More...
 
void DiffXent (const CuArrayBase< int32 > &tgt, CuVector< Real > *log_post_tgt)
 Differentiate the block [softmax+cross-entropy] : dE/da = posterior_mat - target_mat, 'E' is error function, 'a' is activation on softmax input. More...
 
void Cholesky (CuMatrixBase< Real > *inv_cholesky=NULL)
 This function sets *this to the Cholesky factor of *this (i.e. More...
 
void SymInvertPosDef ()
 Inversion for positive definite symmetric matrices. More...
 
void ApplyPow (Real power)
 
void ApplyPowAbs (Real power, bool include_sign=false)
 
void ApplyHeaviside ()
 
void ApplyFloor (Real floor_val)
 
void ApplyCeiling (Real ceiling_val)
 
void ApplyExp ()
 
void ApplyExpLimited (Real lower_limit, Real upper_limit)
 
void ApplyExpSpecial ()
 
void ApplySoftMaxPerRow ()
 
void ApplyLogSoftMaxPerRow ()
 
void ApplyLog ()
 
void FindRowMaxId (CuArray< int32 > *id) const
 Find the id of the maximal element for each row (resizes the 'id' array to the appropriate size). More...
 
void SetZero ()
 Math operations, some calling kernels. More...
 
void Set (Real value)
 
void Add (Real value)
 
void SetZeroAboveDiag ()
 Zeroes all elements for which col > row. More...
 
void Scale (Real value)
 
void MulElements (const CuMatrixBase< Real > &A)
 Multiply two matrices elementwise: C = C .* A. More...
 
void DivElements (const CuMatrixBase< Real > &A)
 Divide two matrices elementwise: C = C ./ A. More...
 
void Max (const CuMatrixBase< Real > &A)
 Do, elementwise, *this = max(*this, A). More...
 
void Min (const CuMatrixBase< Real > &A)
 Do, elementwise, *this = min(*this, A). More...
 
void MulColsVec (const CuVectorBase< Real > &scale)
 scale i'th column by scale[i] More...
 
void MulRowsVec (const CuVectorBase< Real > &scale)
 scale i'th row by scale[i] More...
 
void MulRowsGroupMat (const CuMatrixBase< Real > &src)
 divide each row into src.NumCols() groups, and then scale i'th row's jth group of elements by src[i, j]. More...
 
void DivRowsVec (const CuVectorBase< Real > &div)
 divide i'th row by div[i] More...
 
void InvertElements ()
 invert the matrix by elements. More...
 
void AddMat (Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
 *this += alpha * A More...
 
void AddSmat (Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
 *this += alpha * A. More...
 
void AddSmatMat (Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, Real beta)
 (*this) = alpha * op(A) * B + beta * (*this), where A is sparse. More...
 
void AddMatSmat (Real alpha, const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType transB, Real beta)
 (*this) = alpha * A * op(B) + beta * (*this), where B is sparse and op(B) is either B or trans(B) depending on the 'transB' argument. More...
 
void AddToElements (Real alpha, const CuArrayBase< int32 > &elements)
 This is a rather special purpose function; we might generalize it later by adding a transpose-type option. More...
 
void AddMatBlocks (Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
 This function is like AddMat (it does *this += alpha * src), except that it supports cases where *this and src have different dimension. More...
 
void AddVecToCols (Real alpha, const CuVectorBase< Real > &col, Real beta=1.0)
 (for each column c of *this), c = alpha * col + beta * c More...
 
void AddVecToRows (Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
 (for each row r of *this), r = alpha * row + beta * r More...
 
void AddMatMat (Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
 C = alpha * A(^T)*B(^T) + beta * C. More...
 
void AddVecVec (Real alpha, const CuVectorBase< Real > &x, const CuVectorBase< Real > &y)
 A = alpha * x * y^T + A . More...
 
void SetMatMatDivMat (const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const CuMatrixBase< Real > &C)
 *this = a * b / c (by element; when c = 0, *this = a). *this can safely be an alias of a, b or c and still give the expected result. More...
 
void SymAddMat2 (const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transA, Real beta)
 *this = beta * *this + alpha * M M^T, for symmetric matrices. More...
 
void AddMatBlock (Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuBlockMatrix< Real > &B, MatrixTransposeType transB, Real beta)
 This function is like AddMatMat but for where the second argument is of type CuBlockMatrix (a block-diagonal matrix of blocks). More...
 
void AddDiagVecMat (const Real alpha, const CuVectorBase< Real > &v, const CuMatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
 *this = beta * *this + alpha * diag(v) * M [or M^T]. More...
 
void AddMatDiagVec (const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, CuVectorBase< Real > &v, Real beta=1.0)
 
void AddMatMatElements (const Real alpha, const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const Real beta)
 *this = beta * *this + alpha * A .* B (.* element by element multiplication) More...
 
void AddMatSp (const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuSpMatrix< Real > &B, const Real beta)
 this <- beta*this + alpha*A*B More...
 
void AddSpMat (const Real alpha, const CuSpMatrix< Real > &A, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
 this <- beta*this + alpha*SpA*B More...
 
void AddTpMat (const Real alpha, const CuTpMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
 this <- beta*this + alpha*A*B. More...
 
void AddMatTp (const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuTpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
 this <- beta*this + alpha*A*B. More...
 
void CopyFromBlock (const CuBlockMatrix< Real > &B, MatrixTransposeType trans=kNoTrans)
 
void CopyLowerToUpper ()
 
void CopyUpperToLower ()
 
CuSubMatrix< Real > Range (const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
 
CuSubMatrix< Real > RowRange (const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
 
CuSubMatrix< Real > ColRange (const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
 
const CuSubVector< Real > Row (MatrixIndexT i) const
 
CuSubVector< Real > Row (MatrixIndexT i)
 
CuValue< Real > operator() (MatrixIndexT r, MatrixIndexT c)
 
Real operator() (MatrixIndexT r, MatrixIndexT c) const
 
Real Sum () const
 
Real Max () const
 
Real Min () const
 
Real Trace (bool check_square=true) const
 Return the trace. If check_square = true, will crash if matrix is not square. More...
 
void SetRandn ()
 
void SetRandUniform ()
 
void Write (std::ostream &os, bool binary) const
 
void AddElements (Real alpha, const std::vector< MatrixElement< Real > > &input)
 
void AddElements (Real alpha, const CuArrayBase< Int32Pair > &indexes, const Real *input)
 
void Lookup (const std::vector< Int32Pair > &indexes, Real *output) const
 
void Lookup (const CuArrayBase< Int32Pair > &indexes, Real *output) const
 
void EqualElementMask (const CuMatrixBase< Real > &mat, CuMatrix< Real > *mask) const
 
const Real * RowData (MatrixIndexT r) const
 Get raw row pointer (const). More...
 
Real * RowData (MatrixIndexT r)
 Get raw row pointer. More...
 
const Real * Data () const
 Return data pointer (const). More...
 
Real * Data ()
 Return data pointer. More...
 
const MatrixBase< Real > & Mat () const
 
MatrixBase< Real > & Mat ()
 

Private Member Functions

void Destroy ()
 

Additional Inherited Members

- Protected Member Functions inherited from CuMatrixBase< Real >
 CuMatrixBase ()
 
 CuMatrixBase (Real *data, MatrixIndexT num_rows, MatrixIndexT num_cols, MatrixIndexT stride)
 This constructor takes the #rows, #cols and stride; it's called from the constructor of CuSubMatrix. More...
 
- Protected Attributes inherited from CuMatrixBase< Real >
Real * data_
 GPU data pointer (or regular matrix data pointer, if no GPU is being used). More...
 
MatrixIndexT num_cols_
 
MatrixIndexT num_rows_
 
MatrixIndexT stride_
 

Detailed Description

template<typename Real>
class kaldi::CuMatrix< Real >

This class represents a matrix that's stored on the GPU if we have one, and in memory if not.

Definition at line 71 of file matrix-common.h.

Constructor & Destructor Documentation

◆ CuMatrix() [1/9]

CuMatrix ( )
inline

Definition at line 799 of file cu-matrix.h.

799 { }

◆ CuMatrix() [2/9]

CuMatrix ( MatrixIndexT  rows,
MatrixIndexT  cols,
MatrixResizeType  resize_type = kSetZero,
MatrixStrideType  stride_type = kDefaultStride 
)
inline

Constructor with memory initialisation.

Definition at line 802 of file cu-matrix.h.

804  {
805  Resize(rows, cols, resize_type, stride_type);
806  }
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ CuMatrix() [3/9]

CuMatrix ( const CuMatrix< Real > &  other,
MatrixTransposeType  trans = kNoTrans 
)

Definition at line 380 of file cu-matrix.cc.

380  {
381  if (trans == kNoTrans)
382  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
383  else
384  this->Resize(other.NumCols(), other.NumRows(), kUndefined);
385  this->CopyFromMat(other, trans);
386 }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ CuMatrix() [4/9]

CuMatrix ( const CuBlockMatrix< Real > &  other,
MatrixTransposeType  trans = kNoTrans 
)
explicit

Definition at line 195 of file cu-matrix.cc.

196  : CuMatrixBase<Real>() {
197  if (trans == kNoTrans) {
198  Resize(B.NumRows(), B.NumCols(), kUndefined);
199  this->CopyFromBlock(B);
200  } else {
201  Resize(B.NumCols(), B.NumRows(), kUndefined);
202  this->CopyFromBlock(B, kTrans);
203  }
204 }
void CopyFromBlock(const CuBlockMatrix< Real > &B, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:161
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ CuMatrix() [5/9]

CuMatrix ( const CuMatrixBase< Real > &  other,
MatrixTransposeType  trans = kNoTrans 
)
explicit

Definition at line 389 of file cu-matrix.cc.

389  {
390  if (trans == kNoTrans)
391  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
392  else
393  this->Resize(other.NumCols(), other.NumRows(), kUndefined);
394  this->CopyFromMat(other, trans);
395 }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ CuMatrix() [6/9]

CuMatrix ( const MatrixBase< OtherReal > &  other,
MatrixTransposeType  trans = kNoTrans 
)
explicit

Definition at line 400 of file cu-matrix.cc.

400  {
401  if (trans == kNoTrans)
402  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
403  else
404  this->Resize(other.NumCols(), other.NumRows(), kUndefined);
405  this->CopyFromMat(other, trans);
406 }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ CuMatrix() [7/9]

CuMatrix ( const CuSpMatrix< Real > &  M)
inline explicit

Copy constructor taking SpMatrix...

Definition at line 824 of file cu-matrix.h.

824  : CuMatrixBase<Real>() {
825  Resize(M.NumRows(), M.NumRows(), kUndefined);
826  this->CopyFromSp(M);
827  }
void CopyFromSp(const CuSpMatrix< Real > &M)
Definition: cu-matrix.cc:360
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ CuMatrix() [8/9]

CuMatrix ( const CuTpMatrix< OtherReal > &  M,
MatrixTransposeType  trans = kNoTrans 
)
inline explicit

Copy constructor taking TpMatrix...

Definition at line 831 of file cu-matrix.h.

832  : CuMatrixBase<Real>() {
833  Resize(M.NumCols(), M.NumRows(), kUndefined);
834  this->CopyFromTp(M, trans);
835  }
void CopyFromTp(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:280
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ CuMatrix() [9/9]

CuMatrix ( const CuMatrixBase< OtherReal > &  M,
MatrixTransposeType  trans = kNoTrans 
)
explicit

Copy constructor: as above, but from another type.

Copy constructor from another type.

Definition at line 3168 of file cu-matrix.cc.

3169  : CuMatrixBase<Real>() {
3170 
3171  if (trans == kNoTrans) {
3172  Resize(M.NumRows(), M.NumCols());
3173  this->CopyFromMat(M);
3174  } else {
3175  Resize(M.NumCols(), M.NumRows());
3176  this->CopyFromMat(M, kTrans);
3177  }
3178 }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ ~CuMatrix()

~CuMatrix ( )
inline

Destructor.

Definition at line 877 of file cu-matrix.h.

877 { Destroy(); }
void Destroy()
Definition: cu-matrix.cc:94

Member Function Documentation

◆ CompObjfAndDeriv()

void CompObjfAndDeriv ( const std::vector< MatrixElement< Real > > &  elements,
const CuMatrix< Real > &  A,
Real *  tot_objf,
Real *  tot_weight 
)

Here, A is interpreted as a matrix of probabilities, and "elements" as a list of posteriors (possibly zero-one), and "*this" as a matrix of derivatives w.r.t.

the log-probs. This function does: for each element { row, column, weight } indexed i in the vector "elements", let x(i) = A(row(i), column(i)); then it does (*this)(row(i), column(i)) += weight(i) / x(i), and sets *tot_objf = \sum_i weight(i) * log(x(i)) and *tot_weight = \sum_i weight(i). Preconditions: A must be strictly positive, and no (row, column) pair may be repeated within "elements".

Definition at line 1661 of file cu-matrix.cc.

Referenced by NnetUpdater::ComputeObjfAndDeriv(), kaldi::TestCuMatrixCompObjfAndDeriv(), and kaldi::UnitTestCuMatrixObjfDeriv().

1663  {
1664  { // check the input.
1665  typedef typename std::vector<MatrixElement<Real> >::const_iterator Iter;
1666  MatrixIndexT num_rows = this->num_rows_, num_cols = this->num_cols_;
1667  for (Iter iter = sv_labels.begin(); iter != sv_labels.end(); ++iter) {
1668  KALDI_ASSERT(iter->row < num_rows && iter->row >= 0 &&
1669  iter->column < num_cols && iter->column >= 0);
1670  }
1671  }
1672 
1673 #if HAVE_CUDA == 1
1674  if (CuDevice::Instantiate().Enabled()) {
1675  if (sv_labels.empty()) {
1676  KALDI_WARN << "Empty supervision labels";
1677  *tot_objf = 0.0;
1678  *tot_weight = 0.0;
1679  return;
1680  }
1681  void *addr = CuDevice::Instantiate().Malloc(sv_labels.size() * sizeof(MatrixElement<Real>));
1682  CU_SAFE_CALL(cudaMemcpyAsync(addr, sv_labels.data(), sv_labels.size() *
1683  sizeof(MatrixElement<Real>),
1684  cudaMemcpyHostToDevice,
1685  cudaStreamPerThread));
1686  CuTimer tim;
1687  CuVector<Real> tmp(2, kUndefined);
1688  int dimBlock(CU1DBLOCK);
1689  int dimGrid = 1; // only 1 block here. we have loops in each thread.
1690  cuda_comp_obj_deriv(dimGrid, dimBlock, (MatrixElement<Real>*)addr,
1691  sv_labels.size(), output.Data(), output.Dim(),
1692  this->Data(), this->Dim(), tmp.Data());
1693  Vector<Real> tmp_cpu(tmp);
1694  *tot_objf = tmp_cpu(0);
1695  *tot_weight = tmp_cpu(1);
1696  CuDevice::Instantiate().Free(addr);
1697  CuDevice::Instantiate().AccuProfile(__func__, tim);
1698  } else
1699 #endif
1700  {
1701  *tot_objf = 0.0;
1702  *tot_weight = 0.0;
1703  for(int32 i = 0; i<sv_labels.size(); i++) {
1704  int32 m = sv_labels[i].row, label = sv_labels[i].column;
1705  Real weight = sv_labels[i].weight;
1706  //KALDI_ASSERT(label >= 0 && label < nnet_.OutputDim());
1707  Real this_prob = output(m, label);
1708  KALDI_ASSERT(this_prob >= 0.99e-20); // we floored to 1.0e-20 in SoftmaxLayer.
1709  *tot_objf += weight * kaldi::Log(this_prob);
1710  *tot_weight += weight;
1711  (*this)(m, label) += weight / this_prob;
1712  }
1713  }
1714 }
kaldi::int32 int32
int32 MatrixIndexT
Definition: matrix-common.h:98
double Log(double x)
Definition: kaldi-math.h:100
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
#define KALDI_WARN
Definition: kaldi-error.h:150
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT num_cols_
Definition: cu-matrix.h:785
::MatrixDim Dim() const
Definition: cu-matrix.h:221
MatrixIndexT num_rows_
Definition: cu-matrix.h:786

◆ Destroy()

void Destroy ( )
private

Definition at line 94 of file cu-matrix.cc.

94  {
95 #if HAVE_CUDA == 1
96  if (CuDevice::Instantiate().Enabled()) {
97  if (this->data_ != NULL) {
98  CuTimer tim;
99  CuDevice::Instantiate().Free(this->data_);
100  CuDevice::Instantiate().AccuProfile(__func__, tim);
101  }
102  } else
103 #endif
104  {
105  if (this->data_ != NULL) KALDI_MEMALIGN_FREE(this->data_);
106  }
107  this->data_ = NULL;
108  this->num_rows_ = 0;
109  this->num_cols_ = 0;
110  this->stride_ = 0;
111 }
Real * data_
GPU data pointer (or regular matrix data pointer, if no GPU is being used).
Definition: cu-matrix.h:777
MatrixIndexT stride_
Definition: cu-matrix.h:787
#define KALDI_MEMALIGN_FREE(x)
Definition: kaldi-utils.h:60
MatrixIndexT num_cols_
Definition: cu-matrix.h:785
MatrixIndexT num_rows_
Definition: cu-matrix.h:786

◆ Mat() [1/2]

const Matrix<Real>& Mat ( ) const
inline

Definition at line 879 of file cu-matrix.h.

Referenced by CuVectorBase< float >::CopyDiagFromMat(), CuRand< float >::RandGaussian(), and CuRand< float >::RandUniform().

879  {
880  return *(reinterpret_cast<const Matrix<Real>* >(this));
881  }

◆ Mat() [2/2]

Matrix<Real>& Mat ( )
inline

Definition at line 882 of file cu-matrix.h.

882  {
883  return *(reinterpret_cast<Matrix<Real>* >(this));
884  }

◆ operator=() [1/3]

CuMatrix<Real>& operator= ( const CuMatrixBase< Real > &  other)
inline

Definition at line 842 of file cu-matrix.h.

842  {
843  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
844  this->CopyFromMat(other);
845  return *this;
846  }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ operator=() [2/3]

CuMatrix<Real>& operator= ( const CuMatrix< Real > &  other)
inline

Definition at line 848 of file cu-matrix.h.

848  {
849  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
850  this->CopyFromMat(other);
851  return *this;
852  }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ operator=() [3/3]

CuMatrix<Real>& operator= ( const MatrixBase< Real > &  other)
inline

Definition at line 854 of file cu-matrix.h.

854  {
855  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
856  this->CopyFromMat(other);
857  return *this;
858  }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50

◆ Read()

void Read ( std::istream &  is,
bool  binary 
)

◆ Resize()

void Resize ( MatrixIndexT  rows,
MatrixIndexT  cols,
MatrixResizeType  resize_type = kSetZero,
MatrixStrideType  stride_type = kDefaultStride 
)

Allocate the memory.

Definition at line 50 of file cu-matrix.cc.

Referenced by NnetComputer::AcceptInput(), RestrictedAttentionComponent::Add(), MaxoutComponent::Backprop(), MaxpoolingComponent::Backprop(), PnormComponent::Backprop(), NormalizeComponent::Backprop(), SigmoidComponent::Backprop(), TanhComponent::Backprop(), PowerComponent::Backprop(), RectifiedLinearComponent::Backprop(), SoftHingeComponent::Backprop(), ScaleComponent::Backprop(), SoftmaxComponent::Backprop(), LogSoftmaxComponent::Backprop(), AffineComponent::Backprop(), SpliceComponent::Backprop(), SpliceMaxComponent::Backprop(), BlockAffineComponent::Backprop(), SumGroupComponent::Backprop(), PermuteComponent::Backprop(), DctComponent::Backprop(), FixedLinearComponent::Backprop(), FixedAffineComponent::Backprop(), DropoutComponent::Backprop(), Convolutional1dComponent::Backprop(), Component::Backpropagate(), NnetOnlineComputer::Compute(), NnetComputer::ComputeLastLayerDeriv(), NnetUpdater::ComputeObjfAndDeriv(), LstmNonlinearityComponent::ConsolidateMemory(), kaldi::CuVectorUnitTestCopyElements(), DecodableNnetSimple::DoNnetComputation(), CuMatrixBase< float >::EqualElementMask(), MultiTaskLoss::Eval(), NnetOnlineComputer::Flush(), NnetRescaler::FormatInput(), NnetBatchComputer::FormatInputs(), LstmNonlinearityComponent::Init(), main(), kaldi::nnet3::MergeTaskOutput(), kaldi::nnet2::NnetComputationChunked(), NnetComputer::NnetComputer(), NnetDiscriminativeComputeObjf::ProcessOutputs(), NnetChainComputeProb::ProcessOutputs(), NnetDiscriminativeTrainer::ProcessOutputs(), NnetOnlineComputer::Propagate(), Component::Propagate(), BlstmProjected::PropagateFnc(), Rbm::Reconstruct(), NnetBatchComputer::SplitUtteranceIntoTasks(), RestrictedAttentionComponent::StoreStats(), kaldi::UnitTestCuSparseMatrixSwap(), kaldi::nnet3::UnitTestNnetInputDerivatives(), kaldi::nnet3::UnitTestNnetModelDerivatives(), ConvolutionComponent::Update(), NaturalGradientAffineComponent::Update(), AffineComponentPreconditioned::Update(), AffineComponentPreconditionedOnline::Update(), and 
Convolutional1dComponent::Update().

52  {
53  // This code does not currently support the other resize_type options.
54  KALDI_ASSERT(resize_type == kSetZero || resize_type == kUndefined);
55  if (rows * cols == 0) KALDI_ASSERT(rows == 0 && cols == 0);
56  if (this->num_rows_ == rows && this->num_cols_ == cols) {
57  if (resize_type == kSetZero) this->SetZero();
58  return;
59  }
60  if (this->num_rows_ != 0)
61  this->Destroy();
62  if (rows == 0) return;
63 #if HAVE_CUDA == 1
64  if (CuDevice::Instantiate().Enabled()) {
65  CuTimer tim;
66  MatrixIndexT row_bytes = cols * sizeof(Real);
67  size_t pitch;
68  if (stride_type == kDefaultStride) {
69  this->data_ = static_cast<Real*>(CuDevice::Instantiate().MallocPitch(
70  row_bytes, rows, &pitch));
71  this->num_rows_ = rows;
72  this->num_cols_ = cols;
73  this->stride_ = pitch / sizeof(Real);
74  } else { // kStrideEqualNumCols
75  size_t bytes = rows * cols * sizeof(Real);
76  this->data_ = static_cast<Real*>(CuDevice::Instantiate().Malloc(bytes));
77  this->num_rows_ = rows;
78  this->num_cols_ = cols;
79  this->stride_ = cols;
80  }
81  if (resize_type == kSetZero) this->SetZero();
82  CuDevice::Instantiate().AccuProfile("CuMatrix::Resize", tim);
83  } else
84 #endif
85  { // Let the initializer of Matrix<Real> handle the allocation,
86  // and then just do Swap which will switch the pointers.
87  // This wastes a few instructions but is simple to code.
88  Matrix<Real> mat(rows, cols, resize_type, stride_type);
89  this->Swap(&mat);
90  }
91 }
Real * data_
GPU data pointer (or regular matrix data pointer, if either CUDA was not compiled in or we could not acquire the device).
Definition: cu-matrix.h:777
MatrixIndexT stride_
Definition: cu-matrix.h:787
int32 MatrixIndexT
Definition: matrix-common.h:98
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
void SetZero()
Math operations, some calling kernels.
Definition: cu-matrix.cc:509
void Destroy()
Definition: cu-matrix.cc:94
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT num_cols_
Definition: cu-matrix.h:785
MatrixIndexT num_rows_
Definition: cu-matrix.h:786

◆ Swap() [1/3]

void Swap ( Matrix< Real > *  mat)

Definition at line 123 of file cu-matrix.cc.

Referenced by DecodableNnetLoopedOnlineBase::AdvanceChunk(), DecodableNnet2Online::ComputeForFrame(), kaldi::nnet3::ComputeObjectiveFunction(), GeneralMatrix::CopyToMat(), DecodableNnetSimple::DoNnetComputation(), Nnet::Feedforward(), main(), kaldi::nnet3::ReduceRankOfComponents(), NnetRescaler::Rescale(), SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal(), Matrix< BaseFloat >::Swap(), kaldi::UnitTestSwapCu2Cu(), and kaldi::UnitTestSwapCu2M().

123  {
124 #if HAVE_CUDA == 1
125  if (CuDevice::Instantiate().Enabled()) {
126  if (this->num_rows_ == 0) {
127  if (mat->num_rows_ != 0) {
128  // *this is empty, but mat is nonempty.
129  this->Resize(mat->num_rows_, mat->num_cols_, kUndefined);
130  this->CopyFromMat(*mat);
131  mat->Resize(0, 0);
132  }
133  // else both are empty.
134  } else { // *this is nonempty.
135  if (mat->num_rows_ != 0) {
136  // Both *this and *mat are nonempty. Recurse to simpler cases.
137  // this could be done more efficiently in the case where
138  // the size does not change.
139  Matrix<Real> temp;
140  this->Swap(&temp); // now temp is full, *this is empty.
141  mat->Swap(&temp); // now mat has data from *this, temp has
142  // data from mat.
143  this->Swap(&temp); // copy data in mat to *this, which is now empty.
144  } else { // *this is full but *mat is empty.
145  mat->Resize(this->num_rows_, this->num_cols_, kUndefined);
146  this->CopyToMat(mat);
147  this->Destroy();
148  }
149  }
150  } else
151 #endif
152  {
153  std::swap(mat->data_, this->data_);
154  std::swap(mat->num_cols_, this->num_cols_);
155  std::swap(mat->num_rows_, this->num_rows_);
156  std::swap(mat->stride_, this->stride_);
157  }
158 }
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
Definition: cu-matrix.cc:447
template<class T > void swap(T &a, T &b)
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
void Destroy()
Definition: cu-matrix.cc:94
MatrixIndexT num_cols_
Definition: cu-matrix.h:785
void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Allocate the memory.
Definition: cu-matrix.cc:50
MatrixIndexT num_rows_
Definition: cu-matrix.h:786

◆ Swap() [2/3]

void Swap ( CuMatrix< Real > *  mat)

Definition at line 114 of file cu-matrix.cc.

114  {
115  std::swap(mat->data_, this->data_);
116  std::swap(mat->num_cols_, this->num_cols_);
117  std::swap(mat->num_rows_, this->num_rows_);
118  std::swap(mat->stride_, this->stride_);
119 }
template<class T > void swap(T &a, T &b)

◆ Swap() [3/3]

void Swap ( CuMatrix< OtherReal > *  mat)

◆ Transpose()

void Transpose ( )

Definition at line 3190 of file cu-matrix.cc.

Referenced by MultiBasisComponent::BackpropagateFnc(), kaldi::TestCuMatrixTransposeNS(), kaldi::TestCuMatrixTransposeS(), kaldi::UnitTestCuMatrixAddDiagVecMat(), kaldi::UnitTestCuMatrixSymAddMat2(), and kaldi::UnitTestCuMatrixSymInvertPosDef().

3190  {
3191  if (this->num_rows_ == 0)
3192  return;
3193  // Copy and swap for all cases.
3194  // No need for a separate kernel of squared matrix in-place transpose.
3195  // It has the same possible peak performance as copy_transpose,
3196  // if allocate/deallocate overhead can be ignored.
3197  CuMatrix<Real> tmp(*this, kTrans);
3198  this->Swap(&tmp);
3199 }
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
MatrixIndexT num_rows_
Definition: cu-matrix.h:786

The documentation for this class was generated from the following files: