28 #ifndef KALDI_CUDAMATRIX_CU_MATRIX_H_ 29 #define KALDI_CUDAMATRIX_CU_MATRIX_H_ 46 template<
typename Real>
47 Real
TraceMatMat(
const CuMatrixBase<Real> &A,
const CuMatrixBase<Real> &B,
54 template<
typename Real>
56 const std::vector<CuSubMatrix<Real>* > &A,
58 const std::vector<CuSubMatrix<Real>* > &B,
78 template<
typename Real>
202 friend void AddMatMatBatched<Real>(
const Real alpha,
203 std::vector<CuSubMatrix<Real>* > &C,
204 const std::vector<CuSubMatrix<Real>* > &A,
206 const std::vector<CuSubMatrix<Real>* > &B,
228 bool IsUnit(Real tol = 0.001)
const;
237 template<
typename OtherReal>
250 template<
typename OtherReal>
258 int32_t start_range, int32_t end_range,
259 int32_t clamp_low, int32_t clamp_high);
261 template<
typename OtherReal>
265 template<
typename OtherReal>
439 this ->
Pow(*
this, power);
444 this ->
PowAbs(*
this, power, include_sign);
452 this ->
Floor(*
this, floor_val);
456 this ->
Ceiling(*
this, ceiling_val);
465 this ->
ExpLimited(*
this, lower_limit, upper_limit);
490 void Set(Real value);
491 void Add(Real value);
494 void Scale(Real value);
637 return AddMatMat(alpha, M, transA, B, transB, beta);
646 return AddMatMat(alpha, A, transA, M, transB, beta);
658 col_offset, num_cols);
672 static_cast<UnsignedMatrixIndexT>(
num_rows_));
678 static_cast<UnsignedMatrixIndexT>(
num_rows_));
684 static_cast<UnsignedMatrixIndexT>(
num_rows_) &&
685 static_cast<UnsignedMatrixIndexT>(c) <
686 static_cast<UnsignedMatrixIndexT>(
num_cols_));
692 static_cast<UnsignedMatrixIndexT>(
num_rows_) &&
693 static_cast<UnsignedMatrixIndexT>(c) <
694 static_cast<UnsignedMatrixIndexT>(
num_cols_));
703 Real
Trace(
bool check_square =
true)
const;
709 void Write(std::ostream &os,
bool binary)
const;
726 void Lookup(
const std::vector<Int32Pair> &indexes,
795 template<
typename Real>
805 Resize(rows, cols, resize_type, stride_type);
819 template<
typename OtherReal>
830 template <
typename OtherReal>
838 template<
typename OtherReal>
843 this->Resize(other.NumRows(), other.NumCols(),
kUndefined);
870 template<
typename OtherReal>
874 void Read(std::istream &is,
bool binary);
907 template<
typename Real>
936 template<
typename Real>
939 return A.ApproxEqual(B, tol);
942 template<
typename Real>
948 template<
typename Real>
950 return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
953 template<
typename Real>
955 return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols()
956 && M.Stride() == N.Stride());
960 template<
typename Real>
961 std::ostream &operator << (std::ostream &out, const CuMatrixBase<Real> &mat);
964 template<
typename Real>
965 template<
typename OtherReal>
970 M.CopyToMat(
this, trans);
973 template<
typename Real>
974 template<
typename OtherReal>
977 cu.CopyToMat(
this, trans);
const MatrixBase< Real > & Mat() const
CuSubVector< Real > Row(MatrixIndexT i)
MatrixBase< Real > & Mat()
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
MatrixIndexT Stride() const
Real * Data()
Return data pointer.
Matrix()
Empty constructor.
void ApplyCeiling(Real ceiling_val)
This class is a wrapper that enables you to store a matrix in one of three forms: either as a Matrix<...
void SoftHinge(const CuMatrixBase< Real > &src)
Apply the function y = log(1 + exp(x)), to each element.
void ApplyPow(Real power)
Real Trace(bool check_square=true) const
Return the trace. If check_square = true, will crash if matrix is not square.
void GroupMax(const CuMatrixBase< Real > &src)
Apply the function y(i) = max_{j = i*G}^{(i+1)*G-1} x_j, where G = x.NumCols() / y.NumCols() must be an integer.
void Write(std::ostream &os, bool binary) const
const CuSubVector< Real > Row(MatrixIndexT i) const
void ExpLimited(const CuMatrixBase< Real > &src, Real lower_limit, Real upper_limit)
This is equivalent to running: Floor(src, lower_limit); Ceiling(src, upper_limit); Exp(src) ...
void AddSmatMat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, Real beta)
(*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
void AddToElements(Real alpha, const CuArrayBase< int32 > &elements)
This is a rather special purpose function; we might generalize it later by adding a transpose-type op...
MatrixIndexT NumRows() const
void AddMatTp(const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuTpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*A*B.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Base class which provides matrix operations not involving resizing or allocation. ...
Structure containing size of the matrix plus stride.
CuMatrix(const CuSpMatrix< Real > &M)
Copy constructor taking SpMatrix...
void AddRows(Real alpha, const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
void AddRowRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, do (*this)(r, c) += src(j, c), where j ranges from ind...
CuMatrix(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Constructor with memory initialisation.
void AddElements(Real alpha, const std::vector< MatrixElement< Real > > &input)
void AddMatDiagVec(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, CuVectorBase< Real > &v, Real beta=1.0)
void CopyRangeFromMatClamped(const CuMatrixBase< Real > &src, int32_t start_range, int32_t end_range, int32_t clamp_low, int32_t clamp_high)
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
CuMatrix(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy constructor taking TpMatrix...
void ApplyFloor(Real floor_val)
void Log(const CuMatrixBase< Real > &src)
void AddMatBlock(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuBlockMatrix< Real > &B, MatrixTransposeType transB, Real beta)
This function is like AddMatMat but for where the second argument is of type CuBlockMatrix (a block-d...
void AddVecToCols(Real alpha, const CuVectorBase< Real > &col, Real beta=1.0)
(for each column c of *this), c = alpha * col + beta * c
void Ceiling(const CuMatrixBase< Real > &src, Real ceiling_val)
The class CuBlockMatrix holds a vector of objects of type CuMatrix, say, M_1, M_2, .
void AddMatSp(const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuSpMatrix< Real > &B, const Real beta)
this <-- beta*this + alpha*A*B
void AddSmat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A.
void AddToDiag(Real value)
Adds "value" to the diagonal elements of the matrix.
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
void DivRowsVec(const CuVectorBase< Real > &div)
divide i'th row by div[i]
A class for storing matrices.
void AddMatMatElements(const Real alpha, const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const Real beta)
*this = beta * *this + alpha * A .* B (.* element by element multiplication)
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Real * data_
GPU data pointer (or regular matrix data pointer,.
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT SizeInBytes() const
Get size of matrix in bytes.
void AddCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indices)
Add column indices[r] of src to column r.
void AddTpMat(const Real alpha, const CuTpMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*A*B.
void InvertElements()
invert the matrix by elements.
bool ApproxEqual(const CuMatrixBase< Real > &other, float tol=0.01) const
True if ((*this)-other).FrobeniusNorm() <= tol * this->FrobeniusNorm()
void CopyColFromVec(const CuVectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
bool IsUnit(Real tol=0.001) const
The following class is used to simulate non-const references to Real, e.g.
void Floor(const CuMatrixBase< Real > &src, Real floor_val)
void Lookup(const std::vector< Int32Pair > &indexes, Real *output) const
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void AddMatBlocks(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
This function is like AddMat (it does *this += alpha * src), except that it supports cases where *thi...
void SymInvertPosDef()
Inversion for positive definite symmetric matrices.
void Pow(const CuMatrixBase< Real > &src, Real power)
void AddMatMatBatched(const Real alpha, std::vector< CuSubMatrix< Real > * > &C, const std::vector< CuSubMatrix< Real > * > &A, MatrixTransposeType transA, const std::vector< CuSubMatrix< Real > * > &B, MatrixTransposeType transB, const Real beta)
Does multiple matrix multiplications, executing them in parallel using cuBLAS's gemmBatched if we are...
void ApplyLogSoftMaxPerRow()
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
void ApplyPowAbs(Real power, bool include_sign=false)
void CopyFromSp(const CuSpMatrix< Real > &M)
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
void DiffXent(const CuArrayBase< int32 > &tgt, CuVector< Real > *log_post_tgt)
Differentiate the block [softmax+cross-entropy] : dE/da = posterior_mat - target_mat, 'E' is error function, 'a' is activation on softmax input.
void AddToRows(Real alpha, const CuArrayBase< MatrixIndexT > &indexes, CuMatrixBase< Real > *dst) const
For each row i of *this, adds this->Row(i) to dst->Row(indexes(i)) if indexes(i) >= 0...
void SetZero()
Math operations, some calling kernels.
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
void MulRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) *= src.row(indexes[r]), where '*=' is elementwise multiplica...
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
void CopyRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies row r from row indexes[r] of src.
void CopyFromBlock(const CuBlockMatrix< Real > &B, MatrixTransposeType trans=kNoTrans)
void SymAddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transA, Real beta)
*this = beta * *this + alpha * M M^T, for symmetric matrices.
void CopyColsFromVec(const CuVectorBase< Real > &v)
Copies vector into matrix, column-by-column.
void GroupPnorm(const CuMatrixBase< Real > &src, Real pow)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ (power)) ^ (1 / power) where G = x...
#define KALDI_PARANOID_ASSERT(cond)
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
void Cholesky(CuMatrixBase< Real > *inv_cholesky=NULL)
This function sets *this to the Cholesky factor of *this (i.e.
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
This class is used for a piece of a CuMatrix.
void DivElements(const CuMatrixBase< Real > &A)
Divide two matrices elementwise: C = C ./ A.
KALDI_DISALLOW_COPY_AND_ASSIGN(CuMatrixBase)
void DiffSoftmaxPerRow(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the softmax function.
void GroupMaxDeriv(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
CuSubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
void DiffTanh(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the tanh function.
Real * RowData(MatrixIndexT r)
Get raw row pointer.
void CopyFromGeneralMat(const GeneralMatrix &src, MatrixTransposeType trans=kNoTrans)
void FindRowMaxId(CuArray< int32 > *id) const
Find the id of the maximal element for each row (resizes the 'id' array to the appropriate size)...
void Heaviside(const CuMatrixBase< Real > &src)
Set each element to the Heaviside function of the corresponding element of "src", which we define as ...
void DiffSigmoid(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the sigmoid function.
void MulColsVec(const CuVectorBase< Real > &scale)
scale i'th column by scale[i]
const Real * Data() const
Return data pointer (const).
void SumColumnRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, sets (*this)(r, c) to the sum over j of src(r, j), where j ranges from indexes[c].first through indexes[c].second - 1.
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
CuMatrixBase(Real *data, MatrixIndexT num_rows, MatrixIndexT num_cols, MatrixIndexT stride)
This constructor takes the #rows, #cols and stride; it's called from the constructor of CuSubMatrix...
Matrix for CUDA computing.
void ApplyExpLimited(Real lower_limit, Real upper_limit)
MatrixIndexT NumCols() const
void ApplySoftMaxPerRow()
void DiffLogSoftmaxPerRow(const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv)
Differentiate backward through the log softmax function.
void DiffGroupPnorm(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv, Real power)
Differentiate backward through the GroupPnorm function.
CuValue< Real > operator()(MatrixIndexT r, MatrixIndexT c)
Class CuArrayBase, CuSubArray and CuArray are analogues of classes CuVectorBase, CuSubVector and CuVe...
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
friend Real TraceMatMat(const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, MatrixTransposeType trans)
void CopyFromTp(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
void MulRowsGroupMat(const CuMatrixBase< Real > &src)
divide each row into src.NumCols() groups, and then scale i'th row's jth group of elements by src[i...
Real FrobeniusNorm() const
void CopyCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies column r from column indexes[r] of src.
void PowAbs(const CuMatrixBase< Real > &src, Real power, bool include_sign=false)
Apply power to the absolute value of each element.
void CopyToRows(const CuArrayBase< Real *> &dst) const
For each row r of this matrix, copies it to the array of floats at the location given by dst[r]...
bool SameDimAndStride(const CuMatrixBase< Real > &M, const CuMatrixBase< Real > &N)
void AddSpMat(const Real alpha, const CuSpMatrix< Real > &A, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*SpA*B
void LogSoftMaxPerRow(const CuMatrixBase< Real > &src)
LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row...
MatrixIndexT NumRows() const
Dimensions.
MatrixIndexT NumCols() const
Provides a vector abstraction class.
void ExpSpecial(const CuMatrixBase< Real > &src)
For each element x of the matrix, set it to (x < 0 ? exp(x) : x + 1).
void SetMatMatDivMat(const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const CuMatrixBase< Real > &C)
*this = a * b / c (by element; when c = 0, *this = a) *this can be an alias of a, b or c safely and g...
const Matrix< Real > & Mat() const
void Tanh(const CuMatrixBase< Real > &src)
Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src).
void ParametricRelu(const CuMatrixBase< Real > &src, const CuVectorBase< Real > &alpha, const CuVectorBase< Real > &beta)
Compute the parametric rectified linear unit function; element by element, *this = src * (src > 0 ...
void DiffParametricRelu(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff, const CuVectorBase< Real > &alpha, const CuVectorBase< Real > &beta)
Differentiate backward through the parametric relu function.
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
void EqualElementMask(const CuMatrixBase< Real > &mat, CuMatrix< Real > *mask) const
void AddMatSmat(Real alpha, const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType transB, Real beta)
(*this) = alpha * A * op(B) + beta * (*this), where B is sparse and op(B) is either B or trans(B) dep...
void Exp(const CuMatrixBase< Real > &src)
void AddVecVec(Real alpha, const CuVectorBase< Real > &x, const CuVectorBase< Real > &y)
A = alpha * x * y^T + A .
void SetZeroAboveDiag()
Zeroes all elements for which col > row.
Vector for CUDA computing.
void AddDiagVecMat(const Real alpha, const CuVectorBase< Real > &v, const CuMatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
const Real * RowData(MatrixIndexT r) const
Get raw row pointer (const).