CuVectorBase< Real > Class Template Reference

Vector for CUDA computing. More...

#include <matrix-common.h>

Inheritance diagram for CuVectorBase< Real >:
Collaboration diagram for CuVectorBase< Real >:

Public Member Functions

MatrixIndexT Dim () const
 Dimensions. More...
 
Real * Data ()
 Returns a pointer to the start of the vector's data. More...
 
const Real * Data () const
 Returns a pointer to the start of the vector's data (const). More...
 
void CopyFromVec (const CuVectorBase< Real > &src)
 Copy functions; these will crash if the dimension do not match. More...
 
template<typename OtherReal >
void CopyFromVec (const CuVectorBase< OtherReal > &M)
 
template<typename OtherReal >
void CopyFromVec (const VectorBase< OtherReal > &src)
 
template<typename OtherReal >
void CopyToVec (VectorBase< OtherReal > *dst) const
 
void CopyRowsFromMat (const CuMatrixBase< Real > &M)
 
void CopyRowsFromMat (const MatrixBase< Real > &M)
 
void SetZero ()
 Math operations. More...
 
void Set (Real value)
 
void Add (Real value)
 
void Scale (Real value)
 
void AddVec (Real alpha, const CuVectorBase< Real > &vec, Real beta=1.0)
 
template<typename OtherReal >
void AddVec (Real alpha, const CuVectorBase< OtherReal > &vec, Real beta=1.0)
 
void AddRowSumMat (Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
 Sum the rows of the matrix, add to vector. More...
 
void AddColSumMat (Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
 Sum the columns of the matrix, add to vector. More...
 
void AddTpVec (const Real alpha, const CuTpMatrix< Real > &M, const MatrixTransposeType trans, const CuVectorBase< Real > &v, const Real beta)
 Add triangular matrix times vector: this <– beta*this + alpha*M*v. More...
 
void MulTp (const CuTpMatrix< Real > &M, const MatrixTransposeType trans)
 Multiplies this vector by lower-triangular marix: *this <– *this *M. More...
 
bool ApproxEqual (const CuVectorBase< Real > &other, float tol=0.01) const
 
void InvertElements ()
 
void CopyElements (const CuMatrixBase< Real > &mat, const MatrixTransposeType trans, const CuArrayBase< int32 > &elements)
 Copies selected elements from 'mat' to *this. More...
 
void Floor (const CuVectorBase< Real > &src, Real floor_val, MatrixIndexT *floored_count=NULL)
 
void Ceiling (const CuVectorBase< Real > &src, Real ceiling_val, MatrixIndexT *ceiled_count=NULL)
 
void Pow (const CuVectorBase< Real > &src, Real power)
 
void ApplyFloor (Real floor_val, MatrixIndexT *floored_count=NULL)
 
void ApplyCeiling (Real ceiling_val, MatrixIndexT *ceiled_count=NULL)
 
void ApplyPow (Real power)
 
void ApplySoftMax ()
 
void ApplyLogSoftMax ()
 
void ApplyExp ()
 
void ApplyLog ()
 
Real Sum () const
 
void SetRandn ()
 
void SetRandUniform ()
 
CuSubVector< Real > Range (const MatrixIndexT o, const MatrixIndexT l)
 
const CuSubVector< Real > Range (const MatrixIndexT o, const MatrixIndexT l) const
 
void CopyColFromMat (const CuMatrixBase< Real > &mat, MatrixIndexT col)
 
template<typename OtherReal >
void CopyColFromMat (const CuMatrixBase< OtherReal > &mat, MatrixIndexT col)
 
void AddMatVec (const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, const CuVectorBase< Real > &v, const Real beta)
 
void AddVecVec (Real alpha, const CuVectorBase< Real > &v, const CuVectorBase< Real > &r, Real beta)
 
void AddSpVec (const Real alpha, const CuSpMatrix< Real > &S, const CuVectorBase< Real > &v, const Real beta)
 
void AddDiagMat2 (Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, Real beta)
 Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans), or *this = diag(M^T M) + beta * *this (if trans == kTrans). More...
 
void AddDiagMatMat (Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, const CuMatrixBase< Real > &N, MatrixTransposeType transN, Real beta=1.0)
 Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoTrans; for transpose arguments, it behaves as you would expect. More...
 
CuValue< Real > operator() (MatrixIndexT i)
 
Real Norm (Real p)
 
Real operator() (MatrixIndexT i) const
 
void CopyDiagFromPacked (const CuPackedMatrix< Real > &M)
 Extracts the diagonal of a packed matrix M; works for Sp or Tp. More...
 
void CopyDiagFromMat (const CuMatrix< Real > &M)
 Extracts the diagonal of a matrix. More...
 
Real Max () const
 Returns the maximum value of any element, or -infinity for the empty vector. More...
 
Real Min () const
 Returns the minimum value of any element, or +infinity for the empty vector. More...
 
void ReplaceValue (Real orig, Real changed)
 
void MulElements (const CuVectorBase< Real > &v)
 
void DivElements (const CuVectorBase< Real > &v)
 
const VectorBase< Real > & Vec () const
 
VectorBase< Real > & Vec ()
 
template<>
void CopyColFromMat (const CuMatrixBase< float > &mat, MatrixIndexT col)
 
template<>
void CopyColFromMat (const CuMatrixBase< double > &mat, MatrixIndexT col)
 
template<>
void CopyFromVec (const CuVectorBase< float > &src)
 
template<>
void CopyFromVec (const CuVectorBase< double > &src)
 
template<>
void CopyFromVec (const CuVectorBase< float > &src)
 
template<>
void CopyFromVec (const CuVectorBase< double > &src)
 

Protected Member Functions

 CuVectorBase ()
 Default constructor: make it protected so the user cannot instantiate this class. More...
 

Protected Attributes

Real * data_
 GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU). More...
 
MatrixIndexT dim_
 dimension of the vector More...
 

Private Member Functions

 KALDI_DISALLOW_COPY_AND_ASSIGN (CuVectorBase)
 

Friends

class CuVectorBase< float >
 
class CuVectorBase< double >
 
class CuMatrixBase< Real >
 
class MatrixBase< Real >
 
class CuPackedMatrix< Real >
 
class CuSpMatrix< Real >
 
class CuTpMatrix< Real >
 
class CuRand< Real >
 
template<typename OtherReal >
OtherReal VecVec (const CuVectorBase< OtherReal > &v1, const CuVectorBase< OtherReal > &v2)
 
void cu::Splice (const CuMatrixBase< Real > &src, const CuArray< int32 > &frame_offsets, CuMatrixBase< Real > *tgt)
 

Detailed Description

template<typename Real>
class kaldi::CuVectorBase< Real >

Vector for CUDA computing.

Definition at line 72 of file matrix-common.h.

Constructor & Destructor Documentation

◆ CuVectorBase()

CuVectorBase ( )
inlineprotected

Default constructor: make it protected so the user cannot instantiate this class.

Definition at line 246 of file cu-vector.h.

246 : data_(NULL), dim_(0) { }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250

Member Function Documentation

◆ Add()

void Add ( Real  value)

Definition at line 1157 of file cu-vector.cc.

Referenced by BlockSoftmax::BackpropagateFnc(), LstmNonlinearityComponent::ConsolidateMemory(), CuVectorBase< float >::Data(), kaldi::MeanVariance(), BatchNormComponent::Propagate(), DropoutMaskComponent::Propagate(), SigmoidComponent::RepairGradients(), TanhComponent::RepairGradients(), RectifiedLinearComponent::RepairGradients(), and Xent::ReportPerClass().

1157  {
1158 #if HAVE_CUDA == 1
1159  if (CuDevice::Instantiate().Enabled()) {
1160  CuTimer tim;
1161 
1162  dim3 dimBlock(CU1DBLOCK);
1163  dim3 dimGrid(n_blocks(Dim(), CU1DBLOCK));
1164  ::MatrixDim d = { 1, Dim(), Dim() };
1165 
1166  cuda_add(dimGrid, dimBlock, data_, value, d);
1167  CU_SAFE_CALL(cudaGetLastError());
1168  CuDevice::Instantiate().AccuProfile(__func__, tim);
1169  } else
1170 #endif
1171  {
1172  Vec().Add(value);
1173  }
1174 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ AddColSumMat()

void AddColSumMat ( Real  alpha,
const CuMatrixBase< Real > &  mat,
Real  beta = 1.0 
)

Sum the columns of the matrix, add to vector.

Definition at line 1298 of file cu-vector.cc.

Referenced by LstmNonlinearityComponent::Backprop(), BlockSoftmax::BackpropagateFnc(), kaldi::CuVectorUnitTestAddColSumMat(), CuVectorBase< float >::Data(), CuMatrixBase< float >::DiffLogSoftmaxPerRow(), KlHmm::PropagateFnc(), MultiBasisComponent::PropagateFnc(), kaldi::TestCuVectorAddColSumMat(), kaldi::UnitTestCuVectorAddColSumMat(), kaldi::UnitTestCuVectorAddColSumMatLarge(), kaldi::nnet1::UnitTestLengthNorm(), AffineTransform::Update(), and ConvolutionalComponent::Update().

1299  {
1300 #if HAVE_CUDA == 1
1301  if (CuDevice::Instantiate().Enabled()) {
1302  CuTimer tim;
1303  KALDI_ASSERT(mat.NumRows() == Dim());
1304 
1305  cuda_add_col_sum_mat(mat.NumRows(), CU1DBLOCK, Data(), mat.Data(),
1306  mat.Dim(), alpha, beta);
1307  CU_SAFE_CALL(cudaGetLastError());
1308 
1309  CuDevice::Instantiate().AccuProfile(__func__, tim);
1310  } else
1311 #endif
1312  {
1313  Vec().AddColSumMat(alpha, mat.Mat(), beta);
1314  }
1315 }
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: cu-vector.h:72
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ AddDiagMat2()

void AddDiagMat2 ( Real  alpha,
const CuMatrixBase< Real > &  M,
MatrixTransposeType  trans,
Real  beta 
)

Add the diagonal of a matrix times itself: *this = diag(M M^T) + beta * *this (if trans == kNoTrans), or *this = diag(M^T M) + beta * *this (if trans == kTrans).

Definition at line 595 of file cu-vector.cc.

Referenced by BackpropTruncationComponent::Backprop(), ClipGradientComponent::Backprop(), DiscriminativeComputation::Compute(), kaldi::CuVectorUnitTestAddDiagMat2(), kaldi::cu::DiffNormalizePerRow(), AffineComponentPreconditioned::GetScalingFactor(), OnlinePreconditioner::PreconditionDirections(), OnlinePreconditioner::PreconditionDirectionsInternal(), kaldi::nnet3::PrintParameterStats(), CuVectorBase< float >::Range(), ClipGradientComponent::RepairGradients(), NonlinearComponent::StoreBackpropStats(), kaldi::TestCuVectorAddDiagMat2(), and kaldi::TestCuVectorAddDiagMat2OnVariousShapes().

596  {
597 #if HAVE_CUDA == 1
598  if (CuDevice::Instantiate().Enabled()) {
599  if (dim_ == 0) return;
600  MatrixTransposeType other_trans = (trans == kTrans ? kNoTrans : kTrans);
601  KALDI_ASSERT(dim_ == (trans == kNoTrans ? M.NumRows() : M.NumCols()));
602  this->AddDiagMatMat(alpha, M, trans, M, other_trans, beta);
603  } else
604 #endif
605  {
606  Vec().AddDiagMat2(alpha, M.Mat(), trans, beta);
607  }
608 }
void AddDiagMatMat(Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, const CuMatrixBase< Real > &N, MatrixTransposeType transN, Real beta=1.0)
Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoT...
Definition: cu-vector.cc:611
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixTransposeType
Definition: matrix-common.h:32

◆ AddDiagMatMat()

void AddDiagMatMat ( Real  alpha,
const CuMatrixBase< Real > &  M,
MatrixTransposeType  transM,
const CuMatrixBase< Real > &  N,
MatrixTransposeType  transN,
Real  beta = 1.0 
)

Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoTrans; for transpose arguments, it behaves as you would expect.

Definition at line 611 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestAddDiagMatMat(), kaldi::cu::DiffNormalizePerRow(), CuMatrixBase< float >::DiffSoftmaxPerRow(), kaldi::nnet3::attention::GetAttentionDotProducts(), kaldi::nnet2::PreconditionDirections(), CuVectorBase< float >::Range(), RestrictedAttentionComponent::StoreStats(), kaldi::TestCuVectorAddDiagMatMat(), kaldi::TestCuVectorAddDiagMatMatShape(), kaldi::nnet3::UnitTestPreconditionDirectionsOnline(), and kaldi::nnet2::UnitTestPreconditionDirectionsOnline().

614  {
615 #if HAVE_CUDA == 1
616  if (CuDevice::Instantiate().Enabled()) {
617  CuTimer tim;
618 
619  if (transM != transN) {
620  KALDI_ASSERT(M.NumCols() == N.NumCols());
621  KALDI_ASSERT(M.NumRows() == N.NumRows());
622  if (transM == kNoTrans) {
623  // Case 1: diag(M*N') == sum(M.*N, 2)
624  // 1D grid and 1D block. One block per row of N.
625  // 1D grid expands along the column of N.
626  int dimBlock(CU1DBLOCK);
627  int dimGrid(M.NumRows());
628  cuda_add_diag_mat_mat_MNT(dimGrid, dimBlock, alpha, M.Data(), M.Dim(),
629  N.Data(), N.Stride(), beta, data_);
630  } else {
631  // Case 2: diag(M'*N) == sum(M.*N, 1)
632  // 16x16 or 8x32 2D block for coalesced memory access.
633  // Grid shape is designed as follows,
634  // 1. for small matrices, use 1D grid with only 1 row of 16x16 block,
635  // to avoid multiple kernel launch;
636  // 2. for large enough matrices (no matter thin or fat),
637  // use 1- or 2-D grid so that the grid contains
638  // at least and not much larger than 'kOptNumBlocks' blocks
639  // to fully utilize the GPU;
640  const int32 warpSize = 32;
641  const int32 kOptNumBlocks = 512;
642  const int32 tile_dim =
643  (N.NumRows() < 4096 && N.NumCols() < kOptNumBlocks * warpSize) ?
644  16 : 32;
645  dim3 dimBlock(tile_dim, CU1DBLOCK / tile_dim);
646  dim3 dimGrid(n_blocks(N.NumCols(), dimBlock.x),
647  n_blocks(N.NumRows(), dimBlock.y));
648  dimGrid.y = std::min(dimGrid.y, (kOptNumBlocks - 1) / dimGrid.x + 1);
649  dimGrid.y = tile_dim == 16 ? 1 : dimGrid.y;
650  if (dimGrid.y > 1) {
651  CuMatrix<Real> buf(dimGrid.y, N.NumCols());
652  cuda_add_diag_mat_mat_MTN(dimGrid, dimBlock, Real(1), M.Data(),
653  M.Stride(), N.Data(), N.Dim(), Real(0),
654  buf.Data(), buf.Stride());
655  this->AddRowSumMat(alpha, buf, beta);
656  } else {
657  cuda_add_diag_mat_mat_MTN(dimGrid, dimBlock, alpha, M.Data(),
658  M.Stride(), N.Data(), N.Dim(), beta, data_,
659  dim_);
660  }
661  }
662  } else {
663  KALDI_ASSERT(M.NumCols() == N.NumRows());
664  KALDI_ASSERT(N.NumCols() == M.NumRows());
665  if (transM == kNoTrans) {
666  // Case 3: diag(M*N) == sum(M'.*N, 1)
667  // 16x16 or 8x32 2D block for matrix transpose and coalesced memory access.
668  // One block per 'tile_dim' columns of N.
669  // 1D grid expands along the row of N.
670  int tile_dim =
671  sizeof(Real) == sizeof(float) && N.NumCols() >= 2048 ? 32 : 16;
672  dim3 dimBlock(tile_dim, CU1DBLOCK / tile_dim);
673  dim3 dimGrid(n_blocks(N.NumCols(), tile_dim));
674  cuda_add_diag_mat_mat_MN(dimGrid, dimBlock, alpha, M.Data(), M.Stride(),
675  N.Data(), N.Dim(), beta, data_);
676  } else {
677  // Case 4: diag(M'*N') == sum(N'.*M, 1)
678  // Same kernel and config as case 3 except M and N are swapped.
679  int tile_dim =
680  sizeof(Real) == sizeof(float) && N.NumCols() >= 2048 ? 32 : 16;
681  dim3 dimBlock(tile_dim, CU1DBLOCK / tile_dim);
682  dim3 dimGrid(n_blocks(M.NumCols(), tile_dim));
683  cuda_add_diag_mat_mat_MN(dimGrid, dimBlock, alpha, N.Data(), N.Stride(),
684  M.Data(), M.Dim(), beta, data_);
685  }
686  }
687  CU_SAFE_CALL(cudaGetLastError());
688 
689  CuDevice::Instantiate().AccuProfile(__func__, tim);
690  } else
691 #endif
692  {
693  Vec().AddDiagMatMat(alpha, M.Mat(), transM, N.Mat(), transN, beta);
694  }
695 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
kaldi::int32 int32
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
Definition: cu-vector.cc:1277

◆ AddMatVec()

void AddMatVec ( const Real  alpha,
const CuMatrixBase< Real > &  M,
MatrixTransposeType  trans,
const CuVectorBase< Real > &  v,
const Real  beta 
)

Definition at line 506 of file cu-vector.cc.

Referenced by ModelCollapser::CollapseComponentsAffine(), kaldi::CuVectorUnitTestAddMatVec(), ModelCollapser::PreMultiplyAffineParameters(), CuVectorBase< float >::Range(), kaldi::nnet2::UnitTestGenericComponentInternal(), and kaldi::VecMatVec().

510  {
511  KALDI_ASSERT((trans == kNoTrans && M.NumCols() == v.dim_ && M.NumRows() == dim_) ||
512  (trans == kTrans && M.NumRows() == v.dim_ && M.NumCols() == dim_));
513  KALDI_ASSERT(&v != this);
514 #if HAVE_CUDA == 1
515  if (CuDevice::Instantiate().Enabled()) {
516  if (dim_ == 0) return;
517  CuTimer tim;
518 
519  // Everything is backwards in CuBlas. We need to reverse rows, columns,
520  // transpose-ness.
521  CUBLAS_SAFE_CALL(cublas_gemv(GetCublasHandle(),
522  (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T),
523  M.NumCols(), M.NumRows(), alpha, M.Data(),
524  M.Stride(), v.Data(), 1, beta, data_, 1));
525 
526  CuDevice::Instantiate().AccuProfile(__func__, tim);
527  } else
528 #endif
529  {
530  Vec().AddMatVec(alpha,M.Mat(),trans,v.Vec(),beta);
531  }
532 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ AddRowSumMat()

void AddRowSumMat ( Real  alpha,
const CuMatrixBase< Real > &  mat,
Real  beta = 1.0 
)

Sum the rows of the matrix, add to vector.

Definition at line 1277 of file cu-vector.cc.

Referenced by LstmNonlinearityComponent::ConsolidateMemory(), kaldi::CuVectorUnitTestAddRowSumMat(), CuVectorBase< float >::Data(), Xent::Eval(), SentenceAveragingComponent::PropagateFnc(), RestrictedAttentionComponent::StoreStats(), NonlinearComponent::StoreStatsInternal(), kaldi::TestCuVectorAddRowSumMat(), kaldi::UnitTestCuVectorAddRowSumMat(), kaldi::UnitTestCuVectorAddRowSumMatLarge(), ConvolutionComponent::Update(), SentenceAveragingComponent::Update(), Convolutional1dComponent::Update(), NaturalGradientPerElementScaleComponent::Update(), and NonlinearComponent::UpdateStats().

1278  {
1279  KALDI_ASSERT(mat.NumCols() == Dim());
1280  if (Dim() == 0)
1281  return;
1282 #if HAVE_CUDA == 1
1283  if (CuDevice::Instantiate().Enabled()) {
1284  CuTimer tim;
1285  cuda_add_row_sum_mat(mat.NumCols(), CU1DBLOCK, Data(), mat.Data(),
1286  mat.Dim(), alpha, beta);
1287  CU_SAFE_CALL(cudaGetLastError());
1288 
1289  CuDevice::Instantiate().AccuProfile(__func__, tim);
1290  } else
1291 #endif
1292  {
1293  Vec().AddRowSumMat(alpha, mat.Mat(), beta);
1294  }
1295 }
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: cu-vector.h:72
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ AddSpVec()

void AddSpVec ( const Real  alpha,
const CuSpMatrix< Real > &  S,
const CuVectorBase< Real > &  v,
const Real  beta 
)

Definition at line 535 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestAddSpVec(), CuVectorBase< float >::Range(), and kaldi::nnet2::UnitTestPreconditionDirections().

538  {
539  KALDI_ASSERT(M.NumCols() == v.dim_ && M.NumRows() == dim_);
540  KALDI_ASSERT(&v != this);
541 #if HAVE_CUDA == 1
542  if (CuDevice::Instantiate().Enabled()) {
543  if (dim_ == 0) return;
544  CuTimer tim;
545 
546  // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but
547  // in CUBLAS, for some stupid reason, everything is reversed.
548  CUBLAS_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(),
549  alpha, M.Data(), v.Data(), 1, beta, data_, 1));
550 
551  CuDevice::Instantiate().AccuProfile(__func__, tim);
552  } else
553 #endif
554  {
555  Vec().AddSpVec(alpha,M.Mat(),v.Vec(),beta);
556  }
557 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ AddTpVec()

void AddTpVec ( const Real  alpha,
const CuTpMatrix< Real > &  M,
const MatrixTransposeType  trans,
const CuVectorBase< Real > &  v,
const Real  beta 
)

Add triangular matrix times vector: this <– beta*this + alpha*M*v.

Works even if rv == *this.

Definition at line 698 of file cu-vector.cc.

Referenced by CuVectorBase< float >::Data(), kaldi::UnitTestCuVectorAddTp(), and kaldi::UnitTestCuVectorAddTpVec().

701  {
702  KALDI_ASSERT(dim_ == v.dim_ && dim_ == M.NumRows());
703 #if HAVE_CUDA == 1
704  if (CuDevice::Instantiate().Enabled()) {
705  if (dim_ == 0) return;
706  CuTimer tim;
707  if (beta == 0.0) {
708  if (&v != this) CopyFromVec(v);
709  MulTp(M, trans);
710  if (alpha != 1.0) Scale(alpha);
711  } else {
712  CuVector<Real> tmp(v);
713  tmp.MulTp(M, trans);
714  if (beta != 1.0) Scale(beta); // *this <-- beta * *this
715  AddVec(alpha, tmp, 1.0); // *this += alpha * M * v
716  }
717  CuDevice::Instantiate().AccuProfile(__func__, tim);
718  } else
719 #endif
720  {
721  Vec().AddTpVec(alpha, M.Mat(), trans, v.Vec(), beta);
722  }
723 }
void MulTp(const CuTpMatrix< Real > &M, const MatrixTransposeType trans)
Multiplies this vector by lower-triangular marix: *this <– *this *M.
Definition: cu-vector.cc:727
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimension do not match.
Definition: cu-vector.cc:1078
void AddVec(Real alpha, const CuVectorBase< Real > &vec, Real beta=1.0)
Definition: cu-vector.cc:1237
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Scale(Real value)
Definition: cu-vector.cc:1216

◆ AddVec() [1/2]

void AddVec ( Real  alpha,
const CuVectorBase< Real > &  vec,
Real  beta = 1.0 
)

Definition at line 1237 of file cu-vector.cc.

Referenced by BatchNormComponent::Add(), CuVectorBase< float >::ApproxEqual(), LstmNonlinearityComponent::Backprop(), LstmNonlinearityComponent::ConsolidateMemory(), kaldi::CuVectorUnitTestAddDiagMatMat(), kaldi::CuVectorUnitTestAddVec(), kaldi::CuVectorUnitTestAddVecCross(), CuVectorBase< float >::Data(), SigmoidComponent::RepairGradients(), TanhComponent::RepairGradients(), RectifiedLinearComponent::RepairGradients(), Xent::ReportPerClass(), BatchNormComponent::StoreStats(), kaldi::UnitTestCuMatrixAddDiagVecMat(), and kaldi::UnitTestCuVectorAddVec().

1238  {
1239  KALDI_ASSERT(vec.Dim() == Dim());
1240 
1241 #if HAVE_CUDA == 1
1242  if (CuDevice::Instantiate().Enabled()) {
1243  CuTimer tim;
1244  int32 dim = this->dim_;
1245  Real *data = this->data_;
1246  const Real *vec_data = vec.data_;
1247  if (beta != 1.0) CU_SAFE_CALL(cuda_scal(GetCublasHandle(), dim, beta, data, 1));
1248  if (alpha != 0.0) CU_SAFE_CALL(cuda_axpy(GetCublasHandle(), dim, alpha, vec_data, 1, data, 1));
1249  CuDevice::Instantiate().AccuProfile(__func__, tim);
1250  } else
1251  #endif
1252  {
1253  if (beta != 1.0) Vec().Scale(beta);
1254  Vec().AddVec(alpha, vec.Vec());
1255  }
1256 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
kaldi::int32 int32
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ AddVec() [2/2]

void AddVec ( Real  alpha,
const CuVectorBase< OtherReal > &  vec,
Real  beta = 1.0 
)

Definition at line 1261 of file cu-vector.cc.

1262  {
1263  // We could implement this directly, without using a temporary-- this can
1264  // be done later, when we have time.
1265  CuVector<Real> temp(vec);
1266  this->AddVec(alpha, temp, beta);
1267 }
void AddVec(Real alpha, const CuVectorBase< Real > &vec, Real beta=1.0)
Definition: cu-vector.cc:1237

◆ AddVecVec()

void AddVecVec ( Real  alpha,
const CuVectorBase< Real > &  v,
const CuVectorBase< Real > &  r,
Real  beta 
)

Definition at line 560 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestAddVecVec(), BatchNormComponent::Propagate(), CuVectorBase< float >::Range(), BatchNormComponent::Read(), and BatchNormComponent::Write().

561  {
562  KALDI_ASSERT((dim_ == v.dim_ && dim_ == r.dim_));
563  KALDI_ASSERT(this != &v && this != &r);
564 #if HAVE_CUDA == 1
565  if (CuDevice::Instantiate().Enabled()) {
566  if (dim_ == 0) return;
567  CuTimer tim;
568  int dimBlock(CU1DBLOCK);
569  int dimGrid(n_blocks(dim_,CU1DBLOCK));
570 
571  cuda_add_vec_vec(dimGrid, dimBlock, alpha, data_, v.Data(), r.Data(), beta, dim_);
572  CU_SAFE_CALL(cudaGetLastError());
573  CuDevice::Instantiate().AccuProfile("CuVectorBase::AddVecVec", tim);
574  } else
575 #endif
576  {
577  Vec().AddVecVec(alpha, v.Vec(), r.Vec(), beta);
578  }
579 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ ApplyCeiling()

void ApplyCeiling ( Real  ceiling_val,
MatrixIndexT ceiled_count = NULL 
)
inline

Definition at line 143 of file cu-vector.h.

Referenced by kaldi::CuVectorUnitTestApplyCeiling(), kaldi::CuVectorUnitTestApplyCeilingNoCount(), kaldi::TestCuVectorApplyCeiling(), and kaldi::TestCuVectorApplyCeilingNoCount().

143  {
144  this -> Ceiling(*this, ceiling_val, ceiled_count);
145  };
void Ceiling(const CuVectorBase< Real > &src, Real ceiling_val, MatrixIndexT *ceiled_count=NULL)
Definition: cu-vector.cc:385

◆ ApplyExp()

void ApplyExp ( )

Definition at line 442 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow(), StatisticsPoolingComponent::Backprop(), and kaldi::CuVectorUnitTestApplyExp().

442  {
443 #if HAVE_CUDA == 1
444  if (CuDevice::Instantiate().Enabled()) {
445  if (dim_ == 0) return;
446  CuTimer tim;
447  int dimBlock(CU1DBLOCK);
448  int dimGrid(n_blocks(dim_,CU1DBLOCK));
449 
450  cuda_vec_apply_exp(dimGrid, dimBlock, data_, dim_);
451  CU_SAFE_CALL(cudaGetLastError());
452  CuDevice::Instantiate().AccuProfile("CuVectorBase::ApplyExp", tim);
453  } else
454 #endif
455  {
456  Vec().ApplyExp();
457  }
458 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ ApplyFloor()

void ApplyFloor ( Real  floor_val,
MatrixIndexT floored_count = NULL 
)
inline

◆ ApplyLog()

void ApplyLog ( )

Definition at line 462 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow(), and kaldi::CuVectorUnitTestApplyLog().

462  {
463 #if HAVE_CUDA == 1
464  if (CuDevice::Instantiate().Enabled()) {
465  if (dim_ == 0) return;
466  CuTimer tim;
467  int dimBlock(CU1DBLOCK);
468  int dimGrid(n_blocks(dim_,CU1DBLOCK));
469 
470  CuVector<Real> flag(1);
471  cuda_vec_apply_log(dimGrid, dimBlock, data_, flag.Data(), dim_);
472  CU_SAFE_CALL(cudaGetLastError());
473  if (flag(0) > 0)
474  KALDI_ERR << "Trying to take log of a negative number.";
475  CuDevice::Instantiate().AccuProfile("CuVectorBase::ApplyLog", tim);
476  } else
477 #endif
478  {
479  Vec().ApplyLog();
480  }
481 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define KALDI_ERR
Definition: kaldi-error.h:147
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ ApplyLogSoftMax()

void ApplyLogSoftMax ( )

Definition at line 484 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow().

484  {
485 #if HAVE_CUDA == 1
486  if (CuDevice::Instantiate().Enabled()) {
487  if (dim_ == 0) return;
488  CuTimer tim;
489  size_t dimBlock = CU1DBLOCK;
490  size_t dimGrid = 1; // dimGrid value represent the number of rows
491  ::MatrixDim dim = { 1, this->dim_, this->dim_};
492 
493  cuda_log_softmax_reduce(dimGrid, dimBlock, data_, data_, dim, this->dim_);
494  CU_SAFE_CALL(cudaGetLastError());
495  CuDevice::Instantiate().AccuProfile(__func__, tim);
496  } else
497 #endif
498  {
499  Vec().ApplyLogSoftMax();
500  }
501 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ ApplyPow()

void ApplyPow ( Real  power)
inline

◆ ApplySoftMax()

void ApplySoftMax ( )

Definition at line 334 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow(), kaldi::CuVectorUnitTestApplySoftMax(), and kaldi::TestCuVectorSoftmax().

334  {
335 #if HAVE_CUDA == 1
336  if (CuDevice::Instantiate().Enabled()) {
337  if (dim_ == 0) return;
338  CuTimer tim;
339  size_t dimBlock = CU1DBLOCK;
340  size_t dimGrid = 1; // dimGrid value represent the number of rows
341  ::MatrixDim dim = { 1, this->dim_, this->dim_};
342  cuda_softmax_reduce(dimGrid, dimBlock, data_, data_, dim, this->dim_);//actually dim is not stride...
343  CU_SAFE_CALL(cudaGetLastError());
344  CuDevice::Instantiate().AccuProfile(__func__, tim);
345  } else
346 #endif
347  {
348  Vec().ApplySoftMax();
349  }
350 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ ApproxEqual()

bool ApproxEqual ( const CuVectorBase< Real > &  other,
float  tol = 0.01 
) const

Definition at line 583 of file cu-vector.cc.

Referenced by CuVectorBase< float >::Data(), and kaldi::nnet2::UnitTestPreconditionDirections().

583  {
584  if (dim_ != other.dim_) KALDI_ERR << "ApproxEqual: size mismatch "
585  << dim_ << " vs. " << other.dim_;
586  KALDI_ASSERT(tol >= 0.0);
587  CuVector<Real> tmp(*this);
588  tmp.AddVec(-1.0, other);
589  BaseFloat tmp_norm = sqrt(VecVec(tmp, tmp)), this_norm = sqrt(VecVec(*this, *this));
590  return tmp_norm <= static_cast<Real>(tol) * this_norm;
591 }
friend OtherReal VecVec(const CuVectorBase< OtherReal > &v1, const CuVectorBase< OtherReal > &v2)
float BaseFloat
Definition: kaldi-types.h:29
#define KALDI_ERR
Definition: kaldi-error.h:147
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Ceiling()

void Ceiling ( const CuVectorBase< Real > &  src,
Real  ceiling_val,
MatrixIndexT ceiled_count = NULL 
)

Definition at line 385 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyCeiling(), and CuVectorBase< float >::Data().

386  {
387 #if HAVE_CUDA == 1
388  if (CuDevice::Instantiate().Enabled()) {
389  int dimBlock(CU1DBLOCK);
390  int dimGrid(n_blocks(dim_,CU1DBLOCK));
391  if (ceiled_count == nullptr) {
392  if (dim_ == 0) return;
393  CuTimer tim;
394  // We are calling a function meant for matrices, by viewing the
395  // vector as a matrix with a single row.
396  ::MatrixDim dim = {1, Dim(), 1};
397  cuda_ceiling(dimGrid, dimBlock, this->data_, src.Data(), ceiling_val, dim, 1);
398 
399  CuDevice::Instantiate().AccuProfile("CuVectorBase::CeilingNoCount", tim);
400  } else {
401  if (dim_ == 0) { *ceiled_count = 0; return; }
402  CuTimer tim;
403 
404  CuVector<float> count_vec(dim_, kUndefined);
405 
406  cuda_vec_apply_ceiling(dimGrid, dimBlock, data_, ceiling_val, count_vec.Data(), dim_);
407  CU_SAFE_CALL(cudaGetLastError());
408  *ceiled_count = count_vec.Sum();
409  CuDevice::Instantiate().AccuProfile("CuVectorBase::Ceiling", tim);
410  }
411  } else
412 #endif
413  {
414  Vec().Ceiling(src.Vec(), ceiling_val, ceiled_count);
415  }
416 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ CopyColFromMat() [1/4]

void CopyColFromMat ( const CuMatrixBase< float > &  mat,
MatrixIndexT  col 
)

Definition at line 122 of file cu-vector.cc.

122  {
123  KALDI_ASSERT(col < mat.NumCols());
124  KALDI_ASSERT(dim_ == mat.NumRows());
125 #if HAVE_CUDA == 1
126  if (CuDevice::Instantiate().Enabled()) {
127  CuTimer tim;
128  int dimBlock(CU1DBLOCK);
129  int dimGrid(n_blocks(dim_,CU1DBLOCK));
130 
131  cuda_copy_col_from_mat_df(dimGrid, dimBlock, data_, col, mat.Data(), mat.Dim(), dim_);
132  CU_SAFE_CALL(cudaGetLastError());
133  CuDevice::Instantiate().AccuProfile("CuVectorBase::CopyColFromMat", tim);
134  } else
135 #endif
136  {
137  Vec().CopyColFromMat(mat.Mat(), col);
138  }
139 }
const MatrixBase< Real > & Mat() const
Definition: cu-matrix.h:755
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
::MatrixDim Dim() const
Definition: cu-matrix.h:221
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215

◆ CopyColFromMat() [2/4]

void CopyColFromMat ( const CuMatrixBase< double > &  mat,
MatrixIndexT  col 
)

Definition at line 144 of file cu-vector.cc.

144  {
145  KALDI_ASSERT(col < mat.NumCols());
146  KALDI_ASSERT(dim_ == mat.NumRows());
147 #if HAVE_CUDA == 1
148  if (CuDevice::Instantiate().Enabled()) {
149  CuTimer tim;
150  int dimBlock(CU1DBLOCK);
151  int dimGrid(n_blocks(dim_,CU1DBLOCK));
152 
153  cuda_copy_col_from_mat_fd(dimGrid, dimBlock, data_, col, mat.Data(), mat.Dim(), dim_);
154  CU_SAFE_CALL(cudaGetLastError());
155  CuDevice::Instantiate().AccuProfile("CuVectorBase::CopyColFromMat", tim);
156  } else
157 #endif
158  {
159  Vec().CopyColFromMat(mat.Mat(), col);
160  }
161 }
const MatrixBase< Real > & Mat() const
Definition: cu-matrix.h:755
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
::MatrixDim Dim() const
Definition: cu-matrix.h:221
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215

◆ CopyColFromMat() [3/4]

void CopyColFromMat ( const CuMatrixBase< Real > &  mat,
MatrixIndexT  col 
)

Definition at line 103 of file cu-vector.cc.

Referenced by StatisticsPoolingComponent::Backprop(), kaldi::CuVectorUnitTestCopyFromMat(), CuVectorBase< float >::Range(), kaldi::UnitTestCuMatrixAddDiagVecMat(), NaturalGradientAffineComponent::Update(), AffineComponentPreconditioned::Update(), AffineComponentPreconditionedOnline::Update(), BlockAffineComponentPreconditioned::Update(), and TdnnComponent::UpdateNaturalGradient().

103  {
104  KALDI_ASSERT(col < mat.NumCols());
105  KALDI_ASSERT(dim_ == mat.NumRows());
106 #if HAVE_CUDA == 1
107  if (CuDevice::Instantiate().Enabled()) {
108  CuTimer tim;
109  cublas_copy(GetCublasHandle(),
110  this->dim_, mat.Data() + col, mat.Stride(), this->data_, 1);
111  CU_SAFE_CALL(cudaGetLastError());
112  CuDevice::Instantiate().AccuProfile("CuVectorBase::CopyColFromMat", tim);
113  } else
114 #endif
115  {
116  Vec().CopyColFromMat(mat.Mat(),col);
117  }
118 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyColFromMat() [4/4]

void CopyColFromMat ( const CuMatrixBase< OtherReal > &  mat,
MatrixIndexT  col 
)

◆ CopyDiagFromMat()

void CopyDiagFromMat ( const CuMatrix< Real > &  M)

Extracts the diagonal of a matrix.

Definition at line 1198 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestAddDiagMatMat(), kaldi::CuVectorUnitTestCopyDiagFromMat(), and CuVectorBase< float >::operator()().

1198  {
1199 #if HAVE_CUDA == 1
1200  if (CuDevice::Instantiate().Enabled()) {
1201  KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols()));
1202  CuTimer tim;
1203  CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1,
1204  data_, 1));
1205 
1206  CuDevice::Instantiate().AccuProfile(__func__, tim);
1207  } else
1208 #endif
1209  {
1210  Vec().CopyDiagFromMat(M.Mat());
1211  }
1212 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyDiagFromPacked()

void CopyDiagFromPacked ( const CuPackedMatrix< Real > &  M)

Extracts the diagonal of a packed matrix M; works for Sp or Tp.

Definition at line 1177 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestCopyDiagFromPacked(), CuVectorBase< float >::operator()(), CuPackedMatrix< Real >::Trace(), and kaldi::TraceSpSp().

1177  {
1178 #if HAVE_CUDA == 1
1179  if (CuDevice::Instantiate().Enabled()) {
1180  KALDI_ASSERT(dim_ == M.NumRows());
1181  if (dim_ == 0) return;
1182  CuTimer tim;
1183  int dimBlock(CU1DBLOCK);
1184  int dimGrid(n_blocks(Dim(), CU1DBLOCK));
1185  cuda_vec_copy_diag_from_packed(dimGrid, dimBlock, data_, M.Data(), dim_);
1186  CU_SAFE_CALL(cudaGetLastError());
1187 
1188  CuDevice::Instantiate().AccuProfile(__func__, tim);
1189  } else
1190 #endif
1191  {
1192  Vec().CopyDiagFromPacked(M.Mat());
1193  }
1194 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ CopyElements()

void CopyElements ( const CuMatrixBase< Real > &  mat,
const MatrixTransposeType  trans,
const CuArrayBase< int32 > &  elements 
)

Copies selected elements from 'mat' to *this.

Expects this->Dim() to equal elements.Dim(). If trans == kNoTrans, expects mat.NumRows() to equal this.Dim(), and for each i, copies mat(i, elements[i]) to (*this)(i). If trans == kTrans, expects mat.NumCols() to equal this.Dim(), and for each i, copies mat(elements[i], i) to (*this)(i).

Definition at line 1340 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestCopyElements(), and CuVectorBase< float >::Data().

1342  {
1343  KALDI_ASSERT(elements.Dim() == Dim());
1344 #if HAVE_CUDA == 1
1345  if (CuDevice::Instantiate().Enabled()) {
1346  CuTimer tim;
1347 
1348  dim3 dimBlock(CU1DBLOCK);
1349  dim3 dimGrid(n_blocks(Dim(), CU1DBLOCK));
1350 
1351  cuda_vector_copy_elements(dimGrid, dimBlock, this->data_, Dim(),
1352  mat.Data(), mat.Stride(), trans == kTrans,
1353  elements.Data());
1354  CU_SAFE_CALL(cudaGetLastError());
1355  CuDevice::Instantiate().AccuProfile(__func__, tim);
1356  } else
1357 #endif
1358  {
1359  VectorBase<Real> &this_vec = this->Vec();
1360  const MatrixBase<Real> &src_mat = mat.Mat();
1361  const int32* index_map = elements.Data();
1362  KALDI_ASSERT((Dim() == mat.NumRows() && trans == kNoTrans)
1363  || (Dim() == mat.NumCols() && trans == kTrans));
1364  for (int32 i = 0; i < Dim(); i++) {
1365  int32 j = index_map[i];
1366  KALDI_ASSERT(j >= 0);
1367  if (trans == kNoTrans) {
1368  KALDI_ASSERT(j < mat.NumCols());
1369  this_vec(i) = src_mat(i, j);
1370  } else {
1371  KALDI_ASSERT(j < mat.NumRows());
1372  this_vec(i) = src_mat(j, i);
1373  }
1374  }
1375  }
1376 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
kaldi::int32 int32
const T * Data() const
Get raw pointer.
Definition: cu-array.h:52
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
friend class MatrixBase< Real >
Definition: cu-vector.h:55
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT Dim() const
Return the vector dimension.
Definition: cu-array.h:49
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ CopyFromVec() [1/7]

void CopyFromVec ( const CuVectorBase< Real > &  src)

Copy functions; these will crash if the dimension do not match.

The operator = in class CuVector will also change the sizes for you.

Definition at line 1078 of file cu-vector.cc.

Referenced by BlockAffineComponent::BlockAffineComponent(), CuSparseMatrix< Real >::CopyElementsToVec(), CuSparseMatrix< Real >::CopyFromSmat(), CuVectorBase< float >::CopyFromVec(), CuMatrixBase< float >::CopyRangeFromMatClamped(), CuSparseMatrix< Real >::CuSparseMatrix(), CuVector< float >::CuVector(), kaldi::CuVectorUnitTestCopyCross(), kaldi::CuVectorUnitTestCopyCross2(), CuVectorBase< float >::Data(), SoftmaxComponent::MixUp(), CuVector< float >::operator=(), OnlinePreconditionerSimple::PreconditionDirections(), OnlineNaturalGradientSimple::PreconditionDirections(), BatchNormComponent::Propagate(), CuRand< float >::RandGaussian(), kaldi::nnet1::RandUniform(), DctComponent::Reorder(), RectifiedLinearComponent::RepairGradients(), NnetBatchComputer::SplitUtteranceIntoTasks(), kaldi::TestCuVectorCopyFromVec(), kaldi::UnitTestCuMatrixAddDiagVecMat(), kaldi::UnitTestCuMatrixAddVecToCols(), kaldi::UnitTestCuMatrixAddVecToRows(), kaldi::UnitTestCuMatrixAddVecVec(), kaldi::UnitTestCuMatrixDivRowsVec(), kaldi::UnitTestCuMatrixMulColsVec(), kaldi::UnitTestCuMatrixMulRowsVec(), kaldi::UnitTestCuVectorAddColSumMat(), kaldi::UnitTestCuVectorAddColSumMatLarge(), kaldi::UnitTestCuVectorAddRowSumMat(), kaldi::UnitTestCuVectorAddRowSumMatLarge(), kaldi::UnitTestCuVectorAddTpVec(), kaldi::UnitTestCuVectorAddVec(), kaldi::UnitTestCuVectorCopyFromVec(), kaldi::UnitTestCuVectorInvertElements(), and kaldi::UnitTestCuVectorMulTp().

1078  {
1079  KALDI_ASSERT(src.Dim() == dim_);
1080 #if HAVE_CUDA == 1
1081  if (CuDevice::Instantiate().Enabled()) {
1082  if (dim_ == 0) return;
1083  CuTimer tim;
1084  CU_SAFE_CALL(
1085  cudaMemcpyAsync(data_, src.data_, src.dim_ * sizeof(Real),
1086  cudaMemcpyDeviceToDevice, cudaStreamPerThread));
1087  CuDevice::Instantiate().AccuProfile(__func__, tim);
1088  } else
1089  #endif
1090  {
1091  memcpy(static_cast<void*>(data_), static_cast<void*>(src.data_),
1092  dim_ * sizeof(Real));
1093  }
1094 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyFromVec() [2/7]

void CopyFromVec ( const CuVectorBase< OtherReal > &  M)

Definition at line 377 of file cu-vector.h.

377  {
378  v.CopyToVec(&this);
379 }

◆ CopyFromVec() [3/7]

void CopyFromVec ( const VectorBase< OtherReal > &  src)

Definition at line 904 of file cu-vector.cc.

904  {
905 #if HAVE_CUDA == 1
906  if (CuDevice::Instantiate().Enabled()) {
907  if (sizeof(Real) != sizeof(OtherReal)) {
908  CuVector<OtherReal> temp(dim_, kUndefined);
909  temp.CopyFromVec(src);
910  this->CopyFromVec(temp);
911  } else {
912  KALDI_ASSERT(src.Dim() == dim_);
913  if (dim_ == 0) return;
914  CuTimer tim;
915  CU_SAFE_CALL(cudaMemcpyAsync(data_, src.Data(), src.Dim()*sizeof(Real),
916  cudaMemcpyHostToDevice, cudaStreamPerThread));
917  CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
918  CuDevice::Instantiate().AccuProfile("CuVector::CopyFromVecH2D", tim);
919  }
920  } else
921  #endif
922  {
923  Vec().CopyFromVec(src);
924  }
925 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimension do not match.
Definition: cu-vector.cc:1078
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyFromVec() [4/7]

void CopyFromVec< float > ( const CuVectorBase< float > &  src)

◆ CopyFromVec() [5/7]

void CopyFromVec< double > ( const CuVectorBase< double > &  src)

◆ CopyFromVec() [6/7]

void CopyFromVec ( const CuVectorBase< float > &  src)

Definition at line 869 of file cu-vector.cc.

869  {
870  KALDI_ASSERT(src.Dim() == dim_);
871 #if HAVE_CUDA == 1
872  if (CuDevice::Instantiate().Enabled()) {
873  if (dim_ == 0) return;
874  CuTimer tim;
875  CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
876  CuDevice::Instantiate().AccuProfile(__func__, tim);
877  } else
878 #endif
879  {
880  Vec().CopyFromVec(src.Vec());
881  }
882 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: cu-vector.h:72
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ CopyFromVec() [7/7]

void CopyFromVec ( const CuVectorBase< double > &  src)

Definition at line 886 of file cu-vector.cc.

886  {
887  KALDI_ASSERT(src.Dim() == dim_);
888 #if HAVE_CUDA == 1
889  if (CuDevice::Instantiate().Enabled()) {
890  if (dim_ == 0) return;
891  CuTimer tim;
892  CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
893  CuDevice::Instantiate().AccuProfile(__func__, tim);
894  } else
895 #endif
896  {
897  Vec().CopyFromVec(src.Vec());
898  }
899 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: cu-vector.h:72
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ CopyRowsFromMat() [1/2]

void CopyRowsFromMat ( const CuMatrixBase< Real > &  M)

Definition at line 164 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestCopyFromMat(), and CuVectorBase< float >::Data().

164  {
165  KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows());
166 #if HAVE_CUDA == 1
167  if (CuDevice::Instantiate().Enabled()) {
168  if (dim_ == 0) return;
169  CuTimer tim;
170  if (mat.Stride() == mat.NumCols() && mat.NumRows() != 0) {
171  CU_SAFE_CALL(
172  cudaMemcpyAsync(data_, mat.Data(), sizeof(Real)*dim_,
173  cudaMemcpyDeviceToDevice, cudaStreamPerThread));
174  } else {
175  Real* vec_data = data_;
176  for (MatrixIndexT r = 0; r < mat.NumRows(); r++) {
177  CU_SAFE_CALL(cudaMemcpyAsync(vec_data, mat.RowData(r),
178  sizeof(Real) * mat.NumCols(),
179  cudaMemcpyDeviceToDevice,
180  cudaStreamPerThread));
181  vec_data += mat.NumCols();
182  }
183  }
184  CuDevice::Instantiate().AccuProfile("CuVectorBase::CopyRowsFromMat", tim);
185  } else
186 #endif
187  {
188  Vec().CopyRowsFromMat(mat.Mat());
189  }
190 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
int32 MatrixIndexT
Definition: matrix-common.h:98
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyRowsFromMat() [2/2]

void CopyRowsFromMat ( const MatrixBase< Real > &  M)

Definition at line 218 of file cu-vector.cc.

218  {
219  KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows());
220 #if HAVE_CUDA == 1
221  if (CuDevice::Instantiate().Enabled()) {
222  if (dim_ == 0) return;
223  CuTimer tim;
224  if (mat.Stride() == mat.NumCols()) {
225  CU_SAFE_CALL(cudaMemcpyAsync(data_, mat.Data(), sizeof(Real)*dim_,
226  cudaMemcpyHostToDevice, cudaStreamPerThread));
227  } else {
228  Real* vec_data = data_;
229  for (MatrixIndexT r = 0; r < mat.NumRows(); r++) {
230  CU_SAFE_CALL(cudaMemcpyAsync(vec_data, mat.RowData(r),
231  sizeof(Real) * mat.NumCols(),
232  cudaMemcpyHostToDevice, cudaStreamPerThread));
233  vec_data += mat.NumCols();
234  }
235  }
236  CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
237  CuDevice::Instantiate().AccuProfile(__func__, tim);
238  } else
239 #endif
240  {
241  Vec().CopyRowsFromMat(mat);
242  }
243 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
int32 MatrixIndexT
Definition: matrix-common.h:98
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ CopyToVec()

template void CopyToVec ( VectorBase< OtherReal > *  dst) const

Definition at line 938 of file cu-vector.cc.

Referenced by CuSparseMatrix< Real >::CopyToSmat(), CuVectorBase< float >::CopyToVec(), kaldi::nnet1::CountCorrectFramesWeighted(), CuVectorBase< float >::Data(), kaldi::nnet1::MomentStatistics(), kaldi::UnitTestCuDiffXent(), kaldi::UnitTestCuVectorAddColSumMat(), kaldi::UnitTestCuVectorAddColSumMatLarge(), kaldi::UnitTestCuVectorAddRowSumMat(), kaldi::UnitTestCuVectorAddRowSumMatLarge(), kaldi::UnitTestCuVectorAddTpVec(), kaldi::UnitTestCuVectorAddVec(), kaldi::UnitTestCuVectorInvertElements(), kaldi::UnitTestCuVectorMulTp(), kaldi::UnitTestMulTp(), and kaldi::UnitTestVector().

938  {
939  KALDI_ASSERT(dim_ == dst->Dim());
940 #if HAVE_CUDA == 1
941  if (CuDevice::Instantiate().Enabled()) {
942  if (sizeof(Real) != sizeof(OtherReal)) {
943  CuVector<OtherReal> temp(*this);
944  temp.CopyToVec(dst);
945  } else {
946  if (dim_ == 0) return;
947  CuTimer tim;
948  CU_SAFE_CALL(cudaMemcpyAsync(dst->Data(), this->data_,
949  sizeof(Real) * dim_, cudaMemcpyDeviceToHost,
950  cudaStreamPerThread));
951  CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
952  CuDevice::Instantiate().AccuProfile(__func__, tim);
953  }
954  } else
955 #endif
956  {
957  dst->CopyFromVec(this->Vec());
958  }
959 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Data() [1/2]

Real* Data ( )
inline

Returns a pointer to the start of the vector's data.

Definition at line 72 of file cu-vector.h.

Referenced by CuMatrixBase< float >::AddDiagVecMat(), CuMatrixBase< float >::AddMatDiagVec(), CuVectorBase< float >::AddMatVec(), CuVectorBase< float >::AddSpVec(), CuSpMatrix< Real >::AddVec2(), CuVectorBase< float >::AddVecVec(), CuMatrixBase< float >::AddVecVec(), CuVectorBase< float >::ApplyLog(), StatisticsPoolingComponent::Backprop(), kaldi::cu::BackpropLstmNonlinearity(), CuVectorBase< float >::Ceiling(), CuMatrix< float >::CompObjfAndDeriv(), CuMatrixBase< float >::CopyColFromVec(), CuMatrixBase< float >::CopyColsFromVec(), CuVectorBase< float >::CopyFromVec(), MatrixBase< float >::CopyRowsFromVec(), CuMatrixBase< float >::CopyRowsFromVec(), CuVectorBase< float >::DivElements(), kaldi::cu::EnsureNonzero(), CuVectorBase< float >::Floor(), NnetBatchComputer::FormatInputs(), CuVectorBase< float >::Max(), CuMatrixBase< float >::Max(), CuVectorBase< float >::Min(), CuMatrixBase< float >::Min(), CuVectorBase< float >::MulElements(), CuVectorBase< float >::Pow(), CuRand< float >::RandGaussian(), CuRand< float >::RandUniform(), NnetBatchComputer::SplitUtteranceIntoTasks(), RestrictedAttentionComponent::StoreStats(), CuVectorBase< float >::Sum(), CuMatrixBase< float >::Sum(), kaldi::TestCuVectorCopyFromVec(), CuMatrixBase< float >::Trace(), kaldi::TraceMatMat(), and kaldi::VecVec().

72 { return data_; }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248

◆ Data() [2/2]

const Real* Data ( ) const
inline

Returns a pointer to the start of the vector's data (const).

Definition at line 74 of file cu-vector.h.

74 { return data_; }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248

◆ Dim()

MatrixIndexT Dim ( ) const
inline

Dimensions.

Definition at line 69 of file cu-vector.h.

Referenced by DiscriminativeObjectiveInfo::AccumulateGradients(), DiscriminativeObjectiveInfo::AccumulateOutput(), NonlinearComponent::Add(), CuMatrixBase< float >::AddDiagVecMat(), CuMatrixBase< float >::AddMatDiagVec(), NnetStats::AddStatsFromNnet(), CuVectorBase< float >::AddVec(), CuSpMatrix< Real >::AddVec2(), CuMatrixBase< float >::AddVecToCols(), CuMatrixBase< float >::AddVecToRows(), CuMatrixBase< float >::AddVecVec(), AffineComponent::AffineComponent(), kaldi::cu::BackpropLstmNonlinearity(), Convolutional1dComponent::Convolutional1dComponent(), ConvolutionComponent::ConvolutionComponent(), CuMatrixBase< float >::CopyColFromVec(), CuMatrixBase< float >::CopyColsFromVec(), CuSparseMatrix< Real >::CopyElementsToVec(), CuVectorBase< float >::CopyFromVec(), MatrixBase< float >::CopyRowsFromVec(), CuMatrixBase< float >::CopyRowsFromVec(), kaldi::nnet1::CountCorrectFramesWeighted(), kaldi::CuRandGaussianVectorSpeedTest(), kaldi::CuRandUniformVectorSpeedTest(), CuVector< float >::CuVector(), CuMatrixBase< float >::DivRowsVec(), kaldi::cu::EnsureNonzero(), Xent::Eval(), ModelCollapser::GetDiagonallyPreModifiedComponentIndex(), AffineComponentPreconditionedOnline::GetScalingFactor(), FixedScaleComponent::Init(), FixedBiasComponent::Init(), PerElementScaleComponent::Init(), kaldi::MeanVariance(), SoftmaxComponent::MixUp(), kaldi::nnet1::MomentStatistics(), CuMatrixBase< float >::MulColsVec(), CuMatrixBase< float >::MulRowsVec(), NaturalGradientAffineComponent::NaturalGradientAffineComponent(), CuVector< float >::operator=(), CuMatrixBase< float >::ParametricRelu(), ModelCollapser::PreMultiplyAffineParameters(), kaldi::nnet3::PrintParameterStats(), CuRand< float >::RandGaussian(), CuRand< float >::RandUniform(), kaldi::nnet1::RandUniform(), AffineTransform::SetBias(), NnetBatchComputer::SplitUtteranceIntoTasks(), BatchNormComponent::StoreStats(), kaldi::TestCuVectorCopyFromVec(), kaldi::VecMatVec(), and kaldi::VecVec().

69 { return dim_; }
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250

◆ DivElements()

void DivElements ( const CuVectorBase< Real > &  v)

Definition at line 857 of file cu-vector.cc.

Referenced by CuVectorBase< float >::operator()().

857  {
858  // this just creates a matrix and calls the matrix version.
859  KALDI_ASSERT(dim_ == v.dim_);
860  CuSubMatrix<Real> this_mat(this->Data(), 1, dim_, dim_),
861  v_mat(v.Data(), 1, dim_, dim_);
862  this_mat.DivElements(v_mat);
863 }
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
Real * Data()
Returns a pointer to the start of the vector&#39;s data.
Definition: cu-vector.h:72

◆ Floor()

void Floor ( const CuVectorBase< Real > &  src,
Real  floor_val,
MatrixIndexT floored_count = NULL 
)

Definition at line 353 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyFloor(), and CuVectorBase< float >::Data().

353  {
354 #if HAVE_CUDA == 1
355  if (CuDevice::Instantiate().Enabled()) {
356  int dimBlock(CU1DBLOCK);
357  int dimGrid(n_blocks(dim_,CU1DBLOCK));
358  if (floored_count == nullptr) {
359  if (dim_ == 0) return;
360  CuTimer tim;
361  // We are calling a function meant for matrices, by viewing the
362  // vector as a matrix with a single row.
363  ::MatrixDim dim = {1, Dim(), 1};
364  cuda_floor(dimGrid, dimBlock, this->data_, src.Data(), floor_val, dim, 1);
365  CuDevice::Instantiate().AccuProfile("CuVectorBase::FloorNoCount", tim);
366  } else {
367  if (dim_ == 0) { *floored_count = 0; return; }
368  CuTimer tim;
369 
370  CuVector<float> count_vec(dim_, kUndefined);
371 
372  cuda_vec_apply_floor(dimGrid, dimBlock, data_, floor_val, count_vec.Data(), dim_);
373  CU_SAFE_CALL(cudaGetLastError());
374  *floored_count = count_vec.Sum();
375  CuDevice::Instantiate().AccuProfile("CuVectorBase::Floor", tim);
376  }
377  } else
378 #endif
379  {
380  Vec().Floor(src.Vec(), floor_val, floored_count);
381  }
382 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ InvertElements()

void InvertElements ( )

Definition at line 1318 of file cu-vector.cc.

Referenced by ScaleAndOffsetComponent::BackpropInternal(), kaldi::CuVectorUnitTestInvertElements(), CuVectorBase< float >::Data(), kaldi::UnitTestCuVectorInvertElements(), AffineTransform::Update(), and ConvolutionalComponent::Update().

1318  {
1319 #if HAVE_CUDA == 1
1320  if (CuDevice::Instantiate().Enabled()) {
1321  CuTimer tim;
1322 
1323  dim3 dimBlock(CU1DBLOCK, 1);
1324  dim3 dimGrid(n_blocks(dim_, CU1DBLOCK));
1325  MatrixDim d = {1, dim_, dim_};
1326 
1327  cuda_invert_elements(dimGrid, dimBlock, data_, d);
1328  CU_SAFE_CALL(cudaGetLastError());
1329 
1330  CuDevice::Instantiate().AccuProfile(__func__, tim);
1331  } else
1332 #endif
1333  {
1334  Vec().InvertElements();
1335  }
1336 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ KALDI_DISALLOW_COPY_AND_ASSIGN()

KALDI_DISALLOW_COPY_AND_ASSIGN ( CuVectorBase< Real >  )
private

◆ Max()

Real Max ( ) const

Returns the maximum value of any element, or -infinity for the empty vector.

Definition at line 782 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestMax(), ModelCollapser::GetDiagonallyPreModifiedComponentIndex(), CuMatrixBase< float >::Max(), and CuVectorBase< float >::operator()().

782  {
783  Real result = 0.0;
784 #if HAVE_CUDA == 1
785  if (CuDevice::Instantiate().Enabled()) {
786  if (dim_ == 0) { // max of an empty set is -infinity.
787  return -std::numeric_limits<Real>::infinity();
788  }
789  CuTimer tim;
790 
791  // Small vectors are copied to RAM and reduced on CPU.
792  // The length is chosen by cu-vector-speed-test
793  if (dim_ < 4096) {
794  Vector<Real> ans_cpu(*this);
795  result = ans_cpu.Max();
796  } else {
797  // Use no more than 256 blocks (still too many?)
798  int dimBlock = CU1DBLOCK;
799  int dimGrid = n_blocks(dim_, dimBlock);
800  if (dimGrid > 256) {
801  dimGrid = 256;
802  }
803  CuVector<Real> ans(dimGrid, kUndefined);
804  cuda_vec_max(dimGrid, dimBlock, data_, ans.Data(), dim_, 1);
805  CU_SAFE_CALL(cudaGetLastError());
806  Vector<Real> ans_cpu(ans);
807  result = ans_cpu.Max();
808  }
809 
810  CuDevice::Instantiate().AccuProfile(__func__, tim);
811  } else
812 #endif
813  {
814  result = (this->Vec()).Max();
815  }
816  return result;
817 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector.
Definition: cu-vector.cc:782
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ Min()

Real Min ( ) const

Returns the minimum value of any element, or +infinity for the empty vector.

Definition at line 744 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestMin(), ModelCollapser::GetDiagonallyPreModifiedComponentIndex(), CuMatrixBase< float >::Min(), and CuVectorBase< float >::operator()().

744  {
745  Real result = 0.0;
746 #if HAVE_CUDA == 1
747  if (CuDevice::Instantiate().Enabled()) {
748  if (dim_ == 0) { // min of an empty set is infinity.
749  return std::numeric_limits<Real>::infinity();
750  }
751  CuTimer tim;
752 
753  // Small vectors are copied to RAM and reduced on CPU.
754  // The length is chosen by cu-vector-speed-test
755  if (dim_ < 4096) {
756  Vector<Real> ans_cpu(*this);
757  result = ans_cpu.Min();
758  } else {
759  // Use no more than 256 blocks (still too many?)
760  int dimBlock = CU1DBLOCK;
761  int dimGrid = n_blocks(dim_, dimBlock);
762  if (dimGrid > 256) {
763  dimGrid = 256;
764  }
765  CuVector<Real> ans(dimGrid, kUndefined);
766  cuda_vec_min(dimGrid, dimBlock, data_, ans.Data(), dim_, 1);
767  CU_SAFE_CALL(cudaGetLastError());
768  Vector<Real> ans_cpu(ans);
769  result = ans_cpu.Min();
770  }
771 
772  CuDevice::Instantiate().AccuProfile(__func__, tim);
773  } else
774 #endif
775  {
776  result = (this->Vec()).Min();
777  }
778  return result;
779 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
Real Min() const
Returns the minimum value of any element, or +infinity for the empty vector.
Definition: cu-vector.cc:744

◆ MulElements()

void MulElements ( const CuVectorBase< Real > &  v)

Definition at line 838 of file cu-vector.cc.

Referenced by BackpropTruncationComponent::Backprop(), kaldi::CuVectorUnitTestInvertElements(), AffineComponentPreconditionedOnline::GetScalingFactor(), CuVectorBase< float >::operator()(), Xent::ReportPerClass(), and kaldi::UnitTestVector().

838  {
839  KALDI_ASSERT(dim_ == v.dim_);
840 #if HAVE_CUDA == 1
841  if (CuDevice::Instantiate().Enabled()) {
842  if (dim_ == 0) return;
843  CuTimer tim;
844  int dimBlock(CU1DBLOCK);
845  int dimGrid(n_blocks(dim_, CU1DBLOCK));
846  cuda_vec_mul_elements(dimGrid, dimBlock, data_, v.Data(), dim_);
847  CU_SAFE_CALL(cudaGetLastError());
848  CuDevice::Instantiate().AccuProfile("CuVectorBase::MulElements", tim);
849  } else
850 #endif
851  {
852  Vec().MulElements(v.Vec());
853  }
854 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ MulTp()

void MulTp ( const CuTpMatrix< Real > &  M,
const MatrixTransposeType  trans 
)

Multiplies this vector by lower-triangular marix: *this <– *this *M.

Definition at line 727 of file cu-vector.cc.

Referenced by CuVectorBase< float >::AddTpVec(), CuVectorBase< float >::Data(), kaldi::UnitTestCuVectorMulTp(), and kaldi::UnitTestMulTp().

727  {
728  KALDI_ASSERT(M.NumRows() == dim_);
729 #if HAVE_CUDA == 1
730  if (CuDevice::Instantiate().Enabled()) {
731  if (dim_ == 0) return;
732  CuTimer tim;
733  cublas_tpmv(GetCublasHandle(), (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T),
734  M.NumRows(), M.Data(), data_, 1);
735  CuDevice::Instantiate().AccuProfile("CuVectorBase::MulTp", tim);
736  } else
737 #endif
738  {
739  Vec().MulTp(M.Mat(), trans);
740  }
741 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Norm()

Real Norm ( Real  p)

Definition at line 193 of file cu-vector.cc.

Referenced by kaldi::CuVectorUnitTestNorm(), CuSparseMatrix< Real >::FrobeniusNorm(), and CuVectorBase< float >::operator()().

193  {
194 #if HAVE_CUDA == 1
195  if (CuDevice::Instantiate().Enabled()) {
196  CuTimer tim;
197  Real ans;
198  KALDI_ASSERT(p == 1.0 || p == 2.0);
199  if (dim_ == 0) return 0.0;
200  if (p == 1.0) {
201  cublas_asum(GetCublasHandle(), dim_, data_, 1, &ans);
202  } else {
203  cublas_nrm2(GetCublasHandle(), dim_, data_, 1, &ans);
204  }
205  CuDevice::Instantiate().AccuProfile(__func__, tim);
206  if (ans != ans) {
207  KALDI_ERR << "NaN in norm " << *this;
208  }
209  return ans;
210  } else
211 #endif
212  {
213  return Vec().Norm(p);
214  }
215 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define KALDI_ERR
Definition: kaldi-error.h:147
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ operator()() [1/2]

CuValue<Real> operator() ( MatrixIndexT  i)
inline

Definition at line 196 of file cu-vector.h.

196  {
197  KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
198  static_cast<UnsignedMatrixIndexT>(dim_));
199  return CuValue<Real>(data_ + i);
200  }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define KALDI_PARANOID_ASSERT(cond)
Definition: kaldi-error.h:206
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250

◆ operator()() [2/2]

Real operator() ( MatrixIndexT  i) const
inline

Definition at line 204 of file cu-vector.h.

204  {
205  KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
206  static_cast<UnsignedMatrixIndexT>(dim_));
207  return CuValue<Real>(data_ + i); // will be casted to Real.
208  }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define KALDI_PARANOID_ASSERT(cond)
Definition: kaldi-error.h:206
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250

◆ Pow()

void Pow ( const CuVectorBase< Real > &  src,
Real  power 
)

Definition at line 419 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow(), and CuVectorBase< float >::Data().

419  {
420 #if HAVE_CUDA == 1
421  if (CuDevice::Instantiate().Enabled()) {
422  if (dim_ == 0) return;
423  CuTimer tim;
424  // for this particular kernel, x is #rows, y is #cols. so
425  // fake matrix with 1 row, Dim() cols.
426  dim3 dimBlock(CU1DBLOCK, 1);
427  dim3 dimGrid(n_blocks(Dim(), CU1DBLOCK), 1);
428  ::MatrixDim fake_matrix_dim = { 1, Dim(), 1 };
429  // num_cols is Dim(), num_rows is 1, stride is 1 (it's a don't-care).
430  cuda_pow(dimGrid, dimBlock, this->data_, src.Data(), power, fake_matrix_dim, 1);
431  CU_SAFE_CALL(cudaGetLastError());
432  CuDevice::Instantiate().AccuProfile("CuVectorBase::ApplyPow", tim);
433  } else
434 #endif
435  {
436  Vec().Pow(src.Vec(), power);
437  }
438 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ Range() [1/2]

CuSubVector<Real> Range ( const MatrixIndexT  o,
const MatrixIndexT  l 
)
inline

Definition at line 160 of file cu-vector.h.

Referenced by ModelCollapser::CollapseComponentsAffine(), CuRand< float >::RandGaussian(), and kaldi::UnitTestCuSubVector().

160  {
161  return CuSubVector<Real>(*this, o, l);
162  }

◆ Range() [2/2]

const CuSubVector<Real> Range ( const MatrixIndexT  o,
const MatrixIndexT  l 
) const
inline

Definition at line 164 of file cu-vector.h.

165  {
166  return CuSubVector<Real>(*this, o, l);
167  }

◆ ReplaceValue()

void ReplaceValue ( Real  orig,
Real  changed 
)

Definition at line 820 of file cu-vector.cc.

Referenced by CuVectorBase< float >::operator()(), and kaldi::UnitTestCuVectorReplaceValue().

820  {
821 #if HAVE_CUDA == 1
822  if (CuDevice::Instantiate().Enabled()) {
823  if (dim_ == 0) return;
824  CuTimer tim;
825  int dimBlock(CU1DBLOCK);
826  int dimGrid(n_blocks(dim_, CU1DBLOCK));
827  cuda_replace_value(dimGrid, dimBlock, data_, dim_, orig, changed);
828  CU_SAFE_CALL(cudaGetLastError());
829  CuDevice::Instantiate().AccuProfile(__func__, tim);
830  } else
831 #endif
832  {
833  Vec().ReplaceValue(orig, changed);
834  }
835 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ Scale()

void Scale ( Real  value)

Definition at line 1216 of file cu-vector.cc.

Referenced by BlockSoftmax::BackpropagateFnc(), kaldi::CuVectorUnitTestAddDiagMatMat(), kaldi::CuVectorUnitTestScale(), CuVectorBase< float >::Data(), kaldi::nnet2::PreconditionDirections(), DropoutMaskComponent::Propagate(), BatchNormComponent::Read(), RectifiedLinearComponent::RepairGradients(), Xent::ReportPerClass(), BatchNormComponent::Scale(), kaldi::UnitTestCuMatrixAddDiagVecMat(), kaldi::UnitTestVector(), AffineTransform::Update(), ConvolutionalComponent::Update(), and BatchNormComponent::Write().

1216  {
1217  #if HAVE_CUDA == 1
1218  if (CuDevice::Instantiate().Enabled()) {
1219  if (Dim() == 0 ) return;
1220 
1221  CuTimer tim;
1222  dim3 dimBlock(CU1DBLOCK);
1223  dim3 dimGrid(n_blocks(Dim(), CU1DBLOCK));
1224  ::MatrixDim d = { 1, Dim(), Dim() };
1225  cuda_scale(dimGrid, dimBlock, data_, value, d);
1226  CU_SAFE_CALL(cudaGetLastError());
1227 
1228  CuDevice::Instantiate().AccuProfile(__func__, tim);
1229  } else
1230  #endif
1231  {
1232  Vec().Scale(value);
1233  }
1234 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ Set()

void Set ( Real  value)

Definition at line 1135 of file cu-vector.cc.

Referenced by CuSparseMatrix< Real >::CuSparseMatrix(), kaldi::CuVectorUnitTestSum(), CuVectorBase< float >::Data(), FixedScaleComponent::InitFromConfig(), StatisticsPoolingComponent::Propagate(), CuSparseMatrix< Real >::Resize(), kaldi::TestCuVectorVecVecOne(), kaldi::nnet1::UnitTestLengthNorm(), and kaldi::nnet1::UnitTestSimpleSentenceAveragingComponent().

1135  {
1136 #if HAVE_CUDA == 1
1137  if (CuDevice::Instantiate().Enabled()) {
1138  CuTimer tim;
1139 
1140  dim3 dimBlock(CU1DBLOCK);
1141  dim3 dimGrid(n_blocks(Dim(), CU1DBLOCK));
1142  ::MatrixDim d = { 1, Dim(), Dim() };
1143 
1144  cuda_set_const(dimGrid, dimBlock, data_, value, d);
1145  CU_SAFE_CALL(cudaGetLastError());
1146  CuDevice::Instantiate().AccuProfile(__func__, tim);
1147  } else
1148 #endif
1149  {
1150  Vec().Set(value);
1151  }
1152 }
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
MatrixIndexT Dim() const
Dimensions.
Definition: cu-vector.h:69

◆ SetRandn()

void SetRandn ( )

Definition at line 281 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow(), LstmNonlinearityComponent::ConsolidateMemory(), kaldi::CuVectorUnitTestAddDiagMat2(), kaldi::CuVectorUnitTestAddDiagMatMat(), kaldi::CuVectorUnitTestAddMatVec(), kaldi::CuVectorUnitTestAddSpVec(), kaldi::CuVectorUnitTestAddVec(), kaldi::CuVectorUnitTestAddVecCross(), kaldi::CuVectorUnitTestAddVecExtra(), kaldi::CuVectorUnitTestAddVecVec(), kaldi::CuVectorUnitTestApplyCeiling(), kaldi::CuVectorUnitTestApplyCeilingNoCount(), kaldi::CuVectorUnitTestApplyExp(), kaldi::CuVectorUnitTestApplyFloor(), kaldi::CuVectorUnitTestApplyFloorNoCount(), kaldi::CuVectorUnitTestApplyLog(), kaldi::CuVectorUnitTestApplyPow(), kaldi::CuVectorUnitTestApplySoftMax(), kaldi::CuVectorUnitTestApproxEqual(), kaldi::CuVectorUnitTestCopyCross(), kaldi::CuVectorUnitTestCopyCross2(), kaldi::CuVectorUnitTestCopyElements(), kaldi::CuVectorUnitTestInvertElements(), kaldi::CuVectorUnitTestMax(), kaldi::CuVectorUnitTestMin(), kaldi::CuVectorUnitTestScale(), kaldi::CuVectorUnitTestVecVec(), FixedScaleComponent::InitFromConfig(), FixedBiasComponent::InitFromConfig(), ConvolutionComponent::PerturbParams(), TimeHeightConvolutionComponent::PerturbParams(), TdnnComponent::PerturbParams(), RepeatedAffineComponent::PerturbParams(), ConstantComponent::PerturbParams(), AffineComponent::PerturbParams(), BlockAffineComponent::PerturbParams(), PerElementScaleComponent::PerturbParams(), PerElementOffsetComponent::PerturbParams(), ConstantFunctionComponent::PerturbParams(), Convolutional1dComponent::PerturbParams(), ScaleAndOffsetComponent::PerturbParams(), kaldi::TestCuMatrixAddDiagVecMat(), kaldi::TestCuMatrixDivRowsVec(), kaldi::TestCuVectorAddColSumMat(), kaldi::TestCuVectorAddDiagMat2(), kaldi::TestCuVectorAddDiagMat2OnVariousShapes(), kaldi::TestCuVectorAddDiagMatMat(), kaldi::TestCuVectorAddDiagMatMatShape(), kaldi::TestCuVectorAddRowSumMat(), kaldi::TestCuVectorApplyCeiling(), kaldi::TestCuVectorApplyCeilingNoCount(), kaldi::TestCuVectorApplyFloor(), kaldi::TestCuVectorApplyFloorNoCount(), kaldi::TestCuVectorCopyFromVec(), kaldi::TestCuVectorSoftmax(), kaldi::TestCuVectorSum(), kaldi::TestCuVectorVecVecOne(), kaldi::UnitTestBackpropLstmNonlinearity(), kaldi::UnitTestCuMatrixAddDiagVecMat(), kaldi::UnitTestCuMatrixAddMatDiagVec(), kaldi::UnitTestCuMatrixCopyColsFromVec(), kaldi::UnitTestCuMatrixCopyRowsFromVec(), kaldi::UnitTestCuSubVector(), kaldi::UnitTestCuVectorIO(), kaldi::nnet2::UnitTestFixedBiasComponent(), kaldi::nnet2::UnitTestFixedScaleComponent(), kaldi::nnet2::UnitTestGenericComponentInternal(), kaldi::UnitTestVecMatVec(), and kaldi::UnitTestVector().

281  {
282  if (dim_ == 0) return;
283  CuRand<Real> tmp;
284  tmp.RandGaussian(this);
285 }
friend class CuRand< Real >
Definition: cu-vector.h:66
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250

◆ SetRandUniform()

void SetRandUniform ( )

Definition at line 288 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow().

288  {
289  if (dim_ == 0) return;
290  CuRand<Real> tmp;
291  tmp.RandUniform(this);
292 }
friend class CuRand< Real >
Definition: cu-vector.h:66
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250

◆ SetZero()

void SetZero ( )

Math operations.

Definition at line 1098 of file cu-vector.cc.

Referenced by StatisticsPoolingComponent::Backprop(), CuVectorBase< float >::Data(), DiscriminativeObjectiveInfo::Reset(), BatchNormComponent::Scale(), and BatchNormComponent::ZeroStats().

1098  {
1099  if (dim_==0 || data_==NULL) return;
1100 #if HAVE_CUDA == 1
1101  if (CuDevice::Instantiate().Enabled()) {
1102  KALDI_ASSERT(dim_>=0);
1103  KALDI_ASSERT(data_!=NULL);
1104  CuTimer tim;
1105  CU_SAFE_CALL(cudaMemsetAsync(data_, 0, dim_*sizeof(Real),
1106  cudaStreamPerThread));
1107  CuDevice::Instantiate().AccuProfile("CuVector::SetZero", tim);
1108  } else
1109 #endif
1110  {
1111  Vec().SetZero();
1112  }
1113 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

◆ Sum()

Real Sum ( ) const

Definition at line 297 of file cu-vector.cc.

Referenced by CuVectorBase< float >::ApplyPow(), CuVectorBase< float >::Ceiling(), LstmNonlinearityComponent::ConsolidateMemory(), kaldi::CuVectorUnitTestCopyDiagFromMat(), Xent::Eval(), CuVectorBase< float >::Floor(), AffineComponentPreconditionedOnline::GetScalingFactor(), kaldi::MeanVariance(), SoftmaxComponent::MixUp(), OnlinePreconditioner::PreconditionDirectionsInternal(), kaldi::nnet3::PrintParameterStats(), SigmoidComponent::RepairGradients(), TanhComponent::RepairGradients(), RectifiedLinearComponent::RepairGradients(), Xent::Report(), CuSparseMatrix< Real >::Sum(), CuMatrixBase< float >::Sum(), kaldi::TestCuVectorSum(), CuPackedMatrix< Real >::Trace(), and kaldi::UnitTestCuMatrixAddDiagVecMat().

297  {
298  if (dim_ == 0)
299  return 0.0;
300 #if HAVE_CUDA == 1
301  if (CuDevice::Instantiate().Enabled()) {
302  Real result;
303  CuTimer tim;
304 
305  // Small vectors are copied to RAM and reduced on CPU.
306  // The length is chosen by cu-vector-speed-test
307  if (dim_ < 4096) {
308  Vector<Real> ans_cpu(*this);
309  result = ans_cpu.Sum();
310  } else {
311  // Use no more than 256 blocks (still too many?)
312  int dimBlock = CU1DBLOCK;
313  int dimGrid = n_blocks(dim_, dimBlock);
314  if (dimGrid > 256) {
315  dimGrid = 256;
316  }
317  CuVector<Real> ans(dimGrid, kUndefined);
318  cuda_vec_sum(dimGrid, dimBlock, data_, ans.Data(), dim_, 1);
319  CU_SAFE_CALL(cudaGetLastError());
320  Vector<Real> ans_cpu(ans);
321  result = ans_cpu.Sum();
322  }
323 
324  CuDevice::Instantiate().AccuProfile(__func__, tim);
325  return result;
326  } else
327 #endif
328  {
329  return Vec().Sum();
330  }
331 }
Real * data_
GPU data pointer (or regular data pointer if CUDA is not compiled in or we have no GPU)...
Definition: cu-vector.h:248
#define CU1DBLOCK
Definition: cu-matrixdim.h:57
MatrixIndexT dim_
dimension of the vector
Definition: cu-vector.h:250
const VectorBase< Real > & Vec() const
Definition: cu-vector.h:235

◆ Vec() [1/2]

◆ Vec() [2/2]

VectorBase<Real>& Vec ( )
inline

Definition at line 238 of file cu-vector.h.

238  {
239  return *(reinterpret_cast<VectorBase<Real>* >(this));
240  }

Friends And Related Function Documentation

◆ cu::Splice

void cu::Splice ( const CuMatrixBase< Real > &  src,
const CuArray< int32 > &  frame_offsets,
CuMatrixBase< Real > *  tgt 
)
friend

◆ CuMatrixBase< Real >

friend class CuMatrixBase< Real >
friend

Definition at line 54 of file cu-vector.h.

◆ CuPackedMatrix< Real >

friend class CuPackedMatrix< Real >
friend

Definition at line 56 of file cu-vector.h.

◆ CuRand< Real >

friend class CuRand< Real >
friend

Definition at line 66 of file cu-vector.h.

◆ CuSpMatrix< Real >

friend class CuSpMatrix< Real >
friend

Definition at line 57 of file cu-vector.h.

◆ CuTpMatrix< Real >

friend class CuTpMatrix< Real >
friend

Definition at line 58 of file cu-vector.h.

◆ CuVectorBase< double >

friend class CuVectorBase< double >
friend

Definition at line 53 of file cu-vector.h.

◆ CuVectorBase< float >

friend class CuVectorBase< float >
friend

Definition at line 52 of file cu-vector.h.

◆ MatrixBase< Real >

friend class MatrixBase< Real >
friend

Definition at line 55 of file cu-vector.h.

◆ VecVec

OtherReal VecVec ( const CuVectorBase< OtherReal > &  v1,
const CuVectorBase< OtherReal > &  v2 
)
friend

Member Data Documentation

◆ data_

◆ dim_


The documentation for this class was generated from the following files: