36 using namespace kaldi;
41 template<
typename Real>
43 return (
sizeof(Real) == 8 ?
"<double>" :
"<float>");
54 for (; tim.
Elapsed() < time_in_secs; iter++) {
59 KALDI_LOG<<
"For CuMatrix::TestCuMatrixSum" << NameOf<Real>() <<
", for dim = " 60 << dim <<
", speed was " << gflops <<
" gigaflops, result = " << result;
71 for (; tim.
Elapsed() < time_in_secs; iter++) {
76 KALDI_LOG<<
"For CuMatrix::TestCuMatrixMax" << NameOf<Real>() <<
", for dim = " 77 << dim <<
", speed was " << gflops <<
" gigaflops, result = " << result;
88 for (; tim.
Elapsed() < time_in_secs; iter++) {
93 KALDI_LOG<<
"For CuMatrix::TestCuMatrixMin" << NameOf<Real>() <<
", for dim = " 94 << dim <<
", speed was " << gflops <<
" gigaflops, result = " << result;
106 for (; tim.
Elapsed() < time_in_secs; iter++) {
112 KALDI_LOG<<
"For CuMatrix::DivRowsVec" << NameOf<Real>() <<
", for dim = " 113 << dim <<
", speed was " << gflops <<
" gigaflops.";
123 for (; tim.
Elapsed() < time_in_secs; iter++) {
128 KALDI_LOG<<
"For CuMatrix::TransposeNS" << NameOf<Real>() <<
", for dim = " 129 << dim <<
", speed was " << gflops <<
" gigaflops.";
139 for (; tim.
Elapsed() < time_in_secs; iter++) {
144 KALDI_LOG<<
"For CuMatrix::TransposeS" << NameOf<Real>() <<
", for dim = " 145 << dim <<
", speed was " << gflops <<
" gigaflops.";
157 for (; tim.
Elapsed() < time_in_secs; iter++) {
159 Mf.CopyFromMat(Md,
kTrans);
163 KALDI_LOG<<
"For CuMatrix::TransposeCross" << NameOf<Real>() <<
", for dim = " 164 << dim <<
", speed was " << gflops <<
" gigaflops.";
170 int32 num_col_blocks) {
172 CuMatrix<Real> A(dim, dim), B(dim * num_row_blocks, dim * num_col_blocks);
177 for (;tim.
Elapsed() < time_in_secs; iter++) {
178 for (
int32 i = 0;
i < num_row_blocks;
i++) {
179 for (
int32 j = 0;
j < num_col_blocks;
j++) {
185 BaseFloat gflops = (fdim * fdim * num_row_blocks * num_col_blocks * iter)
187 KALDI_LOG <<
"For CuMatrix::AddMat" << NameOf<Real>() <<
", for dim = " 188 << dim <<
"numRowBlocks = "<< num_row_blocks <<
"numColBlocks = " 189 << num_col_blocks <<
", speed was " << gflops <<
" gigaflops.";
193 int32 num_row_blocks,
194 int32 num_col_blocks) {
196 CuMatrix<Real> A(dim, dim), B(dim * num_row_blocks, dim * num_col_blocks);
201 for (;tim.
Elapsed() < time_in_secs; iter++) {
202 A.AddMatBlocks(0.0, B);
205 BaseFloat gflops = (fdim * fdim * num_row_blocks * num_col_blocks * iter)
207 KALDI_LOG <<
"For CuMatrix::AddMatBlocks" << NameOf<Real>() <<
", for dim = " 208 << dim <<
", numRowBlocks = "<< num_row_blocks <<
", numColBlocks = " 209 << num_col_blocks <<
", speed was " << gflops <<
" gigaflops.";
219 for (;tim.
Elapsed() < time_in_secs; iter++) {
225 KALDI_LOG <<
"For CuMatrix::AddMatMat" << NameOf<Real>() <<
", for dim = " 226 << dim <<
", speed was " << gflops <<
" gigaflops.";
230 std::vector<CuMatrix<Real>* > a(batchCount), b(batchCount), c(batchCount);
231 std::vector<CuSubMatrix<Real>* > A, B, C;
233 for (
int32 i = 0;
i < batchCount;
i++) {
250 for (;tim.
Elapsed() < time_in_secs; iter++) {
252 static_cast<Real>(0.0));
254 for (
int32 i = 0;
i< batchCount;
i++) {
255 delete a[
i];
delete b[
i];
delete c[
i];
256 delete A[
i];
delete B[
i];
delete C[
i];
260 BaseFloat gflops = (fdim * fdim * fdim * iter * batchCount) / (tim.
Elapsed() * 1.0e+09);
261 KALDI_LOG <<
"For CuMatrix::AddMatMatBatched" << NameOf<Real>() <<
", for dim = " << dim
262 <<
", batchSize = " << batchCount <<
", speed was " << gflops <<
" gigaflops.";
273 for (;tim.
Elapsed() < time_in_secs; iter++)
278 KALDI_LOG <<
"For CuMatrix::AddDiagVecMat" << NameOf<Real>()
279 << (trans ==
kTrans ?
"[trans]" :
"[no-trans]")
280 <<
", for dim = " << dim <<
", speed was " 281 << gflops <<
" gigaflops.";
295 for (;tim.
Elapsed() < time_in_secs; iter++) {
302 KALDI_LOG <<
"For CuMatrix::TestCuInvertPosDef" << NameOf<Real>() <<
", for dim = " 303 << dim <<
", speed was " << gflops <<
" gigaflops.";
307 template<
typename Real>
319 std::vector<MatrixElement<Real> > labels;
320 for(
int i = 0;
i < n_r;
i++) {
321 for(
int j = 0;
j < n_c;
j++) {
323 if (
Rand() % n_c == 0) {
334 Real a = 0.0, b = 0.0;
335 for (;tim.
Elapsed() < time_in_secs; iter++)
339 KALDI_LOG <<
"For CuMatrix::CompObjfAndDeriv" << NameOf<Real>() <<
", for dim = " 340 << dim <<
", speed was " << gflops <<
" gigaflops.";
367 template<
typename Real>
370 int32 dimM = dim, dimN = dimM +
Rand() % 5;
378 std::vector<int32> Hmax(dimM);
384 for (;tim.
Elapsed() < time_in_secs; iter++)
390 KALDI_LOG <<
"For CuMatrix::FindRowMaxId" << NameOf<Real>() <<
", for dim = " 391 << dim <<
", speed was " << gflops <<
" gigaflops.";
396 Real max=-1.0e+20;
int32 idx=-1;
398 if(Hi(r,c) > max) { idx=c; max=Hi(r,c); }
402 std::vector<int32> Hmax2(dimM);
417 for (;tim.
Elapsed() < time_in_secs; iter++) {
423 KALDI_LOG <<
"For CuMatrix::Sigmoid" << NameOf<Real>() <<
", for dim = " 424 << dim <<
", speed was " << gflops <<
" gigaflops.";
434 for (;tim.
Elapsed() < time_in_secs; iter++) {
440 KALDI_LOG <<
"For CuMatrix::Heaviside" << NameOf<Real>() <<
", for dim = " 441 << dim <<
", speed was " << gflops <<
" gigaflops.";
448 int32 group_size = 5;
454 for (;tim.
Elapsed() < time_in_secs; iter++) {
455 M.MulRowsGroupMat(N);
459 BaseFloat gflops = (fdim * fdim * group_size * iter) / (tim.
Elapsed() * 1.0e+09);
460 KALDI_LOG <<
"For CuMatrix::MulRowsGroupMat" << NameOf<Real>() <<
", for dim = " 461 << dim <<
", speed was " << gflops <<
" gigaflops.";
472 for (; tim.
Elapsed() < time_in_secs; iter++) {
473 N.DiffSoftmaxPerRow(M, L);
478 KALDI_LOG <<
"For CuMatrix::DiffSoftmaxPerRow" << NameOf<Real>() <<
", for dim = " 479 << dim <<
", speed was " << gflops <<
" gigaflops.";
490 for (; tim.
Elapsed() < time_in_secs; iter++) {
491 N.DiffLogSoftmaxPerRow(M, L);
496 KALDI_LOG <<
"For CuMatrix::DiffLogSoftmaxPerRow" << NameOf<Real>() <<
", for dim = " 497 << dim <<
", speed was " << gflops <<
" gigaflops.";
507 for (;tim.
Elapsed() < time_in_secs; iter++) {
513 KALDI_LOG <<
"For CuMatrix::Softmax" << NameOf<Real>() <<
", for dim = " 514 << dim <<
", speed was " << gflops <<
" gigaflops.";
525 for (;tim.
Elapsed() < time_in_secs; iter++) {
531 KALDI_LOG <<
"For CuMatrix::LogSoftmax" << NameOf<Real>() <<
", for dim = " 532 << dim <<
", speed was " << gflops <<
" gigaflops.";
538 int32 group_size = 4;
543 for (;tim.
Elapsed() < time_in_secs; iter++)
548 KALDI_LOG <<
"For CuMatrix::GroupPnorm" << NameOf<Real>() <<
", for dim = " 549 << dim <<
", speed was " << gflops <<
" gigaflops.";
555 int32 group_size = 8;
564 for (; tim.
Elapsed() < time_in_secs; iter++)
565 id.DiffGroupPnorm(iv, ov, od, 2.0);
569 KALDI_LOG <<
"For CuMatrix::DiffGroupPnorm" << NameOf<Real>() <<
", for dim = " 570 << dim <<
", speed was " << gflops <<
" gigaflops.";
575 int32 group_size = 4;
580 for (;tim.
Elapsed() < time_in_secs; iter++)
585 KALDI_LOG <<
"For CuMatrix::GroupMax" << NameOf<Real>() <<
", for dim = " 586 << dim <<
", speed was " << gflops <<
" gigaflops.";
595 for (; tim.
Elapsed() < time_in_secs;) {
596 for (
int group_size = 1; group_size <= dim; group_size++) {
597 if (dim % group_size == 0) {
607 KALDI_LOG <<
"For CuMatrix::GroupMax (all group sizes)" << NameOf<Real>()
608 <<
", for dim = " << dim <<
", speed was " << gflops
614 int32 group_size = 4;
615 CuMatrix<Real> M(dim, dim), N(dim, dim / group_size), O(dim, dim);
621 for (;tim.
Elapsed() < time_in_secs; iter++)
626 KALDI_LOG <<
"For CuMatrix::GroupMaxDeriv" << NameOf<Real>() <<
", for dim = " 627 << dim <<
", speed was " << gflops <<
" gigaflops.";
640 for (;tim.
Elapsed() < time_in_secs; iter++) {
645 KALDI_LOG <<
"For CuMatrix::TraceMatMat" << NameOf<Real>()
646 << (trans ==
kTrans ?
" [transposed]" :
"") <<
", for dim = " 647 << dim <<
", speed was " << gflops <<
" gigaflops.";
659 for (;tim.
Elapsed() < time_in_secs; iter++)
664 KALDI_LOG <<
"For CuMatrix::Cholesky" << NameOf<Real>()
665 <<
", for dim = " << dim <<
", speed was " << gflops <<
" gigaflops.";
676 for (; tim.
Elapsed() < time_in_secs; iter++) {
683 KALDI_LOG <<
"For CuMatrix::CopyLowerToUpper" << NameOf<Real>() <<
", for dim = " 684 << dim <<
", speed was " << gflops <<
" gigaflops.";
696 for (; tim.
Elapsed() < time_in_secs; iter++) {
706 KALDI_LOG <<
"For CuMatrix::CopyFromTp" << (trans ==
kNoTrans ?
"[NoTrans]":
"[Trans]")
707 << NameOf<Real>() <<
", for dim = " 708 << dim <<
", speed was " << gflops <<
" gigaflops.";
720 for (; tim.
Elapsed() < time_in_secs; iter++) {
730 KALDI_LOG <<
"For CuMatrix::CopyFromSp" << NameOf<Real>() <<
", for dim = " 731 << dim <<
", speed was " << gflops <<
" gigaflops.";
742 for (; tim.
Elapsed() < time_in_secs; iter++) {
749 KALDI_LOG <<
"For CuMatrix::CopyUpperToLower" << NameOf<Real>() <<
", for dim = " 750 << dim <<
", speed was " << gflops <<
" gigaflops.";
758 for (; tim.
Elapsed() < time_in_secs; iter++) {
763 KALDI_LOG <<
"For CuMatrix::TestCuMatrixResize" << NameOf<Real>() <<
", for dim = " 764 << dim <<
", speed was " << gflops <<
" gigaflops.";
773 for (; tim.
Elapsed() < time_in_secs; iter++)
777 KALDI_LOG <<
"For CuMatrix::SetZeroAboveDiag" << NameOf<Real>() <<
", for dim = " 778 << dim <<
", speed was " << gflops <<
" gigaflops.";
781 template<
typename Real>
784 int32 dimM = dim, dimN = dim;
787 std::vector<Int32Pair> indices;
788 std::vector<Real> reference;
789 std::vector<Real> output;
791 int32 num_index = dim * dim;
792 output.resize(num_index);
793 for (
int32 j = 0;
j < num_index;
j++) {
800 indices.push_back(tmp_pair);
801 reference.push_back(H(r, c));
805 for (; tim.
Elapsed()< time_in_secs; iter++)
806 H.
Lookup(indices, &(output[0]));
810 KALDI_LOG <<
"For CuMatrix::Lookup" << NameOf<Real>() <<
", for dim = " 811 << dim <<
", speed was " << gflops <<
" gigaflops.";
820 std::vector<int32> reorder(dim);
828 for (; tim.
Elapsed() < time_in_secs; iter++) {
829 M.CopyRows(N, reorder_cuda);
834 KALDI_LOG <<
"For CuMatrix::CopyRows" << NameOf<Real>() <<
", for dim = " 835 << dim <<
", speed was " << gflops <<
" gigaflops.";
844 std::vector<const Real*> reorder_src(dim, NULL);
852 for (; tim.
Elapsed() < time_in_secs; iter++) {
853 M.CopyRows(reorder_src_cuda);
858 KALDI_LOG <<
"For CuMatrix::CopyRows" << NameOf<Real>() <<
", for dim = " 859 << dim <<
", speed was " << gflops <<
" gigaflops.";
868 std::vector<Real*> reorder_dst(dim, NULL);
876 for (; tim.
Elapsed() < time_in_secs; iter++) {
877 M.CopyToRows(reorder_dst_cuda);
882 KALDI_LOG <<
"For CuMatrix::CopyToRows" << NameOf<Real>() <<
", for dim = " 883 << dim <<
", speed was " << gflops <<
" gigaflops.";
892 std::vector<int32> reorder(dim);
900 for (; tim.
Elapsed() < time_in_secs; iter++) {
901 M.AddRows(0.5, N, reorder_cuda);
906 KALDI_LOG <<
"For CuMatrix::AddRows" << NameOf<Real>() <<
", for dim = " 907 << dim <<
", speed was " << gflops <<
" gigaflops.";
916 std::vector<const Real*> reorder_src(dim, NULL);
924 for (; tim.
Elapsed() < time_in_secs; iter++) {
925 M.AddRows(0.5, reorder_src_cuda);
930 KALDI_LOG <<
"For CuMatrix::AddRows" << NameOf<Real>() <<
", for dim = " 931 << dim <<
", speed was " << gflops <<
" gigaflops.";
940 std::vector<Real*> reorder_dst(dim, NULL);
948 for (; tim.
Elapsed() < time_in_secs; iter++) {
949 M.AddToRows(0.5, reorder_dst_cuda);
954 KALDI_LOG <<
"For CuMatrix::AddToRows" << NameOf<Real>() <<
", for dim = " 955 << dim <<
", speed was " << gflops <<
" gigaflops.";
964 std::vector<Int32Pair> indexes(dim);
966 indexes[
i].first =
i;
967 indexes[
i].second =
i + 1;
973 for (; tim.
Elapsed() < time_in_secs; iter++) {
974 M.AddRowRanges(N, indexes_cuda);
979 KALDI_LOG <<
"For CuMatrix::AddRowRanges" << NameOf<Real>() <<
", for dim = " 980 << dim <<
", speed was " << gflops <<
" gigaflops.";
991 std::vector<std::vector<std::pair<MatrixIndexT, Real> > > pairs(dim);
992 for (
auto && row : pairs) {
1000 for (;tim.
Elapsed() < time_in_secs; iter++) {
1005 KALDI_LOG <<
"For CuSparseMatrix::TraceMatSmat" << NameOf<Real>()
1006 << (trans ==
kTrans ?
" [transposed]" :
"") <<
", for dim = " 1007 << dim <<
", speed was " << gflops <<
" gigaflops.";
1013 std::vector<int32> sizes;
1014 sizes.push_back(16);
1015 sizes.push_back(32);
1016 sizes.push_back(64);
1017 sizes.push_back(128);
1018 sizes.push_back(256);
1019 sizes.push_back(512);
1020 sizes.push_back(1024);
1021 int32 ns = sizes.size();
1022 for (
int32 s = 0; s < ns; s++)
1023 TestCuMatrixDivRowsVec<Real>(sizes[s]);
1024 for (
int32 s = 0; s < ns; s++)
1025 TestCuMatrixResize<Real>(sizes[s]);
1026 for (
int32 s = 0; s < ns; s++)
1027 TestCuMatrixAddMat<Real>(sizes[s], 3, 3);
1028 for (
int32 s = 0; s < ns; s++)
1029 TestCuMatrixAddMatBlocks<Real>(sizes[s], 3, 3);
1030 for (
int32 s = 0; s < ns; s++)
1031 TestCuMatrixMatMat<Real>(sizes[s]);
1032 for (
int32 s = 0; s + 1 < ns; s++)
1033 TestCuMatrixMatMatBatched<Real>(sizes[s], 10);
1034 for (
int32 s = 0; s < ns; s++) {
1035 TestCuMatrixAddDiagVecMat<Real>(sizes[s],
kNoTrans);
1036 TestCuMatrixAddDiagVecMat<Real>(sizes[s],
kTrans);
1038 for (
int32 s = 0; s < ns; s++)
1039 TestSymInvertPosDef<Real>(sizes[s]);
1040 for (
int32 s = 0; s < ns; s++)
1041 TestCuMatrixCholesky<Real>(sizes[s]);
1042 for (
int32 s = 0; s < ns; s++)
1043 TestCuMatrixSigmoid<Real>(sizes[s]);
1044 for (
int32 s = 0; s < ns; s++)
1045 TestCuMatrixHeaviside<Real>(sizes[s]);
1046 for (
int32 s = 0; s < ns; s++)
1047 TestCuFindRowMaxId<Real>(sizes[s]);
1048 for (
int32 s = 0; s < ns; s++)
1049 TestCuMatrixCompObjfAndDeriv<Real>(sizes[s]);
1050 for (
int32 s = 0; s < ns; s++)
1051 TestCuMatrixMulRowsGroupMat<Real>(sizes[s]);
1052 for (
int32 s = 0; s < ns; s++)
1053 TestCuMatrixSoftmax<Real>(sizes[s]);
1054 for (
int32 s = 0; s < ns; s++)
1055 TestCuMatrixDiffSoftmax<Real>(sizes[s]);
1056 for (
int32 s = 0; s < ns; s++)
1057 TestCuMatrixDiffLogSoftmax<Real>(sizes[s]);
1058 for (
int32 s = 0; s < ns; s++)
1059 TestCuMatrixLogSoftmax<Real>(sizes[s]);
1060 for (
int32 s = 0; s < ns; s++)
1061 TestCuMatrixGroupPnorm<Real>(sizes[s]);
1062 for (
int32 s = 0; s < ns; s++)
1063 TestCuMatrixDiffGroupPnorm<Real>(sizes[s]);
1064 for (
int32 s = 0; s < ns; s++)
1065 TestCuMatrixGroupMax<Real>(sizes[s]);
1066 for (
int32 s = 0; s < ns; s++)
1067 TestCuMatrixGroupMaxAllGroupSizes<Real>(sizes[s]);
1068 for (
int32 s = 0; s < ns; s++)
1069 TestCuMatrixGroupMaxDeriv<Real>(sizes[s]);
1070 for (
int32 s = 0; s < ns; s++)
1071 TestCuMatrixTraceMatMat<Real>(sizes[s]);
1072 for (
int32 s = 0; s < ns; s++)
1073 TestCuSparseMatrixTraceMatSmat<Real>(sizes[s]);
1074 for (
int32 s = 0; s < ns; s++)
1075 TestCuMatrixCopyLowerToUpper<Real>(sizes[s]);
1076 for (
int32 s = 0; s < ns; s++)
1077 TestCuMatrixCopyFromTp<Real>(sizes[s],
kNoTrans);
1078 for (
int32 s = 0; s < ns; s++)
1079 TestCuMatrixCopyFromTp<Real>(sizes[s],
kTrans);
1080 for (
int32 s = 0; s < ns; s++)
1081 TestCuMatrixCopyFromSp<Real>(sizes[s]);
1082 for (
int32 s = 0; s < ns; s++)
1083 TestCuMatrixCopyUpperToLower<Real>(sizes[s]);
1084 for (
int32 s = 0; s < ns; s++)
1085 TestCuMatrixSetZeroAboveDiag<Real>(sizes[s]);
1086 for (
int32 s = 0; s + 2 < ns; s++)
1087 TestCuMatrixLookup<Real>(sizes[s]);
1088 for (
int32 s = 0; s < ns; s++)
1089 TestCuMatrixCopyRows1<Real>(sizes[s]);
1090 for (
int32 s = 0; s < ns; s++)
1091 TestCuMatrixCopyRows2<Real>(sizes[s]);
1092 for (
int32 s = 0; s < ns; s++)
1093 TestCuMatrixCopyToRows<Real>(sizes[s]);
1094 for (
int32 s = 0; s < ns; s++)
1095 TestCuMatrixAddRows1<Real>(sizes[s]);
1096 for (
int32 s = 0; s < ns; s++)
1097 TestCuMatrixAddRows2<Real>(sizes[s]);
1098 for (
int32 s = 0; s < ns; s++)
1099 TestCuMatrixAddToRows<Real>(sizes[s]);
1100 for (
int32 s = 0; s < ns; s++)
1101 TestCuMatrixAddRowRanges<Real>(sizes[s]);
1102 for (
int32 s = 0; s < ns; s++)
1103 TestCuMatrixTransposeCross<Real>(sizes[s]);
1104 for (
int32 s = 0; s < ns; s++)
1105 TestCuMatrixTransposeS<Real>(sizes[s]);
1106 for (
int32 s = 0; s < ns; s++)
1107 TestCuMatrixTransposeNS<Real>(sizes[s]);
1108 for (
int32 s = 0; s < ns; s++)
1109 TestCuMatrixSum<Real>(sizes[s]);
1110 for (
int32 s = 0; s < ns; s++)
1111 TestCuMatrixMax<Real>(sizes[s]);
1112 for (
int32 s = 0; s < ns; s++)
1113 TestCuMatrixMin<Real>(sizes[s]);
1124 for (loop = 0; loop < 2; loop++) {
1126 CuDevice::Instantiate().SelectGpuId(
"no");
1128 CuDevice::Instantiate().SelectGpuId(
"yes");
1131 kaldi::CudaMatrixSpeedTest<float>();
1133 if (CuDevice::Instantiate().DoublePrecisionSupported()) {
1134 kaldi::CudaMatrixSpeedTest<double>();
1136 KALDI_WARN <<
"Double precision not supported";
1139 kaldi::CudaMatrixSpeedTest<double>();
1143 CuDevice::Instantiate().PrintProfile();
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
void TestCuMatrixCholesky(int32 dim)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void TestCuMatrixLookup(int32 dim)
Packed symmetric matrix class.
void TestCuMatrixCopyToRows(int32 dim)
void TestCuMatrixSetZeroAboveDiag(int32 dim)
void TestCuMatrixGroupMaxAllGroupSizes(int32 dim)
void GroupMax(const CuMatrixBase< Real > &src)
Apply the function y(i) = max_{j = i*G}^{(i+1)*G-1} x_j, where G = x.NumCols() / y.NumCols() must be an integer.
void TestCuMatrixTransposeCross(int32 dim)
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
void CopyToVec(std::vector< T > *dst) const
This function resizes *dst if needed.
void TestCuMatrixResize(int32 size_multiple)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void TestCuMatrixSum(int32 dim)
void TestSymInvertPosDef(int32 dim)
void TestCuMatrixDiffSoftmax(int32 dim)
void ApplyFloor(Real floor_val)
void TestCuMatrixMulRowsGroupMat(int32 dim)
void TestCuMatrixAddRowRanges(int32 dim)
void AddToDiag(Real value)
Adds "value" to the diagonal elements of the matrix.
void DivRowsVec(const CuVectorBase< Real > &div)
Divide the i'th row by div[i].
A class for storing matrices.
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
void TestCuMatrixMatMatBatched(int32 dim, int32 batchCount)
void TestCuMatrixMin(int32 dim)
void Min(const CuMatrixBase< Real > &A)
Do, elementwise, *this = min(*this, A).
void TestCuMatrixCopyRows2(int32 dim)
void SetRandn()
Sets to random values of a normal distribution.
void InvertElements()
invert the matrix by elements.
void TestCuMatrixCopyUpperToLower(int32 dim)
void Lookup(const std::vector< Int32Pair > &indexes, Real *output) const
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
void SymInvertPosDef()
Inversion for positive definite symmetric matrices.
void CompObjfAndDeriv(const std::vector< MatrixElement< Real > > &elements, const CuMatrix< Real > &A, Real *tot_objf, Real *tot_weight)
Here, A is interpreted as a matrix of probabilities, and "elements" as a list of posteriors (possibly...
void AddMatMatBatched(const Real alpha, std::vector< CuSubMatrix< Real > * > &C, const std::vector< CuSubMatrix< Real > * > &A, MatrixTransposeType transA, const std::vector< CuSubMatrix< Real > * > &B, MatrixTransposeType transB, const Real beta)
Does multiple matrix multiplications, executing them in parallel using cuBLAS's gemmBatched if we are...
void TestCuMatrixGroupPnorm(int32 dim)
void TestCuMatrixAddMat(int32 dim, int32 num_row_blocks, int32 num_col_blocks)
void Max(const CuMatrixBase< Real > &A)
Do, elementwise, *this = max(*this, A).
void TestCuMatrixDiffLogSoftmax(int32 dim)
void TestCuMatrixTransposeNS(int32 dim)
void CopyFromSp(const CuSpMatrix< Real > &M)
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
void TestCuMatrixAddDiagVecMat(int32 dim, MatrixTransposeType trans)
void SetZero()
Math operations, some calling kernels.
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
void TestCuMatrixAddRows2(int32 dim)
void SymAddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transA, Real beta)
*this = beta * *this + alpha * M M^T, for symmetric matrices.
void TestCuMatrixCopyLowerToUpper(int32 dim)
void TestCuMatrixMax(int32 dim)
void SetRandn()
Sets to random values of a normal distribution.
void TestCuMatrixGroupMax(int32 dim)
void GroupPnorm(const CuMatrixBase< Real > &src, Real pow)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ (pow)) ^ (1 / pow) where G = x...
Packed symmetric matrix class.
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
void Cholesky(CuMatrixBase< Real > *inv_cholesky=NULL)
This function sets *this to the Cholesky factor of *this (i.e.
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
This class is used for a piece of a CuMatrix.
void CudaMatrixSpeedTest()
void GroupMaxDeriv(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
int Rand(struct RandomState *state)
static void TestCuMatrixCompObjfAndDeriv(int32 dim)
void FindRowMaxId(CuArray< int32 > *id) const
Find the id of the maximal element for each row (resizes the 'id' array to the appropriate size)...
void TestCuMatrixGroupMaxDeriv(int32 dim)
void TestCuSparseMatrixTraceMatSmat(int32 dim)
void TestCuMatrixCopyFromTp(int32 dim, MatrixTransposeType trans)
void TestCuMatrixDivRowsVec(int32 dim)
#define KALDI_ASSERT(cond)
void TestCuMatrixSoftmax(int32 dim)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void TestCuMatrixAddRows1(int32 dim)
void TestCuMatrixCopyFromSp(int32 dim)
void CopyFromTp(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Real TraceMatSmat(const MatrixBase< Real > &A, const SparseMatrix< Real > &B, MatrixTransposeType trans)
void TestCuMatrixTransposeS(int32 dim)
void LogSoftMaxPerRow(const CuMatrixBase< Real > &src)
LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row...
void TestCuMatrixDiffGroupPnorm(int32 dim)
void TestCuMatrixAddToRows(int32 dim)
void TestCuMatrixAddMatBlocks(int32 dim, int32 num_row_blocks, int32 num_col_blocks)
static void TestCuFindRowMaxId(int32 dim)
void TestCuMatrixSigmoid(int32 dim)
void TestCuMatrixTraceMatMat(int32 dim)
double Elapsed() const
Returns time in seconds.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
void TestCuMatrixCopyRows1(int32 dim)
void TestCuMatrixLogSoftmax(int32 dim)
void SetZeroAboveDiag()
Zeroes all elements for which col > row.
void TestCuMatrixMatMat(int32 dim)
void AddDiagVecMat(const Real alpha, const CuVectorBase< Real > &v, const CuMatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
void TestCuMatrixHeaviside(int32 dim)
const Real * RowData(MatrixIndexT r) const
Get raw row pointer (const).