36 using namespace kaldi;
44 template<
typename Real>
52 template<
typename Real>
63 template<
typename Real>
75 template<
typename Real>
111 template<
typename Real>
142 template<
typename Real>
164 template<
typename Real>
180 template<
typename Real>
197 template<
typename Real>
203 BaseFloat lower_limit = -0.2, upper_limit = 0.2;
218 template<
typename Real>
238 template<
typename Real>
253 template<
typename Real>
269 template<
typename Real>
287 template<
typename Real>
290 Real power[] = { 1.4, 1.6, 0.1234, 2.123, 0, 1, 2,
291 std::numeric_limits<Real>::infinity() };
292 for (
int32 K = 5; K < 7; K++) {
293 for (
int32 i = 0;
i < 2 *
sizeof(power) /
sizeof(Real); ++
i) {
294 Real p = power[
i / 2];
312 template<
typename Real>
316 for (
int32 K = 5; K < 7; K++) {
320 if (rand () % 2 == 0)
333 template<
typename Real>
348 template<
typename Real>
359 if (pow != 1.0 && pow != 2.0 && pow != 3.0)
372 template<
typename Real>
394 template<
typename Real>
396 for (
int32 p = 0; p < 2; p++) {
397 int32 num_rows = 100 +
Rand() % 255, num_cols;
398 if (p <= 2) num_cols = 128;
399 else if (p <= 4) num_cols = 256;
400 else num_cols = 100 +
Rand() % 200;
403 if (p % 2 == 0) vec_dim = num_cols;
404 else vec_dim = num_cols * num_rows;
421 template<
typename Real>
423 for (
int32 p = 0; p < 2; p++) {
428 if (p % 2 == 0) vec_dim = num_rows;
429 else vec_dim = num_cols * num_rows;
446 template<
typename Real>
448 for (
int32 p = 0; p < 2; p++) {
450 num_rows2 = 10 +
Rand() % 10,
451 num_cols = 10 +
Rand() % 10;
456 N2(num_rows2, num_cols), O(num_rows2, num_cols);
457 std::vector<int32> reorder(num_rows2);
458 std::vector<const Real*> reorder_src(num_rows2, NULL);
459 for (
int32 i = 0;
i < num_rows2;
i++) {
460 reorder[
i] = -1 + (
Rand() % (num_rows1 + 1));
461 if (reorder[
i] != -1) {
468 N1.CopyRows(M, reorder_cuda);
469 N2.CopyRows(reorder_src_cuda);
471 for (
int32 i = 0;
i < num_rows2;
i++)
472 for (
int32 j = 0;
j < num_cols;
j++)
473 if (reorder[
i] < 0) O(
i,
j) = 0;
474 else O(
i,
j) = M(reorder[
i],
j);
482 template<
typename Real>
484 for (
int32 p = 0; p < 2; p++) {
486 num_rows2 = 10 +
Rand() % 10,
487 num_cols = 10 +
Rand() % 10;
492 std::vector<Real*> reorder_dst(num_rows1, NULL);
493 unordered_map<MatrixIndexT, bool> used_index;
494 for (
int32 i = 0;
i < num_rows1;
i++) {
496 if (used_index.find(index) == used_index.end()) {
497 used_index[index] =
true;
502 reorder_dst[
i] = N.RowData(index);
503 for (
int32 j = 0;
j < num_cols;
j++)
504 O(index,
j) = M(
i,
j);
516 template<
typename Real>
518 for (
int32 p = 0; p < 2; p++) {
520 num_rows2 = 10 +
Rand() % 10,
521 num_cols = 10 +
Rand() % 10;
526 N2(num_rows2, num_cols), O(num_rows2, num_cols);
527 std::vector<int32> reorder(num_rows2);
528 std::vector<const Real*> reorder_src(num_rows2, NULL);
529 for (
int32 i = 0;
i < num_rows2;
i++) {
530 reorder[
i] = -1 + (
Rand() % (num_rows1 + 1));
531 if (reorder[
i] != -1)
536 static_cast<Real
>((
Rand() % num_rows2)) /
static_cast<Real
>(num_rows1);
540 N1.AddRows(alpha, M, reorder_cuda);
541 N2.AddRows(alpha, reorder_src_cuda);
543 for (
int32 i = 0;
i < num_rows2;
i++) {
544 if (reorder[
i] != -1) {
545 for (
int32 j = 0;
j < num_cols;
j++) {
546 O(
i,
j) += alpha * M(reorder[
i],
j);
557 template<
typename Real>
559 for (
int32 p = 0; p < 2; p++) {
561 num_rows2 = 10 +
Rand() % 10,
562 num_cols = 10 +
Rand() % 10;
567 O(num_rows2, num_cols);
568 std::vector<int32> reorder(num_rows2);
569 std::vector<const Real*> reorder_src(num_rows2, NULL);
570 for (
int32 i = 0;
i < num_rows2;
i++) {
571 reorder[
i] = -1 + (
Rand() % (num_rows1 + 1));
572 if (reorder[
i] != -1)
577 N1.MulRows(M, reorder_cuda);
579 for (
int32 i = 0;
i < num_rows2;
i++) {
580 if (reorder[
i] != -1) {
582 M_row(M, reorder[
i]);
583 O_row.MulElements(M_row);
593 template<
typename Real>
595 for (
int32 p = 0; p < 2; p++) {
597 num_rows2 = 10 +
Rand() % 10,
598 num_cols = 10 +
Rand() % 10;
603 static_cast<Real
>((
Rand() % num_rows2)) /
static_cast<Real
>(num_rows1);
606 O(num_rows2, num_cols);
607 std::vector<int32> reorder(num_rows1);
608 std::vector<Real*> reorder_dst(num_rows1, NULL);
609 unordered_map<MatrixIndexT, bool> used_index;
610 for (
int32 i = 0;
i < num_rows1;
i++) {
612 if (used_index.find(index) == used_index.end()) {
613 used_index[index] =
true;
619 reorder_dst[
i] = N1.RowData(index);
620 for (
int32 j = 0;
j < num_cols;
j++)
621 O(index,
j) += alpha * M(
i,
j);
635 template<
typename Real>
639 if (
Rand() % 3 == 0) { M = 0; N = 0; }
661 if (
Rand() % 3 == 0) { M = 0; N = 0; }
672 template<
typename Real>
674 for (
int32 p = 0; p < 2; p++) {
676 num_cols2 = 10 +
Rand() % 10,
677 num_rows = 10 +
Rand() % 10;
680 std::vector<Int32Pair> indices(num_cols2);
681 for (
int32 i = 0;
i < num_cols2;
i++) {
682 indices[
i].first =
Rand() % num_cols1;
683 int32 headroom = num_cols1 - indices[
i].first,
684 size = (
Rand() % headroom) + 1;
685 indices[
i].second = indices[
i].first + size;
687 indices[
i].second <= num_cols1 &&
688 indices[
i].first >= 0);
695 int32 start = indices[
j].first, end = indices[
j].second;
712 template<
typename Real>
714 for (
int32 p = 0; p < 10; p++) {
716 num_rows2 = 10 +
Rand() % 10,
717 num_cols = 10 +
Rand() % 10;
722 std::vector<Int32Pair> indexes(num_rows2);
724 indexes[
i].first =
Rand() % num_rows1;
725 int32 headroom = num_rows1 - indexes[
i].first,
726 size = (
Rand() % headroom) + 1;
727 indexes[
i].second = indexes[
i].first + size;
729 indexes[
i].second <= num_rows1 &&
730 indexes[
i].first >= 0);
735 int32 start = indexes[
i].first, end = indexes[
i].second;
738 dst1(
i,
j) += src(i2,
j);
752 template<
typename Real>
754 for (
int32 p = 0; p < 2; p++) {
756 num_cols2 = 10 +
Rand() % 10,
757 num_rows = 10 +
Rand() % 10;
762 std::vector<int32> reorder(num_cols2);
763 for (
int32 i = 0;
i < num_cols2;
i++)
764 reorder[
i] = -1 + (
Rand() % (num_cols1 + 1));
767 N.CopyCols(M, reorder_gpu);
769 for (
int32 i = 0;
i < num_rows;
i++)
770 for (
int32 j = 0;
j < num_cols2;
j++)
771 if (reorder[
j] < 0) O(
i,
j) = 0;
772 else O(
i,
j) = M(
i, reorder[
j]);
777 template<
typename Real>
779 for (
int i = 0;
i < 2; ++
i) {
780 int rows = 10 +
Rand() % 40;
781 int cols = 10 +
Rand() % 50;
800 mat.
AddSmat(alpha, smat, trans);
801 cumat.
AddSmat(alpha, cusmat, trans);
809 template<
typename Real>
811 for (
int i = 0;
i < 2; ++
i) {
812 int m = 10 +
Rand() % 40;
813 int k = 10 +
Rand() % 60;
814 int n = 10 +
Rand() % 50;
838 result.
AddMatSmat(alpha, mat, smat, trans, beta);
839 curesult.
AddMatSmat(alpha, cumat, cusmat, trans, beta);
847 template<
typename Real>
849 for (
int i = 0;
i < 2; ++
i) {
850 int m = 10 +
Rand() % 40;
851 int k = 10 +
Rand() % 60;
852 int n = 10 +
Rand() % 50;
876 result.
AddSmatMat(alpha, smat, trans, mat, beta);
877 curesult.
AddSmatMat(alpha, cusmat, trans, cumat, beta);
885 template<
typename Real>
887 for (
int32 p = 0; p < 2; p++) {
889 num_cols2 = 10 +
Rand() % 10,
890 num_rows = 10 +
Rand() % 10;
895 std::vector<int32> reorder(num_cols2);
896 for (
int32 i = 0;
i < num_cols2;
i++)
897 reorder[
i] = -1 + (
Rand() % (num_cols1 + 1));
900 N.AddCols(M, reorder_gpu);
902 for (
int32 i = 0;
i < num_rows;
i++)
903 for (
int32 j = 0;
j < num_cols2;
j++)
904 if (reorder[
j] < 0) O(
i,
j) = 0;
905 else O(
i,
j) = M(
i, reorder[
j]);
911 template<
typename Real>
932 template<
typename Real>
953 template<
typename Real>
973 template<
typename Real>
992 template<
typename Real>
1017 template<
typename Real>
1044 template<
typename Real>
1066 template<
typename Real>
1089 template<
typename Real>
1112 template<
typename Real>
1137 template<
typename Real>
1140 int32 dimM = 100 +
Rand() % 200, dimNs = 100 +
Rand() % 200;
1143 int32 dimN = group_size * dimNs;
1163 template<
typename Real>
1165 Real p[] = { 1.234, 2.345, 1, 2, std::numeric_limits<Real>::infinity() };
1166 for (
int i = 0;
i < 2 *
sizeof(p) /
sizeof(Real);
i++) {
1167 int32 dimM = 100 +
Rand() % 200, dimNs = 100 +
Rand() % 200;
1170 int32 dimN = group_size * dimNs;
1203 template<
typename Real>
1205 int32 dimM = 100 +
Rand() % 200, dimNs = 100 +
Rand() % 200;
1209 int32 dimN = group_size * dimNs;
1214 if (rand () % 2 == 0)
1237 for (
int p = 0; p < 4; p++) {
1240 Real alpha = 0.43243, beta = 1.423;
1258 for (
int32 r = 0; r < dimM; r++) {
1263 Mcheckrow.
Scale(beta);
1264 Mcheckrow.
AddVec(alpha * V(r), Nrow);
1267 M.AddDiagVecMat(alpha, V, N, trans, beta);
1275 for (
int p = 0; p < 2; p++) {
1277 Real alpha = 0.43243, beta = 1.423;
1295 M.AddMatDiagVec(alpha, N, trans, V, beta);
1304 Real alpha = 0.43243, beta = 1.423;
1305 CuMatrix<Real> M(dimM, dimN), A(dimM, dimN), B(dimM, dimN), buf(dimM, dimN);
1314 M.AddMatMatElements(alpha, A, B, beta);
1322 CuMatrix<Real> M(dimM, dimN), A(dimM, dimN), B(dimM, dimN), C(dimM, dimN);
1331 M.SetMatMatDivMat(A,B,C);
1337 M.SetMatMatDivMat(A,B,C);
1341 template<
typename Real>
1366 template<
typename Real>
1392 for (
int i = 0;
i < 10;
i++) {
1424 template<
typename Real>
1426 for (
int32 l = 0; l < 5; l++) {
1433 num_col_blocks * block_cols);
1437 for (
int32 rb = 0; rb < num_row_blocks; rb++) {
1438 for (
int32 cb = 0; cb < num_col_blocks; cb++) {
1440 rb * block_rows, block_rows,
1441 cb * block_cols, block_cols);
1442 dst_copy.
AddMat(alpha, src_part);
1451 template<
typename Real>
1453 for (
int32 l = 0; l < 5; l++) {
1460 num_col_blocks * block_cols);
1464 for (
int32 rb = 0; rb < num_row_blocks; rb++) {
1465 for (
int32 cb = 0; cb < num_col_blocks; cb++) {
1467 rb * block_rows, block_rows,
1468 cb * block_cols, block_cols);
1481 template<
typename Real>
1483 for (
int32 l = 0; l < 5; l++) {
1490 num_col_blocks * block_cols);
1494 for (
int32 rb = 0; rb < num_row_blocks; rb++) {
1495 for (
int32 cb = 0; cb < num_col_blocks; cb++) {
1497 rb * block_rows, block_rows,
1498 cb * block_cols, block_cols);
1499 dst_copy_part.
AddMat(alpha, src);
1510 template<
typename Real>
1519 template<
typename Real>
1528 template<
typename Real>
1537 template<
typename Real>
1560 template<
typename Real>
1582 template<
typename Real>
1598 Real alpha = 0.3, beta = 1.75432;
1601 M2.
AddMatMat(alpha, N, trans, N, other_trans, beta);
1612 template<
typename Real>
1615 int32 dimM = 10 +
Rand() % 200, dimN = dimM + 20;
1637 Real alpha = 0.3, beta = 1.75432;
1659 template<
typename Real>
1690 template<
typename Real>
1714 template<
typename Real>
1718 bool old_mode = CuDevice::Instantiate().SetDebugStrideMode(
false);
1720 const int32 batchCount = 10;
1721 std::vector<Matrix<Real>* > Ha(batchCount), Hb(batchCount), Hc1(batchCount), Hc2(batchCount);
1722 std::vector<CuMatrix<Real>* > Da(batchCount), Db(batchCount), Dc1(batchCount), Dc2(batchCount);
1723 std::vector<SubMatrix<Real>* > HA, HB, HC1, HC2;
1724 std::vector<CuSubMatrix<Real>* > DA, DB, DC1, DC2;
1726 for (
int32 i = 0;
i < batchCount;
i++) {
1739 Hc1[i]->NumCols()));
1741 Hc2[i]->NumCols()));
1755 Dc1[i]->NumCols()));
1757 Dc2[i]->NumCols()));
1761 static_cast<Real>(0.0f));
1763 static_cast<Real>(0.0f));
1770 for (
int32 i = 0;
i< batchCount;
i++) {
1772 (*HC2[
i]).AddMatMat(0.5f, *(HA[i]),
kTrans, *(HB[
i]),
kTrans, 0.0f);
1773 DC1[
i]->CopyToMat(&Hca1);
1774 DC2[
i]->CopyToMat(&Hca2);
1777 delete Ha[
i];
delete Hb[
i];
delete Hc1[
i];
delete Hc2[
i];
1778 delete HA[
i];
delete HB[
i];
delete HC1[
i];
delete HC2[
i];
1779 delete Da[
i];
delete Db[
i];
delete Dc1[
i];
delete Dc2[
i];
1780 delete DA[
i];
delete DB[
i];
delete DC1[
i];
delete DC2[
i];
1783 CuDevice::Instantiate().SetDebugStrideMode(old_mode);
1788 template<
typename Real>
1802 template<
typename Real>
1817 template<
typename Real>
1832 template<
typename Real>
1848 template<
typename Real>
1872 template<
typename Real>
1877 if (
i % 2 == 0) dimN += 5;
1890 template<
typename Real>
1916 template<
typename Real>
1940 template<
typename Real>
1942 const int32 X=4321, Y=19;
1943 Real alpha=0.1, beta=0.7;
1961 Hv.
AddVec(alpha,Hv_accu);
1971 template<
typename Real>
1999 template<
typename Real>
2001 const int32 X=19, Y=4321;
2002 Real alpha=0.5, beta=0.7;
2020 Hv.
AddVec(alpha, Hv_accu);
2028 template<
typename Real>
2030 for (
int32 iter = 0 ; iter < 10; iter++) {
2031 int32 M1 = 1 + rand () % 10, M2 = 1 +
Rand() % 1, M3 = 1 +
Rand() % 10, M = M1 + M2 + M3,
2032 N1 = 1 + rand () % 10, N2 = 1 +
Rand() % 1, N3 = 1 +
Rand() % 10, N = N1 + N2 + N3,
2038 submat2 = mat.
Range(M1, M2, N1, N2);
2039 Real f1 = mat(M1 + m, N1 + n), f2 = submat1(m, n), f3 = submat2(m, n);
2047 template<
typename Real>
2075 template<
typename Real>
2092 template<
typename Real>
2110 template<
class Real>
2115 if (
i % 5 == 0) { dimM = 0; dimN = 0; }
2118 std::ostringstream os;
2119 bool binary = (
i % 4 < 2);
2120 mat.
Write(os, binary);
2123 std::istringstream is(os.str());
2124 mat2.
Read(is, binary);
2130 template<
typename Real>
2156 template<
typename Real>
2165 diff.AddMat(-1.0, Bm);
2166 Real norm = diff.FrobeniusNorm();
2167 KALDI_ASSERT((norm <= tol * A.FrobeniusNorm()) == (A.ApproxEqual(B, tol)));
2172 template<
typename Real>
2194 template<
typename Real,
typename OtherReal>
2223 template<
typename Real>
2238 Ho(r, c) = 1.0/(1.0+exp(-Hi(r, c)));
2250 template<
typename Real>
2269 Ho(r, c) = Hy(r, c)*(1.0 - Hy(r, c)) * Hi(r, c);
2280 template<
typename Real>
2324 template<
typename Real>
2369 template<
typename Real>
2373 int row = 10 +
Rand() % 40;
2374 int col = 10 +
Rand() % 50;
2396 Ho.
Row(r).ApplySoftMax();
2406 template<
typename Real>
2410 int row = 10 +
Rand() % 300;
2411 int col = 10 +
Rand() % 300;
2433 Ho.
Row(r).ApplyLogSoftMax();
2443 template<
typename Real>
2453 std::vector<int32> Hmax(dimM);
2461 Real max=-1.0e+20;
int32 idx=-1;
2463 if(Hi(r,c) > max) { idx=c; max=Hi(r,c); }
2468 std::vector<int32> Hmax2(dimM);
2482 template<
typename Real>
2491 std::vector<int32> Htgt(X);
2505 int32 col_tgt = Htgt[r];
2506 Hlogpost(r) =
Log(Hi(r, col_tgt));
2507 Hi(r, col_tgt) -= 1.0;
2532 template<
typename Real>
2553 template<
typename Real>
2573 template<
typename Real>
2592 template<
typename Real>
2611 Ho(r, c) = (1.0 - Hy(r, c)*Hy(r, c)) * Hi(r, c);
2626 template <
typename Real>
2648 for (
int32 pow = 1; pow < 5; pow++) {
2653 Real observed_moment = Mpow.
Sum() / (rows * cols);
2662 Real deviation = sqrt(expected_twice_moment - expected_moment * expected_moment);
2663 Real allowed_deviation = k * deviation / sqrt(static_cast<Real>(rows * cols));
2668 Real lower_bound = expected_moment - allowed_deviation,
2669 upper_bound = expected_moment + allowed_deviation;
2670 KALDI_ASSERT(observed_moment >= lower_bound && observed_moment <= upper_bound);
2676 template <
typename Real>
2687 central_moments(0) = 0.0;
2688 central_moments(1) = 0.0;
2689 central_moments(2) = 1.0 / 12;
2690 central_moments(3) = 0.0;
2691 central_moments(4) = 1.0 / 80;
2693 for (
int32 pow = 1; pow < central_moments.
Dim(); pow++) {
2696 Real observed_moment = Mpow.
Sum() / (rows * cols);
2699 Real expected_moment = central_moments(pow);
2703 Real allowed_deviation = k / sqrt(static_cast<Real>(rows * cols));
2704 Real lower_bound = expected_moment - allowed_deviation,
2705 upper_bound = expected_moment + allowed_deviation;
2706 if (!(observed_moment >= lower_bound && observed_moment <= upper_bound)) {
2708 KALDI_ERR <<
"Bad observed " << pow <<
"'th moment " << observed_moment
2709 <<
", expected " << expected_moment <<
", allowed range " 2710 << lower_bound <<
" to " << upper_bound;
2717 template<
typename Real>
2719 for (
int i = 1;
i < 2; ++
i) {
2721 if (
i == 8) dim = 0;
2737 template<
typename Real>
2739 for (
int i = 1;
i < 2; ++
i) {
2741 if (
i == 8) dim = 0;
2750 Real aval = A_copy(
i,
j), aorigval = A_orig(
i,
j);
2758 template<
typename Real>
2760 for (
int i = 1;
i < 10; ++
i) {
2762 if (
i == 8) dim = 0;
2780 template<
typename Real>
2788 std::vector<MatrixElement<Real> > labels;
2789 for(
int i = 0;
i < n_r;
i++) {
2790 for(
int j = 0;
j < n_c;
j++) {
2792 if (
Rand() % n_c == 0) {
2795 labels.push_back(t);
2821 template<
typename Real>
2831 std::vector<MatrixElement<Real> > input;
2832 std::set<Int32Pair> input_index;
2833 std::vector<Int32Pair> input_index_v;
2834 Real *input_value =
new Real[num_elements];
2836 for (
int32 j = 0;
j < num_elements;
j++) {
2842 }
while (input_index.find(tmp_pair)!=input_index.end());
2843 input_index.insert(tmp_pair);
2847 input_index_v.push_back(tmp_pair);
2849 Real offset = -1 + (0.33 * (
Rand() % 5));
2850 M(r, c) += scale * offset;
2853 input_value[
j] = offset;
2858 H_copy.
AddElements(scale, cu_input_index, input_value);
2859 delete[] input_value;
2866 template<
typename Real>
2873 std::vector<int32> elements(NR, -1);
2875 for (
int32 r = 0; r < NR; r++) {
2888 template<
typename Real>
2896 std::vector<Int32Pair> indices;
2897 std::vector<Real> reference;
2898 std::vector<Real> output;
2899 output.resize(num_elements);
2902 for (
int32 j = 0;
j < num_elements;
j++) {
2909 indices.push_back(tmp_pair);
2910 reference.push_back(H(r, c));
2913 H.
Lookup(indices, &(output[0]));
2919 template<
typename Real>
2926 m1.EqualElementMask(m1,&mask_same);
2927 m1.EqualElementMask(m2,&mask_different);
2936 m1.Range(1,5,2,5).EqualElementMask(m3.Range(0,5,0,5),&m4);
2942 UnitTestCuMatrixApplyExpSpecial<Real>();
2943 UnitTestCuMatrixApplyExpLimited<Real>();
2944 UnitTextCuMatrixAddSmatMat<Real>();
2945 UnitTextCuMatrixAddMatSmat<Real>();
2946 UnitTextCuMatrixAddSmat<Real>();
2947 UnitTestCuMatrixTraceMatMat<Real>();
2948 UnitTestCuMatrixObjfDeriv<Real>();
2950 UnitTestCuMatrixCopyCross<Real>();
2951 UnitTestCuMatrixCopyCross2<Real>();
2952 UnitTestCuMatrixApplyLog<Real>();
2953 UnitTestCuMatrixApplyExp<Real>();
2954 UnitTestCuMatrixSetRandn<Real>();
2955 UnitTestCuMatrixSetRandUniform<Real>();
2956 UnitTestCuMatrixScale<Real>();
2957 UnitTestCuMatrixSigmoid<Real>();
2958 UnitTestCuMatrixSoftHinge<Real>();
2959 UnitTestCuMatrixApplyPow<Real>();
2960 UnitTestCuMatrixApplyPowAbs<Real>();
2961 UnitTestCuMatrixSet<Real>();
2962 UnitTestCuMatrixAdd<Real>();
2963 UnitTestCuMatrixApplyFloor<Real>();
2964 UnitTestCuMatrixApplyCeiling<Real>();
2965 UnitTestCuMatrixApplyHeaviside<Real>();
2966 UnitTestCuMatrixHeaviside<Real>();
2967 UnitTestCuMatrixMulElements<Real>();
2968 UnitTestCuMatrixDivElements<Real>();
2969 UnitTestCuMatrixMax<Real>();
2970 UnitTestCuMatrixMin<Real>();
2971 UnitTestCuMatrixMulColsVec<Real>();
2972 UnitTestCuMatrixMulRowsVec<Real>();
2973 UnitTestCuMatrixDivRowsVec<Real>();
2974 UnitTestCuMatrixAddMat<Real>();
2975 UnitTestCuMatrixAddMatBlocks1<Real>();
2976 UnitTestCuMatrixAddMatBlocks1Trans<Real>();
2977 UnitTestCuMatrixAddMatBlocks2<Real>();
2978 UnitTestCuMatrixReduceSum<Real>();
2979 UnitTestCuMatrixReduceMax<Real>();
2980 UnitTestCuMatrixReduceMin<Real>();
2981 UnitTestCuMatrixAddVecToCols<Real>();
2982 UnitTestCuMatrixAddVecToRows<Real>();
2983 UnitTestCuMatrixAddMatMat<Real>();
2984 UnitTestCuMatrixAddVecVec<Real>();
2985 UnitTestCuMatrixSymAddMat2<Real>();
2986 UnitTestCuMatrixAddMatMatBatched<Real>();
2987 UnitTestCuMatrixSymInvertPosDef<Real>();
2988 UnitTestCuMatrixCopyFromMat<Real>();
2989 UnitTestCuMatrixCopyFromTp<Real>();
2990 UnitTestCuMatrixAddMatTp<Real>();
2991 UnitTestCuMatrixCopyCols<Real>();
2992 UnitTestCuMatrixAddCols<Real>();
2993 UnitTestCuMatrixSumColumnRanges<Real>();
2994 UnitTestCuMatrixCopyRows<Real>();
2995 UnitTestCuMatrixCopyRowsFromVec<Real>();
2996 UnitTestCuMatrixCopyColsFromVec<Real>();
2997 UnitTestCuMatrixCopyToRows<Real>();
2998 UnitTestCuMatrixAddRows<Real>();
2999 UnitTestCuMatrixMulRows<Real>();
3000 UnitTestCuMatrixAddToRows<Real>();
3001 UnitTestCuMatrixAddRowRanges<Real>();
3002 UnitTestCuMatrixAddTpMat<Real>();
3003 UnitTestCuMatrixTranspose<Real>();
3004 UnitTestCuMatrixCopyUpperToLower<Real>();
3005 UnitTestCuMatrixCopyLowerToUpper<Real>();
3006 UnitTestCuMatrixSetZeroAboveDiag<Real>();
3007 UnitTestCuMatrixAddElements<Real>();
3008 UnitTestCuMatrixAddToElements<Real>();
3009 UnitTestCuMatrixLookup<Real>();
3010 UnitTestCuMatrixEqualElementMask<Real>();
3012 UnitTestCuVectorAddVec<Real>();
3013 UnitTestCuVectorAddRowSumMat<Real>();
3014 UnitTestCuVectorAddRowSumMatLarge<Real>();
3015 UnitTestCuVectorAddColSumMat<Real>();
3016 UnitTestCuVectorAddColSumMatLarge<Real>();
3017 UnitTestCuSubMatrix<Real>();
3018 UnitTestCuMatrixInvertElements<Real>();
3019 UnitTestCuVectorInvertElements<Real>();
3020 UnitTestCuMatrixIO<Real>();
3021 UnitTestCuSigmoid<Real>();
3022 UnitTestCuApproxEqual<Real>();
3023 UnitTestCuCopy<Real, float>();
3025 if (CuDevice::Instantiate().DoublePrecisionSupported())
3027 UnitTestCuCopy<Real, double>();
3028 UnitTestCuMatrixAddToDiag<Real>();
3029 UnitTestCuMatrixAdd2<Real>();
3030 UnitTestCuDiffSigmoid<Real>();
3031 UnitTestCuDiffSoftmax<Real>();
3032 UnitTestCuDiffLogSoftmax<Real>();
3033 UnitTestCuMatrixGroupPnorm<Real>();
3034 UnitTestCuMatrixDiffGroupPnorm<Real>();
3035 UnitTestCuMatrixGroupMax<Real>();
3036 UnitTestCuMatrixGroupMaxDeriv<Real>();
3037 UnitTestCuMatrixMulRowsVec<Real>();
3038 UnitTestCuMatrixMulRowsGroupMat<Real>();
3039 UnitTestCuFindRowMaxId<Real>();
3040 UnitTestCuSoftmax<Real>();
3041 UnitTestCuLogSoftmax<Real>();
3042 UnitTestCuDiffXent<Real>();
3043 UnitTestCheck<Real>();
3044 UnitTestSwapCu2Cu<Real>();
3045 UnitTestSwapCu2M<Real>();
3046 UnitTestCuMatrixAddDiagVecMat<Real>();
3047 UnitTestCuMatrixAddMatDiagVec<Real>();
3048 UnitTestCuMatrixAddMatMatElements<Real>();
3049 UnitTestCuMatrixSetMatMatDivMat<Real>();
3050 UnitTestCuTanh<Real>();
3051 UnitTestCuCholesky<Real>();
3052 UnitTestCuDiffTanh<Real>();
3053 UnitTestCuVectorAddTpVec<Real>();
3054 UnitTestCuVectorMulTp<Real>();
3064 bool test_threads =
true;
3067 int32 num_threads = 4;
3071 for (loop = 0; loop < 2; loop++) {
3072 CuDevice::Instantiate().SetDebugStrideMode(
true);
3074 CuDevice::Instantiate().AllowMultithreading();
3076 CuDevice::Instantiate().SelectGpuId(
"no");
3078 CuDevice::Instantiate().SelectGpuId(
"yes");
3082 KALDI_LOG <<
"Doing matrix unit test with " 3083 << num_threads <<
" threads.";
3084 std::vector<std::thread*> threads;
3085 for (
int32 i = 0;
i < num_threads - 1;
i++)
3086 threads.push_back(
new std::thread(kaldi::CudaMatrixUnitTest<float>));
3088 kaldi::CudaMatrixUnitTest<float>();
3089 for (
size_t i = 0;
i < threads.size();
i++) {
3094 kaldi::CudaMatrixUnitTest<float>();
3098 if (CuDevice::Instantiate().DoublePrecisionSupported()) {
3099 kaldi::CudaMatrixUnitTest<double>();
3101 KALDI_WARN <<
"Double precision not supported";
3104 kaldi::CudaMatrixUnitTest<double>();
3108 KALDI_LOG <<
"Tests without GPU use succeeded.";
3110 KALDI_LOG <<
"Tests with GPU use (if available) succeeded.";
3113 CuDevice::Instantiate().PrintProfile();
static void UnitTestCuMatrixAdd()
static void UnitTestCuMatrixMulRowsVec()
void UnitTestCuMatrixCopyCross2()
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void Add(const Real alpha)
Add a scalar to each element.
static void UnitTestCuMatrixCopyRowsFromVec()
Real Min() const
Returns minimum element of matrix.
void ApplyCeiling(Real ceiling_val)
static void UnitTestCuMatrixAddMatMatElements()
Packed symmetric matrix class.
static void UnitTestCuMatrixAddMatMatBatched()
void SoftHinge(const CuMatrixBase< Real > &src)
Apply the function y = log(1 + exp(x)), to each element.
void ApplyPow(Real power)
void Tanh(const MatrixBase< Real > &src)
Set each element to the tanh of the corresponding element of "src".
static void UnitTestCuMatrixCopyToRows()
void CopyFromVec(const std::vector< T > &src)
This function resizes if needed.
static int32 DoubleFactorial(int32 i)
void GroupMax(const CuMatrixBase< Real > &src)
Apply the function y(i) = max_{j = i*G}^{(i+1)*G-1} x_j, where G = x.NumCols() / y.NumCols() must be an integer.
static void UnitTestCuMatrixAddRows()
void MulTp(const CuTpMatrix< Real > &M, const MatrixTransposeType trans)
Multiplies this vector by lower-triangular matrix: *this <– *this *M.
void Write(std::ostream &os, bool binary) const
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
const CuSubVector< Real > Row(MatrixIndexT i) const
void Transpose()
Transpose the matrix.
static void UnitTestCuMatrixScale()
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
static void UnitTestCuMatrixMin()
static void UnitTestCuLogSoftmax()
void CopyToVec(std::vector< T > *dst) const
This function resizes *dst if needed.
void AddSmatMat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, Real beta)
(*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
void AddTpMat(const Real alpha, const TpMatrix< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*A*B.
void AddToElements(Real alpha, const CuArrayBase< int32 > &elements)
This is a rather special purpose function; we might generalize it later by adding a transpose-type op...
static void UnitTestCuMatrixAddRowRanges()
void AddMatTp(const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuTpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*A*B.
static void UnitTestCuMatrixSoftHinge()
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Real Cond() const
Returns condition number by computing Svd.
static void UnitTestCuMatrixApplyExpSpecial()
Base class which provides matrix operations not involving resizing or allocation. ...
void CopyColFromMat(const CuMatrixBase< Real > &mat, MatrixIndexT col)
static void UnitTextCuMatrixAddMatSmat()
static void UnitTestCuMatrixReduceSum()
void AddRowRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, do (*this)(r, c) += src(j, c), where j ranges from ind...
Real Max() const
Returns maximum element of matrix.
bool WithProb(BaseFloat prob, struct RandomState *state)
void AddElements(Real alpha, const std::vector< MatrixElement< Real > > &input)
Real Trace(bool check_square=true) const
Returns trace of matrix.
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
static void UnitTestCuMatrixApplyPow()
static void UnitTestCuMatrixGroupMaxDeriv()
static void UnitTestCuVectorAddColSumMatLarge()
static void UnitTestCuMatrixSetZeroAboveDiag()
void ApplyFloor(Real floor_val)
void GroupPnormDeriv(const MatrixBase< Real > &input, const MatrixBase< Real > &output, Real power)
Calculate derivatives for the GroupPnorm function above...
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
void AddVecToCols(Real alpha, const CuVectorBase< Real > &col, Real beta=1.0)
(for each column c of *this), c = alpha * col + beta * c
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
float RandGauss(struct RandomState *state=NULL)
void AddToDiag(const Real alpha)
Add a scalar to each diagonal element.
void AddSmat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A.
void MulTp(const TpMatrix< Real > &M, const MatrixTransposeType trans)
Multiplies this vector by lower-triangular matrix: *this <– *this *M.
static void UnitTestCuMatrixCopyUpperToLower()
void MulRowsGroupMat(const MatrixBase< Real > &src)
Divide each row into src.NumCols() equal groups, and then scale i'th row's j'th group of elements by ...
void AddToDiag(Real value)
Adds "value" to the diagonal elements of the matrix.
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
void DivRowsVec(const CuVectorBase< Real > &div)
Divide the i'th row by div[i].
A class for storing matrices.
void AddMatMatElements(const Real alpha, const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const Real beta)
*this = beta * *this + alpha * A .* B (.* element by element multiplication)
static void UnitTestCuMatrixApplyExpLimited()
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
static void UnitTestCuMatrixSetRandUniform()
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
void DivElements(const MatrixBase< Real > &A)
Divide each element by the corresponding element of a given matrix.
void Min(const CuMatrixBase< Real > &A)
Do, elementwise, *this = min(*this, A).
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
static void UnitTestCuMatrixCopyFromMat()
static void UnitTestCuMatrixAddMatDiagVec()
static void UnitTestCuMatrixMax()
static void UnitTestCuMatrixMulRowsGroupMat()
static void UnitTestCuMatrixAddMatBlocks1Trans()
static void UnitTestCuMatrixInvertElements()
static void UnitTestCuMatrixAddElements()
static void UnitTestCuMatrixCopyFromTp()
void GroupMax(const MatrixBase< Real > &src)
Apply the function y(i) = max_{j = i*G}^{(i+1)*G-1} x_j. Requires src.NumRows() == this->NumRows() an...
static void UnitTestCuMatrixApplyPowAbs()
void SetRandUniform()
Sets to numbers uniformly distributed on (0, 1)
void AddTpMat(const Real alpha, const CuTpMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*A*B.
static void UnitTestCuMatrixAddToDiag()
static void UnitTestCuMatrixMulRows()
static void UnitTestCuMatrixObjfDeriv()
void InvertElements()
invert the matrix by elements.
bool ApproxEqual(const CuMatrixBase< Real > &other, float tol=0.01) const
True if ((*this)-other).FrobeniusNorm() <= tol * this->FrobeniusNorm()
static void UnitTestCuSigmoid()
static void UnitTestCuMatrixGroupMax()
bool IsUnit(Real tol=0.001) const
static void InitRand(VectorBase< Real > *v)
static void UnitTestCuMatrixCopyColsFromVec()
void SetRandn()
Sets to random values of a normal distribution.
void Lookup(const std::vector< Int32Pair > &indexes, Real *output) const
static void UnitTestCuMatrixEqualElementMask()
static void UnitTestCuMatrixSymInvertPosDef()
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
void AddMatBlocks(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
This function is like AddMat (it does *this += alpha * src), except that it supports cases where *thi...
static void UnitTestCuMatrixAddMatMat()
void SymInvertPosDef()
Inversion for positive definite symmetric matrices.
static void UnitTestCuMatrixSetRandn()
static void UnitTestCuDiffXent()
static void UnitTestCuMatrixLookup()
static void UnitTestCuMatrixAddCols()
static void UnitTextCuMatrixAddSmatMat()
void CompObjfAndDeriv(const std::vector< MatrixElement< Real > > &elements, const CuMatrix< Real > &A, Real *tot_objf, Real *tot_weight)
Here, A is interpreted as a matrix of probabilities, and "elements" as a list of posteriors (possibly...
static void UnitTestCuVectorAddVec()
void SoftHinge(const MatrixBase< Real > &src)
Set each element to y = log(1 + exp(x))
void AddMatMatBatched(const Real alpha, std::vector< CuSubMatrix< Real > * > &C, const std::vector< CuSubMatrix< Real > * > &A, MatrixTransposeType transA, const std::vector< CuSubMatrix< Real > * > &B, MatrixTransposeType transB, const Real beta)
Does multiple matrix multiplications, executing them in parallel using cuBLAS's gemmBatched if we are...
static void UnitTestCuMatrixApplyLog()
static void UnitTextCuMatrixAddSmat()
static void UnitTestCuApproxEqual()
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimensions do not match.
static void UnitTestCuMatrixHeaviside()
static void UnitTestCuSubMatrix()
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
void Max(const CuMatrixBase< Real > &A)
Do, elementwise, *this = max(*this, A).
static void UnitTestCuVectorAddTpVec()
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
void ApplyPowAbs(Real power, bool include_sign=false)
static void UnitTestCuMatrixSymAddMat2()
static void UnitTestCuMatrixIO()
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
void Scale(Real alpha)
Multiply each element with a scalar value.
void DiffXent(const CuArrayBase< int32 > &tgt, CuVector< Real > *log_post_tgt)
Differentiate the block [softmax+cross-entropy] : dE/da = posterior_mat - target_mat, 'E' is error function, 'a' is activation on softmax input.
void Swap(Matrix< Real > *mat)
void AddToRows(Real alpha, const CuArrayBase< MatrixIndexT > &indexes, CuMatrixBase< Real > *dst) const
For each row i of *this, adds this->Row(i) to dst->Row(indexes(i)) if indexes(i) >= 0...
void SetZero()
Math operations, some calling kernels.
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
static void UnitTestCuVectorInvertElements()
void AddVecToRows(const Real alpha, const VectorBase< OtherReal > &v)
[each row of *this] += alpha * v
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
static void UnitTestCuMatrixApplyFloor()
static void UnitTestCuMatrixAddMat()
static void UnitTestCuFindRowMaxId()
static void UnitTestCuDiffLogSoftmax()
static void UnitTestCuMatrixAdd2()
void SymAddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transA, Real beta)
*this = beta * *this + alpha * M M^T, for symmetric matrices.
static void UnitTestCuMatrixAddMatBlocks1()
void CopyColsFromVec(const CuVectorBase< Real > &v)
Copies vector into matrix, column-by-column.
static void UnitTestCuVectorAddRowSumMat()
void CopyFromTp(const TpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given tpmatrix. (no resize is done).
void SetRandn()
Sets to random values of a normal distribution.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
static void UnitTestCuMatrixAddToRows()
static void UnitTestCuDiffSigmoid()
static void UnitTestCuMatrixApplyCeiling()
static void UnitTestCuMatrixMulElements()
void GroupPnorm(const CuMatrixBase< Real > &src, Real pow)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ (power)) ^ (1 / power) where G = x...
Packed symmetric matrix class.
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
void Cholesky(CuMatrixBase< Real > *inv_cholesky=NULL)
This function sets *this to the Cholesky factor of *this (i.e.
static void UnitTestCuSoftmax()
static void UnitTestCuMatrixApplyExp()
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
This class is used for a piece of a CuMatrix.
void DivElements(const CuMatrixBase< Real > &A)
Divide two matrices elementwise: C = C ./ A.
void DiffSoftmaxPerRow(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the softmax function.
MatrixIndexT Dim() const
Returns the dimension of the vector.
Real Sum() const
Returns sum of all elements in matrix.
static void UnitTestCuMatrixAddDiagVecMat()
void GroupMaxDeriv(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
void Scale(Real alpha)
Multiplies all elements by this constant.
void ApplyCeiling(Real ceiling_val)
static void UnitTestCuMatrixDivElements()
void MulElements(const MatrixBase< Real > &A)
Element by element multiplication with a given matrix.
void DiffTanh(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the tanh function.
int Rand(struct RandomState *state)
void SetRandn()
Set vector to random normally-distributed noise.
static void UnitTestCuMatrixSetMatMatDivMat()
static void UnitTestCuMatrixAddMatBlocks2()
void FindRowMaxId(CuArray< int32 > *id) const
Find the id of the maximal element for each row (resizes the 'id' array to the appropriate size)...
void MulColsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = (*this) * diag(scale).
void Heaviside(const CuMatrixBase< Real > &src)
Set each element to the Heaviside function of the corresponding element of "src", which we define as ...
void AddVec(Real alpha, const CuVectorBase< Real > &vec, Real beta=1.0)
void MulRowsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = diag(scale) * (*this).
void AddSmatMat(Real alpha, const SparseMatrix< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, Real beta)
(*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
void DiffSigmoid(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the sigmoid function.
void MulColsVec(const CuVectorBase< Real > &scale)
scale i'th column by scale[i]
static void UnitTestCuMatrixSet()
void SumColumnRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, sets (*this)(r, c) to the sum of src(r, j), where j ranges from indexes[c].first through indexes[c].second - 1.
static void UnitTestCuMatrixDiffGroupPnorm()
static void UnitTestCuMatrixAddToElements()
void GroupPnorm(const MatrixBase< Real > &src, Real power)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / power).
static void UnitTestCuMatrixTranspose()
static void UnitTestCuDiffSoftmax()
void ApplyExpLimited(Real lower_limit, Real upper_limit)
MatrixIndexT NumCols() const
void InvertElements()
Inverts all the elements of the matrix.
static void UnitTestCuMatrixCopyCols()
void DiffLogSoftmaxPerRow(const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv)
Differentiate backward through the log softmax function.
void DiffGroupPnorm(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv, Real power)
Differentiate backward through the GroupPnorm function.
void AddTpVec(const Real alpha, const TpMatrix< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add triangular matrix times vector: this <-- beta*this + alpha*M*v.
A class representing a vector.
void SetRandn(BaseFloat zero_prob)
Sets up to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob an...
void InvertElements()
Invert all elements.
static void UnitTestCuMatrixSigmoid()
void UnitTestCuMatrixCopyCross()
static void UnitTestCuMatrixGroupPnorm()
void AddSmat(Real alpha, const SparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A [or A^T].
void AddVecToCols(const Real alpha, const VectorBase< OtherReal > &v)
[each col of *this] += alpha * v
#define KALDI_ASSERT(cond)
static void UnitTestCuMatrixDivRowsVec()
void Read(std::istream &is, bool binary)
I/O functions.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void CudaMatrixUnitTest()
void CopyFromTp(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
static void UnitTestCuMatrixCopyRows()
static void UnitTestCuMatrixReduceMax()
static void UnitTestCuMatrixSumColumnRanges()
static void UnitTestCuDiffTanh()
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
static void UnitTestCuMatrixAddVecToRows()
void MulRowsGroupMat(const CuMatrixBase< Real > &src)
divide each row into src.NumCols() groups, and then scale i'th row's jth group of elements by src[i...
static void UnitTestCuMatrixAddTpMat()
void AddDiagMatMat(Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const MatrixBase< Real > &N, MatrixTransposeType transN, Real beta=1.0)
Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoT...
static void UnitTestCuMatrixAddVecToCols()
static void UnitTestCuMatrixAddMatTp()
static void UnitTestCuMatrixApplyHeaviside()
void CopyToRows(const CuArrayBase< Real *> &dst) const
For each row r of this matrix, copies it to the array of floats at the location given by dst[r]...
static void UnitTestCuCopy()
static void UnitTestCuMatrixAddVecVec()
static void UnitTestCuMatrixMulColsVec()
static void RandZeroToOneMatrix(MatrixBase< Real > *mat)
void AddDiagVecMat(const Real alpha, const VectorBase< Real > &v, const MatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
static void UnitTestCuVectorAddRowSumMatLarge()
void LogSoftMaxPerRow(const CuMatrixBase< Real > &src)
LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row...
void GroupMaxDeriv(const MatrixBase< Real > &input, const MatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
void AddMatTp(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const TpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*A*B.
MatrixIndexT NumRows() const
Dimensions.
Provides a vector abstraction class.
void AddColSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of columns of M) + beta * *this.
static void UnitTestCuVectorAddColSumMat()
void SetZero()
Set vector to all zeros.
void ApplyFloor(Real floor_val)
void AddTpVec(const Real alpha, const CuTpMatrix< Real > &M, const MatrixTransposeType trans, const CuVectorBase< Real > &v, const Real beta)
Add triangular matrix times vector: this <-- beta*this + alpha*M*v.
void ApplyPow(Real power)
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
static void UnitTestCuVectorMulTp()
void Tanh(const CuMatrixBase< Real > &src)
Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src).
void AddMatSmat(Real alpha, const MatrixBase< Real > &A, const SparseMatrix< Real > &B, MatrixTransposeType transB, Real beta)
(*this) = alpha * A * op(B) + beta * (*this), where B is sparse and op(B) is either B or trans(B) dep...
void Invert(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * rv (with casting between floats and doubles) ...
void CopyToVec(VectorBase< OtherReal > *dst) const
static void UnitTestCuMatrixCopyLowerToUpper()
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
static void UnitTestCuMatrixReduceMin()
Sub-matrix representation.
void AddMatSmat(Real alpha, const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType transB, Real beta)
(*this) = alpha * A * op(B) + beta * (*this), where B is sparse and op(B) is either B or trans(B) dep...
void CopyColsFromVec(const VectorBase< Real > &v)
Copies vector into matrix, column-by-column.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
void AddVecVec(Real alpha, const CuVectorBase< Real > &x, const CuVectorBase< Real > &y)
A = alpha * x * y^T + A .
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
void Set(Real)
Sets all elements to a specific value.
void SetZeroAboveDiag()
Zeroes all elements for which col > row.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
void ApplyPowAbs(Real power, bool include_sign=false)
static void UnitTestCuMatrixTraceMatMat()
void Sigmoid(const MatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src".
const Real * RowData(MatrixIndexT r) const
Get raw row pointer (const).
static void UnitTestCuCholesky()