cu-matrix-test.cc
Go to the documentation of this file.
1 // cudamatrix/cu-matrix-test.cc
2 
3 // Copyright 2010 Karel Vesely
4 // 2013 Lucas Ondel
5 // 2013 Johns Hopkins University (author: Daniel Povey)
6 // 2013 Hainan Xu
7 // 2013 Xiaohui Zhang
8 // 2013 Johns Hopkins University (author: Guoguo Chen)
9 // 2017 Hossein Hadian
10 // 2017 Shiyin Kang
11 
12 // See ../../COPYING for clarification regarding multiple authors
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License");
15 // you may not use this file except in compliance with the License.
16 // You may obtain a copy of the License at
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
22 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
23 // MERCHANTABLITY OR NON-INFRINGEMENT.
24 // See the Apache 2 License for the specific language governing permissions and
25 // limitations under the License.
26 
27 
28 #include <iostream>
29 #include <vector>
30 #include <cstdlib>
31 
32 #include "base/kaldi-common.h"
33 #include "util/common-utils.h"
35 
36 using namespace kaldi;
37 
38 
39 namespace kaldi {
40 
41 /*
42  * INITIALIZERS
43  */
44 template<typename Real>
45 static void InitRand(VectorBase<Real> *v) {
46  for (MatrixIndexT i = 0; i < v->Dim(); i++)
47  (*v)(i) = RandGauss();
48 }
49 
50 
51 
52 template<typename Real>
53 static void InitRand(MatrixBase<Real> *M) {
54  do {
55  for (MatrixIndexT i = 0;i < M->NumRows();i++)
56  for (MatrixIndexT j = 0;j < M->NumCols();j++)
57  (*M)(i, j) = RandGauss();
58  } while (M->NumRows() != 0 && M->Cond() > 100);
59 }
60 
61 
62 
63 template<typename Real>
65  for(int32 r=0; r<mat->NumRows(); r++)
66  for(int32 c=0; c<mat->NumCols(); c++)
67  (*mat)(r,c) = RandUniform();
68 }
69 
70 
71 /*
72  * Unit tests
73  */
74 
75 template<typename Real>
77  for (int32 i = 0; i < 2; i++) {
78  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
79  CuMatrix<Real> A(M, N);
80  A.SetRandUniform();
81  // Add bias to avoid numbers close to zero
82  A.Add(0.1);
83  if (i % 2 == 1) {
84  CuMatrix<Real> B(M, N);
85  B.SetRandn();
86  // add a bias to avoid numerical failure when comparing r2 and r3
87  B.Add(0.1);
88  Real r1 = TraceMatMat(A, B, kTrans),
91  Matrix<Real> X(B, kTrans);
92  KALDI_LOG << "Xsum = " << X.Sum();
93  Matrix<Real> Y(B, kTrans);
94  KALDI_LOG << "Ysum = " << Y.Sum();
95  KALDI_LOG << "Bsum = " << B.Sum();
96  KALDI_ASSERT(ApproxEqual(r1, r2));
97  KALDI_ASSERT(ApproxEqual(r2, r3));
98  } else {
99  CuMatrix<Real> B(N, M);
100  B.SetRandn();
101  Real r1 = TraceMatMat(A, B, kNoTrans),
104  KALDI_ASSERT(ApproxEqual(r1, r2));
105  KALDI_ASSERT(ApproxEqual(r2, r3));
106  }
107  }
108 }
109 
110 
111 template<typename Real>
112 static void UnitTestCuCholesky() {
113  for (int32 i = 0; i < 2; i++) {
114  int32 M = 1 + Rand() % 10, N = M + 5;
115 
116  CuMatrix<Real> A(M, N);
117  A.SetRandn();
118  CuMatrix<Real> S(M, M);
119  // SymAddMat2 only copies lower triangle.
120  // it's OK- Cholesky only reads the lower triangle.
121  S.SymAddMat2(1.0, A, kNoTrans, 0.0);
122 
123  CuMatrix<Real> C(S);
124  C.Cholesky();
125 
126  CuMatrix<Real> S2(M, M);
127  S2.AddMatMat(1.0, C, kNoTrans, C, kTrans, 0.0);
128  S.CopyLowerToUpper();
129  KALDI_ASSERT(S.ApproxEqual(S2));
130  }
131 }
132 
133 
134 
135 
136 
137 
138 
139 /*
140  * CuMatrix
141  */
142 template<typename Real>
144  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
145  Matrix<Real> H(M, N);
146  H.SetRandUniform(); // Using uniform distribution to ensure positive numbers
147  H.Add(0.1); // Add bias to eliminate zeros
148  H.MulElements(H); // make numbers positive
149 
150  CuMatrix<Real> D(H);
151 
152  D.ApplyLog();
153  H.ApplyLog();
154 
155  Matrix<Real> H2(D);
156 
157  KALDI_ASSERT(ApproxEqual(H,H2));
158 }
159 
160 
161 /*
162  * CuMatrix
163  */
164 template<typename Real>
166  int32 M = 10 + Rand() % 20;
167  int32 N = 10 + Rand() % 20;
168  Matrix<Real> H(M, N);
169  H.SetRandn();
170 
171  CuMatrix<Real> D(H);
172 
173  D.ApplyExpSpecial();
174  H.ApplyExpSpecial();
175 
176  Matrix<Real> H2(D);
177  KALDI_ASSERT(ApproxEqual(H,H2));
178 }
179 
180 template<typename Real>
182  int32 M = 10 + Rand() % 20, N = 10 + Rand() % 20;
183  Matrix<Real> H(M, N);
184  H.SetRandn();
185 
186  CuMatrix<Real> D(H);
187 
188  D.ApplyExp();
189  H.ApplyExp();
190 
191  Matrix<Real> H2(D);
192 
193  KALDI_ASSERT(ApproxEqual(H,H2));
194 }
195 
196 
197 template<typename Real>
199  int32 M = 10 + Rand() % 20, N = 10 + Rand() % 20;
200  Matrix<Real> H(M, N);
201  H.SetRandn();
202 
203  BaseFloat lower_limit = -0.2, upper_limit = 0.2;
204 
205  CuMatrix<Real> D(H);
206  D.ApplyExpLimited(lower_limit, upper_limit);
207 
208  H.ApplyFloor(lower_limit);
209  H.ApplyCeiling(upper_limit);
210  H.ApplyExp();
211  Matrix<Real> H2(D);
212 
213  KALDI_ASSERT(ApproxEqual(H,H2));
214 }
215 
216 
217 
218 template<typename Real>
219 static void UnitTestCuMatrixSigmoid() {
220  for (int32 i = 0; i < 2; i++) {
221  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
222  Matrix<Real> H(M, N);
223  H.SetRandn();
224  H.MulElements(H); // make numbers positive
225 
226  CuMatrix<Real> D(H);
227  CuMatrix<Real> E(M, N);
228 
229  E.Sigmoid(D);
230  H.Sigmoid(H);
231 
232  Matrix<Real> H2(E);
233 
234  KALDI_ASSERT(ApproxEqual(H, H2));
235  }
236 }
237 
238 template<typename Real>
239 static void UnitTestCuMatrixScale() {
240  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
241  Matrix<Real> H(M, N);
242  H.SetRandn();
243 
244  BaseFloat scale = -1 + (0.33 * (Rand() % 5));
245  CuMatrix<Real> D(H);
246  D.Scale(scale);
247  H.Scale(scale);
248  Matrix<Real> E(D);
249 
250  KALDI_ASSERT(ApproxEqual(H, E));
251 }
252 
253 template<typename Real>
254 static void UnitTestCuMatrixAdd() {
255  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
256  Matrix<Real> H(M, N);
257  H.SetRandn();
258 
259  BaseFloat offset = -1 + (0.33 * (Rand() % 5));
260  CuMatrix<Real> D(H);
261  D.Add(offset);
262  H.Add(offset);
263  Matrix<Real> E(D);
264 
265  KALDI_ASSERT(ApproxEqual(H, E));
266 }
267 
268 
269 template<typename Real>
271  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
272  Matrix<Real> H(M, N);
273  H.SetRandn();
274  H.MulElements(H); // make numbers positive
275 
276  CuMatrix<Real> D(H);
277  CuMatrix<Real> E(M, N);
278 
279  E.SoftHinge(D);
280  H.SoftHinge(H);
281 
282  Matrix<Real> H2(E);
283 
284  KALDI_ASSERT(ApproxEqual(H,H2));
285 }
286 
287 template<typename Real>
289  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
290  Real power[] = { 1.4, 1.6, 0.1234, 2.123, 0, 1, 2,
291  std::numeric_limits<Real>::infinity() };
292  for (int32 K = 5; K < 7; K++) {
293  for (int32 i = 0; i < 2 * sizeof(power) / sizeof(Real); ++i) {
294  Real p = power[i / 2];
295  int32 N_src = N * K;
296  Matrix<Real> H_src(M, N_src);
297  H_src.SetRandn();
298  if (i % 2 == 0)
299  H_src.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to
300  // do derivatives.
301  Matrix<Real> H(M, N);
302  H.GroupPnorm(H_src, p);
303  CuMatrix<Real> D(H_src);
304  CuMatrix<Real> E(M, N);
305  E.GroupPnorm(D, p);
306  Matrix<Real> H2(E);
307  KALDI_ASSERT(ApproxEqual(H, H2));
308  }
309  }
310 }
311 
312 template<typename Real>
314  int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
315  // M = 256; N = 256;
316  for (int32 K = 5; K < 7; K++) {
317  int32 N_src = N * K;
318  Matrix<Real> H_src(M, N_src);
319  H_src.SetRandn();
320  if (rand () % 2 == 0)
321  H_src.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to
322  // do derivatives.
323  Matrix<Real> H(M, N);
324  H.GroupMax(H_src);
325  CuMatrix<Real> D(H_src);
326  CuMatrix<Real> E(M, N);
327  E.GroupMax(D);
328  Matrix<Real> H2(E);
329  KALDI_ASSERT(ApproxEqual(H,H2));
330  }
331 }
332 
333 template<typename Real>
334 static void UnitTestCuMatrixSet() {
335  for (int32 i = 0; i < 2; i++) {
336  BaseFloat value= 0.333;
337  int32 dimM = 10 + Rand() % 600, dimN = 10 + Rand() % 400;
338  CuMatrix<Real> m1(dimM, dimN);
339  Matrix<Real> m2(dimM, dimN);
340  m1.Set(value);
341  m2.Set(value);
342  Matrix<Real> m3(m1);
343  KALDI_ASSERT(ApproxEqual(m2, m3));
344  }
345 }
346 
347 
348 template<typename Real>
350 
351  for (int32 i = 0; i < 2; i++) {
352  BaseFloat pow = 0.5 * (Rand() % 6);
353 
354  Matrix<Real> H(10 + Rand() % 60, 10 + Rand() % 20);
355  H.SetRandn();
356  H.Row(0).Set(0.0);
357  if (i == 2) { Matrix<Real> tmp(H, kTrans); H = tmp; }
358 
359  if (pow != 1.0 && pow != 2.0 && pow != 3.0)
360  H.MulElements(H); //make numbers positive
361 
362  CuMatrix<Real> cH(H);
363 
364  cH.ApplyPow(pow);
365 
366  H.ApplyPow(pow);
367  Matrix<Real> H2(cH);
368  KALDI_ASSERT(ApproxEqual(H, H2));
369  }
370 }
371 
372 template<typename Real>
374 
375  for (int32 i = 0; i < 2; i++) {
376  BaseFloat pow = 0.5 * (Rand() % 6);
377 
378  Matrix<Real> H(10 + Rand() % 60, 10 + Rand() % 20);
379  H.SetRandn();
380  H.Row(0).Set(0.0);
381  if (i == 2) { Matrix<Real> tmp(H, kTrans); H = tmp; }
382 
383  CuMatrix<Real> cH(H);
384 
385  cH.ApplyPowAbs(pow, true);
386 
387  H.ApplyPowAbs(pow, true);
388  Matrix<Real> H2(cH);
389  KALDI_ASSERT(ApproxEqual(H, H2));
390  }
391 }
392 
393 
394 template<typename Real>
396  for (int32 p = 0; p < 2; p++) {
397  int32 num_rows = 100 + Rand() % 255, num_cols;
398  if (p <= 2) num_cols = 128;
399  else if (p <= 4) num_cols = 256;
400  else num_cols = 100 + Rand() % 200;
401 
402  int32 vec_dim;
403  if (p % 2 == 0) vec_dim = num_cols;
404  else vec_dim = num_cols * num_rows;
405 
406  CuVector<Real> cu_vec(vec_dim);
407  cu_vec.SetRandn();
408  Vector<Real> vec(cu_vec);
409 
410  CuMatrix<Real> cu_mat(num_rows, num_cols);
411  cu_mat.CopyRowsFromVec(cu_vec);
412  Matrix<Real> mat(num_rows, num_cols);
413  mat.CopyRowsFromVec(vec);
414 
415  Matrix<Real> mat2(cu_mat);
416  KALDI_ASSERT(ApproxEqual(mat, mat2));
417  }
418 }
419 
420 
421 template<typename Real>
423  for (int32 p = 0; p < 2; p++) {
424  int32 num_rows = 100 + Rand() % 255;
425  int32 num_cols = 100 + Rand() % 200;
426 
427  int32 vec_dim;
428  if (p % 2 == 0) vec_dim = num_rows;
429  else vec_dim = num_cols * num_rows;
430 
431  CuVector<Real> cu_vec(vec_dim);
432  cu_vec.SetRandn();
433  Vector<Real> vec(cu_vec);
434 
435  CuMatrix<Real> cu_mat(num_rows, num_cols);
436  cu_mat.CopyColsFromVec(cu_vec);
437  Matrix<Real> mat(num_rows, num_cols);
438  mat.CopyColsFromVec(vec);
439 
440  Matrix<Real> mat2(cu_mat);
441  KALDI_ASSERT(ApproxEqual(mat, mat2));
442  }
443 }
444 
445 
446 template<typename Real>
448  for (int32 p = 0; p < 2; p++) {
449  MatrixIndexT num_rows1 = 10 + Rand() % 10,
450  num_rows2 = 10 + Rand() % 10,
451  num_cols = 10 + Rand() % 10;
452  CuMatrix<Real> M(num_rows1, num_cols);
453  M.SetRandn();
454 
455  CuMatrix<Real> N1(num_rows2, num_cols),
456  N2(num_rows2, num_cols), O(num_rows2, num_cols);
457  std::vector<int32> reorder(num_rows2);
458  std::vector<const Real*> reorder_src(num_rows2, NULL);
459  for (int32 i = 0; i < num_rows2; i++) {
460  reorder[i] = -1 + (Rand() % (num_rows1 + 1));
461  if (reorder[i] != -1) {
462  reorder_src[i] = M.RowData(reorder[i]);
463  }
464  }
465 
466  CuArray<int32> reorder_cuda(reorder);
467  CuArray<const Real*> reorder_src_cuda(reorder_src);
468  N1.CopyRows(M, reorder_cuda);
469  N2.CopyRows(reorder_src_cuda);
470 
471  for (int32 i = 0; i < num_rows2; i++)
472  for (int32 j = 0; j < num_cols; j++)
473  if (reorder[i] < 0) O(i, j) = 0;
474  else O(i, j) = M(reorder[i], j);
475 
476  KALDI_ASSERT(ApproxEqual(N1, O));
477  KALDI_ASSERT(ApproxEqual(N2, O));
478  }
479 }
480 
481 
482 template<typename Real>
484  for (int32 p = 0; p < 2; p++) {
485  MatrixIndexT num_rows1 = 10 + Rand() % 10,
486  num_rows2 = 10 + Rand() % 10,
487  num_cols = 10 + Rand() % 10;
488  CuMatrix<Real> M(num_rows1, num_cols);
489  M.SetRandn();
490 
491  CuMatrix<Real> N(num_rows2, num_cols), O(num_rows2, num_cols);
492  std::vector<Real*> reorder_dst(num_rows1, NULL);
493  unordered_map<MatrixIndexT, bool> used_index;
494  for (int32 i = 0; i < num_rows1; i++) {
495  MatrixIndexT index = -1 + (Rand() % (num_rows2 + 1));
496  if (used_index.find(index) == used_index.end()) {
497  used_index[index] = true;
498  } else {
499  index = -1;
500  }
501  if (index != -1) {
502  reorder_dst[i] = N.RowData(index);
503  for (int32 j = 0; j < num_cols; j++)
504  O(index, j) = M(i, j);
505  }
506  }
507 
508  CuArray<Real*> reorder_dst_cuda(reorder_dst);
509  M.CopyToRows(reorder_dst_cuda);
510 
511  KALDI_ASSERT(ApproxEqual(N, O));
512  }
513 }
514 
515 
516 template<typename Real>
517 static void UnitTestCuMatrixAddRows() {
518  for (int32 p = 0; p < 2; p++) {
519  MatrixIndexT num_rows1 = 10 + Rand() % 10,
520  num_rows2 = 10 + Rand() % 10,
521  num_cols = 10 + Rand() % 10;
522  CuMatrix<Real> M(num_rows1, num_cols);
523  M.SetRandn();
524 
525  CuMatrix<Real> N1(num_rows2, num_cols),
526  N2(num_rows2, num_cols), O(num_rows2, num_cols);
527  std::vector<int32> reorder(num_rows2);
528  std::vector<const Real*> reorder_src(num_rows2, NULL);
529  for (int32 i = 0; i < num_rows2; i++) {
530  reorder[i] = -1 + (Rand() % (num_rows1 + 1));
531  if (reorder[i] != -1)
532  reorder_src[i] = M.RowData(reorder[i]);
533  }
534 
535  Real alpha =
536  static_cast<Real>((Rand() % num_rows2)) / static_cast<Real>(num_rows1);
537 
538  CuArray<int32> reorder_cuda(reorder);
539  CuArray<const Real*> reorder_src_cuda(reorder_src);
540  N1.AddRows(alpha, M, reorder_cuda);
541  N2.AddRows(alpha, reorder_src_cuda);
542 
543  for (int32 i = 0; i < num_rows2; i++) {
544  if (reorder[i] != -1) {
545  for (int32 j = 0; j < num_cols; j++) {
546  O(i, j) += alpha * M(reorder[i], j);
547  }
548  }
549  }
550 
551  KALDI_ASSERT(ApproxEqual(N1, O));
552  KALDI_ASSERT(ApproxEqual(N2, O));
553  }
554 }
555 
556 
557 template<typename Real>
558 static void UnitTestCuMatrixMulRows() {
559  for (int32 p = 0; p < 2; p++) {
560  MatrixIndexT num_rows1 = 10 + Rand() % 10,
561  num_rows2 = 10 + Rand() % 10,
562  num_cols = 10 + Rand() % 10;
563  CuMatrix<Real> M(num_rows1, num_cols);
564  M.SetRandn();
565 
566  CuMatrix<Real> N1(num_rows2, num_cols),
567  O(num_rows2, num_cols);
568  std::vector<int32> reorder(num_rows2);
569  std::vector<const Real*> reorder_src(num_rows2, NULL);
570  for (int32 i = 0; i < num_rows2; i++) {
571  reorder[i] = -1 + (Rand() % (num_rows1 + 1));
572  if (reorder[i] != -1)
573  reorder_src[i] = M.RowData(reorder[i]);
574  }
575 
576  CuArray<int32> reorder_cuda(reorder);
577  N1.MulRows(M, reorder_cuda);
578 
579  for (int32 i = 0; i < num_rows2; i++) {
580  if (reorder[i] != -1) {
581  CuSubVector<Real> O_row(O, i),
582  M_row(M, reorder[i]);
583  O_row.MulElements(M_row);
584  }
585  }
586 
587  KALDI_ASSERT(ApproxEqual(N1, O));
588  }
589 }
590 
591 
592 
593 template<typename Real>
595  for (int32 p = 0; p < 2; p++) {
596  MatrixIndexT num_rows1 = 10 + Rand() % 10,
597  num_rows2 = 10 + Rand() % 10,
598  num_cols = 10 + Rand() % 10;
599  CuMatrix<Real> M(num_rows1, num_cols);
600  M.SetRandn();
601 
602  Real alpha =
603  static_cast<Real>((Rand() % num_rows2)) / static_cast<Real>(num_rows1);
604 
605  CuMatrix<Real> N1(num_rows2, num_cols), N2(num_rows2, num_cols),
606  O(num_rows2, num_cols);
607  std::vector<int32> reorder(num_rows1);
608  std::vector<Real*> reorder_dst(num_rows1, NULL);
609  unordered_map<MatrixIndexT, bool> used_index;
610  for (int32 i = 0; i < num_rows1; i++) {
611  MatrixIndexT index = -1 + (Rand() % (num_rows2 + 1));
612  if (used_index.find(index) == used_index.end()) {
613  used_index[index] = true;
614  } else {
615  index = -1;
616  }
617  reorder[i] = index;
618  if (index != -1) {
619  reorder_dst[i] = N1.RowData(index);
620  for (int32 j = 0; j < num_cols; j++)
621  O(index, j) += alpha * M(i, j);
622  }
623  }
624 
625  CuArray<int32> reorder_cuda(reorder);
626  CuArray<Real*> reorder_dst_cuda(reorder_dst);
627  M.AddToRows(alpha, reorder_dst_cuda);
628  M.AddToRows(alpha, reorder_cuda, &N2);
629  KALDI_ASSERT(ApproxEqual(N1, O));
630  KALDI_ASSERT(ApproxEqual(N2, O));
631  }
632 }
633 
634 
635 template<typename Real>
637  for (int32 i = 0; i < 2; i++) {
638  int32 M = 100 + Rand() % 255, N = 100 + Rand() % 255;
639  if (Rand() % 3 == 0) { M = 0; N = 0; }
640  CuMatrix<Real> mat1(M, N);
641  mat1.SetRandn();
642  if (i % 2 == 0) {
643  CuMatrix<float> mat2(M, N);
644  mat2.CopyFromMat(mat1);
645  CuMatrix<Real> mat3(M, N);
646  mat3.CopyFromMat(mat2);
647  KALDI_ASSERT(ApproxEqual(mat1, mat3));
648  } else {
649  CuMatrix<float> mat2(N, M);
650  mat2.CopyFromMat(mat1, kTrans);
651  CuMatrix<Real> mat3(M, N);
652  mat3.CopyFromMat(mat2, kTrans);
653  KALDI_ASSERT(ApproxEqual(mat1, mat3));
654  }
655  }
656 }
657 
658 template<typename Real> void UnitTestCuMatrixCopyCross2() {
659  for (int32 i = 0; i < 2; i++) {
660  int32 M = 100 + Rand() % 255, N = 100 + Rand() % 255;
661  if (Rand() % 3 == 0) { M = 0; N = 0; }
662  CuMatrix<Real> mat1(M, N);
663  mat1.SetRandn();
664  Matrix<float> mat2(M, N);
665  mat2.CopyFromMat(mat1);
666  CuMatrix<Real> mat3(M, N);
667  mat3.CopyFromMat(mat2);
668  KALDI_ASSERT(ApproxEqual(mat1, mat3));
669  }
670 }
671 
672 template<typename Real>
674  for (int32 p = 0; p < 2; p++) {
675  MatrixIndexT num_cols1 = 10 + Rand() % 10,
676  num_cols2 = 10 + Rand() % 10,
677  num_rows = 10 + Rand() % 10;
678  Matrix<Real> src(num_rows, num_cols1);
679  Matrix<Real> dst(num_rows, num_cols2);
680  std::vector<Int32Pair> indices(num_cols2);
681  for (int32 i = 0; i < num_cols2; i++) {
682  indices[i].first = Rand() % num_cols1;
683  int32 headroom = num_cols1 - indices[i].first,
684  size = (Rand() % headroom) + 1;
685  indices[i].second = indices[i].first + size;
686  KALDI_ASSERT(indices[i].second >= indices[i].first &&
687  indices[i].second <= num_cols1 &&
688  indices[i].first >= 0);
689  // In the test we allow second == first.
690  }
691  src.SetRandn();
692  // Simple computation:
693  for (MatrixIndexT i = 0; i < num_rows; i++) {
694  for (MatrixIndexT j = 0; j < num_cols2; j++) {
695  int32 start = indices[j].first, end = indices[j].second;
696  Real sum = 0.0;
697  for (MatrixIndexT j2 = start; j2 < end; j2++)
698  sum += src(i, j2);
699  dst(i, j) = sum;
700  }
701  }
702  CuMatrix<Real> cu_src(src);
703  CuMatrix<Real> cu_dst(num_rows, num_cols2, kUndefined);
704  CuArray<Int32Pair> indices_tmp(indices);
705  cu_dst.SumColumnRanges(cu_src, indices_tmp);
706  Matrix<Real> dst2(cu_dst);
707  KALDI_ASSERT(ApproxEqual(dst, dst2));
708  }
709 }
710 
711 
712 template<typename Real>
714  for (int32 p = 0; p < 10; p++) {
715  MatrixIndexT num_rows1 = 10 + Rand() % 10,
716  num_rows2 = 10 + Rand() % 10,
717  num_cols = 10 + Rand() % 10;
718  Matrix<Real> src(num_rows1, num_cols); src.SetRandn();
719  Matrix<Real> dst(num_rows2, num_cols); dst.SetRandn();
720 
721  // Computes the indexes.
722  std::vector<Int32Pair> indexes(num_rows2);
723  for (MatrixIndexT i = 0; i < num_rows2; i++) {
724  indexes[i].first = Rand() % num_rows1;
725  int32 headroom = num_rows1 - indexes[i].first,
726  size = (Rand() % headroom) + 1;
727  indexes[i].second = indexes[i].first + size;
728  KALDI_ASSERT(indexes[i].second >= indexes[i].first &&
729  indexes[i].second <= num_rows1 &&
730  indexes[i].first >= 0);
731  }
732  // Computes reference matrix.
733  Matrix<Real> dst1(dst);
734  for (MatrixIndexT i = 0; i < num_rows2; i++) {
735  int32 start = indexes[i].first, end = indexes[i].second;
736  for (MatrixIndexT j = 0; j < num_cols; j++) {
737  for (MatrixIndexT i2 = start; i2 < end; i2++)
738  dst1(i, j) += src(i2, j);
739  }
740  }
741 
742  CuMatrix<Real> cu_src(src);
743  CuMatrix<Real> cu_dst(dst);
744  CuArray<Int32Pair> cu_indexes(indexes);
745  cu_dst.AddRowRanges(cu_src, cu_indexes);
746  Matrix<Real> dst2(cu_dst);
747  KALDI_ASSERT(ApproxEqual(dst1, dst2));
748  }
749 }
750 
751 
752 template<typename Real>
754  for (int32 p = 0; p < 2; p++) {
755  MatrixIndexT num_cols1 = 10 + Rand() % 10,
756  num_cols2 = 10 + Rand() % 10,
757  num_rows = 10 + Rand() % 10;
758  CuMatrix<Real> M(num_rows, num_cols1);
759  M.SetRandn();
760 
761  CuMatrix<Real> N(num_rows, num_cols2), O(num_rows, num_cols2);
762  std::vector<int32> reorder(num_cols2);
763  for (int32 i = 0; i < num_cols2; i++)
764  reorder[i] = -1 + (Rand() % (num_cols1 + 1));
765 
766  CuArray<int32> reorder_gpu(reorder);
767  N.CopyCols(M, reorder_gpu);
768 
769  for (int32 i = 0; i < num_rows; i++)
770  for (int32 j = 0; j < num_cols2; j++)
771  if (reorder[j] < 0) O(i, j) = 0;
772  else O(i, j) = M(i, reorder[j]);
773  KALDI_ASSERT(ApproxEqual(N, O));
774  }
775 }
776 
777 template<typename Real>
778 static void UnitTextCuMatrixAddSmat() {
779  for (int i = 0; i < 2; ++i) {
780  int rows = 10 + Rand() % 40;
781  int cols = 10 + Rand() % 50;
782  int srows = rows;
783  int scols = cols;
784 
785  MatrixTransposeType trans = (i % 2 == 0) ? kNoTrans : kTrans;
786  if (trans == kTrans) {
787  std::swap(srows, scols);
788  }
789 
790  Real alpha = 0.345;
791 
792  Matrix<Real> mat(rows, cols);
793  mat.SetRandn();
794  CuMatrix<Real> cumat(mat);
795 
796  SparseMatrix<Real> smat(srows, scols);
797  smat.SetRandn(0.5);
798  CuSparseMatrix<Real> cusmat(smat);
799 
800  mat.AddSmat(alpha, smat, trans);
801  cumat.AddSmat(alpha, cusmat, trans);
802 
803  Matrix<Real> mat2(cumat);
804 
805  KALDI_ASSERT(ApproxEqual(mat, mat2));
806  }
807 }
808 
809 template<typename Real>
811  for (int i = 0; i < 2; ++i) {
812  int m = 10 + Rand() % 40;
813  int k = 10 + Rand() % 60;
814  int n = 10 + Rand() % 50;
815  int srows = k;
816  int scols = n;
817 
818  MatrixTransposeType trans = (i % 2 == 0) ? kNoTrans : kTrans;
819  if (trans == kTrans) {
820  std::swap(srows, scols);
821  }
822 
823  Real alpha = 0.345;
824  Real beta = 0.567;
825 
826  Matrix<Real> mat(m, k);
827  mat.SetRandn();
828  CuMatrix<Real> cumat(mat);
829 
830  Matrix<Real> result(m, n);
831  result.SetRandn();
832  CuMatrix<Real> curesult(result);
833 
834  SparseMatrix<Real> smat(srows, scols);
835  smat.SetRandn(0.8);
836  CuSparseMatrix<Real> cusmat(smat);
837 
838  result.AddMatSmat(alpha, mat, smat, trans, beta);
839  curesult.AddMatSmat(alpha, cumat, cusmat, trans, beta);
840 
841  Matrix<Real> result2(curesult);
842 
843  KALDI_ASSERT(ApproxEqual(result, result2));
844  }
845 }
846 
847 template<typename Real>
849  for (int i = 0; i < 2; ++i) {
850  int m = 10 + Rand() % 40;
851  int k = 10 + Rand() % 60;
852  int n = 10 + Rand() % 50;
853  int srows = m;
854  int scols = k;
855 
856  MatrixTransposeType trans = (i % 2 == 0) ? kNoTrans : kTrans;
857  if (trans == kTrans) {
858  std::swap(srows, scols);
859  }
860 
861  Real alpha = 0.345;
862  Real beta = 0.567;
863 
864  SparseMatrix<Real> smat(srows, scols);
865  smat.SetRandn(0.8);
866  CuSparseMatrix<Real> cusmat(smat);
867 
868  Matrix<Real> mat(k, n);
869  mat.SetRandn();
870  CuMatrix<Real> cumat(mat);
871 
872  Matrix<Real> result(m, n);
873  result.SetRandn();
874  CuMatrix<Real> curesult(result);
875 
876  result.AddSmatMat(alpha, smat, trans, mat, beta);
877  curesult.AddSmatMat(alpha, cusmat, trans, cumat, beta);
878 
879  Matrix<Real> result2(curesult);
880 
881  KALDI_ASSERT(ApproxEqual(result, result2));
882  }
883 }
884 
885 template<typename Real>
886 static void UnitTestCuMatrixAddCols() {
887  for (int32 p = 0; p < 2; p++) {
888  MatrixIndexT num_cols1 = 10 + Rand() % 10,
889  num_cols2 = 10 + Rand() % 10,
890  num_rows = 10 + Rand() % 10;
891  CuMatrix<Real> M(num_rows, num_cols1);
892  M.SetRandn();
893 
894  CuMatrix<Real> N(num_rows, num_cols2), O(num_rows, num_cols2);
895  std::vector<int32> reorder(num_cols2);
896  for (int32 i = 0; i < num_cols2; i++)
897  reorder[i] = -1 + (Rand() % (num_cols1 + 1));
898 
899  CuArray<int32> reorder_gpu(reorder);
900  N.AddCols(M, reorder_gpu);
901 
902  for (int32 i = 0; i < num_rows; i++)
903  for (int32 j = 0; j < num_cols2; j++)
904  if (reorder[j] < 0) O(i, j) = 0;
905  else O(i, j) = M(i, reorder[j]);
906  KALDI_ASSERT(ApproxEqual(N, O));
907  }
908 }
909 
910 
911 template<typename Real>
913 
914  for (int32 i = 0; i < 3; i++) {
915  BaseFloat floor = 0.33 * (Rand() % 6);
916 
917  Matrix<Real> H(10 + Rand() % 600, 10 + Rand() % 20);
918  H.SetRandn();
919  if (i == 2) { Matrix<Real> tmp(H, kTrans); H = tmp; }
920 
921  CuMatrix<Real> cH(H);
922 
923  cH.ApplyFloor(floor);
924 
925  H.ApplyFloor(floor);
926  Matrix<Real> H2(cH);
927 
928  KALDI_ASSERT(ApproxEqual(H, H2));
929  }
930 }
931 
932 template<typename Real>
934 
935  for (int32 i = 0; i < 3; i++) {
936  BaseFloat ceiling = 0.33 * (Rand() % 6);
937 
938  Matrix<Real> H(10 + Rand() % 600, 10 + Rand() % 20);
939  H.SetRandn();
940  if (i == 2) { Matrix<Real> tmp(H,kTrans); H = tmp; }
941 
942  CuMatrix<Real> cH(H);
943 
944  cH.ApplyCeiling(ceiling);
945 
946  H.ApplyCeiling(ceiling);
947  Matrix<Real> H2(cH);
948 
949  KALDI_ASSERT(ApproxEqual(H, H2));
950  }
951 }
952 
953 template<typename Real>
955 
956  for (int32 i = 0; i < 1; i++) {
957  Matrix<Real> H(10 + Rand() % 60, 10 + Rand() % 20);
958  H.SetRandn();
959  H.Row(0).Set(0.0);
960  if (i == 2) { Matrix<Real> tmp(H, kTrans); H = tmp; }
961 
962 
963  CuMatrix<Real> cH(H);
964 
965  cH.ApplyHeaviside();
966  H.ApplyHeaviside();
967  Matrix<Real> H2(cH);
968  KALDI_ASSERT(ApproxEqual(H, H2));
969  }
970 }
971 
972 
973 template<typename Real>
975 
976  for (int32 i = 0; i < 1; i++) {
977  Matrix<Real> H(10 + Rand() % 60, 10 + Rand() % 20);
978  H.SetRandn();
979  H.Row(0).Set(0.0);
980  if (i == 2) { Matrix<Real> tmp(H, kTrans); H = tmp; }
981 
982  CuMatrix<Real> cH(H);
983  CuMatrix<Real> cH2(H.NumRows(), H.NumCols(), kUndefined);
984  cH2.Heaviside(cH);
985  H.ApplyHeaviside();
986  Matrix<Real> H2(cH2);
987  KALDI_ASSERT(ApproxEqual(H, H2));
988  }
989 }
990 
991 
992 template<typename Real>
994  for (int32 i = 0; i < 2; i++) {
995  MatrixIndexT dimM = 100 + Rand() % 256, dimN = 100 + Rand() % 256;
996 
997  Matrix<Real> Ha(dimM, dimN);
998  Matrix<Real> Hb(dimM, dimN);
999  Ha.SetRandn();
1000  Hb.SetRandn();
1001 
1002  CuMatrix<Real> Da(dimM, dimN);
1003  CuMatrix<Real> Db(dimM, dimN);
1004  Da.CopyFromMat(Ha);
1005  Db.CopyFromMat(Hb);
1006 
1007  Da.MulElements(Db);
1008  Ha.MulElements(Hb);
1009 
1010  Matrix<Real> Ha2(dimM, dimN);
1011  Da.CopyToMat(&Ha2);
1012 
1013  KALDI_ASSERT(ApproxEqual(Ha,Ha2));
1014  }
1015 }
1016 
1017 template<typename Real>
1019  for (int32 i = 0; i < 2; i++) {
1020  MatrixIndexT dimM = 100 + Rand() % 256, dimN = 100 + Rand() % 256;
1021 
1022  Matrix<Real> Ha(dimM, dimN);
1023  Matrix<Real> Hb(dimM, dimN);
1024  Ha.SetRandn();
1025 
1026  Hb.SetRandUniform(); // Use uniform distirbution t ensure positive numbers
1027  Hb.Add(0.1); // Add bias to ensure we do not divide by zero
1028 
1029  CuMatrix<Real> Da(dimM, dimN);
1030  CuMatrix<Real> Db(dimM, dimN);
1031  Da.CopyFromMat(Ha);
1032  Db.CopyFromMat(Hb);
1033 
1034  Da.DivElements(Db);
1035  Ha.DivElements(Hb);
1036 
1037  Matrix<Real> Ha2(dimM, dimN);
1038  Da.CopyToMat(&Ha2);
1039 
1040  KALDI_ASSERT(ApproxEqual(Ha,Ha2));
1041  }
1042 }
1043 
1044 template<typename Real>
1045 static void UnitTestCuMatrixMax() {
1046  Matrix<Real> Ha(100,100);
1047  Matrix<Real> Hb(100,100);
1048  Ha.SetRandn();
1049  Hb.SetRandn();
1050 
1051  CuMatrix<Real> Da(100,100);
1052  CuMatrix<Real> Db(100,100);
1053 
1054  Da.CopyFromMat(Ha);
1055  Db.CopyFromMat(Hb);
1056 
1057  Da.Max(Db);
1058  Ha.Max(Hb);
1059 
1060  Matrix<Real> Ha2(100,100);
1061  Da.CopyToMat(&Ha2);
1062 
1063  KALDI_ASSERT(ApproxEqual(Ha,Ha2));
1064 }
1065 
1066 template<typename Real>
1067 static void UnitTestCuMatrixMin() {
1068  Matrix<Real> Ha(100,100);
1069  Matrix<Real> Hb(100,100);
1070  Ha.SetRandn();
1071  Hb.SetRandn();
1072 
1073  CuMatrix<Real> Da(100,100);
1074  CuMatrix<Real> Db(100,100);
1075  Da.CopyFromMat(Ha);
1076  Db.CopyFromMat(Hb);
1077 
1078  Da.Min(Db);
1079  Ha.Min(Hb);
1080 
1081  Matrix<Real> Ha2(100,100);
1082  Da.CopyToMat(&Ha2);
1083 
1084  KALDI_ASSERT(ApproxEqual(Ha, Ha2));
1085 }
1086 
1087 
1088 
1089 template<typename Real>
1091  Matrix<Real> Hm(100,99);
1092  Vector<Real> Hv(99);
1093  Hm.SetRandn();
1094  InitRand(&Hv);
1095 
1096  CuMatrix<Real> Dm(100,99);
1097  CuVector<Real> Dv(99);
1098  Dm.CopyFromMat(Hm);
1099  Dv.CopyFromVec(Hv);
1100 
1101  Dm.MulColsVec(Dv);
1102  Hm.MulColsVec(Hv);
1103 
1104  Matrix<Real> Hm2(100,99);
1105  Dm.CopyToMat(&Hm2);
1106 
1107  KALDI_ASSERT(ApproxEqual(Hm,Hm2));
1108 }
1109 
1110 
1111 
1112 template<typename Real>
1114  for (int32 i = 0; i < 2; i++) {
1115  int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200;
1116  // int32 dimM = 256, dimN = 256;
1117  Matrix<Real> Hm(dimM, dimN);
1118  Vector<Real> Hv(dimM);
1119  Hm.SetRandn();
1120  InitRand(&Hv);
1121 
1122  CuMatrix<Real> Dm(dimM, dimN);
1123  CuVector<Real> Dv(dimM);
1124  Dm.CopyFromMat(Hm);
1125  Dv.CopyFromVec(Hv);
1126 
1127  Dm.MulRowsVec(Dv);
1128  Hm.MulRowsVec(Hv);
1129 
1130  Matrix<Real> Hm2(dimM, dimN);
1131  Dm.CopyToMat(&Hm2);
1132 
1133  KALDI_ASSERT(ApproxEqual(Hm,Hm2));
1134  }
1135 }
1136 
1137 template<typename Real>
1139  for (int32 i = 0; i < 2; i++) {
1140  int32 dimM = 100 + Rand() % 200, dimNs = 100 + Rand() % 200;
1141  int32 group_size = 1 + Rand() % 10;
1142  //int32 group_size = 1;
1143  int32 dimN = group_size * dimNs;
1144  Matrix<Real> Hm(dimM, dimN);
1145  Matrix<Real> Hs(dimM, dimNs);
1146  Hm.SetRandn();
1147  Hs.SetRandn();
1148 
1149  CuMatrix<Real> Dm(dimM, dimN);
1150  CuMatrix<Real> Ds(dimM, dimNs);
1151  Dm.CopyFromMat(Hm);
1152  Ds.CopyFromMat(Hs);
1153 
1154  Dm.MulRowsGroupMat(Ds);
1155  Hm.MulRowsGroupMat(Hs);
1156 
1157  Matrix<Real> Hm2(dimM, dimN);
1158  Dm.CopyToMat(&Hm2);
1159  KALDI_ASSERT(ApproxEqual(Hm,Hm2));
1160  }
1161 }
1162 
1163 template<typename Real>
1165  Real p[] = { 1.234, 2.345, 1, 2, std::numeric_limits<Real>::infinity() };
1166  for (int i = 0; i < 2 * sizeof(p) / sizeof(Real); i++) {
1167  int32 dimM = 100 + Rand() % 200, dimNs = 100 + Rand() % 200;
1168  int32 group_size = 1 + Rand() % 10;
1169  BaseFloat power = p[i / 2];
1170  int32 dimN = group_size * dimNs;
1171  Matrix<Real> Hiv(dimM, dimN);
1172  Matrix<Real> Hov(dimM, dimNs);
1173  Matrix<Real> Hid(dimM, dimN);
1174  Matrix<Real> Hod(dimM, dimNs);
1175  Hiv.SetRandn();
1176  Hod.SetRandn();
1177  if (i % 2 == 0)
1178  Hiv.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to
1179  // do derivatives.
1180  Hov.GroupPnorm(Hiv, power);
1181  CuMatrix<Real> Div(dimM, dimN);
1182  CuMatrix<Real> Dov(dimM, dimNs);
1183  CuMatrix<Real> Did(dimM, dimN);
1184  CuMatrix<Real> Dod(dimM, dimNs);
1185  Div.CopyFromMat(Hiv);
1186  Dod.CopyFromMat(Hod);
1187  Dov.CopyFromMat(Hov);
1188 
1189  // GPU
1190  Did.DiffGroupPnorm(Div, Dov, Dod, power);
1191 
1192  // CPU
1193  Hid.GroupPnormDeriv(Hiv, Hov, power);
1194  Hid.MulRowsGroupMat(Hod);
1195 
1196  Matrix<Real> Hid2(dimM, dimN);
1197  Did.CopyToMat(&Hid2);
1198  KALDI_ASSERT(ApproxEqual(Hid, Hid2));
1199  }
1200 }
1201 
1202 
1203 template<typename Real>
1205  int32 dimM = 100 + Rand() % 200, dimNs = 100 + Rand() % 200;
1206  int32 group_size = 1 + Rand() % 10;
1207  // int32 dimM = 256, dimNs = 2;
1208  // int32 group_size = 2;
1209  int32 dimN = group_size * dimNs;
1210  Matrix<Real> Hm(dimM, dimN);
1211  Matrix<Real> Hr(dimM, dimN);
1212  Matrix<Real> Hs(dimM, dimNs);
1213  Hs.SetRandn();
1214  if (rand () % 2 == 0)
1215  Hm.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to
1216  // do derivatives.
1217  Hs.GroupMax(Hm);
1218 
1219  CuMatrix<Real> Dm(dimM, dimN);
1220  CuMatrix<Real> Dr(dimM, dimN);
1221  CuMatrix<Real> Ds(dimM, dimNs);
1222  Dm.CopyFromMat(Hm);
1223  Dr.CopyFromMat(Hr);
1224  Ds.CopyFromMat(Hs);
1225 
1226  // KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ;
1227  Dr.GroupMaxDeriv(Dm, Ds);
1228  Hr.GroupMaxDeriv(Hm, Hs);
1229 
1230  // KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ;
1231  Matrix<Real> Hr2(dimM, dimN);
1232  Dr.CopyToMat(&Hr2);
1233  KALDI_ASSERT(ApproxEqual(Hr,Hr2));
1234 }
1235 
1236 template<typename Real> static void UnitTestCuMatrixAddDiagVecMat() {
1237  for (int p = 0; p < 4; p++) {
1238  MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255;
1239  //MatrixIndexT dimM = 10 + Rand() % 2, dimN = 10 + Rand() % 2;
1240  Real alpha = 0.43243, beta = 1.423;
1241  CuMatrix<Real> M(dimM, dimN), N(dimM, dimN);
1242  M.SetRandn();
1243  N.SetRandn();
1244  MatrixTransposeType trans = (p % 2 == 0 ? kNoTrans : kTrans);
1245  if (trans == kTrans)
1246  N.Transpose();
1247 
1248  KALDI_ASSERT(M.Sum() != 0.0);
1249  KALDI_ASSERT(N.Sum() != 0.0);
1250 
1251  CuVector<Real> V(dimM);
1252  V.SetRandn();
1253 
1254  KALDI_ASSERT(V.Sum() != 0.0);
1255 
1256  CuMatrix<Real> Mcheck(M);
1257 
1258  for (int32 r = 0; r < dimM; r++) {
1259  CuSubVector<Real> Mcheckrow(Mcheck, r);
1260  CuVector<Real> Nrow(dimN);
1261  if (trans == kTrans) Nrow.CopyColFromMat(N, r);
1262  else Nrow.CopyFromVec(N.Row(r));
1263  Mcheckrow.Scale(beta);
1264  Mcheckrow.AddVec(alpha * V(r), Nrow);
1265  }
1266 
1267  M.AddDiagVecMat(alpha, V, N, trans, beta);
1268  KALDI_ASSERT(ApproxEqual(M, Mcheck));
1269  KALDI_ASSERT(M.Sum() != 0.0);
1270  }
1271 }
1272 
1273 template<typename Real> static void UnitTestCuMatrixAddMatDiagVec() {
1274  // M <- alpha * N[^T] * diag(v) + beta * M
1275  for (int p = 0; p < 2; p++) {
1276  MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255;
1277  Real alpha = 0.43243, beta = 1.423;
1278 
1279  CuMatrix<Real> M(dimM, dimN), N(dimM, dimN), buf(dimM, dimN);
1280  M.SetRandn();
1281  N.SetRandn();
1282  buf.CopyFromMat(N);
1283  MatrixTransposeType trans = (p % 2 == 0 ? kNoTrans : kTrans);
1284  if (trans == kTrans)
1285  N.Transpose();
1286 
1287  CuVector<Real> V(dimN);
1288  V.SetRandn();
1289 
1290  CuMatrix<Real> Mcheck(M);
1291  Mcheck.Scale(beta);
1292  buf.MulColsVec(V);
1293  Mcheck.AddMat(alpha, buf, kNoTrans);
1294 
1295  M.AddMatDiagVec(alpha, N, trans, V, beta);
1296  KALDI_ASSERT(ApproxEqual(M, Mcheck));
1297  KALDI_ASSERT(M.Sum() != 0.0);
1298  }
1299 }
1300 
1301 template<typename Real> static void UnitTestCuMatrixAddMatMatElements() {
1302  // M <- alpha *(A .* B) + beta * M
1303  MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255;
1304  Real alpha = 0.43243, beta = 1.423;
1305  CuMatrix<Real> M(dimM, dimN), A(dimM, dimN), B(dimM, dimN), buf(dimM, dimN);
1306  M.SetRandn();
1307  A.SetRandn();
1308  B.SetRandn();
1309 
1310  CuMatrix<Real> Mcheck(M);
1311  buf.CopyFromMat(A); buf.MulElements(B);
1312  Mcheck.Scale(beta); Mcheck.AddMat(alpha, buf, kNoTrans);
1313 
1314  M.AddMatMatElements(alpha, A, B, beta);
1315  KALDI_ASSERT(ApproxEqual(M, Mcheck));
1316  KALDI_ASSERT(M.Sum() != 0.0);
1317 }
1318 
1319 template<typename Real> static void UnitTestCuMatrixSetMatMatDivMat() {
1320  // M = a * b / c (by element; when c = 0, M = a)
1321  MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255;
1322  CuMatrix<Real> M(dimM, dimN), A(dimM, dimN), B(dimM, dimN), C(dimM, dimN);
1323  CuMatrix<Real> ref(dimM, dimN);
1324  M.SetRandn();
1325  A.SetRandn();
1326  B.SetRandn();
1327  C.SetRandn();
1328 
1329  C.ApplyFloor(0.01); // make sure there are no zeros.
1330 
1331  M.SetMatMatDivMat(A,B,C);
1332  ref.AddMatMatElements(1.0, A, B, 0.0);
1333  ref.DivElements(C);
1334  KALDI_ASSERT(ApproxEqual(M, ref));
1335 
1336  C.SetZero();
1337  M.SetMatMatDivMat(A,B,C);
1338  KALDI_ASSERT(ApproxEqual(M, A));
1339 }
1340 
1341 template<typename Real>
1343  MatrixIndexT dimM = 1000, dimN = 5;
1344  Matrix<Real> Hm(dimM, dimN);
1345  Vector<Real> Hv(dimM);
1346  Hm.SetRandn();
1347  InitRand(&Hv);
1348 
1349  CuMatrix<Real> Dm(dimM, dimN);
1350  CuVector<Real> Dv(dimM);
1351  Dm.CopyFromMat(Hm);
1352  Dv.CopyFromVec(Hv);
1353 
1354  Dm.DivRowsVec(Dv);
1355  Hv.InvertElements();
1356  Hm.MulRowsVec(Hv);
1357 
1358  Matrix<Real> Hm2(dimM, dimN);
1359  Dm.CopyToMat(&Hm2);
1360 
1361  KALDI_ASSERT(ApproxEqual(Hm, Hm2));
1362 }
1363 
1364 
1365 
1366 template<typename Real>
1367 static void UnitTestCuMatrixAddMat() {
1368  Matrix<Real> Ha(100,100);
1369  Matrix<Real> Hb(100,100);
1370  Ha.SetRandn();
1371  Hb.SetRandn();
1372 
1373  CuMatrix<Real> Da(100,100);
1374  CuMatrix<Real> Db(100,100);
1375  Da.CopyFromMat(Ha);
1376  Db.CopyFromMat(Hb);
1377 
1378  Da.AddMat(0.5,Db);
1379  Ha.AddMat(0.5,Hb);
1380 
1381  Matrix<Real> Ha2(100,100);
1382  Da.CopyToMat(&Ha2);
1383 
1384  KALDI_ASSERT(ApproxEqual(Ha,Ha2));
1385 
1386  //check use with submatrix
1387  CuMatrix<Real> mat1(10,10,kSetZero);
1388  mat1.AddMat(1.0,Da.Range(5,10,12,10)); //different stride for mat1,mat2
1389  CuMatrix<Real> mat2(Da.Range(5,10,12,10));
1390  KALDI_ASSERT(ApproxEqual(mat1,mat2));
1391 
1392  for (int i = 0; i < 10; i++) {
1393  int32 N = 5 * (10 + Rand() % 10), M = 100 + Rand() % 50;
1394  Matrix<Real> Hc(N,M);
1395  Matrix<Real> Hd(M,N);
1396  Hc.SetRandn();
1397  Hd.SetRandn();
1398 
1399  CuMatrix<Real> Dc(N,M);
1400  CuMatrix<Real> Dd(M,N);
1401  Dc.CopyFromMat(Hc);
1402  Dd.CopyFromMat(Hd);
1403 
1404  Real alpha = 0.5;
1405  Dc.AddMat(alpha,Dd,kTrans);
1406  Hc.AddMat(alpha,Hd,kTrans);
1407 
1408  Matrix<Real> Hc2(N,M);
1409  Dc.CopyToMat(&Hc2);
1410  KALDI_ASSERT(ApproxEqual(Hc,Hc2));
1411 
1412  // check use with submatrix
1413  CuMatrix<Real> mat3(N/5,M,kSetZero);
1414  mat3.AddMat(1.0, Dd.Range(0,M,0,N/5),kTrans);
1415 
1416  CuMatrix<Real> mat4(Dd.Range(0,M,0,N/5),kTrans);
1417  KALDI_ASSERT(ApproxEqual(mat3,mat4));
1418  }
1419 }
1420 
1421 
1422 // this tests the branch of AddMatBlocks() that is taken when
1423 // 'this' has a smaller dimension than 'src' (it sums).
1424 template<typename Real>
1426  for (int32 l = 0; l < 5; l++) {
1427  int32 num_row_blocks = RandInt(1, 10), num_col_blocks = RandInt(1, 20);
1428  int32 block_rows = RandInt(1, 100), block_cols = RandInt(1, 100);
1429  BaseFloat alpha = RandInt(3, 10);
1430  CuMatrix<Real> dst(block_rows, block_cols);
1431  dst.SetRandn();
1432  CuMatrix<Real> src(num_row_blocks * block_rows,
1433  num_col_blocks * block_cols);
1434  src.SetRandn();
1435 
1436  CuMatrix<Real> dst_copy(dst);
1437  for (int32 rb = 0; rb < num_row_blocks; rb++) {
1438  for (int32 cb = 0; cb < num_col_blocks; cb++) {
1439  CuSubMatrix<Real> src_part(src,
1440  rb * block_rows, block_rows,
1441  cb * block_cols, block_cols);
1442  dst_copy.AddMat(alpha, src_part);
1443  }
1444  }
1445  dst.AddMatBlocks(alpha, src);
1446  KALDI_ASSERT(ApproxEqual(dst, dst_copy));
1447  }
1448 }
1449 
1450 // this is as UnitTestCuMatrixAddMatBlocks1, but tests with transpose.
1451 template<typename Real>
1453  for (int32 l = 0; l < 5; l++) {
1454  int32 num_row_blocks = RandInt(1, 10), num_col_blocks = RandInt(1, 20);
1455  int32 block_rows = RandInt(1, 100), block_cols = RandInt(1, 100);
1456  BaseFloat alpha = RandInt(3, 10);
1457  CuMatrix<Real> dst(block_cols, block_rows);
1458  dst.SetRandn();
1459  CuMatrix<Real> src(num_row_blocks * block_rows,
1460  num_col_blocks * block_cols);
1461  src.SetRandn();
1462 
1463  CuMatrix<Real> dst_copy(dst);
1464  for (int32 rb = 0; rb < num_row_blocks; rb++) {
1465  for (int32 cb = 0; cb < num_col_blocks; cb++) {
1466  CuSubMatrix<Real> src_part(src,
1467  rb * block_rows, block_rows,
1468  cb * block_cols, block_cols);
1469  dst_copy.AddMat(alpha, src_part, kTrans);
1470  }
1471  }
1472  dst.AddMatBlocks(alpha, src, kTrans);
1473  KALDI_ASSERT(ApproxEqual(dst, dst_copy));
1474  }
1475 }
1476 
1477 
1478 // this tests the branch of AddMatBlocks() that is taken when
1479 // 'this' has a larger dimension than 'src'. In this case, it does
1480 // a broadcasting rather than a summing operation.
1481 template<typename Real>
1483  for (int32 l = 0; l < 5; l++) {
1484  int32 num_row_blocks = RandInt(1, 10), num_col_blocks = RandInt(1, 20);
1485  int32 block_rows = RandInt(1, 100), block_cols = RandInt(1, 100);
1486  BaseFloat alpha = RandInt(3, 10);
1487  CuMatrix<Real> src(block_rows, block_cols);
1488  src.SetRandn();
1489  CuMatrix<Real> dst(num_row_blocks * block_rows,
1490  num_col_blocks * block_cols);
1491  src.SetRandn();
1492 
1493  CuMatrix<Real> dst_copy(dst);
1494  for (int32 rb = 0; rb < num_row_blocks; rb++) {
1495  for (int32 cb = 0; cb < num_col_blocks; cb++) {
1496  CuSubMatrix<Real> dst_copy_part(dst_copy,
1497  rb * block_rows, block_rows,
1498  cb * block_cols, block_cols);
1499  dst_copy_part.AddMat(alpha, src);
1500  }
1501  }
1502  dst.AddMatBlocks(alpha, src);
1503  KALDI_ASSERT(ApproxEqual(dst, dst_copy));
1504  }
1505 }
1506 
1507 
1508 
1509 
1510 template<typename Real>
1512  int32 M = 100 + Rand() % 300, N = 100 + Rand() % 300;
1513  CuMatrix<Real> A(M, N);
1514  A.SetRandn();
1515  Matrix<Real> mA(A);
1516  KALDI_ASSERT(ApproxEqual(mA.Sum(), A.Sum()));
1517 }
1518 
1519 template<typename Real>
1521  int32 M = 100 + Rand() % 300, N = 100 + Rand() % 300;
1522  CuMatrix<Real> A(M, N);
1523  A.SetRandn();
1524  Matrix<Real> mA(A);
1525  KALDI_ASSERT(ApproxEqual(mA.Max(), A.Max()));
1526 }
1527 
1528 template<typename Real>
1530  int32 M = 100 + Rand() % 300, N = 100 + Rand() % 300;
1531  CuMatrix<Real> A(M, N);
1532  A.SetRandn();
1533  Matrix<Real> mA(A);
1534  KALDI_ASSERT(ApproxEqual(mA.Min(), A.Min()));
1535 }
1536 
1537 template<typename Real>
1539  Matrix<Real> Hm(100,99);
1540  Vector<Real> Hv(100);
1541  Hm.SetRandn();
1542  InitRand(&Hv);
1543 
1544  CuMatrix<Real> Dm(100,99);
1545  CuVector<Real> Dv(100);
1546  Dm.CopyFromMat(Hm);
1547  Dv.CopyFromVec(Hv);
1548 
1549  Dm.AddVecToCols(0.5,Dv);
1550  Hm.AddVecToCols(0.5,Hv);
1551 
1552  Matrix<Real> Hm2(100,99);
1553  Dm.CopyToMat(&Hm2);
1554 
1555  KALDI_ASSERT(ApproxEqual(Hm,Hm2));
1556 }
1557 
1558 
1559 
1560 template<typename Real>
1562  Matrix<Real> Hm(100,99);
1563  Vector<Real> Hv(99);
1564  Hm.SetRandn();
1565  InitRand(&Hv);
1566 
1567  CuMatrix<Real> Dm(100,99);
1568  CuVector<Real> Dv(99);
1569  Dm.CopyFromMat(Hm);
1570  Dv.CopyFromVec(Hv);
1571 
1572  Dm.AddVecToRows(0.5,Dv);
1573  Hm.AddVecToRows(0.5,Hv);
1574 
1575  Matrix<Real> Hm2(100,99);
1576  Dm.CopyToMat(&Hm2);
1577 
1578  KALDI_ASSERT(ApproxEqual(Hm,Hm2));
1579 }
1580 
1581 
1582 template<typename Real>
1584  for (int32 i = 0; i < 2; i++) {
1585  int32 dimM = 10 + Rand() % 200, dimN = 10 + Rand() % 30;
1586  if (i == 8) {
1587  dimM = 0;
1588  dimN = 0;
1589  }
1590  CuMatrix<Real> M(dimM, dimM); // square matrix..
1591  CuMatrix<Real> N(dimM, dimN);
1592  M.SetRandn();
1593  N.SetRandn();
1594  MatrixTransposeType trans = (i % 2 == 0 ? kTrans : kNoTrans),
1595  other_trans = (trans == kTrans ? kNoTrans : kTrans);
1596  if (trans == kTrans) N.Transpose();
1597  CuMatrix<Real> M2(M);
1598  Real alpha = 0.3, beta = 1.75432;
1599  M.SymAddMat2(alpha, N, trans, beta);
1600 
1601  M2.AddMatMat(alpha, N, trans, N, other_trans, beta);
1602 
1603  CuTpMatrix<Real> T1(M), T2(M2);
1604  CuMatrix<Real> X1(T1), X2(T2); // so we can test equality.
1605  KALDI_ASSERT(ApproxEqual(X1, X2));
1606  KALDI_ASSERT(dimM == 0 || X1.Trace() != 0);
1607  }
1608 }
1609 
1610 
1611 
1612 template<typename Real>
1614  for (int32 i = 0; i < 2; i++) {
1615  int32 dimM = 10 + Rand() % 200, dimN = dimM + 20;
1616  // dimN > dimM, so will be PSD almost surely.
1617  if (i == 8) {
1618  dimM = 0;
1619  dimN = 0;
1620  }
1621  if (i == 0) {
1622  dimM = 2;
1623  dimN = 5;
1624  }
1625  if (i == 1) {
1626  dimM = 9;
1627  dimN = 20;
1628  }
1629  CuMatrix<Real> M(dimM, dimM); // square matrix..
1630  CuMatrix<Real> N(dimM, dimN);
1631  N.SetRandn();
1632  MatrixTransposeType trans = (i % 2 == 0 ? kTrans : kNoTrans);
1633  // MatrixTranposeType other_trans = (trans == kTrans ? kNoTrans : kTrans);
1634 
1635  if (trans == kTrans) N.Transpose();
1636  CuMatrix<Real> M2(M);
1637  Real alpha = 0.3, beta = 1.75432;
1638  M.SymAddMat2(alpha, N, trans, beta);
1639  // M.AddMatMat(alpha, N, trans, N, other_trans, beta);
1640  CuSpMatrix<Real> spTemp(M, kTakeLower);
1641  SpMatrix<Real> S(spTemp);
1642  S.Invert();
1643  CuSpMatrix<Real> spTemp2(M, kTakeLower);
1644  CuMatrix<Real> M_orig(spTemp2);
1645  M.SymInvertPosDef();
1646  CuSpMatrix<Real> spTemp3(M, kTakeLower);
1647  CuMatrix<Real> M_inverted(spTemp3);
1648  CuMatrix<Real> M_prod(dimM, dimM);
1649  M_prod.AddMatMat(Real(1.0), M_orig, kNoTrans, M_inverted, kNoTrans, Real(0.0));
1650  KALDI_ASSERT(M_prod.IsUnit());
1651  CuSpMatrix<Real> spTemp4(M, kTakeLower);
1652  SpMatrix<Real> S2(spTemp4);
1653  KALDI_ASSERT(ApproxEqual(S, S2, (Real)0.1));
1654  KALDI_ASSERT(dimM == 0 || S.Trace() != 0);
1655  }
1656 }
1657 
1658 
1659 template<typename Real>
1661  Matrix<Real> Ha(200,100);
1662  Matrix<Real> Hb(100,200);
1663  Matrix<Real> Hc1(200,200);
1664  Matrix<Real> Hc2(100,100);
1665  Ha.SetRandn();
1666  Hb.SetRandn();
1667 
1668  CuMatrix<Real> Da(200,100);
1669  CuMatrix<Real> Db(100,200);
1670  Da.CopyFromMat(Ha);
1671  Db.CopyFromMat(Hb);
1672  CuMatrix<Real> Dc1(200,200);
1673  CuMatrix<Real> Dc2(100,100);
1674 
1675  Dc1.AddMatMat(0.5f,Da,kNoTrans,Db,kNoTrans,0.0f);
1676  Dc2.AddMatMat(0.5f,Da,kTrans,Db,kTrans,0.0f);
1677  Hc1.AddMatMat(0.5f,Ha,kNoTrans,Hb,kNoTrans,0.0f);
1678  Hc2.AddMatMat(0.5f,Ha,kTrans,Hb,kTrans,0.0f);
1679 
1680  Matrix<Real> Hc1a(200,200);
1681  Matrix<Real> Hc2a(100,100);
1682  Dc1.CopyToMat(&Hc1a);
1683  Dc2.CopyToMat(&Hc2a);
1684 
1685  KALDI_ASSERT(ApproxEqual(Hc1,Hc1a));
1686  KALDI_ASSERT(ApproxEqual(Hc2,Hc2a));
1687 }
1688 
1689 
1690 template<typename Real>
1692  Vector<Real> x(100);
1693  Vector<Real> y(200);
1694  x.SetRandn();
1695  y.SetRandn();
1696 
1697  CuVector<Real> Cux(100);
1698  CuVector<Real> Cuy(200);
1699  Cux.CopyFromVec(x);
1700  Cuy.CopyFromVec(y);
1701 
1702  Matrix<Real> A(100,200);
1703  CuMatrix<Real> CuA(100,200);
1704 
1705  A.AddVecVec(0.5f, x, y);
1706  CuA.AddVecVec(0.5f, Cux, Cuy);
1707  Matrix<Real> A2(100, 200);
1708  CuA.CopyToMat(&A2);
1709 
1710  KALDI_ASSERT(ApproxEqual(A,A2));
1711 }
1712 
1713 
1714 template<typename Real>
1716  // Random stride is disabled as AddMatMatBatched requires consistent stride
1717 #if HAVE_CUDA == 1
1718  bool old_mode = CuDevice::Instantiate().SetDebugStrideMode(false);
1719 #endif
1720  const int32 batchCount = 10;
1721  std::vector<Matrix<Real>* > Ha(batchCount), Hb(batchCount), Hc1(batchCount), Hc2(batchCount);
1722  std::vector<CuMatrix<Real>* > Da(batchCount), Db(batchCount), Dc1(batchCount), Dc2(batchCount);
1723  std::vector<SubMatrix<Real>* > HA, HB, HC1, HC2;
1724  std::vector<CuSubMatrix<Real>* > DA, DB, DC1, DC2;
1725 
1726  for (int32 i = 0; i < batchCount; i++) {
1727  // first create a Matrix instance and then create a SubMatrix instance from that
1728  Ha[i] = new Matrix<Real>(200, 100);
1729  Hb[i] = new Matrix<Real>(100, 200);
1730  Hc1[i] = new Matrix<Real>(200, 200);
1731  Hc2[i] = new Matrix<Real>(100, 100);
1732  Ha[i]->SetRandn();
1733  Hb[i]->SetRandn();
1734  HA.push_back(new SubMatrix<Real>(*(Ha[i]), 0, Ha[i]->NumRows(), 0,
1735  Ha[i]->NumCols()));
1736  HB.push_back(new SubMatrix<Real>(*(Hb[i]), 0, Hb[i]->NumRows(), 0,
1737  Hb[i]->NumCols()));
1738  HC1.push_back(new SubMatrix<Real>(*(Hc1[i]), 0, Hc1[i]->NumRows(), 0,
1739  Hc1[i]->NumCols()));
1740  HC2.push_back(new SubMatrix<Real>(*(Hc2[i]), 0, Hc2[i]->NumRows(), 0,
1741  Hc2[i]->NumCols()));
1742 
1743  // first create a CuMatrix instance and then create a CuSubMatrix instance from that
1744  Da[i] = new CuMatrix<Real>(200, 100);
1745  Db[i] = new CuMatrix<Real>(100, 200);
1746  Dc1[i] = new CuMatrix<Real>(200, 200);
1747  Dc2[i] = new CuMatrix<Real>(100, 100);
1748  Da[i]->CopyFromMat(*(Ha[i]));
1749  Db[i]->CopyFromMat(*(Hb[i]));
1750  DA.push_back(new CuSubMatrix<Real>(*(Da[i]), 0, Da[i]->NumRows(), 0,
1751  Da[i]->NumCols()));
1752  DB.push_back(new CuSubMatrix<Real>(*(Db[i]), 0, Db[i]->NumRows(), 0,
1753  Db[i]->NumCols()));
1754  DC1.push_back(new CuSubMatrix<Real>(*(Dc1[i]), 0, Dc1[i]->NumRows(), 0,
1755  Dc1[i]->NumCols()));
1756  DC2.push_back(new CuSubMatrix<Real>(*(Dc2[i]), 0, Dc2[i]->NumRows(), 0,
1757  Dc2[i]->NumCols()));
1758  }
1759 
1760  AddMatMatBatched(static_cast<Real>(0.5f), DC1, DA, kNoTrans, DB, kNoTrans,
1761  static_cast<Real>(0.0f));
1762  AddMatMatBatched(static_cast<Real>(0.5f), DC2, DA, kTrans, DB, kTrans,
1763  static_cast<Real>(0.0f));
1764 
1765  // used to store results from DC1 and DC2 for equality check
1766  Matrix<Real> Hca1(200,200);
1767  Matrix<Real> Hca2(100,100);
1768 
1769  // equality check
1770  for (int32 i = 0; i< batchCount; i++) {
1771  (*HC1[i]).AddMatMat(0.5f, *(HA[i]), kNoTrans, *(HB[i]), kNoTrans, 0.0f);
1772  (*HC2[i]).AddMatMat(0.5f, *(HA[i]), kTrans, *(HB[i]), kTrans, 0.0f);
1773  DC1[i]->CopyToMat(&Hca1);
1774  DC2[i]->CopyToMat(&Hca2);
1775  KALDI_ASSERT(ApproxEqual(*(HC1[i]), Hca1));
1776  KALDI_ASSERT(ApproxEqual(*(HC2[i]), Hca2));
1777  delete Ha[i]; delete Hb[i]; delete Hc1[i]; delete Hc2[i];
1778  delete HA[i]; delete HB[i]; delete HC1[i]; delete HC2[i];
1779  delete Da[i]; delete Db[i]; delete Dc1[i]; delete Dc2[i];
1780  delete DA[i]; delete DB[i]; delete DC1[i]; delete DC2[i];
1781  }
1782 #if HAVE_CUDA == 1
1783  CuDevice::Instantiate().SetDebugStrideMode(old_mode);
1784 #endif
1785 }
1786 
1787 
1788 template<typename Real>
1790  for (int32 i = 0; i < 10; i++) {
1791  int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200;
1792  Matrix<Real> M(dimM, dimN);
1793  CuMatrix<Real> Mc(M);
1794  Real alpha = 5.5;
1795  M.AddToDiag(alpha);
1796  Mc.AddToDiag(alpha);
1797  Matrix<Real> M2(Mc);
1798  KALDI_ASSERT(ApproxEqual(M, M2));
1799  }
1800 }
1801 
1802 template<typename Real>
1803 static void UnitTestCuMatrixAdd2() {
1804  for (int32 i = 0; i < 10; i++) {
1805  int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200;
1806  Matrix<Real> M(dimM, dimN);
1807  CuMatrix<Real> Mc(M);
1808  Real alpha = 5.5;
1809  M.Add(alpha);
1810  Mc.Add(alpha);
1811  Matrix<Real> M2(Mc);
1812  KALDI_ASSERT(ApproxEqual(M, M2));
1813  }
1814 }
1815 
1816 
1817 template<typename Real>
1819  for (int32 i = 1; i < 10; i++) {
1820  MatrixIndexT dim = 5 * i + Rand() % 10;
1821 
1822  Matrix<Real> A(dim, dim);
1823  A.SetRandn();
1824  CuMatrix<Real> E(A);
1825  CuMatrix<Real> B(dim, dim);
1826  B.CopyFromMat(E);
1827 
1828  KALDI_ASSERT(ApproxEqual<Real>(B, E));
1829  }
1830 }
1831 
1832 template<typename Real>
1834  for (int32 i = 1; i < 10; i++) {
1835  MatrixIndexT dim = 5 * i + Rand() % 10;
1836  TpMatrix<Real> A(dim);
1837  A.SetRandn();
1838  CuTpMatrix<Real> E(A);
1839  Matrix<Real> B(dim, dim);
1840  CuMatrix<Real> C(dim, dim);
1841  B.CopyFromTp(A, kNoTrans);
1842  C.CopyFromTp(E, kNoTrans);
1843  CuMatrix<Real> D(B);
1844  KALDI_ASSERT(ApproxEqual<Real>(D, C));
1845  }
1846 }
1847 
1848 template<typename Real>
1850  for (int32 i = 1; i < 10; i++) {
1851  MatrixIndexT dim = 5 * i + Rand() % 10;
1852 
1853  Matrix<Real> A(dim, dim);
1854  Matrix<Real> B(dim, dim);
1855  TpMatrix<Real> C(dim);
1856  A.SetRandn();
1857  B.SetRandn();
1858  C.SetRandn();
1859  CuMatrix<Real> D(A);
1860  CuMatrix<Real> E(B);
1861  CuTpMatrix<Real> F(C);
1862 
1863  A.AddMatTp(1.0, B, kNoTrans, C, kNoTrans, 1.0);
1864  D.AddMatTp(1.0, E, kNoTrans, F, kNoTrans, 1.0);
1865 
1866  CuMatrix<Real> G(A);
1867  KALDI_ASSERT(ApproxEqual<Real>(G, D));
1868  }
1869 }
1870 
1871 
1872 template<typename Real>
1874  for (int32 i = 1; i < 2; i++) {
1875  MatrixIndexT dimM = 5 * i + Rand() % 10,
1876  dimN = dimM;
1877  if (i % 2 == 0) dimN += 5;
1878 
1879  CuMatrix<Real> A(dimM, dimN);
1880  A.SetRandn();
1881  CuMatrix<Real> B(A, kTrans);
1882 
1883  Matrix<Real> hA(A);
1884  Matrix<Real> hB(B);
1885  hB.Transpose();
1886  KALDI_ASSERT(ApproxEqual(hA, hB));
1887  }
1888 }
1889 
1890 template<typename Real>
1892  for (int32 i = 1; i < 10; i++) {
1893  MatrixIndexT dim = 5 * i + Rand() % 10;
1894 
1895  Matrix<Real> A(dim, dim);
1896  Matrix<Real> B(dim, dim);
1897  TpMatrix<Real> C(dim);
1898  A.SetRandn();
1899  B.SetRandn();
1900  C.SetRandn();
1901  CuMatrix<Real> D(A);
1902  CuMatrix<Real> E(B);
1903  CuTpMatrix<Real> F(C);
1904 
1905  A.AddTpMat(1.0, C, kNoTrans, B, kNoTrans, 1.0);
1906  D.AddTpMat(1.0, F, kNoTrans, E, kNoTrans, 1.0);
1907 
1908  CuMatrix<Real> G(A);
1909  KALDI_ASSERT(ApproxEqual<Real>(G, D));
1910  }
1911 }
1912 
1913 /*
1914  * CuVector unit tests
1915  */
1916 template<typename Real>
1917 static void UnitTestCuVectorAddVec() {
1918  Vector<Real> Hv(777);
1919  Vector<Real> Hw(777);
1920  InitRand(&Hv);
1921  InitRand(&Hw);
1922 
1923  CuVector<Real> Dv(777);
1924  CuVector<Real> Dw(777);
1925  Dv.CopyFromVec(Hv);
1926  Dw.CopyFromVec(Hw);
1927 
1928  Dv.AddVec(0.1,Dw,0.9);
1929  Hv.Scale(0.9);
1930  Hv.AddVec(0.1,Hw);
1931 
1932  Vector<Real> Hv2(777);
1933  Dv.CopyToVec(&Hv2);
1934 
1935  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
1936 }
1937 
1938 
1939 
1940 template<typename Real>
1942  const int32 X=4321, Y=19;
1943  Real alpha=0.1, beta=0.7;
1944 
1945  Matrix<Real> Hm(X,Y);
1946  Vector<Real> Hv(Y);
1947  Vector<Real> Hv_accu(Y);
1948  Hm.SetRandn();
1949  InitRand(&Hv);
1950 
1951  CuMatrix<Real> Dm(X,Y);
1952  CuVector<Real> Dv(Y);
1953  Dm.CopyFromMat(Hm);
1954  Dv.CopyFromVec(Hv);
1955 
1956  Dv.AddRowSumMat(alpha,Dm,beta);
1957 
1958  Hv_accu.SetZero();
1959  Hv_accu.AddRowSumMat(1.0, Hm);
1960  Hv.Scale(beta);
1961  Hv.AddVec(alpha,Hv_accu);
1962 
1963  Vector<Real> Hv2(Y);
1964  Dv.CopyToVec(&Hv2);
1965 
1966  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
1967 }
1968 
1969 
1970 
1971 template<typename Real>
1973  Matrix<Real> Hm(1000,990);
1974  Vector<Real> Hv(990);
1975  Vector<Real> Hv_accu(990);
1976  Hm.SetRandn();
1977  InitRand(&Hv);
1978 
1979  CuMatrix<Real> Dm(1000,990);
1980  CuVector<Real> Dv(990);
1981  Dm.CopyFromMat(Hm);
1982  Dv.CopyFromVec(Hv);
1983 
1984  Dv.AddRowSumMat(0.5,Dm,0.7);
1985 
1986  Hv_accu.SetZero();
1987  Hv_accu.AddRowSumMat(1.0, Hm);
1988  Hv.Scale(0.7);
1989  Hv.AddVec(0.5,Hv_accu);
1990 
1991  Vector<Real> Hv2(990);
1992  Dv.CopyToVec(&Hv2);
1993 
1994  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
1995 }
1996 
1997 
1998 
1999 template<typename Real>
2001  const int32 X=19, Y=4321;
2002  Real alpha=0.5, beta=0.7;
2003 
2004  Matrix<Real> Hm(X,Y);
2005  Vector<Real> Hv(X);
2006  Vector<Real> Hv_accu(X);
2007  Hm.SetRandn();
2008  InitRand(&Hv);
2009 
2010  CuMatrix<Real> Dm(X,Y);
2011  CuVector<Real> Dv(X);
2012  Dm.CopyFromMat(Hm);
2013  Dv.CopyFromVec(Hv);
2014 
2015  Dv.AddColSumMat(alpha,Dm,beta);
2016 
2017  Hv_accu.SetZero();
2018  Hv_accu.AddColSumMat(1.0, Hm);
2019  Hv.Scale(beta);
2020  Hv.AddVec(alpha, Hv_accu);
2021 
2022  Vector<Real> Hv2(X);
2023  Dv.CopyToVec(&Hv2);
2024 
2025  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
2026 }
2027 
2028 template<typename Real>
2029 static void UnitTestCuSubMatrix() {
2030  for (int32 iter = 0 ; iter < 10; iter++) {
2031  int32 M1 = 1 + rand () % 10, M2 = 1 + Rand() % 1, M3 = 1 + Rand() % 10, M = M1 + M2 + M3,
2032  N1 = 1 + rand () % 10, N2 = 1 + Rand() % 1, N3 = 1 + Rand() % 10, N = N1 + N2 + N3,
2033  m = Rand() % M2, n = Rand() % N2;
2034  CuMatrix<Real> mat(M, N);
2035  mat.SetRandn();
2036  CuSubMatrix<Real> submat1(mat, M1, M2,
2037  N1, N2),
2038  submat2 = mat.Range(M1, M2, N1, N2);
2039  Real f1 = mat(M1 + m, N1 + n), f2 = submat1(m, n), f3 = submat2(m, n);
2040  KALDI_ASSERT(f1 == f2);
2041  KALDI_ASSERT(f2 == f3);
2042  }
2043 }
2044 
2045 
2046 
2047 template<typename Real>
2049  Matrix<Real> Hm(1000,990);
2050  Vector<Real> Hv(1000);
2051  Vector<Real> Hv_accu(1000);
2052  Hm.SetRandn();
2053  InitRand(&Hv);
2054 
2055  CuMatrix<Real> Dm(1000,990);
2056  CuVector<Real> Dv(1000);
2057  Dm.CopyFromMat(Hm);
2058  Dv.CopyFromVec(Hv);
2059 
2060  Dv.AddColSumMat(0.5, Dm, 0.7);
2061 
2062  Hv_accu.SetZero();
2063  Hv_accu.AddColSumMat(1.0, Hm);
2064  Hv.Scale(0.7);
2065  Hv.AddVec(0.5,Hv_accu);
2066 
2067  Vector<Real> Hv2(1000);
2068  Dv.CopyToVec(&Hv2);
2069 
2070  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
2071 }
2072 
2073 
2074 
2075 template<typename Real>
2077  Vector<Real> Hv(777);
2078  InitRand(&Hv);
2079 
2080  CuVector<Real> Dv(777);
2081  Dv.CopyFromVec(Hv);
2082 
2083  Dv.InvertElements();
2084  Hv.InvertElements();
2085 
2086  Vector<Real> Hv2(777);
2087  Dv.CopyToVec(&Hv2);
2088 
2089  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
2090 }
2091 
2092 template<typename Real>
2094  Matrix<Real> Hm(77, 77);
2095  InitRand(&Hm);
2096 
2097  CuMatrix<Real> Dm(77, 77);
2098  Dm.CopyFromMat(Hm);
2099 
2100  Dm.InvertElements();
2101  Hm.InvertElements();
2102 
2103  Matrix<Real> Hm2(77, 77);
2104  Dm.CopyToMat(&Hm2);
2105 
2106  KALDI_ASSERT(ApproxEqual(Hm,Hm2));
2107 }
2108 
2109 
2110 template<class Real>
2111 static void UnitTestCuMatrixIO() {
2112  for (int32 i = 0; i < 10; i++) {
2113  int32 dimM = 100 + Rand() % 255, dimN = 10 + Rand() % 20;
2114  if (i % 2 == 0) std::swap(dimM, dimN);
2115  if (i % 5 == 0) { dimM = 0; dimN = 0; }
2116  CuMatrix<Real> mat(dimM, dimN);
2117  mat.SetRandn();
2118  std::ostringstream os;
2119  bool binary = (i % 4 < 2);
2120  mat.Write(os, binary);
2121 
2122  CuMatrix<Real> mat2;
2123  std::istringstream is(os.str());
2124  mat2.Read(is, binary);
2125  KALDI_ASSERT(ApproxEqual(mat, mat2));
2126  }
2127 }
2128 
2129 
2130 template<typename Real>
2132  Vector<Real> Hv(300);
2133  InitRand(&Hv);
2134  CuVector<Real> Dv(300);
2135  Dv.CopyFromVec(Hv);
2136  Vector<Real> Hv1(300);
2137  InitRand(&Hv1);
2138  CuVector<Real> Dv1(300);
2139  Dv1.CopyFromVec(Hv1);
2140 
2141  TpMatrix<Real> Hm(300);
2142  Hm.SetRandn();
2143  CuTpMatrix<Real> Dm(Hm);
2144 
2145  //gpu
2146  Dv.AddTpVec(1.0,Dm,kNoTrans,Dv1,1.0);
2147  //cpu
2148  Hv.AddTpVec(1.0,Hm,kNoTrans,Hv1,1.0);
2149 
2150  Vector<Real> Hv2(300);
2151  Dv.CopyToVec(&Hv2);
2152 
2153  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
2154 }
2155 
2156 template<typename Real>
2157 static void UnitTestCuApproxEqual() {
2158  Real tol = 0.1;
2159  for (int32 i = 0; i < 2; i++) {
2160  int32 M = 1 + Rand() % 10, N = 1 + Rand() % 10;
2161  CuMatrix<Real> A(M, N), B(M, N);
2162  A.SetRandn();
2163  B.SetRandn();
2164  Matrix<Real> diff(A), Bm(B);
2165  diff.AddMat(-1.0, Bm);
2166  Real norm = diff.FrobeniusNorm();
2167  KALDI_ASSERT((norm <= tol * A.FrobeniusNorm()) == (A.ApproxEqual(B, tol)));
2168  tol *= 2.0;
2169  }
2170 }
2171 
2172 template<typename Real>
2173 static void UnitTestCuVectorMulTp() {
2174  Vector<Real> Hv(300);
2175  InitRand(&Hv);
2176  CuVector<Real> Dv(300);
2177  Dv.CopyFromVec(Hv);
2178 
2179  TpMatrix<Real> Hm(300);
2180  Hm.SetRandn();
2181  CuTpMatrix<Real> Dm(Hm);
2182 
2183  //gpu
2184  Dv.MulTp(Dm,kNoTrans);
2185  //cpu
2186  Hv.MulTp(Hm,kNoTrans);
2187 
2188  Vector<Real> Hv2(300);
2189  Dv.CopyToVec(&Hv2);
2190 
2191  KALDI_ASSERT(ApproxEqual(Hv,Hv2));
2192 }
2193 
2194 template<typename Real, typename OtherReal>
2195 static void UnitTestCuCopy() {
2196  for (int32 i = 0; i < 10; i++) {
2197  int32 M = 1 + Rand() % 10, N = 1 + Rand() % 10;
2198  CuMatrix<Real> A(M, N);
2200  CuMatrix<Real> C(B, kTrans);
2201  CuMatrix<Real> D(N, M);
2202  D.CopyFromMat(C, kTrans);
2203  CuMatrix<OtherReal> E(N, M);
2204  E.CopyFromMat(D, kNoTrans);
2205  CuMatrix<Real> F(M, N);
2206  F.CopyFromMat(E, kTrans);
2207 
2208  Matrix<OtherReal> G(M, N);
2209  G.CopyFromMat(F, kNoTrans);
2210  CuMatrix<Real> H(N, M);
2211  H.CopyFromMat(G, kTrans);
2212  Matrix<OtherReal> I(M, N);
2213  I.CopyFromMat(H, kTrans);
2214  CuMatrix<Real> J(I, kTrans);
2215  Matrix<OtherReal> K(J, kTrans);
2216  CuMatrix<Real> L(K, kNoTrans);
2217 
2218  KALDI_ASSERT(A.ApproxEqual(L));
2219  }
2220 
2221 }
2222 
2223 template<typename Real>
2224 static void UnitTestCuSigmoid() {
2225  Matrix<Real> Hi(100,111);
2226  Matrix<Real> Ho(100,111);
2227  Hi.SetRandn();
2228 
2229  CuMatrix<Real> Di(100,111);
2230  CuMatrix<Real> Do(100,111);
2231  Di.CopyFromMat(Hi);
2232 
2233  //gpu
2234  Do.Sigmoid(Di);
2235  //cpu
2236  for(MatrixIndexT r=0; r < Hi.NumRows(); r++) {
2237  for(MatrixIndexT c=0; c < Hi.NumCols(); c++) {
2238  Ho(r, c) = 1.0/(1.0+exp(-Hi(r, c)));
2239  }
2240  }
2241 
2242  Matrix<Real> Ho2(100,111);
2243  Do.CopyToMat(&Ho2);
2244 
2245  KALDI_ASSERT(ApproxEqual(Ho,Ho2));
2246 }
2247 
2248 
2249 
2250 template<typename Real>
2251 static void UnitTestCuDiffSigmoid() {
2252  Matrix<Real> Hi(100,111);
2253  Matrix<Real> Ho(100,111);
2254  Matrix<Real> Hy(100,111);
2255  Hi.SetRandn();
2256  RandZeroToOneMatrix(&Hy);
2257 
2258  CuMatrix<Real> Di(100,111);
2259  CuMatrix<Real> Do(100,111);
2260  CuMatrix<Real> Dy(100,111);
2261  Di.CopyFromMat(Hi);
2262  Dy.CopyFromMat(Hy);
2263 
2264  //gpu
2265  Do.DiffSigmoid(Dy, Di);
2266  //cpu
2267  for(MatrixIndexT r=0; r<Ho.NumRows(); r++) {
2268  for(MatrixIndexT c=0; c<Ho.NumCols(); c++) {
2269  Ho(r, c) = Hy(r, c)*(1.0 - Hy(r, c)) * Hi(r, c);
2270  }
2271  }
2272 
2273  Matrix<Real> Ho2(100,111);
2274  Do.CopyToMat(&Ho2);
2275 
2276  KALDI_ASSERT(ApproxEqual(Ho,Ho2));
2277 }
2278 
2279 
2280 template<typename Real>
2281 static void UnitTestCuDiffSoftmax() {
2282  for (int32 i = 0; i < 4; i++) {
2283  int m = RandInt(10, 280), n = RandInt(10, 280);
2284  Matrix<Real> Hi(m, n);
2285  Matrix<Real> Ho(m, n);
2286  Matrix<Real> Hy(m, n);
2287  Hi.SetRandn();
2288  RandZeroToOneMatrix(&Hy);
2289 
2290  CuMatrix<Real> Di(m, n);
2291  CuMatrix<Real> Do(m, n);
2292  CuMatrix<Real> Dy(m, n);
2293  Di.CopyFromMat(Hi);
2294  Dy.CopyFromMat(Hy);
2295 
2296  //gpu
2297  if (i % 2 == 0) {
2298  Do.DiffSoftmaxPerRow(Dy, Di);
2299  } else {
2300  // in-place.
2301  Do.CopyFromMat(Di);
2302  Do.DiffSoftmaxPerRow(Dy, Do);
2303  }
2304  //cpu
2305  {
2306  const MatrixBase<Real> &P(Hy), &E(Hi);
2307  MatrixBase<Real> &D(Ho);
2308  D.CopyFromMat(P);
2309  D.MulElements(E);
2310  // At this point, D = P .* E (in matlab notation)
2311  Vector<Real> pe_vec(D.NumRows()); // For each row i, the dot product (p_t . e_t).
2312  pe_vec.AddDiagMatMat(1.0, P, kNoTrans, E, kTrans, 0.0);
2313  D.AddDiagVecMat(-1.0, pe_vec, P, kNoTrans, 1.0); // does D -= diag(pe_vec) * P.
2314  }
2315 
2316  Matrix<Real> Ho2(m, n);
2317  Do.CopyToMat(&Ho2);
2318 
2319  KALDI_ASSERT(ApproxEqual(Ho, Ho2));
2320  }
2321 }
2322 
2323 
2324 template<typename Real>
2326  for (int32 i = 0; i < 4; i++) {
2327  int m = RandInt(10, 280), n = RandInt(10, 280);
2328  Matrix<Real> Hi(m, n);
2329  Matrix<Real> Ho(m, n);
2330  Matrix<Real> Hy(m, n);
2331  Hi.SetRandn();
2332  RandZeroToOneMatrix(&Hy);
2333 
2334  CuMatrix<Real> Di(m, n);
2335  CuMatrix<Real> Do(m, n);
2336  CuMatrix<Real> Dy(m, n);
2337  Di.CopyFromMat(Hi);
2338  Dy.CopyFromMat(Hy);
2339 
2340  //gpu
2341  if (i % 2 == 0) {
2342  Do.DiffLogSoftmaxPerRow(Dy, Di);
2343  } else {
2344  // in-place.
2345  Do.CopyFromMat(Di);
2346  Do.DiffLogSoftmaxPerRow(Dy, Do);
2347  }
2348  //cpu
2349  {
2350  const MatrixBase<Real> &Y(Hy), &E(Hi);
2351  MatrixBase<Real> &D(Ho);
2352  D.CopyFromMat(Y);
2353  D.ApplyExp(); // exp(y)
2354  Vector<Real> E_sum(D.NumRows()); // Initializes to zero
2355  E_sum.AddColSumMat(1.0, E); // Sum(e)
2356  D.MulRowsVec(E_sum); // exp(y) Sum(e)
2357  D.Scale(-1.0); // - exp(y) Sum(e)
2358  D.AddMat(1.0, E, kNoTrans); // e - exp(y_i) Sum(e)
2359  }
2360 
2361  Matrix<Real> Ho2(m, n);
2362  Do.CopyToMat(&Ho2);
2363 
2364  KALDI_ASSERT(ApproxEqual(Ho, Ho2));
2365  }
2366 }
2367 
2368 
2369 template<typename Real>
2370 static void UnitTestCuSoftmax() {
2371 
2372  for (int32 i = 0; i < 4; i++) {
2373  int row = 10 + Rand() % 40;
2374  int col = 10 + Rand() % 50;
2375 
2376  Matrix<Real> Hi(row,col);
2377  Matrix<Real> Ho(row,col);
2378  Hi.SetRandn();
2379  Hi.Scale(5.0);
2380 
2381  CuMatrix<Real> Di(row, col);
2382  CuMatrix<Real> Do(row, col);
2383  Di.CopyFromMat(Hi);
2384 
2385  //gpu
2386  if (i % 2 == 0) {
2387  Do.SoftMaxPerRow(Di);
2388  } else {
2389  // in-place
2390  Do.CopyFromMat(Di);
2391  Do.SoftMaxPerRow(Do);
2392  }
2393  //cpu
2394  Ho.CopyFromMat(Hi);
2395  for(MatrixIndexT r=0; r<Ho.NumRows(); r++) {
2396  Ho.Row(r).ApplySoftMax();
2397  }
2398 
2399  Matrix<Real> Ho2(Do);
2400 
2401  KALDI_ASSERT(ApproxEqual(Ho,Ho2,(Real)0.00001));
2402  }
2403 }
2404 
2405 
2406 template<typename Real>
2407 static void UnitTestCuLogSoftmax() {
2408 
2409  for (int32 i = 0; i < 50; i++) {
2410  int row = 10 + Rand() % 300;
2411  int col = 10 + Rand() % 300;
2412 
2413  Matrix<Real> Hi(row, col);
2414  Matrix<Real> Ho(row, col);
2415  Hi.SetRandn();
2416  Hi.Scale(5.0);
2417 
2418  CuMatrix<Real> Di(row, col);
2419  CuMatrix<Real> Do(row, col);
2420  Di.CopyFromMat(Hi);
2421 
2422  //gpu
2423  if (i % 2 == 0) {
2424  Do.LogSoftMaxPerRow(Di);
2425  } else {
2426  // in-place.
2427  Do.CopyFromMat(Di);
2428  Do.LogSoftMaxPerRow(Do);
2429  }
2430  //cpu
2431  Ho.CopyFromMat(Hi);
2432  for(MatrixIndexT r=0; r<Ho.NumRows(); r++) {
2433  Ho.Row(r).ApplyLogSoftMax();
2434  }
2435 
2436  Matrix<Real> Ho2(Do);
2437 
2438  KALDI_ASSERT(ApproxEqual(Ho, Ho2, (Real)0.00001));
2439  }
2440 }
2441 
2442 
2443 template<typename Real>
2444 static void UnitTestCuFindRowMaxId() {
2445  for (int32 i = 0; i < 2; i++) {
2446  int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200;
2447  Matrix<Real> Hi(dimM, dimN);
2448  Hi.SetRandn();
2449 
2450  CuMatrix<Real> Di(dimM, dimN);
2451  Di.CopyFromMat(Hi);
2452 
2453  std::vector<int32> Hmax(dimM);
2454  CuArray<int32> Dmax(dimN);
2455 
2456  // on gpu
2457  Di.FindRowMaxId(&Dmax);
2458 
2459  // on cpu
2460  for(MatrixIndexT r=0; r<Hi.NumRows(); r++) {
2461  Real max=-1.0e+20; int32 idx=-1;
2462  for(MatrixIndexT c=0; c<Hi.NumCols(); c++) {
2463  if(Hi(r,c) > max) { idx=c; max=Hi(r,c); }
2464  }
2465  Hmax[r] = idx;
2466  }
2467 
2468  std::vector<int32> Hmax2(dimM);
2469  Dmax.CopyToVec(&Hmax2);
2470 
2471  // If the same value were generated randomly we can get to a case
2472  // where the GPU and CPU return different columns. Both would be correct.
2473  // Thus check that the max for each row is the same and not the index.
2474  for (MatrixIndexT r=0; r<Hi.NumRows(); r++) {
2475  KALDI_ASSERT(Hi(r, Hmax[r]) == Di(r, Hmax2[r]));
2476  }
2477  }
2478 }
2479 
2480 
2481 
2482 template<typename Real>
2483 static void UnitTestCuDiffXent() {
2484  int32 X=100, Y=111;
2485  //nnet output / diff
2486  Matrix<Real> Hi(X,Y);
2487  RandZeroToOneMatrix(&Hi);
2488  CuMatrix<Real> Di(X,Y);
2489  Di.CopyFromMat(Hi);
2490  //target vector
2491  std::vector<int32> Htgt(X);
2492  for(int32 i=0; i<X; i++) {
2493  Htgt[i] = Rand()%Y;
2494  }
2495  CuArray<int32> Dtgt(X);
2496  Dtgt.CopyFromVec(Htgt);
2497  //logpost vector
2498  Vector<Real> Hlogpost(X);
2499  CuVector<Real> Dlogpost(X);
2500 
2501  //gpu
2502  Di.DiffXent(Dtgt, &Dlogpost);
2503  //cpu
2504  for(MatrixIndexT r=0; r<Hi.NumRows(); r++) {
2505  int32 col_tgt = Htgt[r];
2506  Hlogpost(r) = Log(Hi(r, col_tgt));
2507  Hi(r, col_tgt) -= 1.0;
2508  }
2509 
2510  Matrix<Real> Hi2(X,Y);
2511  Di.CopyToMat(&Hi2);
2512  Vector<Real> Hlogpost2(X);
2513  Dlogpost.CopyToVec(&Hlogpost2);
2514 
2515  KALDI_ASSERT(ApproxEqual(Hi,Hi2));
2516  KALDI_ASSERT(ApproxEqual(Hlogpost,Hlogpost2));
2517 }
2518 
2519 template<typename Real> void UnitTestCheck() {
2520  Matrix<Real> Hi(100,111);
2521  Hi.SetRandn();
2522 
2523  CuMatrix<Real> Di(100,111);
2524  Di.CopyFromMat(Hi);
2525 
2526  CuMatrix<Real> Dj(Di);
2527  KALDI_LOG << Dj.NumRows();
2528 
2529 
2530 }
2531 
2532 template<typename Real>
2534  Matrix<Real> Hi(100,111);
2535  Hi.SetRandn();
2536  CuMatrix<Real> Di(100,111);
2537  Di.CopyFromMat(Hi);
2538 
2539  Matrix<Real> Hi2(110,121);
2540  Hi2.SetRandn();
2541  CuMatrix<Real> Di2(110,121);
2542  Di2.CopyFromMat(Hi2);
2543 
2544  Di.Swap(&Di2);
2545  Matrix<Real> Hf(Di.NumRows(), Di.NumCols());
2546  Di.CopyToMat(&Hf);
2547  Matrix<Real> Hf2(Di2.NumRows(), Di2.NumCols());
2548  Di2.CopyToMat(&Hf2);
2549  KALDI_ASSERT(ApproxEqual(Hi,Hf2));
2550  KALDI_ASSERT(ApproxEqual(Hi2,Hf));
2551 }
2552 
2553 template<typename Real>
2555  Matrix<Real> Hi(100,111);
2556  Hi.SetRandn();
2557  CuMatrix<Real> Di(100,111);
2558  Di.CopyFromMat(Hi);
2559 
2560  Matrix<Real> Hi2(110,121);
2561  Hi2.SetRandn();
2562  Matrix<Real> Di2(110,121);
2563  Di2.CopyFromMat(Hi2);
2564 
2565  Di.Swap(&Hi2);
2566  Matrix<Real> Hf(Di.NumRows(), Di.NumCols());
2567  Di.CopyToMat(&Hf);
2568  KALDI_ASSERT(ApproxEqual(Di2,Hf));
2569  KALDI_ASSERT(ApproxEqual(Hi2,Hi));
2570 }
2571 
2572 
2573 template<typename Real>
2575  Matrix<Real> H(100,110);
2576  H.SetRandn();
2577  CuMatrix<Real> D(100,110);
2578  D.CopyFromMat(H);
2579 
2580  //gpu
2581  CuMatrix<Real> Di(100,110);
2582  Di.Tanh(D);
2583  Matrix<Real> Df(Di.NumRows(), Di.NumCols());
2584  Di.CopyToMat(&Df);
2585 
2586  //cpu
2587  Matrix<Real> Hf(H.NumRows(), H.NumCols());
2588  Hf.Tanh(H);
2589  KALDI_ASSERT(ApproxEqual(Df,Hf));
2590 }
2591 
2592 template<typename Real>
2593 static void UnitTestCuDiffTanh() {
2594  Matrix<Real> Hi(100,111);
2595  Matrix<Real> Ho(100,111);
2596  Matrix<Real> Hy(100,111);
2597  Hi.SetRandn();
2598  RandZeroToOneMatrix(&Hy);
2599 
2600  CuMatrix<Real> Di(100,111);
2601  CuMatrix<Real> Do(100,111);
2602  CuMatrix<Real> Dy(100,111);
2603  Di.CopyFromMat(Hi);
2604  Dy.CopyFromMat(Hy);
2605 
2606  //gpu
2607  Do.DiffTanh(Dy, Di);
2608  //cpu
2609  for(MatrixIndexT r=0; r<Ho.NumRows(); r++) {
2610  for(MatrixIndexT c=0; c<Ho.NumCols(); c++) {
2611  Ho(r, c) = (1.0 - Hy(r, c)*Hy(r, c)) * Hi(r, c);
2612  }
2613  }
2614 
2615  Matrix<Real> Ho2(100,111);
2616  Do.CopyToMat(&Ho2);
2617 
2618  KALDI_ASSERT(ApproxEqual(Ho,Ho2));
2619 }
2620 
2621 // just need this for testing function below. Compute n!!
2623  if (i <= 0) { return 1; } else { return i * DoubleFactorial(i - 2); }
2624 }
2625 
2626 template <typename Real>
2628 
2629 
2630  if (false) {
2631  // This block tests consistency when called twice.
2632  // It has been disabled since we added multi-threaded testing,
2633  // since consistency wouldn't be expected if other threads were running.
2634  int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200;
2635  Matrix<Real> M(dimM, dimN), N(dimM, dimN);
2636  srand(104);
2637  M.SetRandn();
2638  srand(104);
2639  N.SetRandn();
2640  KALDI_ASSERT(ApproxEqual(M, N));
2641  }
2642 
2643  for (int32 i = 0; i < 5; i++) {
2644  MatrixIndexT rows = 100 + Rand() % 50, cols = 100 + Rand() % 50;
2645  CuMatrix<Real> M(rows, cols);
2646  M.SetRandn();
2647 
2648  for (int32 pow = 1; pow < 5; pow++) {
2649  // test moments 1 through 4 of
2650  // the distribution.
2651  CuMatrix<Real> Mpow(M);
2652  Mpow.ApplyPow(pow);
2653  Real observed_moment = Mpow.Sum() / (rows * cols);
2654  // see http://en.wikipedia.org/wiki/Normal_distribution#Moments,
2655  // note that mu = 0 and sigma = 1.
2656  Real expected_moment = (pow % 2 == 1 ? 0 : DoubleFactorial(pow - 1));
2657  Real expected_twice_moment = DoubleFactorial(2 * pow - 1);
2658  Real k = 10.0; // This is just a constant we use to give us some wiggle
2659  // room before rejecting the distribution... e.g. 20 sigma,
2660  // quite approximately.
2661  // VAR(X) = E(X^2) - (E(X))^2
2662  Real deviation = sqrt(expected_twice_moment - expected_moment * expected_moment);
2663  Real allowed_deviation = k * deviation / sqrt(static_cast<Real>(rows * cols));
2664  // give it a bit more wiggle room for higher powers.. this is quite
2665  // unscientific, it would be better to involve the absolute moments or
2666  // something like that, and use one of those statistical inequalities,
2667  // but it involves the gamma function and it's too much hassle to implement.
2668  Real lower_bound = expected_moment - allowed_deviation,
2669  upper_bound = expected_moment + allowed_deviation;
2670  KALDI_ASSERT(observed_moment >= lower_bound && observed_moment <= upper_bound);
2671  }
2672  }
2673 }
2674 
2675 
2676 template <typename Real>
2678  for (int32 i = 0; i < 2; i++) {
2679  MatrixIndexT rows = 180 + Rand() % 200, cols = 200 + Rand() % 200;
2680  CuMatrix<Real> M(rows, cols);
2681  M.SetRandUniform();
2682 
2683  M.Add(-0.5); // we'll be testing the central moments, so
2684  // center it around zero first.
2685  // Got these moments from http://mathworld.wolfram.com/UniformDistribution.html
2686  Vector<Real> central_moments(5);
2687  central_moments(0) = 0.0;
2688  central_moments(1) = 0.0;
2689  central_moments(2) = 1.0 / 12; // times (b - a)^2, which equals 1.
2690  central_moments(3) = 0.0;
2691  central_moments(4) = 1.0 / 80; // times (b - a)^4, which equals 1.
2692 
2693  for (int32 pow = 1; pow < central_moments.Dim(); pow++) {
2694  CuMatrix<Real> Mpow(M);
2695  Mpow.ApplyPow(pow);
2696  Real observed_moment = Mpow.Sum() / (rows * cols);
2697  // see http://en.wikipedia.org/wiki/Normal_distribution#Moments,
2698  // note that mu = 0 and sigma = 1.
2699  Real expected_moment = central_moments(pow);
2700  Real k = 20.0; // This is just a constant we use to give us some wiggle
2701  // room before rejecting the distribution... e.g. 10 sigma,
2702  // quite approximately.
2703  Real allowed_deviation = k / sqrt(static_cast<Real>(rows * cols));
2704  Real lower_bound = expected_moment - allowed_deviation,
2705  upper_bound = expected_moment + allowed_deviation;
2706  if (!(observed_moment >= lower_bound && observed_moment <= upper_bound)) {
2707  KALDI_LOG << "Random matrix is " << M;
2708  KALDI_ERR << "Bad observed " << pow << "'th moment " << observed_moment
2709  << ", expected " << expected_moment << ", allowed range "
2710  << lower_bound << " to " << upper_bound;
2711  }
2712  }
2713  }
2714 }
2715 
2716 
2717 template<typename Real>
2719  for (int i = 1; i < 2; ++i) {
2720  MatrixIndexT dim = 10 * i + Rand() % 4 + (i == 9 ? 255 : 0);
2721  if (i == 8) dim = 0;
2722  CuMatrix<Real> A(dim, dim);
2723  A.SetRandn();
2724  Matrix<Real> A2(A);
2725  A.CopyLowerToUpper();
2726  Matrix<Real> A3(A);
2727  for (int32 i = 0; i < dim; i++) {
2728  for (int32 j = 0; j <= i; j++) {
2729  KALDI_ASSERT(A3(i, j) == A3(j, i));
2730  KALDI_ASSERT(A3(i, j) == A2(i, j));
2731  }
2732  }
2733  KALDI_ASSERT(dim == 0 || A3.Trace() != 0);
2734  }
2735 }
2736 
2737 template<typename Real>
2739  for (int i = 1; i < 2; ++i) {
2740  MatrixIndexT dim = 10 * i + Rand() % 4 + (i == 9 ? 255 : 0);
2741  if (i == 8) dim = 0;
2742  CuMatrix<Real> A(dim, dim);
2743  A.SetRandn();
2744  Matrix<Real> A_orig(A);
2745  A.SetZeroAboveDiag();
2746  Matrix<Real> A_copy(A);
2747 
2748  for (int32 i = 0; i < dim; i++) {
2749  for (int32 j = 0; j < dim; j++) {
2750  Real aval = A_copy(i, j), aorigval = A_orig(i, j);
2751  KALDI_ASSERT(aval == (j > i ? 0.0 : aorigval));
2752  }
2753  }
2754  }
2755 }
2756 
2757 
2758 template<typename Real>
2760  for (int i = 1; i < 10; ++i) {
2761  MatrixIndexT dim = 10 * i + Rand() % 4 + (i == 9 ? 255 : 0);
2762  if (i == 8) dim = 0;
2763  CuMatrix<Real> A(dim, dim);
2764  A.SetRandn();
2765  Matrix<Real> A2(A);
2766  A.CopyUpperToLower();
2767  Matrix<Real> A3(A);
2768  //KALDI_LOG << "A2 is " << A2 << " A3 is " << A3;
2769  for (int32 i = 0; i < dim; i++) {
2770  for (int32 j = i; j < dim; j++) {
2771  KALDI_ASSERT(A3(i, j) == A3(j, i));
2772  KALDI_ASSERT(A3(i, j) == A2(i, j));
2773  }
2774  }
2775  KALDI_ASSERT(dim == 0 || A3.Trace() != 0);
2776  }
2777 }
2778 
2779 
2780 template<typename Real>
2782  int32 n_r = 100 + Rand() % 200, n_c = 20 + Rand() % 30;
2783  CuMatrix<Real> A(n_r, n_c), B(n_r, n_c);
2784  B.SetRandn();
2785  B.Add(1.0);
2786  B.ApplyFloor(1.0e-10);
2787 
2788  std::vector<MatrixElement<Real> > labels;
2789  for(int i = 0; i < n_r; i++) {
2790  for(int j = 0; j < n_c; j++) {
2791  // have approximately one weight per row of the matrix.
2792  if (Rand() % n_c == 0) {
2793  A(i, j) = RandUniform();
2794  MatrixElement<Real> t = {i, j, A(i, j)};
2795  labels.push_back(t);
2796  }
2797  }
2798  }
2799  CuMatrix<Real> C(n_r, n_c);
2800  C.Set(0);
2801  Real a = 0, b = 0;
2802 
2803  // (sv_labels, logprobs, &tot_objf, &tot_weight)
2804  C.CompObjfAndDeriv(labels, B, &a, &b);
2805 
2806  KALDI_ASSERT(ApproxEqual(b, A.Sum()));
2807 
2808  Real sum2; // sum(i, j) A(i, j) log(B(i, j));
2809  {
2810  CuMatrix<Real> Bcopy(B);
2811  Bcopy.ApplyLog();
2812  sum2 = TraceMatMat(Bcopy, A, kTrans);
2813  }
2814  KALDI_ASSERT(ApproxEqual(a, sum2));
2815 
2816  B.InvertElements();
2817  A.MulElements(B); // each element of A is now A(i, j) / B(i, j);
2818  KALDI_ASSERT(ApproxEqual(A, C));
2819 }
2820 
2821 template<typename Real>
2823  for (int32 i = 0; i < 2; i++) {
2824  int32 dimM = 100 + Rand() % 50, dimN = 100 + Rand() % 50;
2825  // int32 dimM = 256, dimN = 256;
2826  CuMatrix<Real> H(dimM, dimN);
2827  H.SetRandn();
2828  CuMatrix<Real> H_copy(H);
2829  CuMatrix<Real> M(H);
2830  int32 num_elements = 100 + Rand() % 10;
2831  std::vector<MatrixElement<Real> > input;
2832  std::set<Int32Pair> input_index; //Set used to ensure unique elements
2833  std::vector<Int32Pair> input_index_v;
2834  Real *input_value = new Real[num_elements];
2835  BaseFloat scale = -1 + (0.33 * (Rand() % 5));
2836  for (int32 j = 0; j < num_elements; j++) {
2837  Int32Pair tmp_pair;
2838  // Generate a unique random index
2839  do {
2840  tmp_pair.first = Rand() % dimM;
2841  tmp_pair.second = Rand() % dimN;
2842  } while (input_index.find(tmp_pair)!=input_index.end());
2843  input_index.insert(tmp_pair);
2844 
2845  MatrixIndexT r = tmp_pair.first;
2846  MatrixIndexT c = tmp_pair.second;
2847  input_index_v.push_back(tmp_pair);
2848 
2849  Real offset = -1 + (0.33 * (Rand() % 5));
2850  M(r, c) += scale * offset;
2851  MatrixElement<Real> t = {r, c, offset};
2852  input.push_back(t);
2853  input_value[j] = offset;
2854  }
2855 
2856  H.AddElements(scale, input);
2857  CuArray<Int32Pair> cu_input_index(input_index_v);
2858  H_copy.AddElements(scale, cu_input_index, input_value);
2859  delete[] input_value;
2860 
2861  KALDI_ASSERT(ApproxEqual(H, M));
2862  KALDI_ASSERT(ApproxEqual(H_copy, M));
2863  }
2864 }
2865 
2866 template<typename Real>
2868  for (int32 i = 0; i < 2; i++) {
2869  int32 NR = 100 + Rand() % 50, NC = 100 + Rand() % 50;
2870  CuMatrix<Real> A(NR, NC);
2871  A.SetRandn();
2872  CuMatrix<Real> A_copy(A);
2873  std::vector<int32> elements(NR, -1);
2874  BaseFloat alpha = -1 + (0.33 * (Rand() % 5));
2875  for (int32 r = 0; r < NR; r++) {
2876  MatrixIndexT c = Rand() % NC;
2877  if (WithProb(0.6)) {
2878  elements[r] = c;
2879  A(r, c) += alpha;
2880  }
2881  }
2882  CuArray<int32> cu_elements(elements);
2883  A_copy.AddToElements(alpha, cu_elements);
2884  KALDI_ASSERT(ApproxEqual(A_copy, A));
2885  }
2886 }
2887 
2888 template<typename Real>
2889 static void UnitTestCuMatrixLookup() {
2890  for (int32 i = 0; i < 2; i++) {
2891  int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200;
2892  CuMatrix<Real> H(dimM, dimN);
2893  H.SetRandn();
2894 
2895  int32 num_elements = 10 + Rand() % 10;
2896  std::vector<Int32Pair> indices;
2897  std::vector<Real> reference;
2898  std::vector<Real> output;
2899  output.resize(num_elements);
2900 
2901  // Generates the indices and the reference.
2902  for (int32 j = 0; j < num_elements; j++) {
2903  MatrixIndexT r = Rand() % dimM;
2904  MatrixIndexT c = Rand() % dimN;
2905 
2906  Int32Pair tmp_pair;
2907  tmp_pair.first = r;
2908  tmp_pair.second = c;
2909  indices.push_back(tmp_pair);
2910  reference.push_back(H(r, c));
2911  }
2912 
2913  H.Lookup(indices, &(output[0]));
2914 
2915  KALDI_ASSERT(reference == output);
2916  }
2917 }
2918 
2919 template<typename Real>
2921  CuMatrix<Real> m1(10,9), m2(10,9);
2922  CuMatrix<Real> mask_same, mask_different;
2923  m1.SetRandUniform(); // U[0,1]
2924  m2.SetRandUniform(); m2.Add(10.0); // U[10,11]
2925 
2926  m1.EqualElementMask(m1,&mask_same); // all elements ones
2927  m1.EqualElementMask(m2,&mask_different); // all elements zeros
2928 
2929  //KALDI_LOG << m1 << m2 << mask_same << mask_different;
2930  KALDI_ASSERT(mask_same.Sum() == 10*9);
2931  KALDI_ASSERT(mask_different.Sum() == 0.0);
2932 
2933  //check matrices with different strides:
2934  CuMatrix<Real> m3(m1.Range(1,6,2,6));
2935  CuMatrix<Real> m4(5,5,kSetZero);
2936  m1.Range(1,5,2,5).EqualElementMask(m3.Range(0,5,0,5),&m4); // strides 9, 6, 5
2937  KALDI_ASSERT(m4.Sum() == 25);
2938 
2939 }
2940 
2941 template<typename Real> void CudaMatrixUnitTest() {
2942  UnitTestCuMatrixApplyExpSpecial<Real>();
2943  UnitTestCuMatrixApplyExpLimited<Real>();
2944  UnitTextCuMatrixAddSmatMat<Real>();
2945  UnitTextCuMatrixAddMatSmat<Real>();
2946  UnitTextCuMatrixAddSmat<Real>();
2947  UnitTestCuMatrixTraceMatMat<Real>();
2948  UnitTestCuMatrixObjfDeriv<Real>();
2949  //test CuMatrix<Real> methods by cross-check with Matrix
2950  UnitTestCuMatrixCopyCross<Real>();
2951  UnitTestCuMatrixCopyCross2<Real>();
2952  UnitTestCuMatrixApplyLog<Real>();
2953  UnitTestCuMatrixApplyExp<Real>();
2954  UnitTestCuMatrixSetRandn<Real>();
2955  UnitTestCuMatrixSetRandUniform<Real>();
2956  UnitTestCuMatrixScale<Real>();
2957  UnitTestCuMatrixSigmoid<Real>();
2958  UnitTestCuMatrixSoftHinge<Real>();
2959  UnitTestCuMatrixApplyPow<Real>();
2960  UnitTestCuMatrixApplyPowAbs<Real>();
2961  UnitTestCuMatrixSet<Real>();
2962  UnitTestCuMatrixAdd<Real>();
2963  UnitTestCuMatrixApplyFloor<Real>();
2964  UnitTestCuMatrixApplyCeiling<Real>();
2965  UnitTestCuMatrixApplyHeaviside<Real>();
2966  UnitTestCuMatrixHeaviside<Real>();
2967  UnitTestCuMatrixMulElements<Real>();
2968  UnitTestCuMatrixDivElements<Real>();
2969  UnitTestCuMatrixMax<Real>();
2970  UnitTestCuMatrixMin<Real>();
2971  UnitTestCuMatrixMulColsVec<Real>();
2972  UnitTestCuMatrixMulRowsVec<Real>();
2973  UnitTestCuMatrixDivRowsVec<Real>();
2974  UnitTestCuMatrixAddMat<Real>();
2975  UnitTestCuMatrixAddMatBlocks1<Real>();
2976  UnitTestCuMatrixAddMatBlocks1Trans<Real>();
2977  UnitTestCuMatrixAddMatBlocks2<Real>();
2978  UnitTestCuMatrixReduceSum<Real>();
2979  UnitTestCuMatrixReduceMax<Real>();
2980  UnitTestCuMatrixReduceMin<Real>();
2981  UnitTestCuMatrixAddVecToCols<Real>();
2982  UnitTestCuMatrixAddVecToRows<Real>();
2983  UnitTestCuMatrixAddMatMat<Real>();
2984  UnitTestCuMatrixAddVecVec<Real>();
2985  UnitTestCuMatrixSymAddMat2<Real>();
2986  UnitTestCuMatrixAddMatMatBatched<Real>();
2987  UnitTestCuMatrixSymInvertPosDef<Real>();
2988  UnitTestCuMatrixCopyFromMat<Real>();
2989  UnitTestCuMatrixCopyFromTp<Real>();
2990  UnitTestCuMatrixAddMatTp<Real>();
2991  UnitTestCuMatrixCopyCols<Real>();
2992  UnitTestCuMatrixAddCols<Real>();
2993  UnitTestCuMatrixSumColumnRanges<Real>();
2994  UnitTestCuMatrixCopyRows<Real>();
2995  UnitTestCuMatrixCopyRowsFromVec<Real>();
2996  UnitTestCuMatrixCopyColsFromVec<Real>();
2997  UnitTestCuMatrixCopyToRows<Real>();
2998  UnitTestCuMatrixAddRows<Real>();
2999  UnitTestCuMatrixMulRows<Real>();
3000  UnitTestCuMatrixAddToRows<Real>();
3001  UnitTestCuMatrixAddRowRanges<Real>();
3002  UnitTestCuMatrixAddTpMat<Real>();
3003  UnitTestCuMatrixTranspose<Real>();
3004  UnitTestCuMatrixCopyUpperToLower<Real>();
3005  UnitTestCuMatrixCopyLowerToUpper<Real>();
3006  UnitTestCuMatrixSetZeroAboveDiag<Real>();
3007  UnitTestCuMatrixAddElements<Real>();
3008  UnitTestCuMatrixAddToElements<Real>();
3009  UnitTestCuMatrixLookup<Real>();
3010  UnitTestCuMatrixEqualElementMask<Real>();
3011  // test CuVector<Real> methods
3012  UnitTestCuVectorAddVec<Real>();
3013  UnitTestCuVectorAddRowSumMat<Real>();
3014  UnitTestCuVectorAddRowSumMatLarge<Real>();
3015  UnitTestCuVectorAddColSumMat<Real>();
3016  UnitTestCuVectorAddColSumMatLarge<Real>();
3017  UnitTestCuSubMatrix<Real>();
3018  UnitTestCuMatrixInvertElements<Real>();
3019  UnitTestCuVectorInvertElements<Real>();
3020  UnitTestCuMatrixIO<Real>();
3021  UnitTestCuSigmoid<Real>();
3022  UnitTestCuApproxEqual<Real>();
3023  UnitTestCuCopy<Real, float>();
3024 #if HAVE_CUDA == 1
3025  if (CuDevice::Instantiate().DoublePrecisionSupported())
3026 #endif
3027  UnitTestCuCopy<Real, double>();
3028  UnitTestCuMatrixAddToDiag<Real>();
3029  UnitTestCuMatrixAdd2<Real>();
3030  UnitTestCuDiffSigmoid<Real>();
3031  UnitTestCuDiffSoftmax<Real>();
3032  UnitTestCuDiffLogSoftmax<Real>();
3033  UnitTestCuMatrixGroupPnorm<Real>();
3034  UnitTestCuMatrixDiffGroupPnorm<Real>();
3035  UnitTestCuMatrixGroupMax<Real>();
3036  UnitTestCuMatrixGroupMaxDeriv<Real>();
3037  UnitTestCuMatrixMulRowsVec<Real>();
3038  UnitTestCuMatrixMulRowsGroupMat<Real>();
3039  UnitTestCuFindRowMaxId<Real>();
3040  UnitTestCuSoftmax<Real>();
3041  UnitTestCuLogSoftmax<Real>();
3042  UnitTestCuDiffXent<Real>();
3043  UnitTestCheck<Real>();
3044  UnitTestSwapCu2Cu<Real>();
3045  UnitTestSwapCu2M<Real>();
3046  UnitTestCuMatrixAddDiagVecMat<Real>();
3047  UnitTestCuMatrixAddMatDiagVec<Real>();
3048  UnitTestCuMatrixAddMatMatElements<Real>();
3049  UnitTestCuMatrixSetMatMatDivMat<Real>();
3050  UnitTestCuTanh<Real>();
3051  UnitTestCuCholesky<Real>();
3052  UnitTestCuDiffTanh<Real>();
3053  UnitTestCuVectorAddTpVec<Real>();
3054  UnitTestCuVectorMulTp<Real>();
3055 }
3056 
3057 
3058 } // namespace kaldi
3059 
3060 
3061 int main() {
3062  SetVerboseLevel(1);
3063  int32 loop = 0;
3064  bool test_threads = true;
3065  // num_threads only matters if test_threads == true. Don't make it
3066  // to large, because it will affect CPU usage if you are using CPU.
3067  int32 num_threads = 4;
3068 
3069 
3070 #if HAVE_CUDA == 1
3071  for (loop = 0; loop < 2; loop++) {
3072  CuDevice::Instantiate().SetDebugStrideMode(true);
3073  if (test_threads)
3074  CuDevice::Instantiate().AllowMultithreading();
3075  if (loop == 0)
3076  CuDevice::Instantiate().SelectGpuId("no");
3077  else
3078  CuDevice::Instantiate().SelectGpuId("yes");
3079 #endif
3080 
3081  if (test_threads) {
3082  KALDI_LOG << "Doing matrix unit test with "
3083  << num_threads << " threads.";
3084  std::vector<std::thread*> threads;
3085  for (int32 i = 0; i < num_threads - 1; i++)
3086  threads.push_back(new std::thread(kaldi::CudaMatrixUnitTest<float>));
3087  // the last thread running is the main thread.
3088  kaldi::CudaMatrixUnitTest<float>();
3089  for (size_t i = 0; i < threads.size(); i++) {
3090  threads[i]->join();
3091  delete threads[i];
3092  }
3093  } else {
3094  kaldi::CudaMatrixUnitTest<float>();
3095  }
3096 
3097 #if HAVE_CUDA == 1
3098  if (CuDevice::Instantiate().DoublePrecisionSupported()) {
3099  kaldi::CudaMatrixUnitTest<double>();
3100  } else {
3101  KALDI_WARN << "Double precision not supported";
3102  }
3103 #else
3104  kaldi::CudaMatrixUnitTest<double>();
3105 #endif
3106 
3107  if (loop == 0)
3108  KALDI_LOG << "Tests without GPU use succeeded.";
3109  else
3110  KALDI_LOG << "Tests with GPU use (if available) succeeded.";
3111 #if HAVE_CUDA == 1
3112  } // No for loop if 'HAVE_CUDA != 1',
3113  CuDevice::Instantiate().PrintProfile();
3114 #endif
3115  return 0;
3116 }
static void UnitTestCuMatrixAdd()
static void UnitTestCuMatrixMulRowsVec()
void UnitTestCuMatrixCopyCross2()
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Add(const Real alpha)
Add a scalar to each element.
static void UnitTestCuMatrixCopyRowsFromVec()
Real Min() const
Returns minimum element of matrix.
void ApplyCeiling(Real ceiling_val)
Definition: cu-matrix.h:455
static void UnitTestCuMatrixAddMatMatElements()
Packed symmetric matrix class.
Definition: matrix-common.h:62
static void UnitTestCuMatrixAddMatMatBatched()
void SoftHinge(const CuMatrixBase< Real > &src)
Apply the function y = log(1 + exp(x)), to each element.
Definition: cu-matrix.cc:1555
void ApplyPow(Real power)
Definition: cu-matrix.h:438
void Tanh(const MatrixBase< Real > &src)
Set each element to the tanh of the corresponding element of "src".
static void UnitTestCuMatrixCopyToRows()
void CopyFromVec(const std::vector< T > &src)
This function resizes if needed.
Definition: cu-array-inl.h:120
static int32 DoubleFactorial(int32 i)
void GroupMax(const CuMatrixBase< Real > &src)
Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j where G = x.NumCols() / y.NumCols() must be an integer.
Definition: cu-matrix.cc:1617
static void UnitTestCuMatrixAddRows()
void MulTp(const CuTpMatrix< Real > &M, const MatrixTransposeType trans)
Multiplies this vector by lower-triangular matrix: *this <– *this *M.
Definition: cu-vector.cc:727
void UnitTestSwapCu2Cu()
void Write(std::ostream &os, bool binary) const
Definition: cu-matrix.cc:502
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
const CuSubVector< Real > Row(MatrixIndexT i) const
Definition: cu-matrix.h:670
void Transpose()
Transpose the matrix.
static void UnitTestCuMatrixScale()
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
static void UnitTestCuMatrixMin()
static void UnitTestCuLogSoftmax()
void CopyToVec(std::vector< T > *dst) const
This function resizes *dst if needed.
Definition: cu-array-inl.h:177
void AddSmatMat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, Real beta)
(*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
Definition: cu-matrix.cc:1024
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
Definition: cu-matrix.cc:447
void AddTpMat(const Real alpha, const TpMatrix< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*A*B.
Definition: kaldi-matrix.h:700
void AddToElements(Real alpha, const CuArrayBase< int32 > &elements)
This is a rather special purpose function; we might generalize it later by adding a transpose-type op...
Definition: cu-matrix.cc:3344
static void UnitTestCuMatrixAddRowRanges()
void AddMatTp(const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuTpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*A*B.
Definition: cu-matrix.h:641
static void UnitTestCuMatrixSoftHinge()
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Real Cond() const
Returns condition number by computing Svd.
static void UnitTestCuMatrixApplyExpSpecial()
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void CopyColFromMat(const CuMatrixBase< Real > &mat, MatrixIndexT col)
Definition: cu-vector.cc:103
static void UnitTextCuMatrixAddMatSmat()
static void UnitTestCuMatrixReduceSum()
Real Trace() const
Definition: sp-matrix.cc:171
void AddRowRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, do (*this)(r, c) += src(j, c), where j ranges from ind...
Definition: cu-matrix.cc:2931
Real Max() const
Returns maximum element of matrix.
Real Sum() const
Definition: cu-vector.cc:297
bool WithProb(BaseFloat prob, struct RandomState *state)
Definition: kaldi-math.cc:72
void AddElements(Real alpha, const std::vector< MatrixElement< Real > > &input)
Definition: cu-matrix.cc:3277
Real Trace(bool check_square=true) const
Returns trace of matrix.
Real Sum() const
Definition: cu-matrix.cc:3012
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:653
static void UnitTestCuMatrixApplyPow()
static void UnitTestCuMatrixGroupMaxDeriv()
static void UnitTestCuVectorAddColSumMatLarge()
static void UnitTestCuMatrixSetZeroAboveDiag()
void ApplyFloor(Real floor_val)
Definition: cu-matrix.h:451
void GroupPnormDeriv(const MatrixBase< Real > &input, const MatrixBase< Real > &output, Real power)
Calculate derivatives for the GroupPnorm function above...
void AddMat(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transA=kNoTrans)
*this += alpha * M [or M^T]
void AddVecToCols(Real alpha, const CuVectorBase< Real > &col, Real beta=1.0)
(for each column c of *this), c = alpha * col + beta * c
Definition: cu-matrix.cc:1232
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
void AddToDiag(const Real alpha)
Add a scalar to each diagonal element.
void AddSmat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A.
Definition: cu-matrix.cc:985
void MulTp(const TpMatrix< Real > &M, const MatrixTransposeType trans)
Multiplies this vector by lower-triangular matrix: *this <– *this *M.
static void UnitTestCuMatrixCopyUpperToLower()
void MulRowsGroupMat(const MatrixBase< Real > &src)
Divide each row into src.NumCols() equal groups, and then scale i'th row's j'th group of elements by ...
void AddToDiag(Real value)
Adds "value" to the diagonal elements of the matrix.
Definition: cu-matrix.cc:604
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
Definition: cu-matrix.cc:954
void DivRowsVec(const CuVectorBase< Real > &div)
divide i'th row by scale[i]
Definition: cu-matrix.cc:899
A class for storing matrices.
Definition: kaldi-matrix.h:823
void AddMatMatElements(const Real alpha, const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const Real beta)
*this = beta * *this + alpha * A .* B (.* element by element multiplication)
Definition: cu-matrix.cc:1447
static void UnitTestCuMatrixApplyExpLimited()
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
static void UnitTestCuMatrixSetRandUniform()
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301
void DivElements(const MatrixBase< Real > &A)
Divide each element by the corresponding element of a given matrix.
void Min(const CuMatrixBase< Real > &A)
Do, elementwise, *this = min(*this, A).
Definition: cu-matrix.cc:740
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
static void UnitTestCuMatrixCopyFromMat()
static void UnitTestCuMatrixAddMatDiagVec()
static void UnitTestCuMatrixMax()
static void UnitTestCuMatrixMulRowsGroupMat()
static void UnitTestCuMatrixAddMatBlocks1Trans()
static void UnitTestCuMatrixInvertElements()
static void UnitTestCuMatrixAddElements()
static void UnitTestCuMatrixCopyFromTp()
void GroupMax(const MatrixBase< Real > &src)
Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j Requires src.NumRows() == this->NumRows() an...
static void UnitTestCuMatrixApplyPowAbs()
void SetRandUniform()
Sets to numbers uniformly distributed on (0, 1)
void AddTpMat(const Real alpha, const CuTpMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*A*B.
Definition: cu-matrix.h:632
static void UnitTestCuMatrixAddToDiag()
static void UnitTestCuMatrixMulRows()
static void UnitTestCuMatrixObjfDeriv()
void InvertElements()
invert the matrix by elements.
Definition: cu-matrix.cc:932
bool ApproxEqual(const CuMatrixBase< Real > &other, float tol=0.01) const
True if ((*this)-other).FrobeniusNorm() <= tol * this->FrobeniusNorm()
Definition: cu-matrix.cc:2137
static void UnitTestCuSigmoid()
static void UnitTestCuMatrixGroupMax()
bool IsUnit(Real tol=0.001) const
Definition: cu-matrix.cc:629
static void InitRand(VectorBase< Real > *v)
static void UnitTestCuMatrixCopyColsFromVec()
void SetRandn()
< Set to unit matrix.
void Lookup(const std::vector< Int32Pair > &indexes, Real *output) const
Definition: cu-matrix.cc:3370
static void UnitTestCuMatrixEqualElementMask()
static void UnitTestCuMatrixSymInvertPosDef()
void UnitTestCuTanh()
void SetVerboseLevel(int32 i)
This should be rarely used, except by programs using Kaldi as library; command-line programs set the ...
Definition: kaldi-error.h:64
void AddMatBlocks(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
This function is like AddMat (it does *this += alpha * src), except that it supports cases where *thi...
Definition: cu-matrix.cc:1119
void Scale(Real value)
Definition: cu-matrix.cc:644
static void UnitTestCuMatrixAddMatMat()
void SymInvertPosDef()
Inversion for positive definite symmetric matrices.
Definition: cu-matrix.cc:2111
static void UnitTestCuMatrixSetRandn()
static void UnitTestCuDiffXent()
static void UnitTestCuMatrixLookup()
static void UnitTestCuMatrixAddCols()
static void UnitTextCuMatrixAddSmatMat()
void CompObjfAndDeriv(const std::vector< MatrixElement< Real > > &elements, const CuMatrix< Real > &A, Real *tot_objf, Real *tot_weight)
Here, A is interpreted as a matrix of probabilities, and "elements" as a list of posteriors (possibly...
Definition: cu-matrix.cc:1661
static void UnitTestCuVectorAddVec()
void SoftHinge(const MatrixBase< Real > &src)
Set each element to y = log(1 + exp(x))
void AddMatMatBatched(const Real alpha, std::vector< CuSubMatrix< Real > * > &C, const std::vector< CuSubMatrix< Real > * > &A, MatrixTransposeType transA, const std::vector< CuSubMatrix< Real > * > &B, MatrixTransposeType transB, const Real beta)
Does multiple matrix multiplications, executing them in parallel using cuBLAS's gemmBatched if we are...
Definition: cu-matrix.cc:2207
static void UnitTestCuMatrixApplyLog()
static void UnitTextCuMatrixAddSmat()
int main()
static void UnitTestCuApproxEqual()
int32 MatrixIndexT
Definition: matrix-common.h:98
void CopyFromVec(const CuVectorBase< Real > &src)
Copy functions; these will crash if the dimensions do not match.
Definition: cu-vector.cc:1078
static void UnitTestCuMatrixHeaviside()
static void UnitTestCuSubMatrix()
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void Max(const CuMatrixBase< Real > &A)
Do, elementwise, *this = max(*this, A).
Definition: cu-matrix.cc:715
static void UnitTestCuVectorAddTpVec()
double Log(double x)
Definition: kaldi-math.h:100
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261
void ApplyPowAbs(Real power, bool include_sign=false)
Definition: cu-matrix.h:443
static void UnitTestCuMatrixSymAddMat2()
static void UnitTestCuMatrixIO()
void AddColSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the columns of the matrix, add to vector.
Definition: cu-vector.cc:1298
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
Definition: cu-matrix.cc:1534
void Scale(Real alpha)
Multiply each element with a scalar value.
void Add(Real value)
Definition: cu-matrix.cc:582
void DiffXent(const CuArrayBase< int32 > &tgt, CuVector< Real > *log_post_tgt)
Differentiate the block [softmax+cross-entropy] : dE/da = posterior_mat - target_mat, 'E' is error function, 'a' is activation on softmax input.
Definition: cu-matrix.cc:1957
void Swap(Matrix< Real > *mat)
Definition: cu-matrix.cc:123
void AddToRows(Real alpha, const CuArrayBase< MatrixIndexT > &indexes, CuMatrixBase< Real > *dst) const
For each row i of *this, adds this->Row(i) to dst->Row(indexes(i)) if indexes(i) >= 0...
Definition: cu-matrix.cc:2869
void SetZero()
Math operations, some calling kernels.
Definition: cu-matrix.cc:509
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
Definition: cu-matrix.cc:1717
static void UnitTestCuVectorInvertElements()
void AddVecToRows(const Real alpha, const VectorBase< OtherReal > &v)
[each row of *this] += alpha * v
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
static void UnitTestCuMatrixApplyFloor()
static void UnitTestCuMatrixAddMat()
static void UnitTestCuFindRowMaxId()
static void UnitTestCuDiffLogSoftmax()
struct rnnlm::@11::@12 n
static void UnitTestCuMatrixAdd2()
void SymAddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transA, Real beta)
*this = beta * *this + alpha * M M^T, for symmetric matrices.
Definition: cu-matrix.cc:1353
static void UnitTestCuMatrixAddMatBlocks1()
void CopyColsFromVec(const CuVectorBase< Real > &v)
Copies vector into matrix, column-by-column.
Definition: cu-matrix.cc:2376
static void UnitTestCuVectorAddRowSumMat()
void CopyFromTp(const TpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given tpmatrix. (no resize is done).
void SetRandn()
Sets to random values of a normal distribution.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
#define KALDI_ERR
Definition: kaldi-error.h:147
static void UnitTestCuMatrixAddToRows()
static void UnitTestCuDiffSigmoid()
static void UnitTestCuMatrixApplyCeiling()
static void UnitTestCuMatrixMulElements()
void GroupPnorm(const CuMatrixBase< Real > &src, Real pow)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ (power)) ^ (1 / power) where G = x...
Definition: cu-matrix.cc:1576
Packed symmetric matrix class.
Definition: matrix-common.h:63
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
Definition: cu-matrix.cc:1291
void Cholesky(CuMatrixBase< Real > *inv_cholesky=NULL)
This function sets *this to the Cholesky factor of *this (i.e.
Definition: cu-matrix.cc:1987
static void UnitTestCuSoftmax()
#define KALDI_WARN
Definition: kaldi-error.h:150
static void UnitTestCuMatrixApplyExp()
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
void DivElements(const CuMatrixBase< Real > &A)
Divide two matrices elementwise: C = C ./ A.
Definition: cu-matrix.cc:691
void DiffSoftmaxPerRow(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the softmax function.
Definition: cu-matrix.cc:1868
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
Real Sum() const
Returns sum of all elements in matrix.
static void UnitTestCuMatrixAddDiagVecMat()
void GroupMaxDeriv(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
Definition: cu-matrix.cc:874
void Scale(Real alpha)
Multiplies all elements by this constant.
void ApplyCeiling(Real ceiling_val)
Definition: kaldi-matrix.h:358
void UnitTestSwapCu2M()
static void UnitTestCuMatrixDivElements()
void MulElements(const MatrixBase< Real > &A)
Element by element multiplication with a given matrix.
void DiffTanh(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the tanh function.
Definition: cu-matrix.cc:1809
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
void SetRandn()
Set vector to random normally-distributed noise.
static void UnitTestCuMatrixSetMatMatDivMat()
static void UnitTestCuMatrixAddMatBlocks2()
void FindRowMaxId(CuArray< int32 > *id) const
Find the id of the maximal element for each row (resizes the 'id' array to the appropriate size)...
Definition: cu-matrix.cc:1829
void MulColsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = (*this) * diag(scale).
void Heaviside(const CuMatrixBase< Real > &src)
Set each element to the Heaviside function of the corresponding element of "src", which we define as ...
Definition: cu-matrix.cc:2435
void AddVec(Real alpha, const CuVectorBase< Real > &vec, Real beta=1.0)
Definition: cu-vector.cc:1237
void UnitTestCheck()
void MulRowsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = diag(scale) * (*this).
void AddSmatMat(Real alpha, const SparseMatrix< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, Real beta)
(*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
void DiffSigmoid(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the sigmoid function.
Definition: cu-matrix.cc:1764
void MulColsVec(const CuVectorBase< Real > &scale)
scale i'th column by scale[i]
Definition: cu-matrix.cc:765
static void UnitTestCuMatrixSet()
void SumColumnRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, sets (*this)(r, c) to the sum src(r, j), where j ranges from indexes[c].first through indexes[c].second - 1.
Definition: cu-matrix.cc:2893
static void UnitTestCuMatrixDiffGroupPnorm()
static void UnitTestCuMatrixAddToElements()
void GroupPnorm(const MatrixBase< Real > &src, Real power)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / power).
void ApplyExpSpecial()
Definition: cu-matrix.h:468
static void UnitTestCuMatrixTranspose()
static void UnitTestCuDiffSoftmax()
void ApplyExpLimited(Real lower_limit, Real upper_limit)
Definition: cu-matrix.h:464
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
void InvertElements()
Inverts all the elements of the matrix.
static void UnitTestCuMatrixCopyCols()
void DiffLogSoftmaxPerRow(const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv)
Differentiate backward through the log softmax function.
Definition: cu-matrix.cc:1903
void DiffGroupPnorm(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv, Real power)
Differentiate backward through the GroupPnorm function.
Definition: cu-matrix.cc:841
void AddTpVec(const Real alpha, const TpMatrix< Real > &M, const MatrixTransposeType trans, const VectorBase< Real > &v, const Real beta)
Add triangular matrix times vector: this <– beta*this + alpha*M*v.
A class representing a vector.
Definition: kaldi-vector.h:406
void SetRandn(BaseFloat zero_prob)
Sets up to a pseudo-randomly initialized matrix, with each element zero with probability zero_prob an...
void InvertElements()
Invert all elements.
static void UnitTestCuMatrixSigmoid()
void UnitTestCuMatrixCopyCross()
static void UnitTestCuMatrixGroupPnorm()
void AddSmat(Real alpha, const SparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A [or A^T].
void AddVecToCols(const Real alpha, const VectorBase< OtherReal > &v)
[each col of *this] += alpha * v
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
static void UnitTestCuMatrixDivRowsVec()
void Read(std::istream &is, bool binary)
I/O functions.
Definition: cu-matrix.cc:494
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void CudaMatrixUnitTest()
void CopyFromTp(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:280
static void UnitTestCuMatrixCopyRows()
static void UnitTestCuMatrixReduceMax()
static void UnitTestCuMatrixSumColumnRanges()
static void UnitTestCuDiffTanh()
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
static void UnitTestCuMatrixAddVecToRows()
MatrixTransposeType
Definition: matrix-common.h:32
void MulRowsGroupMat(const CuMatrixBase< Real > &src)
divide each row into src.NumCols() groups, and then scale i'th row's jth group of elements by src[i...
Definition: cu-matrix.cc:816
void Scale(Real value)
Definition: cu-vector.cc:1216
static void UnitTestCuMatrixAddTpMat()
void AddDiagMatMat(Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const MatrixBase< Real > &N, MatrixTransposeType transN, Real beta=1.0)
Add the diagonal of a matrix product: *this = diag(M N), assuming the "trans" arguments are both kNoT...
static void UnitTestCuMatrixAddVecToCols()
static void UnitTestCuMatrixAddMatTp()
static void UnitTestCuMatrixApplyHeaviside()
int32_cuda second
Definition: cu-matrixdim.h:80
void CopyToRows(const CuArrayBase< Real *> &dst) const
For each row r of this matrix, copies it to the array of floats at the location given by dst[r]...
Definition: cu-matrix.cc:2744
static void UnitTestCuCopy()
static void UnitTestCuMatrixAddVecVec()
static void UnitTestCuMatrixMulColsVec()
static void RandZeroToOneMatrix(MatrixBase< Real > *mat)
void AddDiagVecMat(const Real alpha, const VectorBase< Real > &v, const MatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
static void UnitTestCuVectorAddRowSumMatLarge()
void LogSoftMaxPerRow(const CuMatrixBase< Real > &src)
LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row...
Definition: cu-matrix.cc:1740
void GroupMaxDeriv(const MatrixBase< Real > &input, const MatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
void AddMatTp(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const TpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
this <– beta*this + alpha*A*B.
Definition: kaldi-matrix.h:725
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void AddColSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of columns of M) + beta * *this.
static void UnitTestCuVectorAddColSumMat()
void SetZero()
Set vector to all zeros.
void ApplyFloor(Real floor_val)
Definition: kaldi-matrix.h:354
void AddTpVec(const Real alpha, const CuTpMatrix< Real > &M, const MatrixTransposeType trans, const CuVectorBase< Real > &v, const Real beta)
Add triangular matrix times vector: this <– beta*this + alpha*M*v.
Definition: cu-vector.cc:698
void ApplyPow(Real power)
Definition: kaldi-matrix.h:341
void CopyRowsFromVec(const VectorBase< Real > &v)
This function has two modes of operation.
static void UnitTestCuVectorMulTp()
void Tanh(const CuMatrixBase< Real > &src)
Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src).
Definition: cu-matrix.cc:1786
void AddMatSmat(Real alpha, const MatrixBase< Real > &A, const SparseMatrix< Real > &B, MatrixTransposeType transB, Real beta)
(*this) = alpha * A * op(B) + beta * (*this), where B is sparse and op(B) is either B or trans(B) dep...
void Invert(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Definition: sp-matrix.cc:219
#define KALDI_LOG
Definition: kaldi-error.h:153
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * v (with casting between floats and doubles) ...
void CopyToVec(VectorBase< OtherReal > *dst) const
Definition: cu-vector.cc:938
static void UnitTestCuMatrixCopyLowerToUpper()
void Set(Real value)
Definition: cu-matrix.cc:531
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
Definition: cu-matrix.cc:792
static void UnitTestCuMatrixReduceMin()
Sub-matrix representation.
Definition: kaldi-matrix.h:988
void AddMatSmat(Real alpha, const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType transB, Real beta)
(*this) = alpha * A * op(B) + beta * (*this), where B is sparse and op(B) is either B or trans(B) dep...
Definition: cu-matrix.cc:1080
void CopyColsFromVec(const VectorBase< Real > &v)
Copies vector into matrix, column-by-column.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265
void AddVecVec(Real alpha, const CuVectorBase< Real > &x, const CuVectorBase< Real > &y)
A = alpha * x * y^T + A .
Definition: cu-matrix.cc:1329
void AddRowSumMat(Real alpha, const CuMatrixBase< Real > &mat, Real beta=1.0)
Sum the rows of the matrix, add to vector.
Definition: cu-vector.cc:1277
void Set(Real)
Sets all elements to a specific value.
void SetZeroAboveDiag()
Zeroes all elements for which col > row.
Definition: cu-matrix.cc:554
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95
void ApplyPowAbs(Real power, bool include_sign=false)
Definition: kaldi-matrix.h:346
static void UnitTestCuMatrixTraceMatMat()
void Sigmoid(const MatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src".
const Real * RowData(MatrixIndexT r) const
Get raw row pointer (const).
Definition: cu-matrix.h:740
static void UnitTestCuCholesky()
int32_cuda first
Definition: cu-matrixdim.h:79