jama-svd.h
Go to the documentation of this file.
1 // matrix/jama-svd.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 // This file consists of a port and modification of materials from
21 // JAMA: A Java Matrix Package
22 // under the following notice: This software is a cooperative product of
23 // The MathWorks and the National Institute of Standards and Technology (NIST)
24 // which has been released to the public. This notice and the original code are
25 // available at http://math.nist.gov/javanumerics/jama/domain.notice
26 
27 
28 #ifndef KALDI_MATRIX_JAMA_SVD_H_
29 #define KALDI_MATRIX_JAMA_SVD_H_ 1
30 
31 
32 #include "matrix/kaldi-matrix.h"
33 #include "matrix/sp-matrix.h"
34 #include "matrix/cblas-wrappers.h"
35 
36 namespace kaldi {
37 
38 #if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
39 // using ATLAS as our math library, which doesn't have SVD -> need
40 // to implement it.
41 
42 // This routine is a modified form of jama_svd.h which is part of the TNT distribution.
43 // (originally comes from JAMA).
44 
64 template<typename Real>
65 bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in,
66  MatrixBase<Real> *U_in,
67  MatrixBase<Real> *V_in) { // Destructive!
68  KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this);
69  int wantu = (U_in != NULL), wantv = (V_in != NULL);
70  Matrix<Real> Utmp, Vtmp;
71  MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp);
72  VectorBase<Real> &s = *s_in;
73 
74  int m = num_rows_, n = num_cols_;
75  KALDI_ASSERT(m>=n && m != 0 && n != 0);
76  if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n);
77  if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n);
78  KALDI_ASSERT((int)s.Dim() == n); // n<=m so n is min.
79 
80  int nu = n;
81  U.SetZero(); // make sure all zero.
82  Vector<Real> e(n);
83  Vector<Real> work(m);
84  MatrixBase<Real> &A(*this);
85  Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(),
86  *udata = U.Data(), *vdata = V.Data();
87  int astride = static_cast<int>(A.Stride()),
88  ustride = static_cast<int>(U.Stride()),
89  vstride = static_cast<int>(V.Stride());
90  int i = 0, j = 0, k = 0;
91 
92  // Reduce A to bidiagonal form, storing the diagonal elements
93  // in s and the super-diagonal elements in e.
94 
95  int nct = std::min(m-1, n);
96  int nrt = std::max(0, std::min(n-2, m));
97  for (k = 0; k < std::max(nct, nrt); k++) {
98  if (k < nct) {
99 
100  // Compute the transformation for the k-th column and
101  // place the k-th diagonal in s(k).
102  // Compute 2-norm of k-th column without under/overflow.
103  s(k) = 0;
104  for (i = k; i < m; i++) {
105  s(k) = hypot(s(k), A(i, k));
106  }
107  if (s(k) != 0.0) {
108  if (A(k, k) < 0.0) {
109  s(k) = -s(k);
110  }
111  for (i = k; i < m; i++) {
112  A(i, k) /= s(k);
113  }
114  A(k, k) += 1.0;
115  }
116  s(k) = -s(k);
117  }
118  for (j = k+1; j < n; j++) {
119  if ((k < nct) && (s(k) != 0.0)) {
120 
121  // Apply the transformation.
122 
123  Real t = cblas_Xdot(m - k, adata + astride*k + k, astride,
124  adata + astride*k + j, astride);
125  /*for (i = k; i < m; i++) {
126  t += adata[i*astride + k]*adata[i*astride + j]; // A(i, k)*A(i, j); // 3
127  }*/
128  t = -t/A(k, k);
129  cblas_Xaxpy(m - k, t, adata + k*astride + k, astride,
130  adata + k*astride + j, astride);
131  /*for (i = k; i < m; i++) {
132  adata[i*astride + j] += t*adata[i*astride + k]; // A(i, j) += t*A(i, k); // 5
133  }*/
134  }
135 
136  // Place the k-th row of A into e for the
137  // subsequent calculation of the row transformation.
138 
139  e(j) = A(k, j);
140  }
141  if (wantu & (k < nct)) {
142 
143  // Place the transformation in U for subsequent back
144  // multiplication.
145 
146  for (i = k; i < m; i++) {
147  U(i, k) = A(i, k);
148  }
149  }
150  if (k < nrt) {
151 
152  // Compute the k-th row transformation and place the
153  // k-th super-diagonal in e(k).
154  // Compute 2-norm without under/overflow.
155  e(k) = 0;
156  for (i = k+1; i < n; i++) {
157  e(k) = hypot(e(k), e(i));
158  }
159  if (e(k) != 0.0) {
160  if (e(k+1) < 0.0) {
161  e(k) = -e(k);
162  }
163  for (i = k+1; i < n; i++) {
164  e(i) /= e(k);
165  }
166  e(k+1) += 1.0;
167  }
168  e(k) = -e(k);
169  if ((k+1 < m) & (e(k) != 0.0)) {
170 
171  // Apply the transformation.
172 
173  for (i = k+1; i < m; i++) {
174  work(i) = 0.0;
175  }
176  for (j = k+1; j < n; j++) {
177  for (i = k+1; i < m; i++) {
178  workdata[i] += edata[j] * adata[i*astride + j]; // work(i) += e(j)*A(i, j); // 5
179  }
180  }
181  for (j = k+1; j < n; j++) {
182  Real t(-e(j)/e(k+1));
183  cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1,
184  adata + (k+1)*astride + j, astride);
185  /*
186  for (i = k+1; i < m; i++) {
187  adata[i*astride + j] += t*workdata[i]; // A(i, j) += t*work(i); // 5
188  }*/
189  }
190  }
191  if (wantv) {
192 
193  // Place the transformation in V for subsequent
194  // back multiplication.
195 
196  for (i = k+1; i < n; i++) {
197  V(i, k) = e(i);
198  }
199  }
200  }
201  }
202 
203  // Set up the final bidiagonal matrix or order p.
204 
205  int p = std::min(n, m+1);
206  if (nct < n) {
207  s(nct) = A(nct, nct);
208  }
209  if (m < p) {
210  s(p-1) = 0.0;
211  }
212  if (nrt+1 < p) {
213  e(nrt) = A(nrt, p-1);
214  }
215  e(p-1) = 0.0;
216 
217  // If required, generate U.
218 
219  if (wantu) {
220  for (j = nct; j < nu; j++) {
221  for (i = 0; i < m; i++) {
222  U(i, j) = 0.0;
223  }
224  U(j, j) = 1.0;
225  }
226  for (k = nct-1; k >= 0; k--) {
227  if (s(k) != 0.0) {
228  for (j = k+1; j < nu; j++) {
229  Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride);
230  //for (i = k; i < m; i++) {
231  // t += udata[i*ustride + k]*udata[i*ustride + j]; // t += U(i, k)*U(i, j); // 8
232  // }
233  t = -t/U(k, k);
234  cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride,
235  udata + k*ustride + j, ustride);
236  /*for (i = k; i < m; i++) {
237  udata[i*ustride + j] += t*udata[i*ustride + k]; // U(i, j) += t*U(i, k); // 4
238  }*/
239  }
240  for (i = k; i < m; i++ ) {
241  U(i, k) = -U(i, k);
242  }
243  U(k, k) = 1.0 + U(k, k);
244  for (i = 0; i < k-1; i++) {
245  U(i, k) = 0.0;
246  }
247  } else {
248  for (i = 0; i < m; i++) {
249  U(i, k) = 0.0;
250  }
251  U(k, k) = 1.0;
252  }
253  }
254  }
255 
256  // If required, generate V.
257 
258  if (wantv) {
259  for (k = n-1; k >= 0; k--) {
260  if ((k < nrt) & (e(k) != 0.0)) {
261  for (j = k+1; j < nu; j++) {
262  Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride,
263  vdata + (k+1)*vstride + j, vstride);
264  /*Real t (0.0);
265  for (i = k+1; i < n; i++) {
266  t += vdata[i*vstride + k]*vdata[i*vstride + j]; // t += V(i, k)*V(i, j); // 7
267  }*/
268  t = -t/V(k+1, k);
269  cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride,
270  vdata + (k+1)*vstride + j, vstride);
271  /*for (i = k+1; i < n; i++) {
272  vdata[i*vstride + j] += t*vdata[i*vstride + k]; // V(i, j) += t*V(i, k); // 7
273  }*/
274  }
275  }
276  for (i = 0; i < n; i++) {
277  V(i, k) = 0.0;
278  }
279  V(k, k) = 1.0;
280  }
281  }
282 
283  // Main iteration loop for the singular values.
284 
285  int pp = p-1;
286  int iter = 0;
287  // note: -52.0 is from Jama code; the -23 is the extension
288  // to float, because mantissa length in (double, float)
289  // is (52, 23) bits respectively.
290  Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
291  // Note: the -966 was taken from Jama code, but the -120 is a guess
292  // of how to extend this to float... the exponent in double goes
293  // from -1022 .. 1023, and in float from -126..127. I'm not sure
294  // what the significance of 966 is, so -120 just represents a number
295  // that's a bit less negative than -126. If we get convergence
296  // failure in float only, this may mean that we have to make the
297  // -120 value less negative.
298  Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0: -966.0 ));
299 
300  while (p > 0) {
301  int k = 0;
302  int kase = 0;
303 
304  if (iter == 500 || iter == 750) {
305  KALDI_WARN << "Svd taking a long time: making convergence criterion less exact.";
306  eps = pow(static_cast<Real>(0.8), eps);
307  tiny = pow(static_cast<Real>(0.8), tiny);
308  }
309  if (iter > 1000) {
310  KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n;
311  return false;
312  }
313 
314  // This section of the program inspects for
315  // negligible elements in the s and e arrays. On
316  // completion the variables kase and k are set as follows.
317 
318  // kase = 1 if s(p) and e(k-1) are negligible and k < p
319  // kase = 2 if s(k) is negligible and k < p
320  // kase = 3 if e(k-1) is negligible, k < p, and
321  // s(k), ..., s(p) are not negligible (qr step).
322  // kase = 4 if e(p-1) is negligible (convergence).
323 
324  for (k = p-2; k >= -1; k--) {
325  if (k == -1) {
326  break;
327  }
328  if (std::abs(e(k)) <=
329  tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) {
330  e(k) = 0.0;
331  break;
332  }
333  }
334  if (k == p-2) {
335  kase = 4;
336  } else {
337  int ks;
338  for (ks = p-1; ks >= k; ks--) {
339  if (ks == k) {
340  break;
341  }
342  Real t( (ks != p ? std::abs(e(ks)) : 0.) +
343  (ks != k+1 ? std::abs(e(ks-1)) : 0.));
344  if (std::abs(s(ks)) <= tiny + eps*t) {
345  s(ks) = 0.0;
346  break;
347  }
348  }
349  if (ks == k) {
350  kase = 3;
351  } else if (ks == p-1) {
352  kase = 1;
353  } else {
354  kase = 2;
355  k = ks;
356  }
357  }
358  k++;
359 
360  // Perform the task indicated by kase.
361 
362  switch (kase) {
363 
364  // Deflate negligible s(p).
365 
366  case 1: {
367  Real f(e(p-2));
368  e(p-2) = 0.0;
369  for (j = p-2; j >= k; j--) {
370  Real t( hypot(s(j), f));
371  Real cs(s(j)/t);
372  Real sn(f/t);
373  s(j) = t;
374  if (j != k) {
375  f = -sn*e(j-1);
376  e(j-1) = cs*e(j-1);
377  }
378  if (wantv) {
379  for (i = 0; i < n; i++) {
380  t = cs*V(i, j) + sn*V(i, p-1);
381  V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1);
382  V(i, j) = t;
383  }
384  }
385  }
386  }
387  break;
388 
389  // Split at negligible s(k).
390 
391  case 2: {
392  Real f(e(k-1));
393  e(k-1) = 0.0;
394  for (j = k; j < p; j++) {
395  Real t(hypot(s(j), f));
396  Real cs( s(j)/t);
397  Real sn(f/t);
398  s(j) = t;
399  f = -sn*e(j);
400  e(j) = cs*e(j);
401  if (wantu) {
402  for (i = 0; i < m; i++) {
403  t = cs*U(i, j) + sn*U(i, k-1);
404  U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1);
405  U(i, j) = t;
406  }
407  }
408  }
409  }
410  break;
411 
412  // Perform one qr step.
413 
414  case 3: {
415 
416  // Calculate the shift.
417 
418  Real scale = std::max(std::max(std::max(std::max(
419  std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))),
420  std::abs(s(k))), std::abs(e(k)));
421  Real sp = s(p-1)/scale;
422  Real spm1 = s(p-2)/scale;
423  Real epm1 = e(p-2)/scale;
424  Real sk = s(k)/scale;
425  Real ek = e(k)/scale;
426  Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0;
427  Real c = (sp*epm1)*(sp*epm1);
428  Real shift = 0.0;
429  if ((b != 0.0) || (c != 0.0)) {
430  shift = std::sqrt(b*b + c);
431  if (b < 0.0) {
432  shift = -shift;
433  }
434  shift = c/(b + shift);
435  }
436  Real f = (sk + sp)*(sk - sp) + shift;
437  Real g = sk*ek;
438 
439  // Chase zeros.
440 
441  for (j = k; j < p-1; j++) {
442  Real t = hypot(f, g);
443  Real cs = f/t;
444  Real sn = g/t;
445  if (j != k) {
446  e(j-1) = t;
447  }
448  f = cs*s(j) + sn*e(j);
449  e(j) = cs*e(j) - sn*s(j);
450  g = sn*s(j+1);
451  s(j+1) = cs*s(j+1);
452  if (wantv) {
453  cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn);
454  /*for (i = 0; i < n; i++) {
455  t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1]; // t = cs*V(i, j) + sn*V(i, j+1); // 13
456  vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1]; // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5
457  vdata[i*vstride + j] = t; // V(i, j) = t; // 4
458  }*/
459  }
460  t = hypot(f, g);
461  cs = f/t;
462  sn = g/t;
463  s(j) = t;
464  f = cs*e(j) + sn*s(j+1);
465  s(j+1) = -sn*e(j) + cs*s(j+1);
466  g = sn*e(j+1);
467  e(j+1) = cs*e(j+1);
468  if (wantu && (j < m-1)) {
469  cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn);
470  /*for (i = 0; i < m; i++) {
471  t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1]; // t = cs*U(i, j) + sn*U(i, j+1); // 7
472  udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1]; // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8
473  udata[i*ustride + j] = t; // U(i, j) = t; // 1
474  }*/
475  }
476  }
477  e(p-2) = f;
478  iter = iter + 1;
479  }
480  break;
481 
482  // Convergence.
483 
484  case 4: {
485 
486  // Make the singular values positive.
487 
488  if (s(k) <= 0.0) {
489  s(k) = (s(k) < 0.0 ? -s(k) : 0.0);
490  if (wantv) {
491  for (i = 0; i <= pp; i++) {
492  V(i, k) = -V(i, k);
493  }
494  }
495  }
496 
497  // Order the singular values.
498 
499  while (k < pp) {
500  if (s(k) >= s(k+1)) {
501  break;
502  }
503  Real t = s(k);
504  s(k) = s(k+1);
505  s(k+1) = t;
506  if (wantv && (k < n-1)) {
507  for (i = 0; i < n; i++) {
508  t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t;
509  }
510  }
511  if (wantu && (k < m-1)) {
512  for (i = 0; i < m; i++) {
513  t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t;
514  }
515  }
516  k++;
517  }
518  iter = 0;
519  p--;
520  }
521  break;
522  }
523  }
524  return true;
525 }
526 
527 #endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
528 
529 } // namespace kaldi
530 
531 #endif // KALDI_MATRIX_JAMA_SVD_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
float cblas_Xdot(const int N, const float *const X, const int incX, const float *const Y, const int incY)
struct rnnlm::@11::@12 n
#define KALDI_WARN
Definition: kaldi-error.h:150
void cblas_Xaxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY)
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void cblas_Xrot(const int N, float *X, const int incX, float *Y, const int incY, const float c, const float s)